From a791f742b470454738c393491e30972e1177c705 Mon Sep 17 00:00:00 2001
From: Mees Delzenne
Date: Mon, 29 Apr 2024 14:13:41 +0200
Subject: [PATCH] Flexible record id's (#3937)

---
 core/src/syn/lexer/duration.rs |  12 +-
 core/src/syn/lexer/mod.rs      |   7 +
 core/src/syn/lexer/number.rs   | 146 ++++++++-------
 core/src/syn/parser/basic.rs   |   7 +-
 core/src/syn/parser/mac.rs     |  33 ++++
 core/src/syn/parser/mod.rs     |   8 +
 core/src/syn/parser/thing.rs   | 330 +++++++++++++++++++++------------
 core/src/syn/token/mod.rs      |   2 +
 8 files changed, 348 insertions(+), 197 deletions(-)

diff --git a/core/src/syn/lexer/duration.rs b/core/src/syn/lexer/duration.rs
index 0ab777bf..15b2edbc 100644
--- a/core/src/syn/lexer/duration.rs
+++ b/core/src/syn/lexer/duration.rs
@@ -25,12 +25,21 @@ impl<'a> Lexer<'a> {
 	///
 	/// Expect the lexer to have already eaten the digits starting the duration.
 	pub fn lex_duration(&mut self) -> Token {
+		let backup = self.reader.offset();
 		match self.lex_duration_err() {
 			Ok(x) => {
+				self.scratch.clear();
 				self.duration = Some(x);
 				self.finish_token(TokenKind::Duration)
 			}
-			Err(e) => self.invalid_token(LexError::Duration(e)),
+			Err(e) => {
+				if self.flexible_ident {
+					self.reader.backup(backup);
+					return self.lex_ident();
+				}
+				self.scratch.clear();
+				self.invalid_token(LexError::Duration(e))
+			}
 		}
 	}
 
@@ -63,7 +72,6 @@ impl<'a> Lexer<'a> {
 			current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
 			current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
 		}
-		self.scratch.clear();
 
 		loop {
 			let Some(next) = self.reader.peek() else {
diff --git a/core/src/syn/lexer/mod.rs b/core/src/syn/lexer/mod.rs
index fc51b738..19dde1e5 100644
--- a/core/src/syn/lexer/mod.rs
+++ b/core/src/syn/lexer/mod.rs
@@ -84,6 +84,10 @@ pub struct Lexer<'a> {
 	/// like for example strings with escape characters.
 	scratch: String,
 
+	/// Allow the next parsed idents to be flexible, i.e. support idents which start with a
+	/// number.
+	pub flexible_ident: bool,
+
 	// below are a collection of storage for values produced by tokens.
 	// For performance reasons we wan't to keep the tokens as small as possible.
 	// As only some tokens have an additional value associated with them we don't store that value
@@ -117,6 +121,7 @@ impl<'a> Lexer<'a> {
 			last_offset: 0,
 			whitespace_span: None,
 			scratch: String::new(),
+			flexible_ident: false,
 			string: None,
 			datetime: None,
 			duration: None,
@@ -132,6 +137,7 @@ impl<'a> Lexer<'a> {
 	pub fn reset(&mut self) {
 		self.last_offset = 0;
 		self.scratch.clear();
+		self.flexible_ident = false;
 		self.whitespace_span = None;
 		self.string = None;
 		self.datetime = None;
@@ -155,6 +161,7 @@ impl<'a> Lexer<'a> {
 			last_offset: 0,
 			whitespace_span: None,
 			scratch: self.scratch,
+			flexible_ident: false,
 			string: self.string,
 			datetime: self.datetime,
 			duration: self.duration,
diff --git a/core/src/syn/lexer/number.rs b/core/src/syn/lexer/number.rs
index 28313e8d..14e36c13 100644
--- a/core/src/syn/lexer/number.rs
+++ b/core/src/syn/lexer/number.rs
@@ -15,23 +15,22 @@ pub enum Error {
 }
 
 impl Lexer<'_> {
+	pub fn finish_number_token(&mut self, kind: NumberKind) -> Token {
+		let mut str = mem::take(&mut self.scratch);
+		str.retain(|x| x != '_');
+		self.string = Some(str);
+		self.finish_token(TokenKind::Number(kind))
+	}
 	/// Lex only an integer.
 	/// Use when a number can be followed immediatly by a `.` like in a model version.
pub fn lex_only_integer(&mut self) -> Token { - match self.lex_only_integer_err() { - Ok(x) => x, - Err(e) => self.invalid_token(LexError::Number(e)), - } - } - - fn lex_only_integer_err(&mut self) -> Result { let Some(next) = self.reader.peek() else { - return Ok(self.eof_token()); + return self.eof_token(); }; // not a number, return a different token kind, for error reporting. if !next.is_ascii_digit() { - return Ok(self.next_token()); + return self.next_token(); } self.scratch.push(next as char); @@ -39,9 +38,7 @@ impl Lexer<'_> { // eat all the ascii digits while let Some(x) = self.reader.peek() { - if x == b'_' { - self.reader.next(); - } else if !x.is_ascii_digit() { + if !x.is_ascii_digit() && x != b'_' { break; } else { self.scratch.push(x as char); @@ -53,33 +50,25 @@ impl Lexer<'_> { match self.reader.peek() { Some(b'd' | b'f') => { // not an integer but parse anyway for error reporting. - return self.lex_suffix(false, true); + return self.lex_suffix(false, false); } - Some(x) if x.is_ascii_alphabetic() => return Err(self.invalid_suffix()), + Some(x) if x.is_ascii_alphabetic() => return self.invalid_suffix_token(), _ => {} } - self.string = Some(mem::take(&mut self.scratch)); - Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))) + self.finish_number_token(NumberKind::Integer) } - pub fn lex_number(&mut self, start: u8) -> Token { - match self.lex_number_err(start) { - Ok(x) => x, - Err(e) => self.invalid_token(LexError::Number(e)), - } - } /// Lex a number. /// /// Expects the digit which started the number as the start argument. - pub fn lex_number_err(&mut self, start: u8) -> Result { + pub fn lex_number(&mut self, start: u8) -> Token { debug_assert!(start.is_ascii_digit()); debug_assert_eq!(self.scratch, ""); self.scratch.push(start as char); loop { let Some(x) = self.reader.peek() else { - self.string = Some(mem::take(&mut self.scratch)); - return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))); + return self.finish_number_token(NumberKind::Integer); }; match x { b'0'..=b'9' => { @@ -87,10 +76,10 @@ impl Lexer<'_> { self.reader.next(); self.scratch.push(x as char); } - b'e' | b'E' => { + x @ (b'e' | b'E') => { // scientific notation self.reader.next(); - self.scratch.push('e'); + self.scratch.push(x as char); return self.lex_exponent(false); } b'.' => { @@ -104,33 +93,33 @@ impl Lexer<'_> { } else { // indexing a number self.reader.backup(backup); - self.string = Some(mem::take(&mut self.scratch)); - return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))); + return self.finish_number_token(NumberKind::Integer); } } - b'f' | b'd' => return self.lex_suffix(false, true), + b'f' | b'd' => return self.lex_suffix(false, false), // Oxc2 is the start byte of 'ยต' 0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => { // duration suffix, switch to lexing duration. - return Ok(self.lex_duration()); + return self.lex_duration(); } b'_' => { self.reader.next(); } b'a'..=b'z' | b'A'..=b'Z' => { - return Err(self.invalid_suffix()); - // invalid token, unexpected identifier character immediatly after number. - // Eat all remaining identifier like characters. 
+ if self.flexible_ident { + return self.lex_ident(); + } else { + return self.invalid_suffix_token(); + } } _ => { - self.string = Some(mem::take(&mut self.scratch)); - return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))); + return self.finish_number_token(NumberKind::Integer); } } } } - fn invalid_suffix(&mut self) -> Error { + fn invalid_suffix_token(&mut self) -> Token { // eat the whole suffix. while let Some(x) = self.reader.peek() { if !x.is_ascii_alphanumeric() { @@ -139,20 +128,29 @@ impl Lexer<'_> { self.reader.next(); } self.scratch.clear(); - Error::InvalidSuffix + self.invalid_token(LexError::Number(Error::InvalidSuffix)) } /// Lex a number suffix, either 'f' or 'dec'. - fn lex_suffix(&mut self, had_exponent: bool, can_be_duration: bool) -> Result { + fn lex_suffix(&mut self, had_mantissa: bool, had_exponent: bool) -> Token { match self.reader.peek() { Some(b'f') => { // float suffix self.reader.next(); if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) { - Err(self.invalid_suffix()) + if self.flexible_ident && !had_mantissa { + self.scratch.push('f'); + self.lex_ident() + } else { + self.invalid_suffix_token() + } } else { - self.string = Some(mem::take(&mut self.scratch)); - Ok(self.finish_token(TokenKind::Number(NumberKind::Float))) + let kind = if had_mantissa { + NumberKind::FloatMantissa + } else { + NumberKind::Float + }; + self.finish_number_token(kind) } } Some(b'd') => { @@ -160,44 +158,53 @@ impl Lexer<'_> { self.reader.next(); let checkpoint = self.reader.offset(); if !self.eat(b'e') { - if can_be_duration { + if !had_mantissa && !had_exponent { self.reader.backup(checkpoint - 1); - return Ok(self.lex_duration()); + return self.lex_duration(); + } else if !had_mantissa && self.flexible_ident { + self.scratch.push('d'); + return self.lex_ident(); } else { - return Err(self.invalid_suffix()); + return self.invalid_suffix_token(); } } if !self.eat(b'c') { - return Err(self.invalid_suffix()); + if self.flexible_ident { + self.scratch.push('d'); + self.scratch.push('e'); + return self.lex_ident(); + } else { + return self.invalid_suffix_token(); + } } if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) { - Err(self.invalid_suffix()) + self.invalid_suffix_token() } else { - self.string = Some(mem::take(&mut self.scratch)); - if had_exponent { - Ok(self.finish_token(TokenKind::Number(NumberKind::DecimalExponent))) + let kind = if had_exponent { + NumberKind::DecimalExponent } else { - Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal))) - } + NumberKind::Decimal + }; + self.finish_number_token(kind) } } + // Caller should ensure this is unreachable _ => unreachable!(), } } /// Lexes the mantissa of a number, i.e. `.8` in `1.8` - pub fn lex_mantissa(&mut self) -> Result { + pub fn lex_mantissa(&mut self) -> Token { loop { // lex_number already checks if there exists a digit after the dot. // So this will never fail the first iteration of the loop. let Some(x) = self.reader.peek() else { - self.string = Some(mem::take(&mut self.scratch)); - return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa))); + return self.finish_number_token(NumberKind::Mantissa); }; match x { - b'0'..=b'9' => { + b'0'..=b'9' | b'_' => { // next digit. 
self.reader.next(); self.scratch.push(x as char); @@ -208,25 +215,21 @@ impl Lexer<'_> { self.scratch.push('e'); return self.lex_exponent(true); } - b'_' => { - self.reader.next(); - } - b'f' | b'd' => return self.lex_suffix(false, false), + b'f' | b'd' => return self.lex_suffix(true, false), b'a'..=b'z' | b'A'..=b'Z' => { // invalid token, random identifier characters immediately after number. self.scratch.clear(); - return Err(Error::InvalidSuffix); + return self.invalid_suffix_token(); } _ => { - self.string = Some(mem::take(&mut self.scratch)); - return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa))); + return self.finish_number_token(NumberKind::Mantissa); } } } } /// Lexes the exponent of a number, i.e. `e10` in `1.1e10`; - fn lex_exponent(&mut self, had_mantissa: bool) -> Result { + fn lex_exponent(&mut self, had_mantissa: bool) -> Token { loop { match self.reader.peek() { Some(x @ b'-' | x @ b'+') => { @@ -238,30 +241,29 @@ impl Lexer<'_> { break; } _ => { + if self.flexible_ident && !had_mantissa { + return self.lex_ident(); + } // random other character, expected atleast one digit. - return Err(Error::DigitExpectedExponent); + return self.invalid_token(LexError::Number(Error::DigitExpectedExponent)); } } } self.reader.next(); loop { match self.reader.peek() { - Some(x @ b'0'..=b'9') => { + Some(x @ (b'0'..=b'9' | b'_')) => { self.reader.next(); self.scratch.push(x as char); } - Some(b'_') => { - self.reader.next(); - } - Some(b'f' | b'd') => return self.lex_suffix(true, false), + Some(b'f' | b'd') => return self.lex_suffix(had_mantissa, true), _ => { let kind = if had_mantissa { NumberKind::MantissaExponent } else { NumberKind::Exponent }; - self.string = Some(mem::take(&mut self.scratch)); - return Ok(self.finish_token(TokenKind::Number(kind))); + return self.finish_number_token(kind); } } } diff --git a/core/src/syn/parser/basic.rs b/core/src/syn/parser/basic.rs index 609d1de8..46284e8e 100644 --- a/core/src/syn/parser/basic.rs +++ b/core/src/syn/parser/basic.rs @@ -127,6 +127,7 @@ impl TokenValue for f32 { TokenKind::Number( NumberKind::Integer | NumberKind::Float + | NumberKind::FloatMantissa | NumberKind::Mantissa | NumberKind::MantissaExponent, ) => { @@ -152,6 +153,7 @@ impl TokenValue for f64 { TokenKind::Number( NumberKind::Integer | NumberKind::Float + | NumberKind::FloatMantissa | NumberKind::Mantissa | NumberKind::MantissaExponent, ) => { @@ -203,7 +205,10 @@ impl TokenValue for Number { Ok(Number::Float(x)) } TokenKind::Number( - NumberKind::Mantissa | NumberKind::MantissaExponent | NumberKind::Float, + NumberKind::Mantissa + | NumberKind::MantissaExponent + | NumberKind::Float + | NumberKind::FloatMantissa, ) => { let source = parser.lexer.string.take().unwrap(); // As far as I can tell this will never fail for valid integers. diff --git a/core/src/syn/parser/mac.rs b/core/src/syn/parser/mac.rs index 24f89dc5..ee38a734 100644 --- a/core/src/syn/parser/mac.rs +++ b/core/src/syn/parser/mac.rs @@ -149,6 +149,39 @@ macro_rules! enter_query_recursion { }}; } +#[macro_export] +macro_rules! 
enter_flexible_ident{ + ($name:ident = $this:expr => ($enabled:expr){ $($t:tt)* }) => {{ + struct Dropper<'a, 'b>(&'a mut $crate::syn::parser::Parser<'b>,bool); + impl Drop for Dropper<'_, '_> { + fn drop(&mut self) { + self.0.lexer.flexible_ident = self.1; + } + } + impl<'a> ::std::ops::Deref for Dropper<'_,'a>{ + type Target = $crate::syn::parser::Parser<'a>; + + fn deref(&self) -> &Self::Target{ + self.0 + } + } + + impl<'a> ::std::ops::DerefMut for Dropper<'_,'a>{ + fn deref_mut(&mut self) -> &mut Self::Target{ + self.0 + } + } + + let enabled = $this.lexer.flexible_ident; + $this.lexer.flexible_ident = $enabled; + #[allow(unused_mut)] + let mut $name = Dropper($this,enabled); + { + $($t)* + } + }}; +} + pub(super) use expected; pub(super) use unexpected; diff --git a/core/src/syn/parser/mod.rs b/core/src/syn/parser/mod.rs index c2de5b50..61d4dc52 100644 --- a/core/src/syn/parser/mod.rs +++ b/core/src/syn/parser/mod.rs @@ -79,6 +79,7 @@ pub struct Parser<'a> { token_buffer: TokenBuffer<4>, table_as_field: bool, legacy_strands: bool, + flexible_record_id: bool, object_recursion: usize, query_recursion: usize, } @@ -92,6 +93,7 @@ impl<'a> Parser<'a> { token_buffer: TokenBuffer::new(), table_as_field: false, legacy_strands: false, + flexible_record_id: true, object_recursion: 100, query_recursion: 20, } @@ -117,6 +119,11 @@ impl<'a> Parser<'a> { self.legacy_strands = value; } + /// Set whether to allow record-id's which don't adheare to regular ident rules. + pub fn allow_fexible_record_id(&mut self, value: bool) { + self.flexible_record_id = value; + } + /// Reset the parser state. Doesnt change the position of the parser in buffer. pub fn reset(&mut self) { self.last_span = Span::empty(); @@ -132,6 +139,7 @@ impl<'a> Parser<'a> { last_span: Span::empty(), token_buffer: TokenBuffer::new(), legacy_strands: self.legacy_strands, + flexible_record_id: self.flexible_record_id, table_as_field: false, object_recursion: self.object_recursion, query_recursion: self.query_recursion, diff --git a/core/src/syn/parser/thing.rs b/core/src/syn/parser/thing.rs index f25b0897..e77f429f 100644 --- a/core/src/syn/parser/thing.rs +++ b/core/src/syn/parser/thing.rs @@ -2,6 +2,7 @@ use reblessive::Stk; use super::{ParseResult, Parser}; use crate::{ + enter_flexible_ident, sql::{id::Gen, Id, Ident, Range, Thing, Value}, syn::{ parser::{ @@ -37,6 +38,14 @@ impl Parser<'_> { Ok(thing) } + fn peek_can_start_id(&mut self) -> bool { + self.peek_can_be_ident() + || matches!( + self.peek_kind(), + TokenKind::Number(_) | t!("{") | t!("[") | TokenKind::Duration + ) + } + pub async fn parse_thing_or_range( &mut self, stk: &mut Stk, @@ -44,92 +53,89 @@ impl Parser<'_> { ) -> ParseResult { expected!(self, t!(":")); - self.peek(); - self.no_whitespace()?; + enter_flexible_ident!(this = self =>(self.flexible_record_id){ - if self.eat(t!("..")) { - let end = if self.eat(t!("=")) { - self.no_whitespace()?; - let id = stk.run(|stk| self.parse_id(stk)).await?; - Bound::Included(id) - } else if self.peek_can_be_ident() - || matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) - { - self.no_whitespace()?; - let id = stk.run(|stk| self.parse_id(stk)).await?; - Bound::Excluded(id) - } else { - Bound::Unbounded - }; - return Ok(Value::Range(Box::new(Range { - tb: ident, - beg: Bound::Unbounded, - end, - }))); - } + this.peek(); + this.no_whitespace()?; - let beg = if self.peek_can_be_ident() - || matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) - { - let id = stk.run(|ctx| 
self.parse_id(ctx)).await?; - - if self.eat(t!(">")) { - self.no_whitespace()?; - Bound::Excluded(id) - } else { - Bound::Included(id) + if this.eat(t!("..")) { + let end = if this.eat(t!("=")) { + this.no_whitespace()?; + let id = stk.run(|stk| this.parse_id(stk)).await?; + Bound::Included(id) + } else if this.peek_can_start_id() { + this.no_whitespace()?; + let id = stk.run(|stk| this.parse_id(stk)).await?; + Bound::Excluded(id) + } else { + Bound::Unbounded + }; + return Ok(Value::Range(Box::new(Range { + tb: ident, + beg: Bound::Unbounded, + end, + }))); } - } else { - Bound::Unbounded - }; - if self.eat(t!("..")) { - let end = if self.eat(t!("=")) { - self.no_whitespace()?; - let id = stk.run(|ctx| self.parse_id(ctx)).await?; - Bound::Included(id) - } else if self.peek_can_be_ident() - || matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) - { - self.no_whitespace()?; - let id = stk.run(|ctx| self.parse_id(ctx)).await?; - Bound::Excluded(id) - } else { - Bound::Unbounded - }; - Ok(Value::Range(Box::new(Range { - tb: ident, - beg, - end, - }))) - } else { - let id = match beg { - Bound::Unbounded => { - if self.peek_kind() == t!("$param") { - return Err(ParseError::new( - ParseErrorKind::UnexpectedExplain { - found: t!("$param"), - expected: "a record-id id", - explain: "you can create a record-id from a param with the function 'type::thing'", - }, - self.recent_span(), - )); + let beg = if this.peek_can_start_id(){ + let id = stk.run(|ctx| this.parse_id(ctx)).await?; + + if this.eat(t!(">")) { + this.no_whitespace()?; + Bound::Excluded(id) + } else { + Bound::Included(id) } + } else { + Bound::Unbounded + }; - // we haven't matched anythong so far so we still want any type of id. - unexpected!(self, self.peek_kind(), "a record-id id") - } - Bound::Excluded(_) => { - // we have matched a bounded id but we don't see an range operator. - unexpected!(self, self.peek_kind(), "the range operator `..`") - } - Bound::Included(id) => id, - }; - Ok(Value::Thing(Thing { - tb: ident, - id, - })) - } + if this.eat(t!("..")) { + let end = if this.eat(t!("=")) { + this.no_whitespace()?; + let id = stk.run(|ctx| this.parse_id(ctx)).await?; + Bound::Included(id) + } else if this.peek_can_start_id(){ + this.no_whitespace()?; + let id = stk.run(|ctx| this.parse_id(ctx)).await?; + Bound::Excluded(id) + } else { + Bound::Unbounded + }; + Ok(Value::Range(Box::new(Range { + tb: ident, + beg, + end, + }))) + } else { + let id = match beg { + Bound::Unbounded => { + if this.peek_kind() == t!("$param") { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: t!("$param"), + expected: "a record-id id", + explain: "you can create a record-id from a param with the function 'type::thing'", + }, + this.recent_span(), + )); + } + + // we haven't matched anythong so far so we still want any type of id. + unexpected!(this, this.peek_kind(), "a record-id id") + } + Bound::Excluded(_) => { + // we have matched a bounded id but we don't see an range operator. 
+ unexpected!(this, this.peek_kind(), "the range operator `..`") + } + Bound::Included(id) => id, + }; + Ok(Value::Thing(Thing { + tb: ident, + id, + })) + } + }) } pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult { @@ -137,61 +143,65 @@ impl Parser<'_> { expected!(self, t!(":")); - self.peek(); - self.no_whitespace()?; + enter_flexible_ident!(this = self =>(self.flexible_record_id){ + this.peek(); + this.no_whitespace()?; - let beg = if self.peek_can_be_ident() { - self.peek(); - self.no_whitespace()?; + let beg = if this.peek_can_be_ident() { + this.peek(); + this.no_whitespace()?; - let id = ctx.run(|ctx| self.parse_id(ctx)).await?; + let id = ctx.run(|ctx| this.parse_id(ctx)).await?; - self.peek(); - self.no_whitespace()?; + this.peek(); + this.no_whitespace()?; - if self.eat(t!(">")) { - Bound::Excluded(id) + if this.eat(t!(">")) { + Bound::Excluded(id) + } else { + Bound::Included(id) + } } else { - Bound::Included(id) - } - } else { - Bound::Unbounded - }; + Bound::Unbounded + }; - self.peek(); - self.no_whitespace()?; + this.peek(); + this.no_whitespace()?; - expected!(self, t!("..")); + expected!(this, t!("..")); - self.peek(); - self.no_whitespace()?; + this.peek(); + this.no_whitespace()?; - let inclusive = self.eat(t!("=")); + let inclusive = this.eat(t!("=")); - self.peek(); - self.no_whitespace()?; + this.peek(); + this.no_whitespace()?; - let end = if self.peek_can_be_ident() { - let id = ctx.run(|ctx| self.parse_id(ctx)).await?; - if inclusive { - Bound::Included(id) + let end = if this.peek_can_be_ident() { + let id = ctx.run(|ctx| this.parse_id(ctx)).await?; + if inclusive { + Bound::Included(id) + } else { + Bound::Excluded(id) + } } else { - Bound::Excluded(id) - } - } else { - Bound::Unbounded - }; + Bound::Unbounded + }; - Ok(Range { - tb, - beg, - end, + Ok(Range { + tb, + beg, + end, + }) }) } pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult { let ident = self.next_token_value::()?.0; - self.parse_thing_from_ident(ctx, ident).await + enter_flexible_ident!(this = self =>(self.flexible_record_id){ + this.parse_thing_from_ident(ctx, ident).await + }) } pub async fn parse_thing_from_ident( @@ -201,10 +211,13 @@ impl Parser<'_> { ) -> ParseResult { expected!(self, t!(":")); - self.peek(); - self.no_whitespace()?; + let id = enter_flexible_ident!(this = self =>(self.flexible_record_id){ + this.peek(); + this.no_whitespace()?; + + ctx.run(|ctx| this.parse_id(ctx)).await + })?; - let id = ctx.run(|ctx| self.parse_id(ctx)).await?; Ok(Thing { tb: ident, id, @@ -215,11 +228,15 @@ impl Parser<'_> { let token = self.next(); match token.kind { t!("{") => { - let object = self.parse_object(stk, token.span).await?; + let object = enter_flexible_ident!(this = self => (false){ + this.parse_object(stk, token.span).await + })?; Ok(Id::Object(object)) } t!("[") => { - let array = self.parse_array(stk, token.span).await?; + let array = enter_flexible_ident!(this = self => (false){ + this.parse_array(stk, token.span).await + })?; Ok(Id::Array(array)) } t!("+") => { @@ -260,6 +277,30 @@ impl Parser<'_> { Ok(Id::String(text)) } } + TokenKind::Number(NumberKind::Decimal | NumberKind::DecimalExponent) + if self.flexible_record_id => + { + let mut text = self.lexer.string.take().unwrap(); + text.push('d'); + text.push('e'); + text.push('c'); + Ok(Id::String(text)) + } + TokenKind::Number(NumberKind::Float) if self.flexible_record_id => { + let mut text = self.lexer.string.take().unwrap(); + text.push('f'); + Ok(Id::String(text)) + } + TokenKind::Duration if 
self.flexible_record_id => {
+			self.lexer.duration = None;
+			let slice = self.lexer.reader.span(token.span);
+			if slice.iter().any(|x| *x > 0b0111_1111) {
+				unexpected!(self, token.kind, "an identifier");
+			}
+			// Should be valid utf-8 as it was already parsed by the lexer
+			let text = String::from_utf8(slice.to_vec()).unwrap();
+			Ok(Id::String(text))
+		}
 		t!("ULID") => {
 			// TODO: error message about how to use `ulid` as an identifier.
 			expected!(self, t!("("));
@@ -480,4 +521,49 @@ mod tests {
 			}
 		);
 	}
+
+	#[test]
+	fn weird_things() {
+		use crate::sql;
+
+		fn assert_ident_parses_correctly(ident: &str) {
+			let thing = format!("t:{}", ident);
+			let mut parser = Parser::new(thing.as_bytes());
+			parser.allow_fexible_record_id(true);
+			let mut stack = Stack::new();
+			let r = stack
+				.enter(|ctx| async move { parser.parse_thing(ctx).await })
+				.finish()
+				.expect(&format!("failed on {}", ident))
+				.id;
+			assert_eq!(r, Id::String(ident.to_string()),);
+
+			let mut parser = Parser::new(thing.as_bytes());
+			let r = stack
+				.enter(|ctx| async move { parser.parse_query(ctx).await })
+				.finish()
+				.expect(&format!("failed on {}", ident));
+
+			assert_eq!(
+				r,
+				sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Thing(
+					sql::Thing {
+						tb: "t".to_string(),
+						id: Id::String(ident.to_string())
+					}
+				))]))
+			)
+		}
+
+		assert_ident_parses_correctly("123abc");
+		assert_ident_parses_correctly("123d");
+		assert_ident_parses_correctly("123de");
+		assert_ident_parses_correctly("123dec");
+		assert_ident_parses_correctly("1e23dec");
+		assert_ident_parses_correctly("1e23f");
+		assert_ident_parses_correctly("123f");
+		assert_ident_parses_correctly("1ns");
+		assert_ident_parses_correctly("1ns1");
+		assert_ident_parses_correctly("1ns1h");
+	}
 }
diff --git a/core/src/syn/token/mod.rs b/core/src/syn/token/mod.rs
index ebf9955e..f6ba952d 100644
--- a/core/src/syn/token/mod.rs
+++ b/core/src/syn/token/mod.rs
@@ -232,6 +232,8 @@ pub enum NumberKind {
 	DecimalExponent,
 	// A number with a float postfix.
 	Float,
+	// A number with a float postfix that had a mantissa.
+	FloatMantissa,
 	// A number with a `.3` part.
 	Mantissa,
 	// A number with a `.3e10` part.