Flexible record id's (#3937)
This commit is contained in:
parent
22aec455b5
commit
a791f742b4
8 changed files with 348 additions and 197 deletions
|
@ -25,12 +25,21 @@ impl<'a> Lexer<'a> {
|
||||||
///
|
///
|
||||||
/// Expect the lexer to have already eaten the digits starting the duration.
|
/// Expect the lexer to have already eaten the digits starting the duration.
|
||||||
pub fn lex_duration(&mut self) -> Token {
|
pub fn lex_duration(&mut self) -> Token {
|
||||||
|
let backup = self.reader.offset();
|
||||||
match self.lex_duration_err() {
|
match self.lex_duration_err() {
|
||||||
Ok(x) => {
|
Ok(x) => {
|
||||||
|
self.scratch.clear();
|
||||||
self.duration = Some(x);
|
self.duration = Some(x);
|
||||||
self.finish_token(TokenKind::Duration)
|
self.finish_token(TokenKind::Duration)
|
||||||
}
|
}
|
||||||
Err(e) => self.invalid_token(LexError::Duration(e)),
|
Err(e) => {
|
||||||
|
if self.flexible_ident {
|
||||||
|
self.reader.backup(backup);
|
||||||
|
return self.lex_ident();
|
||||||
|
}
|
||||||
|
self.scratch.clear();
|
||||||
|
self.invalid_token(LexError::Duration(e))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +72,6 @@ impl<'a> Lexer<'a> {
|
||||||
current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
|
current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
|
||||||
current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
|
current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
|
||||||
}
|
}
|
||||||
self.scratch.clear();
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let Some(next) = self.reader.peek() else {
|
let Some(next) = self.reader.peek() else {
|
||||||
|
|
|
@ -84,6 +84,10 @@ pub struct Lexer<'a> {
|
||||||
/// like for example strings with escape characters.
|
/// like for example strings with escape characters.
|
||||||
scratch: String,
|
scratch: String,
|
||||||
|
|
||||||
|
/// Allow the next parsed idents to be flexible, i.e. also accept idents which start with a
|
||||||
|
/// digit.
|
||||||
|
pub flexible_ident: bool,
|
||||||
|
|
||||||
// below are a collection of storage for values produced by tokens.
|
// below are a collection of storage for values produced by tokens.
|
||||||
// For performance reasons we want to keep the tokens as small as possible.
|
// For performance reasons we want to keep the tokens as small as possible.
|
||||||
// As only some tokens have an additional value associated with them we don't store that value
|
// As only some tokens have an additional value associated with them we don't store that value
|
||||||
|
@ -117,6 +121,7 @@ impl<'a> Lexer<'a> {
|
||||||
last_offset: 0,
|
last_offset: 0,
|
||||||
whitespace_span: None,
|
whitespace_span: None,
|
||||||
scratch: String::new(),
|
scratch: String::new(),
|
||||||
|
flexible_ident: false,
|
||||||
string: None,
|
string: None,
|
||||||
datetime: None,
|
datetime: None,
|
||||||
duration: None,
|
duration: None,
|
||||||
|
@ -132,6 +137,7 @@ impl<'a> Lexer<'a> {
|
||||||
pub fn reset(&mut self) {
|
pub fn reset(&mut self) {
|
||||||
self.last_offset = 0;
|
self.last_offset = 0;
|
||||||
self.scratch.clear();
|
self.scratch.clear();
|
||||||
|
self.flexible_ident = false;
|
||||||
self.whitespace_span = None;
|
self.whitespace_span = None;
|
||||||
self.string = None;
|
self.string = None;
|
||||||
self.datetime = None;
|
self.datetime = None;
|
||||||
|
@ -155,6 +161,7 @@ impl<'a> Lexer<'a> {
|
||||||
last_offset: 0,
|
last_offset: 0,
|
||||||
whitespace_span: None,
|
whitespace_span: None,
|
||||||
scratch: self.scratch,
|
scratch: self.scratch,
|
||||||
|
flexible_ident: false,
|
||||||
string: self.string,
|
string: self.string,
|
||||||
datetime: self.datetime,
|
datetime: self.datetime,
|
||||||
duration: self.duration,
|
duration: self.duration,
|
||||||
|
|
|
@ -15,23 +15,22 @@ pub enum Error {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Lexer<'_> {
|
impl Lexer<'_> {
|
||||||
|
pub fn finish_number_token(&mut self, kind: NumberKind) -> Token {
|
||||||
|
let mut str = mem::take(&mut self.scratch);
|
||||||
|
str.retain(|x| x != '_');
|
||||||
|
self.string = Some(str);
|
||||||
|
self.finish_token(TokenKind::Number(kind))
|
||||||
|
}
|
||||||
/// Lex only an integer.
|
/// Lex only an integer.
|
||||||
/// Use when a number can be followed immediately by a `.` like in a model version.
|
/// Use when a number can be followed immediately by a `.` like in a model version.
|
||||||
pub fn lex_only_integer(&mut self) -> Token {
|
pub fn lex_only_integer(&mut self) -> Token {
|
||||||
match self.lex_only_integer_err() {
|
|
||||||
Ok(x) => x,
|
|
||||||
Err(e) => self.invalid_token(LexError::Number(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lex_only_integer_err(&mut self) -> Result<Token, Error> {
|
|
||||||
let Some(next) = self.reader.peek() else {
|
let Some(next) = self.reader.peek() else {
|
||||||
return Ok(self.eof_token());
|
return self.eof_token();
|
||||||
};
|
};
|
||||||
|
|
||||||
// not a number, return a different token kind, for error reporting.
|
// not a number, return a different token kind, for error reporting.
|
||||||
if !next.is_ascii_digit() {
|
if !next.is_ascii_digit() {
|
||||||
return Ok(self.next_token());
|
return self.next_token();
|
||||||
}
|
}
|
||||||
|
|
||||||
self.scratch.push(next as char);
|
self.scratch.push(next as char);
|
||||||
|
@ -39,9 +38,7 @@ impl Lexer<'_> {
|
||||||
|
|
||||||
// eat all the ascii digits
|
// eat all the ascii digits
|
||||||
while let Some(x) = self.reader.peek() {
|
while let Some(x) = self.reader.peek() {
|
||||||
if x == b'_' {
|
if !x.is_ascii_digit() && x != b'_' {
|
||||||
self.reader.next();
|
|
||||||
} else if !x.is_ascii_digit() {
|
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
self.scratch.push(x as char);
|
self.scratch.push(x as char);
|
||||||
|
@ -53,33 +50,25 @@ impl Lexer<'_> {
|
||||||
match self.reader.peek() {
|
match self.reader.peek() {
|
||||||
Some(b'd' | b'f') => {
|
Some(b'd' | b'f') => {
|
||||||
// not an integer but parse anyway for error reporting.
|
// not an integer but parse anyway for error reporting.
|
||||||
return self.lex_suffix(false, true);
|
return self.lex_suffix(false, false);
|
||||||
}
|
}
|
||||||
Some(x) if x.is_ascii_alphabetic() => return Err(self.invalid_suffix()),
|
Some(x) if x.is_ascii_alphabetic() => return self.invalid_suffix_token(),
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
self.finish_number_token(NumberKind::Integer)
|
||||||
Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn lex_number(&mut self, start: u8) -> Token {
|
|
||||||
match self.lex_number_err(start) {
|
|
||||||
Ok(x) => x,
|
|
||||||
Err(e) => self.invalid_token(LexError::Number(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Lex a number.
|
/// Lex a number.
|
||||||
///
|
///
|
||||||
/// Expects the digit which started the number as the start argument.
|
/// Expects the digit which started the number as the start argument.
|
||||||
pub fn lex_number_err(&mut self, start: u8) -> Result<Token, Error> {
|
pub fn lex_number(&mut self, start: u8) -> Token {
|
||||||
debug_assert!(start.is_ascii_digit());
|
debug_assert!(start.is_ascii_digit());
|
||||||
debug_assert_eq!(self.scratch, "");
|
debug_assert_eq!(self.scratch, "");
|
||||||
self.scratch.push(start as char);
|
self.scratch.push(start as char);
|
||||||
loop {
|
loop {
|
||||||
let Some(x) = self.reader.peek() else {
|
let Some(x) = self.reader.peek() else {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
return self.finish_number_token(NumberKind::Integer);
|
||||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
|
||||||
};
|
};
|
||||||
match x {
|
match x {
|
||||||
b'0'..=b'9' => {
|
b'0'..=b'9' => {
|
||||||
|
@ -87,10 +76,10 @@ impl Lexer<'_> {
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
self.scratch.push(x as char);
|
self.scratch.push(x as char);
|
||||||
}
|
}
|
||||||
b'e' | b'E' => {
|
x @ (b'e' | b'E') => {
|
||||||
// scientific notation
|
// scientific notation
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
self.scratch.push('e');
|
self.scratch.push(x as char);
|
||||||
return self.lex_exponent(false);
|
return self.lex_exponent(false);
|
||||||
}
|
}
|
||||||
b'.' => {
|
b'.' => {
|
||||||
|
@ -104,33 +93,33 @@ impl Lexer<'_> {
|
||||||
} else {
|
} else {
|
||||||
// indexing a number
|
// indexing a number
|
||||||
self.reader.backup(backup);
|
self.reader.backup(backup);
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
return self.finish_number_token(NumberKind::Integer);
|
||||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b'f' | b'd' => return self.lex_suffix(false, true),
|
b'f' | b'd' => return self.lex_suffix(false, false),
|
||||||
// 0xc2 is the start byte of 'µ'
|
// 0xc2 is the start byte of 'µ'
|
||||||
0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
|
0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
|
||||||
// duration suffix, switch to lexing duration.
|
// duration suffix, switch to lexing duration.
|
||||||
return Ok(self.lex_duration());
|
return self.lex_duration();
|
||||||
}
|
}
|
||||||
b'_' => {
|
b'_' => {
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
}
|
}
|
||||||
b'a'..=b'z' | b'A'..=b'Z' => {
|
b'a'..=b'z' | b'A'..=b'Z' => {
|
||||||
return Err(self.invalid_suffix());
|
if self.flexible_ident {
|
||||||
// invalid token, unexpected identifier character immediately after number.
|
return self.lex_ident();
|
||||||
// Eat all remaining identifier like characters.
|
} else {
|
||||||
|
return self.invalid_suffix_token();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
return self.finish_number_token(NumberKind::Integer);
|
||||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn invalid_suffix(&mut self) -> Error {
|
fn invalid_suffix_token(&mut self) -> Token {
|
||||||
// eat the whole suffix.
|
// eat the whole suffix.
|
||||||
while let Some(x) = self.reader.peek() {
|
while let Some(x) = self.reader.peek() {
|
||||||
if !x.is_ascii_alphanumeric() {
|
if !x.is_ascii_alphanumeric() {
|
||||||
|
@ -139,20 +128,29 @@ impl Lexer<'_> {
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
}
|
}
|
||||||
self.scratch.clear();
|
self.scratch.clear();
|
||||||
Error::InvalidSuffix
|
self.invalid_token(LexError::Number(Error::InvalidSuffix))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lex a number suffix, either 'f' or 'dec'.
|
/// Lex a number suffix, either 'f' or 'dec'.
|
||||||
fn lex_suffix(&mut self, had_exponent: bool, can_be_duration: bool) -> Result<Token, Error> {
|
fn lex_suffix(&mut self, had_mantissa: bool, had_exponent: bool) -> Token {
|
||||||
match self.reader.peek() {
|
match self.reader.peek() {
|
||||||
Some(b'f') => {
|
Some(b'f') => {
|
||||||
// float suffix
|
// float suffix
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
||||||
Err(self.invalid_suffix())
|
if self.flexible_ident && !had_mantissa {
|
||||||
|
self.scratch.push('f');
|
||||||
|
self.lex_ident()
|
||||||
} else {
|
} else {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
self.invalid_suffix_token()
|
||||||
Ok(self.finish_token(TokenKind::Number(NumberKind::Float)))
|
}
|
||||||
|
} else {
|
||||||
|
let kind = if had_mantissa {
|
||||||
|
NumberKind::FloatMantissa
|
||||||
|
} else {
|
||||||
|
NumberKind::Float
|
||||||
|
};
|
||||||
|
self.finish_number_token(kind)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(b'd') => {
|
Some(b'd') => {
|
||||||
|
@ -160,44 +158,53 @@ impl Lexer<'_> {
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
let checkpoint = self.reader.offset();
|
let checkpoint = self.reader.offset();
|
||||||
if !self.eat(b'e') {
|
if !self.eat(b'e') {
|
||||||
if can_be_duration {
|
if !had_mantissa && !had_exponent {
|
||||||
self.reader.backup(checkpoint - 1);
|
self.reader.backup(checkpoint - 1);
|
||||||
return Ok(self.lex_duration());
|
return self.lex_duration();
|
||||||
|
} else if !had_mantissa && self.flexible_ident {
|
||||||
|
self.scratch.push('d');
|
||||||
|
return self.lex_ident();
|
||||||
} else {
|
} else {
|
||||||
return Err(self.invalid_suffix());
|
return self.invalid_suffix_token();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !self.eat(b'c') {
|
if !self.eat(b'c') {
|
||||||
return Err(self.invalid_suffix());
|
if self.flexible_ident {
|
||||||
|
self.scratch.push('d');
|
||||||
|
self.scratch.push('e');
|
||||||
|
return self.lex_ident();
|
||||||
|
} else {
|
||||||
|
return self.invalid_suffix_token();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
||||||
Err(self.invalid_suffix())
|
self.invalid_suffix_token()
|
||||||
} else {
|
} else {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
let kind = if had_exponent {
|
||||||
if had_exponent {
|
NumberKind::DecimalExponent
|
||||||
Ok(self.finish_token(TokenKind::Number(NumberKind::DecimalExponent)))
|
|
||||||
} else {
|
} else {
|
||||||
Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal)))
|
NumberKind::Decimal
|
||||||
}
|
};
|
||||||
|
self.finish_number_token(kind)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Caller should ensure this is unreachable
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lexes the mantissa of a number, i.e. `.8` in `1.8`
|
/// Lexes the mantissa of a number, i.e. `.8` in `1.8`
|
||||||
pub fn lex_mantissa(&mut self) -> Result<Token, Error> {
|
pub fn lex_mantissa(&mut self) -> Token {
|
||||||
loop {
|
loop {
|
||||||
// lex_number already checks if there exists a digit after the dot.
|
// lex_number already checks if there exists a digit after the dot.
|
||||||
// So this will never fail the first iteration of the loop.
|
// So this will never fail the first iteration of the loop.
|
||||||
let Some(x) = self.reader.peek() else {
|
let Some(x) = self.reader.peek() else {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
return self.finish_number_token(NumberKind::Mantissa);
|
||||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
|
|
||||||
};
|
};
|
||||||
match x {
|
match x {
|
||||||
b'0'..=b'9' => {
|
b'0'..=b'9' | b'_' => {
|
||||||
// next digit.
|
// next digit.
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
self.scratch.push(x as char);
|
self.scratch.push(x as char);
|
||||||
|
@ -208,25 +215,21 @@ impl Lexer<'_> {
|
||||||
self.scratch.push('e');
|
self.scratch.push('e');
|
||||||
return self.lex_exponent(true);
|
return self.lex_exponent(true);
|
||||||
}
|
}
|
||||||
b'_' => {
|
b'f' | b'd' => return self.lex_suffix(true, false),
|
||||||
self.reader.next();
|
|
||||||
}
|
|
||||||
b'f' | b'd' => return self.lex_suffix(false, false),
|
|
||||||
b'a'..=b'z' | b'A'..=b'Z' => {
|
b'a'..=b'z' | b'A'..=b'Z' => {
|
||||||
// invalid token, random identifier characters immediately after number.
|
// invalid token, random identifier characters immediately after number.
|
||||||
self.scratch.clear();
|
self.scratch.clear();
|
||||||
return Err(Error::InvalidSuffix);
|
return self.invalid_suffix_token();
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
return self.finish_number_token(NumberKind::Mantissa);
|
||||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lexes the exponent of a number, i.e. `e10` in `1.1e10`;
|
/// Lexes the exponent of a number, i.e. `e10` in `1.1e10`;
|
||||||
fn lex_exponent(&mut self, had_mantissa: bool) -> Result<Token, Error> {
|
fn lex_exponent(&mut self, had_mantissa: bool) -> Token {
|
||||||
loop {
|
loop {
|
||||||
match self.reader.peek() {
|
match self.reader.peek() {
|
||||||
Some(x @ b'-' | x @ b'+') => {
|
Some(x @ b'-' | x @ b'+') => {
|
||||||
|
@ -238,30 +241,29 @@ impl Lexer<'_> {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
|
if self.flexible_ident && !had_mantissa {
|
||||||
|
return self.lex_ident();
|
||||||
|
}
|
||||||
// random other character, expected at least one digit.
|
// random other character, expected at least one digit.
|
||||||
return Err(Error::DigitExpectedExponent);
|
return self.invalid_token(LexError::Number(Error::DigitExpectedExponent));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
loop {
|
loop {
|
||||||
match self.reader.peek() {
|
match self.reader.peek() {
|
||||||
Some(x @ b'0'..=b'9') => {
|
Some(x @ (b'0'..=b'9' | b'_')) => {
|
||||||
self.reader.next();
|
self.reader.next();
|
||||||
self.scratch.push(x as char);
|
self.scratch.push(x as char);
|
||||||
}
|
}
|
||||||
Some(b'_') => {
|
Some(b'f' | b'd') => return self.lex_suffix(had_mantissa, true),
|
||||||
self.reader.next();
|
|
||||||
}
|
|
||||||
Some(b'f' | b'd') => return self.lex_suffix(true, false),
|
|
||||||
_ => {
|
_ => {
|
||||||
let kind = if had_mantissa {
|
let kind = if had_mantissa {
|
||||||
NumberKind::MantissaExponent
|
NumberKind::MantissaExponent
|
||||||
} else {
|
} else {
|
||||||
NumberKind::Exponent
|
NumberKind::Exponent
|
||||||
};
|
};
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
return self.finish_number_token(kind);
|
||||||
return Ok(self.finish_token(TokenKind::Number(kind)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -127,6 +127,7 @@ impl TokenValue for f32 {
|
||||||
TokenKind::Number(
|
TokenKind::Number(
|
||||||
NumberKind::Integer
|
NumberKind::Integer
|
||||||
| NumberKind::Float
|
| NumberKind::Float
|
||||||
|
| NumberKind::FloatMantissa
|
||||||
| NumberKind::Mantissa
|
| NumberKind::Mantissa
|
||||||
| NumberKind::MantissaExponent,
|
| NumberKind::MantissaExponent,
|
||||||
) => {
|
) => {
|
||||||
|
@ -152,6 +153,7 @@ impl TokenValue for f64 {
|
||||||
TokenKind::Number(
|
TokenKind::Number(
|
||||||
NumberKind::Integer
|
NumberKind::Integer
|
||||||
| NumberKind::Float
|
| NumberKind::Float
|
||||||
|
| NumberKind::FloatMantissa
|
||||||
| NumberKind::Mantissa
|
| NumberKind::Mantissa
|
||||||
| NumberKind::MantissaExponent,
|
| NumberKind::MantissaExponent,
|
||||||
) => {
|
) => {
|
||||||
|
@ -203,7 +205,10 @@ impl TokenValue for Number {
|
||||||
Ok(Number::Float(x))
|
Ok(Number::Float(x))
|
||||||
}
|
}
|
||||||
TokenKind::Number(
|
TokenKind::Number(
|
||||||
NumberKind::Mantissa | NumberKind::MantissaExponent | NumberKind::Float,
|
NumberKind::Mantissa
|
||||||
|
| NumberKind::MantissaExponent
|
||||||
|
| NumberKind::Float
|
||||||
|
| NumberKind::FloatMantissa,
|
||||||
) => {
|
) => {
|
||||||
let source = parser.lexer.string.take().unwrap();
|
let source = parser.lexer.string.take().unwrap();
|
||||||
// As far as I can tell this will never fail for valid integers.
|
// As far as I can tell this will never fail for valid integers.
|
||||||
|
|
|
@ -149,6 +149,39 @@ macro_rules! enter_query_recursion {
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[macro_export]
|
||||||
|
macro_rules! enter_flexible_ident{
|
||||||
|
($name:ident = $this:expr => ($enabled:expr){ $($t:tt)* }) => {{
|
||||||
|
struct Dropper<'a, 'b>(&'a mut $crate::syn::parser::Parser<'b>,bool);
|
||||||
|
impl Drop for Dropper<'_, '_> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
self.0.lexer.flexible_ident = self.1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a> ::std::ops::Deref for Dropper<'_,'a>{
|
||||||
|
type Target = $crate::syn::parser::Parser<'a>;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target{
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> ::std::ops::DerefMut for Dropper<'_,'a>{
|
||||||
|
fn deref_mut(&mut self) -> &mut Self::Target{
|
||||||
|
self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let enabled = $this.lexer.flexible_ident;
|
||||||
|
$this.lexer.flexible_ident = $enabled;
|
||||||
|
#[allow(unused_mut)]
|
||||||
|
let mut $name = Dropper($this,enabled);
|
||||||
|
{
|
||||||
|
$($t)*
|
||||||
|
}
|
||||||
|
}};
|
||||||
|
}
|
||||||
|
|
||||||
pub(super) use expected;
|
pub(super) use expected;
|
||||||
pub(super) use unexpected;
|
pub(super) use unexpected;
|
||||||
|
|
||||||
|
|
|
@ -79,6 +79,7 @@ pub struct Parser<'a> {
|
||||||
token_buffer: TokenBuffer<4>,
|
token_buffer: TokenBuffer<4>,
|
||||||
table_as_field: bool,
|
table_as_field: bool,
|
||||||
legacy_strands: bool,
|
legacy_strands: bool,
|
||||||
|
flexible_record_id: bool,
|
||||||
object_recursion: usize,
|
object_recursion: usize,
|
||||||
query_recursion: usize,
|
query_recursion: usize,
|
||||||
}
|
}
|
||||||
|
@ -92,6 +93,7 @@ impl<'a> Parser<'a> {
|
||||||
token_buffer: TokenBuffer::new(),
|
token_buffer: TokenBuffer::new(),
|
||||||
table_as_field: false,
|
table_as_field: false,
|
||||||
legacy_strands: false,
|
legacy_strands: false,
|
||||||
|
flexible_record_id: true,
|
||||||
object_recursion: 100,
|
object_recursion: 100,
|
||||||
query_recursion: 20,
|
query_recursion: 20,
|
||||||
}
|
}
|
||||||
|
@ -117,6 +119,11 @@ impl<'a> Parser<'a> {
|
||||||
self.legacy_strands = value;
|
self.legacy_strands = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set whether to allow record-ids which don't adhere to regular ident rules.
|
||||||
|
pub fn allow_fexible_record_id(&mut self, value: bool) {
|
||||||
|
self.flexible_record_id = value;
|
||||||
|
}
|
||||||
|
|
||||||
/// Reset the parser state. Doesn't change the position of the parser in buffer.
|
/// Reset the parser state. Doesn't change the position of the parser in buffer.
|
||||||
pub fn reset(&mut self) {
|
pub fn reset(&mut self) {
|
||||||
self.last_span = Span::empty();
|
self.last_span = Span::empty();
|
||||||
|
@ -132,6 +139,7 @@ impl<'a> Parser<'a> {
|
||||||
last_span: Span::empty(),
|
last_span: Span::empty(),
|
||||||
token_buffer: TokenBuffer::new(),
|
token_buffer: TokenBuffer::new(),
|
||||||
legacy_strands: self.legacy_strands,
|
legacy_strands: self.legacy_strands,
|
||||||
|
flexible_record_id: self.flexible_record_id,
|
||||||
table_as_field: false,
|
table_as_field: false,
|
||||||
object_recursion: self.object_recursion,
|
object_recursion: self.object_recursion,
|
||||||
query_recursion: self.query_recursion,
|
query_recursion: self.query_recursion,
|
||||||
|
|
|
@ -2,6 +2,7 @@ use reblessive::Stk;
|
||||||
|
|
||||||
use super::{ParseResult, Parser};
|
use super::{ParseResult, Parser};
|
||||||
use crate::{
|
use crate::{
|
||||||
|
enter_flexible_ident,
|
||||||
sql::{id::Gen, Id, Ident, Range, Thing, Value},
|
sql::{id::Gen, Id, Ident, Range, Thing, Value},
|
||||||
syn::{
|
syn::{
|
||||||
parser::{
|
parser::{
|
||||||
|
@ -37,6 +38,14 @@ impl Parser<'_> {
|
||||||
Ok(thing)
|
Ok(thing)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn peek_can_start_id(&mut self) -> bool {
|
||||||
|
self.peek_can_be_ident()
|
||||||
|
|| matches!(
|
||||||
|
self.peek_kind(),
|
||||||
|
TokenKind::Number(_) | t!("{") | t!("[") | TokenKind::Duration
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn parse_thing_or_range(
|
pub async fn parse_thing_or_range(
|
||||||
&mut self,
|
&mut self,
|
||||||
stk: &mut Stk,
|
stk: &mut Stk,
|
||||||
|
@ -44,19 +53,19 @@ impl Parser<'_> {
|
||||||
) -> ParseResult<Value> {
|
) -> ParseResult<Value> {
|
||||||
expected!(self, t!(":"));
|
expected!(self, t!(":"));
|
||||||
|
|
||||||
self.peek();
|
enter_flexible_ident!(this = self =>(self.flexible_record_id){
|
||||||
self.no_whitespace()?;
|
|
||||||
|
|
||||||
if self.eat(t!("..")) {
|
this.peek();
|
||||||
let end = if self.eat(t!("=")) {
|
this.no_whitespace()?;
|
||||||
self.no_whitespace()?;
|
|
||||||
let id = stk.run(|stk| self.parse_id(stk)).await?;
|
if this.eat(t!("..")) {
|
||||||
|
let end = if this.eat(t!("=")) {
|
||||||
|
this.no_whitespace()?;
|
||||||
|
let id = stk.run(|stk| this.parse_id(stk)).await?;
|
||||||
Bound::Included(id)
|
Bound::Included(id)
|
||||||
} else if self.peek_can_be_ident()
|
} else if this.peek_can_start_id() {
|
||||||
|| matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("["))
|
this.no_whitespace()?;
|
||||||
{
|
let id = stk.run(|stk| this.parse_id(stk)).await?;
|
||||||
self.no_whitespace()?;
|
|
||||||
let id = stk.run(|stk| self.parse_id(stk)).await?;
|
|
||||||
Bound::Excluded(id)
|
Bound::Excluded(id)
|
||||||
} else {
|
} else {
|
||||||
Bound::Unbounded
|
Bound::Unbounded
|
||||||
|
@ -68,13 +77,11 @@ impl Parser<'_> {
|
||||||
})));
|
})));
|
||||||
}
|
}
|
||||||
|
|
||||||
let beg = if self.peek_can_be_ident()
|
let beg = if this.peek_can_start_id(){
|
||||||
|| matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("["))
|
let id = stk.run(|ctx| this.parse_id(ctx)).await?;
|
||||||
{
|
|
||||||
let id = stk.run(|ctx| self.parse_id(ctx)).await?;
|
|
||||||
|
|
||||||
if self.eat(t!(">")) {
|
if this.eat(t!(">")) {
|
||||||
self.no_whitespace()?;
|
this.no_whitespace()?;
|
||||||
Bound::Excluded(id)
|
Bound::Excluded(id)
|
||||||
} else {
|
} else {
|
||||||
Bound::Included(id)
|
Bound::Included(id)
|
||||||
|
@ -83,16 +90,14 @@ impl Parser<'_> {
|
||||||
Bound::Unbounded
|
Bound::Unbounded
|
||||||
};
|
};
|
||||||
|
|
||||||
if self.eat(t!("..")) {
|
if this.eat(t!("..")) {
|
||||||
let end = if self.eat(t!("=")) {
|
let end = if this.eat(t!("=")) {
|
||||||
self.no_whitespace()?;
|
this.no_whitespace()?;
|
||||||
let id = stk.run(|ctx| self.parse_id(ctx)).await?;
|
let id = stk.run(|ctx| this.parse_id(ctx)).await?;
|
||||||
Bound::Included(id)
|
Bound::Included(id)
|
||||||
} else if self.peek_can_be_ident()
|
} else if this.peek_can_start_id(){
|
||||||
|| matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("["))
|
this.no_whitespace()?;
|
||||||
{
|
let id = stk.run(|ctx| this.parse_id(ctx)).await?;
|
||||||
self.no_whitespace()?;
|
|
||||||
let id = stk.run(|ctx| self.parse_id(ctx)).await?;
|
|
||||||
Bound::Excluded(id)
|
Bound::Excluded(id)
|
||||||
} else {
|
} else {
|
||||||
Bound::Unbounded
|
Bound::Unbounded
|
||||||
|
@ -105,23 +110,23 @@ impl Parser<'_> {
|
||||||
} else {
|
} else {
|
||||||
let id = match beg {
|
let id = match beg {
|
||||||
Bound::Unbounded => {
|
Bound::Unbounded => {
|
||||||
if self.peek_kind() == t!("$param") {
|
if this.peek_kind() == t!("$param") {
|
||||||
return Err(ParseError::new(
|
return Err(ParseError::new(
|
||||||
ParseErrorKind::UnexpectedExplain {
|
ParseErrorKind::UnexpectedExplain {
|
||||||
found: t!("$param"),
|
found: t!("$param"),
|
||||||
expected: "a record-id id",
|
expected: "a record-id id",
|
||||||
explain: "you can create a record-id from a param with the function 'type::thing'",
|
explain: "you can create a record-id from a param with the function 'type::thing'",
|
||||||
},
|
},
|
||||||
self.recent_span(),
|
this.recent_span(),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
// we haven't matched anything so far so we still want any type of id.
|
// we haven't matched anything so far so we still want any type of id.
|
||||||
unexpected!(self, self.peek_kind(), "a record-id id")
|
unexpected!(this, this.peek_kind(), "a record-id id")
|
||||||
}
|
}
|
||||||
Bound::Excluded(_) => {
|
Bound::Excluded(_) => {
|
||||||
// we have matched a bounded id but we don't see a range operator.
|
// we have matched a bounded id but we don't see a range operator.
|
||||||
unexpected!(self, self.peek_kind(), "the range operator `..`")
|
unexpected!(this, this.peek_kind(), "the range operator `..`")
|
||||||
}
|
}
|
||||||
Bound::Included(id) => id,
|
Bound::Included(id) => id,
|
||||||
};
|
};
|
||||||
|
@ -130,6 +135,7 @@ impl Parser<'_> {
|
||||||
id,
|
id,
|
||||||
}))
|
}))
|
||||||
}
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
|
pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
|
||||||
|
@ -137,19 +143,20 @@ impl Parser<'_> {
|
||||||
|
|
||||||
expected!(self, t!(":"));
|
expected!(self, t!(":"));
|
||||||
|
|
||||||
self.peek();
|
enter_flexible_ident!(this = self =>(self.flexible_record_id){
|
||||||
self.no_whitespace()?;
|
this.peek();
|
||||||
|
this.no_whitespace()?;
|
||||||
|
|
||||||
let beg = if self.peek_can_be_ident() {
|
let beg = if this.peek_can_be_ident() {
|
||||||
self.peek();
|
this.peek();
|
||||||
self.no_whitespace()?;
|
this.no_whitespace()?;
|
||||||
|
|
||||||
let id = ctx.run(|ctx| self.parse_id(ctx)).await?;
|
let id = ctx.run(|ctx| this.parse_id(ctx)).await?;
|
||||||
|
|
||||||
self.peek();
|
this.peek();
|
||||||
self.no_whitespace()?;
|
this.no_whitespace()?;
|
||||||
|
|
||||||
if self.eat(t!(">")) {
|
if this.eat(t!(">")) {
|
||||||
Bound::Excluded(id)
|
Bound::Excluded(id)
|
||||||
} else {
|
} else {
|
||||||
Bound::Included(id)
|
Bound::Included(id)
|
||||||
|
@ -158,21 +165,21 @@ impl Parser<'_> {
|
||||||
Bound::Unbounded
|
Bound::Unbounded
|
||||||
};
|
};
|
||||||
|
|
||||||
self.peek();
|
this.peek();
|
||||||
self.no_whitespace()?;
|
this.no_whitespace()?;
|
||||||
|
|
||||||
expected!(self, t!(".."));
|
expected!(this, t!(".."));
|
||||||
|
|
||||||
self.peek();
|
this.peek();
|
||||||
self.no_whitespace()?;
|
this.no_whitespace()?;
|
||||||
|
|
||||||
let inclusive = self.eat(t!("="));
|
let inclusive = this.eat(t!("="));
|
||||||
|
|
||||||
self.peek();
|
this.peek();
|
||||||
self.no_whitespace()?;
|
this.no_whitespace()?;
|
||||||
|
|
||||||
let end = if self.peek_can_be_ident() {
|
let end = if this.peek_can_be_ident() {
|
||||||
let id = ctx.run(|ctx| self.parse_id(ctx)).await?;
|
let id = ctx.run(|ctx| this.parse_id(ctx)).await?;
|
||||||
if inclusive {
|
if inclusive {
|
||||||
Bound::Included(id)
|
Bound::Included(id)
|
||||||
} else {
|
} else {
|
||||||
|
@ -187,11 +194,14 @@ impl Parser<'_> {
|
||||||
beg,
|
beg,
|
||||||
end,
|
end,
|
||||||
})
|
})
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
|
pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
|
||||||
let ident = self.next_token_value::<Ident>()?.0;
|
let ident = self.next_token_value::<Ident>()?.0;
|
||||||
self.parse_thing_from_ident(ctx, ident).await
|
enter_flexible_ident!(this = self =>(self.flexible_record_id){
|
||||||
|
this.parse_thing_from_ident(ctx, ident).await
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn parse_thing_from_ident(
|
pub async fn parse_thing_from_ident(
|
||||||
|
@ -201,10 +211,13 @@ impl Parser<'_> {
|
||||||
) -> ParseResult<Thing> {
|
) -> ParseResult<Thing> {
|
||||||
expected!(self, t!(":"));
|
expected!(self, t!(":"));
|
||||||
|
|
||||||
self.peek();
|
let id = enter_flexible_ident!(this = self =>(self.flexible_record_id){
|
||||||
self.no_whitespace()?;
|
this.peek();
|
||||||
|
this.no_whitespace()?;
|
||||||
|
|
||||||
|
ctx.run(|ctx| this.parse_id(ctx)).await
|
||||||
|
})?;
|
||||||
|
|
||||||
let id = ctx.run(|ctx| self.parse_id(ctx)).await?;
|
|
||||||
Ok(Thing {
|
Ok(Thing {
|
||||||
tb: ident,
|
tb: ident,
|
||||||
id,
|
id,
|
||||||
|
@ -215,11 +228,15 @@ impl Parser<'_> {
|
||||||
let token = self.next();
|
let token = self.next();
|
||||||
match token.kind {
|
match token.kind {
|
||||||
t!("{") => {
|
t!("{") => {
|
||||||
let object = self.parse_object(stk, token.span).await?;
|
let object = enter_flexible_ident!(this = self => (false){
|
||||||
|
this.parse_object(stk, token.span).await
|
||||||
|
})?;
|
||||||
Ok(Id::Object(object))
|
Ok(Id::Object(object))
|
||||||
}
|
}
|
||||||
t!("[") => {
|
t!("[") => {
|
||||||
let array = self.parse_array(stk, token.span).await?;
|
let array = enter_flexible_ident!(this = self => (false){
|
||||||
|
this.parse_array(stk, token.span).await
|
||||||
|
})?;
|
||||||
Ok(Id::Array(array))
|
Ok(Id::Array(array))
|
||||||
}
|
}
|
||||||
t!("+") => {
|
t!("+") => {
|
||||||
|
@ -260,6 +277,30 @@ impl Parser<'_> {
|
||||||
Ok(Id::String(text))
|
Ok(Id::String(text))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
TokenKind::Number(NumberKind::Decimal | NumberKind::DecimalExponent)
|
||||||
|
if self.flexible_record_id =>
|
||||||
|
{
|
||||||
|
let mut text = self.lexer.string.take().unwrap();
|
||||||
|
text.push('d');
|
||||||
|
text.push('e');
|
||||||
|
text.push('c');
|
||||||
|
Ok(Id::String(text))
|
||||||
|
}
|
||||||
|
TokenKind::Number(NumberKind::Float) if self.flexible_record_id => {
|
||||||
|
let mut text = self.lexer.string.take().unwrap();
|
||||||
|
text.push('f');
|
||||||
|
Ok(Id::String(text))
|
||||||
|
}
|
||||||
|
TokenKind::Duration if self.flexible_record_id => {
|
||||||
|
self.lexer.duration = None;
|
||||||
|
let slice = self.lexer.reader.span(token.span);
|
||||||
|
if slice.iter().any(|x| *x > 0b0111_1111) {
|
||||||
|
unexpected!(self, token.kind, "a identifier");
|
||||||
|
}
|
||||||
|
// Should be valid utf-8 as it was already parsed by the lexer
|
||||||
|
let text = String::from_utf8(slice.to_vec()).unwrap();
|
||||||
|
Ok(Id::String(text))
|
||||||
|
}
|
||||||
t!("ULID") => {
|
t!("ULID") => {
|
||||||
// TODO: error message about how to use `ulid` as an identifier.
|
// TODO: error message about how to use `ulid` as an identifier.
|
||||||
expected!(self, t!("("));
|
expected!(self, t!("("));
|
||||||
|
@ -480,4 +521,49 @@ mod tests {
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn weird_things() {
|
||||||
|
use crate::sql;
|
||||||
|
|
||||||
|
fn assert_ident_parses_correctly(ident: &str) {
|
||||||
|
let thing = format!("t:{}", ident);
|
||||||
|
let mut parser = Parser::new(thing.as_bytes());
|
||||||
|
parser.allow_fexible_record_id(true);
|
||||||
|
let mut stack = Stack::new();
|
||||||
|
let r = stack
|
||||||
|
.enter(|ctx| async move { parser.parse_thing(ctx).await })
|
||||||
|
.finish()
|
||||||
|
.expect(&format!("failed on {}", ident))
|
||||||
|
.id;
|
||||||
|
assert_eq!(r, Id::String(ident.to_string()),);
|
||||||
|
|
||||||
|
let mut parser = Parser::new(thing.as_bytes());
|
||||||
|
let r = stack
|
||||||
|
.enter(|ctx| async move { parser.parse_query(ctx).await })
|
||||||
|
.finish()
|
||||||
|
.expect(&format!("failed on {}", ident));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
r,
|
||||||
|
sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Thing(
|
||||||
|
sql::Thing {
|
||||||
|
tb: "t".to_string(),
|
||||||
|
id: Id::String(ident.to_string())
|
||||||
|
}
|
||||||
|
))]))
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
assert_ident_parses_correctly("123abc");
|
||||||
|
assert_ident_parses_correctly("123d");
|
||||||
|
assert_ident_parses_correctly("123de");
|
||||||
|
assert_ident_parses_correctly("123dec");
|
||||||
|
assert_ident_parses_correctly("1e23dec");
|
||||||
|
assert_ident_parses_correctly("1e23f");
|
||||||
|
assert_ident_parses_correctly("123f");
|
||||||
|
assert_ident_parses_correctly("1ns");
|
||||||
|
assert_ident_parses_correctly("1ns1");
|
||||||
|
assert_ident_parses_correctly("1ns1h");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -232,6 +232,8 @@ pub enum NumberKind {
|
||||||
DecimalExponent,
|
DecimalExponent,
|
||||||
// A number with a float postfix.
|
// A number with a float postfix.
|
||||||
Float,
|
Float,
|
||||||
|
// A number with a float postfix that had a mantissa.
|
||||||
|
FloatMantissa,
|
||||||
// A number with a `.3` part.
|
// A number with a `.3` part.
|
||||||
Mantissa,
|
Mantissa,
|
||||||
// A number with a `.3e10` part.
|
// A number with a `.3e10` part.
|
||||||
|
|
Loading…
Reference in a new issue