Flexible record id's (#3937)

This commit is contained in:
Mees Delzenne 2024-04-29 14:13:41 +02:00 committed by GitHub
parent 22aec455b5
commit a791f742b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 348 additions and 197 deletions

View file

@ -25,12 +25,21 @@ impl<'a> Lexer<'a> {
/// ///
/// Expect the lexer to have already eaten the digits starting the duration. /// Expect the lexer to have already eaten the digits starting the duration.
pub fn lex_duration(&mut self) -> Token { pub fn lex_duration(&mut self) -> Token {
let backup = self.reader.offset();
match self.lex_duration_err() { match self.lex_duration_err() {
Ok(x) => { Ok(x) => {
self.scratch.clear();
self.duration = Some(x); self.duration = Some(x);
self.finish_token(TokenKind::Duration) self.finish_token(TokenKind::Duration)
} }
Err(e) => self.invalid_token(LexError::Duration(e)), Err(e) => {
if self.flexible_ident {
self.reader.backup(backup);
return self.lex_ident();
}
self.scratch.clear();
self.invalid_token(LexError::Duration(e))
}
} }
} }
@ -63,7 +72,6 @@ impl<'a> Lexer<'a> {
current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?; current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?; current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
} }
self.scratch.clear();
loop { loop {
let Some(next) = self.reader.peek() else { let Some(next) = self.reader.peek() else {

View file

@ -84,6 +84,10 @@ pub struct Lexer<'a> {
/// like for example strings with escape characters. /// like for example strings with escape characters.
scratch: String, scratch: String,
/// Allow the next parsed idents to be flexible, i.e. support idents which start with a
/// digit (such as `123abc`), which regular idents may not.
pub flexible_ident: bool,
// below are a collection of storage for values produced by tokens. // below are a collection of storage for values produced by tokens.
// For performance reasons we wan't to keep the tokens as small as possible. // For performance reasons we wan't to keep the tokens as small as possible.
// As only some tokens have an additional value associated with them we don't store that value // As only some tokens have an additional value associated with them we don't store that value
@ -117,6 +121,7 @@ impl<'a> Lexer<'a> {
last_offset: 0, last_offset: 0,
whitespace_span: None, whitespace_span: None,
scratch: String::new(), scratch: String::new(),
flexible_ident: false,
string: None, string: None,
datetime: None, datetime: None,
duration: None, duration: None,
@ -132,6 +137,7 @@ impl<'a> Lexer<'a> {
pub fn reset(&mut self) { pub fn reset(&mut self) {
self.last_offset = 0; self.last_offset = 0;
self.scratch.clear(); self.scratch.clear();
self.flexible_ident = false;
self.whitespace_span = None; self.whitespace_span = None;
self.string = None; self.string = None;
self.datetime = None; self.datetime = None;
@ -155,6 +161,7 @@ impl<'a> Lexer<'a> {
last_offset: 0, last_offset: 0,
whitespace_span: None, whitespace_span: None,
scratch: self.scratch, scratch: self.scratch,
flexible_ident: false,
string: self.string, string: self.string,
datetime: self.datetime, datetime: self.datetime,
duration: self.duration, duration: self.duration,

View file

@ -15,23 +15,22 @@ pub enum Error {
} }
impl Lexer<'_> { impl Lexer<'_> {
/// Finish the number token currently being lexed.
///
/// Moves the contents of the scratch buffer into `self.string`, dropping every `_`
/// character on the way, and then emits a `TokenKind::Number` token of the given `kind`.
/// The scratch buffer is left empty afterwards.
pub fn finish_number_token(&mut self, kind: NumberKind) -> Token {
    // Take ownership of the collected text so the scratch buffer is reset for the next token.
    let mut digits = mem::take(&mut self.scratch);
    // Underscores are kept in the scratch buffer while lexing but are not part of the value.
    digits.retain(|ch| ch != '_');
    self.string = Some(digits);

    let token_kind = TokenKind::Number(kind);
    self.finish_token(token_kind)
}
/// Lex only an integer. /// Lex only an integer.
/// Use when a number can be followed immediatly by a `.` like in a model version. /// Use when a number can be followed immediatly by a `.` like in a model version.
pub fn lex_only_integer(&mut self) -> Token { pub fn lex_only_integer(&mut self) -> Token {
match self.lex_only_integer_err() {
Ok(x) => x,
Err(e) => self.invalid_token(LexError::Number(e)),
}
}
fn lex_only_integer_err(&mut self) -> Result<Token, Error> {
let Some(next) = self.reader.peek() else { let Some(next) = self.reader.peek() else {
return Ok(self.eof_token()); return self.eof_token();
}; };
// not a number, return a different token kind, for error reporting. // not a number, return a different token kind, for error reporting.
if !next.is_ascii_digit() { if !next.is_ascii_digit() {
return Ok(self.next_token()); return self.next_token();
} }
self.scratch.push(next as char); self.scratch.push(next as char);
@ -39,9 +38,7 @@ impl Lexer<'_> {
// eat all the ascii digits // eat all the ascii digits
while let Some(x) = self.reader.peek() { while let Some(x) = self.reader.peek() {
if x == b'_' { if !x.is_ascii_digit() && x != b'_' {
self.reader.next();
} else if !x.is_ascii_digit() {
break; break;
} else { } else {
self.scratch.push(x as char); self.scratch.push(x as char);
@ -53,33 +50,25 @@ impl Lexer<'_> {
match self.reader.peek() { match self.reader.peek() {
Some(b'd' | b'f') => { Some(b'd' | b'f') => {
// not an integer but parse anyway for error reporting. // not an integer but parse anyway for error reporting.
return self.lex_suffix(false, true); return self.lex_suffix(false, false);
} }
Some(x) if x.is_ascii_alphabetic() => return Err(self.invalid_suffix()), Some(x) if x.is_ascii_alphabetic() => return self.invalid_suffix_token(),
_ => {} _ => {}
} }
self.string = Some(mem::take(&mut self.scratch)); self.finish_number_token(NumberKind::Integer)
Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)))
} }
pub fn lex_number(&mut self, start: u8) -> Token {
match self.lex_number_err(start) {
Ok(x) => x,
Err(e) => self.invalid_token(LexError::Number(e)),
}
}
/// Lex a number. /// Lex a number.
/// ///
/// Expects the digit which started the number as the start argument. /// Expects the digit which started the number as the start argument.
pub fn lex_number_err(&mut self, start: u8) -> Result<Token, Error> { pub fn lex_number(&mut self, start: u8) -> Token {
debug_assert!(start.is_ascii_digit()); debug_assert!(start.is_ascii_digit());
debug_assert_eq!(self.scratch, ""); debug_assert_eq!(self.scratch, "");
self.scratch.push(start as char); self.scratch.push(start as char);
loop { loop {
let Some(x) = self.reader.peek() else { let Some(x) = self.reader.peek() else {
self.string = Some(mem::take(&mut self.scratch)); return self.finish_number_token(NumberKind::Integer);
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
}; };
match x { match x {
b'0'..=b'9' => { b'0'..=b'9' => {
@ -87,10 +76,10 @@ impl Lexer<'_> {
self.reader.next(); self.reader.next();
self.scratch.push(x as char); self.scratch.push(x as char);
} }
b'e' | b'E' => { x @ (b'e' | b'E') => {
// scientific notation // scientific notation
self.reader.next(); self.reader.next();
self.scratch.push('e'); self.scratch.push(x as char);
return self.lex_exponent(false); return self.lex_exponent(false);
} }
b'.' => { b'.' => {
@ -104,33 +93,33 @@ impl Lexer<'_> {
} else { } else {
// indexing a number // indexing a number
self.reader.backup(backup); self.reader.backup(backup);
self.string = Some(mem::take(&mut self.scratch)); return self.finish_number_token(NumberKind::Integer);
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
} }
} }
b'f' | b'd' => return self.lex_suffix(false, true), b'f' | b'd' => return self.lex_suffix(false, false),
// Oxc2 is the start byte of 'µ' // Oxc2 is the start byte of 'µ'
0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => { 0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
// duration suffix, switch to lexing duration. // duration suffix, switch to lexing duration.
return Ok(self.lex_duration()); return self.lex_duration();
} }
b'_' => { b'_' => {
self.reader.next(); self.reader.next();
} }
b'a'..=b'z' | b'A'..=b'Z' => { b'a'..=b'z' | b'A'..=b'Z' => {
return Err(self.invalid_suffix()); if self.flexible_ident {
// invalid token, unexpected identifier character immediatly after number. return self.lex_ident();
// Eat all remaining identifier like characters. } else {
return self.invalid_suffix_token();
}
} }
_ => { _ => {
self.string = Some(mem::take(&mut self.scratch)); return self.finish_number_token(NumberKind::Integer);
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
} }
} }
} }
} }
fn invalid_suffix(&mut self) -> Error { fn invalid_suffix_token(&mut self) -> Token {
// eat the whole suffix. // eat the whole suffix.
while let Some(x) = self.reader.peek() { while let Some(x) = self.reader.peek() {
if !x.is_ascii_alphanumeric() { if !x.is_ascii_alphanumeric() {
@ -139,20 +128,29 @@ impl Lexer<'_> {
self.reader.next(); self.reader.next();
} }
self.scratch.clear(); self.scratch.clear();
Error::InvalidSuffix self.invalid_token(LexError::Number(Error::InvalidSuffix))
} }
/// Lex a number suffix, either 'f' or 'dec'. /// Lex a number suffix, either 'f' or 'dec'.
fn lex_suffix(&mut self, had_exponent: bool, can_be_duration: bool) -> Result<Token, Error> { fn lex_suffix(&mut self, had_mantissa: bool, had_exponent: bool) -> Token {
match self.reader.peek() { match self.reader.peek() {
Some(b'f') => { Some(b'f') => {
// float suffix // float suffix
self.reader.next(); self.reader.next();
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) { if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
Err(self.invalid_suffix()) if self.flexible_ident && !had_mantissa {
self.scratch.push('f');
self.lex_ident()
} else { } else {
self.string = Some(mem::take(&mut self.scratch)); self.invalid_suffix_token()
Ok(self.finish_token(TokenKind::Number(NumberKind::Float))) }
} else {
let kind = if had_mantissa {
NumberKind::FloatMantissa
} else {
NumberKind::Float
};
self.finish_number_token(kind)
} }
} }
Some(b'd') => { Some(b'd') => {
@ -160,44 +158,53 @@ impl Lexer<'_> {
self.reader.next(); self.reader.next();
let checkpoint = self.reader.offset(); let checkpoint = self.reader.offset();
if !self.eat(b'e') { if !self.eat(b'e') {
if can_be_duration { if !had_mantissa && !had_exponent {
self.reader.backup(checkpoint - 1); self.reader.backup(checkpoint - 1);
return Ok(self.lex_duration()); return self.lex_duration();
} else if !had_mantissa && self.flexible_ident {
self.scratch.push('d');
return self.lex_ident();
} else { } else {
return Err(self.invalid_suffix()); return self.invalid_suffix_token();
} }
} }
if !self.eat(b'c') { if !self.eat(b'c') {
return Err(self.invalid_suffix()); if self.flexible_ident {
self.scratch.push('d');
self.scratch.push('e');
return self.lex_ident();
} else {
return self.invalid_suffix_token();
}
} }
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) { if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
Err(self.invalid_suffix()) self.invalid_suffix_token()
} else { } else {
self.string = Some(mem::take(&mut self.scratch)); let kind = if had_exponent {
if had_exponent { NumberKind::DecimalExponent
Ok(self.finish_token(TokenKind::Number(NumberKind::DecimalExponent)))
} else { } else {
Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal))) NumberKind::Decimal
} };
self.finish_number_token(kind)
} }
} }
// Caller should ensure this is unreachable
_ => unreachable!(), _ => unreachable!(),
} }
} }
/// Lexes the mantissa of a number, i.e. `.8` in `1.8` /// Lexes the mantissa of a number, i.e. `.8` in `1.8`
pub fn lex_mantissa(&mut self) -> Result<Token, Error> { pub fn lex_mantissa(&mut self) -> Token {
loop { loop {
// lex_number already checks if there exists a digit after the dot. // lex_number already checks if there exists a digit after the dot.
// So this will never fail the first iteration of the loop. // So this will never fail the first iteration of the loop.
let Some(x) = self.reader.peek() else { let Some(x) = self.reader.peek() else {
self.string = Some(mem::take(&mut self.scratch)); return self.finish_number_token(NumberKind::Mantissa);
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
}; };
match x { match x {
b'0'..=b'9' => { b'0'..=b'9' | b'_' => {
// next digit. // next digit.
self.reader.next(); self.reader.next();
self.scratch.push(x as char); self.scratch.push(x as char);
@ -208,25 +215,21 @@ impl Lexer<'_> {
self.scratch.push('e'); self.scratch.push('e');
return self.lex_exponent(true); return self.lex_exponent(true);
} }
b'_' => { b'f' | b'd' => return self.lex_suffix(true, false),
self.reader.next();
}
b'f' | b'd' => return self.lex_suffix(false, false),
b'a'..=b'z' | b'A'..=b'Z' => { b'a'..=b'z' | b'A'..=b'Z' => {
// invalid token, random identifier characters immediately after number. // invalid token, random identifier characters immediately after number.
self.scratch.clear(); self.scratch.clear();
return Err(Error::InvalidSuffix); return self.invalid_suffix_token();
} }
_ => { _ => {
self.string = Some(mem::take(&mut self.scratch)); return self.finish_number_token(NumberKind::Mantissa);
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
} }
} }
} }
} }
/// Lexes the exponent of a number, i.e. `e10` in `1.1e10`; /// Lexes the exponent of a number, i.e. `e10` in `1.1e10`;
fn lex_exponent(&mut self, had_mantissa: bool) -> Result<Token, Error> { fn lex_exponent(&mut self, had_mantissa: bool) -> Token {
loop { loop {
match self.reader.peek() { match self.reader.peek() {
Some(x @ b'-' | x @ b'+') => { Some(x @ b'-' | x @ b'+') => {
@ -238,30 +241,29 @@ impl Lexer<'_> {
break; break;
} }
_ => { _ => {
if self.flexible_ident && !had_mantissa {
return self.lex_ident();
}
// random other character, expected atleast one digit. // random other character, expected atleast one digit.
return Err(Error::DigitExpectedExponent); return self.invalid_token(LexError::Number(Error::DigitExpectedExponent));
} }
} }
} }
self.reader.next(); self.reader.next();
loop { loop {
match self.reader.peek() { match self.reader.peek() {
Some(x @ b'0'..=b'9') => { Some(x @ (b'0'..=b'9' | b'_')) => {
self.reader.next(); self.reader.next();
self.scratch.push(x as char); self.scratch.push(x as char);
} }
Some(b'_') => { Some(b'f' | b'd') => return self.lex_suffix(had_mantissa, true),
self.reader.next();
}
Some(b'f' | b'd') => return self.lex_suffix(true, false),
_ => { _ => {
let kind = if had_mantissa { let kind = if had_mantissa {
NumberKind::MantissaExponent NumberKind::MantissaExponent
} else { } else {
NumberKind::Exponent NumberKind::Exponent
}; };
self.string = Some(mem::take(&mut self.scratch)); return self.finish_number_token(kind);
return Ok(self.finish_token(TokenKind::Number(kind)));
} }
} }
} }

View file

@ -127,6 +127,7 @@ impl TokenValue for f32 {
TokenKind::Number( TokenKind::Number(
NumberKind::Integer NumberKind::Integer
| NumberKind::Float | NumberKind::Float
| NumberKind::FloatMantissa
| NumberKind::Mantissa | NumberKind::Mantissa
| NumberKind::MantissaExponent, | NumberKind::MantissaExponent,
) => { ) => {
@ -152,6 +153,7 @@ impl TokenValue for f64 {
TokenKind::Number( TokenKind::Number(
NumberKind::Integer NumberKind::Integer
| NumberKind::Float | NumberKind::Float
| NumberKind::FloatMantissa
| NumberKind::Mantissa | NumberKind::Mantissa
| NumberKind::MantissaExponent, | NumberKind::MantissaExponent,
) => { ) => {
@ -203,7 +205,10 @@ impl TokenValue for Number {
Ok(Number::Float(x)) Ok(Number::Float(x))
} }
TokenKind::Number( TokenKind::Number(
NumberKind::Mantissa | NumberKind::MantissaExponent | NumberKind::Float, NumberKind::Mantissa
| NumberKind::MantissaExponent
| NumberKind::Float
| NumberKind::FloatMantissa,
) => { ) => {
let source = parser.lexer.string.take().unwrap(); let source = parser.lexer.string.take().unwrap();
// As far as I can tell this will never fail for valid integers. // As far as I can tell this will never fail for valid integers.

View file

@ -149,6 +149,39 @@ macro_rules! enter_query_recursion {
}}; }};
} }
/// Runs a block of parser code with `lexer.flexible_ident` temporarily set to `$enabled`.
///
/// Usage: `enter_flexible_ident!(this = self => (enabled_expr) { ...body... })`.
///
/// * `$name` - identifier the body uses to reach the parser; it is bound to a guard which
///   dereferences (mutably) to the `Parser`.
/// * `$this` - expression yielding the `&mut Parser` to wrap.
/// * `$enabled` - value assigned to `lexer.flexible_ident` while the body runs.
///
/// The value `lexer.flexible_ident` had before entering is written back when the guard is
/// dropped, so it is restored however the body is left (normal completion or an early exit
/// such as `?`). The macro evaluates to the value of the body block.
#[macro_export]
macro_rules! enter_flexible_ident{
($name:ident = $this:expr => ($enabled:expr){ $($t:tt)* }) => {{
// Guard holding the parser together with the flag value to restore on drop.
struct Dropper<'a, 'b>(&'a mut $crate::syn::parser::Parser<'b>,bool);
impl Drop for Dropper<'_, '_> {
fn drop(&mut self) {
// Put back the value that was active before the macro was entered.
self.0.lexer.flexible_ident = self.1;
}
}
// Deref / DerefMut let the body call parser methods directly on the guard.
impl<'a> ::std::ops::Deref for Dropper<'_,'a>{
type Target = $crate::syn::parser::Parser<'a>;
fn deref(&self) -> &Self::Target{
self.0
}
}
impl<'a> ::std::ops::DerefMut for Dropper<'_,'a>{
fn deref_mut(&mut self) -> &mut Self::Target{
self.0
}
}
// NOTE: despite its name, `enabled` holds the *previous* flag value, saved for restoring.
let enabled = $this.lexer.flexible_ident;
// `$enabled` is evaluated here, before `$this` is moved into the guard.
$this.lexer.flexible_ident = $enabled;
#[allow(unused_mut)]
let mut $name = Dropper($this,enabled);
{
$($t)*
}
}};
}
pub(super) use expected; pub(super) use expected;
pub(super) use unexpected; pub(super) use unexpected;

View file

@ -79,6 +79,7 @@ pub struct Parser<'a> {
token_buffer: TokenBuffer<4>, token_buffer: TokenBuffer<4>,
table_as_field: bool, table_as_field: bool,
legacy_strands: bool, legacy_strands: bool,
flexible_record_id: bool,
object_recursion: usize, object_recursion: usize,
query_recursion: usize, query_recursion: usize,
} }
@ -92,6 +93,7 @@ impl<'a> Parser<'a> {
token_buffer: TokenBuffer::new(), token_buffer: TokenBuffer::new(),
table_as_field: false, table_as_field: false,
legacy_strands: false, legacy_strands: false,
flexible_record_id: true,
object_recursion: 100, object_recursion: 100,
query_recursion: 20, query_recursion: 20,
} }
@ -117,6 +119,11 @@ impl<'a> Parser<'a> {
self.legacy_strands = value; self.legacy_strands = value;
} }
/// Set whether to allow record-id's which don't adhere to regular ident rules.
///
/// The value is stored in `self.flexible_record_id`.
pub fn allow_fexible_record_id(&mut self, value: bool) {
self.flexible_record_id = value;
}
/// Reset the parser state. Doesnt change the position of the parser in buffer. /// Reset the parser state. Doesnt change the position of the parser in buffer.
pub fn reset(&mut self) { pub fn reset(&mut self) {
self.last_span = Span::empty(); self.last_span = Span::empty();
@ -132,6 +139,7 @@ impl<'a> Parser<'a> {
last_span: Span::empty(), last_span: Span::empty(),
token_buffer: TokenBuffer::new(), token_buffer: TokenBuffer::new(),
legacy_strands: self.legacy_strands, legacy_strands: self.legacy_strands,
flexible_record_id: self.flexible_record_id,
table_as_field: false, table_as_field: false,
object_recursion: self.object_recursion, object_recursion: self.object_recursion,
query_recursion: self.query_recursion, query_recursion: self.query_recursion,

View file

@ -2,6 +2,7 @@ use reblessive::Stk;
use super::{ParseResult, Parser}; use super::{ParseResult, Parser};
use crate::{ use crate::{
enter_flexible_ident,
sql::{id::Gen, Id, Ident, Range, Thing, Value}, sql::{id::Gen, Id, Ident, Range, Thing, Value},
syn::{ syn::{
parser::{ parser::{
@ -37,6 +38,14 @@ impl Parser<'_> {
Ok(thing) Ok(thing)
} }
/// Returns `true` when the next token can begin the id part of a record-id.
///
/// That is the case when the token can be an ident, or when it is a number, a duration,
/// or one of the opening delimiters `{` and `[`.
fn peek_can_start_id(&mut self) -> bool {
    // Anything usable as an ident is accepted without looking at the token kind.
    if self.peek_can_be_ident() {
        return true;
    }

    match self.peek_kind() {
        TokenKind::Number(_) | t!("{") | t!("[") | TokenKind::Duration => true,
        _ => false,
    }
}
pub async fn parse_thing_or_range( pub async fn parse_thing_or_range(
&mut self, &mut self,
stk: &mut Stk, stk: &mut Stk,
@ -44,19 +53,19 @@ impl Parser<'_> {
) -> ParseResult<Value> { ) -> ParseResult<Value> {
expected!(self, t!(":")); expected!(self, t!(":"));
self.peek(); enter_flexible_ident!(this = self =>(self.flexible_record_id){
self.no_whitespace()?;
if self.eat(t!("..")) { this.peek();
let end = if self.eat(t!("=")) { this.no_whitespace()?;
self.no_whitespace()?;
let id = stk.run(|stk| self.parse_id(stk)).await?; if this.eat(t!("..")) {
let end = if this.eat(t!("=")) {
this.no_whitespace()?;
let id = stk.run(|stk| this.parse_id(stk)).await?;
Bound::Included(id) Bound::Included(id)
} else if self.peek_can_be_ident() } else if this.peek_can_start_id() {
|| matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) this.no_whitespace()?;
{ let id = stk.run(|stk| this.parse_id(stk)).await?;
self.no_whitespace()?;
let id = stk.run(|stk| self.parse_id(stk)).await?;
Bound::Excluded(id) Bound::Excluded(id)
} else { } else {
Bound::Unbounded Bound::Unbounded
@ -68,13 +77,11 @@ impl Parser<'_> {
}))); })));
} }
let beg = if self.peek_can_be_ident() let beg = if this.peek_can_start_id(){
|| matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) let id = stk.run(|ctx| this.parse_id(ctx)).await?;
{
let id = stk.run(|ctx| self.parse_id(ctx)).await?;
if self.eat(t!(">")) { if this.eat(t!(">")) {
self.no_whitespace()?; this.no_whitespace()?;
Bound::Excluded(id) Bound::Excluded(id)
} else { } else {
Bound::Included(id) Bound::Included(id)
@ -83,16 +90,14 @@ impl Parser<'_> {
Bound::Unbounded Bound::Unbounded
}; };
if self.eat(t!("..")) { if this.eat(t!("..")) {
let end = if self.eat(t!("=")) { let end = if this.eat(t!("=")) {
self.no_whitespace()?; this.no_whitespace()?;
let id = stk.run(|ctx| self.parse_id(ctx)).await?; let id = stk.run(|ctx| this.parse_id(ctx)).await?;
Bound::Included(id) Bound::Included(id)
} else if self.peek_can_be_ident() } else if this.peek_can_start_id(){
|| matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) this.no_whitespace()?;
{ let id = stk.run(|ctx| this.parse_id(ctx)).await?;
self.no_whitespace()?;
let id = stk.run(|ctx| self.parse_id(ctx)).await?;
Bound::Excluded(id) Bound::Excluded(id)
} else { } else {
Bound::Unbounded Bound::Unbounded
@ -105,23 +110,23 @@ impl Parser<'_> {
} else { } else {
let id = match beg { let id = match beg {
Bound::Unbounded => { Bound::Unbounded => {
if self.peek_kind() == t!("$param") { if this.peek_kind() == t!("$param") {
return Err(ParseError::new( return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain { ParseErrorKind::UnexpectedExplain {
found: t!("$param"), found: t!("$param"),
expected: "a record-id id", expected: "a record-id id",
explain: "you can create a record-id from a param with the function 'type::thing'", explain: "you can create a record-id from a param with the function 'type::thing'",
}, },
self.recent_span(), this.recent_span(),
)); ));
} }
// we haven't matched anythong so far so we still want any type of id. // we haven't matched anythong so far so we still want any type of id.
unexpected!(self, self.peek_kind(), "a record-id id") unexpected!(this, this.peek_kind(), "a record-id id")
} }
Bound::Excluded(_) => { Bound::Excluded(_) => {
// we have matched a bounded id but we don't see an range operator. // we have matched a bounded id but we don't see an range operator.
unexpected!(self, self.peek_kind(), "the range operator `..`") unexpected!(this, this.peek_kind(), "the range operator `..`")
} }
Bound::Included(id) => id, Bound::Included(id) => id,
}; };
@ -130,6 +135,7 @@ impl Parser<'_> {
id, id,
})) }))
} }
})
} }
pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> { pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
@ -137,19 +143,20 @@ impl Parser<'_> {
expected!(self, t!(":")); expected!(self, t!(":"));
self.peek(); enter_flexible_ident!(this = self =>(self.flexible_record_id){
self.no_whitespace()?; this.peek();
this.no_whitespace()?;
let beg = if self.peek_can_be_ident() { let beg = if this.peek_can_be_ident() {
self.peek(); this.peek();
self.no_whitespace()?; this.no_whitespace()?;
let id = ctx.run(|ctx| self.parse_id(ctx)).await?; let id = ctx.run(|ctx| this.parse_id(ctx)).await?;
self.peek(); this.peek();
self.no_whitespace()?; this.no_whitespace()?;
if self.eat(t!(">")) { if this.eat(t!(">")) {
Bound::Excluded(id) Bound::Excluded(id)
} else { } else {
Bound::Included(id) Bound::Included(id)
@ -158,21 +165,21 @@ impl Parser<'_> {
Bound::Unbounded Bound::Unbounded
}; };
self.peek(); this.peek();
self.no_whitespace()?; this.no_whitespace()?;
expected!(self, t!("..")); expected!(this, t!(".."));
self.peek(); this.peek();
self.no_whitespace()?; this.no_whitespace()?;
let inclusive = self.eat(t!("=")); let inclusive = this.eat(t!("="));
self.peek(); this.peek();
self.no_whitespace()?; this.no_whitespace()?;
let end = if self.peek_can_be_ident() { let end = if this.peek_can_be_ident() {
let id = ctx.run(|ctx| self.parse_id(ctx)).await?; let id = ctx.run(|ctx| this.parse_id(ctx)).await?;
if inclusive { if inclusive {
Bound::Included(id) Bound::Included(id)
} else { } else {
@ -187,11 +194,14 @@ impl Parser<'_> {
beg, beg,
end, end,
}) })
})
} }
pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> { pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
let ident = self.next_token_value::<Ident>()?.0; let ident = self.next_token_value::<Ident>()?.0;
self.parse_thing_from_ident(ctx, ident).await enter_flexible_ident!(this = self =>(self.flexible_record_id){
this.parse_thing_from_ident(ctx, ident).await
})
} }
pub async fn parse_thing_from_ident( pub async fn parse_thing_from_ident(
@ -201,10 +211,13 @@ impl Parser<'_> {
) -> ParseResult<Thing> { ) -> ParseResult<Thing> {
expected!(self, t!(":")); expected!(self, t!(":"));
self.peek(); let id = enter_flexible_ident!(this = self =>(self.flexible_record_id){
self.no_whitespace()?; this.peek();
this.no_whitespace()?;
ctx.run(|ctx| this.parse_id(ctx)).await
})?;
let id = ctx.run(|ctx| self.parse_id(ctx)).await?;
Ok(Thing { Ok(Thing {
tb: ident, tb: ident,
id, id,
@ -215,11 +228,15 @@ impl Parser<'_> {
let token = self.next(); let token = self.next();
match token.kind { match token.kind {
t!("{") => { t!("{") => {
let object = self.parse_object(stk, token.span).await?; let object = enter_flexible_ident!(this = self => (false){
this.parse_object(stk, token.span).await
})?;
Ok(Id::Object(object)) Ok(Id::Object(object))
} }
t!("[") => { t!("[") => {
let array = self.parse_array(stk, token.span).await?; let array = enter_flexible_ident!(this = self => (false){
this.parse_array(stk, token.span).await
})?;
Ok(Id::Array(array)) Ok(Id::Array(array))
} }
t!("+") => { t!("+") => {
@ -260,6 +277,30 @@ impl Parser<'_> {
Ok(Id::String(text)) Ok(Id::String(text))
} }
} }
TokenKind::Number(NumberKind::Decimal | NumberKind::DecimalExponent)
if self.flexible_record_id =>
{
let mut text = self.lexer.string.take().unwrap();
text.push('d');
text.push('e');
text.push('c');
Ok(Id::String(text))
}
TokenKind::Number(NumberKind::Float) if self.flexible_record_id => {
let mut text = self.lexer.string.take().unwrap();
text.push('f');
Ok(Id::String(text))
}
TokenKind::Duration if self.flexible_record_id => {
self.lexer.duration = None;
let slice = self.lexer.reader.span(token.span);
if slice.iter().any(|x| *x > 0b0111_1111) {
unexpected!(self, token.kind, "a identifier");
}
// Should be valid utf-8 as it was already parsed by the lexer
let text = String::from_utf8(slice.to_vec()).unwrap();
Ok(Id::String(text))
}
t!("ULID") => { t!("ULID") => {
// TODO: error message about how to use `ulid` as an identifier. // TODO: error message about how to use `ulid` as an identifier.
expected!(self, t!("(")); expected!(self, t!("("));
@ -480,4 +521,49 @@ mod tests {
} }
); );
} }
/// Checks that record-id ids which look like numbers, suffixed numbers or durations are
/// parsed as plain string ids when flexible record-id's are allowed.
#[test]
fn weird_things() {
    use crate::sql;

    /// Parses `t:<ident>` once as a thing and once as a full query, and asserts that in both
    /// cases the id comes back as `Id::String(ident)`.
    fn assert_ident_parses_correctly(ident: &str) {
        let thing = format!("t:{}", ident);

        // First pass: parse directly as a thing with flexible record-id's explicitly enabled.
        let mut parser = Parser::new(thing.as_bytes());
        parser.allow_fexible_record_id(true);
        let mut stack = Stack::new();
        let r = stack
            .enter(|ctx| async move { parser.parse_thing(ctx).await })
            .finish()
            // Build the failure message lazily, only when parsing actually failed.
            .unwrap_or_else(|e| panic!("failed on {}: {:?}", ident, e))
            .id;
        assert_eq!(r, Id::String(ident.to_string()));

        // Second pass: parse as a query with a fresh parser; no explicit call is made here,
        // so this relies on the parser's own setting (presumably enabled by default).
        let mut parser = Parser::new(thing.as_bytes());
        let r = stack
            .enter(|ctx| async move { parser.parse_query(ctx).await })
            .finish()
            .unwrap_or_else(|e| panic!("failed on {}: {:?}", ident, e));

        assert_eq!(
            r,
            sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Thing(
                sql::Thing {
                    tb: "t".to_string(),
                    id: Id::String(ident.to_string())
                }
            ))]))
        )
    }

    assert_ident_parses_correctly("123abc");
    assert_ident_parses_correctly("123d");
    assert_ident_parses_correctly("123de");
    assert_ident_parses_correctly("123dec");
    assert_ident_parses_correctly("1e23dec");
    assert_ident_parses_correctly("1e23f");
    assert_ident_parses_correctly("123f");
    assert_ident_parses_correctly("1ns");
    assert_ident_parses_correctly("1ns1");
    assert_ident_parses_correctly("1ns1h");
}
} }

View file

@ -232,6 +232,8 @@ pub enum NumberKind {
DecimalExponent, DecimalExponent,
// A number with a float postfix. // A number with a float postfix.
Float, Float,
// A number with a float postfix that had a mantissa.
FloatMantissa,
// A number with a `.3` part. // A number with a `.3` part.
Mantissa, Mantissa,
// A number with a `.3e10` part. // A number with a `.3e10` part.