Restructure the parser around the concept of token gluing. (#4081)
parent 2184e80f45
commit 3539eac25d
45 changed files with 3137 additions and 2785 deletions
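The commit title names the key idea: instead of the lexer producing large compound tokens (datetimes, UUIDs, durations, numbers) in one pass, it now emits small primitive tokens (`Digits`, quote prefixes like `d"` and `u"`, suffixes like `dec`, and explicit `WhiteSpace`), and the parser "glues" adjacent primitives back together when their spans touch. The hunks below are the mechanical fallout of that change. A conceptual sketch of the gluing predicate follows; the names here are illustrative, not the crate's actual API.

// A byte range in the source, as the lexer's tokens carry.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Span {
    offset: u32,
    len: u32,
}

impl Span {
    // Two tokens may only be glued into one compound token when nothing
    // separates them, i.e. their spans are directly adjacent.
    fn follows(self, earlier: Span) -> bool {
        earlier.offset + earlier.len == self.offset
    }

    // The span covering both glued tokens.
    fn covers(self, other: Span) -> Span {
        let offset = self.offset.min(other.offset);
        let end = (self.offset + self.len).max(other.offset + other.len);
        Span { offset, len: end - offset }
    }
}

fn main() {
    let digits = Span { offset: 0, len: 3 }; // `100`
    let suffix = Span { offset: 3, len: 2 }; // `ms`
    assert!(suffix.follows(digits)); // `100ms` glues into one duration
    assert_eq!(digits.covers(suffix), Span { offset: 0, len: 5 });

    let spaced = Span { offset: 4, len: 2 }; // `100 ms`
    assert!(!spaced.follows(digits)); // whitespace breaks the glue
}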
@@ -95,7 +95,8 @@ impl Debug for Regex {

impl Display for Regex {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "/{}/", &self.0)
        let t = self.0.to_string().replace('/', "\\/");
        write!(f, "/{}/", &t)
    }
}
@@ -88,6 +88,7 @@ impl Uuid {

impl Display for Uuid {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "u")?;
        Display::fmt(&quote_str(&self.0.to_string()), f)
    }
}
@@ -49,6 +49,21 @@ impl Location {

    pub fn of_offset(source: &str, offset: usize) -> Self {
        assert!(offset <= source.len(), "tried to find location of substring in unrelated string");

        if offset == source.len() {
            // Eof character
            let (last_line, column) = LineIterator::new(source)
                .enumerate()
                .last()
                .map(|(idx, (l, _))| (idx, l.len()))
                .unwrap_or((0, 0));
            return Self {
                line: last_line + 1,
                column: column + 1,
            };
        }

        // Bytes of input prior to line being iterated.
        let mut bytes_prior = 0;
        for (line_idx, (line, seperator_len)) in LineIterator::new(source).enumerate() {
@@ -109,6 +124,22 @@ impl Location {

        let offset = span.offset as usize;
        let end = offset + span.len as usize;

        if span.len == 0 && source.len() == span.offset as usize {
            // EOF span
            let (last_line, column) = LineIterator::new(source)
                .enumerate()
                .last()
                .map(|(idx, (l, _))| (idx, l.len()))
                .unwrap_or((0, 0));
            return Self {
                line: last_line + 1,
                column,
            }..Self {
                line: last_line + 1,
                column: column + 1,
            };
        }

        // Bytes of input prior to line being iterated.
        let mut bytes_prior = 0;
        let mut iterator = LineIterator::new(source).enumerate().peekable();
@@ -3,7 +3,7 @@ use crate::syn::{
        unicode::{byte, chars},
        Error, Lexer,
    },
    token::{t, Token, TokenKind},
    token::{t, DatetimeChars, Token, TokenKind},
};

impl<'a> Lexer<'a> {
@@ -41,8 +41,6 @@ impl<'a> Lexer<'a> {
                _ => {}
            }
        }
        self.set_whitespace_span(self.current_span());
        self.skip_offset();
    }

    /// Eats a multi line comment and returns an error if `*/` would be missing.
@@ -57,8 +55,6 @@ impl<'a> Lexer<'a> {
            };
            if b'/' == byte {
                self.reader.next();
                self.set_whitespace_span(self.current_span());
                self.skip_offset();
                return Ok(());
            }
        }
@@ -100,8 +96,6 @@ impl<'a> Lexer<'a> {
                _ => break,
            }
        }
        self.set_whitespace_span(self.current_span());
        self.skip_offset();
    }

    // re-lexes a `/` token to a regex token.
@@ -109,7 +103,6 @@ impl<'a> Lexer<'a> {
        debug_assert_eq!(token.kind, t!("/"));
        debug_assert_eq!(token.span.offset + 1, self.last_offset);
        debug_assert_eq!(token.span.len, 1);
        debug_assert_eq!(self.scratch, "");

        self.last_offset = token.span.offset;
        loop {
@@ -117,21 +110,13 @@ impl<'a> Lexer<'a> {
                Some(b'\\') => {
                    if let Some(b'/') = self.reader.peek() {
                        self.reader.next();
                        self.scratch.push('/')
                    } else {
                        self.scratch.push('\\')
                    }
                }
                Some(b'/') => break,
                Some(x) => {
                    if x.is_ascii() {
                        self.scratch.push(x as char);
                    } else {
                        match self.reader.complete_char(x) {
                            Ok(x) => {
                                self.scratch.push(x);
                            }
                            Err(e) => return self.invalid_token(e.into()),
                    if !x.is_ascii() {
                        if let Err(e) = self.reader.complete_char(x) {
                            return self.invalid_token(e.into());
                        }
                    }
                }
@@ -139,14 +124,7 @@ impl<'a> Lexer<'a> {
            }
        }

        match self.scratch.parse() {
            Ok(x) => {
                self.scratch.clear();
                self.regex = Some(x);
                self.finish_token(TokenKind::Regex)
            }
            Err(e) => self.invalid_token(Error::Regex(e)),
        }
        self.finish_token(TokenKind::Regex)
    }

    /// Lex the next token, starting from the given byte.
|
|||
b'@' => t!("@"),
|
||||
byte::CR | byte::FF | byte::LF | byte::SP | byte::VT | byte::TAB => {
|
||||
self.eat_whitespace();
|
||||
return self.next_token_inner();
|
||||
TokenKind::WhiteSpace
|
||||
}
|
||||
b'|' => match self.reader.peek() {
|
||||
Some(b'|') => {
|
||||
|
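As the hunk above shows, whitespace (and, in later hunks, comments) no longer recurses back into `next_token_inner`; the lexer now yields a real `TokenKind::WhiteSpace` token. A minimal sketch of how a consumer might skip such tokens while still being able to observe them; the `Lexer` stub here is hypothetical, only the token-kind names mirror the diff.

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum TokenKind {
    WhiteSpace,
    Digits,
    Eof,
}

// Hypothetical stand-in for the real lexer; it just replays a fixed stream.
struct Lexer {
    tokens: Vec<TokenKind>,
    pos: usize,
}

impl Lexer {
    fn next_token(&mut self) -> TokenKind {
        let t = self.tokens.get(self.pos).copied().unwrap_or(TokenKind::Eof);
        self.pos += 1;
        t
    }

    // Skip whitespace tokens, returning the next significant token.
    // Because whitespace is now a real token, callers that care about
    // adjacency (token gluing) can instead inspect its presence.
    fn next_significant(&mut self) -> TokenKind {
        loop {
            let t = self.next_token();
            if t != TokenKind::WhiteSpace {
                return t;
            }
        }
    }
}

fn main() {
    let mut lexer = Lexer {
        tokens: vec![TokenKind::WhiteSpace, TokenKind::Digits],
        pos: 0,
    };
    assert_eq!(lexer.next_significant(), TokenKind::Digits);
}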
@@ -262,7 +240,7 @@ impl<'a> Lexer<'a> {
                Some(b'-') => {
                    self.reader.next();
                    self.eat_single_line_comment();
                    return self.next_token_inner();
                    TokenKind::WhiteSpace
                }
                Some(b'=') => {
                    self.reader.next();
@@ -294,12 +272,12 @@ impl<'a> Lexer<'a> {
                    if let Err(e) = self.eat_multi_line_comment() {
                        return self.invalid_token(e);
                    }
                    return self.next_token_inner();
                    TokenKind::WhiteSpace
                }
                Some(b'/') => {
                    self.reader.next();
                    self.eat_single_line_comment();
                    return self.next_token_inner();
                    TokenKind::WhiteSpace
                }
                _ => t!("/"),
            },
@@ -340,41 +318,140 @@ impl<'a> Lexer<'a> {
            }
            b'#' => {
                self.eat_single_line_comment();
                return self.next_token_inner();
                TokenKind::WhiteSpace
            }
            b'`' => return self.lex_surrounded_ident(true),
            b'"' => return self.lex_strand(true),
            b'\'' => return self.lex_strand(false),
            b'd' => {
                match self.reader.peek() {
                    Some(b'"') => {
                        self.reader.next();
                        return self.lex_datetime(true);
                    }
                    Some(b'\'') => {
                        self.reader.next();
                        return self.lex_datetime(false);
                    }
                    _ => {}
            b'"' => t!("\""),
            b'\'' => t!("'"),
            b'd' => match self.reader.peek() {
                Some(b'"') => {
                    self.reader.next();
                    t!("d\"")
                }
                return self.lex_ident_from_next_byte(b'd');
            }
            b'u' => {
                match self.reader.peek() {
                    Some(b'"') => {
                        self.reader.next();
                        return self.lex_uuid(true);
                    }
                    Some(b'\'') => {
                        self.reader.next();
                        return self.lex_uuid(false);
                    }
                    _ => {}
                Some(b'\'') => {
                    self.reader.next();
                    t!("d'")
                }
                Some(b'e') => {
                    self.reader.next();

                    let Some(b'c') = self.reader.peek() else {
                        self.scratch.push('d');
                        return self.lex_ident_from_next_byte(b'e');
                    };

                    self.reader.next();

                    if self.reader.peek().map(|x| x.is_ascii_alphanumeric()).unwrap_or(false) {
                        self.scratch.push('d');
                        self.scratch.push('e');
                        return self.lex_ident_from_next_byte(b'c');
                    }

                    t!("dec")
                }
                Some(x) if !x.is_ascii_alphabetic() => {
                    t!("d")
                }
                None => {
                    t!("d")
                }
                _ => {
                    return self.lex_ident_from_next_byte(b'd');
                }
            },
            b'f' => match self.reader.peek() {
                Some(x) if !x.is_ascii_alphanumeric() => {
                    t!("f")
                }
                None => t!("f"),
                _ => {
                    return self.lex_ident_from_next_byte(b'f');
                }
            },
            b'n' => match self.reader.peek() {
                Some(b's') => {
                    self.reader.next();
                    if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                        self.scratch.push('n');
                        return self.lex_ident_from_next_byte(b's');
                    }
                    t!("ns")
                }
                _ => {
                    return self.lex_ident_from_next_byte(b'n');
                }
            },
            b'm' => match self.reader.peek() {
                Some(b's') => {
                    self.reader.next();
                    if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                        self.scratch.push('m');
                        return self.lex_ident_from_next_byte(b's');
                    }
                    t!("ms")
                }
                Some(x) if !x.is_ascii_alphabetic() => {
                    t!("m")
                }
                None => {
                    t!("m")
                }
                _ => {
                    return self.lex_ident_from_next_byte(b'm');
                }
            },
            b's' => {
                if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                    return self.lex_ident_from_next_byte(b's');
                } else {
                    t!("s")
                }
                return self.lex_ident_from_next_byte(b'u');
            }
            b'h' => {
                if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                    return self.lex_ident_from_next_byte(b'h');
                } else {
                    t!("h")
                }
            }
            b'w' => {
                if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                    return self.lex_ident_from_next_byte(b'w');
                } else {
                    t!("w")
                }
            }
            b'y' => {
                if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                    return self.lex_ident_from_next_byte(b'y');
                } else {
                    t!("y")
                }
            }
            b'u' => match self.reader.peek() {
                Some(b'"') => {
                    self.reader.next();
                    t!("u\"")
                }
                Some(b'\'') => {
                    self.reader.next();
                    t!("u'")
                }
                Some(b's') => {
                    self.reader.next();
                    if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                        self.scratch.push('u');
                        return self.lex_ident_from_next_byte(b's');
                    }
                    t!("us")
                }
                _ => {
                    return self.lex_ident_from_next_byte(b'u');
                }
            },
            b'r' => match self.reader.peek() {
                Some(b'\"') => {
                Some(b'"') => {
                    self.reader.next();
                    t!("r\"")
                }
@@ -382,12 +459,33 @@ impl<'a> Lexer<'a> {
                    self.reader.next();
                    t!("r'")
                }
                _ => return self.lex_ident_from_next_byte(byte),
                _ => {
                    return self.lex_ident_from_next_byte(b'r');
                }
            },
            b'Z' => match self.reader.peek() {
                Some(x) if x.is_ascii_alphabetic() => {
                    return self.lex_ident_from_next_byte(b'Z');
                }
                _ => TokenKind::DatetimeChars(DatetimeChars::Z),
            },
            b'T' => match self.reader.peek() {
                Some(x) if x.is_ascii_alphabetic() => {
                    return self.lex_ident_from_next_byte(b'T');
                }
                _ => TokenKind::DatetimeChars(DatetimeChars::T),
            },
            b'e' => {
                return self.lex_exponent(b'e');
            }
            b'E' => {
                return self.lex_exponent(b'E');
            }
            b'0'..=b'9' => return self.lex_digits(),
            b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
                return self.lex_ident_from_next_byte(byte);
            }
            b'0'..=b'9' => return self.lex_number(byte),
            //b'0'..=b'9' => return self.lex_number(byte),
            x => return self.invalid_token(Error::UnexpectedCharacter(x as char)),
        };
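The arms above illustrate the new approach: rather than lexing a whole datetime, UUID, or number in one pass, the lexer emits small prefix and piece tokens (`d"`, `u"`, `r"`, `DatetimeChars::Z`, `Digits`) and leaves it to the parser to glue adjacent tokens back into a compound value. A rough sketch of that gluing step with hypothetical token kinds; the real parser's glue logic also tracks spans and re-lexes the glued range.

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Kind {
    DatetimeDoubleQuotePrefix, // the `d"` token
    Digits,
    Minus,
    DoubleQuote,
    Other,
}

// Glue a `d"` prefix and the following primitive tokens back into one
// datetime literal. Returns the number of tokens consumed on success.
fn glue_datetime(tokens: &[Kind]) -> Option<usize> {
    let mut it = tokens.iter().copied().enumerate();
    let (_, first) = it.next()?;
    if first != Kind::DatetimeDoubleQuotePrefix {
        return None;
    }
    for (idx, kind) in it {
        match kind {
            // digits and separators are part of the datetime body
            Kind::Digits | Kind::Minus => continue,
            // the closing quote ends the glued token
            Kind::DoubleQuote => return Some(idx + 1),
            Kind::Other => return None,
        }
    }
    None
}

fn main() {
    // tokens for `d"2012-04-23"` before gluing
    let tokens = [
        Kind::DatetimeDoubleQuotePrefix,
        Kind::Digits, Kind::Minus, Kind::Digits, Kind::Minus, Kind::Digits,
        Kind::DoubleQuote,
    ];
    assert_eq!(glue_datetime(&tokens), Some(7));
}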
@@ -30,6 +30,18 @@ impl<'a> Lexer<'a> {
            '⊄' => t!("⊄"),
            '×' => t!("×"),
            '÷' => t!("÷"),
            'µ' => {
                let Some(b's') = self.reader.peek() else {
                    return self.invalid_token(Error::UnexpectedCharacter('µ'));
                };
                self.reader.next();

                if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
                    return self.invalid_token(Error::UnexpectedCharacter('µ'));
                }

                t!("µs")
            }
            x => return self.invalid_token(Error::UnexpectedCharacter(x)),
        };
        self.finish_token(kind)
@@ -1,269 +0,0 @@
use std::ops::RangeInclusive;

use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
use thiserror::Error;

use crate::{
    sql::Datetime,
    syn::token::{Token, TokenKind},
};

use super::{Error as LexError, Lexer};

#[derive(Error, Debug)]
#[non_exhaustive]
pub enum PartError {
    #[error("value outside of allowed range")]
    OutsideRange,
    #[error("missing digit(s)")]
    MissingDigits,
    #[error("too many digits")]
    TooManyDigits,
}

#[derive(Error, Debug)]
#[non_exhaustive]
pub enum Error {
    #[error("invalid year, {0}")]
    Year(PartError),
    #[error("invalid month, {0}")]
    Month(PartError),
    #[error("invalid day, {0}")]
    Day(PartError),
    #[error("invalid hour, {0}")]
    Hour(PartError),
    #[error("invalid time minute, {0}")]
    Minute(PartError),
    #[error("invalid second, {0}")]
    Second(PartError),
    #[error("invalid nano_seconds, {0}")]
    NanoSeconds(PartError),
    #[error("invalid time-zone hour, {0}")]
    TimeZoneHour(PartError),
    #[error("invalid time-zone minute, {0}")]
    TimeZoneMinute(PartError),
    #[error("missing separator `{}`", *(.0) as char)]
    MissingSeparator(u8),
    #[error("expected date-time strand to end")]
    ExpectedEnd,
    #[error("missing time-zone")]
    MissingTimeZone,
    #[error("date does not exist")]
    NonExistantDate,
    #[error("time does not exist")]
    NonExistantTime,
    #[error("time-zone offset too big")]
    TimeZoneOutOfRange,
}

impl<'a> Lexer<'a> {
    /// Lex a date-time strand.
    pub fn lex_datetime(&mut self, double: bool) -> Token {
        match self.lex_datetime_err(double) {
            Ok(x) => {
                self.datetime = Some(x);
                self.finish_token(TokenKind::DateTime)
            }
            Err(e) => self.invalid_token(LexError::DateTime(e)),
        }
    }

    /// Lex a datetime without the enclosing `"` or `'`, returning a result or parser error.
    pub fn lex_datetime_raw_err(&mut self) -> Result<Datetime, Error> {
        let negative = match self.reader.peek() {
            Some(b'+') => {
                self.reader.next();
                false
            }
            Some(b'-') => {
                self.reader.next();
                true
            }
            _ => false,
        };

        let mut year = self.lex_datetime_part(4, 0..=9999).map_err(Error::Year)? as i16;
        if negative {
            year = -year;
        }
        if !self.eat(b'-') {
            return Err(Error::MissingSeparator(b'-'));
        }
        let month = self.lex_datetime_part(2, 1..=12).map_err(Error::Month)?;
        if !self.eat(b'-') {
            return Err(Error::MissingSeparator(b'-'));
        }
        let day = self.lex_datetime_part(2, 1..=31).map_err(Error::Day)?;

        if !self.eat(b'T') {
            let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else {
                return Err(Error::NonExistantDate);
            };
            let time = NaiveTime::default();
            let date_time = NaiveDateTime::new(date, time);

            let datetime = Utc
                .fix()
                .from_local_datetime(&date_time)
                .earliest()
                // this should never panic with a fixed offset.
                .unwrap()
                .with_timezone(&Utc);

            return Ok(Datetime(datetime));
        }

        let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::Hour)?;
        if !self.eat(b':') {
            return Err(Error::MissingSeparator(b':'));
        }

        let minutes = self.lex_datetime_part(2, 0..=59).map_err(Error::Minute)?;

        if !self.eat(b':') {
            return Err(Error::MissingSeparator(b':'));
        }

        let seconds = self.lex_datetime_part(2, 0..=59).map_err(Error::Second)?;

        // nano seconds
        let nano = if let Some(b'.') = self.reader.peek() {
            self.reader.next();
            // check if there is at least one digit.
            if !matches!(self.reader.peek(), Some(b'0'..=b'9')) {
                return Err(Error::NanoSeconds(PartError::MissingDigits));
            }
            let mut number = 0u32;
            for i in 0..9 {
                let Some(c) = self.reader.peek() else {
                    // always invalid token, just let the next section handle the error.
                    break;
                };
                if !c.is_ascii_digit() {
                    // If digits are missing they are counted as 0's
                    for _ in i..9 {
                        number *= 10;
                    }
                    break;
                }
                self.reader.next();
                number *= 10;
                number += (c - b'0') as u32;
            }
            // ensure nano_seconds are at most 9 digits.
            if matches!(self.reader.peek(), Some(b'0'..=b'9')) {
                return Err(Error::NanoSeconds(PartError::TooManyDigits));
            }
            number
        } else {
            0
        };

        // time zone
        let time_zone = match self.reader.peek() {
            Some(b'Z') => {
                self.reader.next();
                None
            }
            Some(x @ (b'-' | b'+')) => {
                self.reader.next();
                let negative = x == b'-';
                let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::TimeZoneHour)? as i32;
                let Some(b':') = self.reader.next() else {
                    return Err(Error::MissingSeparator(b':'));
                };
                let minute =
                    self.lex_datetime_part(2, 0..=59).map_err(Error::TimeZoneMinute)? as i32;
                let time = hour * 3600 + minute * 60;
                if negative {
                    Some(-time)
                } else {
                    Some(time)
                }
            }
            _ => return Err(Error::MissingTimeZone),
        };

        // calculate the given datetime from individual parts.
        let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else {
            return Err(Error::NonExistantDate);
        };
        let Some(time) =
            NaiveTime::from_hms_nano_opt(hour as u32, minutes as u32, seconds as u32, nano)
        else {
            return Err(Error::NonExistantTime);
        };

        let date_time = NaiveDateTime::new(date, time);

        let zone = match time_zone {
            None => Utc.fix(),
            Some(offset) => if offset < 0 {
                FixedOffset::west_opt(-offset)
            } else {
                FixedOffset::east_opt(offset)
            }
            .ok_or(Error::TimeZoneOutOfRange)?,
        };

        let datetime = zone
            .from_local_datetime(&date_time)
            .earliest()
            // this should never panic with a fixed offset.
            .unwrap()
            .with_timezone(&Utc);

        Ok(Datetime(datetime))
    }

    /// Lex a full datetime but return a result instead of a token.
    pub fn lex_datetime_err(&mut self, double: bool) -> Result<Datetime, Error> {
        let datetime = self.lex_datetime_raw_err()?;

        let end_char = if double {
            b'"'
        } else {
            b'\''
        };

        if !self.eat(end_char) {
            return Err(Error::ExpectedEnd);
        }

        Ok(datetime)
    }

    /// Lexes a digit part of a date time.
    ///
    /// This function eats a set amount of digits and then checks whether the value those digits
    /// represent is within the given range.
    pub fn lex_datetime_part(
        &mut self,
        mut amount: u8,
        range: RangeInclusive<u16>,
    ) -> Result<u16, PartError> {
        let mut value = 0u16;

        while amount != 0 {
            value *= 10;
            let Some(char) = self.reader.peek() else {
                return Err(PartError::MissingDigits);
            };
            if !char.is_ascii_digit() {
                return Err(PartError::MissingDigits);
            }
            self.reader.next();
            value += (char - b'0') as u16;
            amount -= 1;
        }

        if matches!(self.reader.peek(), Some(b'0'..=b'9')) {
            return Err(PartError::TooManyDigits);
        }

        if !range.contains(&value) {
            return Err(PartError::OutsideRange);
        }
        Ok(value)
    }
}
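The `lex_datetime_part` helper above is the core of this removed datetime lexer: read exactly `amount` digits, reject trailing digits, then range-check. A standalone sketch of the same idea over a byte slice; `fixed_digits` is a hypothetical helper, not the crate's API.

use std::ops::RangeInclusive;

// Parse exactly `amount` leading ASCII digits from `input`, range-check the
// value, and return it together with the rest of the input.
fn fixed_digits(
    input: &[u8],
    amount: usize,
    range: RangeInclusive<u16>,
) -> Result<(u16, &[u8]), &'static str> {
    if input.len() < amount || !input[..amount].iter().all(u8::is_ascii_digit) {
        return Err("missing digit(s)");
    }
    // reject fields followed by more digits, e.g. a 5-digit year
    if input.get(amount).is_some_and(u8::is_ascii_digit) {
        return Err("too many digits");
    }
    let value = input[..amount]
        .iter()
        .fold(0u16, |acc, b| acc * 10 + (b - b'0') as u16);
    if !range.contains(&value) {
        return Err("value outside of allowed range");
    }
    Ok((value, &input[amount..]))
}

fn main() {
    let (year, rest) = fixed_digits(b"2012-04-23", 4, 0..=9999).unwrap();
    assert_eq!(year, 2012);
    assert_eq!(rest, b"-04-23");
    assert!(fixed_digits(b"13", 2, 1..=12).is_err()); // month out of range
}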
@@ -1,179 +0,0 @@
use std::time::Duration as StdDuration;
use thiserror::Error;

use crate::{
    sql::duration::{
        Duration, SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK,
        SECONDS_PER_YEAR,
    },
    syn::token::{Token, TokenKind},
};

use super::{Error as LexError, Lexer};

#[derive(Error, Debug)]
#[non_exhaustive]
pub enum Error {
    #[error("invalid duration suffix")]
    InvalidSuffix,
    #[error("duration value overflowed")]
    Overflow,
}

impl<'a> Lexer<'a> {
    /// Lex a duration.
    ///
    /// Expects the lexer to have already eaten the digits starting the duration.
    pub fn lex_duration(&mut self) -> Token {
        let backup = self.reader.offset();
        match self.lex_duration_err() {
            Ok(x) => {
                self.scratch.clear();
                self.duration = Some(x);
                self.finish_token(TokenKind::Duration)
            }
            Err(e) => {
                if self.flexible_ident {
                    self.reader.backup(backup);
                    return self.lex_ident();
                }
                self.scratch.clear();
                self.invalid_token(LexError::Duration(e))
            }
        }
    }

    fn invalid_suffix_duration(&mut self) -> Error {
        // eat the whole suffix.
        while let Some(x) = self.reader.peek() {
            if !x.is_ascii_alphanumeric() {
                break;
            }
            self.reader.next();
        }
        Error::InvalidSuffix
    }

    /// Lex a duration.
    ///
    /// Should only be called from lexing a number.
    ///
    /// Expects at least one numeric character to have been pushed into scratch.
    pub fn lex_duration_err(&mut self) -> Result<Duration, Error> {
        let mut duration = StdDuration::ZERO;

        let mut current_value = 0u64;
        // use the existing eat span to generate the current value.
        // span already contains
        let mut span = self.current_span();
        span.len -= 1;
        for b in self.scratch.as_bytes() {
            debug_assert!(b.is_ascii_digit(), "`{}` is not a digit", b);
            current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
            current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
        }

        loop {
            let Some(next) = self.reader.peek() else {
                return Err(Error::InvalidSuffix);
            };

            // Match the suffix.
            let new_duration = match next {
                x @ (b'n' | b'u') => {
                    // Nano or micro suffix
                    self.reader.next();
                    if !self.eat(b's') {
                        return Err(Error::InvalidSuffix);
                    };

                    if x == b'n' {
                        StdDuration::from_nanos(current_value)
                    } else {
                        StdDuration::from_micros(current_value)
                    }
                }
                // Starting byte of 'µ'
                0xc2 => {
                    self.reader.next();
                    // Second byte of 'µ'.
                    // Always consume as the next byte will always be part of a two byte character.
                    if !self.eat(0xb5) {
                        return Err(self.invalid_suffix_duration());
                    }

                    if !self.eat(b's') {
                        return Err(self.invalid_suffix_duration());
                    }

                    StdDuration::from_micros(current_value)
                }
                b'm' => {
                    self.reader.next();
                    // Either milli or minute
                    let is_milli = self.eat(b's');

                    if is_milli {
                        StdDuration::from_millis(current_value)
                    } else {
                        let Some(number) = current_value.checked_mul(SECONDS_PER_MINUTE) else {
                            return Err(Error::Overflow);
                        };
                        StdDuration::from_secs(number)
                    }
                }
                x @ (b's' | b'h' | b'd' | b'w' | b'y') => {
                    self.reader.next();
                    // second, hour, day, week or year.

                    let new_duration = match x {
                        b's' => Some(StdDuration::from_secs(current_value)),
                        b'h' => {
                            current_value.checked_mul(SECONDS_PER_HOUR).map(StdDuration::from_secs)
                        }
                        b'd' => {
                            current_value.checked_mul(SECONDS_PER_DAY).map(StdDuration::from_secs)
                        }
                        b'w' => {
                            current_value.checked_mul(SECONDS_PER_WEEK).map(StdDuration::from_secs)
                        }
                        b'y' => {
                            current_value.checked_mul(SECONDS_PER_YEAR).map(StdDuration::from_secs)
                        }
                        _ => unreachable!(),
                    };

                    let Some(new_duration) = new_duration else {
                        return Err(Error::Overflow);
                    };
                    new_duration
                }
                _ => {
                    return Err(self.invalid_suffix_duration());
                }
            };

            duration = duration.checked_add(new_duration).ok_or(Error::Overflow)?;

            let next = self.reader.peek();
            match next {
                // there were remaining alphabetic characters after the valid suffix, so the
                // suffix is invalid.
                Some(b'a'..=b'z' | b'A'..=b'Z' | b'_') => {
                    return Err(self.invalid_suffix_duration())
                }
                Some(b'0'..=b'9') => {} // Duration continues.
                _ => return Ok(Duration(duration)),
            }

            current_value = 0;
            // Eat all the next numbers
            while let Some(b @ b'0'..=b'9') = self.reader.peek() {
                self.reader.next();
                current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
                current_value =
                    current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
            }
        }
    }
}
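The removed `lex_duration_err` accumulates `<digits><suffix>` pairs, converting each unit to seconds with overflow-checked multiplication. A compact sketch of that accumulation over a string like `1h30m`; this is a hypothetical standalone function that omits the `ms`/`µs` special cases, and its constants mirror the `SECONDS_PER_*` values above.

use std::time::Duration;

const SECONDS_PER_MINUTE: u64 = 60;
const SECONDS_PER_HOUR: u64 = 60 * 60;
const SECONDS_PER_DAY: u64 = 24 * SECONDS_PER_HOUR;

// Parse a duration made of `<digits><unit>` pairs, e.g. `1h30m`.
fn parse_duration(s: &str) -> Result<Duration, &'static str> {
    let mut total = Duration::ZERO;
    let mut chars = s.bytes().peekable();
    while chars.peek().is_some() {
        // read the digit run
        let mut value: u64 = 0;
        let mut any = false;
        while let Some(b @ b'0'..=b'9') = chars.peek().copied() {
            chars.next();
            any = true;
            value = value
                .checked_mul(10)
                .and_then(|v| v.checked_add((b - b'0') as u64))
                .ok_or("duration value overflowed")?;
        }
        if !any {
            return Err("invalid duration suffix");
        }
        // read the unit suffix and convert to a Duration
        let unit = match chars.next() {
            Some(b's') => Duration::from_secs(value),
            Some(b'm') => Duration::from_secs(
                value.checked_mul(SECONDS_PER_MINUTE).ok_or("duration value overflowed")?,
            ),
            Some(b'h') => Duration::from_secs(
                value.checked_mul(SECONDS_PER_HOUR).ok_or("duration value overflowed")?,
            ),
            Some(b'd') => Duration::from_secs(
                value.checked_mul(SECONDS_PER_DAY).ok_or("duration value overflowed")?,
            ),
            _ => return Err("invalid duration suffix"),
        };
        total = total.checked_add(unit).ok_or("duration value overflowed")?;
    }
    Ok(total)
}

fn main() {
    assert_eq!(parse_duration("1h30m").unwrap(), Duration::from_secs(5400));
}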
@@ -2,10 +2,16 @@ use std::mem;

use unicase::UniCase;

use crate::syn::lexer::{keywords::KEYWORDS, Error, Lexer};
use crate::syn::token::{NumberKind, Token, TokenKind};
use crate::syn::{
    lexer::{keywords::KEYWORDS, Error, Lexer},
    token::{Token, TokenKind},
};

use super::unicode::{chars, U8Ext};
use super::unicode::chars;

fn is_identifier_continue(x: u8) -> bool {
    matches!(x, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}

impl<'a> Lexer<'a> {
    /// Lex a parameter in the form of `$[a-zA-Z0-9_]*`
@@ -35,7 +41,6 @@ impl<'a> Lexer<'a> {
    /// by `[a-zA-Z0-9_]*`.
    pub fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
        debug_assert!(matches!(start, b'a'..=b'z' | b'A'..=b'Z' | b'_'));
        debug_assert_eq!(self.scratch, "");
        self.scratch.push(start as char);
        self.lex_ident()
    }
@@ -46,7 +51,7 @@ impl<'a> Lexer<'a> {
    pub fn lex_ident(&mut self) -> Token {
        loop {
            if let Some(x) = self.reader.peek() {
                if x.is_identifier_continue() {
                if is_identifier_continue(x) {
                    self.scratch.push(x as char);
                    self.reader.next();
                    continue;
@@ -64,7 +69,7 @@ impl<'a> Lexer<'a> {

        if self.scratch == "NaN" {
            self.scratch.clear();
            return self.finish_token(TokenKind::Number(NumberKind::NaN));
            return self.finish_token(TokenKind::NaN);
        } else {
            self.string = Some(mem::take(&mut self.scratch));
            return self.finish_token(TokenKind::Identifier);
@@ -1,8 +1,6 @@
use crate::syn::token::VectorTypeKind;
use crate::{
    sql::change_feed_include::ChangeFeedInclude,
    sql::{language::Language, Algorithm},
    syn::token::{DistanceKind, Keyword, TokenKind},
    syn::token::{DistanceKind, Keyword, TokenKind, VectorTypeKind},
};
use phf::{phf_map, phf_set};
use unicase::UniCase;
@@ -167,6 +165,7 @@ pub(crate) static KEYWORDS: phf::Map<UniCase<&'static str>, TokenKind> = phf_map
    UniCase::ascii("ONLY") => TokenKind::Keyword(Keyword::Only),
    UniCase::ascii("OPTION") => TokenKind::Keyword(Keyword::Option),
    UniCase::ascii("ORDER") => TokenKind::Keyword(Keyword::Order),
    UniCase::ascii("ORIGINAL") => TokenKind::Keyword(Keyword::Original),
    UniCase::ascii("PARALLEL") => TokenKind::Keyword(Keyword::Parallel),
    UniCase::ascii("PARAM") => TokenKind::Keyword(Keyword::Param),
    UniCase::ascii("PASSHASH") => TokenKind::Keyword(Keyword::Passhash),
@@ -366,6 +365,4 @@ pub(crate) static KEYWORDS: phf::Map<UniCase<&'static str>, TokenKind> = phf_map
    UniCase::ascii("I32") => TokenKind::VectorType(VectorTypeKind::I32),
    UniCase::ascii("I16") => TokenKind::VectorType(VectorTypeKind::I16),

    // Change Feed keywords
    UniCase::ascii("ORIGINAL") => TokenKind::ChangeFeedInclude(ChangeFeedInclude::Original),
};
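The keyword table is a compile-time perfect hash map keyed by `UniCase`, so lookups are case-insensitive without allocating a lowercased copy. A minimal sketch of the same pattern, assuming the `phf` and `unicase` crates with phf's unicase support enabled; the `Keyword` enum here is a stand-in for the real `TokenKind` values.

use phf::phf_map;
use unicase::UniCase;

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Keyword {
    Select,
    Order,
}

static KEYWORDS: phf::Map<UniCase<&'static str>, Keyword> = phf_map! {
    UniCase::ascii("SELECT") => Keyword::Select,
    UniCase::ascii("ORDER") => Keyword::Order,
};

fn main() {
    // `sElEcT`, `select` and `SELECT` all hash to the same entry.
    assert_eq!(KEYWORDS.get(&UniCase::ascii("sElEcT")), Some(&Keyword::Select));
    assert_eq!(KEYWORDS.get(&UniCase::ascii("order")), Some(&Keyword::Order));
    assert_eq!(KEYWORDS.get(&UniCase::ascii("missing")), None);
}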
@@ -1,13 +1,10 @@
use crate::{
    sql::{Datetime, Duration, Regex, Uuid},
    syn::token::{Span, Token, TokenKind},
};
use std::time::Duration;

use chrono::{DateTime, Utc};
use thiserror::Error;

mod byte;
mod char;
mod datetime;
mod duration;
mod ident;
mod js;
pub mod keywords;
@@ -15,12 +12,14 @@ mod number;
mod reader;
mod strand;
mod unicode;
mod uuid;

#[cfg(test)]
mod test;

pub use reader::{BytesReader, CharError};
use uuid::Uuid;

use crate::syn::token::{Span, Token, TokenKind};

/// An error returned by the lexer when an invalid token is encountered.
///
@@ -39,16 +38,6 @@ pub enum Error {
    InvalidUtf8,
    #[error("expected next character to be '{0}'")]
    ExpectedEnd(char),
    #[error("failed to lex date-time, {0}")]
    DateTime(#[from] datetime::Error),
    #[error("failed to lex uuid, {0}")]
    Uuid(#[from] uuid::Error),
    #[error("failed to lex duration, {0}")]
    Duration(#[from] duration::Error),
    #[error("failed to lex number, {0}")]
    Number(#[from] number::Error),
    #[error("failed to parse regex, {0}")]
    Regex(regex::Error),
}

impl From<CharError> for Error {
@@ -78,16 +67,10 @@ pub struct Lexer<'a> {
    pub reader: BytesReader<'a>,
    /// One past the last character of the previous token.
    last_offset: u32,
    /// The span of whitespace if it was read between two tokens.
    whitespace_span: Option<Span>,
    /// A buffer used to build the value of tokens which can't be read straight from the source,
    /// like for example strings with escape characters.
    scratch: String,

    /// Allow the next parsed idents to be flexible, i.e. support idents which start with a
    /// number.
    pub flexible_ident: bool,

    // below are a collection of storage for values produced by tokens.
    // For performance reasons we want to keep the tokens as small as possible.
    // As only some tokens have an additional value associated with them we don't store that value
@@ -103,8 +86,7 @@ pub struct Lexer<'a> {
    // actual number value to when the parser can decide on a format.
    pub string: Option<String>,
    pub duration: Option<Duration>,
    pub datetime: Option<Datetime>,
    pub regex: Option<Regex>,
    pub datetime: Option<DateTime<Utc>>,
    pub uuid: Option<Uuid>,
    pub error: Option<Error>,
}
@@ -119,15 +101,12 @@ impl<'a> Lexer<'a> {
        Lexer {
            reader,
            last_offset: 0,
            whitespace_span: None,
            scratch: String::new(),
            flexible_ident: false,
            string: None,
            datetime: None,
            duration: None,
            regex: None,
            uuid: None,
            error: None,
            duration: None,
            datetime: None,
            uuid: None,
        }
    }
@@ -137,13 +116,7 @@ impl<'a> Lexer<'a> {
    pub fn reset(&mut self) {
        self.last_offset = 0;
        self.scratch.clear();
        self.flexible_ident = false;
        self.whitespace_span = None;
        self.string = None;
        self.datetime = None;
        self.duration = None;
        self.regex = None;
        self.uuid = None;
        self.error = None;
    }
@@ -159,30 +132,12 @@ impl<'a> Lexer<'a> {
        Lexer {
            reader,
            last_offset: 0,
            whitespace_span: None,
            scratch: self.scratch,
            flexible_ident: false,
            string: self.string,
            datetime: self.datetime,
            duration: self.duration,
            regex: self.regex,
            uuid: self.uuid,
            error: self.error,
        }
    }

    /// Return the whitespace of the last token buffered, either peeked or popped.
    pub fn whitespace_span(&self) -> Option<Span> {
        self.whitespace_span
    }

    /// Used for setting the span of whitespace between tokens. Will extend the current whitespace
    /// if there already is one.
    fn set_whitespace_span(&mut self, span: Span) {
        if let Some(existing) = self.whitespace_span.as_mut() {
            *existing = existing.covers(span);
        } else {
            self.whitespace_span = Some(span);
            duration: self.duration,
            datetime: self.datetime,
            uuid: self.uuid,
        }
    }
@@ -190,11 +145,6 @@ impl<'a> Lexer<'a> {
    ///
    /// If the lexer is at the end of the source it will always return the Eof token.
    pub fn next_token(&mut self) -> Token {
        self.whitespace_span = None;
        self.next_token_inner()
    }

    fn next_token_inner(&mut self) -> Token {
        let Some(byte) = self.reader.next() else {
            return self.eof_token();
        };
@@ -219,13 +169,6 @@ impl<'a> Lexer<'a> {
        }
    }

    /// Skip the last consumed bytes in the reader.
    ///
    /// The bytes consumed before this point won't be part of the span.
    fn skip_offset(&mut self) {
        self.last_offset = self.reader.offset() as u32;
    }

    /// Return an invalid token.
    fn invalid_token(&mut self, error: Error) -> Token {
        self.error = Some(error);
@@ -305,112 +248,6 @@ impl<'a> Lexer<'a> {
            false
        }
    }

    /// Lex a single `"` character with possible leading whitespace.
    ///
    /// Used for parsing record strings.
    pub fn lex_record_string_close(&mut self) -> Token {
        loop {
            let Some(byte) = self.reader.next() else {
                return self.invalid_token(Error::UnexpectedEof);
            };
            match byte {
                unicode::byte::CR
                | unicode::byte::FF
                | unicode::byte::LF
                | unicode::byte::SP
                | unicode::byte::VT
                | unicode::byte::TAB => {
                    self.eat_whitespace();
                    continue;
                }
                b'"' => {
                    return self.finish_token(TokenKind::CloseRecordString {
                        double: true,
                    });
                }
                b'\'' => {
                    return self.finish_token(TokenKind::CloseRecordString {
                        double: false,
                    });
                }
                b'-' => match self.reader.next() {
                    Some(b'-') => {
                        self.eat_single_line_comment();
                        continue;
                    }
                    Some(x) => match self.reader.convert_to_char(x) {
                        Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
                        Err(e) => return self.invalid_token(e.into()),
                    },
                    None => return self.invalid_token(Error::UnexpectedEof),
                },
                b'/' => match self.reader.next() {
                    Some(b'*') => {
                        if let Err(e) = self.eat_multi_line_comment() {
                            return self.invalid_token(e);
                        }
                        continue;
                    }
                    Some(b'/') => {
                        self.eat_single_line_comment();
                        continue;
                    }
                    Some(x) => match self.reader.convert_to_char(x) {
                        Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
                        Err(e) => return self.invalid_token(e.into()),
                    },
                    None => return self.invalid_token(Error::UnexpectedEof),
                },
                b'#' => {
                    self.eat_single_line_comment();
                    continue;
                }
                x => match self.reader.convert_to_char(x) {
                    Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
                    Err(e) => return self.invalid_token(e.into()),
                },
            }
        }
    }

    /// Lex only a datetime without enclosing delimiters.
    ///
    /// Used for reusing lexer code when parsing datetimes. Should not be called during
    /// normal parsing.
    pub fn lex_only_datetime(&mut self) -> Result<Datetime, Error> {
        self.lex_datetime_raw_err().map_err(Error::DateTime)
    }

    /// Lex only a duration.
    ///
    /// Used for reusing lexer code when parsing durations. Should not be used during normal
    /// parsing.
    pub fn lex_only_duration(&mut self) -> Result<Duration, Error> {
        match self.reader.next() {
            Some(x @ b'0'..=b'9') => {
                self.scratch.push(x as char);
                while let Some(x @ b'0'..=b'9') = self.reader.peek() {
                    self.reader.next();
                    self.scratch.push(x as char);
                }
                self.lex_duration_err().map_err(Error::Duration)
            }
            Some(x) => {
                let char = self.reader.convert_to_char(x)?;
                Err(Error::UnexpectedCharacter(char))
            }
            None => Err(Error::UnexpectedEof),
        }
    }

    /// Lex only a UUID.
    ///
    /// Used for reusing lexer code when parsing UUIDs. Should not be used during normal
    /// parsing.
    pub fn lex_only_uuid(&mut self) -> Result<Uuid, Error> {
        Ok(self.lex_uuid_err_inner()?)
    }
}

impl Iterator for Lexer<'_> {
@@ -1,277 +1,24 @@
use crate::syn::{
    lexer::{unicode::U8Ext, Error as LexError, Lexer},
    token::{NumberKind, Token, TokenKind},
};
use std::mem;
use thiserror::Error;
use crate::syn::token::{Token, TokenKind};

#[derive(Error, Debug)]
#[non_exhaustive]
pub enum Error {
    #[error("invalid number suffix")]
    InvalidSuffix,
    #[error("expected at least a single digit in the exponent")]
    DigitExpectedExponent,
}
use super::Lexer;

impl Lexer<'_> {
    pub fn finish_number_token(&mut self, kind: NumberKind) -> Token {
        let mut str = mem::take(&mut self.scratch);
        str.retain(|x| x != '_');
        self.string = Some(str);
        self.finish_token(TokenKind::Number(kind))
    }

    /// Lex only an integer.
    /// Use when a number can be followed immediately by a `.` like in a model version.
    pub fn lex_only_integer(&mut self) -> Token {
        let Some(next) = self.reader.peek() else {
            return self.eof_token();
        };

        // not a number, return a different token kind, for error reporting.
        if !next.is_ascii_digit() {
            return self.next_token();
        }

        self.scratch.push(next as char);
        self.reader.next();

        // eat all the ascii digits
        while let Some(x) = self.reader.peek() {
            if !x.is_ascii_digit() && x != b'_' {
                break;
            } else {
                self.scratch.push(x as char);
                self.reader.next();
            }
        }

        // test for a suffix.
        match self.reader.peek() {
            Some(b'd' | b'f') => {
                // not an integer but parse anyway for error reporting.
                return self.lex_suffix(false, false, false);
            }
            Some(x) if x.is_ascii_alphabetic() => return self.invalid_suffix_token(),
            _ => {}
        }

        self.finish_number_token(NumberKind::Integer)
    }

    /// Lex a number.
    ///
    /// Expects the digit which started the number as the start argument.
    pub fn lex_number(&mut self, start: u8) -> Token {
        debug_assert!(start.is_ascii_digit());
        debug_assert_eq!(self.scratch, "");
        self.scratch.push(start as char);
        loop {
            let Some(x) = self.reader.peek() else {
                return self.finish_number_token(NumberKind::Integer);
            };
            match x {
                b'0'..=b'9' => {
                    // next digits.
                    self.reader.next();
                    self.scratch.push(x as char);
                }
                x @ (b'e' | b'E') => {
                    // scientific notation
                    self.reader.next();
                    self.scratch.push(x as char);
                    return self.lex_exponent(false);
                }
                b'.' => {
                    // mantissa
                    let backup = self.reader.offset();
                    self.reader.next();
                    let next = self.reader.peek();
                    if let Some(b'0'..=b'9') = next {
                        self.scratch.push('.');
                        return self.lex_mantissa();
                    } else {
                        // indexing a number
                        self.reader.backup(backup);
                        return self.finish_number_token(NumberKind::Integer);
                    }
                }
                b'f' | b'd' => return self.lex_suffix(false, false, false),
                // 0xc2 is the start byte of 'µ'
                0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
                    // duration suffix, switch to lexing duration.
                    return self.lex_duration();
                }
                b'_' => {
                    self.reader.next();
                }
                b'a'..=b'z' | b'A'..=b'Z' => {
                    if self.flexible_ident {
                        return self.lex_ident();
                    } else {
                        return self.invalid_suffix_token();
                    }
                }
                _ => {
                    return self.finish_number_token(NumberKind::Integer);
                }
            }
        }
    }

    fn invalid_suffix_token(&mut self) -> Token {
        // eat the whole suffix.
        while let Some(x) = self.reader.peek() {
            if !x.is_ascii_alphanumeric() {
                break;
            }
    pub fn lex_digits(&mut self) -> Token {
        while let Some(b'0'..=b'9' | b'_') = self.reader.peek() {
            self.reader.next();
        }
        self.scratch.clear();
        self.invalid_token(LexError::Number(Error::InvalidSuffix))

        self.finish_token(TokenKind::Digits)
    }

    /// Lex a number suffix, either 'f' or 'dec'.
    fn lex_suffix(&mut self, had_mantissa: bool, had_exponent: bool, had_operator: bool) -> Token {
        match self.reader.peek() {
            Some(b'f') => {
                // float suffix
                self.reader.next();
                if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
                    if self.flexible_ident && !had_mantissa && !had_operator {
                        self.scratch.push('f');
                        self.lex_ident()
                    } else {
                        self.invalid_suffix_token()
                    }
                } else {
                    let kind = if had_mantissa {
                        NumberKind::FloatMantissa
                    } else {
                        NumberKind::Float
                    };
                    self.finish_number_token(kind)
                }
            }
            Some(b'd') => {
                // decimal suffix
                self.reader.next();
                let checkpoint = self.reader.offset();
                if !self.eat(b'e') {
                    if !had_mantissa && !had_exponent && !had_operator {
                        self.reader.backup(checkpoint - 1);
                        return self.lex_duration();
                    } else if !had_mantissa && self.flexible_ident {
                        self.scratch.push('d');
                        return self.lex_ident();
                    } else {
                        return self.invalid_suffix_token();
                    }
                }

                if !self.eat(b'c') {
                    if self.flexible_ident {
                        self.scratch.push('d');
                        self.scratch.push('e');
                        return self.lex_ident();
                    } else {
                        return self.invalid_suffix_token();
                    }
                }

                if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
                    self.invalid_suffix_token()
                } else {
                    let kind = if had_exponent {
                        NumberKind::DecimalExponent
                    } else {
                        NumberKind::Decimal
                    };
                    self.finish_number_token(kind)
                }
            }
            // Caller should ensure this is unreachable
            _ => unreachable!(),
        }
    }

    /// Lexes the mantissa of a number, i.e. `.8` in `1.8`
    pub fn lex_mantissa(&mut self) -> Token {
        loop {
            // lex_number already checks if there exists a digit after the dot.
            // So this will never fail the first iteration of the loop.
            let Some(x) = self.reader.peek() else {
                return self.finish_number_token(NumberKind::Mantissa);
            };
            match x {
                b'0'..=b'9' | b'_' => {
                    // next digit.
                    self.reader.next();
                    self.scratch.push(x as char);
                }
                b'e' | b'E' => {
                    // scientific notation
                    self.reader.next();
                    self.scratch.push('e');
                    return self.lex_exponent(true);
                }
                b'f' | b'd' => return self.lex_suffix(true, false, false),
                b'a'..=b'z' | b'A'..=b'Z' => {
                    // invalid token, random identifier characters immediately after number.
                    self.scratch.clear();
                    return self.invalid_suffix_token();
                }
                _ => {
                    return self.finish_number_token(NumberKind::Mantissa);
                }
            }
        }
    }

    /// Lexes the exponent of a number, i.e. `e10` in `1.1e10`;
    fn lex_exponent(&mut self, had_mantissa: bool) -> Token {
        let mut had_operator = false;
        let mut peek = self.reader.peek();

        if let Some(x @ b'-' | x @ b'+') = peek {
            had_operator = true;
            self.reader.next();
            self.scratch.push(x as char);
            peek = self.reader.peek();
        }

        if let Some(x @ b'0'..=b'9') = peek {
            self.reader.next();
            self.scratch.push(x as char);
        } else {
            if self.flexible_ident && !had_mantissa && !had_operator {
    pub fn lex_exponent(&mut self, start: u8) -> Token {
        if let Some(x) = self.reader.peek() {
            if x.is_ascii_alphabetic() || x == b'_' {
                self.scratch.push(start as char);
                return self.lex_ident();
            }
            return self.invalid_token(LexError::Number(Error::DigitExpectedExponent));
        }
        };

        loop {
            match self.reader.peek() {
                Some(x @ (b'0'..=b'9' | b'_')) => {
                    self.reader.next();
                    self.scratch.push(x as char);
                }
                Some(b'f' | b'd') => return self.lex_suffix(had_mantissa, true, had_operator),
                Some(x) if x.is_identifier_continue() => {
                    if self.flexible_ident && !had_operator && !had_mantissa {
                        return self.lex_ident();
                    }
                    return self.invalid_token(LexError::Number(Error::InvalidSuffix));
                }
                _ => {
                    let kind = if had_mantissa {
                        NumberKind::MantissaExponent
                    } else {
                        NumberKind::Exponent
                    };
                    return self.finish_number_token(kind);
                }
            }
        }
        self.finish_token(TokenKind::Exponent)
    }
}
@@ -77,7 +77,7 @@ impl<'a> BytesReader<'a> {
        self.remaining().first().copied()
    }
    #[inline]
    pub fn span(&self, span: Span) -> &[u8] {
    pub fn span(&self, span: Span) -> &'a [u8] {
        &self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
    }
    #[inline]
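Changing `span` to return `&'a [u8]` ties the returned slice to the lifetime of the underlying source rather than to the `&self` borrow, so callers can keep the slice while continuing to advance the reader. A small sketch of why that matters, using a hypothetical minimal reader:

struct Reader<'a> {
    data: &'a [u8],
    pos: usize,
}

impl<'a> Reader<'a> {
    // Returning `&'a [u8]` (not `&[u8]`, which would borrow `self`) lets the
    // slice outlive later `&mut self` calls.
    fn span(&self, start: usize, len: usize) -> &'a [u8] {
        &self.data[start..start + len]
    }

    fn advance(&mut self) {
        self.pos += 1;
    }
}

fn main() {
    let source = b"token gluing";
    let mut reader = Reader { data: source, pos: 0 };
    let first = reader.span(0, 5);
    // With `-> &[u8]` this `advance` would conflict with the borrow held by `first`.
    reader.advance();
    assert_eq!(first, b"token");
}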
@@ -2,49 +2,45 @@

use std::mem;

use crate::syn::token::{Token, TokenKind};
use crate::syn::token::{QouteKind, Token, TokenKind};

use super::{unicode::chars, Error, Lexer};

impl<'a> Lexer<'a> {
    /// Lex a plain strand with either single or double quotes.
    pub fn lex_strand(&mut self, is_double: bool) -> Token {
        match self.lex_strand_err(is_double) {
            Ok(x) => x,
            Err(x) => {
                self.scratch.clear();
                self.invalid_token(x)
            }
        }
    }
    pub fn relex_strand(&mut self, token: Token) -> Token {
        let is_double = match token.kind {
            TokenKind::Qoute(QouteKind::Plain) => false,
            TokenKind::Qoute(QouteKind::PlainDouble) => true,
            x => panic!("invalid token kind, '{:?}' is not allowed for re-lexing strands", x),
        };

        self.last_offset = token.span.offset;

    /// Lex a strand with either double or single quotes but return a result instead of a token.
    pub fn lex_strand_err(&mut self, is_double: bool) -> Result<Token, Error> {
        loop {
            let Some(x) = self.reader.next() else {
                self.scratch.clear();
                return Ok(self.eof_token());
                return self.eof_token();
            };

            if x.is_ascii() {
                match x {
                    b'\'' if !is_double => {
                        self.string = Some(mem::take(&mut self.scratch));
                        return Ok(self.finish_token(TokenKind::Strand));
                        return self.finish_token(TokenKind::Strand);
                    }
                    b'"' if is_double => {
                        self.string = Some(mem::take(&mut self.scratch));
                        return Ok(self.finish_token(TokenKind::Strand));
                        return self.finish_token(TokenKind::Strand);
                    }
                    b'\0' => {
                        // null bytes not allowed
                        return Err(Error::UnexpectedCharacter('\0'));
                        return self.invalid_token(Error::UnexpectedCharacter('\0'));
                    }
                    b'\\' => {
                        // Handle escape sequences.
                        let Some(next) = self.reader.next() else {
                            self.scratch.clear();
                            return Ok(self.eof_token());
                            return self.eof_token();
                        };
                        match next {
                            b'\\' => {
@@ -78,17 +74,22 @@ impl<'a> Lexer<'a> {
                            let char = if x.is_ascii() {
                                x as char
                            } else {
                                self.reader.complete_char(x)?
                                match self.reader.complete_char(x) {
                                    Ok(x) => x,
                                    Err(e) => return self.invalid_token(e.into()),
                                }
                            };
                            return Err(Error::InvalidEscapeCharacter(char));
                            return self.invalid_token(Error::InvalidEscapeCharacter(char));
                        }
                    }
                }
                x => self.scratch.push(x as char),
            }
        } else {
            let c = self.reader.complete_char(x)?;
            self.scratch.push(c);
            match self.reader.complete_char(x) {
                Ok(x) => self.scratch.push(x),
                Err(e) => return self.invalid_token(e.into()),
            }
        }
    }
}
@@ -1,6 +1,4 @@
use chrono::{FixedOffset, NaiveDate, Offset, TimeZone, Utc};

use crate::syn::token::{t, NumberKind, TokenKind};
use crate::syn::token::{t, DurationSuffix, TokenKind};

macro_rules! test_case(
    ($source:expr => [$($token:expr),*$(,)?]) => {
@@ -40,23 +38,23 @@ fn operators() {

    ^
    "# => [
        t!("-"), t!("+"), t!("/"), t!("*"), t!("!"), t!("**"),
        t!("-"), t!(" "), t!("+"), t!(" "), t!("/"), t!(" "), t!("*"), t!(" "), t!("!"), t!(" "), t!("**"), t!(" "),

        t!("<"), t!(">"), t!("<="), t!(">="), t!("<-"), t!("<->"), t!("->"),
        t!("<"), t!(" "), t!(">"), t!(" "), t!("<="), t!(" "), t!(">="), t!(" "), t!("<-"), t!(" "), t!("<->"), t!(" "), t!("->"), t!(" "),

        t!("="), t!("=="), t!("-="), t!("+="), t!("!="), t!("+?="),
        t!("="), t!(" "), t!("=="), t!(" "), t!("-="), t!(" "), t!("+="), t!(" "), t!("!="), t!(" "), t!("+?="), t!(" "),

        t!("?"), t!("??"), t!("?:"), t!("?~"), t!("?="),
        t!("?"), t!(" "), t!("??"), t!(" "), t!("?:"), t!(" "), t!("?~"), t!(" "), t!("?="), t!(" "),

        t!("{"), t!("}"), t!("["), t!("]"), t!("("), t!(")"),
        t!("{"), t!(" "), t!("}"), t!(" "), t!("["), t!(" "), t!("]"), t!(" "), t!("("), t!(" "), t!(")"), t!(" "),

        t!(";"), t!(","), t!("|"), t!("||"), TokenKind::Invalid, t!("&&"),
        t!(";"), t!(" "), t!(","), t!(" "), t!("|"), t!(" "), t!("||"), t!(" "), TokenKind::Invalid, t!(" "), t!("&&"), t!(" "),

        t!("$"),
        t!("$"), t!(" "),

        t!("."), t!(".."), t!("..."),
        t!("."), t!(" "), t!(".."), t!(" "), t!("..."), t!(" "),

        TokenKind::Invalid
        TokenKind::Invalid, t!(" ")
    ]
    }
}
@@ -70,10 +68,10 @@ fn comments() {
        + -- a third comment
        -
        " => [
            t!("+"),
            t!("-"),
            t!("+"),
            t!("-"),
            t!(" "), t!("+"), t!(" "), t!(" "), t!(" "),
            t!("-"), t!(" "), t!(" "), t!(" "),
            t!("+"), t!(" "), t!(" "), t!(" "),
            t!("-"), t!(" ")
        ]
    }
}
@@ -82,7 +80,7 @@ fn comments() {
fn whitespace() {
    test_case! {
        "+= \t\n\r -=" => [
            t!("+="),
            t!("+="), t!(" "),
            t!("-="),
        ]
    }
@@ -92,23 +90,22 @@ fn whitespace() {
fn identifiers() {
    test_case! {
        r#"
            123123adwad +
            akdwkj +
            akdwkj1231312313123 +
            _a_k_d_wkj1231312313123 +
            ____wdw____ +
            123123adwad+akdwkj+akdwkj1231312313123+_a_k_d_wkj1231312313123+____wdw____+
        "#
        => [
            TokenKind::Invalid,
            t!(" "),
            TokenKind::Digits, // 123123
            TokenKind::Identifier, // adwad
            t!("+"),
            TokenKind::Identifier,
            TokenKind::Identifier, // akdwkj
            t!("+"),
            TokenKind::Identifier,
            TokenKind::Identifier, // akdwkj1231312313123
            t!("+"),
            TokenKind::Identifier,
            TokenKind::Identifier, // _a_k_d_wkj1231312313123
            t!("+"),
            TokenKind::Identifier,
            TokenKind::Identifier, // ____wdw____
            t!("+"),
            t!(" "),
        ]
    }
}
@@ -116,20 +113,24 @@ fn identifiers() {
#[test]
fn numbers() {
    test_case! {
        r#"
            123123+32010230.123012031+33043030dec+33043030f+303e10dec+

        "#
        r#"123123+32010230.123012031+33043030dec+33043030f+303e10dec+"#
        => [
            TokenKind::Number(NumberKind::Integer),
            TokenKind::Digits, // 123123
            t!("+"),
            TokenKind::Number(NumberKind::Mantissa),
            TokenKind::Digits, // 32010230
            t!("."),
            TokenKind::Digits, // 123012031
            t!("+"),
            TokenKind::Number(NumberKind::Decimal),
            TokenKind::Digits, // 33043030
            t!("dec"),
            t!("+"),
            TokenKind::Number(NumberKind::Float),
            TokenKind::Digits, // 33043030
            t!("f"),
            t!("+"),
            TokenKind::Number(NumberKind::DecimalExponent),
            TokenKind::Digits, // 303
            TokenKind::Exponent, // e
            TokenKind::Digits, // 10
            t!("dec"),
            t!("+"),
        ]
    }
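This test shows the heart of the change: `32010230.123012031` is no longer one `Number` token but `Digits`, `.`, `Digits`, which the parser glues back into a single number when the pieces are adjacent, i.e. no `WhiteSpace` token sits between them. A rough sketch of adjacency-checked gluing using spans; the token kinds and spans here are simplified stand-ins, not the crate's types.

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Kind {
    Digits,
    Dot,
    WhiteSpace,
}

#[derive(Clone, Copy, Debug)]
struct Token {
    kind: Kind,
    offset: u32,
    len: u32,
}

// Glue `Digits . Digits` into one float literal, but only when the three
// tokens touch: `1.5` glues, `1 . 5` does not. Returns the glued span.
fn glue_float(tokens: &[Token]) -> Option<(u32, u32)> {
    match tokens {
        [a, b, c, ..]
            if a.kind == Kind::Digits
                && b.kind == Kind::Dot
                && c.kind == Kind::Digits
                // adjacency check: each token starts where the previous ended
                && a.offset + a.len == b.offset
                && b.offset + b.len == c.offset =>
        {
            Some((a.offset, a.len + b.len + c.len))
        }
        _ => None,
    }
}

fn main() {
    // `1.5`
    let glued = [
        Token { kind: Kind::Digits, offset: 0, len: 1 },
        Token { kind: Kind::Dot, offset: 1, len: 1 },
        Token { kind: Kind::Digits, offset: 2, len: 1 },
    ];
    assert_eq!(glue_float(&glued), Some((0, 3)));

    // `1 .5` does not glue into a single number.
    let spaced = [
        Token { kind: Kind::Digits, offset: 0, len: 1 },
        Token { kind: Kind::WhiteSpace, offset: 1, len: 1 },
        Token { kind: Kind::Dot, offset: 2, len: 1 },
    ];
    assert_eq!(glue_float(&spaced), None);
}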
@@ -138,7 +139,8 @@ fn numbers() {
        "+123129decs+"
        => [
            t!("+"),
            TokenKind::Invalid,
            TokenKind::Digits, // 123129
            TokenKind::Identifier, // decs
            t!("+"),
        ]
    }
@@ -147,7 +149,8 @@ fn numbers() {
        "+39349fs+"
        => [
            t!("+"),
            TokenKind::Invalid,
            TokenKind::Digits, // 39349
            TokenKind::Identifier, // fs
            t!("+"),
        ]
    }
@@ -156,7 +159,8 @@ fn numbers() {
        "+394393df+"
        => [
            t!("+"),
            TokenKind::Invalid,
            TokenKind::Digits, // 394393
            TokenKind::Identifier, // df
            t!("+"),
        ]
    }
@@ -165,7 +169,8 @@ fn numbers() {
        "+32932932def+"
        => [
            t!("+"),
            TokenKind::Invalid,
            TokenKind::Digits, // 32932932
            TokenKind::Identifier, // def
            t!("+"),
        ]
    }
@@ -174,7 +179,8 @@ fn numbers() {
        "+329239329z+"
        => [
            t!("+"),
            TokenKind::Invalid,
            TokenKind::Digits, // 329239329
            TokenKind::Identifier, // z
            t!("+"),
        ]
    }
@@ -189,53 +195,82 @@ fn duration() {
        1nsa+1ans+1aus+1usa+1ams+1msa+1am+1ma+1ah+1ha+1aw+1wa+1ay+1ya+1µsa
    "#
    => [
        TokenKind::Duration,
        t!(" "),
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Nano),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::MicroUnicode),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Micro),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Milli),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Second),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Minute),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Hour),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Week),
        t!("+"),
        TokenKind::Duration,
        TokenKind::Digits,
        TokenKind::DurationSuffix(DurationSuffix::Year),

        TokenKind::Invalid,
        t!(" "),

        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Invalid,
        TokenKind::Digits,
        TokenKind::Identifier,
        t!("+"),
        TokenKind::Digits,
        TokenKind::Invalid,
        TokenKind::Identifier,
        t!(" "),
    ]
    }
}
@ -244,238 +279,9 @@ fn duration() {
|
|||
fn keyword() {
|
||||
test_case! {
|
||||
r#"select SELECT sElEcT"# => [
|
||||
t!("SELECT"),
|
||||
t!("SELECT"),
|
||||
t!("SELECT"),t!(" "),
|
||||
t!("SELECT"),t!(" "),
|
||||
t!("SELECT"),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uuid() {
|
||||
let mut lexer =
|
||||
crate::syn::lexer::Lexer::new(r#" u"e72bee20-f49b-11ec-b939-0242ac120002" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::Uuid);
|
||||
let uuid = lexer.uuid.take().unwrap();
|
||||
assert_eq!(uuid.0.to_string(), "e72bee20-f49b-11ec-b939-0242ac120002");
|
||||
|
||||
let mut lexer =
|
||||
crate::syn::lexer::Lexer::new(r#" u"b19bc00b-aa98-486c-ae37-c8e1c54295b1" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::Uuid);
|
||||
let uuid = lexer.uuid.take().unwrap();
|
||||
assert_eq!(uuid.0.to_string(), "b19bc00b-aa98-486c-ae37-c8e1c54295b1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_just_date() {
|
||||
let mut lexer = crate::syn::lexer::Lexer::new(r#" d"2012-04-23" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_zone_time() {
|
||||
let mut lexer = crate::syn::lexer::Lexer::new(r#" d"2020-01-01T00:00:00Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_with_time() {
|
||||
let mut lexer = crate::syn::lexer::Lexer::new(r#" d"2012-04-23T18:25:43Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(18, 25, 43, 0).unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_nanos() {
|
||||
let mut lexer = crate::syn::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5631Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 563_100_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_utc() {
|
||||
let mut lexer =
|
||||
crate::syn::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.0000511Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 51_100)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_pacific() {
|
||||
let mut lexer =
|
||||
crate::syn::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511-08:00" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = FixedOffset::west_opt(8 * 3600).unwrap();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 511_000_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_pacific_partial() {
|
||||
let mut lexer =
|
||||
crate::syn::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511+08:30" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = FixedOffset::east_opt(8 * 3600 + 30 * 60).unwrap();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 511_000_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_utc_nanoseconds() {
|
||||
let mut lexer =
|
||||
crate::syn::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5110000Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = Utc.fix();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 511_000_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_utc_sub_nanoseconds() {
|
||||
let mut lexer =
|
||||
crate::syn::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.0000511Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = Utc.fix();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 51_100)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
|
|
@ -40,29 +40,3 @@ pub mod byte {
|
|||
/// Space
|
||||
pub const SP: u8 = 0x20;
|
||||
}
|
||||
|
||||
/// A trait extending u8 with some extra functions.
|
||||
pub trait U8Ext {
|
||||
/// Returns if the u8 is the start of an identifier.
|
||||
fn is_identifier_start(&self) -> bool;
|
||||
|
||||
/// Returns if the u8 can start a number.
|
||||
fn is_number_start(&self) -> bool;
|
||||
|
||||
/// Returns if the u8 can continue an identifier after the first character.
|
||||
fn is_identifier_continue(&self) -> bool;
|
||||
}
|
||||
|
||||
impl U8Ext for u8 {
|
||||
fn is_identifier_start(&self) -> bool {
|
||||
matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'_')
|
||||
}
|
||||
|
||||
fn is_identifier_continue(&self) -> bool {
|
||||
matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
|
||||
}
|
||||
|
||||
fn is_number_start(&self) -> bool {
|
||||
self.is_ascii_digit()
|
||||
}
|
||||
}
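// A minimal usage sketch of the trait above (an illustration, not part of the
// diff): identifier bytes start with a letter or `_` and may then also contain
// digits, while numbers start with an ascii digit.
#[test]
fn u8_ext_sketch() {
    assert!(b'a'.is_identifier_start());
    assert!(!b'1'.is_identifier_start());
    assert!(b'1'.is_identifier_continue());
    assert!(b'9'.is_number_start());
}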
|
||||
|
|
|
@ -1,125 +0,0 @@
|
|||
use crate::{
|
||||
sql::Uuid,
|
||||
syn::token::{Token, TokenKind},
|
||||
};
|
||||
|
||||
use super::{Error as LexError, Lexer};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
#[non_exhaustive]
|
||||
pub enum Error {
|
||||
#[error("missing digits")]
|
||||
MissingDigits,
|
||||
#[error("digit was not in allowed range")]
|
||||
InvalidRange,
|
||||
#[error("expected uuid-strand to end")]
|
||||
ExpectedStrandEnd,
|
||||
#[error("missing a uuid separator")]
|
||||
MissingSeperator,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Lex a uuid strand with either double or single quotes.
|
||||
///
|
||||
/// Expects the first delimiter to already have been eaten.
|
||||
pub fn lex_uuid(&mut self, double: bool) -> Token {
|
||||
match self.lex_uuid_err(double) {
|
||||
Ok(x) => {
|
||||
debug_assert!(self.uuid.is_none());
|
||||
self.uuid = Some(x);
|
||||
self.finish_token(TokenKind::Uuid)
|
||||
}
|
||||
Err(_) => self.invalid_token(LexError::Uuid(Error::MissingDigits)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a uuid strand with either double or single quotes but return a result instead of a
|
||||
/// token.
|
||||
///
|
||||
/// Expects the first delimiter to already have been eaten.
|
||||
pub fn lex_uuid_err(&mut self, double: bool) -> Result<Uuid, Error> {
|
||||
let uuid = self.lex_uuid_err_inner()?;
|
||||
|
||||
let end_char = if double {
|
||||
b'"'
|
||||
} else {
|
||||
b'\''
|
||||
};
|
||||
// closing strand character
|
||||
if !self.eat(end_char) {
|
||||
return Err(Error::ExpectedStrandEnd);
|
||||
}
|
||||
|
||||
Ok(uuid)
|
||||
}
|
||||
|
||||
/// Lex a uuid strand without delimiting quotes but return a result instead of a
|
||||
/// token.
|
||||
///
|
||||
/// Expects the first delimiter to already have been eaten.
|
||||
pub fn lex_uuid_err_inner(&mut self) -> Result<Uuid, Error> {
|
||||
let start = self.reader.offset();
|
||||
|
||||
if !self.lex_hex(8) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.lex_hex(4) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.eat_when(|x| (b'1'..=b'8').contains(&x)) {
|
||||
if self.reader.peek().map(|x| x.is_ascii_digit()).unwrap_or(false) {
|
||||
// byte was an ascii digit but not in the valid range.
|
||||
return Err(Error::InvalidRange);
|
||||
}
|
||||
return Err(Error::MissingDigits);
|
||||
};
|
||||
|
||||
if !self.lex_hex(3) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.lex_hex(4) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.lex_hex(12) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
let end = self.reader.offset();
|
||||
// The lexer ensures that the section of bytes is valid utf8 so this should never panic.
|
||||
let uuid_str = std::str::from_utf8(&self.reader.full()[start..end]).unwrap();
|
||||
// The lexer ensures that the bytes are a valid uuid so this should never panic.
|
||||
Ok(Uuid(uuid::Uuid::try_from(uuid_str).unwrap()))
|
||||
}
|
||||
|
||||
/// Lexes a given amount of hex characters. Returns true if the lexing was successful, false
|
||||
/// otherwise.
|
||||
pub fn lex_hex(&mut self, amount: u8) -> bool {
|
||||
for _ in 0..amount {
|
||||
if !self.eat_when(|x| x.is_ascii_hexdigit()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
}
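// For reference, a sketch of the digit groups the routine above validates
// (illustrative only, not part of the diff): a uuid strand carries 8-4-4-4-12
// hex digits, with the first digit of the third group restricted to 1..=8.
#[test]
fn uuid_group_layout_sketch() {
    let s = "e72bee20-f49b-11ec-b939-0242ac120002";
    let lens: Vec<usize> = s.split('-').map(str::len).collect();
    assert_eq!(lens, vec![8, 4, 4, 4, 12]);
}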
|
|
@ -1,5 +1,3 @@
|
|||
#![allow(dead_code)]
|
||||
|
||||
//! Module containing the implementation of the surrealql tokens, lexer, and parser.
|
||||
|
||||
use crate::{
|
||||
|
@ -21,8 +19,7 @@ pub trait Parse<T> {
|
|||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
use lexer::Lexer;
|
||||
use parser::{ParseError, ParseErrorKind, Parser};
|
||||
use parser::Parser;
|
||||
use reblessive::Stack;
|
||||
|
||||
/// Takes a string and returns if it could be a reserved keyword in certain contexts.
|
||||
|
@ -134,26 +131,16 @@ pub fn idiom(input: &str) -> Result<Idiom, Error> {
|
|||
/// Parse a datetime without enclosing delimiters from a string.
|
||||
pub fn datetime_raw(input: &str) -> Result<Datetime, Error> {
|
||||
debug!("parsing datetime, input = {input}");
|
||||
let mut lexer = Lexer::new(input.as_bytes());
|
||||
lexer
|
||||
.lex_datetime_raw_err()
|
||||
.map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidToken(lexer::Error::DateTime(e)),
|
||||
lexer.current_span(),
|
||||
)
|
||||
})
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
let mut parser = Parser::new(input.as_bytes());
|
||||
parser.parse_inner_datetime().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
||||
/// Parse a duration from a string.
|
||||
pub fn duration(input: &str) -> Result<Duration, Error> {
|
||||
debug!("parsing duration, input = {input}");
|
||||
let mut lexer = Lexer::new(input.as_bytes());
|
||||
lexer
|
||||
.lex_only_duration()
|
||||
.map_err(|e| ParseError::new(ParseErrorKind::InvalidToken(e), lexer.current_span()))
|
||||
let mut parser = Parser::new(input.as_bytes());
|
||||
parser
|
||||
.next_token_value::<Duration>()
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
|
|
@ -1,360 +0,0 @@
|
|||
use crate::{
|
||||
sql::{
|
||||
language::Language, Datetime, Duration, Ident, Number, Param, Regex, Strand, Table, Uuid,
|
||||
},
|
||||
syn::{
|
||||
parser::mac::unexpected,
|
||||
token::{t, NumberKind, Token, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
use super::{ParseError, ParseErrorKind, ParseResult, Parser};
|
||||
|
||||
/// A trait for parsing single tokens with a specific value.
|
||||
pub trait TokenValue: Sized {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self>;
|
||||
}
|
||||
|
||||
impl TokenValue for Ident {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Keyword(_)
|
||||
| TokenKind::Language(_)
|
||||
| TokenKind::Algorithm(_)
|
||||
| TokenKind::Distance(_)
|
||||
| TokenKind::VectorType(_) => {
|
||||
let str = parser.lexer.reader.span(token.span);
|
||||
// Lexer should ensure that the token is valid utf-8
|
||||
let str = std::str::from_utf8(str).unwrap().to_owned();
|
||||
Ok(Ident(str))
|
||||
}
|
||||
TokenKind::Identifier => {
|
||||
let str = parser.lexer.string.take().unwrap();
|
||||
Ok(Ident(str))
|
||||
}
|
||||
x => {
|
||||
unexpected!(parser, x, "a identifier");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Table {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
parser.token_value::<Ident>(token).map(|x| Table(x.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u64 {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Number(NumberKind::Integer) => {
|
||||
let number = parser.lexer.string.take().unwrap().parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidInteger {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(number)
|
||||
}
|
||||
x => unexpected!(parser, x, "an integer"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u32 {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Number(NumberKind::Integer) => {
|
||||
let number = parser.lexer.string.take().unwrap().parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidInteger {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(number)
|
||||
}
|
||||
x => unexpected!(parser, x, "an integer"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u16 {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Number(NumberKind::Integer) => {
|
||||
let number = parser.lexer.string.take().unwrap().parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidInteger {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(number)
|
||||
}
|
||||
x => unexpected!(parser, x, "an integer"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u8 {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Number(NumberKind::Integer) => {
|
||||
let number = parser.lexer.string.take().unwrap().parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidInteger {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(number)
|
||||
}
|
||||
x => unexpected!(parser, x, "an integer"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for f32 {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Number(NumberKind::NaN) => Ok(f32::NAN),
|
||||
TokenKind::Number(
|
||||
NumberKind::Integer
|
||||
| NumberKind::Float
|
||||
| NumberKind::FloatMantissa
|
||||
| NumberKind::Mantissa
|
||||
| NumberKind::MantissaExponent,
|
||||
) => {
|
||||
let number = parser.lexer.string.take().unwrap().parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidFloat {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(number)
|
||||
}
|
||||
x => unexpected!(parser, x, "a floating point number"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for f64 {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Number(NumberKind::NaN) => Ok(f64::NAN),
|
||||
TokenKind::Number(
|
||||
NumberKind::Integer
|
||||
| NumberKind::Float
|
||||
| NumberKind::FloatMantissa
|
||||
| NumberKind::Mantissa
|
||||
| NumberKind::MantissaExponent,
|
||||
) => {
|
||||
let number = parser.lexer.string.take().unwrap().parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidFloat {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(number)
|
||||
}
|
||||
x => unexpected!(parser, x, "a floating point number"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Language {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Language(x) => Ok(x),
|
||||
// `NO` can both be used as a keyword and as a language.
|
||||
t!("NO") => Ok(Language::Norwegian),
|
||||
x => unexpected!(parser, x, "a language"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Number {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Number(NumberKind::NaN) => Ok(Number::Float(f64::NAN)),
|
||||
TokenKind::Number(NumberKind::Integer) => {
|
||||
let source = parser.lexer.string.take().unwrap();
|
||||
if let Ok(x) = source.parse() {
|
||||
return Ok(Number::Int(x));
|
||||
}
|
||||
// integer overflowed, fallback to floating point
|
||||
// As far as I can tell this will never fail for valid integers.
|
||||
let x = source.parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidFloat {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(Number::Float(x))
|
||||
}
|
||||
TokenKind::Number(
|
||||
NumberKind::Mantissa
|
||||
| NumberKind::MantissaExponent
|
||||
| NumberKind::Float
|
||||
| NumberKind::FloatMantissa,
|
||||
) => {
|
||||
let source = parser.lexer.string.take().unwrap();
|
||||
// As far as I can tell this will never fail for valid integers.
|
||||
let x = source.parse().map_err(|e| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidFloat {
|
||||
error: e,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(Number::Float(x))
|
||||
}
|
||||
TokenKind::Number(NumberKind::Decimal) => {
|
||||
let source = parser.lexer.string.take().unwrap();
|
||||
// As far as I can tell this will never fail for valid integers.
|
||||
let x: rust_decimal::Decimal = source.parse().map_err(|error| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidDecimal {
|
||||
error,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(Number::Decimal(x))
|
||||
}
|
||||
TokenKind::Number(NumberKind::DecimalExponent) => {
|
||||
let source = parser.lexer.string.take().unwrap();
|
||||
// As far as I can tell this will never fail for valid integers.
|
||||
let x = rust_decimal::Decimal::from_scientific(&source).map_err(|error| {
|
||||
ParseError::new(
|
||||
ParseErrorKind::InvalidDecimal {
|
||||
error,
|
||||
},
|
||||
token.span,
|
||||
)
|
||||
})?;
|
||||
Ok(Number::Decimal(x))
|
||||
}
|
||||
x => unexpected!(parser, x, "a number"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Param {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Parameter => {
|
||||
let param = parser.lexer.string.take().unwrap();
|
||||
Ok(Param(Ident(param)))
|
||||
}
|
||||
x => unexpected!(parser, x, "a parameter"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Duration {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
let TokenKind::Duration = token.kind else {
|
||||
unexpected!(parser, token.kind, "a duration")
|
||||
};
|
||||
let duration = parser.lexer.duration.take().expect("token data was already consumed");
|
||||
Ok(duration)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Datetime {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
let TokenKind::DateTime = token.kind else {
|
||||
unexpected!(parser, token.kind, "a duration")
|
||||
};
|
||||
let datetime = parser.lexer.datetime.take().expect("token data was already consumed");
|
||||
Ok(datetime)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Strand {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
match token.kind {
|
||||
TokenKind::Strand => {
|
||||
let strand = parser.lexer.string.take().unwrap();
|
||||
Ok(Strand(strand))
|
||||
}
|
||||
x => unexpected!(parser, x, "a strand"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Uuid {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
let TokenKind::Uuid = token.kind else {
|
||||
unexpected!(parser, token.kind, "a duration")
|
||||
};
|
||||
Ok(parser.lexer.uuid.take().expect("token data was already consumed"))
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Regex {
|
||||
fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult<Self> {
|
||||
let TokenKind::Regex = token.kind else {
|
||||
unexpected!(parser, token.kind, "a regex")
|
||||
};
|
||||
Ok(parser.lexer.regex.take().expect("token data was already consumed"))
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser<'_> {
|
||||
/// Parse a token value from the next token in the parser.
|
||||
pub fn next_token_value<V: TokenValue>(&mut self) -> ParseResult<V> {
|
||||
let next = self.peek();
|
||||
let res = V::from_token(self, next);
|
||||
if res.is_ok() {
|
||||
self.pop_peek();
|
||||
}
|
||||
res
|
||||
}
|
||||
|
||||
pub fn parse_signed_float(&mut self) -> ParseResult<f64> {
|
||||
let neg = self.eat(t!("-"));
|
||||
if !neg {
|
||||
self.eat(t!("+"));
|
||||
}
|
||||
let res: f64 = self.next_token_value()?;
|
||||
if neg {
|
||||
Ok(-res)
|
||||
} else {
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a token value from the given token.
|
||||
pub fn token_value<V: TokenValue>(&mut self, token: Token) -> ParseResult<V> {
|
||||
V::from_token(self, token)
|
||||
}
|
||||
|
||||
/// Returns if the peeked token can be an identifier.
|
||||
pub fn peek_can_be_ident(&mut self) -> bool {
|
||||
matches!(
|
||||
self.peek_kind(),
|
||||
TokenKind::Keyword(_)
|
||||
| TokenKind::Language(_)
|
||||
| TokenKind::Algorithm(_)
|
||||
| TokenKind::Distance(_)
|
||||
| TokenKind::VectorType(_)
|
||||
| TokenKind::Identifier
|
||||
)
|
||||
}
|
||||
}
|
192
core/src/syn/parser/basic/datetime.rs
Normal file
|
@ -0,0 +1,192 @@
|
|||
use std::ops::RangeInclusive;
|
||||
|
||||
use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
|
||||
|
||||
use crate::{
|
||||
sql::Datetime,
|
||||
syn::{
|
||||
parser::{
|
||||
mac::{expected_whitespace, unexpected},
|
||||
ParseError, ParseErrorKind, ParseResult, Parser,
|
||||
},
|
||||
token::{t, DatetimeChars, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
impl Parser<'_> {
|
||||
pub fn parse_datetime(&mut self) -> ParseResult<Datetime> {
|
||||
let start = self.peek();
|
||||
let double = match start.kind {
|
||||
t!("d\"") => true,
|
||||
t!("d'") => false,
|
||||
x => unexpected!(self, x, "a datetime"),
|
||||
};
|
||||
|
||||
self.pop_peek();
|
||||
|
||||
let datetime = self.parse_inner_datetime()?;
|
||||
|
||||
if double {
|
||||
expected_whitespace!(self, t!("\""));
|
||||
} else {
|
||||
expected_whitespace!(self, t!("'"));
|
||||
}
|
||||
|
||||
Ok(datetime)
|
||||
}
|
||||
|
||||
/// Parses the datetime without surrounding quotes.
|
||||
pub fn parse_inner_datetime(&mut self) -> ParseResult<Datetime> {
|
||||
let start_date = self.peek_whitespace().span;
|
||||
|
||||
let year_neg = self.eat_whitespace(t!("-"));
|
||||
if !year_neg {
|
||||
self.eat_whitespace(t!("+"));
|
||||
}
|
||||
|
||||
let year = self.parse_datetime_digits(4, 0..=9999)?;
|
||||
expected_whitespace!(self, t!("-"));
|
||||
let month = self.parse_datetime_digits(2, 1..=12)?;
|
||||
expected_whitespace!(self, t!("-"));
|
||||
let day = self.parse_datetime_digits(2, 1..=31)?;
|
||||
|
||||
let date_span = start_date.covers(self.last_span());
|
||||
|
||||
let year = if year_neg {
|
||||
-(year as i32)
|
||||
} else {
|
||||
year as i32
|
||||
};
|
||||
|
||||
let date = NaiveDate::from_ymd_opt(year, month as u32, day as u32)
|
||||
.ok_or_else(|| ParseError::new(ParseErrorKind::InvalidDatetimeDate, date_span))?;
|
||||
|
||||
if !self.eat(TokenKind::DatetimeChars(DatetimeChars::T)) {
|
||||
let time = NaiveTime::default();
|
||||
let date_time = NaiveDateTime::new(date, time);
|
||||
|
||||
let datetime =
|
||||
Utc.fix().from_local_datetime(&date_time).earliest().unwrap().with_timezone(&Utc);
|
||||
|
||||
return Ok(Datetime(datetime));
|
||||
}
|
||||
|
||||
let start_time = self.peek_whitespace().span;
|
||||
|
||||
let hour = self.parse_datetime_digits(2, 0..=24)?;
|
||||
expected_whitespace!(self, t!(":"));
|
||||
let minute = self.parse_datetime_digits(2, 0..=59)?;
|
||||
expected_whitespace!(self, t!(":"));
|
||||
let second = self.parse_datetime_digits(2, 0..=59)?;
|
||||
|
||||
let nanos = if self.eat_whitespace(t!(".")) {
|
||||
let digits_token = expected_whitespace!(self, TokenKind::Digits);
|
||||
let slice = self.span_bytes(digits_token.span);
|
||||
|
||||
if slice.len() > 9 {
|
||||
return Err(ParseError::new(
|
||||
ParseErrorKind::TooManyNanosecondsDatetime,
|
||||
digits_token.span,
|
||||
));
|
||||
}
|
||||
|
||||
let mut number = 0u32;
|
||||
for i in 0..9 {
|
||||
let Some(c) = slice.get(i).copied() else {
|
||||
// If digits are missing they are counted as 0's
|
||||
for _ in i..9 {
|
||||
number *= 10;
|
||||
}
|
||||
break;
|
||||
};
|
||||
number *= 10;
|
||||
number += (c - b'0') as u32;
|
||||
}
|
||||
|
||||
number
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let time_span = start_time.covers(self.last_span());
|
||||
|
||||
let time =
|
||||
NaiveTime::from_hms_nano_opt(hour as u32, minute as u32, second as u32, nanos)
|
||||
.ok_or_else(|| ParseError::new(ParseErrorKind::InvalidDatetimeTime, time_span))?;
|
||||
|
||||
let peek = self.peek_whitespace();
|
||||
let timezone = match peek.kind {
|
||||
t!("+") => self.parse_datetime_timezone(false)?,
|
||||
t!("-") => self.parse_datetime_timezone(true)?,
|
||||
TokenKind::DatetimeChars(DatetimeChars::Z) => {
|
||||
self.pop_peek();
|
||||
Utc.fix()
|
||||
}
|
||||
x => unexpected!(self, x, "`Z` or a timezone"),
|
||||
};
|
||||
|
||||
let date_time = NaiveDateTime::new(date, time);
|
||||
|
||||
let datetime = timezone
|
||||
.from_local_datetime(&date_time)
|
||||
.earliest()
|
||||
// this should never panic with a fixed offset.
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
Ok(Datetime(datetime))
|
||||
}
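// A standalone rendering of the fractional-second arithmetic above (a sketch,
// not part of the diff): missing digits count as trailing zeros, so ".5631"
// yields 563_100_000 nanoseconds, matching the date_time_nanos lexer test.
// Assumes at most 9 digits, as validated above.
fn pad_nanos_sketch(digits: &[u8]) -> u32 {
    let mut number = 0u32;
    for i in 0..9 {
        number *= 10;
        number += digits.get(i).copied().map_or(0, |c| (c - b'0') as u32);
    }
    number
}
// e.g. Parser::pad_nanos_sketch(b"5631") == 563_100_000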
|
||||
|
||||
fn parse_datetime_timezone(&mut self, neg: bool) -> ParseResult<FixedOffset> {
|
||||
self.pop_peek();
|
||||
let hour = self.parse_datetime_digits(2, 0..=23)?;
|
||||
expected_whitespace!(self, t!(":"));
|
||||
let minute = self.parse_datetime_digits(2, 0..=59)?;
|
||||
|
||||
// The range checks on the digits ensure that the offset can't exceed 23:59 so below
|
||||
// unwraps won't panic.
|
||||
if neg {
|
||||
Ok(FixedOffset::west_opt((hour * 3600 + minute * 60) as i32).unwrap())
|
||||
} else {
|
||||
Ok(FixedOffset::east_opt((hour * 3600 + minute * 60) as i32).unwrap())
|
||||
}
|
||||
}
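// Worked example of the offset arithmetic above (illustrative): "-08:00"
// becomes FixedOffset::west_opt(8 * 3600), i.e. local_minus_utc() == -28_800,
// while "+08:30" becomes FixedOffset::east_opt(8 * 3600 + 30 * 60),
// i.e. local_minus_utc() == 30_600.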
|
||||
|
||||
fn parse_datetime_digits(
|
||||
&mut self,
|
||||
len: usize,
|
||||
range: RangeInclusive<usize>,
|
||||
) -> ParseResult<usize> {
|
||||
let t = self.peek_whitespace();
|
||||
match t.kind {
|
||||
TokenKind::Digits => {}
|
||||
x => unexpected!(self, x, "datetime digits"),
|
||||
}
|
||||
|
||||
let digits_str = self.span_str(t.span);
|
||||
if digits_str.len() != len {
|
||||
return Err(ParseError::new(
|
||||
ParseErrorKind::InvalidDatetimePart {
|
||||
len,
|
||||
},
|
||||
t.span,
|
||||
));
|
||||
}
|
||||
|
||||
self.pop_peek();
|
||||
|
||||
// This should always parse as it has been validated by the lexer.
|
||||
let value = digits_str.parse().unwrap();
|
||||
|
||||
if !range.contains(&value) {
|
||||
return Err(ParseError::new(
|
||||
ParseErrorKind::OutrangeDatetimePart {
|
||||
range,
|
||||
},
|
||||
t.span,
|
||||
));
|
||||
}
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
}
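// Putting the helpers together, a hedged end-to-end sketch (assumes the
// crate-internal Parser API shown in this diff):
#[test]
fn parse_datetime_sketch() {
    let mut parser = Parser::new(br#"d"2012-04-23T18:25:43Z""#);
    let datetime = parser.parse_datetime().unwrap();
    assert_eq!(datetime.0.to_rfc3339(), "2012-04-23T18:25:43+00:00");
}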
|
192
core/src/syn/parser/basic/mod.rs
Normal file
|
@ -0,0 +1,192 @@
|
|||
use crate::{
|
||||
sql::{language::Language, Datetime, Duration, Ident, Param, Regex, Strand, Table, Uuid},
|
||||
syn::{
|
||||
parser::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser},
|
||||
token::{t, QouteKind, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
mod datetime;
|
||||
mod number;
|
||||
mod uuid;
|
||||
|
||||
/// A trait for parsing single tokens with a specific value.
|
||||
pub trait TokenValue: Sized {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self>;
|
||||
}
|
||||
|
||||
impl TokenValue for Ident {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
match parser.glue_ident(false)?.kind {
|
||||
TokenKind::Identifier => {
|
||||
parser.pop_peek();
|
||||
let str = parser.lexer.string.take().unwrap();
|
||||
Ok(Ident(str))
|
||||
}
|
||||
TokenKind::Keyword(_) | TokenKind::Language(_) | TokenKind::Algorithm(_) => {
|
||||
let s = parser.pop_peek().span;
|
||||
Ok(Ident(parser.span_str(s).to_owned()))
|
||||
}
|
||||
x => {
|
||||
unexpected!(parser, x, "an identifier");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Table {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parser.next_token_value::<Ident>().map(|x| Table(x.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Language {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
match parser.peek_kind() {
|
||||
TokenKind::Language(x) => {
|
||||
parser.pop_peek();
|
||||
Ok(x)
|
||||
}
|
||||
// `NO` can both be used as a keyword and as a language.
|
||||
t!("NO") => {
|
||||
parser.pop_peek();
|
||||
Ok(Language::Norwegian)
|
||||
}
|
||||
x => unexpected!(parser, x, "a language"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Param {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
match parser.peek_kind() {
|
||||
TokenKind::Parameter => {
|
||||
parser.pop_peek();
|
||||
let param = parser.lexer.string.take().unwrap();
|
||||
Ok(Param(Ident(param)))
|
||||
}
|
||||
x => unexpected!(parser, x, "a parameter"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Duration {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
match parser.glue_duration()?.kind {
|
||||
TokenKind::Duration => {
|
||||
parser.pop_peek();
|
||||
Ok(Duration(parser.lexer.duration.unwrap()))
|
||||
}
|
||||
x => unexpected!(parser, x, "a duration"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Datetime {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parser.parse_datetime()
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Strand {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
TokenKind::Qoute(QouteKind::Plain | QouteKind::PlainDouble) => {
|
||||
parser.pop_peek();
|
||||
let t = parser.lexer.relex_strand(token);
|
||||
let TokenKind::Strand = t.kind else {
|
||||
unexpected!(parser, t.kind, "a strand")
|
||||
};
|
||||
Ok(Strand(parser.lexer.string.take().unwrap()))
|
||||
}
|
||||
TokenKind::Strand => {
|
||||
parser.pop_peek();
|
||||
Ok(Strand(parser.lexer.string.take().unwrap()))
|
||||
}
|
||||
x => unexpected!(parser, x, "a strand"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Uuid {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parser.parse_uuid()
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Regex {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
match parser.peek().kind {
|
||||
t!("/") => {
|
||||
let pop = parser.pop_peek();
|
||||
assert!(!parser.has_peek());
|
||||
let token = parser.lexer.relex_regex(pop);
|
||||
let mut span = token.span;
|
||||
|
||||
// remove the starting and ending `/` characters.
|
||||
span.offset += 1;
|
||||
span.len -= 2;
|
||||
|
||||
let regex = parser
|
||||
.span_str(span)
|
||||
.parse()
|
||||
.map_err(|e| ParseError::new(ParseErrorKind::InvalidRegex(e), token.span))?;
|
||||
Ok(regex)
|
||||
}
|
||||
x => unexpected!(parser, x, "a regex"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Parser<'_> {
|
||||
/// Parse a token value from the next token in the parser.
|
||||
pub fn next_token_value<V: TokenValue>(&mut self) -> ParseResult<V> {
|
||||
V::from_token(self)
|
||||
}
|
||||
}
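// Usage shape of the trait machinery above (a sketch; assumes sql::Duration
// wraps std::time::Duration as elsewhere in this crate): the target type is
// selected via turbofish on next_token_value.
#[test]
fn next_token_value_sketch() {
    let mut parser = Parser::new(b"90s");
    let duration = parser.next_token_value::<Duration>().unwrap();
    assert_eq!(duration.0, std::time::Duration::from_secs(90));
}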
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
#[test]
|
||||
fn identifiers() {
|
||||
use crate::sql;
|
||||
|
||||
fn assert_ident_parses_correctly(ident: &str) {
|
||||
use crate::syn::Parser;
|
||||
use reblessive::Stack;
|
||||
|
||||
let mut parser = Parser::new(ident.as_bytes());
|
||||
let mut stack = Stack::new();
|
||||
let r = stack
|
||||
.enter(|ctx| async move { parser.parse_query(ctx).await })
|
||||
.finish()
|
||||
.expect(&format!("failed on {}", ident));
|
||||
|
||||
assert_eq!(
|
||||
r,
|
||||
sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Idiom(
|
||||
sql::Idiom(vec![sql::Part::Field(sql::Ident(ident.to_string()))])
|
||||
))]))
|
||||
)
|
||||
}
|
||||
|
||||
assert_ident_parses_correctly("select123");
|
||||
|
||||
assert_ident_parses_correctly("e123");
|
||||
|
||||
assert_ident_parses_correctly("dec123");
|
||||
assert_ident_parses_correctly("f123");
|
||||
|
||||
assert_ident_parses_correctly("y123");
|
||||
assert_ident_parses_correctly("w123");
|
||||
assert_ident_parses_correctly("d123");
|
||||
assert_ident_parses_correctly("h123");
|
||||
assert_ident_parses_correctly("m123");
|
||||
assert_ident_parses_correctly("s123");
|
||||
assert_ident_parses_correctly("ms123");
|
||||
assert_ident_parses_correctly("us123");
|
||||
assert_ident_parses_correctly("ns123");
|
||||
}
|
||||
}
|
184
core/src/syn/parser/basic/number.rs
Normal file
|
@ -0,0 +1,184 @@
|
|||
use rust_decimal::Decimal;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
num::{ParseFloatError, ParseIntError},
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
sql::Number,
|
||||
syn::{
|
||||
parser::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser},
|
||||
token::{t, NumberKind, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
use super::TokenValue;
|
||||
|
||||
fn prepare_number_str(str: &str) -> Cow<str> {
|
||||
if str.contains('_') {
|
||||
Cow::Owned(str.chars().filter(|x| *x != '_').collect())
|
||||
} else {
|
||||
Cow::Borrowed(str)
|
||||
}
|
||||
}
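// Example behaviour of the helper above (not part of the diff): underscore
// digit separators are stripped, and inputs without them are borrowed
// unchanged, avoiding an allocation.
#[test]
fn prepare_number_str_sketch() {
    assert_eq!(prepare_number_str("1_000_000").as_ref(), "1000000");
    assert!(matches!(prepare_number_str("123"), Cow::Borrowed("123")));
}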
|
||||
|
||||
/// Generic integer parsing method,
|
||||
/// works for all unsigned integers.
|
||||
fn parse_integer<I>(parser: &mut Parser<'_>) -> ParseResult<I>
|
||||
where
|
||||
I: FromStr<Err = ParseIntError>,
|
||||
{
|
||||
let mut peek = parser.peek();
|
||||
|
||||
if let t!("-") = peek.kind {
|
||||
unexpected!(parser,t!("-"),"an integer" => "only positive integers are allowed here")
|
||||
}
|
||||
|
||||
if let t!("+") = peek.kind {
|
||||
peek = parser.peek_whitespace();
|
||||
}
|
||||
|
||||
match peek.kind {
|
||||
TokenKind::Digits => {
|
||||
parser.pop_peek();
|
||||
assert!(!parser.has_peek());
|
||||
|
||||
let p = parser.peek_whitespace();
|
||||
match p.kind {
|
||||
t!(".") => {
|
||||
unexpected!(parser, p.kind, "an integer")
|
||||
}
|
||||
t!("dec") => {
|
||||
unexpected!(parser, p.kind, "an integer" => "decimal numbers not supported here")
|
||||
}
|
||||
x if Parser::tokenkind_continues_ident(x) => {
|
||||
unexpected!(parser, p.kind, "an integer")
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// remove the possible "f" number suffix and any '_' characters
|
||||
let res = prepare_number_str(parser.span_str(peek.span))
|
||||
.parse()
|
||||
.map_err(ParseErrorKind::InvalidInteger)
|
||||
.map_err(|e| ParseError::new(e, peek.span))?;
|
||||
Ok(res)
|
||||
}
|
||||
x => unexpected!(parser, x, "an integer"),
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u64 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_integer(parser)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u32 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_integer(parser)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u16 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_integer(parser)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for u8 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_integer(parser)
|
||||
}
|
||||
}
|
||||
|
||||
/// Generic float parsing method,
|
||||
/// works for both f32 and f64
|
||||
fn parse_float<F>(parser: &mut Parser<'_>) -> ParseResult<F>
|
||||
where
|
||||
F: FromStr<Err = ParseFloatError>,
|
||||
{
|
||||
let peek = parser.peek();
|
||||
// find initial digits
|
||||
match peek.kind {
|
||||
TokenKind::NaN => return Ok("NaN".parse().unwrap()),
|
||||
TokenKind::Digits | t!("+") | t!("-") => {}
|
||||
x => unexpected!(parser, x, "a floating point number"),
|
||||
};
|
||||
let float_token = parser.glue_float()?;
|
||||
match float_token.kind {
|
||||
TokenKind::Number(NumberKind::Float) => {
|
||||
parser.pop_peek();
|
||||
}
|
||||
x => unexpected!(parser, x, "a floating point number"),
|
||||
};
|
||||
|
||||
let span = parser.span_str(float_token.span);
|
||||
|
||||
// remove the possible "f" number suffix and any '_' characters
|
||||
prepare_number_str(span.strip_suffix('f').unwrap_or(span))
|
||||
.parse()
|
||||
.map_err(ParseErrorKind::InvalidFloat)
|
||||
.map_err(|e| ParseError::new(e, float_token.span))
|
||||
}
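// The cleanup above in isolation (a sketch, not part of the diff): the
// optional `f` suffix and any `_` separators are removed before handing the
// text to Rust's float parser.
#[test]
fn float_cleanup_sketch() {
    let span = "1_000.5f";
    let cleaned = prepare_number_str(span.strip_suffix('f').unwrap_or(span));
    assert_eq!(cleaned.as_ref(), "1000.5");
    assert_eq!(cleaned.parse::<f64>().unwrap(), 1000.5);
}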
|
||||
|
||||
impl TokenValue for f32 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_float(parser)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for f64 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_float(parser)
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Number {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
let number = parser.glue_number()?;
|
||||
let number_kind = match number.kind {
|
||||
TokenKind::NaN => {
|
||||
parser.pop_peek();
|
||||
return Ok(Number::Float(f64::NAN));
|
||||
}
|
||||
TokenKind::Number(x) => x,
|
||||
x => unexpected!(parser, x, "a number"),
|
||||
};
|
||||
|
||||
parser.pop_peek();
|
||||
let span = parser.span_str(number.span);
|
||||
|
||||
match number_kind {
|
||||
NumberKind::Decimal => {
|
||||
let str = prepare_number_str(span.strip_suffix("dec").unwrap_or(span));
|
||||
let decimal = if str.contains('e') {
|
||||
Decimal::from_scientific(str.as_ref()).map_err(|e| {
|
||||
ParseError::new(ParseErrorKind::InvalidDecimal(e), number.span)
|
||||
})?
|
||||
} else {
|
||||
Decimal::from_str(str.as_ref()).map_err(|e| {
|
||||
ParseError::new(ParseErrorKind::InvalidDecimal(e), number.span)
|
||||
})?
|
||||
};
|
||||
|
||||
Ok(Number::Decimal(decimal))
|
||||
}
|
||||
NumberKind::Float => {
|
||||
let float = prepare_number_str(span.strip_suffix('f').unwrap_or(span))
|
||||
.parse()
|
||||
.map_err(|e| ParseError::new(ParseErrorKind::InvalidFloat(e), number.span))?;
|
||||
|
||||
Ok(Number::Float(float))
|
||||
}
|
||||
NumberKind::Integer => {
|
||||
let integer = prepare_number_str(span.strip_suffix('f').unwrap_or(span))
|
||||
.parse()
|
||||
.map_err(|e| ParseError::new(ParseErrorKind::InvalidInteger(e), number.span))?;
|
||||
|
||||
Ok(Number::Int(integer))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
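// A worked check of the two decimal paths above, using rust_decimal's public
// constructors (illustrative only): numbers containing an exponent route
// through from_scientific, all others through the plain FromStr parse.
#[test]
fn decimal_paths_sketch() {
    use std::str::FromStr;
    let scientific = Decimal::from_scientific("1.5e3").unwrap();
    let plain = Decimal::from_str("1500").unwrap();
    assert_eq!(scientific, plain);
}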
|
195
core/src/syn/parser/basic/uuid.rs
Normal file
|
@ -0,0 +1,195 @@
|
|||
use crate::{
|
||||
sql::Uuid,
|
||||
syn::{
|
||||
parser::{
|
||||
mac::{expected_whitespace, unexpected},
|
||||
ParseError, ParseErrorKind, ParseResult, Parser,
|
||||
},
|
||||
token::{t, DurationSuffix, NumberSuffix, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
impl Parser<'_> {
|
||||
/// Parses a uuid strand.
|
||||
pub fn parse_uuid(&mut self) -> ParseResult<Uuid> {
|
||||
let quote_token = self.peek_whitespace();
|
||||
|
||||
let double = match quote_token.kind {
|
||||
t!("u\"") => true,
|
||||
t!("u'") => false,
|
||||
x => unexpected!(self, x, "a uuid"),
|
||||
};
|
||||
|
||||
self.pop_peek();
|
||||
|
||||
// The uuid byte groups are 4-2-2-2-6 bytes (8-4-4-4-12 hex characters).
|
||||
|
||||
let mut uuid_buffer = [0u8; 16];
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[0..4])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[4..6])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[6..8])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[8..10])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[10..16])?;
|
||||
|
||||
if double {
|
||||
expected_whitespace!(self, t!("\""));
|
||||
} else {
|
||||
expected_whitespace!(self, t!("'"));
|
||||
}
|
||||
|
||||
Ok(Uuid(uuid::Uuid::from_bytes(uuid_buffer)))
|
||||
}
|
||||
|
||||
/// Eats a uuid hex section, enough to fill the given buffer with bytes.
|
||||
fn eat_uuid_hex(&mut self, buffer: &mut [u8]) -> ParseResult<()> {
|
||||
// A function to convert a hex digit to its number representation.
|
||||
fn ascii_to_hex(b: u8) -> Option<u8> {
|
||||
if b.is_ascii_digit() {
|
||||
return Some(b - b'0');
|
||||
}
|
||||
|
||||
if (b'a'..=b'f').contains(&b) {
|
||||
return Some(b - (b'a' - 10));
|
||||
}
|
||||
|
||||
if (b'A'..=b'F').contains(&b) {
|
||||
return Some(b - (b'A' - 10));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
// The number of characters required is twice the buffer length,
|
||||
// since every character encodes half a byte.
|
||||
let required_len = buffer.len() * 2;
|
||||
|
||||
// The next token should be digits or an identifier
|
||||
// If it is digits, an identifier might follow it.
|
||||
let start_token = self.peek_whitespace();
|
||||
let mut cur = start_token;
|
||||
loop {
|
||||
let next = self.peek_whitespace();
|
||||
match next.kind {
|
||||
TokenKind::Identifier => {
|
||||
cur = self.pop_peek();
|
||||
break;
|
||||
}
|
||||
TokenKind::Exponent
|
||||
| TokenKind::Digits
|
||||
| TokenKind::DurationSuffix(DurationSuffix::Day)
|
||||
| TokenKind::NumberSuffix(NumberSuffix::Float) => {
|
||||
cur = self.pop_peek();
|
||||
}
|
||||
TokenKind::Language(_) | TokenKind::Keyword(_) => {
|
||||
// there are some keywords and language keywords which could be part of the
|
||||
// hex section.
|
||||
if !self.span_bytes(next.span).iter().all(|x| x.is_ascii_hexdigit()) {
|
||||
unexpected!(self, TokenKind::Identifier, "UUID hex digits");
|
||||
}
|
||||
cur = self.pop_peek();
|
||||
break;
|
||||
}
|
||||
t!("-") | t!("\"") | t!("'") => break,
|
||||
_ => unexpected!(self, TokenKind::Identifier, "UUID hex digits"),
|
||||
}
|
||||
}
|
||||
|
||||
// Get the span that covered all eaten tokens.
|
||||
let digits_span = start_token.span.covers(cur.span);
|
||||
let digits_bytes = self.span_str(digits_span).as_bytes();
|
||||
|
||||
// For error handling, the invalid hex character error should be reported first,
|
||||
// before the incorrect-segment-length error, even if both apply.
|
||||
if !digits_bytes.iter().all(|x| x.is_ascii_hexdigit()) {
|
||||
return Err(ParseError::new(
|
||||
ParseErrorKind::Unexpected {
|
||||
found: TokenKind::Strand,
|
||||
expected: "UUID hex digits",
|
||||
},
|
||||
digits_span,
|
||||
));
|
||||
}
|
||||
|
||||
if digits_bytes.len() != required_len {
|
||||
return Err(ParseError::new(
|
||||
ParseErrorKind::InvalidUuidPart {
|
||||
len: required_len,
|
||||
},
|
||||
digits_span,
|
||||
));
|
||||
}
|
||||
|
||||
// write into the buffer
|
||||
for (i, b) in buffer.iter_mut().enumerate() {
|
||||
*b = ascii_to_hex(digits_bytes[i * 2]).unwrap() << 4
|
||||
| ascii_to_hex(digits_bytes[i * 2 + 1]).unwrap();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
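// A standalone rendering of the hex-pair packing in the final loop above
// (a sketch, not part of the diff): two hex characters form one byte, with
// the high nibble shifted left and the low nibble or'ed in.
fn hex_pair_sketch(hi: u8, lo: u8) -> u8 {
    let val = |b: u8| match b {
        b'0'..=b'9' => b - b'0',
        b'a'..=b'f' => b - (b'a' - 10),
        b'A'..=b'F' => b - (b'A' - 10),
        _ => unreachable!("bytes were validated as hex digits above"),
    };
    (val(hi) << 4) | val(lo)
}
#[test]
fn hex_pair_example() {
    assert_eq!(hex_pair_sketch(b'e', b'7'), 0xe7);
}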
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::syn::parser::Parser;
|
||||
|
||||
#[test]
|
||||
fn uuid_parsing() {
|
||||
fn assert_uuid_parses(s: &str) {
|
||||
let uuid_str = format!("u'{s}'");
|
||||
let mut parser = Parser::new(uuid_str.as_bytes());
|
||||
let uuid = parser.parse_uuid().unwrap();
|
||||
assert_eq!(uuid::Uuid::parse_str(s).unwrap(), *uuid);
|
||||
}
|
||||
|
||||
assert_uuid_parses("0531956f-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("0531956d-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("0531956e-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("0531956a-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195f1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195d1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195e1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195a1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("f0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("d0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("e0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("a0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_uuid_characters() {
|
||||
let hex_characters =
|
||||
[b'0', b'a', b'b', b'c', b'd', b'e', b'f', b'A', b'B', b'C', b'D', b'E', b'F'];
|
||||
|
||||
let mut uuid_string: Vec<u8> = "u'0531956f-20ec-4575-bb68-3e6b49d813fa'".to_string().into();
|
||||
|
||||
fn assert_uuid_parses(s: &[u8]) {
|
||||
let mut parser = Parser::new(s);
|
||||
parser.parse_uuid().unwrap();
|
||||
}
|
||||
|
||||
for i in hex_characters.iter() {
|
||||
for j in hex_characters.iter() {
|
||||
for k in hex_characters.iter() {
|
||||
uuid_string[3] = *i;
|
||||
uuid_string[4] = *j;
|
||||
uuid_string[5] = *k;
|
||||
|
||||
assert_uuid_parses(&uuid_string)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,8 +1,11 @@
|
|||
use super::{ParseResult, Parser};
|
||||
use crate::{
|
||||
sql::{Constant, Function, Ident, Value},
|
||||
sql::{Constant, Function, Value},
|
||||
syn::{
|
||||
parser::{mac::expected, ParseError, ParseErrorKind},
|
||||
parser::{
|
||||
mac::{expected, unexpected},
|
||||
ParseError, ParseErrorKind,
|
||||
},
|
||||
token::{t, Span},
|
||||
},
|
||||
};
|
||||
|
@ -440,15 +443,16 @@ impl Parser<'_> {
|
|||
pub async fn parse_builtin(&mut self, stk: &mut Stk, start: Span) -> ParseResult<Value> {
|
||||
let mut last_span = start;
|
||||
while self.eat(t!("::")) {
|
||||
self.next_token_value::<Ident>()?;
|
||||
let t = self.glue_ident(false)?;
|
||||
if !t.kind.can_be_identifier() {
|
||||
unexpected!(self, t.kind, "an identifier")
|
||||
}
|
||||
self.pop_peek();
|
||||
last_span = self.last_span();
|
||||
}
|
||||
|
||||
let span = start.covers(last_span);
|
||||
let slice = self.lexer.reader.span(span);
|
||||
|
||||
// parser implementations guarantee that the slice is a valid utf8 string.
|
||||
let str = std::str::from_utf8(slice).unwrap();
|
||||
let str = self.span_str(span);
|
||||
|
||||
match PATHS.get_entry(&UniCase::ascii(str)) {
|
||||
Some((_, PathKind::Constant(x))) => Ok(Value::Constant(x.clone())),
|
||||
|
|
|
@ -7,6 +7,7 @@ use crate::syn::{
|
|||
use std::{
|
||||
fmt::Write,
|
||||
num::{ParseFloatError, ParseIntError},
|
||||
ops::RangeInclusive,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
@ -48,17 +49,12 @@ pub enum ParseErrorKind {
|
|||
should_close: Span,
|
||||
},
|
||||
/// An error for parsing an integer
|
||||
InvalidInteger {
|
||||
error: ParseIntError,
|
||||
},
|
||||
InvalidInteger(ParseIntError),
|
||||
/// An error for parsing an float
|
||||
InvalidFloat {
|
||||
error: ParseFloatError,
|
||||
},
|
||||
InvalidFloat(ParseFloatError),
|
||||
/// An error for parsing an decimal.
|
||||
InvalidDecimal {
|
||||
error: rust_decimal::Error,
|
||||
},
|
||||
InvalidDecimal(rust_decimal::Error),
|
||||
InvalidIdent,
|
||||
DisallowedStatement {
|
||||
found: TokenKind,
|
||||
expected: TokenKind,
|
||||
|
@ -70,13 +66,27 @@ pub enum ParseErrorKind {
|
|||
InvalidPath {
|
||||
possibly: Option<&'static str>,
|
||||
},
|
||||
InvalidRegex(regex::Error),
|
||||
MissingField {
|
||||
field: Span,
|
||||
idiom: String,
|
||||
kind: MissingKind,
|
||||
},
|
||||
InvalidUuidPart {
|
||||
len: usize,
|
||||
},
|
||||
InvalidDatetimePart {
|
||||
len: usize,
|
||||
},
|
||||
OutrangeDatetimePart {
|
||||
range: RangeInclusive<usize>,
|
||||
},
|
||||
TooManyNanosecondsDatetime,
|
||||
InvalidDatetimeDate,
|
||||
InvalidDatetimeTime,
|
||||
ExceededObjectDepthLimit,
|
||||
ExceededQueryDepthLimit,
|
||||
DurationOverflow,
|
||||
NoWhitespace,
|
||||
}
|
||||
|
||||
|
@ -102,7 +112,7 @@ impl ParseError {
|
|||
|
||||
/// Create a rendered error from the string this error was generated from.
|
||||
pub fn render_on_inner(source: &str, kind: &ParseErrorKind, at: Span) -> RenderedError {
|
||||
match &kind {
|
||||
match kind {
|
||||
ParseErrorKind::Unexpected {
|
||||
found,
|
||||
expected,
|
||||
|
@ -208,9 +218,7 @@ impl ParseError {
|
|||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidInteger {
|
||||
ref error,
|
||||
} => {
|
||||
ParseErrorKind::InvalidInteger(ref error) => {
|
||||
let text = format!("failed to parse integer, {error}");
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
|
@ -219,9 +227,7 @@ impl ParseError {
|
|||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidFloat {
|
||||
ref error,
|
||||
} => {
|
||||
ParseErrorKind::InvalidFloat(ref error) => {
|
||||
let text = format!("failed to parse floating point, {error}");
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
|
@ -230,9 +236,7 @@ impl ParseError {
|
|||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidDecimal {
|
||||
ref error,
|
||||
} => {
|
||||
ParseErrorKind::InvalidDecimal(ref error) => {
|
||||
let text = format!("failed to parse decimal number, {error}");
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
|
@ -241,6 +245,15 @@ impl ParseError {
|
|||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidRegex(ref error) => {
|
||||
let text = format!("failed to parse regex, {error}");
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text: text.to_string(),
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::NoWhitespace => {
|
||||
let text = "Whitespace is dissallowed in this position";
|
||||
let locations = Location::range_of_span(source, at);
|
||||
|
@ -297,6 +310,96 @@ impl ParseError {
|
|||
snippets: vec![snippet_error, snippet_hint],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::DurationOverflow => {
|
||||
let text = "Duration specified exceeds maximum allowed value";
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text: text.to_string(),
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidIdent => {
|
||||
let text = "Duration specified exceeds maximum allowed value";
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text: text.to_string(),
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidUuidPart {
|
||||
len,
|
||||
} => {
|
||||
let text = format!(
|
||||
"Uuid hex section not the correct length, needs to be {len} characters"
|
||||
);
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text,
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidDatetimePart {
|
||||
len,
|
||||
} => {
|
||||
let text = format!(
|
||||
"Datetime digits section not the correct length, needs to be {len} characters"
|
||||
);
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text,
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::OutrangeDatetimePart {
|
||||
range,
|
||||
} => {
|
||||
let text = format!(
|
||||
"Datetime digits not within valid range {}..={}",
|
||||
range.start(),
|
||||
range.end()
|
||||
);
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text,
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::TooManyNanosecondsDatetime => {
|
||||
let text = "Too many digits in Datetime nanoseconds".to_owned();
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(
|
||||
source,
|
||||
locations,
|
||||
Some("Nanoseconds can be at most 9 characters"),
|
||||
);
|
||||
RenderedError {
|
||||
text,
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidDatetimeDate => {
|
||||
let text = "Invalid Datetime date".to_owned();
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text,
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
ParseErrorKind::InvalidDatetimeTime => {
|
||||
let text = "Datetime time outside of valid time range".to_owned();
|
||||
let locations = Location::range_of_span(source, at);
|
||||
let snippet = Snippet::from_source_location_range(source, locations, None);
|
||||
RenderedError {
|
||||
text,
|
||||
snippets: vec![snippet],
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
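A hypothetical helper (not part of this commit) capturing the pattern the arms above repeat: render a message over the error span, with an optional hint attached to the snippet. All names are taken from the code above.

fn render_simple(source: &str, at: Span, text: String, hint: Option<&'static str>) -> RenderedError {
	let locations = Location::range_of_span(source, at);
	let snippet = Snippet::from_source_location_range(source, locations, hint);
	RenderedError {
		text,
		snippets: vec![snippet],
	}
}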
|
||||
|
|
|
@@ -8,9 +8,8 @@ use crate::sql::{value::TryNeg, Cast, Expression, Number, Operator, Value};
|
|||
use crate::syn::token::Token;
|
||||
use crate::syn::{
|
||||
parser::{mac::expected, ParseErrorKind, ParseResult, Parser},
|
||||
token::{t, NumberKind, TokenKind},
|
||||
token::{t, TokenKind},
|
||||
};
|
||||
use std::cmp::Ordering;
|
||||
|
||||
impl Parser<'_> {
|
||||
/// Parses a generic value.
|
||||
|
@@ -124,20 +123,47 @@ impl Parser<'_> {
|
|||
fn prefix_binding_power(&mut self, token: TokenKind) -> Option<((), u8)> {
|
||||
match token {
|
||||
t!("!") | t!("+") | t!("-") => Some(((), 19)),
|
||||
t!("<") if self.peek_token_at(1).kind != t!("FUTURE") => Some(((), 20)),
|
||||
t!("<") => {
|
||||
if self.peek_token_at(1).kind != t!("FUTURE") {
|
||||
Some(((), 20))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
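// Illustrative note (not in the diff): a leading `<` binds as a prefix only
// when it starts a cast such as `<int> "5"`; `<future> { .. }` must not be
// treated as a prefix operator, hence the FUTURE look-ahead above.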
|
||||
|
||||
async fn parse_prefix_op(&mut self, ctx: &mut Stk, min_bp: u8) -> ParseResult<Value> {
|
||||
const I64_ABS_MAX: u64 = 9223372036854775808;
|
||||
|
||||
let token = self.next();
|
||||
let token = self.peek();
|
||||
let operator = match token.kind {
|
||||
t!("+") => Operator::Add,
|
||||
t!("-") => Operator::Neg,
|
||||
t!("!") => Operator::Not,
|
||||
t!("+") => {
|
||||
// +123 is a single number token, so parse it as such
|
||||
let p = self.peek_whitespace_token_at(1);
|
||||
if matches!(p.kind, TokenKind::Digits) {
|
||||
return self.next_token_value::<Number>().map(Value::Number);
|
||||
}
|
||||
self.pop_peek();
|
||||
|
||||
Operator::Add
|
||||
}
|
||||
t!("-") => {
|
||||
// -123 is a single number token, so parse it as such
|
||||
let p = self.peek_whitespace_token_at(1);
|
||||
if matches!(p.kind, TokenKind::Digits) {
|
||||
return self.next_token_value::<Number>().map(Value::Number);
|
||||
}
|
||||
|
||||
self.pop_peek();
|
||||
|
||||
Operator::Neg
|
||||
}
|
||||
t!("!") => {
|
||||
self.pop_peek();
|
||||
Operator::Not
|
||||
}
|
||||
t!("<") => {
|
||||
self.pop_peek();
|
||||
let kind = self.parse_kind(ctx, token.span).await?;
|
||||
let value = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
|
||||
let cast = Cast(kind, value);
|
||||
|
@@ -147,29 +173,6 @@ impl Parser<'_> {
|
|||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
// HACK: The way we handle numbers in the parser has one downside: We can't parse i64::MIN
|
||||
// directly.
|
||||
// The tokens [`-`, `1232`] are parsed independently where - is parsed as a unary operator then 1232
|
||||
// as a positive i64 integer. This results in a problem when 9223372036854775808 is the
|
||||
// positive integer. This is larger than i64::MAX so the parser falls back to parsing a
|
||||
// floating point number. However -9223372036854775808 does fit in an i64, but the parser is,
|
||||
// when parsing the number, unaware that the number will be negative.
|
||||
// To handle this correctly we parse the negation operator followed by an integer here so we can
|
||||
// make sure this specific case is handled correctly.
|
||||
if let Operator::Neg = operator {
|
||||
// parse -12301230 immediately as a negative number,
|
||||
if let TokenKind::Number(NumberKind::Integer) = self.peek_kind() {
|
||||
let token = self.next();
|
||||
let number = self.token_value::<u64>(token)?;
|
||||
let number = match number.cmp(&I64_ABS_MAX) {
|
||||
Ordering::Less => Number::Int(-(number as i64)),
|
||||
Ordering::Equal => Number::Int(i64::MIN),
|
||||
Ordering::Greater => self.token_value::<Number>(token)?.try_neg().unwrap(),
|
||||
};
|
||||
return Ok(Value::Number(number));
|
||||
}
|
||||
}
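// Standalone illustration of the boundary (assumed, not part of this change):
// `i64::try_from(9223372036854775808u64)` fails because i64::MAX is 2^63 - 1,
// while `(9223372036854775808u64).wrapping_neg() as i64` equals `i64::MIN`.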
|
||||
|
||||
let v = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
|
||||
|
||||
// HACK: For compatibility with the old parser, apply the + and - operators immediately if the
|
||||
|
@@ -195,17 +198,18 @@ impl Parser<'_> {
|
|||
})))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_knn(&mut self, token: Token) -> ParseResult<Operator> {
|
||||
let amount = self.next_token_value()?;
|
||||
let op = if self.eat(t!(",")) {
|
||||
let token = self.next();
|
||||
match &token.kind {
|
||||
TokenKind::Distance(k) => {
|
||||
match self.peek_kind(){
|
||||
TokenKind::Distance(ref k) => {
|
||||
self.pop_peek();
|
||||
let d = self.convert_distance(k).map(Some)?;
|
||||
Operator::Knn(amount, d)
|
||||
},
|
||||
TokenKind::Number(NumberKind::Integer) => {
|
||||
let ef = self.token_value(token)?;
|
||||
TokenKind::Digits | TokenKind::Number(_) => {
|
||||
let ef = self.next_token_value()?;
|
||||
Operator::Ann(amount, ef)
|
||||
}
|
||||
_ => {
|
||||
|
|
|
@@ -3,8 +3,11 @@ use reblessive::Stk;
|
|||
use crate::{
|
||||
sql::{Function, Ident, Model},
|
||||
syn::{
|
||||
parser::mac::{expected, unexpected},
|
||||
token::{t, NumberKind, TokenKind},
|
||||
parser::{
|
||||
mac::{expected, expected_whitespace, unexpected},
|
||||
ParseError, ParseErrorKind,
|
||||
},
|
||||
token::{t, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
|
@@ -52,26 +55,38 @@ impl Parser<'_> {
|
|||
}
|
||||
let start = expected!(self, t!("<")).span;
|
||||
|
||||
let token = self.lexer.lex_only_integer();
|
||||
let major = match token.kind {
|
||||
TokenKind::Number(NumberKind::Integer) => self.token_value::<u64>(token)?,
|
||||
x => unexpected!(self, x, "a integer"),
|
||||
let token = self.next();
|
||||
let major: u32 = match token.kind {
|
||||
TokenKind::Digits => std::str::from_utf8(self.lexer.reader.span(token.span))
|
||||
.unwrap()
|
||||
.parse()
|
||||
.map_err(ParseErrorKind::InvalidInteger)
|
||||
.map_err(|e| ParseError::new(e, token.span))?,
|
||||
x => unexpected!(self, x, "an integer"),
|
||||
};
|
||||
|
||||
expected!(self, t!("."));
|
||||
expected_whitespace!(self, t!("."));
|
||||
|
||||
let token = self.lexer.lex_only_integer();
|
||||
let minor = match token.kind {
|
||||
TokenKind::Number(NumberKind::Integer) => self.token_value::<u64>(token)?,
|
||||
x => unexpected!(self, x, "a integer"),
|
||||
let token = self.next_whitespace();
|
||||
let minor: u32 = match token.kind {
|
||||
TokenKind::Digits => std::str::from_utf8(self.lexer.reader.span(token.span))
|
||||
.unwrap()
|
||||
.parse()
|
||||
.map_err(ParseErrorKind::InvalidInteger)
|
||||
.map_err(|e| ParseError::new(e, token.span))?,
|
||||
x => unexpected!(self, x, "an integer"),
|
||||
};
|
||||
|
||||
expected!(self, t!("."));
|
||||
expected_whitespace!(self, t!("."));
|
||||
|
||||
let token = self.lexer.lex_only_integer();
|
||||
let patch = match token.kind {
|
||||
TokenKind::Number(NumberKind::Integer) => self.token_value::<u64>(token)?,
|
||||
x => unexpected!(self, x, "a integer"),
|
||||
let token = self.next_whitespace();
|
||||
let patch: u32 = match token.kind {
|
||||
TokenKind::Digits => std::str::from_utf8(self.lexer.reader.span(token.span))
|
||||
.unwrap()
|
||||
.parse()
|
||||
.map_err(ParseErrorKind::InvalidInteger)
|
||||
.map_err(|e| ParseError::new(e, token.span))?,
|
||||
x => unexpected!(self, x, "an integer"),
|
||||
};
|
||||
|
||||
self.expect_closing_delimiter(t!(">"), start)?;
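// A hypothetical helper (not in this commit) that would roughly deduplicate
// the three near-identical Digits-to-u32 conversions above:
fn parse_version_part(&mut self) -> ParseResult<u32> {
	let token = self.next_whitespace();
	match token.kind {
		TokenKind::Digits => std::str::from_utf8(self.lexer.reader.span(token.span))
			.unwrap()
			.parse()
			.map_err(ParseErrorKind::InvalidInteger)
			.map_err(|e| ParseError::new(e, token.span)),
		x => unexpected!(self, x, "an integer"),
	}
}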
|
||||
|
|
|
@@ -267,14 +267,22 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
Part::Last
|
||||
}
|
||||
t!("123") => Part::Index(self.next_token_value()?),
|
||||
t!("+") | TokenKind::Digits | TokenKind::Number(_) => {
|
||||
Part::Index(self.next_token_value()?)
|
||||
}
|
||||
t!("-") => {
|
||||
if let TokenKind::Digits = self.peek_whitespace_token_at(1).kind {
|
||||
unexpected!(self, t!("-"),"$, * or a number" => "an index can't be negative");
|
||||
}
|
||||
unexpected!(self, t!("-"), "$, * or a number");
|
||||
}
|
||||
t!("?") | t!("WHERE") => {
|
||||
self.pop_peek();
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
Part::Where(value)
|
||||
}
|
||||
t!("$param") => Part::Value(Value::Param(self.next_token_value()?)),
|
||||
TokenKind::Strand => Part::Value(Value::Strand(self.next_token_value()?)),
|
||||
TokenKind::Qoute(_x) => Part::Value(Value::Strand(self.next_token_value()?)),
|
||||
_ => {
|
||||
let idiom = self.parse_basic_idiom()?;
|
||||
Part::Value(Value::Idiom(idiom))
|
||||
|
@@ -318,10 +326,18 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
Part::Last
|
||||
}
|
||||
t!("123") => {
|
||||
let number = self.token_value(token)?;
|
||||
TokenKind::Digits | t!("+") | TokenKind::Number(_) => {
|
||||
let number = self.next_token_value()?;
|
||||
Part::Index(number)
|
||||
}
|
||||
t!("-") => {
|
||||
let peek_digit = self.peek_whitespace_token_at(1);
|
||||
if let TokenKind::Digits = peek_digit.kind {
|
||||
let span = self.recent_span().covers(peek_digit.span);
|
||||
unexpected!(@ span, self, t!("-"),"$, * or a number" => "an index can't be negative");
|
||||
}
|
||||
unexpected!(self, t!("-"), "$, * or a number");
|
||||
}
|
||||
x => unexpected!(self, x, "$, * or a number"),
|
||||
};
|
||||
self.expect_closing_delimiter(t!("]"), token.span)?;
|
||||
|
@@ -356,10 +372,18 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
Part::All
|
||||
}
|
||||
t!("123") => {
|
||||
TokenKind::Digits | t!("+") | TokenKind::Number(_) => {
|
||||
let number = self.next_token_value()?;
|
||||
Part::Index(number)
|
||||
}
|
||||
t!("-") => {
|
||||
let peek_digit = self.peek_whitespace_token_at(1);
|
||||
if let TokenKind::Digits = peek_digit.kind {
|
||||
let span = self.recent_span().covers(peek_digit.span);
|
||||
unexpected!(@ span, self, t!("-"),"$, * or a number" => "an index can't be negative");
|
||||
}
|
||||
unexpected!(self, t!("-"), "$, * or a number");
|
||||
}
|
||||
x => unexpected!(self, x, "$, * or a number"),
|
||||
};
|
||||
self.expect_closing_delimiter(t!("]"), token.span)?;
|
||||
|
|
|
@@ -6,34 +6,55 @@ use crate::{
|
|||
sql::{Array, Ident, Object, Strand, Value},
|
||||
syn::{
|
||||
parser::mac::expected,
|
||||
token::{t, Span, TokenKind},
|
||||
token::{t, QouteKind, Span, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
use super::{ParseResult, Parser};
|
||||
use super::{mac::unexpected, ParseResult, Parser};
|
||||
|
||||
impl Parser<'_> {
|
||||
pub async fn parse_json(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let token = self.next();
|
||||
let token = self.peek();
|
||||
match token.kind {
|
||||
t!("NULL") => Ok(Value::Null),
|
||||
t!("true") => Ok(Value::Bool(true)),
|
||||
t!("false") => Ok(Value::Bool(false)),
|
||||
t!("{") => self.parse_json_object(ctx, token.span).await.map(Value::Object),
|
||||
t!("[") => self.parse_json_array(ctx, token.span).await.map(Value::Array),
|
||||
TokenKind::Duration => self.token_value(token).map(Value::Duration),
|
||||
TokenKind::DateTime => self.token_value(token).map(Value::Datetime),
|
||||
TokenKind::Strand => {
|
||||
t!("NULL") => {
|
||||
self.pop_peek();
|
||||
Ok(Value::Null)
|
||||
}
|
||||
t!("true") => {
|
||||
self.pop_peek();
|
||||
Ok(Value::Bool(true))
|
||||
}
|
||||
t!("false") => {
|
||||
self.pop_peek();
|
||||
Ok(Value::Bool(false))
|
||||
}
|
||||
t!("{") => {
|
||||
self.pop_peek();
|
||||
self.parse_json_object(ctx, token.span).await.map(Value::Object)
|
||||
}
|
||||
t!("[") => {
|
||||
self.pop_peek();
|
||||
self.parse_json_array(ctx, token.span).await.map(Value::Array)
|
||||
}
|
||||
TokenKind::Qoute(QouteKind::Plain | QouteKind::PlainDouble) => {
|
||||
let strand: Strand = self.next_token_value()?;
|
||||
if self.legacy_strands {
|
||||
self.parse_legacy_strand(ctx).await
|
||||
} else {
|
||||
Ok(Value::Strand(Strand(self.lexer.string.take().unwrap())))
|
||||
if let Some(x) = self.reparse_legacy_strand(ctx, &strand.0).await {
|
||||
return Ok(x);
|
||||
}
|
||||
}
|
||||
Ok(Value::Strand(strand))
|
||||
}
|
||||
TokenKind::Digits | TokenKind::Number(_) => {
|
||||
let peek = self.glue()?;
|
||||
match peek.kind {
|
||||
TokenKind::Duration => Ok(Value::Duration(self.next_token_value()?)),
|
||||
TokenKind::Number(_) => Ok(Value::Number(self.next_token_value()?)),
|
||||
x => unexpected!(self, x, "a number"),
|
||||
}
|
||||
}
|
||||
TokenKind::Number(_) => self.token_value(token).map(Value::Number),
|
||||
TokenKind::Uuid => self.token_value(token).map(Value::Uuid),
|
||||
_ => {
|
||||
let ident = self.token_value::<Ident>(token)?.0;
|
||||
let ident = self.next_token_value::<Ident>()?.0;
|
||||
self.parse_thing_from_ident(ctx, ident).await.map(Value::Thing)
|
||||
}
|
||||
}
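// Illustrative inputs and the arms above that handle them:
//   null / true / false  -> Value::Null / Value::Bool
//   digits such as 1h30m -> glued into a Duration token -> Value::Duration
//   digits such as 1.5   -> glued into a Number token   -> Value::Number
//   a bare identifier    -> parsed as a record Thing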
|
||||
|
|
|
@@ -1,12 +1,21 @@
|
|||
/// A macro for indicating that the parser encountered a token which it didn't expect.
|
||||
macro_rules! unexpected {
|
||||
($parser:expr, $found:expr, $expected:expr) => {
|
||||
(@ $span:expr, $parser:expr, $found:expr, $expected:expr $(=> $explain:expr)?) => {{
|
||||
unexpected!(@@withSpan, $span, $parser,$found, $expected $(=> $explain)?)
|
||||
}};
|
||||
|
||||
($parser:expr, $found:expr, $expected:expr $(=> $explain:expr)?) => {{
|
||||
let span = $parser.recent_span();
|
||||
unexpected!(@@withSpan, span, $parser,$found, $expected $(=> $explain)?)
|
||||
}};
|
||||
|
||||
(@@withSpan, $span:expr, $parser:expr, $found:expr, $expected:expr) => {
|
||||
match $found {
|
||||
$crate::syn::token::TokenKind::Invalid => {
|
||||
let error = $parser.lexer.error.take().unwrap();
|
||||
return Err($crate::syn::parser::ParseError::new(
|
||||
$crate::syn::parser::ParseErrorKind::InvalidToken(error),
|
||||
$parser.recent_span(),
|
||||
$span
|
||||
));
|
||||
}
|
||||
$crate::syn::token::TokenKind::Eof => {
|
||||
|
@@ -15,7 +24,7 @@ macro_rules! unexpected {
|
|||
$crate::syn::parser::ParseErrorKind::UnexpectedEof {
|
||||
expected,
|
||||
},
|
||||
$parser.recent_span(),
|
||||
$span
|
||||
));
|
||||
}
|
||||
x => {
|
||||
|
@@ -25,7 +34,39 @@ macro_rules! unexpected {
|
|||
found: x,
|
||||
expected,
|
||||
},
|
||||
$parser.recent_span(),
|
||||
$span
|
||||
));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
(@@withSpan, $span:expr, $parser:expr, $found:expr, $expected:expr => $explain:expr) => {
|
||||
match $found {
|
||||
$crate::syn::token::TokenKind::Invalid => {
|
||||
let error = $parser.lexer.error.take().unwrap();
|
||||
return Err($crate::syn::parser::ParseError::new(
|
||||
$crate::syn::parser::ParseErrorKind::InvalidToken(error),
|
||||
$span
|
||||
));
|
||||
}
|
||||
$crate::syn::token::TokenKind::Eof => {
|
||||
let expected = $expected;
|
||||
return Err($crate::syn::parser::ParseError::new(
|
||||
$crate::syn::parser::ParseErrorKind::UnexpectedEof {
|
||||
expected,
|
||||
},
|
||||
$span
|
||||
));
|
||||
}
|
||||
x => {
|
||||
let expected = $expected;
|
||||
return Err($crate::syn::parser::ParseError::new(
|
||||
$crate::syn::parser::ParseErrorKind::UnexpectedExplain {
|
||||
found: x,
|
||||
expected,
|
||||
explain: $explain,
|
||||
},
|
||||
$span
|
||||
));
|
||||
}
|
||||
}
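// Example invocations, as used elsewhere in this commit:
//   unexpected!(self, x, "an integer");
//   unexpected!(@ span, self, t!("-"), "$, * or a number" => "an index can't be negative");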
|
||||
|
@@ -64,6 +105,38 @@ macro_rules! expected {
|
|||
}};
|
||||
}
|
||||
|
||||
/// A macro for requiring a certain token to be next, returning an error otherwise. Unlike [`expected!`] this does not skip whitespace tokens.
|
||||
macro_rules! expected_whitespace {
|
||||
($parser:expr, $($kind:tt)*) => {{
|
||||
let token = $parser.next_whitespace();
|
||||
match token.kind {
|
||||
$($kind)* => token,
|
||||
$crate::syn::parser::TokenKind::Invalid => {
|
||||
let error = $parser.lexer.error.take().unwrap();
|
||||
return Err($crate::syn::parser::ParseError::new(
|
||||
$crate::syn::parser::ParseErrorKind::InvalidToken(error),
|
||||
$parser.recent_span(),
|
||||
));
|
||||
}
|
||||
x => {
|
||||
let expected = $($kind)*.as_str();
|
||||
let kind = if let $crate::syn::token::TokenKind::Eof = x {
|
||||
$crate::syn::parser::ParseErrorKind::UnexpectedEof {
|
||||
expected,
|
||||
}
|
||||
} else {
|
||||
$crate::syn::parser::ParseErrorKind::Unexpected {
|
||||
found: x,
|
||||
expected,
|
||||
}
|
||||
};
|
||||
|
||||
return Err($crate::syn::parser::ParseError::new(kind, $parser.last_span()));
|
||||
}
|
||||
}
|
||||
}};
|
||||
}
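// Used in this commit for whitespace-sensitive productions, e.g. the version
// syntax where `1.2.3` may not contain spaces:
//   expected_whitespace!(self, t!("."));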
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_parse {
|
||||
|
@@ -149,40 +222,8 @@ macro_rules! enter_query_recursion {
|
|||
}};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! enter_flexible_ident{
|
||||
($name:ident = $this:expr => ($enabled:expr){ $($t:tt)* }) => {{
|
||||
struct Dropper<'a, 'b>(&'a mut $crate::syn::parser::Parser<'b>,bool);
|
||||
impl Drop for Dropper<'_, '_> {
|
||||
fn drop(&mut self) {
|
||||
self.0.lexer.flexible_ident = self.1;
|
||||
}
|
||||
}
|
||||
impl<'a> ::std::ops::Deref for Dropper<'_,'a>{
|
||||
type Target = $crate::syn::parser::Parser<'a>;
|
||||
|
||||
fn deref(&self) -> &Self::Target{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ::std::ops::DerefMut for Dropper<'_,'a>{
|
||||
fn deref_mut(&mut self) -> &mut Self::Target{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
let enabled = $this.lexer.flexible_ident;
|
||||
$this.lexer.flexible_ident = $enabled;
|
||||
#[allow(unused_mut)]
|
||||
let mut $name = Dropper($this,enabled);
|
||||
{
|
||||
$($t)*
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
pub(super) use expected;
|
||||
pub(super) use expected_whitespace;
|
||||
pub(super) use unexpected;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@@ -4,6 +4,8 @@
|
|||
//! Most of the functions of the SurrealQL parser peek a token from the lexer and then decide to
|
||||
//! take a path depending on which token is next.
|
||||
//!
|
||||
//! # Implementation Details
|
||||
//!
|
||||
//! There are a bunch of common patterns for which this module has some convenience functions.
|
||||
//! - Whenever only one token can be next you should use the [`expected!`] macro. This macro
|
||||
//! ensures that the given token type is next and if not returns a parser error.
|
||||
|
@@ -15,6 +17,39 @@
|
|||
//! - If a closing delimiting token is expected use [`Parser::expect_closing_delimiter`]. This
|
||||
//! function will raise an error if the expected delimiter isn't the next token. This error will
|
||||
//! also point to which delimiter the parser expected to be closed.
|
||||
//!
|
||||
//! ## Far Token Peek
|
||||
//!
|
||||
//! Occasionally the parser needs to check further ahead than peeking allows.
|
||||
//! This is done with the [`Parser::peek_token_at`] function. This function peeks a given number
|
||||
//! of tokens ahead of the normal peek, up to 3 tokens further.
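//!
//! For example (illustrative, taken from this commit's cast parsing), deciding
//! between a cast and a future requires looking one token past the `<`:
//!
//! ```ignore
//! if self.peek_token_at(1).kind != t!("FUTURE") {
//!     // treat `<` as the start of a cast
//! }
//! ```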
|
||||
//!
|
||||
//! ## WhiteSpace Tokens
|
||||
//!
|
||||
//! The lexer produces whitespace tokens; these are tokens which are normally ignored in most places
|
||||
//! in the syntax as they have no bearing on the meaning of a statement. [`Parser::next`] and
|
||||
//! [`Parser::peek`] automatically skip over any whitespace tokens. However in some places, like
|
||||
//! in a record-id and when gluing tokens, these white-space tokens are required for correct
|
||||
//! parsing. In those cases the function [`Parser::next_whitespace`] and the other `_whitespace`
|
||||
//! functions are used. These functions don't skip whitespace tokens. However, they do not undo
|
||||
//! whitespace tokens which might have been skipped. Implementers must be careful not to call a
|
||||
//! function which requires whitespace tokens when they may already have been skipped.
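//!
//! A small illustration (from this commit's version parsing):
//!
//! ```ignore
//! // version numbers are whitespace sensitive: `1.2.3` is valid, `1 . 2 . 3` is not
//! expected_whitespace!(self, t!("."));
//! let token = self.next_whitespace();
//! ```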
|
||||
//!
|
||||
//! ## Token Gluing
|
||||
//!
|
||||
//! Tokens produced by the lexer are in some places more fine-grained than usual. Numbers,
|
||||
//! Identifiers and strand-like productions can be made up of multiple smaller tokens. A
|
||||
//! floating point number, for example, can be made up of at most three digits tokens, a dot token,
|
||||
//! an exponent token, a number suffix token and two `-` or `+` tokens. Whenever these tokens
|
||||
//! are required the parser calls a `glue_` method which will take the current peeked token and
|
||||
//! replace it with a more complex, glued-together token if possible.
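//!
//! A sketch of the resulting pattern (using `glue` and `next_token_value` as in
//! this commit):
//!
//! ```ignore
//! // `123.5` reaches the parser as [Digits, `.`, Digits]; glue first, then read
//! let token = self.glue()?;
//! if let TokenKind::Number(_) = token.kind {
//!     let number = self.next_token_value::<Number>()?;
//! }
//! ```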
|
||||
//!
|
||||
//! ## Use of reblessive
|
||||
//!
|
||||
//! This parser uses reblessive to be able to parse deep without overflowing the stack. This means
|
||||
//! that all functions which might recurse, i.e. in some paths can call themselves again, are async
|
||||
//! functions taking an argument from reblessive to call recursive functions without using more stack
|
||||
//! with each depth.
|
||||
|
||||
use self::token_buffer::TokenBuffer;
|
||||
use crate::{
|
||||
|
@@ -39,6 +74,7 @@ mod object;
|
|||
mod prime;
|
||||
mod stmt;
|
||||
mod thing;
|
||||
mod token;
|
||||
mod token_buffer;
|
||||
|
||||
#[cfg(test)]
|
||||
|
@@ -99,16 +135,23 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Disallow a query from having objects deeper than the given limit.
|
||||
/// Arrays also count towards objects. So `[{foo: [] }]` would be 3 deep.
|
||||
pub fn with_object_recursion_limit(mut self, limit: usize) -> Self {
|
||||
self.object_recursion = limit;
|
||||
self
|
||||
}
|
||||
|
||||
/// Disallow a query from being deeper than the given limit.
|
||||
/// A query recurses when a statement contains another statement within itself.
|
||||
/// Examples are subqueries and block-like constructs such as block statements and if statements.
|
||||
pub fn with_query_recursion_limit(mut self, limit: usize) -> Self {
|
||||
self.query_recursion = limit;
|
||||
self
|
||||
}
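// Illustrative builder usage (assuming the `Parser::new(&[u8])` constructor
// used elsewhere in this crate):
//   let parser = Parser::new(source)
//       .with_object_recursion_limit(100)
//       .with_query_recursion_limit(20);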
|
||||
|
||||
/// Parse strands like the old parser, where a strand which looks like a UUID, Record-Id, or a
|
||||
/// DateTime will be parsed as that type instead of as a strand.
|
||||
pub fn with_allow_legacy_strand(mut self, value: bool) -> Self {
|
||||
self.legacy_strands = value;
|
||||
self
|
||||
|
@@ -120,6 +163,8 @@ impl<'a> Parser<'a> {
|
|||
}
|
||||
|
||||
/// Set whether to allow record-ids which don't adhere to regular ident rules.
|
||||
/// Setting this to true will allow parsing of, for example, `foo:0bar`. This would be rejected
|
||||
/// by normal identifier rules as most identifiers can't start with a number.
|
||||
pub fn allow_fexible_record_id(&mut self, value: bool) {
|
||||
self.flexible_record_id = value;
|
||||
}
|
||||
|
@@ -149,11 +194,31 @@ impl<'a> Parser<'a> {
|
|||
/// Returns the next token and advances the parser one token forward.
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn next(&mut self) -> Token {
|
||||
let res = loop {
|
||||
let res = self.token_buffer.pop().unwrap_or_else(|| self.lexer.next_token());
|
||||
if res.kind != TokenKind::WhiteSpace {
|
||||
break res;
|
||||
}
|
||||
};
|
||||
self.last_span = res.span;
|
||||
res
|
||||
}
|
||||
|
||||
/// Returns the next token and advances the parser one token forward.
|
||||
///
|
||||
/// This function is like next but returns whitespace tokens which are normally skipped
|
||||
#[allow(clippy::should_implement_trait)]
|
||||
pub fn next_whitespace(&mut self) -> Token {
|
||||
let res = self.token_buffer.pop().unwrap_or_else(|| self.lexer.next_token());
|
||||
self.last_span = res.span;
|
||||
res
|
||||
}
|
||||
|
||||
/// Returns whether there is a token in the token buffer, meaning that a token was peeked.
|
||||
pub fn has_peek(&self) -> bool {
|
||||
!self.token_buffer.is_empty()
|
||||
}
|
||||
|
||||
/// Consume the current peeked value and advance the parser one token forward.
|
||||
///
|
||||
/// Should only be called after peeking a value.
|
||||
|
@@ -165,6 +230,30 @@ impl<'a> Parser<'a> {
|
|||
|
||||
/// Returns the next token without consuming it.
|
||||
pub fn peek(&mut self) -> Token {
|
||||
loop {
|
||||
let Some(x) = self.token_buffer.first() else {
|
||||
let res = loop {
|
||||
let res = self.lexer.next_token();
|
||||
if res.kind != TokenKind::WhiteSpace {
|
||||
break res;
|
||||
}
|
||||
};
|
||||
self.token_buffer.push(res);
|
||||
return res;
|
||||
};
|
||||
if x.kind == TokenKind::WhiteSpace {
|
||||
self.token_buffer.pop();
|
||||
continue;
|
||||
}
|
||||
break x;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the next token without consuming it.
|
||||
///
|
||||
/// This function is like peek but returns whitespace tokens which are normally skipped
|
||||
/// Does not undo tokens skipped in a previous normal peek.
|
||||
pub fn peek_whitespace(&mut self) -> Token {
|
||||
let Some(x) = self.token_buffer.first() else {
|
||||
let res = self.lexer.next_token();
|
||||
self.token_buffer.push(res);
|
||||
|
@@ -175,19 +264,30 @@ impl<'a> Parser<'a> {
|
|||
|
||||
/// Return the token kind of the next token without consuming it.
|
||||
pub fn peek_kind(&mut self) -> TokenKind {
|
||||
let Some(x) = self.token_buffer.first().map(|x| x.kind) else {
|
||||
let res = self.lexer.next_token();
|
||||
self.token_buffer.push(res);
|
||||
return res.kind;
|
||||
};
|
||||
x
|
||||
self.peek().kind
|
||||
}
|
||||
|
||||
/// Returns the next n'th token without consuming it.
|
||||
/// `peek_token_at(0)` is equivalent to `peek`.
|
||||
pub fn peek_token_at(&mut self, at: u8) -> Token {
|
||||
for _ in self.token_buffer.len()..=at {
|
||||
self.token_buffer.push(self.lexer.next_token());
|
||||
let r = loop {
|
||||
let r = self.lexer.next_token();
|
||||
if r.kind != TokenKind::WhiteSpace {
|
||||
break r;
|
||||
}
|
||||
};
|
||||
self.token_buffer.push(r);
|
||||
}
|
||||
self.token_buffer.at(at).unwrap()
|
||||
}
|
||||
|
||||
/// Returns the next n'th token without consuming it.
|
||||
/// `peek_whitespace_token_at(0)` is equivalent to `peek_whitespace`.
|
||||
pub fn peek_whitespace_token_at(&mut self, at: u8) -> Token {
|
||||
for _ in self.token_buffer.len()..=at {
|
||||
let r = self.lexer.next_token();
|
||||
self.token_buffer.push(r);
|
||||
}
|
||||
self.token_buffer.at(at).unwrap()
|
||||
}
|
||||
|
@@ -206,14 +306,49 @@ impl<'a> Parser<'a> {
|
|||
/// Eat the next token if it is of the given kind.
|
||||
/// Returns whether a token was eaten.
|
||||
pub fn eat(&mut self, token: TokenKind) -> bool {
|
||||
if token == self.peek().kind {
|
||||
let peek = self.peek();
|
||||
if token == peek.kind {
|
||||
self.token_buffer.pop();
|
||||
self.last_span = peek.span;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Eat the next token if it is of the given kind.
|
||||
/// Returns whether a token was eaten.
|
||||
///
|
||||
/// Unlike [`Parser::eat`] this doesn't skip whitespace tokens
|
||||
pub fn eat_whitespace(&mut self, token: TokenKind) -> bool {
|
||||
let peek = self.peek_whitespace();
|
||||
if token == peek.kind {
|
||||
self.token_buffer.pop();
|
||||
self.last_span = peek.span;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Forces the next token to be the given one.
|
||||
/// Used in token gluing to replace the current one with the glued token.
|
||||
fn prepend_token(&mut self, token: Token) {
|
||||
self.token_buffer.push_front(token);
|
||||
}
|
||||
|
||||
/// Returns the string for a given span of the source.
|
||||
/// Will panic if the given span was not valid for the source, or invalid utf8
|
||||
fn span_str(&self, span: Span) -> &'a str {
|
||||
std::str::from_utf8(self.span_bytes(span)).expect("invalid span segment for source")
|
||||
}
|
||||
|
||||
/// Returns the bytes for a given span of the source.
|
||||
/// Will panic if the given span was not valid for the source.
|
||||
fn span_bytes(&self, span: Span) -> &'a [u8] {
|
||||
self.lexer.reader.span(span)
|
||||
}
|
||||
|
||||
/// Checks if the next token is of the given kind. If it isn't, it returns an UnclosedDelimiter
|
||||
/// error.
|
||||
fn expect_closing_delimiter(&mut self, kind: TokenKind, should_close: Span) -> ParseResult<()> {
|
||||
|
@@ -229,17 +364,6 @@ impl<'a> Parser<'a> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure that no whitespace was parsed between the last token and the current one.
|
||||
///
|
||||
/// This is used in places where whitespace is prohibited like inside a record id.
|
||||
fn no_whitespace(&mut self) -> ParseResult<()> {
|
||||
if let Some(span) = self.lexer.whitespace_span() {
|
||||
Err(ParseError::new(ParseErrorKind::NoWhitespace, span))
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Recover the parser state to after a given span.
|
||||
pub fn backup_after(&mut self, span: Span) {
|
||||
self.token_buffer.clear();
|
||||
|
@@ -295,7 +419,8 @@ impl<'a> Parser<'a> {
|
|||
}) => {
|
||||
// Ensure that the last token was fully parsed.
|
||||
self.backup_after(at);
|
||||
if self.peek().kind != TokenKind::Eof || self.lexer.whitespace_span().is_some() {
|
||||
let peek = self.peek_whitespace();
|
||||
if peek.kind != TokenKind::Eof && peek.kind != TokenKind::WhiteSpace {
|
||||
// if there is a next token or we ate whitespace after the eof we can be sure
|
||||
// that the error is not the result of a token only being partially present.
|
||||
return PartialResult::Ready {
|
||||
|
|
|
@@ -5,7 +5,7 @@ use reblessive::Stk;
|
|||
|
||||
use crate::{
|
||||
enter_object_recursion,
|
||||
sql::{Block, Geometry, Object, Strand, Value},
|
||||
sql::{Block, Geometry, Number, Object, Strand, Value},
|
||||
syn::{
|
||||
parser::{mac::expected, ParseError, ParseErrorKind, ParseResult, Parser},
|
||||
token::{t, Span, TokenKind},
|
||||
|
@@ -30,7 +30,10 @@ impl Parser<'_> {
|
|||
})
|
||||
}
|
||||
|
||||
// Check first if it can be an object.
|
||||
// glue possible complex tokens.
|
||||
self.glue()?;
|
||||
|
||||
// Now check first if it can be an object.
|
||||
if self.peek_token_at(1).kind == t!(":") {
|
||||
enter_object_recursion!(this = self => {
|
||||
return this.parse_object_or_geometry(ctx, start).await;
|
||||
|
@@ -41,6 +44,373 @@ impl Parser<'_> {
|
|||
self.parse_block(ctx, start).await.map(Box::new).map(Value::Block)
|
||||
}
|
||||
|
||||
async fn parse_object_or_geometry_after_type(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Span,
|
||||
key: String,
|
||||
) -> ParseResult<Value> {
|
||||
expected!(self, t!(":"));
|
||||
// for it to be a geometry the next value must be strand-like.
|
||||
let (t!("\"") | t!("'")) = self.peek_kind() else {
|
||||
return self
|
||||
.parse_object_from_key(ctx, key, BTreeMap::new(), start)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
};
|
||||
|
||||
// We know it is a strand so check if the type is one of the allowed geometry types.
|
||||
// If it is, there are some which all take roughly the same type of value and produce a
|
||||
// similar output, which is parsed with parse_geometry_after_type
|
||||
//
|
||||
// GeometryCollection however has a different object key for its value, so it is handled
|
||||
// apart from the others.
|
||||
let type_value = self.next_token_value::<Strand>()?.0;
|
||||
match type_value.as_str() {
|
||||
"Point" => {
|
||||
// we matched a type correctly but the field containing the geometry value
|
||||
// can still be wrong.
|
||||
//
|
||||
// we can unwrap strand since we just matched it to not be an err.
|
||||
self.parse_geometry_after_type(ctx, start, key, type_value, Self::to_point, |x| {
|
||||
Value::Geometry(Geometry::Point(x))
|
||||
})
|
||||
.await
|
||||
}
|
||||
"LineString" => {
|
||||
self.parse_geometry_after_type(ctx, start, key, type_value, Self::to_line, |x| {
|
||||
Value::Geometry(Geometry::Line(x))
|
||||
})
|
||||
.await
|
||||
}
|
||||
"Polygon" => {
|
||||
self.parse_geometry_after_type(ctx, start, key, type_value, Self::to_polygon, |x| {
|
||||
Value::Geometry(Geometry::Polygon(x))
|
||||
})
|
||||
.await
|
||||
}
|
||||
"MultiPoint" => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
type_value,
|
||||
Self::to_multipoint,
|
||||
|x| Value::Geometry(Geometry::MultiPoint(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
"MultiLineString" => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
type_value,
|
||||
Self::to_multiline,
|
||||
|x| Value::Geometry(Geometry::MultiLine(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
"MultiPolygon" => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
type_value,
|
||||
Self::to_multipolygon,
|
||||
|x| Value::Geometry(Geometry::MultiPolygon(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
"GeometryCollection" => {
|
||||
if !self.eat(t!(",")) {
|
||||
// missing next field, not a geometry.
|
||||
return self
|
||||
.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, Value::Strand(type_value.into()))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
|
||||
let coord_key = self.parse_object_key()?;
|
||||
if coord_key != "geometries" {
|
||||
expected!(self, t!(":"));
|
||||
// invalid field key, not a Geometry
|
||||
return self
|
||||
.parse_object_from_key(
|
||||
ctx,
|
||||
coord_key,
|
||||
BTreeMap::from([(key, Value::Strand(type_value.into()))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
|
||||
expected!(self, t!(":"));
|
||||
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
|
||||
// check for an object end, if it doesn't end it is not a geometry.
|
||||
if !self.eat(t!(",")) {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
} else {
|
||||
if self.peek_kind() != t!("}") {
|
||||
// A comma and then no brace: more than two fields, not a geometry.
|
||||
return self
|
||||
.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([
|
||||
(key, Value::Strand(type_value.into())),
|
||||
(coord_key, value),
|
||||
]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
self.pop_peek();
|
||||
}
|
||||
|
||||
// try to convert to the right value.
|
||||
if let Value::Array(x) = value {
|
||||
// test first to avoid cloning.
|
||||
if x.iter().all(|x| matches!(x, Value::Geometry(_))) {
|
||||
let geometries =
|
||||
x.0.into_iter()
|
||||
.map(|x| {
|
||||
if let Value::Geometry(x) = x {
|
||||
x
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
return Ok(Value::Geometry(Geometry::Collection(geometries)));
|
||||
}
|
||||
|
||||
return Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, Value::Strand(type_value.into())),
|
||||
(coord_key, Value::Array(x)),
|
||||
]))));
|
||||
}
|
||||
|
||||
// Couldn't convert so it is a normal object.
|
||||
Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, Value::Strand(type_value.into())),
|
||||
(coord_key, value),
|
||||
]))))
|
||||
}
|
||||
// key was not one of the allowed keys so it is a normal object.
|
||||
_ => {
|
||||
let object = BTreeMap::from([(key, Value::Strand(type_value.into()))]);
|
||||
|
||||
if self.eat(t!(",")) {
|
||||
self.parse_object_from_map(ctx, object, start).await.map(Value::Object)
|
||||
} else {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
Ok(Value::Object(Object(object)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
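// Illustrative inputs for the function above (GeoJSON-style objects):
//   { type: "Point", coordinates: [1.0, 2.0] }        -> Geometry::Point
//   { type: "Point", name: "x" }                      -> plain Object
//   { type: "GeometryCollection", geometries: [..] }  -> Geometry::Collection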
|
||||
|
||||
async fn parse_object_or_geometry_after_coordinates(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Span,
|
||||
key: String,
|
||||
) -> ParseResult<Value> {
|
||||
expected!(self, t!(":"));
|
||||
|
||||
// found coordinates field, next must be a coordinates value but we don't know
|
||||
// which until we match type.
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
|
||||
if !self.eat(t!(",")) {
|
||||
// no comma; the object must end early.
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(key, value)]))));
|
||||
}
|
||||
|
||||
if self.eat(t!("}")) {
|
||||
// object ends early.
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(key, value)]))));
|
||||
}
|
||||
|
||||
let type_key = self.parse_object_key()?;
|
||||
if type_key != "type" {
|
||||
expected!(self, t!(":"));
|
||||
// not the right field, return object.
|
||||
return self
|
||||
.parse_object_from_key(ctx, type_key, BTreeMap::from([(key, value)]), start)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
expected!(self, t!(":"));
|
||||
|
||||
let (t!("\"") | t!("'")) = self.peek_kind() else {
|
||||
// not the right value; fall back to parsing an object.
|
||||
return self
|
||||
.parse_object_from_key(ctx, type_key, BTreeMap::from([(key, value)]), start)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
};
|
||||
|
||||
let type_value = self.next_token_value::<Strand>()?.0;
|
||||
let ate_comma = self.eat(t!(","));
|
||||
// match the type and then match the coordinates field to a value of that type.
|
||||
match type_value.as_str() {
|
||||
"Point" => {
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_point(&value) {
|
||||
return Ok(Value::Geometry(Geometry::Point(point)));
|
||||
}
|
||||
}
|
||||
}
|
||||
"LineString" => {
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_line(&value) {
|
||||
return Ok(Value::Geometry(Geometry::Line(point)));
|
||||
}
|
||||
}
|
||||
}
|
||||
"Polygon" => {
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_polygon(&value) {
|
||||
return Ok(Value::Geometry(Geometry::Polygon(point)));
|
||||
}
|
||||
}
|
||||
}
|
||||
"MultiPoint" => {
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_multipoint(&value) {
|
||||
return Ok(Value::Geometry(Geometry::MultiPoint(point)));
|
||||
}
|
||||
}
|
||||
}
|
||||
"MultiLineString" => {
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_multiline(&value) {
|
||||
return Ok(Value::Geometry(Geometry::MultiLine(point)));
|
||||
}
|
||||
}
|
||||
}
|
||||
"MultiPolygon" => {
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_multipolygon(&value) {
|
||||
return Ok(Value::Geometry(Geometry::MultiPolygon(point)));
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
|
||||
// type field or coordinates value didn't match or the object continues after two
|
||||
// fields.
|
||||
|
||||
if !ate_comma {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, value),
|
||||
(type_key, Value::Strand(type_value.into())),
|
||||
]))));
|
||||
}
|
||||
|
||||
self.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, value), (type_key, Value::Strand(type_value.into()))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object)
|
||||
}
|
||||
|
||||
async fn parse_object_or_geometry_after_geometries(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Span,
|
||||
key: String,
|
||||
) -> ParseResult<Value> {
|
||||
// 'geometries' key can only happen in a GeometryCollection, so try to parse that.
|
||||
expected!(self, t!(":"));
|
||||
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
|
||||
// if the object ends here, it is not a geometry.
|
||||
if !self.eat(t!(",")) || self.peek_kind() == t!("}") {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(key, value)]))));
|
||||
}
|
||||
|
||||
// parse the next object key
|
||||
let type_key = self.parse_object_key()?;
|
||||
// if it isn't 'type' this object is not a geometry, so bail.
|
||||
if type_key != "type" {
|
||||
expected!(self, t!(":"));
|
||||
return self
|
||||
.parse_object_from_key(ctx, type_key, BTreeMap::from([(key, value)]), start)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
expected!(self, t!(":"));
|
||||
// check if the next value is a strand.
|
||||
let (t!("\"") | t!("'")) = self.peek_kind() else {
|
||||
// not the right value also move back to parsing an object.
|
||||
return self
|
||||
.parse_object_from_key(ctx, type_key, BTreeMap::from([(key, value)]), start)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
};
|
||||
|
||||
let type_value = self.next_token_value::<Strand>()?.0;
|
||||
let ate_comma = self.eat(t!(","));
|
||||
|
||||
if type_value == "GeometryCollection" && self.eat(t!("}")) {
|
||||
if let Value::Array(ref x) = value {
|
||||
if x.iter().all(|x| matches!(x, Value::Geometry(_))) {
|
||||
let Value::Array(x) = value else {
|
||||
unreachable!()
|
||||
};
|
||||
let geometries = x
|
||||
.into_iter()
|
||||
.map(|x| {
|
||||
if let Value::Geometry(x) = x {
|
||||
x
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
return Ok(Value::Geometry(Geometry::Collection(geometries)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Either the type value didn't match or the geometry value didn't match.
|
||||
// Regardless the current object is not a geometry.
|
||||
|
||||
if !ate_comma {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, value),
|
||||
(type_key, Value::Strand(type_value.into())),
|
||||
]))));
|
||||
}
|
||||
|
||||
self.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, value), (type_key, Value::Strand(type_value.into()))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object)
|
||||
}
|
||||
|
||||
/// Parse a production starting with an `{` as either an object or a geometry.
|
||||
///
|
||||
/// This function tries to match an object to a geometry-like object and if it is unable
|
||||
|
@@ -48,374 +418,20 @@ impl Parser<'_> {
|
|||
async fn parse_object_or_geometry(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Value> {
|
||||
// empty object was already matched previously so next must be a key.
|
||||
let key = self.parse_object_key()?;
|
||||
expected!(self, t!(":"));
|
||||
// the order of fields of a geometry does not matter, so check if the key is any of the geometry-like keys:
|
||||
// "type" : could be the type of the object.
|
||||
// "geometries": could be a geometry collection.
|
||||
// "coordinates": could be the coordinate values of a geometry.
|
||||
match key.as_str() {
|
||||
"type" => {
|
||||
// for it to be a geometry the next value must be strand-like.
|
||||
let token = self.peek();
|
||||
let strand = self.token_value::<Strand>(token);
|
||||
match strand.as_ref().map(|x| x.as_str()) {
|
||||
Ok("Point") => {
|
||||
// we matched a type correctly but the field containing the geometry value
|
||||
// can still be wrong.
|
||||
//
|
||||
// we can unwrap strand since we just matched it to not be an err.
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
strand.unwrap(),
|
||||
Self::to_point,
|
||||
|x| Value::Geometry(Geometry::Point(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
Ok("LineString") => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
strand.unwrap(),
|
||||
Self::to_line,
|
||||
|x| Value::Geometry(Geometry::Line(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
Ok("Polygon") => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
strand.unwrap(),
|
||||
Self::to_polygon,
|
||||
|x| Value::Geometry(Geometry::Polygon(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
Ok("MultiPoint") => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
strand.unwrap(),
|
||||
Self::to_multipoint,
|
||||
|x| Value::Geometry(Geometry::MultiPoint(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
Ok("MultiLineString") => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
strand.unwrap(),
|
||||
Self::to_multiline,
|
||||
|x| Value::Geometry(Geometry::MultiLine(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
Ok("MultiPolygon") => {
|
||||
self.parse_geometry_after_type(
|
||||
ctx,
|
||||
start,
|
||||
key,
|
||||
strand.unwrap(),
|
||||
Self::to_multipolygon,
|
||||
|x| Value::Geometry(Geometry::MultiPolygon(x)),
|
||||
)
|
||||
.await
|
||||
}
|
||||
Ok("GeometryCollection") => {
|
||||
self.next();
|
||||
let strand = strand.unwrap();
|
||||
if !self.eat(t!(",")) {
|
||||
// missing next field, not a geometry.
|
||||
return self
|
||||
.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, Value::Strand(strand))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
let coord_key = self.parse_object_key()?;
|
||||
expected!(self, t!(":"));
|
||||
if coord_key != "geometries" {
|
||||
// invalid field key, not a Geometry
|
||||
return self
|
||||
.parse_object_from_key(
|
||||
ctx,
|
||||
coord_key,
|
||||
BTreeMap::from([(key, Value::Strand(strand))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let comma = self.eat(t!(","));
|
||||
if !self.eat(t!("}")) {
|
||||
if !comma {
|
||||
// No brace after no comma, missing brace.
|
||||
return Err(ParseError::new(
|
||||
ParseErrorKind::UnclosedDelimiter {
|
||||
expected: t!("}"),
|
||||
should_close: start,
|
||||
},
|
||||
self.last_span(),
|
||||
));
|
||||
}
|
||||
|
||||
// A comma and then no brace: more than two fields, not a geometry.
|
||||
return self
|
||||
.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([
|
||||
(key, Value::Strand(strand)),
|
||||
(coord_key, value),
|
||||
]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
|
||||
if let Value::Array(x) = value {
|
||||
// test first to avoid cloning.
|
||||
if x.iter().all(|x| matches!(x, Value::Geometry(_))) {
|
||||
let geometries =
|
||||
x.0.into_iter()
|
||||
.map(|x| {
|
||||
if let Value::Geometry(x) = x {
|
||||
x
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
return Ok(Value::Geometry(Geometry::Collection(geometries)));
|
||||
}
|
||||
|
||||
return Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, Value::Strand(strand)),
|
||||
(coord_key, Value::Array(x)),
|
||||
]))));
|
||||
}
|
||||
|
||||
Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, Value::Strand(strand)),
|
||||
(coord_key, value),
|
||||
]))))
|
||||
}
|
||||
Ok(_) => {
|
||||
self.pop_peek();
|
||||
if !self.eat(t!(",")) {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
Ok(Value::Object(Object(BTreeMap::from([(
|
||||
key,
|
||||
Value::Strand(strand.unwrap()),
|
||||
)]))))
|
||||
} else {
|
||||
self.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, Value::Strand(strand.unwrap()))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object)
|
||||
}
|
||||
}
|
||||
_ => self
|
||||
.parse_object_from_key(ctx, key, BTreeMap::new(), start)
|
||||
.await
|
||||
.map(Value::Object),
|
||||
}
|
||||
}
|
||||
"coordinates" => {
|
||||
// found coordinates field, next must be a coordinates value but we don't know
|
||||
// which until we match type.
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
if !self.eat(t!(",")) {
|
||||
// no comma; the object must end early.
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(key, value)]))));
|
||||
}
|
||||
|
||||
if self.eat(t!("}")) {
|
||||
// object ends early.
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(key, value)]))));
|
||||
}
|
||||
|
||||
let type_key = self.parse_object_key()?;
|
||||
"type" => self.parse_object_or_geometry_after_type(ctx, start, key).await,
|
||||
"coordinates" => self.parse_object_or_geometry_after_coordinates(ctx, start, key).await,
|
||||
"geometries" => self.parse_object_or_geometry_after_geometries(ctx, start, key).await,
|
||||
_ => {
|
||||
expected!(self, t!(":"));
|
||||
if type_key != "type" {
|
||||
// not the right field, return object.
|
||||
return self
|
||||
.parse_object_from_key(ctx, type_key, BTreeMap::from([(key, value)]), start)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
let peek = self.peek();
|
||||
let strand = self.token_value::<Strand>(peek);
|
||||
// match the type and then match the coordinates field to a value of that type.
|
||||
let (ate_comma, type_value) = match strand.as_ref().map(|x| x.as_str()) {
|
||||
Ok("Point") => {
|
||||
self.next();
|
||||
let ate_comma = self.eat(t!(","));
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_point(&value) {
|
||||
return Ok(Value::Geometry(Geometry::Point(point)));
|
||||
}
|
||||
}
|
||||
// At this point the value does not match, or there are more fields.
|
||||
// since we matched `Ok("Point")` strand cannot be an error so this unwrap
|
||||
// will never panic.
|
||||
(ate_comma, Value::Strand(strand.unwrap()))
|
||||
}
|
||||
Ok("LineString") => {
|
||||
self.next();
|
||||
let ate_comma = self.eat(t!(","));
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_line(&value) {
|
||||
return Ok(Value::Geometry(Geometry::Line(point)));
|
||||
}
|
||||
}
|
||||
(ate_comma, Value::Strand(strand.unwrap()))
|
||||
}
|
||||
Ok("Polygon") => {
|
||||
self.next();
|
||||
let ate_comma = self.eat(t!(","));
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_polygon(&value) {
|
||||
return Ok(Value::Geometry(Geometry::Polygon(point)));
|
||||
}
|
||||
}
|
||||
(ate_comma, Value::Strand(strand.unwrap()))
|
||||
}
|
||||
Ok("MultiPoint") => {
|
||||
self.next();
|
||||
let ate_comma = self.eat(t!(","));
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_multipoint(&value) {
|
||||
return Ok(Value::Geometry(Geometry::MultiPoint(point)));
|
||||
}
|
||||
}
|
||||
(ate_comma, Value::Strand(strand.unwrap()))
|
||||
}
|
||||
Ok("MultiLineString") => {
|
||||
self.next();
|
||||
let ate_comma = self.eat(t!(","));
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_multiline(&value) {
|
||||
return Ok(Value::Geometry(Geometry::MultiLine(point)));
|
||||
}
|
||||
}
|
||||
(ate_comma, Value::Strand(strand.unwrap()))
|
||||
}
|
||||
Ok("MultiPolygon") => {
|
||||
self.next();
|
||||
let ate_comma = self.eat(t!(","));
|
||||
if self.eat(t!("}")) {
|
||||
if let Some(point) = Self::to_multipolygon(&value) {
|
||||
return Ok(Value::Geometry(Geometry::MultiPolygon(point)));
|
||||
}
|
||||
}
|
||||
(ate_comma, Value::Strand(strand.unwrap()))
|
||||
}
|
||||
_ => {
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
(self.eat(t!(",")), value)
|
||||
}
|
||||
};
|
||||
// type field or coordinates value didn't match or the object continues after two
|
||||
// fields.
|
||||
|
||||
if !ate_comma {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, value),
|
||||
(type_key, type_value),
|
||||
]))));
|
||||
}
|
||||
self.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, value), (type_key, type_value)]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object)
|
||||
self.parse_object_from_key(ctx, key, BTreeMap::new(), start)
|
||||
.await
|
||||
.map(Value::Object)
|
||||
}
|
||||
"geometries" => {
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
if !self.eat(t!(",")) {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(key, value)]))));
|
||||
}
|
||||
let type_key = self.parse_object_key()?;
|
||||
expected!(self, t!(":"));
|
||||
if type_key != "type" {
|
||||
return self
|
||||
.parse_object_from_key(ctx, type_key, BTreeMap::from([(key, value)]), start)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
let peek = self.peek();
|
||||
let strand = self.token_value::<Strand>(peek);
|
||||
let (ate_comma, type_value) =
|
||||
if let Ok("GeometryCollection") = strand.as_ref().map(|x| x.as_str()) {
|
||||
self.next();
|
||||
let ate_comma = self.eat(t!(","));
|
||||
if self.eat(t!("}")) {
|
||||
if let Value::Array(ref x) = value {
|
||||
if x.iter().all(|x| matches!(x, Value::Geometry(_))) {
|
||||
let Value::Array(x) = value else {
|
||||
unreachable!()
|
||||
};
|
||||
let geometries = x
|
||||
.into_iter()
|
||||
.map(|x| {
|
||||
if let Value::Geometry(x) = x {
|
||||
x
|
||||
} else {
|
||||
unreachable!()
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
return Ok(Value::Geometry(Geometry::Collection(geometries)));
|
||||
}
|
||||
}
|
||||
}
|
||||
(ate_comma, Value::Strand(strand.unwrap()))
|
||||
} else {
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
(self.eat(t!(",")), value)
|
||||
};
|
||||
|
||||
if !ate_comma {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, value),
|
||||
(type_key, type_value),
|
||||
]))));
|
||||
}
|
||||
self.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, value), (type_key, type_value)]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object)
|
||||
}
|
||||
_ => self
|
||||
.parse_object_from_key(ctx, key, BTreeMap::new(), start)
|
||||
.await
|
||||
.map(Value::Object),
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -424,7 +440,7 @@ impl Parser<'_> {
|
|||
ctx: &mut Stk,
|
||||
start: Span,
|
||||
key: String,
|
||||
strand: Strand,
|
||||
strand: String,
|
||||
capture: F,
|
||||
map: Fm,
|
||||
) -> ParseResult<Value>
|
||||
|
@@ -432,27 +448,29 @@ impl Parser<'_> {
|
|||
F: FnOnce(&Value) -> Option<R>,
|
||||
Fm: FnOnce(R) -> Value,
|
||||
{
|
||||
// eat the strand with the type name.
|
||||
self.next();
|
||||
if !self.eat(t!(",")) {
|
||||
// there is no second field, not a geometry.
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(key, Value::Strand(strand))]))));
|
||||
return Ok(Value::Object(Object(BTreeMap::from([(
|
||||
key,
|
||||
Value::Strand(strand.into()),
|
||||
)]))));
|
||||
}
|
||||
let coord_key = self.parse_object_key()?;
|
||||
expected!(self, t!(":"));
|
||||
if coord_key != "coordinates" {
|
||||
expected!(self, t!(":"));
|
||||
// next field was not correct, fall back to parsing a plain object.
|
||||
return self
|
||||
.parse_object_from_key(
|
||||
ctx,
|
||||
coord_key,
|
||||
BTreeMap::from([(key, Value::Strand(strand))]),
|
||||
BTreeMap::from([(key, Value::Strand(strand.into()))]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
.map(Value::Object);
|
||||
}
|
||||
expected!(self, t!(":"));
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let comma = self.eat(t!(","));
|
||||
if !self.eat(t!("}")) {
|
||||
|
@@ -470,7 +488,7 @@ impl Parser<'_> {
|
|||
return self
|
||||
.parse_object_from_map(
|
||||
ctx,
|
||||
BTreeMap::from([(key, Value::Strand(strand)), (coord_key, value)]),
|
||||
BTreeMap::from([(key, Value::Strand(strand.into())), (coord_key, value)]),
|
||||
start,
|
||||
)
|
||||
.await
|
||||
|
@@ -480,7 +498,7 @@ impl Parser<'_> {
|
|||
let Some(v) = capture(&value) else {
|
||||
// failed to match the geometry value, just a plain object.
|
||||
return Ok(Value::Object(Object(BTreeMap::from([
|
||||
(key, Value::Strand(strand)),
|
||||
(key, Value::Strand(strand.into())),
|
||||
(coord_key, value),
|
||||
]))));
|
||||
};
|
||||
|
@@ -648,7 +666,7 @@ impl Parser<'_> {
|
|||
|
||||
/// Parses the key of an object, i.e. `field` in the object `{ field: 1 }`.
|
||||
pub fn parse_object_key(&mut self) -> ParseResult<String> {
|
||||
let token = self.peek();
|
||||
let token = self.glue()?;
|
||||
match token.kind {
|
||||
TokenKind::Keyword(_)
|
||||
| TokenKind::Language(_)
|
||||
|
@@ -661,14 +679,18 @@ impl Parser<'_> {
|
|||
let str = std::str::from_utf8(str).unwrap().to_owned();
|
||||
Ok(str)
|
||||
}
|
||||
TokenKind::Identifier | TokenKind::Strand => {
|
||||
TokenKind::Identifier => {
|
||||
self.pop_peek();
|
||||
let str = self.lexer.string.take().unwrap();
|
||||
Ok(str)
|
||||
}
|
||||
TokenKind::Number(_) => {
|
||||
self.pop_peek();
|
||||
Ok(self.lexer.string.take().unwrap())
|
||||
t!("\"") | t!("'") | TokenKind::Strand => {
|
||||
let str = self.next_token_value::<Strand>()?.0;
|
||||
Ok(str)
|
||||
}
|
||||
TokenKind::Digits | TokenKind::Number(_) => {
|
||||
let number = self.next_token_value::<Number>()?.to_string();
|
||||
Ok(number)
|
||||
}
|
||||
x => unexpected!(self, x, "an object key"),
|
||||
}
|
||||
|
|
|
@@ -5,16 +5,15 @@ use super::{ParseResult, Parser};
|
|||
use crate::{
|
||||
enter_object_recursion, enter_query_recursion,
|
||||
sql::{
|
||||
Array, Dir, Function, Geometry, Ident, Idiom, Mock, Part, Script, Strand, Subquery, Table,
|
||||
Value,
|
||||
Array, Dir, Function, Geometry, Ident, Idiom, Mock, Number, Part, Script, Strand, Subquery,
|
||||
Table, Value,
|
||||
},
|
||||
syn::{
|
||||
lexer::Lexer,
|
||||
parser::{
|
||||
mac::{expected, unexpected},
|
||||
ParseError, ParseErrorKind,
|
||||
},
|
||||
token::{t, NumberKind, Span, TokenKind},
|
||||
token::{t, Span, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -24,14 +23,6 @@ impl Parser<'_> {
/// `What` values are values which are more restricted in what expressions they can contain.
pub async fn parse_what_primary(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
match self.peek_kind() {
TokenKind::Duration => {
let duration = self.next_token_value()?;
Ok(Value::Duration(duration))
}
TokenKind::DateTime => {
let datetime = self.next_token_value()?;
Ok(Value::Datetime(datetime))
}
t!("r\"") => {
self.pop_peek();
let thing = self.parse_record_string(ctx, true).await?;
@ -42,6 +33,14 @@ impl Parser<'_> {
let thing = self.parse_record_string(ctx, false).await?;
Ok(Value::Thing(thing))
}
t!("d\"") | t!("d'") => {
let datetime = self.next_token_value()?;
Ok(Value::Datetime(datetime))
}
t!("u\"") | t!("u'") => {
let uuid = self.next_token_value()?;
Ok(Value::Uuid(uuid))
}
t!("$param") => {
let param = self.next_token_value()?;
Ok(Value::Param(param))
@ -73,11 +72,7 @@ impl Parser<'_> {
let start = self.pop_peek().span;
self.parse_mock(start).map(Value::Mock)
}
t!("/") => {
let token = self.pop_peek();
let regex = self.lexer.relex_regex(token);
self.token_value(regex).map(Value::Regex)
}
t!("/") => self.next_token_value().map(Value::Regex),
t!("RETURN")
| t!("SELECT")
| t!("CREATE")
@ -93,15 +88,19 @@ impl Parser<'_> {
t!("fn") => self.parse_custom_function(ctx).await.map(|x| Value::Function(Box::new(x))),
t!("ml") => self.parse_model(ctx).await.map(|x| Value::Model(Box::new(x))),
x => {
if !self.peek_can_be_ident() {
if !self.peek_can_start_ident() {
unexpected!(self, x, "a value")
}

let token = self.next();
match self.peek_kind() {
t!("::") | t!("(") => self.parse_builtin(ctx, token.span).await,
let span = self.glue()?.span;

match self.peek_token_at(1).kind {
t!("::") | t!("(") => {
self.pop_peek();
self.parse_builtin(ctx, span).await
}
t!(":") => {
let str = self.token_value::<Ident>(token)?.0;
let str = self.next_token_value::<Ident>()?.0;
self.parse_thing_or_range(ctx, str).await
}
x => {
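
Note the recurring pattern here: after `glue()` the glued token occupies peek position 0, so the decision token (`::`, `(` or `:`) is read with `peek_token_at(1)`. A rough model of why this works, as a standalone sketch with hypothetical types rather than the crate's real token buffer:

// Minimal model of the peek buffer: gluing collapses several raw tokens
// into one and pushes the result back to the front, so lookahead index 1
// is the first token *after* the glued value.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Tok { Ident, Colon, OpenParen }

struct Peeker { buf: Vec<Tok> }

impl Peeker {
    // glue: drain the raw tokens making up an ident, push one Ident back.
    fn glue(&mut self, raw_len: usize) {
        self.buf.drain(..raw_len);
        self.buf.insert(0, Tok::Ident);
    }
    fn peek_at(&self, n: usize) -> Tok { self.buf[n] }
}

fn main() {
    // raw stream for `foo:`: two raw tokens glue into one Ident.
    let mut p = Peeker { buf: vec![Tok::Ident, Tok::Ident, Tok::Colon] };
    p.glue(2);
    assert_eq!(p.peek_at(0), Tok::Ident); // the glued token
    assert_eq!(p.peek_at(1), Tok::Colon); // decide: a record id follows
}
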
@ -110,7 +109,7 @@ impl Parser<'_> {
// always an invalid production so just return error.
unexpected!(self, x, "a value");
} else {
Ok(Value::Table(self.token_value(token)?))
Ok(Value::Table(self.next_token_value()?))
}
}
}
@ -118,6 +117,15 @@ impl Parser<'_> {
}
}

pub fn parse_number_like_prime(&mut self) -> ParseResult<Value> {
let token = self.glue_numeric()?;
match token.kind {
TokenKind::Number(_) => self.next_token_value().map(Value::Number),
TokenKind::Duration => self.next_token_value().map(Value::Duration),
x => unexpected!(self, x, "a value"),
}
}

/// Parses an expression.
pub async fn parse_idiom_expression(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let token = self.peek();
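
`parse_number_like_prime` leans on `glue_numeric()` to decide between a number and a duration before committing to either value parser. The dispatch in isolation, as a standalone sketch with hypothetical helpers, not the crate API:

use std::time::Duration;

#[derive(Debug, PartialEq)]
enum NumericValue {
    Number(f64),
    Duration(Duration),
}

// Decide between number and duration the way glue_numeric() does:
// digits followed by a duration suffix glue into a Duration, anything
// else glues into a Number. Only a seconds suffix is modelled here.
fn parse_numeric(src: &str) -> Option<NumericValue> {
    if let Some(secs) = src.strip_suffix('s').and_then(|d| d.parse::<u64>().ok()) {
        return Some(NumericValue::Duration(Duration::from_secs(secs)));
    }
    src.parse::<f64>().ok().map(NumericValue::Number)
}

fn main() {
    assert_eq!(parse_numeric("90s"), Some(NumericValue::Duration(Duration::from_secs(90))));
    assert_eq!(parse_numeric("1.5"), Some(NumericValue::Number(1.5)));
}
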
@ -147,35 +155,6 @@ impl Parser<'_> {
let block = self.parse_block(ctx, next).await?;
return Ok(Value::Future(Box::new(crate::sql::Future(block))));
}
TokenKind::Strand => {
self.pop_peek();
if self.legacy_strands {
return self.parse_legacy_strand(ctx).await;
} else {
let strand = self.token_value(token)?;
return Ok(Value::Strand(strand));
}
}
TokenKind::Duration => {
self.pop_peek();
let duration = self.token_value(token)?;
Value::Duration(duration)
}
TokenKind::Number(_) => {
self.pop_peek();
let number = self.token_value(token)?;
Value::Number(number)
}
TokenKind::Uuid => {
self.pop_peek();
let uuid = self.token_value(token)?;
Value::Uuid(uuid)
}
TokenKind::DateTime => {
self.pop_peek();
let datetime = self.token_value(token)?;
Value::Datetime(datetime)
}
t!("r\"") => {
self.pop_peek();
let thing = self.parse_record_string(ctx, true).await?;
@ -186,9 +165,32 @@ impl Parser<'_> {
let thing = self.parse_record_string(ctx, false).await?;
Value::Thing(thing)
}
t!("$param") => {
t!("d\"") | t!("d'") => {
let datetime = self.next_token_value()?;
Value::Datetime(datetime)
}
t!("u\"") | t!("u'") => {
let uuid = self.next_token_value()?;
Value::Uuid(uuid)
}
t!("'") | t!("\"") | TokenKind::Strand => {
let s = self.next_token_value::<Strand>()?;
if self.legacy_strands {
if let Some(x) = self.reparse_legacy_strand(ctx, &s.0).await {
return Ok(x);
}
}
Value::Strand(s)
}
t!("+") | t!("-") | TokenKind::Number(_) | TokenKind::Digits | TokenKind::Duration => {
self.parse_number_like_prime()?
}
TokenKind::NaN => {
self.pop_peek();
let param = self.token_value(token)?;
return Ok(Value::Number(Number::Float(f64::NAN)));
}
t!("$param") => {
let param = self.next_token_value()?;
Value::Param(param)
}
t!("FUNCTION") => {
@ -234,11 +236,7 @@ impl Parser<'_> {
self.pop_peek();
self.parse_inner_subquery_or_coordinate(ctx, token.span).await?
}
t!("/") => {
self.pop_peek();
let regex = self.lexer.relex_regex(token);
self.token_value(regex).map(Value::Regex)?
}
t!("/") => self.next_token_value().map(Value::Regex)?,
t!("RETURN")
| t!("SELECT")
| t!("CREATE")
@ -260,20 +258,24 @@ impl Parser<'_> {
self.parse_model(ctx).await.map(|x| Value::Model(Box::new(x)))?
}
_ => {
self.pop_peek();
match self.peek_kind() {
t!("::") | t!("(") => self.parse_builtin(ctx, token.span).await?,
self.glue()?;

match self.peek_token_at(1).kind {
t!("::") | t!("(") => {
self.pop_peek();
self.parse_builtin(ctx, token.span).await?
}
t!(":") => {
let str = self.token_value::<Ident>(token)?.0;
let str = self.next_token_value::<Ident>()?.0;
self.parse_thing_or_range(ctx, str).await?
}
x => {
if x.has_data() {
unexpected!(self, x, "a value");
} else if self.table_as_field {
Value::Idiom(Idiom(vec![Part::Field(self.token_value(token)?)]))
Value::Idiom(Idiom(vec![Part::Field(self.next_token_value()?)]))
} else {
Value::Table(self.token_value(token)?)
Value::Table(self.next_token_value()?)
}
}
}
@ -426,69 +428,46 @@ impl Parser<'_> {
let stmt = self.parse_rebuild_stmt()?;
Subquery::Rebuild(stmt)
}
t!("+") | t!("-") => {
// handle a possible coordinate in the shape of ([-+]?number,[-+]?number)
if let TokenKind::Number(kind) = self.peek_token_at(1).kind {
// take the value so we don't overwrite it if the next token happens to be a
// strand or an ident, both of which are invalid syntax.
let number_value = self.lexer.string.take().unwrap();
if self.peek_token_at(2).kind == t!(",") {
match kind {
NumberKind::Decimal | NumberKind::NaN => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: TokenKind::Number(kind),
expected: "a non-decimal, non-nan number",
explain: "coordinate numbers can't be NaN or a decimal",
},
peek.span,
));
}
_ => {}
}
TokenKind::Digits | TokenKind::Number(_) | t!("+") | t!("-") => {
let number_token = self.glue()?;
if matches!(self.peek_kind(), TokenKind::Number(_))
&& self.peek_token_at(1).kind == t!(",")
{
let number = self.next_token_value::<Number>()?;
// eat ','
self.next();

self.lexer.string = Some(number_value);
let a = self.parse_signed_float()?;
self.next();
let b = self.parse_signed_float()?;
self.expect_closing_delimiter(t!(")"), start)?;
return Ok(Value::Geometry(Geometry::Point(Point::from((a, b)))));
}
self.lexer.string = Some(number_value);
}
Subquery::Value(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
TokenKind::Number(kind) => {
// handle a possible coordinate in the shape of ([-+]?number,[-+]?number)
// take the value so we don't overwrite it if the next token happens to be a
// strand or an ident, both of which are invalid syntax.
let number_value = self.lexer.string.take().unwrap();
if self.peek_token_at(1).kind == t!(",") {
match kind {
NumberKind::Decimal | NumberKind::NaN => {
match number {
Number::Decimal(_) => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: TokenKind::Number(kind),
found: TokenKind::Digits,
expected: "a non-decimal, non-nan number",
explain: "coordinate numbers can't be NaN or a decimal",
},
peek.span,
number_token.span,
));
}
Number::Float(x) if x.is_nan() => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: TokenKind::Digits,
expected: "a non-decimal, non-nan number",
explain: "coordinate numbers can't be NaN or a decimal",
},
number_token.span,
));
}
_ => {}
}
self.pop_peek();
// it was a comma, put the strand back for code reuse.
self.lexer.string = Some(number_value);
let a = self.token_value::<f64>(peek)?;
// eat the comma.
self.next();
let b = self.parse_signed_float()?;
let x = number.as_float();
let y = self.next_token_value::<f64>()?;
self.expect_closing_delimiter(t!(")"), start)?;
return Ok(Value::Geometry(Geometry::Point(Point::from((a, b)))));
return Ok(Value::Geometry(Geometry::Point(Point::from((x, y)))));
} else {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
Subquery::Value(value)
}
self.lexer.string = Some(number_value);
Subquery::Value(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
_ => {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
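
The coordinate fast path accepts `([+-]?number, [+-]?number)` and produces a geometry point, rejecting NaN and decimal components. The shape being recognized, as a standalone sketch using plain tuples instead of the geo crate's `Point`:

// Parse "( x, y )" into an (f64, f64) point, rejecting NaN components
// like the parser does for coordinate subqueries.
fn parse_point(src: &str) -> Option<(f64, f64)> {
    let inner = src.trim().strip_prefix('(')?.strip_suffix(')')?;
    let (a, b) = inner.split_once(',')?;
    let x: f64 = a.trim().parse().ok()?;
    let y: f64 = b.trim().parse().ok()?;
    if x.is_nan() || y.is_nan() {
        return None; // coordinate numbers can't be NaN
    }
    Some((x, y))
}

fn main() {
    assert_eq!(parse_point("(-12.5, 8)"), Some((-12.5, 8.0)));
    assert_eq!(parse_point("(NaN, 1)"), None);
}
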
@ -628,18 +607,17 @@ impl Parser<'_> {

/// Parses a strand with legacy rules, parsing to a record id, datetime or uuid if the string
/// matches.
pub async fn parse_legacy_strand(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let text = self.lexer.string.take().unwrap();
pub async fn reparse_legacy_strand(&mut self, ctx: &mut Stk, text: &str) -> Option<Value> {
if let Ok(x) = Parser::new(text.as_bytes()).parse_thing(ctx).await {
return Ok(Value::Thing(x));
return Some(Value::Thing(x));
}
if let Ok(x) = Lexer::new(text.as_bytes()).lex_only_datetime() {
return Ok(Value::Datetime(x));
if let Ok(x) = Parser::new(text.as_bytes()).next_token_value() {
return Some(Value::Datetime(x));
}
if let Ok(x) = Lexer::new(text.as_bytes()).lex_only_uuid() {
return Ok(Value::Uuid(x));
if let Ok(x) = Parser::new(text.as_bytes()).next_token_value() {
return Some(Value::Uuid(x));
}
Ok(Value::Strand(Strand(text)))
None
}

async fn parse_script(&mut self, ctx: &mut Stk) -> ParseResult<Function> {
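
`reparse_legacy_strand` borrows the already-parsed strand and tries the stricter interpretations in order, returning `None` when none apply so the caller keeps the plain strand. The first-match-wins chain, as a standalone sketch with stand-in predicates where the real code runs full sub-parsers:

#[derive(Debug, PartialEq)]
enum Legacy {
    Thing(String),
    Datetime(String),
    Uuid(String),
}

// Try the stricter interpretations in order; None means "keep the strand".
fn reparse(text: &str) -> Option<Legacy> {
    if text.contains(':') {
        return Some(Legacy::Thing(text.to_owned()));
    }
    if text.contains('T') && text.contains('-') {
        return Some(Legacy::Datetime(text.to_owned()));
    }
    if text.len() == 36 && text.matches('-').count() == 4 {
        return Some(Legacy::Uuid(text.to_owned()));
    }
    None
}

fn main() {
    assert_eq!(reparse("person:tobie"), Some(Legacy::Thing("person:tobie".into())));
    assert_eq!(reparse("just a string"), None);
}
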
@ -741,7 +719,7 @@ mod tests {
fn regex_complex() {
let sql = r"/(?i)test\/[a-z]+\/\s\d\w{1}.*/";
let out = Value::parse(sql);
assert_eq!(r"/(?i)test/[a-z]+/\s\d\w{1}.*/", format!("{}", out));
assert_eq!(r"/(?i)test\/[a-z]+\/\s\d\w{1}.*/", format!("{}", out));
let Value::Regex(regex) = out else {
panic!()
};
@ -15,22 +15,20 @@ impl Parser<'_> {
) -> ParseResult<InsertStatement> {
let relation = self.eat(t!("RELATION"));
let ignore = self.eat(t!("IGNORE"));
let into = match self.eat(t!("INTO")) {
false => None,
true => {
let next = self.next();
// TODO: Explain that more complicated expressions are not allowed here.
Some(match next.kind {
t!("$param") => {
let param = self.token_value(next)?;
Value::Param(param)
}
_ => {
let table = self.token_value(next)?;
Value::Table(table)
}
})
}
let into = if self.eat(t!("INTO")) {
let r = match self.peek().kind {
t!("$param") => {
let param = self.next_token_value()?;
Value::Param(param)
}
_ => {
let table = self.next_token_value()?;
Value::Table(table)
}
};
Some(r)
} else {
None
};

let data = match self.peek_kind() {
@ -476,12 +476,8 @@ impl Parser<'_> {
/// Expects `KILL` to already be consumed.
pub(crate) fn parse_kill_stmt(&mut self) -> ParseResult<KillStatement> {
let id = match self.peek_kind() {
TokenKind::Uuid => self.next_token_value().map(Value::Uuid)?,
t!("$param") => {
let token = self.pop_peek();
let param = self.token_value(token)?;
Value::Param(param)
}
t!("u\"") | t!("u'") => self.next_token_value().map(Value::Uuid)?,
t!("$param") => self.next_token_value().map(Value::Param)?,
x => unexpected!(self, x, "a UUID or a parameter"),
};
Ok(KillStatement {
@ -614,10 +610,12 @@ impl Parser<'_> {

expected!(self, t!("SINCE"));

let next = self.next();
let next = self.peek();
let since = match next.kind {
TokenKind::Number(_) => ShowSince::Versionstamp(self.token_value(next)?),
TokenKind::DateTime => ShowSince::Timestamp(self.token_value(next)?),
TokenKind::Digits | TokenKind::Number(_) => {
ShowSince::Versionstamp(self.next_token_value()?)
}
t!("d\"") | t!("d'") => ShowSince::Timestamp(self.next_token_value()?),
x => unexpected!(self, x, "a version stamp or a date-time"),
};

@ -4,9 +4,9 @@ use reblessive::Stk;

use crate::{
sql::{
change_feed_include::ChangeFeedInclude, changefeed::ChangeFeed, index::Distance,
index::VectorType, Base, Cond, Data, Duration, Fetch, Fetchs, Field, Fields, Group, Groups,
Ident, Idiom, Output, Permission, Permissions, Tables, Timeout, Value, View,
changefeed::ChangeFeed, index::Distance, index::VectorType, Base, Cond, Data, Duration,
Fetch, Fetchs, Field, Fields, Group, Groups, Ident, Idiom, Output, Permission, Permissions,
Tables, Timeout, Value, View,
},
syn::{
parser::{
@ -343,7 +343,7 @@ impl Parser<'_> {
pub fn parse_changefeed(&mut self) -> ParseResult<ChangeFeed> {
let expiry = self.next_token_value::<Duration>()?.0;
let store_diff = if self.eat(t!("INCLUDE")) {
expected!(self, TokenKind::ChangeFeedInclude(ChangeFeedInclude::Original));
expected!(self, t!("ORIGINAL"));
true
} else {
false
@ -107,6 +107,7 @@ impl Parser<'_> {
}

pub async fn parse_thing_or_table(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
self.glue()?;
if self.peek_token_at(1).kind == t!(":") {
self.parse_thing(ctx).await.map(Value::Thing)
} else {
@ -2,14 +2,13 @@ use reblessive::Stk;

use super::{ParseResult, Parser};
use crate::{
enter_flexible_ident,
sql::{id::Gen, Id, Ident, Range, Thing, Value},
syn::{
parser::{
mac::{expected, unexpected},
mac::{expected, expected_whitespace, unexpected},
ParseError, ParseErrorKind,
},
token::{t, NumberKind, TokenKind},
token::{t, TokenKind},
},
};
use std::{cmp::Ordering, ops::Bound};
@ -17,33 +16,20 @@ use std::{cmp::Ordering, ops::Bound};
impl Parser<'_> {
pub async fn parse_record_string(&mut self, ctx: &mut Stk, double: bool) -> ParseResult<Thing> {
let thing = self.parse_thing(ctx).await?;
// can't have any tokens in the buffer, since the next token must be produced by a specific
// call.
debug_assert_eq!(self.token_buffer.len(), 0);
// manually handle the trailing `"`.
let token = self.lexer.lex_record_string_close();
if token.kind == TokenKind::Invalid {
return Err(ParseError::new(
ParseErrorKind::InvalidToken(self.lexer.error.take().unwrap()),
token.span,
));
}
if token.kind == t!("'r") && double {
unexpected!(self, token.kind, "a single quote")
}
if token.kind == t!("\"r") && !double {
unexpected!(self, token.kind, "a double quote")
}
debug_assert!(matches!(token.kind, TokenKind::CloseRecordString { .. }));

debug_assert!(self.last_span().is_followed_by(&self.peek_whitespace().span));

if double {
expected_whitespace!(self, t!("\""));
} else {
expected_whitespace!(self, t!("'"));
};
Ok(thing)
}

fn peek_can_start_id(&mut self) -> bool {
self.peek_can_be_ident()
|| matches!(
self.peek_kind(),
TokenKind::Number(_) | t!("{") | t!("[") | TokenKind::Duration
)
fn kind_cast_start_id(kind: TokenKind) -> bool {
Self::tokenkind_can_start_ident(kind)
|| matches!(kind, TokenKind::Digits | t!("{") | t!("[") | t!("+") | t!("-"))
}

pub async fn parse_thing_or_range(
@ -51,157 +37,133 @@ impl Parser<'_> {
stk: &mut Stk,
ident: String,
) -> ParseResult<Value> {
expected!(self, t!(":"));
expected_whitespace!(self, t!(":"));

enter_flexible_ident!(this = self =>(self.flexible_record_id){

this.peek();
this.no_whitespace()?;

if this.eat(t!("..")) {
let end = if this.eat(t!("=")) {
this.no_whitespace()?;
let id = stk.run(|stk| this.parse_id(stk)).await?;
Bound::Included(id)
} else if this.peek_can_start_id() {
this.no_whitespace()?;
let id = stk.run(|stk| this.parse_id(stk)).await?;
Bound::Excluded(id)
} else {
Bound::Unbounded
};
return Ok(Value::Range(Box::new(Range {
tb: ident,
beg: Bound::Unbounded,
end,
})));
}

let beg = if this.peek_can_start_id(){
let id = stk.run(|ctx| this.parse_id(ctx)).await?;

if this.eat(t!(">")) {
this.no_whitespace()?;
Bound::Excluded(id)
} else {
Bound::Included(id)
}
} else {
Bound::Unbounded
};

if this.eat(t!("..")) {
let end = if this.eat(t!("=")) {
this.no_whitespace()?;
let id = stk.run(|ctx| this.parse_id(ctx)).await?;
Bound::Included(id)
} else if this.peek_can_start_id(){
this.no_whitespace()?;
let id = stk.run(|ctx| this.parse_id(ctx)).await?;
Bound::Excluded(id)
} else {
Bound::Unbounded
};
Ok(Value::Range(Box::new(Range {
tb: ident,
beg,
end,
})))
// If self starts with a range operator self is a range with no start bound
if self.eat_whitespace(t!("..")) {
// Check for inclusive
let end = if self.eat_whitespace(t!("=")) {
let id = stk.run(|stk| self.parse_id(stk)).await?;
Bound::Included(id)
} else if Self::kind_cast_start_id(self.peek_whitespace().kind) {
let id = stk.run(|stk| self.parse_id(stk)).await?;
Bound::Excluded(id)
} else {
let id = match beg {
Bound::Unbounded => {
if this.peek_kind() == t!("$param") {
return Err(ParseError::new(
Bound::Unbounded
};
return Ok(Value::Range(Box::new(Range {
tb: ident,
beg: Bound::Unbounded,
end,
})));
}

// Didn't eat the range yet so we need to parse the id.
let beg = if Self::kind_cast_start_id(self.peek_whitespace().kind) {
let id = stk.run(|ctx| self.parse_id(ctx)).await?;

// check for exclusive
if self.eat_whitespace(t!(">")) {
Bound::Excluded(id)
} else {
Bound::Included(id)
}
} else {
Bound::Unbounded
};

// Check if self is actually a range.
// If we already ate the exclusive it must be a range.
if self.eat_whitespace(t!("..")) {
let end = if self.eat_whitespace(t!("=")) {
let id = stk.run(|ctx| self.parse_id(ctx)).await?;
Bound::Included(id)
} else if Self::kind_cast_start_id(self.peek_whitespace().kind) {
let id = stk.run(|ctx| self.parse_id(ctx)).await?;
Bound::Excluded(id)
} else {
Bound::Unbounded
};
Ok(Value::Range(Box::new(Range {
tb: ident,
beg,
end,
})))
} else {
let id = match beg {
Bound::Unbounded => {
if self.peek_whitespace().kind == t!("$param") {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: t!("$param"),
expected: "a record-id id",
explain: "you can create a record-id from a param with the function 'type::thing'",
},
this.recent_span(),
self.recent_span(),
));
}
}

// we haven't matched anything so far so we still want any type of id.
unexpected!(this, this.peek_kind(), "a record-id id")
}
Bound::Excluded(_) => {
// we have matched a bounded id but we don't see a range operator.
unexpected!(this, this.peek_kind(), "the range operator `..`")
}
Bound::Included(id) => id,
};
Ok(Value::Thing(Thing {
tb: ident,
id,
}))
}
})
// we haven't matched anything so far so we still want any type of id.
unexpected!(self, self.peek_whitespace().kind, "a record-id id")
}
Bound::Excluded(_) => {
// we have matched a bounded id but we don't see a range operator.
unexpected!(self, self.peek_whitespace().kind, "the range operator `..`")
}
Bound::Included(id) => id,
};
Ok(Value::Thing(Thing {
tb: ident,
id,
}))
}
}

/// Parses a range.
pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
let tb = self.next_token_value::<Ident>()?.0;

expected!(self, t!(":"));
expected_whitespace!(self, t!(":"));

enter_flexible_ident!(this = self =>(self.flexible_record_id){
this.peek();
this.no_whitespace()?;
// Check for beginning id
let beg = if Self::tokenkind_can_start_ident(self.peek_whitespace().kind) {
let id = ctx.run(|ctx| self.parse_id(ctx)).await?;

let beg = if this.peek_can_be_ident() {
this.peek();
this.no_whitespace()?;

let id = ctx.run(|ctx| this.parse_id(ctx)).await?;

this.peek();
this.no_whitespace()?;

if this.eat(t!(">")) {
Bound::Excluded(id)
} else {
Bound::Included(id)
}
if self.eat_whitespace(t!(">")) {
Bound::Excluded(id)
} else {
Bound::Unbounded
};
Bound::Included(id)
}
} else {
Bound::Unbounded
};

this.peek();
this.no_whitespace()?;
expected_whitespace!(self, t!(".."));

expected!(this, t!(".."));
let inclusive = self.eat_whitespace(t!("="));

this.peek();
this.no_whitespace()?;

let inclusive = this.eat(t!("="));

this.peek();
this.no_whitespace()?;

let end = if this.peek_can_be_ident() {
let id = ctx.run(|ctx| this.parse_id(ctx)).await?;
if inclusive {
Bound::Included(id)
} else {
Bound::Excluded(id)
}
// parse the ending id.
let end = if Self::tokenkind_can_start_ident(self.peek_whitespace().kind) {
let id = ctx.run(|ctx| self.parse_id(ctx)).await?;
if inclusive {
Bound::Included(id)
} else {
Bound::Unbounded
};
Bound::Excluded(id)
}
} else {
Bound::Unbounded
};

Ok(Range {
tb,
beg,
end,
})
Ok(Range {
tb,
beg,
end,
})
}

pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
let ident = self.next_token_value::<Ident>()?.0;
enter_flexible_ident!(this = self =>(self.flexible_record_id){
this.parse_thing_from_ident(ctx, ident).await
})
self.parse_thing_from_ident(ctx, ident).await
}

pub async fn parse_thing_from_ident(
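
The range logic maps directly onto `std::ops::Bound`: `>` after the start id makes it exclusive, `=` after `..` makes the end inclusive, and a missing id on either side is unbounded. A standalone sketch over plain integers:

use std::ops::Bound;

// Classify the two bounds of a record-id range such as `1>..=10`,
// mirroring how parse_thing_or_range builds Range { beg, end }.
fn bounds(beg: Option<(i64, bool)>, end: Option<(i64, bool)>) -> (Bound<i64>, Bound<i64>) {
    let beg = match beg {
        Some((id, true)) => Bound::Excluded(id),  // `id>` before `..`
        Some((id, false)) => Bound::Included(id), // plain `id`
        None => Bound::Unbounded,                 // range starts at `..`
    };
    let end = match end {
        Some((id, true)) => Bound::Included(id),  // `..=id`
        Some((id, false)) => Bound::Excluded(id), // `..id`
        None => Bound::Unbounded,                 // trailing `..`
    };
    (beg, end)
}

fn main() {
    // person:1>..=10
    assert_eq!(bounds(Some((1, true)), Some((10, true))), (Bound::Excluded(1), Bound::Included(10)));
    // person:..5
    assert_eq!(bounds(None, Some((5, false))), (Bound::Unbounded, Bound::Excluded(5)));
}
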
@ -211,12 +173,7 @@ impl Parser<'_> {
) -> ParseResult<Thing> {
expected!(self, t!(":"));

let id = enter_flexible_ident!(this = self =>(self.flexible_record_id){
this.peek();
this.no_whitespace()?;

ctx.run(|ctx| this.parse_id(ctx)).await
})?;
let id = ctx.run(|ctx| self.parse_id(ctx)).await?;

Ok(Thing {
tb: ident,
@ -225,87 +182,110 @@ impl Parser<'_> {
}

pub async fn parse_id(&mut self, stk: &mut Stk) -> ParseResult<Id> {
let token = self.next();
let token = self.peek_whitespace();
match token.kind {
t!("{") => {
let object = enter_flexible_ident!(this = self => (false){
this.parse_object(stk, token.span).await
})?;
self.pop_peek();
// object record id
let object = self.parse_object(stk, token.span).await?;
Ok(Id::Object(object))
}
t!("[") => {
let array = enter_flexible_ident!(this = self => (false){
this.parse_array(stk, token.span).await
})?;
self.pop_peek();
// array record id
let array = self.parse_array(stk, token.span).await?;
Ok(Id::Array(array))
}
t!("+") => {
self.peek();
self.no_whitespace()?;
expected!(self, TokenKind::Number(NumberKind::Integer));
let text = self.lexer.string.take().unwrap();
if let Ok(number) = text.parse() {
self.pop_peek();
// starting with a + so it must be a number
let digits_token = self.peek_whitespace();
match digits_token.kind {
TokenKind::Digits => {}
x => unexpected!(self, x, "an integer"),
}

let next = self.peek_whitespace();
match next.kind {
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
// TODO(delskayn) explain that record-ids can't have mantissas,
// exponents or a number suffix
unexpected!(self, next.kind, "an integer");
}
x if Self::tokenkind_continues_ident(x) => {
let span = token.span.covers(next.span);
unexpected!(@span, self, x, "an integer");
}
// allowed
_ => {}
}

let digits_str = self.span_str(digits_token.span);
if let Ok(number) = digits_str.parse() {
Ok(Id::Number(number))
} else {
Ok(Id::String(text))
Ok(Id::String(digits_str.to_owned()))
}
}
t!("-") => {
self.peek();
self.no_whitespace()?;
expected!(self, TokenKind::Number(NumberKind::Integer));
let text = self.lexer.string.take().unwrap();
if let Ok(number) = text.parse::<u64>() {
self.pop_peek();
// starting with a - so it must be a number
let digits_token = self.peek_whitespace();
match digits_token.kind {
TokenKind::Digits => {}
x => unexpected!(self, x, "an integer"),
}

let next = self.peek_whitespace();
match next.kind {
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
// TODO(delskayn) explain that record-ids can't have mantissas,
// exponents or a number suffix
unexpected!(self, next.kind, "an integer");
}
x if Self::tokenkind_continues_ident(x) => {
let span = token.span.covers(next.span);
unexpected!(@span, self, x, "an integer");
}
// allowed
_ => {}
}

let digits_str = self.span_str(digits_token.span);
if let Ok(number) = digits_str.parse::<u64>() {
// Parse to u64 and check if the value is equal to `-i64::MIN` via u64 as
// `-i64::MIN` doesn't fit in an i64
match number.cmp(&((i64::MAX as u64) + 1)) {
Ordering::Less => Ok(Id::Number(-(number as i64))),
Ordering::Equal => Ok(Id::Number(i64::MIN)),
Ordering::Greater => Ok(Id::String(format!("-{}", text))),
Ordering::Greater => Ok(Id::String(format!("-{}", digits_str))),
}
} else {
Ok(Id::String(text))
Ok(Id::String(format!("-{}", digits_str)))
}
}
TokenKind::Number(NumberKind::Integer) => {
// Id handles numbers more loosely than other parts of the code.
// If the number can't fit in an i64 it will instead be parsed as a string.
let text = self.lexer.string.take().unwrap();
if let Ok(number) = text.parse() {
TokenKind::Digits => {
let next = self.peek_whitespace_token_at(1);

if Self::tokenkind_can_start_ident(next.kind) {
let glued = self.glue_ident(self.flexible_record_id)?;
if let TokenKind::Identifier = glued.kind {
self.pop_peek();
return Ok(Id::String(self.lexer.string.take().unwrap()));
} else {
unexpected!(self, glued.kind, "a record-id id")
}
}

self.pop_peek();

let digits_str = self.span_str(token.span);
if let Ok(number) = digits_str.parse::<i64>() {
Ok(Id::Number(number))
} else {
Ok(Id::String(text))
Ok(Id::String(digits_str.to_owned()))
}
}
TokenKind::Number(NumberKind::Exponent) if self.flexible_record_id => {
let text = self.lexer.string.take().unwrap();
if text.bytes().any(|x| !x.is_ascii_alphanumeric()) {
unexpected!(self, token.kind, "an identifier");
}
Ok(Id::String(text))
}
TokenKind::Number(NumberKind::Decimal) if self.flexible_record_id => {
let mut text = self.lexer.string.take().unwrap();
text.push('d');
text.push('e');
text.push('c');
Ok(Id::String(text))
}
TokenKind::Number(NumberKind::DecimalExponent) if self.flexible_record_id => {
let mut text = self.lexer.string.take().unwrap();
if text.bytes().any(|x| !x.is_ascii_alphanumeric()) {
unexpected!(self, token.kind, "an identifier");
}
text.push('d');
text.push('e');
text.push('c');
Ok(Id::String(text))
}
TokenKind::Number(NumberKind::Float) if self.flexible_record_id => {
let mut text = self.lexer.string.take().unwrap();
text.push('f');
Ok(Id::String(text))
}
TokenKind::Duration if self.flexible_record_id => {
self.lexer.duration = None;
let slice = self.lexer.reader.span(token.span);
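
The negative-id path parses the digits as `u64` first because `9223372036854775808`, the magnitude of `i64::MIN`, does not fit in an `i64` before negation. The comparison in isolation:

use std::cmp::Ordering;

// Negate a u64 magnitude into an i64 id, falling back to a string when
// the value is out of range, exactly as parse_id does for `-digits`.
fn negate(digits: u64) -> Result<i64, String> {
    match digits.cmp(&((i64::MAX as u64) + 1)) {
        Ordering::Less => Ok(-(digits as i64)),
        Ordering::Equal => Ok(i64::MIN), // -(i64::MIN) overflows, so handle it apart
        Ordering::Greater => Err(format!("-{digits}")),
    }
}

fn main() {
    assert_eq!(negate(42), Ok(-42));
    assert_eq!(negate(9223372036854775808), Ok(i64::MIN));
    assert_eq!(negate(9223372036854775809), Err("-9223372036854775809".into()));
}
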
@ -317,23 +297,27 @@ impl Parser<'_> {
Ok(Id::String(text))
}
t!("ULID") => {
self.pop_peek();
// TODO: error message about how to use `ulid` as an identifier.
expected!(self, t!("("));
expected!(self, t!(")"));
Ok(Id::Generate(Gen::Ulid))
}
t!("UUID") => {
self.pop_peek();
expected!(self, t!("("));
expected!(self, t!(")"));
Ok(Id::Generate(Gen::Uuid))
}
t!("RAND") => {
self.pop_peek();
expected!(self, t!("("));
expected!(self, t!(")"));
Ok(Id::Generate(Gen::Rand))
}
_ => {
let ident = self.token_value::<Ident>(token)?.0;
self.glue_ident(self.flexible_record_id)?;
let ident = self.next_token_value::<Ident>()?.0;
Ok(Id::String(ident))
}
}
@ -582,5 +566,18 @@ mod tests {
assert_ident_parses_correctly("1ns1h");
assert_ident_parses_correctly("000e8");
assert_ident_parses_correctly("000e8bla");

assert_ident_parses_correctly("y123");
assert_ident_parses_correctly("w123");
assert_ident_parses_correctly("d123");
assert_ident_parses_correctly("h123");
assert_ident_parses_correctly("m123");
assert_ident_parses_correctly("s123");
assert_ident_parses_correctly("ms123");
assert_ident_parses_correctly("us123");
assert_ident_parses_correctly("ns123");
assert_ident_parses_correctly("dec123");
assert_ident_parses_correctly("f123");
assert_ident_parses_correctly("e123");
}
}
core/src/syn/parser/token.rs (new file, 527 additions)
@ -0,0 +1,527 @@
//! Implements token gluing logic.

use crate::{
sql::duration::{
SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK, SECONDS_PER_YEAR,
},
syn::{
parser::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser},
token::{t, DurationSuffix, NumberKind, NumberSuffix, Token, TokenKind},
},
};

use std::time::Duration as StdDuration;

impl Parser<'_> {
/// Returns if a token kind can start an identifier.
pub fn tokenkind_can_start_ident(t: TokenKind) -> bool {
matches!(
t,
TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::Identifier
| TokenKind::Exponent
| TokenKind::DatetimeChars(_)
| TokenKind::NumberSuffix(_)
| TokenKind::DurationSuffix(
// All except the unicode micro suffix
DurationSuffix::Nano
| DurationSuffix::Micro | DurationSuffix::Milli
| DurationSuffix::Second | DurationSuffix::Minute
| DurationSuffix::Hour | DurationSuffix::Day
| DurationSuffix::Week | DurationSuffix::Year
)
)
}

/// Returns if a token kind can continue an identifier.
pub fn tokenkind_continues_ident(t: TokenKind) -> bool {
matches!(
t,
TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::Identifier
| TokenKind::DatetimeChars(_)
| TokenKind::Exponent
| TokenKind::NumberSuffix(_)
| TokenKind::NaN | TokenKind::DurationSuffix(
// All except the unicode micro suffix
DurationSuffix::Nano
| DurationSuffix::Micro
| DurationSuffix::Milli
| DurationSuffix::Second
| DurationSuffix::Minute
| DurationSuffix::Hour
| DurationSuffix::Day
| DurationSuffix::Week
)
)
}

/// Returns if the peeked token can start an identifier.
pub fn peek_can_start_ident(&mut self) -> bool {
Self::tokenkind_can_start_ident(self.peek_kind())
}

/// Returns if the peeked token can continue an identifier.
pub fn peek_continues_ident(&mut self) -> bool {
Self::tokenkind_continues_ident(self.peek_kind())
}

/// Glues a token and immediately consumes it.
pub fn glue_next(&mut self) -> ParseResult<Token> {
self.glue()?;
Ok(self.next())
}

/// Glues the next tokens together and returns the resulting token without consuming it.
pub fn glue(&mut self) -> ParseResult<Token> {
let token = self.peek();
match token.kind {
TokenKind::Exponent
| TokenKind::NumberSuffix(_)
| TokenKind::DurationSuffix(_)
| TokenKind::DatetimeChars(_) => self.glue_ident(false),
TokenKind::Digits => self.glue_numeric(),
t!("\"") | t!("'") => {
self.pop_peek();
let t = self.lexer.relex_strand(token);
let TokenKind::Strand = t.kind else {
unexpected!(self, t.kind, "a strand")
};
self.prepend_token(t);
Ok(t)
}
t!("+") | t!("-") => {
if let TokenKind::Digits = self.peek_whitespace_token_at(1).kind {
self.glue_number()
} else {
Ok(token)
}
}
_ => Ok(token),
}
}

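
`glue()` is a dispatcher: it only commits to a specific gluing strategy once the peeked kind makes the outcome unambiguous, e.g. `+`/`-` glue into a number only when digits follow directly. That dispatch in miniature, as a standalone sketch with hypothetical names:

#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind { Sign, Digits, Quote, Suffix, Other }

#[derive(Debug, PartialEq)]
enum Glued { Number, Strand, Ident, Unchanged }

// Decide the gluing strategy from the current kind and one token of
// whitespace-sensitive lookahead, as glue() does.
fn dispatch(kind: Kind, next: Kind) -> Glued {
    match kind {
        Kind::Suffix => Glued::Ident,  // glue_ident(false)
        Kind::Digits => Glued::Number, // glue_numeric()
        Kind::Quote => Glued::Strand,  // relex_strand()
        Kind::Sign if next == Kind::Digits => Glued::Number,
        _ => Glued::Unchanged,
    }
}

fn main() {
    assert_eq!(dispatch(Kind::Sign, Kind::Digits), Glued::Number); // `+1`
    assert_eq!(dispatch(Kind::Sign, Kind::Other), Glued::Unchanged); // a binary `+`
}
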
/// Glues all subsequent tokens that can make up an identifier into a single string.
pub fn glue_ident(&mut self, flexible: bool) -> ParseResult<Token> {
let start = self.peek();

let mut token_buffer = match start.kind {
TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
self.pop_peek();

self.span_str(start.span).to_owned()
}
TokenKind::Digits if flexible => {
self.pop_peek();
self.span_str(start.span).to_owned()
}
TokenKind::DurationSuffix(x) if x.can_be_ident() => {
self.pop_peek();

self.span_str(start.span).to_owned()
}
_ => return Ok(start),
};

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);

let mut prev = start;
loop {
let p = self.peek_whitespace();
match p.kind {
// These token kinds always complete an ident, no more identifier parts can happen
// after this.
TokenKind::Identifier => {
self.pop_peek();
let buffer = self.lexer.string.take().unwrap();
token_buffer.push_str(&buffer);
prev = p;
break;
}
TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::NumberSuffix(_) => {
self.pop_peek();
let str = self.span_str(p.span);
token_buffer.push_str(str);

prev = p;

break;
}
// These tokens might have some more parts following them
TokenKind::Exponent => {
self.pop_peek();
let str = self.span_str(p.span);
token_buffer.push_str(str);

prev = p;
}
TokenKind::DurationSuffix(suffix) => {
self.pop_peek();
if !suffix.can_be_ident() {
return Err(ParseError::new(ParseErrorKind::InvalidIdent, p.span));
}
token_buffer.push_str(suffix.as_str());
prev = p;
}
TokenKind::Digits => {
self.pop_peek();
let str = self.span_str(p.span);
token_buffer.push_str(str);
prev = p;
}
_ => break,
}
}

let token = Token {
kind: TokenKind::Identifier,
span: start.span.covers(prev.span),
};

self.lexer.string = Some(token_buffer);
self.prepend_token(token);

Ok(token)
}

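
This loop is what lets inputs like `1ns1h` or `dec123` (see the idents exercised in the tests above) come out as single identifiers when `flexible` is set: digits, suffixes and keywords are appended to one buffer until a token that cannot continue an identifier appears. The accumulation in isolation, as a standalone sketch over strings:

// Accumulate ident-like pieces into one buffer; stop at the first piece
// that cannot continue an identifier, as glue_ident's loop does.
fn glue_pieces(pieces: &[&str]) -> String {
    let mut buffer = String::new();
    for p in pieces {
        let continues = p.chars().all(|c| c.is_ascii_alphanumeric() || c == '_');
        if !continues {
            break;
        }
        buffer.push_str(p);
    }
    buffer
}

fn main() {
    // `1ns1h` lexes roughly as Digits("1"), Suffix("ns"), Digits("1"), Suffix("h")
    assert_eq!(glue_pieces(&["1", "ns", "1", "h"]), "1ns1h");
    assert_eq!(glue_pieces(&["dec", "123", "..", "x"]), "dec123");
}
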
pub fn glue_numeric(&mut self) -> ParseResult<Token> {
let peek = self.peek();
match peek.kind {
TokenKind::Digits => {
if matches!(self.peek_whitespace_token_at(1).kind, TokenKind::DurationSuffix(_)) {
return self.glue_duration();
}
self.glue_number()
}
t!("+") | t!("-") => self.glue_number(),
_ => Ok(peek),
}
}

pub fn glue_number(&mut self) -> ParseResult<Token> {
let start = self.peek();

match start.kind {
t!("+") | t!("-") => {
self.pop_peek();

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);

let n = self.peek_whitespace();

if n.kind != TokenKind::Digits {
unexpected!(self, start.kind, "a number")
}

self.pop_peek();
}
TokenKind::Digits => {
self.pop_peek();
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);
}
_ => return Ok(start),
};

let mut kind = NumberKind::Integer;

// Check for mantissa
if let t!(".") = self.peek_whitespace().kind {
self.pop_peek();
let next = self.peek_whitespace();
if next.kind != TokenKind::Digits {
unexpected!(self, next.kind, "digits after the dot");
}
self.pop_peek();
kind = NumberKind::Float;
}

// Check for exponent
if let TokenKind::Exponent = self.peek_whitespace().kind {
self.pop_peek();
let exponent_token = self.peek_whitespace();
match exponent_token.kind {
t!("+") | t!("-") => {
self.pop_peek();
let exponent_token = self.peek_whitespace();
if exponent_token.kind != TokenKind::Digits {
unexpected!(self, exponent_token.kind, "digits after the exponent")
}
}
TokenKind::Digits => {}
x => unexpected!(self, x, "digits after the exponent"),
}
self.pop_peek();
kind = NumberKind::Float;
}

// Check for number suffix
let suffix_token = self.peek_whitespace();
if let TokenKind::NumberSuffix(suffix) = suffix_token.kind {
self.pop_peek();
match suffix {
NumberSuffix::Float => {
kind = NumberKind::Float;
}
NumberSuffix::Decimal => {
kind = NumberKind::Decimal;
}
}
}

// Check that no ident-like tokens follow
let next = self.peek_whitespace();
if Self::tokenkind_continues_ident(next.kind) {
unexpected!(self, next.kind, "number to end")
}

let token = Token {
kind: TokenKind::Number(kind),
span: start.span.covers(self.last_span()),
};

self.prepend_token(token);

Ok(token)
}

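
`glue_number` walks the optional pieces in a fixed order: sign, digits, mantissa, exponent, suffix, then verifies nothing ident-like trails the number. The same walk over a string, as a standalone sketch using a hand-rolled recognizer rather than the crate's lexer:

#[derive(Debug, PartialEq)]
enum NumKind { Integer, Float, Decimal }

// Recognize sign? digits ('.' digits)? ('e' sign? digits)? ('f'|"dec")?
// and classify it the way glue_number assigns NumberKind.
fn classify(src: &str) -> Option<NumKind> {
    let mut kind = NumKind::Integer;
    let mut rest = src.strip_prefix(|c: char| c == '+' || c == '-').unwrap_or(src);
    let digits = rest.chars().take_while(|c| c.is_ascii_digit()).count();
    if digits == 0 {
        return None;
    }
    rest = &rest[digits..];
    if let Some(r) = rest.strip_prefix('.') {
        let m = r.chars().take_while(|c| c.is_ascii_digit()).count();
        if m == 0 {
            return None; // digits required after the dot
        }
        rest = &r[m..];
        kind = NumKind::Float;
    }
    if let Some(r) = rest.strip_prefix('e') {
        let r = r.strip_prefix(|c: char| c == '+' || c == '-').unwrap_or(r);
        let e = r.chars().take_while(|c| c.is_ascii_digit()).count();
        if e == 0 {
            return None; // digits required after the exponent
        }
        rest = &r[e..];
        kind = NumKind::Float;
    }
    match rest {
        "" => Some(kind),
        "f" => Some(NumKind::Float),
        "dec" => Some(NumKind::Decimal),
        _ => None, // something ident-like trails the number
    }
}

fn main() {
    assert_eq!(classify("-123"), Some(NumKind::Integer));
    assert_eq!(classify("1.5e3"), Some(NumKind::Float));
    assert_eq!(classify("10dec"), Some(NumKind::Decimal));
    assert_eq!(classify("1.x"), None);
}
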
pub fn glue_duration(&mut self) -> ParseResult<Token> {
let mut duration = StdDuration::ZERO;

let start = self.peek();
match start.kind {
TokenKind::Digits => {
self.pop_peek();
}
_ => return Ok(start),
};

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);

let mut cur = start;
loop {
let p = self.peek_whitespace();

let suffix = match p.kind {
TokenKind::DurationSuffix(x) => x,
x => unexpected!(self, x, "a duration suffix"),
};

self.pop_peek();

let digits_str = self.span_str(cur.span);
let digits_value: u64 = digits_str
.parse()
.map_err(ParseErrorKind::InvalidInteger)
.map_err(|e| ParseError::new(e, p.span))?;

let addition = match suffix {
DurationSuffix::Nano => StdDuration::from_nanos(digits_value),
DurationSuffix::Micro | DurationSuffix::MicroUnicode => {
StdDuration::from_micros(digits_value)
}
DurationSuffix::Milli => StdDuration::from_millis(digits_value),
DurationSuffix::Second => StdDuration::from_secs(digits_value),
DurationSuffix::Minute => {
let minutes =
digits_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
})?;
StdDuration::from_secs(minutes)
}
DurationSuffix::Hour => {
let hours = digits_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
})?;
StdDuration::from_secs(hours)
}
DurationSuffix::Day => {
let days = digits_value.checked_mul(SECONDS_PER_DAY).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
})?;
StdDuration::from_secs(days)
}
DurationSuffix::Week => {
let weeks = digits_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
})?;
StdDuration::from_secs(weeks)
}
DurationSuffix::Year => {
let years = digits_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
})?;
StdDuration::from_secs(years)
}
};

duration = duration.checked_add(addition).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
})?;

match self.peek_whitespace().kind {
TokenKind::Digits => {
cur = self.pop_peek();
}
x if Parser::tokenkind_continues_ident(x) => {
let span = start.span.covers(p.span);
unexpected!(@span, self, x, "a duration")
}
_ => break,
}
}

let span = start.span.covers(cur.span);
let token = Token {
kind: TokenKind::Duration,
span,
};

self.lexer.duration = Some(duration);
self.prepend_token(token);

Ok(token)
}

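
Each digits/suffix pair is converted and accumulated with checked arithmetic, so an overflowing unit conversion or sum surfaces as `DurationOverflow` instead of wrapping. The accumulation in isolation, as a standalone sketch over a subset of the suffixes:

use std::time::Duration;

const SECONDS_PER_MINUTE: u64 = 60;
const SECONDS_PER_HOUR: u64 = 60 * 60;

// Fold `(value, suffix)` pairs into one Duration, failing on overflow
// exactly where glue_duration reports DurationOverflow.
fn fold(parts: &[(u64, &str)]) -> Result<Duration, &'static str> {
    let mut total = Duration::ZERO;
    for &(value, suffix) in parts {
        let add = match suffix {
            "ns" => Duration::from_nanos(value),
            "s" => Duration::from_secs(value),
            "m" => Duration::from_secs(
                value.checked_mul(SECONDS_PER_MINUTE).ok_or("duration overflow")?,
            ),
            "h" => Duration::from_secs(
                value.checked_mul(SECONDS_PER_HOUR).ok_or("duration overflow")?,
            ),
            _ => return Err("a duration suffix"),
        };
        total = total.checked_add(add).ok_or("duration overflow")?;
    }
    Ok(total)
}

fn main() {
    // `1h30m` -> 5400s
    assert_eq!(fold(&[(1, "h"), (30, "m")]), Ok(Duration::from_secs(5400)));
    assert!(fold(&[(u64::MAX, "h")]).is_err());
}
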
/// Glues the next tokens which would make up a float together into a single buffer.
/// Returns an error if the tokens would make up an invalid float.
pub fn glue_float(&mut self) -> ParseResult<Token> {
let start = self.peek();

match start.kind {
t!("+") | t!("-") => {
self.pop_peek();

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);

let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
let span = start.span.covers(digits_token.span);
unexpected!(@span, self, digits_token.kind, "a floating point number")
}
}
TokenKind::Digits => {
self.pop_peek();

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);
}
TokenKind::NumberSuffix(NumberSuffix::Float) => {
return Ok(start);
}
_ => return Ok(start),
}

// check for mantissa
if let t!(".") = self.peek_whitespace().kind {
self.pop_peek();
let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token.kind, "a floating point number")
}
self.pop_peek();
};

// check for exponent
if let TokenKind::Exponent = self.peek_whitespace().kind {
self.pop_peek();
let mut digits_token = self.peek_whitespace();

if let t!("+") | t!("-") = digits_token.kind {
self.pop_peek();
digits_token = self.peek_whitespace();
}

if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token.kind, "a floating point number")
}
self.pop_peek();
}

// check for the number suffix
if let TokenKind::NumberSuffix(suffix) = self.peek_whitespace().kind {
match suffix {
NumberSuffix::Float => {
self.pop_peek();
}
NumberSuffix::Decimal => {
unexpected!(self, t!("dec"), "a floating point number")
}
}
}

let t = self.peek_whitespace();
if Self::tokenkind_continues_ident(t.kind) {
unexpected!(self, t.kind, "a floating point number to end")
}

let span = start.span.covers(self.last_span());
let token = Token {
kind: TokenKind::Number(NumberKind::Float),
span,
};

self.prepend_token(token);

Ok(token)
}

pub fn glue_plain_strand(&mut self) -> ParseResult<Token> {
let start = self.peek();
match start.kind {
t!("\"") | t!("'") => {}
_ => return Ok(start),
};

let token = self.lexer.relex_strand(start);
self.prepend_token(token);
Ok(token)
}
}
@ -27,6 +27,16 @@ impl<const S: usize> TokenBuffer<S> {
self.write = next_write;
}

#[inline]
pub fn push_front(&mut self, token: Token) {
let next_read = self.read.checked_sub(1).unwrap_or((S - 1) as u8);
if next_read == self.write {
panic!("token buffer full");
}
self.buffer[next_read as usize] = token;
self.read = next_read;
}

#[inline]
pub fn pop(&mut self) -> Option<Token> {
if self.write == self.read {
@ -57,6 +67,10 @@ impl<const S: usize> TokenBuffer<S> {
}
}

pub fn is_empty(&self) -> bool {
self.write == self.read
}

pub fn at(&mut self, at: u8) -> Option<Token> {
if at >= self.len() {
return None;
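
`push_front` is what makes gluing cheap: the combined token is pushed back onto the read side of the fixed-size ring, so the next peek sees it without re-lexing. A standalone model of the index arithmetic, with `u8` cursors over an `S`-slot array and one slot kept free to tell full from empty:

// Minimal ring buffer with a front-push, modelling TokenBuffer's cursors.
struct Ring<const S: usize> {
    buffer: [u32; S],
    read: u8,
    write: u8,
}

impl<const S: usize> Ring<S> {
    fn new() -> Self {
        Ring { buffer: [0; S], read: 0, write: 0 }
    }
    fn push_front(&mut self, v: u32) {
        // step the read cursor backwards, wrapping to the last slot.
        let next_read = self.read.checked_sub(1).unwrap_or((S - 1) as u8);
        assert!(next_read != self.write, "buffer full");
        self.buffer[next_read as usize] = v;
        self.read = next_read;
    }
    fn pop(&mut self) -> Option<u32> {
        if self.read == self.write {
            return None; // read == write means empty
        }
        let v = self.buffer[self.read as usize];
        self.read = (self.read + 1) % S as u8;
        Some(v)
    }
}

fn main() {
    let mut ring: Ring<4> = Ring::new();
    ring.push_front(7); // read wraps from slot 0 to slot 3
    assert_eq!(ring.pop(), Some(7));
    assert_eq!(ring.pop(), None);
}
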
@ -125,6 +125,7 @@ keyword! {
Only => "ONLY",
Option => "OPTION",
Order => "ORDER",
Original => "ORIGINAL",
Parallel => "PARALLEL",
Param => "PARAM",
Passhash => "PASSHASH",
@ -1,5 +1,8 @@
/// A shorthand for token kinds.
macro_rules! t {
(" ") => {
$crate::syn::token::TokenKind::WhiteSpace
};
("invalid") => {
$crate::syn::token::TokenKind::Invalid
};
@ -26,27 +29,80 @@ macro_rules! t {
};

("r\"") => {
$crate::syn::token::TokenKind::OpenRecordString {
double: true,
}
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::RecordIdDouble)
};
("r'") => {
$crate::syn::token::TokenKind::OpenRecordString {
double: false,
}
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::RecordId)
};
("u\"") => {
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::UuidDouble)
};
("u'") => {
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::Uuid)
};
("d\"") => {
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::DateTimeDouble)
};
("d'") => {
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::DateTime)
};
("\"") => {
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::PlainDouble)
};
("'") => {
$crate::syn::token::TokenKind::Qoute($crate::syn::token::QouteKind::Plain)
};

("\"r") => {
$crate::syn::token::TokenKind::CloseRecordString {
$crate::syn::token::TokenKind::CloseString {
double: true,
}
};
("'r") => {
$crate::syn::token::TokenKind::CloseRecordString {
$crate::syn::token::TokenKind::CloseString {
double: false,
}
};

("ns") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Nano)
};
("us") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Micro)
};
("µs") => {
$crate::syn::token::TokenKind::DurationSuffix(
$crate::syn::token::DurationSuffix::MicroUnicode,
)
};
("ms") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Milli)
};
("s") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Second)
};
("m") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Minute)
};
("h") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Hour)
};
("d") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Day)
};
("w") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Week)
};
("y") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Year)
};

("f") => {
$crate::syn::token::TokenKind::NumberSuffix($crate::syn::token::NumberSuffix::Float)
};
("dec") => {
$crate::syn::token::TokenKind::NumberSuffix($crate::syn::token::NumberSuffix::Decimal)
};

("<") => {
$crate::syn::token::TokenKind::LeftChefron
};
@ -144,9 +200,6 @@ macro_rules! t {
("$param") => {
$crate::syn::token::TokenKind::Parameter
};
("123") => {
$crate::syn::token::TokenKind::Number(_)
};

("!") => {
$crate::syn::token::TokenKind::Operator($crate::syn::token::Operator::Not)
@ -6,10 +6,8 @@ mod keyword;
pub(crate) use keyword::keyword_t;
pub use keyword::Keyword;
mod mac;
pub(crate) use mac::t;

use crate::sql::change_feed_include::ChangeFeedInclude;
use crate::sql::{language::Language, Algorithm};
pub(crate) use mac::t;

/// A location in the source passed to the lexer.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
@ -52,6 +50,18 @@ impl Span {
len: 0,
}
}

/// Returns if the given span is the next span after this one.
pub fn is_followed_by(&self, other: &Self) -> bool {
let end = self.offset as usize + self.len as usize;
other.offset as usize == end
}

/// Returns if this span immediately follows the given one.
pub fn follows_from(&self, other: &Self) -> bool {
let end = other.offset as usize + other.len as usize;
self.offset as usize == end
}
}

#[repr(u8)]
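
`is_followed_by` is the guard the gluing code leans on in its `debug_assert!`s: two spans may be merged only when the second starts at the exact byte where the first ends, i.e. no whitespace was swallowed between them. The arithmetic in isolation, as a standalone sketch:

#[derive(Clone, Copy, Debug)]
struct Span {
    offset: u32,
    len: u32,
}

impl Span {
    // True when `other` starts at the byte immediately after `self` ends.
    fn is_followed_by(&self, other: &Span) -> bool {
        let end = self.offset as usize + self.len as usize;
        other.offset as usize == end
    }
    // Smallest span covering both, as used when emitting the glued token.
    fn covers(&self, other: &Span) -> Span {
        let start = self.offset.min(other.offset);
        let end = (self.offset + self.len).max(other.offset + other.len);
        Span { offset: start, len: end - start }
    }
}

fn main() {
    let a = Span { offset: 0, len: 3 };
    let b = Span { offset: 3, len: 2 };
    assert!(a.is_followed_by(&b));
    let c = a.covers(&b);
    assert_eq!((c.offset, c.len), (0, 5));
}
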
@@ -244,58 +254,134 @@ impl VectorTypeKind {
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub enum NumberKind {
    // A plain integer number.
    Integer,
    // A number with a decimal postfix.
    Decimal,
    // A number with a decimal postfix.
    DecimalExponent,
    // A number with a float postfix.
pub enum DurationSuffix {
    Nano,
    Micro,
    MicroUnicode,
    Milli,
    Second,
    Minute,
    Hour,
    Day,
    Week,
    Year,
}

impl DurationSuffix {
    pub fn can_be_ident(&self) -> bool {
        !matches!(self, DurationSuffix::MicroUnicode)
    }

    pub fn as_str(&self) -> &'static str {
        match self {
            DurationSuffix::Nano => "ns",
            DurationSuffix::Micro => "us",
            DurationSuffix::MicroUnicode => "µs",
            DurationSuffix::Milli => "ms",
            DurationSuffix::Second => "s",
            DurationSuffix::Minute => "m",
            DurationSuffix::Hour => "h",
            DurationSuffix::Day => "d",
            DurationSuffix::Week => "w",
            DurationSuffix::Year => "y",
        }
    }
}

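These suffixes feed token gluing: the lexer emits a bare `Digits` token followed by a `DurationSuffix` token, and the parser can merge the pair into a single `Duration` token when the two touch in the source. A rough sketch under that reading; `try_glue_duration`, the `union` span helper, and the two-field `Token` literal are assumptions for illustration, not the parser's real API:

    // Illustrative only: merge `Digits` + `DurationSuffix` into one Duration
    // token when the two are directly adjacent ("12ms", but not "12 ms").
    fn try_glue_duration(first: Token, second: Token) -> Option<Token> {
        if matches!(first.kind, TokenKind::Digits)
            && matches!(second.kind, TokenKind::DurationSuffix(_))
            && first.span.is_followed_by(&second.span)
        {
            // `union` is an assumed helper producing a span covering both tokens,
            // and Token is assumed to be just { kind, span } for this sketch.
            return Some(Token {
                kind: TokenKind::Duration,
                span: first.span.union(second.span),
            });
        }
        None
    }
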
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum NumberSuffix {
    Float,
    // A number with a float postfix that had a mantissa.
    FloatMantissa,
    // A number with a `.3` part.
    Mantissa,
    // A number with a `.3e10` part.
    MantissaExponent,
    // A number with a `.3e10` part.
    Exponent,
    NaN,
    Decimal,
}

impl Algorithm {
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::EdDSA => "EDDSA",
            Self::Es256 => "ES256",
            Self::Es384 => "ES384",
            Self::Es512 => "ES512",
            Self::Hs256 => "HS256",
            Self::Hs384 => "HS384",
            Self::Hs512 => "HS512",
            Self::Ps256 => "PS256",
            Self::Ps384 => "PS384",
            Self::Ps512 => "PS512",
            Self::Rs256 => "RS256",
            Self::Rs384 => "RS384",
            Self::Rs512 => "RS512",
        }
    }
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum QouteKind {
    /// `'`
    Plain,
    /// `"`
    PlainDouble,
    /// `r'`
    RecordId,
    /// `r"`
    RecordIdDouble,
    /// `u'`
    Uuid,
    /// `u"`
    UuidDouble,
    /// `d'`
    DateTime,
    /// `d"`
    DateTimeDouble,
}

impl QouteKind {
    pub fn as_str(&self) -> &'static str {
        match self {
            QouteKind::Plain | QouteKind::PlainDouble => "a strand",
            QouteKind::RecordId | QouteKind::RecordIdDouble => "a record-id strand",
            QouteKind::Uuid | QouteKind::UuidDouble => "a uuid",
            QouteKind::DateTime | QouteKind::DateTimeDouble => "a datetime",
        }
    }
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum NumberKind {
    Decimal,
    Float,
    Integer,
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum DatetimeChars {
    T,
    Z,
}

/// The type of token
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub enum TokenKind {
    WhiteSpace,
    Keyword(Keyword),
    Algorithm(Algorithm),
    ChangeFeedInclude(ChangeFeedInclude),
    Language(Language),
    Distance(DistanceKind),
    VectorType(VectorTypeKind),
    Operator(Operator),
    OpenDelim(Delim),
    CloseDelim(Delim),
    // a token denoting the opening of a record string, i.e. `r"`
    OpenRecordString {
        double: bool,
    },
    /// a token denoting the closing of a record string, i.e. `"`
    /// Never produced normally by the lexer.
    CloseRecordString {
        double: bool,
    },
    Regex,
    Uuid,
    DateTime,
    /// a token denoting the opening of a string, i.e. `"`
    Qoute(QouteKind),
    /// Not produced by the lexer but only the result of token gluing.
    Number(NumberKind),
    /// Not produced by the lexer but only the result of token gluing.
    Duration,
    /// Not produced by the lexer but only the result of token gluing.
    Strand,
    Regex,
    /// A parameter like `$name`.
    Parameter,
    /// A duration.
    Duration,
    Number(NumberKind),
    Identifier,
    /// `<`
    LeftChefron,
@@ -337,6 +423,18 @@ pub enum TokenKind {
    Invalid,
    /// A token which indicates the end of the file.
    Eof,
    /// A token consisting of one or more ascii digits.
    Digits,
    /// An identifier-like token which matches a duration suffix.
    DurationSuffix(DurationSuffix),
    /// An identifier-like token which matches a part of a datetime.
    DatetimeChars(DatetimeChars),
    /// An identifier-like token which matches an exponent.
    Exponent,
    /// An identifier-like token which matches a number suffix.
    NumberSuffix(NumberSuffix),
    /// The Not-A-Number number token.
    NaN,
}

/// An assertion statically checking that the size of TokenKind remains two bytes
@@ -344,15 +442,7 @@ const _TOKEN_KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::<TokenKind>()];

impl TokenKind {
    pub fn has_data(&self) -> bool {
        matches!(
            self,
            TokenKind::Identifier
                | TokenKind::Uuid
                | TokenKind::DateTime
                | TokenKind::Strand
                | TokenKind::Parameter
                | TokenKind::Regex
        )
        matches!(self, TokenKind::Identifier | TokenKind::Duration)
    }

    pub fn can_be_identifier(&self) -> bool {
@@ -362,6 +452,8 @@ impl TokenKind {
            | TokenKind::Keyword(_)
            | TokenKind::Language(_)
            | TokenKind::Algorithm(_)
            | TokenKind::DatetimeChars(_)
            | TokenKind::Distance(_),
        )
    }

@@ -397,17 +489,9 @@ impl TokenKind {
            TokenKind::CloseDelim(Delim::Paren) => ")",
            TokenKind::CloseDelim(Delim::Brace) => "}",
            TokenKind::CloseDelim(Delim::Bracket) => "]",
            TokenKind::OpenRecordString {
                ..
            } => "a record string",
            TokenKind::CloseRecordString {
                ..
            } => "a closing record string",
            TokenKind::Uuid => "a uuid",
            TokenKind::DateTime => "a date-time",
            TokenKind::DurationSuffix(x) => x.as_str(),
            TokenKind::Strand => "a strand",
            TokenKind::Parameter => "a parameter",
            TokenKind::Duration => "a duration",
            TokenKind::Number(_) => "a number",
            TokenKind::Identifier => "an identifier",
            TokenKind::Regex => "a regex",
@@ -431,7 +515,15 @@ impl TokenKind {
            TokenKind::At => "@",
            TokenKind::Invalid => "Invalid",
            TokenKind::Eof => "Eof",
            TokenKind::ChangeFeedInclude(_) => "change feed include",
            TokenKind::WhiteSpace => "whitespace",
            TokenKind::Qoute(x) => x.as_str(),
            TokenKind::Duration => "a duration",
            TokenKind::Digits => "a number",
            TokenKind::NaN => "NaN",
            // Below are small broken-up tokens which are, most of the time, identifiers.
            TokenKind::DatetimeChars(_) => "an identifier",
            TokenKind::Exponent => "an identifier",
            TokenKind::NumberSuffix(_) => "an identifier",
        }
    }
}

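These display names presumably feed parser diagnostics. A hedged sketch of how such a message might be assembled; the `unexpected` helper is invented, and the enclosing method is assumed to be an `as_str`-style name accessor as the match arms suggest:

    // Hypothetical helper: render an "expected X, found Y" style diagnostic
    // from the human-readable names above.
    fn unexpected(found: TokenKind, expected: TokenKind) -> String {
        format!("unexpected token {}, expected {}", found.as_str(), expected.as_str())
    }
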
@@ -460,4 +552,12 @@ impl Token {
    pub fn is_eof(&self) -> bool {
        matches!(self.kind, TokenKind::Eof)
    }

    pub fn is_followed_by(&self, other: &Token) -> bool {
        self.span.is_followed_by(&other.span)
    }

    pub fn follows_from(&self, other: &Token) -> bool {
        self.span.follows_from(&other.span)
    }
}

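At the token level the two checks are mirror images of each other, which the sketch below makes explicit; `adjacent` is an illustrative helper, not part of the crate:

    // Illustrative: the same adjacency relation, asked from either side.
    // For source "1h" the Digits token is followed by the suffix token;
    // for "1 h" the intervening whitespace breaks the relation.
    fn adjacent(previous: &Token, next: &Token) -> bool {
        debug_assert_eq!(previous.is_followed_by(next), next.follows_from(previous));
        previous.is_followed_by(next)
    }
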
@@ -182,8 +182,6 @@ where
                }),
                Err(_) => Err(crate::Error::from(Error::NotLiveQuery(idx))),
            };

            dbg!(&response);
            response.live_queries.insert(idx, res);
        }

@@ -416,7 +416,8 @@ impl Drop for Test {
    /// Drops the instance of the struct
    /// This method will panic if there are remaining responses that have not been checked.
    fn drop(&mut self) {
        if !self.responses.is_empty() {
        // Check for a panic to make sure the test doesn't cause a double panic.
        if !std::thread::panicking() && !self.responses.is_empty() {
            panic!("Not every response has been checked");
        }
    }

@@ -1365,7 +1365,7 @@ async fn select_with_uuid_value() -> Result<(), Error> {
                plan: {
                    index: 'sessionUid',
                    operator: '=',
                    value: '00ad70db-f435-442e-9012-1cd853102084'
                    value: u'00ad70db-f435-442e-9012-1cd853102084'
                },
                table: 'sessions'
            },
@@ -1388,7 +1388,7 @@ async fn select_with_uuid_value() -> Result<(), Error> {
        r#"[
            {
                "id": sessions:1,
                "sessionUid": "00ad70db-f435-442e-9012-1cd853102084"
                "sessionUid": u"00ad70db-f435-442e-9012-1cd853102084"
            }
        ]"#,
    );