Move some of the handling of complex tokens back to the lexer. (#4708)

Co-authored-by: Tobie Morgan Hitchcock <tobie@surrealdb.com>
Co-authored-by: Dmitrii Blaginin <dmitrii@blaginin.me>
Mees Delzenne 2024-09-12 18:23:33 +02:00 committed by GitHub
parent 4e2b2b9e30
commit 1a1278fc3a
60 changed files with 2411 additions and 2513 deletions

View file

@ -20,7 +20,7 @@ impl Tokens {
}
}
pub(super) fn get_token_string<'a>(&'a self, t: &'a Token) -> Result<&str, Error> {
pub(super) fn get_token_string<'a>(&'a self, t: &'a Token) -> Result<&'a str, Error> {
t.get_str(&self.i)
}
@ -157,7 +157,7 @@ impl Token {
}
}
pub(super) fn get_str<'a>(&'a self, i: &'a str) -> Result<&str, Error> {
pub(super) fn get_str<'a>(&'a self, i: &'a str) -> Result<&'a str, Error> {
match self {
Token::Ref {
bytes,

View file

@ -7,8 +7,11 @@ use std::fmt;
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
#[non_exhaustive]
pub enum Dir {
/// `<-`
In,
/// `->`
Out,
/// `<->`
Both,
}

View file

@ -137,7 +137,7 @@ impl fmt::Display for Part {
// ------------------------------
pub trait Next<'a> {
fn next(&'a self) -> &[Part];
fn next(&'a self) -> &'a [Part];
}
impl<'a> Next<'a> for &'a [Part] {
@ -152,7 +152,7 @@ impl<'a> Next<'a> for &'a [Part] {
// ------------------------------
pub trait NextMethod<'a> {
fn next_method(&'a self) -> &[Part];
fn next_method(&'a self) -> &'a [Part];
}
impl<'a> NextMethod<'a> for &'a [Part] {

View file

@ -95,6 +95,14 @@ impl SyntaxError {
self
}
pub fn with_cause<T: Display>(mut self, t: T) -> Self {
self.diagnostic = Box::new(Diagnostic {
kind: DiagnosticKind::Cause(t.to_string()),
next: Some(self.diagnostic),
});
self
}
pub fn render_on(&self, source: &str) -> RenderedError {
let mut res = RenderedError {
errors: Vec::new(),
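The new `with_cause` builder prepends a cause to the diagnostic chain. A minimal sketch of the call shape, reusing the `error!` macro seen throughout this diff (the message, span, and cause text are hypothetical):

let err = error!("Invalid duration, value overflowed maximum allowed value", @span)
    .with_cause("while adding the `y` component");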

View file

@ -170,6 +170,11 @@ impl fmt::Display for Snippet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// extra spacing for the line number
let spacing = self.location.line.ilog10() as usize + 1;
for _ in 0..spacing {
f.write_str(" ")?;
}
writeln!(f, "--> [{}:{}]", self.location.line, self.location.column)?;
for _ in 0..spacing {
f.write_str(" ")?;
}
@ -301,6 +306,7 @@ mod test {
let error_string = format!("{}", error);
let expected = r#"some_error
--> [4:10]
|
4 | ...hallo error...
| ^^^^^ this is wrong

View file

@ -4,12 +4,12 @@ use crate::syn::{
unicode::{byte, chars},
Lexer,
},
token::{t, DatetimeChars, Token, TokenKind},
token::{t, Token, TokenKind},
};
impl<'a> Lexer<'a> {
/// Eats a single line comment.
pub fn eat_single_line_comment(&mut self) {
pub(super) fn eat_single_line_comment(&mut self) {
loop {
let Some(byte) = self.reader.next() else {
break;
@ -45,7 +45,7 @@ impl<'a> Lexer<'a> {
}
/// Eats a multi-line comment, returning an error if the closing `*/` is missing.
pub fn eat_multi_line_comment(&mut self) -> Result<(), SyntaxError> {
pub(super) fn eat_multi_line_comment(&mut self) -> Result<(), SyntaxError> {
let start_span = self.current_span();
loop {
let Some(byte) = self.reader.next() else {
@ -64,7 +64,7 @@ impl<'a> Lexer<'a> {
}
/// Eats whitespace such as spaces, tabs, and newlines.
pub fn eat_whitespace(&mut self) {
pub(super) fn eat_whitespace(&mut self) {
loop {
let Some(byte) = self.reader.peek() else {
return;
@ -100,8 +100,17 @@ impl<'a> Lexer<'a> {
}
}
/// Lex a digits token.
pub(super) fn lex_digits(&mut self) -> Token {
while let Some(b'0'..=b'9' | b'_') = self.reader.peek() {
self.reader.next();
}
self.finish_token(TokenKind::Digits)
}
/// Lex the next token, starting from the given byte.
pub fn lex_ascii(&mut self, byte: u8) -> Token {
pub(super) fn lex_ascii(&mut self, byte: u8) -> Token {
let kind = match byte {
b'{' => t!("{"),
b'}' => t!("}"),
@ -190,16 +199,6 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("<|")
}
Some(b'-') => {
self.reader.next();
match self.reader.peek() {
Some(b'>') => {
self.reader.next();
t!("<->")
}
_ => t!("<-"),
}
}
_ => t!("<"),
},
b'>' => match self.reader.peek() {
@ -328,75 +327,10 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("d'")
}
Some(b'e') => {
self.reader.next();
let Some(b'c') = self.reader.peek() else {
self.scratch.push('d');
return self.lex_ident_from_next_byte(b'e');
};
self.reader.next();
if self.reader.peek().map(|x| x.is_ascii_alphanumeric()).unwrap_or(false) {
self.scratch.push('d');
self.scratch.push('e');
return self.lex_ident_from_next_byte(b'c');
}
t!("dec")
}
Some(x) if !x.is_ascii_alphabetic() => {
t!("d")
}
None => {
t!("d")
}
_ => {
return self.lex_ident_from_next_byte(b'd');
}
},
b'f' => match self.reader.peek() {
Some(x) if !x.is_ascii_alphanumeric() => {
t!("f")
}
None => t!("f"),
_ => {
return self.lex_ident_from_next_byte(b'f');
}
},
b'n' => match self.reader.peek() {
Some(b's') => {
self.reader.next();
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
self.scratch.push('n');
return self.lex_ident_from_next_byte(b's');
}
t!("ns")
}
_ => {
return self.lex_ident_from_next_byte(b'n');
}
},
b'm' => match self.reader.peek() {
Some(b's') => {
self.reader.next();
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
self.scratch.push('m');
return self.lex_ident_from_next_byte(b's');
}
t!("ms")
}
Some(x) if !x.is_ascii_alphabetic() => {
t!("m")
}
None => {
t!("m")
}
_ => {
return self.lex_ident_from_next_byte(b'm');
}
},
b's' => match self.reader.peek() {
Some(b'"') => {
self.reader.next();
@ -406,32 +340,10 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("'")
}
Some(x) if x.is_ascii_alphabetic() => {
_ => {
return self.lex_ident_from_next_byte(b's');
}
_ => t!("s"),
},
b'h' => {
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
return self.lex_ident_from_next_byte(b'h');
} else {
t!("h")
}
}
b'w' => {
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
return self.lex_ident_from_next_byte(b'w');
} else {
t!("w")
}
}
b'y' => {
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
return self.lex_ident_from_next_byte(b'y');
} else {
t!("y")
}
}
b'u' => match self.reader.peek() {
Some(b'"') => {
self.reader.next();
@ -441,14 +353,6 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("u'")
}
Some(b's') => {
self.reader.next();
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
self.scratch.push('u');
return self.lex_ident_from_next_byte(b's');
}
t!("us")
}
_ => {
return self.lex_ident_from_next_byte(b'u');
}
@ -466,24 +370,6 @@ impl<'a> Lexer<'a> {
return self.lex_ident_from_next_byte(b'r');
}
},
b'Z' => match self.reader.peek() {
Some(x) if x.is_ascii_alphabetic() => {
return self.lex_ident_from_next_byte(b'Z');
}
_ => TokenKind::DatetimeChars(DatetimeChars::Z),
},
b'T' => match self.reader.peek() {
Some(x) if x.is_ascii_alphabetic() => {
return self.lex_ident_from_next_byte(b'T');
}
_ => TokenKind::DatetimeChars(DatetimeChars::T),
},
b'e' => {
return self.lex_exponent(b'e');
}
b'E' => {
return self.lex_exponent(b'E');
}
b'0'..=b'9' => return self.lex_digits(),
b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
return self.lex_ident_from_next_byte(byte);

View file

@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
/// Lex non-ascii characters.
///
/// Should only be called after determining that the byte is not a valid ascii character.
pub fn lex_char(&mut self, byte: u8) -> Token {
pub(super) fn lex_char(&mut self, byte: u8) -> Token {
let c = match self.reader.complete_char(byte) {
Ok(x) => x,
Err(e) => return self.invalid_token(e.into()),
@ -28,20 +28,6 @@ impl<'a> Lexer<'a> {
'⊄' => t!(""),
'×' => t!("×"),
'÷' => t!("÷"),
'µ' => {
let Some(b's') = self.reader.peek() else {
let err = error!("Invalid token `µ` expected token to be followed by `s`", @self.current_span());
return self.invalid_token(err);
};
self.reader.next();
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
let err = error!("Invalid token `µ` expected token to be followed by `s`", @self.current_span());
return self.invalid_token(err);
}
t!("µs")
}
x => {
let err = error!("Invalid token `{x}`", @self.current_span());
return self.invalid_token(err);

View file

@ -0,0 +1,195 @@
use std::ops::RangeInclusive;
use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
use crate::syn::{
error::{bail, error, SyntaxError},
lexer::Lexer,
token::{t, Token},
};
pub fn datetime(lexer: &mut Lexer, start: Token) -> Result<DateTime<Utc>, SyntaxError> {
let double = match start.kind {
t!("d\"") => true,
t!("d'") => false,
x => panic!("Invalid start token of datetime compound: {x}"),
};
let datetime = datetime_inner(lexer)?;
if double {
lexer.expect('"')?;
} else {
lexer.expect('\'')?;
}
Ok(datetime)
}
/// Lexes a datetime without the surrounding `'` or `"`
pub fn datetime_inner(lexer: &mut Lexer) -> Result<DateTime<Utc>, SyntaxError> {
let date_start = lexer.reader.offset();
let year_neg = lexer.eat(b'-');
if !year_neg {
lexer.eat(b'+');
}
let year = parse_datetime_digits(lexer, 4, 0..=9999)?;
lexer.expect('-')?;
let month = parse_datetime_digits(lexer, 2, 1..=12)?;
lexer.expect('-')?;
let day = parse_datetime_digits(lexer, 2, 1..=31)?;
let year = if year_neg {
-(year as i32)
} else {
year as i32
};
let date = NaiveDate::from_ymd_opt(year, month as u32, day as u32).ok_or_else(
|| error!("Invalid DateTime date: date outside of valid range", @lexer.span_since(date_start)),
)?;
if !lexer.eat_when(|x| x == b'T') {
let time = NaiveTime::default();
let date_time = NaiveDateTime::new(date, time);
let datetime =
Utc.fix().from_local_datetime(&date_time).earliest().unwrap().with_timezone(&Utc);
return Ok(datetime);
}
let time_start = lexer.reader.offset();
let hour = parse_datetime_digits(lexer, 2, 0..=24)?;
lexer.expect(':')?;
let minute = parse_datetime_digits(lexer, 2, 0..=59)?;
lexer.expect(':')?;
let second = parse_datetime_digits(lexer, 2, 0..=60)?;
let nanos_start = lexer.reader.offset();
let nanos = if lexer.eat(b'.') {
let mut number = 0u32;
let mut count = 0;
loop {
let Some(d) = lexer.reader.peek() else {
break;
};
if !d.is_ascii_digit() {
break;
}
if count == 9 {
bail!("Invalid datetime nanoseconds, expected no more than 9 digits", @lexer.span_since(nanos_start))
}
lexer.reader.next();
number *= 10;
number += (d - b'0') as u32;
count += 1;
}
if count == 0 {
bail!("Invalid datetime nanoseconds, expected at least a single digit", @lexer.span_since(nanos_start))
}
// if digits are missing they count as 0's
for _ in count..9 {
number *= 10;
}
number
} else {
0
};
let time = NaiveTime::from_hms_nano_opt(hour as u32, minute as u32, second as u32, nanos)
.ok_or_else(
|| error!("Invalid DateTime time: time outside of valid range", @lexer.span_since(time_start)),
)?;
let timezone_start = lexer.reader.offset();
let timezone = match lexer.reader.peek() {
Some(b'-') => {
lexer.reader.next();
let (hour, minute) = parse_timezone(lexer)?;
// The range checks on the digits ensure that the offset can't exceed 23:59 so below
// unwraps won't panic.
FixedOffset::west_opt((hour * 3600 + minute * 60) as i32).unwrap()
}
Some(b'+') => {
lexer.reader.next();
let (hour, minute) = parse_timezone(lexer)?;
// The range checks on the digits ensure that the offset can't exceed 23:59 so below
// unwraps won't panic.
FixedOffset::east_opt((hour * 3600 + minute * 60) as i32).unwrap()
}
Some(b'Z') => {
lexer.reader.next();
Utc.fix()
}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
bail!("Invalid datetime timezone, expected `Z` or a timezone offset, found `{char}`", @lexer.span_since(timezone_start));
}
None => {
bail!("Unexpected end of file, expected datetime to finish", @lexer.span_since(time_start));
}
};
let date_time = NaiveDateTime::new(date, time);
let datetime = timezone
.from_local_datetime(&date_time)
.earliest()
// this should never panic with a fixed offset.
.unwrap()
.with_timezone(&Utc);
Ok(datetime)
}
fn parse_timezone(lexer: &mut Lexer) -> Result<(u32, u32), SyntaxError> {
let hour = parse_datetime_digits(lexer, 2, 0..=23)? as u32;
lexer.expect(':')?;
let minute = parse_datetime_digits(lexer, 2, 0..=59)? as u32;
Ok((hour, minute))
}
fn parse_datetime_digits(
lexer: &mut Lexer,
count: usize,
range: RangeInclusive<usize>,
) -> Result<usize, SyntaxError> {
let start = lexer.reader.offset();
let mut value = 0usize;
for _ in 0..count {
let offset = lexer.reader.offset();
match lexer.reader.next() {
Some(x) if x.is_ascii_digit() => {
value *= 10;
value += (x - b'0') as usize;
}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
let span = lexer.span_since(offset);
bail!("Invalid datetime, expected digit character found `{char}`", @span);
}
None => {
bail!("Unexpected end of file, expected a datetime digit character", @lexer.current_span());
}
}
}
if !range.contains(&value) {
let span = lexer.span_since(start);
bail!("Invalid datetime digit section, section not within allowed range",
@span => "This section must be within {}..={}",range.start(),range.end());
}
Ok(value)
}
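A sketch of driving `datetime_inner` directly over a raw source, as the reworked `datetime_raw` entry point further down this diff does; the sample value and assertion are hypothetical:

use crate::syn::lexer::{compound, Lexer};

// Lex an RFC 3339-style datetime without surrounding quotes.
let mut lexer = Lexer::new("2024-09-12T18:23:33Z".as_bytes());
let datetime = compound::datetime_inner(&mut lexer).expect("a valid datetime");
// The whole input was consumed and `Z` resolved to a zero UTC offset.
assert_eq!(datetime.to_rfc3339(), "2024-09-12T18:23:33+00:00");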

View file

@ -0,0 +1,25 @@
use crate::syn::{
error::{bail, SyntaxError},
lexer::{unicode::is_identifier_continue, Lexer},
token::{Token, TokenKind},
};
use std::mem;
pub fn flexible_ident(lexer: &mut Lexer, start: Token) -> Result<String, SyntaxError> {
match start.kind {
TokenKind::Digits => {
let mut res = lexer.span_str(start.span).to_owned();
while let Some(x) = lexer.reader.peek() {
if is_identifier_continue(x) {
lexer.reader.next();
res.push(x as char);
} else {
break;
}
}
Ok(res)
}
TokenKind::Identifier => Ok(mem::take(&mut lexer.string).unwrap()),
x => bail!("Unexpected token {x}, expected flexible identifier", @start.span),
}
}
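A sketch of the digits-led case that makes the identifier "flexible": the lexer has already produced a `Digits` token, and the remaining identifier bytes are glued on one at a time (hypothetical harness, assuming the lexer's `Iterator` impl yields plain `Token`s):

let mut lexer = Lexer::new("123abc".as_bytes());
let start = lexer.next().expect("a token"); // `TokenKind::Digits` covering `123`
let ident = compound::flexible_ident(&mut lexer, start).expect("a flexible identifier");
assert_eq!(ident, "123abc");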

View file

@ -1,25 +1,19 @@
use crate::syn::{
error::{MessageKind, SyntaxError},
lexer::unicode::chars::JS_LINE_TERIMATORS,
token::{t, CompoundToken, JavaScript, Span, TokenKind},
token::{t, Token},
};
use super::{CompoundValue, Lexer};
use super::Lexer;
impl CompoundValue for JavaScript {
const START: TokenKind = t!("{");
fn relex(lexer: &mut Lexer, _: Span) -> Result<CompoundToken<Self>, SyntaxError> {
let span = lex_js_function_body_inner(lexer)?;
Ok(CompoundToken {
value: JavaScript,
span,
})
}
pub fn javascript(lexer: &mut Lexer, start: Token) -> Result<(), SyntaxError> {
assert_eq!(start.kind, t!("{"), "Invalid start of JavaScript compound token");
lex_js_function_body_inner(lexer)?;
Ok(())
}
/// Lex the body of a js function.
fn lex_js_function_body_inner(lexer: &mut Lexer) -> Result<Span, SyntaxError> {
fn lex_js_function_body_inner(lexer: &mut Lexer) -> Result<(), SyntaxError> {
let mut block_depth = 1;
loop {
let Some(byte) = lexer.reader.next() else {
@ -61,7 +55,7 @@ fn lex_js_function_body_inner(lexer: &mut Lexer) -> Result<Span, SyntaxError> {
}
}
Ok(lexer.advance_span())
Ok(())
}
/// lex a js string with the given delimiter.

View file

@ -1,85 +1,57 @@
use crate::sql::Regex;
use crate::syn::{
error::{bail, error, SyntaxError},
error::SyntaxError,
lexer::Lexer,
token::{t, CompoundToken, Span, Token, TokenKind},
token::{Span, Token},
};
mod datetime;
mod ident;
mod js;
mod number;
mod regex;
mod strand;
mod uuid;
pub trait CompoundValue: Sized {
/// The token which indicates the start of this compound token.
const START: TokenKind;
pub use datetime::{datetime, datetime_inner};
pub use ident::flexible_ident;
pub use js::javascript;
pub use number::{
duration, float, integer, number, numeric, numeric_kind, NumberKind, Numeric, NumericKind,
};
pub use regex::regex;
pub use strand::strand;
pub use uuid::uuid;
/// Lex the start of this span to a more complex type of token.
fn relex(lexer: &mut Lexer, start_span: Span) -> Result<CompoundToken<Self>, SyntaxError>;
#[derive(Debug)]
pub struct CompoundToken<T> {
pub value: T,
pub span: Span,
}
impl<'a> Lexer<'a> {
pub fn lex_compound<T: CompoundValue>(
/// Lex a more complex token from the start token.
/// The start token should already be consumed.
pub fn lex_compound<F, R>(
&mut self,
start: Token,
) -> Result<CompoundToken<T>, SyntaxError> {
f: F,
) -> Result<CompoundToken<R>, SyntaxError>
where
F: Fn(&mut Self, Token) -> Result<R, SyntaxError>,
{
assert_eq!(
start.kind,
T::START,
"Invalid start of compound token, expected {} got {}",
T::START,
start.kind
);
assert_eq!(
start.span.offset + 1,
self.last_offset,
"Tried to parse compound when lexer already ate past the start token"
start.span.offset + start.span.len,
"The start token given to compound was not the last token consumed."
);
self.last_offset = start.span.offset;
T::relex(self, start.span)
}
}
let res = f(self, start)?;
impl CompoundValue for Regex {
const START: TokenKind = t!("/");
// re-lexes a `/` token to a regex token.
fn relex(lexer: &mut Lexer, _: Span) -> Result<CompoundToken<Regex>, SyntaxError> {
loop {
match lexer.reader.next() {
Some(b'\\') => {
// We can't just eat all bytes after a \ because a byte might be non-ascii.
lexer.eat(b'/');
}
Some(b'/') => break,
Some(x) => {
if !x.is_ascii() {
if let Err(e) = lexer.reader.complete_char(x) {
let span = lexer.advance_span();
bail!("Invalid token: {e}", @span);
}
}
}
None => {
let span = lexer.advance_span();
return Err(
error!("Failed to lex regex, unexpected eof", @span).with_data_pending()
);
}
}
}
// successfully parsed the regex, time to structure it.
let span = lexer.advance_span();
// +1 offset to move over the first `/` -2 len to remove the last `/`
let mut inner_span = span;
debug_assert!(inner_span.len > 2);
inner_span.offset += 1;
inner_span.len -= 2;
let str = lexer.span_str(inner_span);
let regex = str.parse().map_err(|e| error!("Invalid regex: {e}", @span))?;
Ok(CompoundToken {
value: regex,
span,
value: res,
span: self.advance_span(),
})
}
}
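In sketch form, the parser-side calling convention for this closure-based API (the real call sites are in the `TokenValue` impls later in this diff; `parser` is assumed context):

// The start token must be the last token the lexer consumed.
let token = parser.pop_peek();
let compound_token = parser.lexer.lex_compound(token, compound::duration)?;
let value: std::time::Duration = compound_token.value; // the lexed value
let span = compound_token.span;                        // the source span it covers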

View file

@ -0,0 +1,405 @@
use std::{
borrow::Cow,
num::{ParseFloatError, ParseIntError},
str::FromStr,
time::Duration,
};
use rust_decimal::Decimal;
use crate::{
sql::{
duration::{
SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK,
SECONDS_PER_YEAR,
},
Number,
},
syn::{
error::{bail, error, SyntaxError},
lexer::Lexer,
token::{t, Span, Token, TokenKind},
},
};
pub enum Numeric {
Number(Number),
Duration(Duration),
}
/// Like `Numeric` but holds off on parsing the number into a specific value.
pub enum NumericKind {
Number(NumberKind),
Duration(Duration),
}
pub enum NumberKind {
Integer,
Float,
Decimal,
}
enum DurationSuffix {
Nano,
Micro,
Milli,
Second,
Minute,
Hour,
Day,
Week,
Year,
}
fn prepare_number_str(str: &str) -> Cow<str> {
if str.contains('_') {
Cow::Owned(str.chars().filter(|x| *x != '_').collect())
} else {
Cow::Borrowed(str)
}
}
/// Lexes tokens which can start with digits: a number or a duration.
/// Like `numeric` but holds off on parsing the number into a specific value.
pub fn numeric_kind(lexer: &mut Lexer, start: Token) -> Result<NumericKind, SyntaxError> {
match start.kind {
t!("-") | t!("+") => number_kind(lexer, start).map(NumericKind::Number),
TokenKind::Digits => match lexer.reader.peek() {
Some(b'n' | b'm' | b's' | b'h' | b'y' | b'd' | b'w' | b'u') => {
duration(lexer, start).map(NumericKind::Duration)
}
Some(x) if !x.is_ascii() => duration(lexer, start).map(NumericKind::Duration),
_ => number_kind(lexer, start).map(NumericKind::Number),
},
x => {
bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number",@start.span)
}
}
}
/// Lexes tokens which can start with digits: a number or a duration.
pub fn numeric(lexer: &mut Lexer, start: Token) -> Result<Numeric, SyntaxError> {
match start.kind {
t!("-") | t!("+") => number(lexer, start).map(Numeric::Number),
TokenKind::Digits => match lexer.reader.peek() {
Some(b'n' | b'm' | b's' | b'h' | b'y' | b'd' | b'w') => {
duration(lexer, start).map(Numeric::Duration)
}
Some(x) if !x.is_ascii() => duration(lexer, start).map(Numeric::Duration),
_ => number(lexer, start).map(Numeric::Number),
},
x => {
bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number",@start.span)
}
}
}
pub fn number_kind(lexer: &mut Lexer, start: Token) -> Result<NumberKind, SyntaxError> {
let offset = start.span.offset as usize;
match start.kind {
t!("-") | t!("+") => {
eat_digits1(lexer, offset)?;
}
TokenKind::Digits => {}
x => bail!("Unexpected start token for integer: {x}",@start.span),
}
let mut kind = NumberKind::Integer;
let before_mantissa = lexer.reader.offset();
// Need to check for `digits..`, which is a range, not a floating point number.
if lexer.reader.peek1() != Some(b'.') && lexer.eat(b'.') {
eat_digits1(lexer, before_mantissa)?;
kind = NumberKind::Float;
}
let before_exponent = lexer.reader.offset();
if lexer.eat(b'e') || lexer.eat(b'E') {
if !lexer.eat(b'-') {
lexer.eat(b'+');
}
eat_digits1(lexer, before_exponent)?;
kind = NumberKind::Float;
}
if !lexer.eat(b'f') {
if lexer.eat(b'd') {
lexer.expect('e')?;
lexer.expect('c')?;
kind = NumberKind::Decimal;
}
} else {
kind = NumberKind::Float;
}
if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found unexpected character `{char}` after number token", @lexer.current_span())
}
Ok(kind)
}
pub fn number(lexer: &mut Lexer, start: Token) -> Result<Number, SyntaxError> {
let kind = number_kind(lexer, start)?;
let span = lexer.current_span();
let number_str = prepare_number_str(lexer.span_str(span));
match kind {
NumberKind::Integer => number_str
.parse()
.map(Number::Int)
.map_err(|e| error!("Failed to parse number: {e}", @lexer.current_span())),
NumberKind::Float => {
let number_str = number_str.trim_end_matches('f');
number_str
.parse()
.map(Number::Float)
.map_err(|e| error!("Failed to parse number: {e}", @lexer.current_span()))
}
NumberKind::Decimal => {
let number_str = number_str.trim_end_matches("dec");
let decimal = if number_str.contains(['e', 'E']) {
Decimal::from_scientific(number_str)
.map_err(|e| error!("Failed to parse decimal: {e}", @lexer.current_span()))?
} else {
Decimal::from_str(number_str)
.map_err(|e| error!("Failed to parse decimal: {e}", @lexer.current_span()))?
};
Ok(Number::Decimal(decimal))
}
}
}
/// Generic integer parsing method,
/// works for all unsigned integers.
pub fn integer<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
I: FromStr<Err = ParseIntError>,
{
let offset = start.span.offset as usize;
match start.kind {
t!("-") | t!("+") => {
eat_digits1(lexer, offset)?;
}
TokenKind::Digits => {}
x => bail!("Unexpected token {x}, expected integer",@start.span),
};
if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found unexpected character `{char}` after integer token", @lexer.current_span())
}
let last_offset = lexer.reader.offset();
let peek = lexer.reader.peek();
if peek == Some(b'.') {
let is_mantissa = lexer.reader.peek1().map(|x| x.is_ascii_digit()).unwrap_or(false);
if is_mantissa {
let span = Span {
offset: last_offset as u32,
len: 1,
};
bail!("Unexpected character `.` starting float, only integers are allowed here", @span)
}
}
if peek == Some(b'e') || peek == Some(b'E') {
bail!("Unexpected character `{}` only integers are allowed here",peek.unwrap() as char, @lexer.current_span())
}
let span = lexer.current_span();
let str = prepare_number_str(lexer.span_str(span));
str.parse().map_err(|e| error!("Invalid integer: {e}", @span))
}
/// Generic float parsing method,
/// works for all floating point number types.
pub fn float<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
I: FromStr<Err = ParseFloatError>,
{
let offset = start.span.offset as usize;
match start.kind {
t!("-") | t!("+") => {
eat_digits1(lexer, offset)?;
}
TokenKind::Digits => {}
x => bail!("Unexpected token {x}, expected floating point number",@start.span),
};
let before_mantissa = lexer.reader.offset();
if lexer.eat(b'.') {
eat_digits1(lexer, before_mantissa)?;
}
let before_exponent = lexer.reader.offset();
if lexer.eat(b'e') || lexer.eat(b'E') {
if !lexer.eat(b'-') {
lexer.eat(b'+');
}
eat_digits1(lexer, before_exponent)?;
}
let number_span = lexer.current_span();
lexer.eat(b'f');
if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found invalid character `{char}` after number token", @lexer.current_span())
}
let str = prepare_number_str(lexer.span_str(number_span));
str.parse().map_err(|e| error!("Invalid floating point number: {e}", @lexer.current_span()))
}
pub fn duration(lexer: &mut Lexer, start: Token) -> Result<Duration, SyntaxError> {
match start.kind {
TokenKind::Digits => {}
x => bail!("Unexpected token {x}, expected duration", @start.span),
}
let mut duration = Duration::ZERO;
let mut number_span = start.span;
loop {
let suffix = lex_duration_suffix(lexer)?;
let numeric_string = prepare_number_str(lexer.span_str(number_span));
let numeric_value: u64 = numeric_string.parse().map_err(
|e| error!("Invalid token, failed to parse duration digits: {e}",@lexer.current_span()),
)?;
let addition = match suffix {
DurationSuffix::Nano => Duration::from_nanos(numeric_value),
DurationSuffix::Micro => Duration::from_micros(numeric_value),
DurationSuffix::Milli => Duration::from_millis(numeric_value),
DurationSuffix::Second => Duration::from_secs(numeric_value),
DurationSuffix::Minute => {
let minutes = numeric_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(
|| error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(minutes)
}
DurationSuffix::Hour => {
let hours = numeric_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(
|| error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(hours)
}
DurationSuffix::Day => {
let day = numeric_value.checked_mul(SECONDS_PER_DAY).ok_or_else(
|| error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(day)
}
DurationSuffix::Week => {
let week = numeric_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(
|| error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(week)
}
DurationSuffix::Year => {
let year = numeric_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(
|| error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(year)
}
};
duration = duration.checked_add(addition).ok_or_else(
|| error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
)?;
match lexer.reader.peek() {
Some(x) if x.is_ascii_digit() => {
let before = lexer.reader.offset();
eat_digits(lexer);
number_span = lexer.span_since(before);
}
_ => break,
}
}
Ok(duration)
}
fn lex_duration_suffix(lexer: &mut Lexer) -> Result<DurationSuffix, SyntaxError> {
let suffix = match lexer.reader.next() {
Some(b'n') => {
lexer.expect('s')?;
DurationSuffix::Nano
}
Some(b'u') => {
lexer.expect('s')?;
DurationSuffix::Micro
}
Some(b'm') => {
if lexer.eat(b's') {
DurationSuffix::Milli
} else {
DurationSuffix::Minute
}
}
Some(b's') => DurationSuffix::Second,
Some(b'h') => DurationSuffix::Hour,
Some(b'd') => DurationSuffix::Day,
Some(b'w') => DurationSuffix::Week,
Some(b'y') => DurationSuffix::Year,
// Start byte of 'µ'
Some(0xC2) => {
if !lexer.eat(0xB5) {
let char = lexer.reader.complete_char(0xC2)?;
bail!("Invalid duration token, expected a duration suffix, found `{char}`", @lexer.current_span())
}
lexer.expect('s')?;
DurationSuffix::Micro
}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
bail!("Invalid duration token, expected a duration suffix, found `{char}`", @lexer.current_span())
}
None => {
bail!("Unexpected end of file, expected a duration suffix",@lexer.current_span())
}
};
if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found invalid character `{char}` after duration suffix", @lexer.current_span())
}
Ok(suffix)
}
fn has_ident_after(lexer: &mut Lexer) -> bool {
match lexer.reader.peek() {
Some(x) => !x.is_ascii() || x.is_ascii_alphabetic(),
None => false,
}
}
fn eat_digits1(lexer: &mut Lexer, start: usize) -> Result<(), SyntaxError> {
match lexer.reader.peek() {
Some(x) if x.is_ascii_digit() => {}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
bail!("Invalid number token, expected a digit, found: {char}", @lexer.span_since(start));
}
None => {
bail!("Unexpected end of file, expected a number token digit", @lexer.span_since(start));
}
}
eat_digits(lexer);
Ok(())
}
fn eat_digits(lexer: &mut Lexer) {
while lexer.eat_when(|x| x.is_ascii_digit() || x == b'_') {}
}
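`duration` keeps consuming `<digits><suffix>` pairs until the byte after a suffix is no longer a digit, summing the components as it goes. A sketch, assuming the lexer's `Iterator` impl yields plain `Token`s:

use std::time::Duration;

let mut lexer = Lexer::new("1h30m".as_bytes());
let start = lexer.next().expect("a token"); // `TokenKind::Digits` covering `1`
// 1h contributes 3600s, 30m contributes 1800s.
let duration = compound::duration(&mut lexer, start).expect("a valid duration");
assert_eq!(duration, Duration::from_secs(5400));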

View file

@ -0,0 +1,50 @@
use regex::Regex;
use crate::syn::{
error::{bail, error, SyntaxError},
lexer::Lexer,
token::{t, Token},
};
pub fn regex(lexer: &mut Lexer, start: Token) -> Result<Regex, SyntaxError> {
assert_eq!(start.kind, t!("/"), "Invalid start token of regex compound");
lexer.scratch.clear();
loop {
match lexer.reader.next() {
Some(b'\\') => {
// We can't just eat all bytes after a \ because a byte might be non-ascii.
if lexer.eat(b'/') {
lexer.scratch.push('/');
} else {
lexer.scratch.push('\\');
}
}
Some(b'/') => break,
Some(x) => {
if !x.is_ascii() {
match lexer.reader.complete_char(x) {
Err(e) => {
let span = lexer.current_span();
bail!("Invalid token: {e}", @span);
}
Ok(x) => {
lexer.scratch.push(x);
}
}
} else {
lexer.scratch.push(x as char);
}
}
None => {
let span = lexer.current_span();
bail!("Failed to lex regex, unexpected eof", @span);
}
}
}
let span = lexer.current_span();
let regex = lexer.scratch.parse().map_err(|e| error!("Invalid regex: {e}", @span))?;
lexer.scratch.clear();
Ok(regex)
}
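Escape handling here only unescapes the delimiter: `\/` is pushed as `/` so a regex can contain slashes, while any other `\x` pair is passed through verbatim for the regex engine to interpret. A hypothetical check:

let mut lexer = Lexer::new(r"/a\/b\d/".as_bytes());
let start = lexer.next().expect("the opening `/` token");
let regex = compound::regex(&mut lexer, start).expect("a valid regex");
assert_eq!(regex.as_str(), r"a/b\d");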

View file

@ -0,0 +1,100 @@
use std::mem;
use crate::syn::{
error::{bail, error, SyntaxError},
lexer::{unicode::chars, Lexer},
token::{t, Token},
};
pub fn strand(lexer: &mut Lexer, start: Token) -> Result<String, SyntaxError> {
let is_double = match start.kind {
t!("\"") => true,
t!("'") => false,
_ => panic!("Invalid start of strand compound token"),
};
loop {
let Some(x) = lexer.reader.next() else {
lexer.scratch.clear();
let err =
error!("Unexpected end of file, expected strand to end",@lexer.current_span());
return Err(err.with_data_pending());
};
if x.is_ascii() {
match x {
b'\'' if !is_double => {
let res = mem::take(&mut lexer.scratch);
return Ok(res);
}
b'"' if is_double => {
let res = mem::take(&mut lexer.scratch);
return Ok(res);
}
b'\0' => {
bail!("Invalid null byte in source, null bytes are not valid SurrealQL characters",@lexer.current_span());
}
b'\\' => {
// Handle escape sequences.
let Some(next) = lexer.reader.next() else {
lexer.scratch.clear();
let err = error!("Unexpected end of file, expected strand to end",@lexer.current_span());
return Err(err.with_data_pending());
};
match next {
b'\\' => {
lexer.scratch.push('\\');
}
b'\'' if !is_double => {
lexer.scratch.push('\'');
}
b'\"' if is_double => {
lexer.scratch.push('\"');
}
b'/' => {
lexer.scratch.push('/');
}
b'b' => {
lexer.scratch.push(chars::BS);
}
b'f' => {
lexer.scratch.push(chars::FF);
}
b'n' => {
lexer.scratch.push(chars::LF);
}
b'r' => {
lexer.scratch.push(chars::CR);
}
b't' => {
lexer.scratch.push(chars::TAB);
}
x => match lexer.reader.convert_to_char(x) {
Ok(char) => {
let valid_escape = if is_double {
'"'
} else {
'\''
};
bail!("Invalid escape character `{char}`, valid characters are `\\`, `{valid_escape}`, `/`, `b`, `f`, `n`, `r`, or `t`", @lexer.current_span());
}
Err(e) => {
lexer.scratch.clear();
return Err(e.into());
}
},
}
}
x => lexer.scratch.push(x as char),
}
} else {
match lexer.reader.complete_char(x) {
Ok(x) => lexer.scratch.push(x),
Err(e) => {
lexer.scratch.clear();
return Err(e.into());
}
}
}
}
}
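A sketch of the escape decoding, assuming the opening quote lexes to a `t!("\"")` token as the parser-side match later in this diff suggests:

let mut lexer = Lexer::new(r#""a\nb""#.as_bytes());
let start = lexer.next().expect("the opening quote token");
let strand = compound::strand(&mut lexer, start).expect("a valid strand");
assert_eq!(strand, "a\nb"); // the `\n` escape decoded to a line feed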

View file

@ -0,0 +1,85 @@
use crate::syn::{
error::{bail, SyntaxError},
lexer::Lexer,
token::{t, Token},
};
use uuid::Uuid;
pub fn uuid(lexer: &mut Lexer, start: Token) -> Result<Uuid, SyntaxError> {
let double = match start.kind {
t!("u\"") => true,
t!("u'") => false,
x => panic!("Invalid start token of uuid compound: {x}"),
};
let mut uuid_buffer = [0u8; 16];
// number of bytes is 4-2-2-2-6
eat_uuid_hex(lexer, &mut uuid_buffer[0..4])?;
lexer.expect('-')?;
eat_uuid_hex(lexer, &mut uuid_buffer[4..6])?;
lexer.expect('-')?;
eat_uuid_hex(lexer, &mut uuid_buffer[6..8])?;
lexer.expect('-')?;
eat_uuid_hex(lexer, &mut uuid_buffer[8..10])?;
lexer.expect('-')?;
eat_uuid_hex(lexer, &mut uuid_buffer[10..16])?;
if double {
lexer.expect('"')?;
} else {
lexer.expect('\'')?;
}
Ok(Uuid::from_bytes(uuid_buffer))
}
fn eat_uuid_hex(lexer: &mut Lexer, buffer: &mut [u8]) -> Result<(), SyntaxError> {
// The number of characters required is twice the buffer length,
// since every character encodes half a byte.
for x in buffer {
let a = eat_hex_character(lexer)?;
let b = eat_hex_character(lexer)?;
*x = (a << 4) | b;
}
Ok(())
}
fn eat_hex_character(lexer: &mut Lexer) -> Result<u8, SyntaxError> {
fn ascii_to_hex(b: u8) -> Option<u8> {
if b.is_ascii_digit() {
return Some(b - b'0');
}
if (b'a'..=b'f').contains(&b) {
return Some(b - (b'a' - 10));
}
if (b'A'..=b'F').contains(&b) {
return Some(b - (b'A' - 10));
}
None
}
let Some(peek) = lexer.reader.peek() else {
bail!("Unexpected end of file, expected UUID token to finish",@lexer.current_span());
};
let Some(res) = ascii_to_hex(peek) else {
lexer.advance_span();
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Unexpected character `{char}`, expected a hexadecimal digit", @lexer.current_span());
};
lexer.reader.next();
Ok(res)
}
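Each output byte packs two hex characters, high nibble first. A standalone demonstration of the same arithmetic:

fn ascii_to_hex(b: u8) -> Option<u8> {
    if b.is_ascii_digit() {
        return Some(b - b'0');
    }
    if (b'a'..=b'f').contains(&b) {
        return Some(b - (b'a' - 10));
    }
    if (b'A'..=b'F').contains(&b) {
        return Some(b - (b'A' - 10));
    }
    None
}

fn main() {
    let high = ascii_to_hex(b'2').unwrap(); // 0x2
    let low = ascii_to_hex(b'A').unwrap();  // 0xa
    assert_eq!((high << 4) | low, 0x2a);    // two characters become one byte
}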

View file

@ -8,18 +8,14 @@ use crate::syn::{
token::{Token, TokenKind},
};
use super::unicode::chars;
fn is_identifier_continue(x: u8) -> bool {
matches!(x, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
use super::unicode::{chars, is_identifier_continue};
impl<'a> Lexer<'a> {
/// Lex a parameter in the form of `$[a-zA-Z0-9_]*`
///
/// # Lexer State
/// Expects the lexer to have already eaten the `$` starting the param.
pub fn lex_param(&mut self) -> Token {
pub(super) fn lex_param(&mut self) -> Token {
debug_assert_eq!(self.scratch, "");
loop {
if let Some(x) = self.reader.peek() {
@ -34,7 +30,7 @@ impl<'a> Lexer<'a> {
}
}
pub fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
pub(super) fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
debug_assert_eq!(self.scratch, "");
match self.lex_surrounded_ident_err(is_backtick) {
Ok(_) => self.finish_token(TokenKind::Parameter),
@ -51,7 +47,7 @@ impl<'a> Lexer<'a> {
///
/// When calling this, the caller should already know that the token can't be any other token
/// covered by `[a-zA-Z0-9_]*`.
pub fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
pub(super) fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
debug_assert!(matches!(start, b'a'..=b'z' | b'A'..=b'Z' | b'_'));
self.scratch.push(start as char);
self.lex_ident()
@ -60,7 +56,7 @@ impl<'a> Lexer<'a> {
/// Lex a non-surrounded identifier.
///
/// The scratch buffer should contain only valid identifier characters.
pub fn lex_ident(&mut self) -> Token {
pub(super) fn lex_ident(&mut self) -> Token {
loop {
if let Some(x) = self.reader.peek() {
if is_identifier_continue(x) {
@ -90,7 +86,7 @@ impl<'a> Lexer<'a> {
}
/// Lex an ident which is surrounded by delimiters.
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
pub(super) fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
match self.lex_surrounded_ident_err(is_backtick) {
Ok(_) => self.finish_token(TokenKind::Identifier),
Err(e) => {
@ -101,7 +97,10 @@ impl<'a> Lexer<'a> {
}
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), SyntaxError> {
pub(super) fn lex_surrounded_ident_err(
&mut self,
is_backtick: bool,
) -> Result<(), SyntaxError> {
loop {
let Some(x) = self.reader.next() else {
let end_char = if is_backtick {

View file

@ -1,25 +1,18 @@
use std::time::Duration;
use chrono::{DateTime, Utc};
mod byte;
mod char;
pub mod compound;
mod ident;
pub mod keywords;
mod number;
mod reader;
mod strand;
mod unicode;
mod compound;
#[cfg(test)]
mod test;
pub use reader::{BytesReader, CharError};
use uuid::Uuid;
use crate::syn::{
error::SyntaxError,
error::{bail, SyntaxError},
token::{Span, Token, TokenKind},
};
@ -38,7 +31,7 @@ use crate::syn::{
#[non_exhaustive]
pub struct Lexer<'a> {
/// The reader for reading the source bytes.
pub reader: BytesReader<'a>,
pub(super) reader: BytesReader<'a>,
/// The offset one past the last character of the previous token.
last_offset: u32,
/// A buffer used to build the value of tokens which can't be read straight from the source.
@ -58,11 +51,8 @@ pub struct Lexer<'a> {
// The parser can, depending on position in syntax, decide to parse a number in a variety of
// different precisions or formats. The only way to support all is to delay parsing the
// actual number value to when the parser can decide on a format.
pub string: Option<String>,
pub duration: Option<Duration>,
pub datetime: Option<DateTime<Utc>>,
pub uuid: Option<Uuid>,
pub error: Option<SyntaxError>,
pub(super) string: Option<String>,
pub(super) error: Option<SyntaxError>,
}
impl<'a> Lexer<'a> {
@ -78,9 +68,6 @@ impl<'a> Lexer<'a> {
scratch: String::new(),
string: None,
error: None,
duration: None,
datetime: None,
uuid: None,
}
}
@ -109,9 +96,6 @@ impl<'a> Lexer<'a> {
scratch: self.scratch,
string: self.string,
error: self.error,
duration: self.duration,
datetime: self.datetime,
uuid: self.uuid,
}
}
@ -150,7 +134,7 @@ impl<'a> Lexer<'a> {
}
// Returns the span for the current token being lexed.
pub fn current_span(&self) -> Span {
pub(crate) fn current_span(&self) -> Span {
// We make sure that the source is no longer than u32::MAX so this can't overflow.
let new_offset = self.reader.offset() as u32;
let len = new_offset - self.last_offset;
@ -160,6 +144,15 @@ impl<'a> Lexer<'a> {
}
}
pub(crate) fn span_since(&self, offset: usize) -> Span {
let new_offset = self.reader.offset() as u32;
let len = new_offset - offset as u32;
Span {
offset: offset as u32,
len,
}
}
fn advance_span(&mut self) -> Span {
let span = self.current_span();
self.last_offset = self.reader.offset() as u32;
@ -181,7 +174,7 @@ impl<'a> Lexer<'a> {
/// # Warning
/// Moving the lexer into a state where the next byte is within a multibyte character will
/// result in spurious errors.
pub fn backup_before(&mut self, span: Span) {
pub(crate) fn backup_before(&mut self, span: Span) {
self.reader.backup(span.offset as usize);
self.last_offset = span.offset;
}
@ -191,7 +184,7 @@ impl<'a> Lexer<'a> {
/// # Warning
/// Moving the lexer into a state where the next byte is within a multibyte character will
/// result in spurious errors.
pub fn backup_after(&mut self, span: Span) {
pub(crate) fn backup_after(&mut self, span: Span) {
let offset = span.offset + span.len;
self.reader.backup(offset as usize);
self.last_offset = offset;
@ -201,7 +194,7 @@ impl<'a> Lexer<'a> {
/// Otherwise returns false.
///
/// Also returns false if there is no next character.
pub fn eat(&mut self, byte: u8) -> bool {
fn eat(&mut self, byte: u8) -> bool {
if self.reader.peek() == Some(byte) {
self.reader.next();
true
@ -214,7 +207,7 @@ impl<'a> Lexer<'a> {
/// and returns true. Otherwise returns false.
///
/// Also returns false if there is no next character.
pub fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
let Some(x) = self.reader.peek() else {
return false;
};
@ -226,6 +219,30 @@ impl<'a> Lexer<'a> {
}
}
fn expect(&mut self, c: char) -> Result<(), SyntaxError> {
match self.reader.peek() {
Some(x) => {
let offset = self.reader.offset() as u32;
self.reader.next();
let char = self.reader.convert_to_char(x)?;
if char == c {
return Ok(());
}
let len = self.reader.offset() as u32 - offset;
bail!(
"Unexpected character `{char}` expected `{c}`",
@Span {
offset,
len
}
)
}
None => {
bail!("Unexpected end of file, expected character `{c}`", @self.current_span())
}
}
}
/// Returns the string for a given span of the source.
/// Will panic if the given span is not valid for the source or is not valid UTF-8.
pub fn span_str(&self, span: Span) -> &'a str {
@ -237,6 +254,20 @@ impl<'a> Lexer<'a> {
pub fn span_bytes(&self, span: Span) -> &'a [u8] {
self.reader.span(span)
}
/// Returns an error if not all bytes were consumed.
pub fn assert_finished(&self) -> Result<(), SyntaxError> {
if !self.reader.is_empty() {
let offset = self.reader.offset() as u32;
let len = self.reader.remaining().len() as u32;
let span = Span {
offset,
len,
};
bail!("Trailing characters", @span)
}
Ok(())
}
}
impl Iterator for Lexer<'_> {

View file

@ -1,24 +0,0 @@
use crate::syn::token::{Token, TokenKind};
use super::Lexer;
impl Lexer<'_> {
pub fn lex_digits(&mut self) -> Token {
while let Some(b'0'..=b'9' | b'_') = self.reader.peek() {
self.reader.next();
}
self.finish_token(TokenKind::Digits)
}
pub fn lex_exponent(&mut self, start: u8) -> Token {
if let Some(x) = self.reader.peek() {
if x.is_ascii_alphabetic() || x == b'_' {
self.scratch.push(start as char);
return self.lex_ident();
}
};
self.finish_token(TokenKind::Exponent)
}
}

View file

@ -91,6 +91,11 @@ impl<'a> BytesReader<'a> {
self.remaining().first().copied()
}
#[inline]
pub fn peek1(&self) -> Option<u8> {
self.remaining().get(1).copied()
}
#[inline]
pub fn span(&self, span: Span) -> &'a [u8] {
&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
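`peek1` adds a second byte of lookahead; `number_kind` above uses it to tell a float mantissa apart from a range operator. A standalone sketch of that check:

// After the integer digits, `.5` starts a mantissa but `..5` starts a range.
fn starts_mantissa(rest: &[u8]) -> bool {
    rest.first() == Some(&b'.') && rest.get(1) != Some(&b'.')
}

fn main() {
    assert!(starts_mantissa(b".5"));   // `1.5` lexes as a float
    assert!(!starts_mantissa(b"..5")); // `1..5` leaves `..` for the parser
}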

View file

@ -1,103 +0,0 @@
//! Lexing of strand like characters.
use std::mem;
use crate::syn::{
error::error,
token::{QouteKind, Token, TokenKind},
};
use super::{unicode::chars, Lexer};
impl<'a> Lexer<'a> {
/// Lex a plain strand with either single or double quotes.
pub fn relex_strand(&mut self, token: Token) -> Token {
let is_double = match token.kind {
TokenKind::Qoute(QouteKind::Plain) => false,
TokenKind::Qoute(QouteKind::PlainDouble) => true,
x => panic!("invalid token kind, '{:?}' is not allowed for re-lexing strands", x),
};
self.last_offset = token.span.offset;
loop {
let Some(x) = self.reader.next() else {
self.scratch.clear();
return self.eof_token();
};
if x.is_ascii() {
match x {
b'\'' if !is_double => {
self.string = Some(mem::take(&mut self.scratch));
return self.finish_token(TokenKind::Strand);
}
b'"' if is_double => {
self.string = Some(mem::take(&mut self.scratch));
return self.finish_token(TokenKind::Strand);
}
b'\0' => {
let err = error!("Invalid null byte in source, null bytes are not valid SurrealQL characters",@self.current_span());
return self.invalid_token(err);
}
b'\\' => {
// Handle escape sequences.
let Some(next) = self.reader.next() else {
self.scratch.clear();
return self.eof_token();
};
match next {
b'\\' => {
self.scratch.push('\\');
}
b'\'' if !is_double => {
self.scratch.push('\'');
}
b'\"' if is_double => {
self.scratch.push('\"');
}
b'/' => {
self.scratch.push('/');
}
b'b' => {
self.scratch.push(chars::BS);
}
b'f' => {
self.scratch.push(chars::FF);
}
b'n' => {
self.scratch.push(chars::LF);
}
b'r' => {
self.scratch.push(chars::CR);
}
b't' => {
self.scratch.push(chars::TAB);
}
x => match self.reader.convert_to_char(x) {
Ok(char) => {
let valid_escape = if is_double {
'"'
} else {
'\''
};
let err = error!("Invalid escape character `{char}`, valid characters are `\\`, `{valid_escape}`, `/`, `b`, `f`, `n`, `r`, or `t`", @self.current_span());
return self.invalid_token(err);
}
Err(e) => {
return self.invalid_token(e.into());
}
},
}
}
x => self.scratch.push(x as char),
}
} else {
match self.reader.complete_char(x) {
Ok(x) => self.scratch.push(x),
Err(e) => return self.invalid_token(e.into()),
}
}
}
}
}

View file

@ -1,4 +1,4 @@
use crate::syn::token::{t, DurationSuffix, TokenKind};
use crate::syn::token::{t, TokenKind};
macro_rules! test_case(
($source:expr => [$($token:expr),*$(,)?]) => {
@ -40,7 +40,7 @@ fn operators() {
"# => [
t!("-"), t!(" "), t!("+"),t!(" "), t!("/"),t!(" "), t!("*"),t!(" "), t!("!"),t!(" "), t!("**"), t!(" "),
t!("<"), t!(" "), t!(">"), t!(" "), t!("<="), t!(" "), t!(">="), t!(" "), t!("<-"), t!(" "), t!("<->"), t!(" "), t!("->"), t!(" "),
t!("<"), t!(" "), t!(">"), t!(" "), t!("<="), t!(" "), t!(">="), t!(" "), t!("<"), t!("-"), t!(" "), t!("<"), t!("->"), t!(" "), t!("->"), t!(" "),
t!("="), t!(" "), t!("=="), t!(" "), t!("-="), t!(" "), t!("+="), t!(" "), t!("!="), t!(" "), t!("+?="), t!(" "),
@ -110,171 +110,6 @@ fn identifiers() {
}
}
#[test]
fn numbers() {
test_case! {
r#"123123+32010230.123012031+33043030dec+33043030f+303e10dec+"#
=> [
TokenKind::Digits, // 123123
t!("+"),
TokenKind::Digits, // 32010230
t!("."),
TokenKind::Digits, // 123012031
t!("+"),
TokenKind::Digits, // 33043030
t!("dec"),
t!("+"),
TokenKind::Digits, // 33043030
t!("f"),
t!("+"),
TokenKind::Digits, // 303
TokenKind::Exponent , // e
TokenKind::Digits, // 10
t!("dec"),
t!("+"),
]
}
test_case! {
"+123129decs+"
=> [
t!("+"),
TokenKind::Digits, // 123129
TokenKind::Identifier, // decs
t!("+"),
]
}
test_case! {
"+39349fs+"
=> [
t!("+"),
TokenKind::Digits, // 39349
TokenKind::Identifier, // fs
t!("+"),
]
}
test_case! {
"+394393df+"
=> [
t!("+"),
TokenKind::Digits, // 394393
TokenKind::Identifier, // df
t!("+"),
]
}
test_case! {
"+32932932def+"
=> [
t!("+"),
TokenKind::Digits, // 32932932
TokenKind::Identifier, // def
t!("+"),
]
}
test_case! {
"+329239329z+"
=> [
t!("+"),
TokenKind::Digits, // 329239329
TokenKind::Identifier, // z
t!("+"),
]
}
}
#[test]
fn duration() {
test_case! {
r#"
1ns+1µs+1us+1ms+1s+1m+1h+1w+1y
1nsa+1ans+1aus+1usa+1ams+1msa+1am+1ma+1ah+1ha+1aw+1wa+1ay+1ya+1µsa
"#
=> [
t!(" "),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Nano),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::MicroUnicode),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Micro),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Milli),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Second),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Minute),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Hour),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Week),
t!("+"),
TokenKind::Digits,
TokenKind::DurationSuffix(DurationSuffix::Year),
t!(" "),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Identifier,
t!("+"),
TokenKind::Digits,
TokenKind::Invalid,
TokenKind::Identifier,
t!(" "),
]
}
}
#[test]
fn keyword() {
test_case! {

View file

@ -1,5 +1,9 @@
//! Unicode related utilities.
pub fn is_identifier_continue(x: u8) -> bool {
matches!(x, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
/// Character constants
pub mod chars {
// Character tabulation

View file

@ -18,6 +18,7 @@ pub trait Parse<T> {
#[cfg(test)]
mod test;
use lexer::{compound, Lexer};
use parser::Parser;
use reblessive::Stack;
use token::t;
@ -52,12 +53,28 @@ pub fn parse(input: &str) -> Result<Query, Error> {
/// Parses a SurrealQL [`Value`].
#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))]
pub fn value(input: &str) -> Result<Value, Error> {
debug!("parsing value, input = {input}");
let mut parser = Parser::new(input.as_bytes());
let mut stack = Stack::new();
stack
.enter(|stk| parser.parse_value_table(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
/// Parses a SurrealQL [`Value`].
#[cfg(test)]
#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))]
pub(crate) fn value_field(input: &str) -> Result<Value, Error> {
debug!("parsing value, input = {input}");
let mut parser = Parser::new(input.as_bytes());
let mut stack = Stack::new();
stack
.enter(|stk| parser.parse_value_field(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -70,8 +87,9 @@ pub fn value_legacy_strand(input: &str) -> Result<Value, Error> {
let mut stack = Stack::new();
parser.allow_legacy_strand(true);
stack
.enter(|stk| parser.parse_value(stk))
.enter(|stk| parser.parse_value_table(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -85,6 +103,7 @@ pub fn json(input: &str) -> Result<Value, Error> {
stack
.enter(|stk| parser.parse_json(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -99,6 +118,7 @@ pub fn json_legacy_strand(input: &str) -> Result<Value, Error> {
stack
.enter(|stk| parser.parse_json(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -111,6 +131,7 @@ pub fn subquery(input: &str) -> Result<Subquery, Error> {
stack
.enter(|stk| parser.parse_full_subquery(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -120,10 +141,12 @@ pub fn subquery(input: &str) -> Result<Subquery, Error> {
pub fn idiom(input: &str) -> Result<Idiom, Error> {
debug!("parsing idiom, input = {input}");
let mut parser = Parser::new(input.as_bytes());
parser.table_as_field = true;
let mut stack = Stack::new();
stack
.enter(|stk| parser.parse_plain_idiom(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -131,8 +154,12 @@ pub fn idiom(input: &str) -> Result<Idiom, Error> {
/// Parse a datetime without enclosing delimiters from a string.
pub fn datetime_raw(input: &str) -> Result<Datetime, Error> {
debug!("parsing datetime, input = {input}");
let mut parser = Parser::new(input.as_bytes());
parser.parse_inner_datetime().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery)
let mut lexer = Lexer::new(input.as_bytes());
let res = compound::datetime_inner(&mut lexer);
if let Err(e) = lexer.assert_finished() {
return Err(Error::InvalidQuery(e.render_on(input)));
}
res.map(Datetime).map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery)
}
/// Parse a duration from a string.
@ -141,6 +168,7 @@ pub fn duration(input: &str) -> Result<Duration, Error> {
let mut parser = Parser::new(input.as_bytes());
parser
.next_token_value::<Duration>()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -153,6 +181,7 @@ pub fn range(input: &str) -> Result<Range, Error> {
stack
.enter(|stk| parser.parse_range(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -165,6 +194,7 @@ pub fn thing(input: &str) -> Result<Thing, Error> {
stack
.enter(|stk| parser.parse_thing(stk))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
@ -183,6 +213,7 @@ pub fn block(input: &str) -> Result<Block, Error> {
stack
.enter(|stk| parser.parse_block(stk, start))
.finish()
.and_then(|e| parser.assert_finished().map(|_| e))
.map_err(|e| e.render_on(input))
.map_err(Error::InvalidQuery)
}
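With `assert_finished` chained into every entry point, trailing input is now rejected uniformly. Hypothetical behavior of the reworked `datetime_raw`:

// Accepts exactly one datetime and nothing more.
assert!(datetime_raw("2024-09-12T18:23:33Z").is_ok());
// Trailing bytes now fail with a "Trailing characters" error.
assert!(datetime_raw("2024-09-12T18:23:33Z garbage").is_err());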

View file

@ -1,181 +0,0 @@
use std::ops::RangeInclusive;
use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
use crate::{
sql::Datetime,
syn::{
error::{bail, error},
parser::{expected_whitespace, unexpected, ParseResult, Parser},
token::{t, DatetimeChars, TokenKind},
},
};
impl Parser<'_> {
pub fn parse_datetime(&mut self) -> ParseResult<Datetime> {
let start = self.peek();
let double = match start.kind {
t!("d\"") => true,
t!("d'") => false,
x => bail!("Expected a datetime found {}",x, @start.span),
};
self.pop_peek();
let datetime = self.parse_inner_datetime()?;
if double {
expected_whitespace!(self, t!("\""));
} else {
expected_whitespace!(self, t!("'"));
}
Ok(datetime)
}
/// Parses the datetime without surrounding quotes
pub fn parse_inner_datetime(&mut self) -> ParseResult<Datetime> {
let start_date = self.peek_whitespace().span;
let year_neg = self.eat_whitespace(t!("-"));
if !year_neg {
self.eat_whitespace(t!("+"));
}
let year = self.parse_datetime_digits(4, 0..=9999)?;
expected_whitespace!(self, t!("-"));
let month = self.parse_datetime_digits(2, 1..=12)?;
expected_whitespace!(self, t!("-"));
let day = self.parse_datetime_digits(2, 1..=31)?;
let date_span = start_date.covers(self.last_span());
let year = if year_neg {
-(year as i32)
} else {
year as i32
};
let date = NaiveDate::from_ymd_opt(year, month as u32, day as u32).ok_or_else(
|| error!("Invalid DateTime date: date outside of valid range", @date_span),
)?;
if !self.eat(TokenKind::DatetimeChars(DatetimeChars::T)) {
let time = NaiveTime::default();
let date_time = NaiveDateTime::new(date, time);
let datetime =
Utc.fix().from_local_datetime(&date_time).earliest().unwrap().with_timezone(&Utc);
return Ok(Datetime(datetime));
}
let start_time = self.peek_whitespace().span;
let hour = self.parse_datetime_digits(2, 0..=24)?;
expected_whitespace!(self, t!(":"));
let minute = self.parse_datetime_digits(2, 0..=59)?;
expected_whitespace!(self, t!(":"));
let second = self.parse_datetime_digits(2, 0..=59)?;
let nanos = if self.eat_whitespace(t!(".")) {
let digits_token = expected_whitespace!(self, TokenKind::Digits);
let slice = self.lexer.span_bytes(digits_token.span);
if slice.len() > 9 {
bail!("Invalid DateTime nanoseconds, too many nanosecond digits",
@digits_token.span => "This section contains more than 9 digits");
}
let mut number = 0u32;
for i in 0..9 {
let Some(c) = slice.get(i).copied() else {
// If digits are missing they are counted as 0's
for _ in i..9 {
number *= 10;
}
break;
};
number *= 10;
number += (c - b'0') as u32;
}
number
} else {
0
};
let time_span = start_time.covers(self.last_span());
let time = NaiveTime::from_hms_nano_opt(hour as u32, minute as u32, second as u32, nanos)
.ok_or_else(
|| error!("Invalid DateTime time: time outside of valid range", @time_span),
)?;
let peek = self.peek_whitespace();
let timezone = match peek.kind {
t!("+") => self.parse_datetime_timezone(false)?,
t!("-") => self.parse_datetime_timezone(true)?,
TokenKind::DatetimeChars(DatetimeChars::Z) => {
self.pop_peek();
Utc.fix()
}
_ => unexpected!(self, peek, "`Z` or a timezone"),
};
let date_time = NaiveDateTime::new(date, time);
let datetime = timezone
.from_local_datetime(&date_time)
.earliest()
// this should never panic with a fixed offset.
.unwrap()
.with_timezone(&Utc);
Ok(Datetime(datetime))
}
fn parse_datetime_timezone(&mut self, neg: bool) -> ParseResult<FixedOffset> {
self.pop_peek();
let hour = self.parse_datetime_digits(2, 0..=23)?;
expected_whitespace!(self, t!(":"));
let minute = self.parse_datetime_digits(2, 0..=59)?;
// The range checks on the digits ensure that the offset can't exceed 23:59 so below
// unwraps won't panic.
if neg {
Ok(FixedOffset::west_opt((hour * 3600 + minute * 60) as i32).unwrap())
} else {
Ok(FixedOffset::east_opt((hour * 3600 + minute * 60) as i32).unwrap())
}
}
fn parse_datetime_digits(
&mut self,
len: usize,
range: RangeInclusive<usize>,
) -> ParseResult<usize> {
let t = self.peek_whitespace();
match t.kind {
TokenKind::Digits => {}
_ => unexpected!(self, t, "datetime digits"),
}
let digits_str = self.lexer.span_str(t.span);
if digits_str.len() != len {
bail!("Datetime digits section not the correct length, needs to be {len} characters",
@t.span => "This section has a length of {}", digits_str.len());
}
self.pop_peek();
// This should always parse as it has been validated by the lexer.
let value = digits_str.parse().unwrap();
if !range.contains(&value) {
bail!("Datetime digits section outside of valid range of {}..={}", range.start(),range.end(), @t.span);
}
Ok(value)
}
}

View file

@ -1,30 +1,31 @@
use crate::{
sql::{language::Language, Datetime, Duration, Ident, Param, Regex, Strand, Table, Uuid},
syn::{
lexer::compound,
parser::{mac::unexpected, ParseResult, Parser},
token::{t, QouteKind, TokenKind},
token::{self, t, TokenKind},
},
};
mod datetime;
use super::mac::pop_glued;
mod number;
mod uuid;
/// A trait for parsing single tokens with a specific value.
pub trait TokenValue: Sized {
pub(crate) trait TokenValue: Sized {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self>;
}
impl TokenValue for Ident {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
let token = parser.glue_ident(false)?;
let token = parser.peek();
match token.kind {
TokenKind::Identifier => {
parser.pop_peek();
let str = parser.lexer.string.take().unwrap();
Ok(Ident(str))
}
TokenKind::Keyword(_) | TokenKind::Language(_) | TokenKind::Algorithm(_) => {
x if Parser::kind_is_keyword_like(x) => {
let s = parser.pop_peek().span;
Ok(Ident(parser.lexer.span_str(s).to_owned()))
}
@ -75,11 +76,13 @@ impl TokenValue for Param {
impl TokenValue for Duration {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
let token = parser.glue_duration()?;
let token = parser.peek();
match token.kind {
TokenKind::Duration => {
TokenKind::Glued(token::Glued::Duration) => Ok(pop_glued!(parser, Duration)),
TokenKind::Digits => {
parser.pop_peek();
Ok(Duration(parser.lexer.duration.unwrap()))
let v = parser.lexer.lex_compound(token, compound::duration)?.value;
Ok(Duration(v))
}
_ => unexpected!(parser, token, "a duration"),
}
@ -88,7 +91,16 @@ impl TokenValue for Duration {
impl TokenValue for Datetime {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
parser.parse_datetime()
let token = parser.peek();
match token.kind {
TokenKind::Glued(token::Glued::Datetime) => Ok(pop_glued!(parser, Datetime)),
t!("d\"") | t!("d'") => {
parser.pop_peek();
let v = parser.lexer.lex_compound(token, compound::datetime)?.value;
Ok(Datetime(v))
}
_ => unexpected!(parser, token, "a datetime"),
}
}
}
@ -96,17 +108,11 @@ impl TokenValue for Strand {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
let token = parser.peek();
match token.kind {
TokenKind::Qoute(QouteKind::Plain | QouteKind::PlainDouble) => {
TokenKind::Glued(token::Glued::Strand) => Ok(pop_glued!(parser, Strand)),
t!("\"") | t!("'") => {
parser.pop_peek();
let t = parser.lexer.relex_strand(token);
let TokenKind::Strand = t.kind else {
unexpected!(parser, t, "a strand")
};
Ok(Strand(parser.lexer.string.take().unwrap()))
}
TokenKind::Strand => {
parser.pop_peek();
Ok(Strand(parser.lexer.string.take().unwrap()))
let v = parser.lexer.lex_compound(token, compound::strand)?.value;
Ok(Strand(v))
}
_ => unexpected!(parser, token, "a strand"),
}
@ -115,7 +121,16 @@ impl TokenValue for Strand {
impl TokenValue for Uuid {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
parser.parse_uuid()
let token = parser.peek();
match token.kind {
TokenKind::Glued(token::Glued::Uuid) => Ok(pop_glued!(parser, Uuid)),
t!("u\"") | t!("u'") => {
parser.pop_peek();
let v = parser.lexer.lex_compound(token, compound::uuid)?.value;
Ok(Uuid(v))
}
_ => unexpected!(parser, token, "a uuid"),
}
}
}
@ -124,8 +139,9 @@ impl TokenValue for Regex {
let peek = parser.peek();
match peek.kind {
t!("/") => {
let pop = parser.pop_peek();
Ok(parser.lexer.lex_compound(pop)?.value)
parser.pop_peek();
let v = parser.lexer.lex_compound(peek, compound::regex)?.value;
Ok(Regex(v))
}
_ => unexpected!(parser, peek, "a regex"),
}
@ -134,9 +150,40 @@ impl TokenValue for Regex {
impl Parser<'_> {
/// Parse a token value from the next token in the parser.
pub fn next_token_value<V: TokenValue>(&mut self) -> ParseResult<V> {
pub(crate) fn next_token_value<V: TokenValue>(&mut self) -> ParseResult<V> {
V::from_token(self)
}
pub(crate) fn parse_flexible_ident(&mut self) -> ParseResult<Ident> {
let token = self.next();
match token.kind {
TokenKind::Digits => {
let peek = self.peek_whitespace();
let span = match peek.kind {
x if Self::kind_is_keyword_like(x) => {
self.pop_peek();
token.span.covers(peek.span)
}
TokenKind::Identifier => {
self.pop_peek();
token.span.covers(peek.span)
}
_ => token.span,
};
Ok(Ident(self.lexer.span_str(span).to_owned()))
}
TokenKind::Identifier => {
let str = self.lexer.string.take().unwrap();
Ok(Ident(str))
}
x if Self::kind_is_keyword_like(x) => {
Ok(Ident(self.lexer.span_str(token.span).to_owned()))
}
_ => {
unexpected!(self, token, "an identifier");
}
}
}
}
#[cfg(test)]
@ -159,8 +206,8 @@ mod test {
assert_eq!(
r,
sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Idiom(
sql::Idiom(vec![sql::Part::Field(sql::Ident(ident.to_string()))])
sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Table(
sql::Table(ident.to_string())
))]))
)
}

View file

@ -1,71 +1,35 @@
use std::{mem, num::ParseIntError, str::FromStr};
use rust_decimal::Decimal;
use std::{
borrow::Cow,
num::{ParseFloatError, ParseIntError},
str::FromStr,
};
use crate::{
sql::Number,
syn::{
error::error,
parser::{mac::unexpected, ParseResult, Parser},
token::{t, NumberKind, TokenKind},
error::{bail, error},
lexer::compound::{self, NumberKind},
parser::{mac::unexpected, GluedValue, ParseResult, Parser},
token::{self, t, TokenKind},
},
};
use super::TokenValue;
fn prepare_number_str(str: &str) -> Cow<str> {
if str.contains('_') {
Cow::Owned(str.chars().filter(|x| *x != '_').collect())
} else {
Cow::Borrowed(str)
}
}
/// Generic integer parsing method,
/// works for all unsigned integers.
fn parse_integer<I>(parser: &mut Parser<'_>) -> ParseResult<I>
where
I: FromStr<Err = ParseIntError>,
{
let mut peek = parser.peek();
if let t!("-") = peek.kind {
unexpected!(parser, peek, "an integer", => "only positive integers are allowed here")
}
if let t!("+") = peek.kind {
peek = parser.peek_whitespace();
}
match peek.kind {
TokenKind::Digits => {
let token = parser.peek();
match token.kind {
t!("+") | TokenKind::Digits => {
parser.pop_peek();
assert!(!parser.has_peek());
let p = parser.peek_whitespace();
match p.kind {
t!(".") => {
unexpected!(parser, p, "an integer")
}
t!("dec") => {
unexpected!(parser, p, "an integer", => "decimal numbers not supported here")
}
x if Parser::tokenkind_continues_ident(x) => {
unexpected!(parser, p, "an integer")
}
_ => {}
}
// remove the possible "f" number suffix and any '_' characters
let res = prepare_number_str(parser.lexer.span_str(peek.span))
.parse()
.map_err(|e| error!("Failed to parse integer: {e}", @peek.span))?;
Ok(res)
Ok(parser.lexer.lex_compound(token, compound::integer)?.value)
}
_ => unexpected!(parser, peek, "an integer"),
t!("-") => {
bail!("Unexpected token `-`", @token.span => "Only positive integers allowed here")
}
_ => unexpected!(parser, token, "an unsigned integer"),
}
}
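For intuition, a toy version of this entry point's sign handling, independent of the parser's token machinery (all names here are invented for illustration):

fn parse_unsigned(s: &str) -> Result<u64, String> {
    // A leading `-` is rejected up front with a dedicated message.
    if s.starts_with('-') {
        return Err("only positive integers are allowed here".into());
    }
    // A leading `+` is tolerated, mirroring the `t!("+")` arm above.
    let s = s.strip_prefix('+').unwrap_or(s);
    s.parse().map_err(|e| format!("failed to parse integer: {e}"))
}

fn main() {
    assert_eq!(parse_unsigned("+42"), Ok(42));
    assert!(parse_unsigned("-42").is_err());
}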
@ -93,90 +57,68 @@ impl TokenValue for u8 {
}
}
/// Generic float parsing method,
/// works for both f32 and f64
fn parse_float<F>(parser: &mut Parser<'_>) -> ParseResult<F>
where
F: FromStr<Err = ParseFloatError>,
{
let peek = parser.peek();
// find initial digits
match peek.kind {
TokenKind::NaN => return Ok("NaN".parse().unwrap()),
TokenKind::Digits | t!("+") | t!("-") => {}
_ => unexpected!(parser, peek, "a floating point number"),
};
let float_token = parser.glue_float()?;
match float_token.kind {
TokenKind::Number(NumberKind::Float) => {
parser.pop_peek();
}
_ => unexpected!(parser, float_token, "a floating point number"),
};
let span = parser.lexer.span_str(float_token.span);
// remove the possible "f" number suffix and any '_' characters
prepare_number_str(span.strip_suffix('f').unwrap_or(span))
.parse()
.map_err(|e| error!("Failed to parser floating point number: {e}", @float_token.span))
}
impl TokenValue for f32 {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
parse_float(parser)
let token = parser.peek();
match token.kind {
t!("+") | t!("-") | TokenKind::Digits => {
parser.pop_peek();
Ok(parser.lexer.lex_compound(token, compound::float)?.value)
}
_ => unexpected!(parser, token, "a floating point number"),
}
}
}
impl TokenValue for f64 {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
parse_float(parser)
let token = parser.peek();
match token.kind {
t!("+") | t!("-") | TokenKind::Digits => {
parser.pop_peek();
Ok(parser.lexer.lex_compound(token, compound::float)?.value)
}
_ => unexpected!(parser, token, "a floating point number"),
}
}
}
impl TokenValue for Number {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
let number = parser.glue_number()?;
let number_kind = match number.kind {
TokenKind::NaN => {
let token = parser.peek();
match token.kind {
TokenKind::Glued(token::Glued::Number) => {
parser.pop_peek();
return Ok(Number::Float(f64::NAN));
}
TokenKind::Number(x) => x,
_ => unexpected!(parser, number, "a number"),
};
parser.pop_peek();
let span = parser.lexer.span_str(number.span);
match number_kind {
NumberKind::Decimal => {
let str = prepare_number_str(span.strip_suffix("dec").unwrap_or(span));
let decimal = if str.contains('e') {
Decimal::from_scientific(str.as_ref())
.map_err(|e| error!("Failed to parser decimal: {e}", @number.span))?
} else {
Decimal::from_str(str.as_ref())
.map_err(|e| error!("Failed to parser decimal: {e}", @number.span))?
let GluedValue::Number(x) = mem::take(&mut parser.glued_value) else {
panic!("Glued token was next but glued value was not of the correct value");
};
Ok(Number::Decimal(decimal))
let number_str = parser.lexer.span_str(token.span);
match x {
NumberKind::Integer => number_str
.parse()
.map(Number::Int)
.map_err(|e| error!("Failed to parse number: {e}", @token.span)),
NumberKind::Float => number_str
.parse()
.map(Number::Float)
.map_err(|e| error!("Failed to parse number: {e}", @token.span)),
NumberKind::Decimal => {
let decimal = if number_str.contains(['e', 'E']) {
Decimal::from_scientific(number_str)
.map_err(|e| error!("Failed to parser decimal: {e}", @token.span))?
} else {
Decimal::from_str(number_str)
.map_err(|e| error!("Failed to parser decimal: {e}", @token.span))?
};
Ok(Number::Decimal(decimal))
}
}
}
NumberKind::Float => {
let float =
prepare_number_str(span.strip_suffix('f').unwrap_or(span)).parse().map_err(
|e| error!("Failed to parser floating point number: {e}", @number.span),
)?;
Ok(Number::Float(float))
}
NumberKind::Integer => {
let integer = prepare_number_str(span.strip_suffix('f').unwrap_or(span))
.parse()
.map_err(|e| error!("Failed to parse integer: {e}", @number.span))?;
Ok(Number::Int(integer))
t!("+") | t!("-") | TokenKind::Digits => {
parser.pop_peek();
Ok((parser.lexer.lex_compound(token, compound::number))?.value)
}
_ => unexpected!(parser, token, "a number"),
}
}
}
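The three `NumberKind` arms reduce to plain string parsing; a minimal sketch with the same rust_decimal calls (the inputs are arbitrary examples):

use rust_decimal::Decimal;
use std::str::FromStr;

fn main() {
    // NumberKind::Integer -> Number::Int
    let int: i64 = "123".parse().unwrap();
    // NumberKind::Float -> Number::Float
    let float: f64 = "13.5".parse().unwrap();
    // NumberKind::Decimal: scientific notation takes the from_scientific
    // path, the plain form goes through FromStr.
    let sci = Decimal::from_scientific("1e3").unwrap();
    let plain = Decimal::from_str("1.25").unwrap();
    assert_eq!(int, 123);
    assert_eq!(float, 13.5);
    assert_eq!(sci, Decimal::from(1000));
    assert_eq!(plain * Decimal::from(4), Decimal::from(5));
}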

View file

@ -1,191 +0,0 @@
use crate::{
sql::Uuid,
syn::{
error::bail,
parser::{
mac::{expected_whitespace, unexpected},
ParseResult, Parser,
},
token::{t, DurationSuffix, NumberSuffix, TokenKind, VectorTypeKind},
},
};
impl Parser<'_> {
/// Parses a uuid strand.
pub fn parse_uuid(&mut self) -> ParseResult<Uuid> {
let quote_token = self.peek();
let double = match quote_token.kind {
t!("u\"") => true,
t!("u'") => false,
_ => unexpected!(self, quote_token, "a uuid"),
};
self.pop_peek();
// number of bytes is 4-2-2-2-6
let mut uuid_buffer = [0u8; 16];
self.eat_uuid_hex(&mut uuid_buffer[0..4])?;
expected_whitespace!(self, t!("-"));
self.eat_uuid_hex(&mut uuid_buffer[4..6])?;
expected_whitespace!(self, t!("-"));
self.eat_uuid_hex(&mut uuid_buffer[6..8])?;
expected_whitespace!(self, t!("-"));
self.eat_uuid_hex(&mut uuid_buffer[8..10])?;
expected_whitespace!(self, t!("-"));
self.eat_uuid_hex(&mut uuid_buffer[10..16])?;
if double {
expected_whitespace!(self, t!("\""));
} else {
expected_whitespace!(self, t!("'"));
}
Ok(Uuid(uuid::Uuid::from_bytes(uuid_buffer)))
}
/// Eats a uuid hex section, enough to fill the given buffer with bytes.
fn eat_uuid_hex(&mut self, buffer: &mut [u8]) -> ParseResult<()> {
// A function to convert a hex digit to its number representation.
fn ascii_to_hex(b: u8) -> Option<u8> {
if b.is_ascii_digit() {
return Some(b - b'0');
}
if (b'a'..=b'f').contains(&b) {
return Some(b - (b'a' - 10));
}
if (b'A'..=b'F').contains(&b) {
return Some(b - (b'A' - 10));
}
None
}
// The number of characters required is twice the buffer length,
// since every character encodes half a byte.
let required_len = buffer.len() * 2;
// The next token should be digits or an identifier.
// If it is digits, an identifier might follow it.
let start_token = self.peek_whitespace();
let mut cur = start_token;
loop {
let next = self.peek_whitespace();
match next.kind {
TokenKind::Identifier => {
cur = self.pop_peek();
break;
}
TokenKind::Exponent
| TokenKind::Digits
| TokenKind::DurationSuffix(DurationSuffix::Day)
| TokenKind::NumberSuffix(NumberSuffix::Float) => {
cur = self.pop_peek();
}
TokenKind::Language(_)
| TokenKind::Keyword(_)
| TokenKind::VectorType(VectorTypeKind::F64 | VectorTypeKind::F32) => {
// there are some keyword and language tokens which could be part of the
// hex section.
if !self.lexer.span_bytes(next.span).iter().all(|x| x.is_ascii_hexdigit()) {
bail!("Invalid UUID section, invalid hex character in section", @next.span)
}
cur = self.pop_peek();
break;
}
t!("-") | t!("\"") | t!("'") => break,
_ => {
bail!("Invalid UUID section, invalid hex character in section", @next.span)
}
}
}
// Get the span that covered all eaten tokens.
let digits_span = start_token.span.covers(cur.span);
let digits_bytes = self.lexer.span_str(digits_span).as_bytes();
// For error handling, the invalid hex character error should be returned first,
// before the incorrect segment length error, even if both apply.
if !digits_bytes.iter().all(|x| x.is_ascii_hexdigit()) {
bail!("Unexpected characters in UUID token, expected UUID hex digits", @digits_span);
}
if digits_bytes.len() != required_len {
bail!("Unexpected characters in UUID token, invalid length of hex digits are",
@digits_span => "this has `{}` character where `{}` are required", digits_bytes.len(), required_len);
}
// write into the buffer
for (i, b) in buffer.iter_mut().enumerate() {
*b = ascii_to_hex(digits_bytes[i * 2]).unwrap() << 4
| ascii_to_hex(digits_bytes[i * 2 + 1]).unwrap();
}
Ok(())
}
}
#[cfg(test)]
mod test {
use crate::syn::parser::Parser;
#[test]
fn uuid_parsing() {
fn assert_uuid_parses(s: &str) {
let uuid_str = format!("u'{s}'");
let mut parser = Parser::new(uuid_str.as_bytes());
let uuid = parser.parse_uuid().unwrap();
assert_eq!(uuid::Uuid::parse_str(s).unwrap(), *uuid);
}
assert_uuid_parses("0531956f-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("0531956d-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("0531956e-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("0531956a-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("053195f1-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("053195d1-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("053195e1-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("053195a1-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("f0531951-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("d0531951-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("e0531951-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("a0531951-20ec-4575-bb68-3e6b49d813fa");
assert_uuid_parses("b98839b9-0471-4dbb-aae0-14780e848f32");
}
#[test]
fn test_uuid_characters() {
let hex_characters =
[b'0', b'a', b'b', b'c', b'd', b'e', b'f', b'A', b'B', b'C', b'D', b'E', b'F'];
let mut uuid_string: Vec<u8> = "u'0531956f-20ec-4575-bb68-3e6b49d813fa'".to_string().into();
fn assert_uuid_parses(s: &[u8]) {
let mut parser = Parser::new(s);
parser.parse_uuid().unwrap();
}
for i in hex_characters.iter() {
for j in hex_characters.iter() {
for k in hex_characters.iter() {
uuid_string[3] = *i;
uuid_string[4] = *j;
uuid_string[5] = *k;
assert_uuid_parses(&uuid_string)
}
}
}
}
}
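Although this file is deleted (UUID lexing now lives in the lexer's compound module), the nibble packing it performed is worth a standalone sketch: two ASCII hex digits produce one byte, high nibble first.

fn ascii_to_hex(b: u8) -> Option<u8> {
    match b {
        b'0'..=b'9' => Some(b - b'0'),
        b'a'..=b'f' => Some(b - b'a' + 10),
        b'A'..=b'F' => Some(b - b'A' + 10),
        _ => None,
    }
}

fn main() {
    let byte = ascii_to_hex(b'f').unwrap() << 4 | ascii_to_hex(b'a').unwrap();
    assert_eq!(byte, 0xfa);
}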

View file

@ -3,10 +3,7 @@ use crate::{
sql::{Constant, Function, Value},
syn::{
error::MessageKind,
parser::{
mac::{expected, unexpected},
SyntaxError,
},
parser::{mac::expected, unexpected, SyntaxError},
token::{t, Span},
},
};
@ -464,12 +461,12 @@ pub(crate) static PATHS: phf::Map<UniCase<&'static str>, PathKind> = phf_map! {
impl Parser<'_> {
/// Parse a builtin path.
pub async fn parse_builtin(&mut self, stk: &mut Stk, start: Span) -> ParseResult<Value> {
pub(super) async fn parse_builtin(&mut self, stk: &mut Stk, start: Span) -> ParseResult<Value> {
let mut last_span = start;
while self.eat(t!("::")) {
let t = self.glue_ident(false)?;
if !Self::tokenkind_can_start_ident(t.kind) {
unexpected!(self, t, "an identifier")
let peek = self.peek();
if !Self::kind_is_identifier(peek.kind) {
unexpected!(self, peek, "an identifier")
}
self.pop_peek();
last_span = self.last_span();
@ -518,7 +515,7 @@ impl Parser<'_> {
}
/// Parse a call to a builtin function.
pub async fn parse_builtin_function(
pub(super) async fn parse_builtin_function(
&mut self,
stk: &mut Stk,
name: String,
@ -530,7 +527,7 @@ impl Parser<'_> {
break;
}
let arg = stk.run(|ctx| self.parse_value_field(ctx)).await?;
let arg = stk.run(|ctx| self.parse_value_inherit(ctx)).await?;
args.push(arg);
if !self.eat(t!(",")) {

View file

@ -1,26 +1,49 @@
//! This module defines the pratt parser for operators.
use std::ops::Bound;
use reblessive::Stk;
use super::mac::unexpected;
use super::mac::{expected_whitespace, unexpected};
use crate::sql::Range;
use crate::sql::{value::TryNeg, Cast, Expression, Number, Operator, Value};
use crate::syn::error::bail;
use crate::syn::token::Token;
use crate::syn::token::{self, Token};
use crate::syn::{
parser::{mac::expected, ParseResult, Parser},
token::{t, TokenKind},
};
/// An enum which defines how strongly an operator binds its operands.
///
/// If a binding power is higher the operator is more likely to directly operate on its
/// neighbours.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
pub enum BindingPower {
Base,
Or,
And,
Equality,
Relation,
AddSub,
MulDiv,
Power,
Cast,
Range,
Nullish,
Unary,
}
impl Parser<'_> {
/// Parses a generic value.
///
/// A generic loose ident like `foo` in for example `foo.bar` can be two different values
/// depending on context: a table or a field on the current document. This function parses loose
/// idents as a table, see [`parse_value_field`] for parsing loose idents as fields
pub async fn parse_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
pub async fn parse_value_table(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let old = self.table_as_field;
self.table_as_field = false;
let res = self.pratt_parse_expr(ctx, 0).await;
let res = self.pratt_parse_expr(ctx, BindingPower::Base).await;
self.table_as_field = old;
res
}
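Since `BindingPower` derives `Ord`, declaration order is precedence order: a variant declared later binds tighter. A self-contained copy of the enum (for illustration only) makes the comparisons explicit:

#[allow(dead_code)]
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
enum BindingPower {
    Base, Or, And, Equality, Relation, AddSub, MulDiv, Power, Cast, Range, Nullish, Unary,
}

fn main() {
    // 1 - 2 * 3 parses as 1 - (2 * 3) because MulDiv > AddSub.
    assert!(BindingPower::MulDiv > BindingPower::AddSub);
    // `&&` binds tighter than `||`, as usual.
    assert!(BindingPower::And > BindingPower::Or);
}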
@ -30,16 +53,23 @@ impl Parser<'_> {
/// A generic loose ident like `foo` in for example `foo.bar` can be two different values
/// depending on context: a table or a field on the current document. This function parses loose
/// idents as a field, see [`parse_value`] for parsing loose idents as a table
pub async fn parse_value_field(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
pub(crate) async fn parse_value_field(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let old = self.table_as_field;
self.table_as_field = true;
let res = self.pratt_parse_expr(ctx, 0).await;
let res = self.pratt_parse_expr(ctx, BindingPower::Base).await;
self.table_as_field = old;
res
}
/// Parses a generic value.
///
/// Inherits how loose identifiers are parsed from its caller.
pub(super) async fn parse_value_inherit(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
self.pratt_parse_expr(ctx, BindingPower::Base).await
}
/// Parse an assigner operator.
pub fn parse_assigner(&mut self) -> ParseResult<Operator> {
pub(super) fn parse_assigner(&mut self) -> ParseResult<Operator> {
let token = self.next();
match token.kind {
t!("=") => Ok(Operator::Equal),
@ -56,19 +86,15 @@ impl Parser<'_> {
/// more likely to operate directly on its neighbours. For example `*` has a higher binding power
/// than `-`, resulting in 1 - 2 * 3 being parsed as 1 - (2 * 3).
///
/// This returns two numbers: the binding power of the left neighbour and the right neighbour.
/// If the left number is lower than the right it is left associative: i.e. '1 op 2 op 3' will
/// be parsed as '(1 op 2) op 3'. If the right number is lower the operator is right
/// associative: i.e. '1 op 2 op 3' will be parsed as '1 op (2 op 3)'. For example: `+=` is
/// right associative so `a += b += 3` will be parsed as `a += (b += 3)` while `+` is left
/// associative and will be parsed as `(a + b) + c`.
fn infix_binding_power(token: TokenKind) -> Option<(u8, u8)> {
/// All operators in SurrealQL which are parsed by the functions in this module are left
/// associative or have no defined associativity.
fn infix_binding_power(&mut self, token: TokenKind) -> Option<BindingPower> {
// TODO: Look at ordering of operators.
match token {
// assignment operators have the lowest binding power.
//t!("+=") | t!("-=") | t!("+?=") => Some((2, 1)),
t!("||") | t!("OR") => Some((3, 4)),
t!("&&") | t!("AND") => Some((5, 6)),
t!("||") | t!("OR") => Some(BindingPower::Or),
t!("&&") | t!("AND") => Some(BindingPower::And),
// Equality operators have same binding power.
t!("=")
@ -81,11 +107,26 @@ impl Parser<'_> {
| t!("!~")
| t!("*~")
| t!("?~")
| t!("@") => Some((7, 8)),
| t!("@") => Some(BindingPower::Equality),
t!("<")
| t!("<=")
| t!(">")
t!("<") => {
let peek = self.peek_whitespace1();
if matches!(peek.kind, t!("-") | t!("->") | t!("..")) {
return None;
}
Some(BindingPower::Relation)
}
t!(">") => {
if self.peek_whitespace1().kind == t!("..") {
return Some(BindingPower::Range);
}
Some(BindingPower::Relation)
}
t!("..") => Some(BindingPower::Range),
t!("<=")
| t!(">=")
| t!("")
| t!("CONTAINS")
@ -111,37 +152,49 @@ impl Parser<'_> {
| t!("INTERSECTS")
| t!("NOT")
| t!("IN")
| t!("<|") => Some((9, 10)),
| t!("<|") => Some(BindingPower::Relation),
t!("+") | t!("-") => Some((11, 12)),
t!("*") | t!("×") | t!("/") | t!("÷") | t!("%") => Some((13, 14)),
t!("**") => Some((15, 16)),
t!("?:") | t!("??") => Some((17, 18)),
t!("+") | t!("-") => Some(BindingPower::AddSub),
t!("*") | t!("×") | t!("/") | t!("÷") | t!("%") => Some(BindingPower::MulDiv),
t!("**") => Some(BindingPower::Power),
t!("?:") | t!("??") => Some(BindingPower::Nullish),
_ => None,
}
}
fn prefix_binding_power(&mut self, token: TokenKind) -> Option<((), u8)> {
fn prefix_binding_power(&mut self, token: TokenKind) -> Option<BindingPower> {
match token {
t!("!") | t!("+") | t!("-") => Some(((), 19)),
t!("!") | t!("+") | t!("-") => Some(BindingPower::Unary),
t!("..") => Some(BindingPower::Range),
t!("<") => {
if self.peek_token_at(1).kind != t!("FUTURE") {
Some(((), 20))
} else {
None
let peek = self.peek1();
if matches!(peek.kind, t!("-") | t!("->") | t!("FUTURE")) {
return None;
}
Some(BindingPower::Cast)
}
_ => None,
}
}
async fn parse_prefix_op(&mut self, ctx: &mut Stk, min_bp: u8) -> ParseResult<Value> {
async fn parse_prefix_op(&mut self, ctx: &mut Stk, min_bp: BindingPower) -> ParseResult<Value> {
let token = self.peek();
let operator = match token.kind {
t!("+") => {
// +123 is a single number token, so parse it as such
let p = self.peek_whitespace_token_at(1);
let p = self.peek_whitespace1();
if matches!(p.kind, TokenKind::Digits) {
// This is a bit of an annoying special case.
// The problem is that `+` and `-` can be a prefix operator and the start
// of a number token.
// To figure out which it is we need to peek the next whitespace token,
// but this eats the digits that the lexer needs to lex the number. So we need
// to backup before the digits token was consumed, clear the digits token from
// the token buffer so it isn't popped after parsing the number, and then lex the
// number.
self.lexer.backup_before(p.span);
self.token_buffer.clear();
self.token_buffer.push(token);
return self.next_token_value::<Number>().map(Value::Number);
}
self.pop_peek();
@ -150,8 +203,19 @@ impl Parser<'_> {
}
t!("-") => {
// -123 is a single number token, so parse it as such
let p = self.peek_whitespace_token_at(1);
let p = self.peek_whitespace1();
if matches!(p.kind, TokenKind::Digits) {
// This is a bit of an annoying special case.
// The problem is that `+` and `-` can be a prefix operator and the start
// of a number token.
// To figure out which it is we need to peek the next whitespace token,
// but this eats the digits that the lexer needs to lex the number. So we need
// to backup before the digits token was consumed, clear the digits token from
// the token buffer so it isn't popped after parsing the number, and then lex the
// number.
self.lexer.backup_before(p.span);
self.token_buffer.clear();
self.token_buffer.push(token);
return self.next_token_value::<Number>().map(Value::Number);
}
@ -166,10 +230,11 @@ impl Parser<'_> {
t!("<") => {
self.pop_peek();
let kind = self.parse_kind(ctx, token.span).await?;
let value = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
let value = ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Cast)).await?;
let cast = Cast(kind, value);
return Ok(Value::Cast(Box::new(cast)));
}
t!("..") => return self.parse_prefix_range(ctx).await,
// should be unreachable as we previously checked if the token was a prefix op.
_ => unreachable!(),
};
@ -200,17 +265,16 @@ impl Parser<'_> {
}
}
pub fn parse_knn(&mut self, token: Token) -> ParseResult<Operator> {
pub(super) fn parse_knn(&mut self, token: Token) -> ParseResult<Operator> {
let amount = self.next_token_value()?;
let op = if self.eat(t!(",")) {
let token = self.peek();
match token.kind {
TokenKind::Distance(ref k) => {
self.pop_peek();
let d = self.convert_distance(k).map(Some)?;
TokenKind::Distance(_) => {
let d = self.parse_distance().map(Some)?;
Operator::Knn(amount, d)
}
TokenKind::Digits | TokenKind::Number(_) => {
TokenKind::Digits | TokenKind::Glued(token::Glued::Number) => {
let ef = self.next_token_value()?;
Operator::Ann(amount, ef)
}
@ -226,10 +290,43 @@ impl Parser<'_> {
Ok(op)
}
fn expression_is_relation(value: &Value) -> bool {
if let Value::Expression(x) = value {
return Self::operator_is_relation(x.operator());
}
false
}
fn operator_is_relation(operator: &Operator) -> bool {
matches!(
operator,
Operator::Equal
| Operator::NotEqual
| Operator::AllEqual
| Operator::AnyEqual
| Operator::NotLike
| Operator::AllLike
| Operator::AnyLike
| Operator::Like
| Operator::Contain
| Operator::NotContain
| Operator::NotInside
| Operator::ContainAll
| Operator::ContainNone
| Operator::AllInside
| Operator::AnyInside
| Operator::NoneInside
| Operator::Outside
| Operator::Intersects
| Operator::Inside
| Operator::Knn(_, _)
)
}
async fn parse_infix_op(
&mut self,
ctx: &mut Stk,
min_bp: u8,
min_bp: BindingPower,
lhs: Value,
) -> ParseResult<Value> {
let token = self.next();
@ -261,7 +358,6 @@ impl Parser<'_> {
t!("<=") => Operator::LessThanOrEqual,
t!("<") => Operator::LessThan,
t!(">=") => Operator::MoreThanOrEqual,
t!(">") => Operator::MoreThan,
t!("**") => Operator::Pow,
t!("+") => Operator::Add,
t!("-") => Operator::Sub,
@ -294,10 +390,30 @@ impl Parser<'_> {
t!("IN") => Operator::Inside,
t!("<|") => self.parse_knn(token)?,
t!(">") => {
if self.peek_whitespace().kind == t!("..") {
self.pop_peek();
return self.parse_infix_range(ctx, true, lhs).await;
}
Operator::MoreThan
}
t!("..") => {
return self.parse_infix_range(ctx, false, lhs).await;
}
// should be unreachable as we previously checked if the token was an infix op.
x => unreachable!("found non-operator token {x:?}"),
};
let before = self.recent_span();
let rhs = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
if Self::operator_is_relation(&operator) && Self::expression_is_relation(&lhs) {
let span = before.covers(self.recent_span());
// 1 >= 2 >= 3 has no defined associativity and is often a mistake.
bail!("Chaining relational operators have no defined associativity.",
@span => "Use parens, '()', to specify which operator must be evaluated first")
}
Ok(Value::Expression(Box::new(Expression::Binary {
l: lhs,
o: operator,
@ -305,19 +421,113 @@ impl Parser<'_> {
})))
}
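A toy model of the relational-chaining guard above, with an invented `Expr` type standing in for `sql::Value`:

#[derive(Debug)]
enum Expr {
    Num(i64),
    Rel(Box<Expr>, Box<Expr>),
}

fn parse_rel(lhs: Expr, rhs: Expr) -> Result<Expr, String> {
    // A relational operator whose lhs is already a relation is rejected
    // instead of being silently associated one way or the other.
    if matches!(lhs, Expr::Rel(..)) {
        return Err("chaining relational operators has no defined associativity".into());
    }
    Ok(Expr::Rel(Box::new(lhs), Box::new(rhs)))
}

fn main() {
    // `1 >= 2` is fine; `(1 >= 2) >= 3` errors rather than guessing.
    let first = parse_rel(Expr::Num(1), Expr::Num(2)).unwrap();
    assert!(parse_rel(first, Expr::Num(3)).is_err());
}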
async fn parse_infix_range(
&mut self,
ctx: &mut Stk,
exclusive: bool,
lhs: Value,
) -> ParseResult<Value> {
let inclusive = self.eat_whitespace(t!("="));
let before = self.recent_span();
let peek = self.peek_whitespace();
let rhs = if inclusive {
// ..= must be followed by an expression.
if peek.kind == TokenKind::WhiteSpace {
bail!("Unexpected whitespace, expected inclusive range to be immediately followed by a expression",
@peek.span => "Whitespace between a range and it's operands is dissallowed")
}
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
} else if Self::kind_starts_expression(peek.kind) {
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
} else {
return Ok(Value::Range(Box::new(Range {
beg: if exclusive {
Bound::Excluded(lhs)
} else {
Bound::Included(lhs)
},
end: Bound::Unbounded,
})));
};
if matches!(lhs, Value::Range(_)) {
let span = before.covers(self.recent_span());
// a..b..c is ambiguous, so throw an error
bail!("Chaining range operators has no specified associativity",
@span => "use parens, '()', to specify which operator must be evaluated first")
}
Ok(Value::Range(Box::new(Range {
beg: if exclusive {
Bound::Excluded(lhs)
} else {
Bound::Included(lhs)
},
end: if inclusive {
Bound::Included(rhs)
} else {
Bound::Excluded(rhs)
},
})))
}
async fn parse_prefix_range(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
expected_whitespace!(self, t!(".."));
let inclusive = self.eat_whitespace(t!("="));
let before = self.recent_span();
let peek = self.peek_whitespace();
let rhs = if inclusive {
// ..= must be followed by an expression.
if peek.kind == TokenKind::WhiteSpace {
bail!("Unexpected whitespace, expected inclusive range to be immediately followed by a expression",
@peek.span => "Whitespace between a range and it's operands is dissallowed")
}
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
} else if Self::kind_starts_expression(peek.kind) {
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
} else {
return Ok(Value::Range(Box::new(Range {
beg: Bound::Unbounded,
end: Bound::Unbounded,
})));
};
if matches!(rhs, Value::Range(_)) {
let span = before.covers(self.recent_span());
// a..b..c is ambiguous, so throw an error
bail!("Chaining range operators has no specified associativity",
@span => "use parens, '()', to specify which operator must be evaluated first")
}
let range = Range {
beg: Bound::Unbounded,
end: if inclusive {
Bound::Included(rhs)
} else {
Bound::Excluded(rhs)
},
};
Ok(Value::Range(Box::new(range)))
}
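The two range parsers combine into four bound shapes; a sketch of how the surface syntax maps onto `std::ops::Bound` pairs, with `i64` standing in for `sql::Value`:

use std::ops::Bound;

fn main() {
    // `1..5`   -> Included(1), Excluded(5)
    let _half_open = (Bound::Included(1i64), Bound::Excluded(5i64));
    // `1>..=5` -> Excluded(1), Included(5)
    let _excl_start = (Bound::Excluded(1i64), Bound::Included(5i64));
    // `1..`    -> Included(1), Unbounded
    let _from = (Bound::Included(1i64), Bound::<i64>::Unbounded);
    // `..=5`   -> Unbounded, Included(5)
    let _to = (Bound::<i64>::Unbounded, Bound::Included(5i64));
}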
/// The pratt parsing loop.
/// Parses an expression according to binding power.
async fn pratt_parse_expr(&mut self, ctx: &mut Stk, min_bp: u8) -> ParseResult<Value> {
async fn pratt_parse_expr(
&mut self,
ctx: &mut Stk,
min_bp: BindingPower,
) -> ParseResult<Value> {
let peek = self.peek();
let mut lhs = if let Some(((), r_bp)) = self.prefix_binding_power(peek.kind) {
self.parse_prefix_op(ctx, r_bp).await?
let mut lhs = if let Some(bp) = self.prefix_binding_power(peek.kind) {
self.parse_prefix_op(ctx, bp).await?
} else {
self.parse_idiom_expression(ctx).await?
};
loop {
let token = self.peek();
let Some((l_bp, r_bp)) = Self::infix_binding_power(token.kind) else {
let Some(bp) = self.infix_binding_power(token.kind) else {
// explain that assignment operators can't be used in normal expressions.
if let t!("+=") | t!("*=") | t!("-=") | t!("+?=") = token.kind {
unexpected!(self,token,"an operator",
@ -326,11 +536,11 @@ impl Parser<'_> {
break;
};
if l_bp < min_bp {
if bp <= min_bp {
break;
}
lhs = self.parse_infix_op(ctx, r_bp, lhs).await?;
lhs = self.parse_infix_op(ctx, bp, lhs).await?;
}
Ok(lhs)
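The `bp <= min_bp` break is what makes every operator left associative. A minimal standalone pratt loop over toy single-character tokens (not the parser's real types) shows the effect:

fn parse(tokens: &mut std::iter::Peekable<std::str::Chars<'_>>, min_bp: u8) -> String {
    let mut lhs = tokens.next().unwrap().to_string();
    while let Some(&op) = tokens.peek() {
        let bp = match op {
            '+' | '-' => 1,
            '*' | '/' => 2,
            _ => break,
        };
        // Breaking on equal power keeps operators left associative.
        if bp <= min_bp {
            break;
        }
        tokens.next();
        let rhs = parse(tokens, bp);
        lhs = format!("({lhs} {op} {rhs})");
    }
    lhs
}

fn main() {
    assert_eq!(parse(&mut "1-2-3".chars().peekable(), 0), "((1 - 2) - 3)");
    assert_eq!(parse(&mut "1-2*3".chars().peekable(), 0), "(1 - (2 * 3))");
}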
@ -422,6 +632,23 @@ mod test {
assert_eq!(sql, format!("{}", out));
}
#[test]
fn expression_left_associative() {
let sql = "1 - 1 - 1";
let out = Value::parse(sql);
let one = Value::Number(Number::Int(1));
let expected = Value::Expression(Box::new(Expression::Binary {
l: Value::Expression(Box::new(Expression::Binary {
l: one.clone(),
o: Operator::Sub,
r: one.clone(),
})),
o: Operator::Sub,
r: one,
}));
assert_eq!(expected, out);
}
#[test]
fn parse_expression() {
let sql = "<future> { 5 + 10 }";

View file

@ -15,7 +15,7 @@ impl Parser<'_> {
/// Parse a custom function call
///
/// Expects `fn` to already be called.
pub async fn parse_custom_function(&mut self, ctx: &mut Stk) -> ParseResult<Function> {
pub(super) async fn parse_custom_function(&mut self, ctx: &mut Stk) -> ParseResult<Function> {
expected!(self, t!("::"));
let mut name = self.next_token_value::<Ident>()?.0;
while self.eat(t!("::")) {
@ -27,7 +27,7 @@ impl Parser<'_> {
Ok(Function::Custom(name, args))
}
pub async fn parse_function_args(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
pub(super) async fn parse_function_args(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
let start = self.last_span();
let mut args = Vec::new();
loop {
@ -35,7 +35,7 @@ impl Parser<'_> {
break;
}
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
args.push(arg);
if !self.eat(t!(",")) {
@ -49,7 +49,7 @@ impl Parser<'_> {
/// Parse a model invocation
///
/// Expects `ml` to already be called.
pub async fn parse_model(&mut self, ctx: &mut Stk) -> ParseResult<Model> {
pub(super) async fn parse_model(&mut self, ctx: &mut Stk) -> ParseResult<Model> {
expected!(self, t!("::"));
let mut name = self.next_token_value::<Ident>()?.0;
while self.eat(t!("::")) {
@ -101,7 +101,7 @@ impl Parser<'_> {
break;
}
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
args.push(arg);
if !self.eat(t!(",")) {

View file

@ -0,0 +1,71 @@
//! Implements token gluing logic.
use crate::{
sql::{Datetime, Duration, Strand, Uuid},
syn::{
lexer::compound,
token::{t, Glued, Token, TokenKind},
},
};
use super::{GluedValue, ParseResult, Parser};
impl Parser<'_> {
/// Glues the next token and returns the token after.
pub(super) fn glue_and_peek1(&mut self) -> ParseResult<Token> {
let token = self.peek();
match token.kind {
TokenKind::Glued(_) => return Ok(self.peek1()),
t!("+") | t!("-") | TokenKind::Digits => {
self.pop_peek();
let value = self.lexer.lex_compound(token, compound::numeric_kind)?;
match value.value {
compound::NumericKind::Number(x) => {
self.glued_value = GluedValue::Number(x);
self.prepend_token(Token {
span: value.span,
kind: TokenKind::Glued(Glued::Number),
});
}
compound::NumericKind::Duration(x) => {
self.glued_value = GluedValue::Duration(Duration(x));
self.prepend_token(Token {
span: value.span,
kind: TokenKind::Glued(Glued::Duration),
});
}
}
}
t!("\"") | t!("'") => {
self.pop_peek();
let value = self.lexer.lex_compound(token, compound::strand)?;
self.glued_value = GluedValue::Strand(Strand(value.value));
self.prepend_token(Token {
span: value.span,
kind: TokenKind::Glued(Glued::Strand),
});
return Ok(self.peek1());
}
t!("d\"") | t!("d'") => {
self.pop_peek();
let value = self.lexer.lex_compound(token, compound::datetime)?;
self.glued_value = GluedValue::Datetime(Datetime(value.value));
self.prepend_token(Token {
span: value.span,
kind: TokenKind::Glued(Glued::Datetime),
});
}
t!("u\"") | t!("u'") => {
self.pop_peek();
let value = self.lexer.lex_compound(token, compound::uuid)?;
self.glued_value = GluedValue::Uuid(Uuid(value.value));
self.prepend_token(Token {
span: value.span,
kind: TokenKind::Glued(Glued::Uuid),
});
}
_ => {}
}
Ok(self.peek1())
}
}
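A toy model (with an invented `Kind` enum, not the parser's real types) of the lookahead problem `glue_and_peek1` solves: collapsing the leading `"` of a strand into one glued token so the token after it becomes visible to a single peek.

#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind {
    Quote,
    GluedStrand,
    Colon,
}

fn glue(buffer: &mut [Kind]) {
    // A `"` begins a strand; collapse it into a single glued token.
    if buffer.first() == Some(&Kind::Quote) {
        buffer[0] = Kind::GluedStrand;
    }
}

fn main() {
    // Token stream for `"foo":` after `{` was eaten: a quote, then a colon.
    let mut buffer = [Kind::Quote, Kind::Colon];
    glue(&mut buffer);
    // Peeking one past the glued strand now reveals the `:`,
    // so this must be an object rather than a block.
    assert_eq!(buffer[1], Kind::Colon);
}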

View file

@ -7,18 +7,26 @@ use crate::{
},
syn::{
error::bail,
token::{t, Span, TokenKind},
token::{t, Glued, Span, TokenKind},
},
};
use super::{mac::unexpected, ParseResult, Parser};
impl Parser<'_> {
pub(super) fn peek_continues_idiom(&mut self) -> bool {
let peek = self.peek().kind;
if matches!(peek, t!("->") | t!("[") | t!(".") | t!("...")) {
return true;
}
peek == t!("<") && self.peek1().kind == t!("-")
}
/// Parse the fields of a select query: `foo, bar` in `SELECT foo, bar FROM baz`.
///
/// # Parser State
/// Expects the next tokens to be of a field set.
pub async fn parse_fields(&mut self, ctx: &mut Stk) -> ParseResult<Fields> {
pub(super) async fn parse_fields(&mut self, ctx: &mut Stk) -> ParseResult<Fields> {
if self.eat(t!("VALUE")) {
let expr = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let alias = if self.eat(t!("AS")) {
@ -60,7 +68,7 @@ impl Parser<'_> {
}
/// Parses a list of idioms separated by a `,`
pub async fn parse_idiom_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Idiom>> {
pub(super) async fn parse_idiom_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Idiom>> {
let mut res = vec![self.parse_plain_idiom(ctx).await?];
while self.eat(t!(",")) {
res.push(self.parse_plain_idiom(ctx).await?);
@ -72,7 +80,7 @@ impl Parser<'_> {
///
/// This function differs from [`Parser::parse_remaining_value_idiom`] in how it handles graph
/// parsing. Graphs inside a plain idiom will remain a normal graph production.
pub(crate) async fn parse_remaining_idiom(
pub(super) async fn parse_remaining_idiom(
&mut self,
stk: &mut Stk,
start: Vec<Part>,
@ -98,15 +106,21 @@ impl Parser<'_> {
let graph = stk.run(|stk| self.parse_graph(stk, Dir::Out)).await?;
res.push(Part::Graph(graph))
}
t!("<->") => {
self.pop_peek();
let graph = stk.run(|stk| self.parse_graph(stk, Dir::Both)).await?;
res.push(Part::Graph(graph))
}
t!("<-") => {
self.pop_peek();
let graph = stk.run(|stk| self.parse_graph(stk, Dir::In)).await?;
res.push(Part::Graph(graph))
t!("<") => {
let peek = self.peek_whitespace1();
if peek.kind == t!("-") {
self.pop_peek();
self.pop_peek();
let graph = stk.run(|stk| self.parse_graph(stk, Dir::In)).await?;
res.push(Part::Graph(graph))
} else if peek.kind == t!("->") {
self.pop_peek();
self.pop_peek();
let graph = stk.run(|stk| self.parse_graph(stk, Dir::Both)).await?;
res.push(Part::Graph(graph))
} else {
break;
}
}
t!("..") => {
bail!("Unexpected token `{}` expected and idiom",t!(".."),
@ -124,7 +138,7 @@ impl Parser<'_> {
/// This function differs from [`Parser::parse_remaining_idiom`] in how it handles graph
/// parsing. When parsing an idiom-like production which can be a value, the initial start value
/// might need to be changed to an Edge depending on what is parsed next.
pub(crate) async fn parse_remaining_value_idiom(
pub(super) async fn parse_remaining_value_idiom(
&mut self,
ctx: &mut Stk,
start: Vec<Part>,
@ -155,16 +169,22 @@ impl Parser<'_> {
return Ok(x);
}
}
t!("<->") => {
self.pop_peek();
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::Both).await? {
return Ok(x);
}
}
t!("<-") => {
self.pop_peek();
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::In).await? {
return Ok(x);
t!("<") => {
let peek = self.peek_whitespace1();
if peek.kind == t!("-") {
self.pop_peek();
self.pop_peek();
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::In).await? {
return Ok(x);
}
} else if peek.kind == t!("->") {
self.pop_peek();
self.pop_peek();
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::Both).await? {
return Ok(x);
}
}
}
t!("..") => {
@ -198,7 +218,7 @@ impl Parser<'_> {
};
let value = Value::Edges(Box::new(edge));
if !Self::continues_idiom(self.peek_kind()) {
if !self.peek_continues_idiom() {
return Ok(Some(value));
}
res[0] = Part::Start(value);
@ -213,11 +233,6 @@ impl Parser<'_> {
Ok(None)
}
/// Returns if the token kind could continue an idiom
pub fn continues_idiom(kind: TokenKind) -> bool {
matches!(kind, t!("->") | t!("<->") | t!("<-") | t!("[") | t!(".") | t!("..."))
}
/// Parse an idiom which can only start with a graph or an identifier.
/// Other expressions are not allowed as the start of this idiom
pub async fn parse_plain_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
@ -227,14 +242,15 @@ impl Parser<'_> {
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Out)).await?;
Part::Graph(graph)
}
t!("<->") => {
self.pop_peek();
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?;
Part::Graph(graph)
}
t!("<-") => {
self.pop_peek();
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?;
t!("<") => {
let t = self.pop_peek();
let graph = if self.eat_whitespace(t!("-")) {
ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?
} else if self.eat_whitespace(t!("->")) {
ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?
} else {
unexpected!(self, t, "either `<-` `<->` or `->`")
};
Part::Graph(graph)
}
_ => Part::Field(self.next_token_value()?),
@ -244,7 +260,7 @@ impl Parser<'_> {
}
/// Parse the part after the `.` in a idiom
pub async fn parse_dot_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
pub(super) async fn parse_dot_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
let res = match self.peek_kind() {
t!("*") => {
self.pop_peek();
@ -265,12 +281,16 @@ impl Parser<'_> {
};
Ok(res)
}
pub async fn parse_function_part(&mut self, ctx: &mut Stk, name: Ident) -> ParseResult<Part> {
pub(super) async fn parse_function_part(
&mut self,
ctx: &mut Stk,
name: Ident,
) -> ParseResult<Part> {
let args = self.parse_function_args(ctx).await?;
Ok(Part::Method(name.0, args))
}
/// Parse the part after the `.{` in an idiom
pub async fn parse_destructure_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
pub(super) async fn parse_destructure_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
let start = self.last_span();
let mut destructured: Vec<DestructurePart> = Vec::new();
loop {
@ -311,7 +331,11 @@ impl Parser<'_> {
Ok(Part::Destructure(destructured))
}
/// Parse the part after the `[` in an idiom
pub async fn parse_bracket_part(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Part> {
pub(super) async fn parse_bracket_part(
&mut self,
ctx: &mut Stk,
start: Span,
) -> ParseResult<Part> {
let peek = self.peek();
let res = match peek.kind {
t!("*") => {
@ -322,11 +346,11 @@ impl Parser<'_> {
self.pop_peek();
Part::Last
}
t!("+") | TokenKind::Digits | TokenKind::Number(_) => {
t!("+") | TokenKind::Digits | TokenKind::Glued(Glued::Number) => {
Part::Index(self.next_token_value()?)
}
t!("-") => {
if let TokenKind::Digits = self.peek_whitespace_token_at(1).kind {
if let TokenKind::Digits = self.peek_whitespace1().kind {
unexpected!(self, peek,"$, * or a number", => "An index can't be negative.");
}
unexpected!(self, peek, "$, * or a number");
@ -347,20 +371,11 @@ impl Parser<'_> {
Ok(res)
}
/// Parse a list of basic idioms separated by a ','
pub async fn parse_basic_idiom_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Idiom>> {
let mut res = vec![self.parse_basic_idiom(ctx).await?];
while self.eat(t!(",")) {
res.push(self.parse_basic_idiom(ctx).await?);
}
Ok(res)
}
/// Parse a basic idiom.
///
/// Basic idioms differ from normal idioms in that they are more restrictive.
/// Flatten, graphs, conditions and indexing by param are not allowed.
pub async fn parse_basic_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
pub(super) async fn parse_basic_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
let start = self.next_token_value::<Ident>()?;
let mut parts = vec![Part::Field(start)];
loop {
@ -382,12 +397,12 @@ impl Parser<'_> {
self.pop_peek();
Part::Last
}
TokenKind::Digits | t!("+") | TokenKind::Number(_) => {
TokenKind::Digits | t!("+") | TokenKind::Glued(Glued::Number) => {
let number = self.next_token_value()?;
Part::Index(number)
}
t!("-") => {
let peek_digit = self.peek_whitespace_token_at(1);
let peek_digit = self.peek_whitespace1();
if let TokenKind::Digits = peek_digit.kind {
let span = self.recent_span().covers(peek_digit.span);
bail!("Unexpected token `-` expected $, *, or a number", @span => "an index can't be negative");
@ -411,7 +426,7 @@ impl Parser<'_> {
/// Local idioms differ from basic idioms in that they are more restrictive.
/// Only field, all and number indexing is allowed. Flatten is also allowed but only at the
/// end.
pub async fn parse_local_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
pub(super) async fn parse_local_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
let start = self.next_token_value()?;
let mut parts = vec![Part::Field(start)];
loop {
@ -429,12 +444,12 @@ impl Parser<'_> {
self.pop_peek();
Part::All
}
TokenKind::Digits | t!("+") | TokenKind::Number(_) => {
TokenKind::Digits | t!("+") | TokenKind::Glued(Glued::Number) => {
let number = self.next_token_value()?;
Part::Index(number)
}
t!("-") => {
let peek_digit = self.peek_whitespace_token_at(1);
let peek_digit = self.peek_whitespace1();
if let TokenKind::Digits = peek_digit.kind {
let span = self.recent_span().covers(peek_digit.span);
bail!("Unexpected token `-` expected $, *, or a number", @span => "an index can't be negative");
@ -468,7 +483,7 @@ impl Parser<'_> {
///
/// # Parser state
/// Expects to be at the start of a what list.
pub async fn parse_what_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
pub(super) async fn parse_what_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
let mut res = vec![self.parse_what_value(ctx).await?];
while self.eat(t!(",")) {
res.push(self.parse_what_value(ctx).await?)
@ -480,9 +495,9 @@ impl Parser<'_> {
///
/// # Parser state
/// Expects to be at the start of a what value
pub async fn parse_what_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
pub(super) async fn parse_what_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let start = self.parse_what_primary(ctx).await?;
if start.can_start_idiom() && Self::continues_idiom(self.peek_kind()) {
if start.can_start_idiom() && self.peek_continues_idiom() {
let start = match start {
Value::Table(Table(x)) => vec![Part::Field(Ident(x))],
Value::Idiom(Idiom(x)) => x,
@ -501,7 +516,7 @@ impl Parser<'_> {
/// # Parser state
/// Expects to just have eaten a direction (e.g. <-, <->, or ->) and be at the field-like part
/// of the graph
pub async fn parse_graph(&mut self, ctx: &mut Stk, dir: Dir) -> ParseResult<Graph> {
pub(super) async fn parse_graph(&mut self, ctx: &mut Stk, dir: Dir) -> ParseResult<Graph> {
let token = self.peek();
match token.kind {
t!("?") => {
@ -519,7 +534,7 @@ impl Parser<'_> {
self.pop_peek();
Tables::default()
}
x if Self::tokenkind_can_start_ident(x) => {
x if Self::kind_is_identifier(x) => {
// The following function should always succeed here,
// returning an error here would be a bug, so unwrap.
let table = self.next_token_value().unwrap();
@ -550,7 +565,7 @@ impl Parser<'_> {
..Default::default()
})
}
x if Self::tokenkind_can_start_ident(x) => {
x if Self::kind_is_identifier(x) => {
// The following function should always succeed here,
// returning an error here would be a bug, so unwrap.
let table = self.next_token_value().unwrap();
@ -568,7 +583,7 @@ impl Parser<'_> {
#[cfg(test)]
mod tests {
use crate::sql::{Expression, Id, Number, Object, Param, Strand, Thing};
use crate::sql::{Expression, Id, Number, Object, Operator, Param, Strand, Thing};
use crate::syn::Parse;
use super::*;
@ -710,7 +725,11 @@ mod tests {
Value::from(Idiom(vec![
Part::from("test"),
Part::from("temp"),
Part::Where(Value::from(Expression::parse("test = true"))),
Part::Where(Value::Expression(Box::new(Expression::Binary {
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
o: Operator::Equal,
r: Value::Bool(true)
}))),
Part::from("text")
]))
);
@ -726,7 +745,11 @@ mod tests {
Value::from(Idiom(vec![
Part::from("test"),
Part::from("temp"),
Part::Where(Value::from(Expression::parse("test = true"))),
Part::Where(Value::Expression(Box::new(Expression::Binary {
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
o: Operator::Equal,
r: Value::Bool(true)
}))),
Part::from("text")
]))
);
@ -868,7 +891,11 @@ mod tests {
out,
Value::from(Idiom(vec![
Part::Start(Value::from(Object::default())),
Part::Where(Value::from(Expression::parse("test = true")))
Part::Where(Value::Expression(Box::new(Expression::Binary {
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
o: Operator::Equal,
r: Value::Bool(true)
}))),
]))
);
}
@ -882,7 +909,11 @@ mod tests {
out,
Value::from(Idiom(vec![
Part::Start(Value::from(Object::default())),
Part::Where(Value::from(Expression::parse("test = true")))
Part::Where(Value::Expression(Box::new(Expression::Binary {
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
o: Operator::Equal,
r: Value::Bool(true)
}))),
]))
);
}

View file

@ -3,14 +3,15 @@ use std::collections::BTreeMap;
use reblessive::Stk;
use crate::{
sql::{Array, Ident, Object, Strand, Value},
sql::{Array, Duration, Ident, Object, Strand, Value},
syn::{
parser::mac::expected,
token::{t, QouteKind, Span, TokenKind},
lexer::compound::{self, Numeric},
parser::mac::{expected, pop_glued},
token::{t, Glued, Span, TokenKind},
},
};
use super::{mac::unexpected, ParseResult, Parser};
use super::{ParseResult, Parser};
impl Parser<'_> {
pub async fn parse_json(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
@ -36,7 +37,7 @@ impl Parser<'_> {
self.pop_peek();
self.parse_json_array(ctx, token.span).await.map(Value::Array)
}
TokenKind::Qoute(QouteKind::Plain | QouteKind::PlainDouble) => {
t!("\"") | t!("'") => {
let strand: Strand = self.next_token_value()?;
if self.legacy_strands {
if let Some(x) = self.reparse_legacy_strand(ctx, &strand.0).await {
@ -45,14 +46,22 @@ impl Parser<'_> {
}
Ok(Value::Strand(strand))
}
TokenKind::Digits | TokenKind::Number(_) => {
let peek = self.glue()?;
match peek.kind {
TokenKind::Duration => Ok(Value::Duration(self.next_token_value()?)),
TokenKind::Number(_) => Ok(Value::Number(self.next_token_value()?)),
_ => unexpected!(self, peek, "a number"),
t!("-") | t!("+") | TokenKind::Digits => {
self.pop_peek();
let compound = self.lexer.lex_compound(token, compound::numeric)?;
match compound.value {
Numeric::Duration(x) => Ok(Value::Duration(Duration(x))),
Numeric::Number(x) => Ok(Value::Number(x)),
}
}
TokenKind::Glued(Glued::Strand) => {
let glued = pop_glued!(self, Strand);
Ok(Value::Strand(glued))
}
TokenKind::Glued(Glued::Duration) => {
let glued = pop_glued!(self, Duration);
Ok(Value::Duration(glued))
}
_ => {
let ident = self.next_token_value::<Ident>()?.0;
self.parse_thing_from_ident(ctx, ident).await.map(Value::Thing)

View file

@ -3,10 +3,11 @@ use std::collections::BTreeMap;
use reblessive::Stk;
use crate::{
sql::{kind::Literal, Kind, Strand},
sql::{kind::Literal, Duration, Kind, Strand},
syn::{
lexer::compound,
parser::mac::expected,
token::{t, Keyword, Span, TokenKind},
token::{t, Glued, Keyword, Span, TokenKind},
},
};
@ -17,14 +18,14 @@ impl Parser<'_> {
///
/// # Parser State
/// expects the first `<` to already be eaten
pub async fn parse_kind(&mut self, ctx: &mut Stk, delim: Span) -> ParseResult<Kind> {
pub(super) async fn parse_kind(&mut self, ctx: &mut Stk, delim: Span) -> ParseResult<Kind> {
let kind = self.parse_inner_kind(ctx).await?;
self.expect_closing_delimiter(t!(">"), delim)?;
Ok(kind)
}
/// Parse an inner kind, a kind without enclosing `<` `>`.
pub async fn parse_inner_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
pub(super) async fn parse_inner_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
match self.parse_inner_single_kind(ctx).await? {
Kind::Any => Ok(Kind::Any),
Kind::Option(k) => Ok(Kind::Option(k)),
@ -45,7 +46,7 @@ impl Parser<'_> {
}
/// Parse a single inner kind, a kind without enclosing `<` `>`.
pub async fn parse_inner_single_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
pub(super) async fn parse_inner_single_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
match self.peek_kind() {
t!("ANY") => {
self.pop_peek();
@ -170,17 +171,22 @@ impl Parser<'_> {
async fn parse_literal_kind(&mut self, ctx: &mut Stk) -> ParseResult<Literal> {
let peek = self.peek();
match peek.kind {
t!("'") | t!("\"") | TokenKind::Strand => {
t!("'") | t!("\"") | TokenKind::Glued(Glued::Strand) => {
let s = self.next_token_value::<Strand>()?;
Ok(Literal::String(s))
}
t!("+") | t!("-") | TokenKind::Number(_) | TokenKind::Digits | TokenKind::Duration => {
let token = self.glue_numeric()?;
match token.kind {
TokenKind::Number(_) => self.next_token_value().map(Literal::Number),
TokenKind::Duration => self.next_token_value().map(Literal::Duration),
_ => unexpected!(self, token, "a value"),
}
t!("+") | t!("-") | TokenKind::Glued(Glued::Number) => {
self.next_token_value().map(Literal::Number)
}
TokenKind::Glued(Glued::Duration) => self.next_token_value().map(Literal::Duration),
TokenKind::Digits => {
self.pop_peek();
let compound = self.lexer.lex_compound(peek, compound::numeric)?;
let v = match compound.value {
compound::Numeric::Number(x) => Literal::Number(x),
compound::Numeric::Duration(x) => Literal::Duration(Duration(x)),
};
Ok(v)
}
t!("{") => {
self.pop_peek();
@ -212,11 +218,9 @@ impl Parser<'_> {
matches!(
t,
t!("'")
| t!("\"") | TokenKind::Strand
| t!("+") | t!("-")
| TokenKind::Number(_)
| t!("\"") | t!("+")
| t!("-") | TokenKind::Glued(Glued::Duration | Glued::Strand | Glued::Number)
| TokenKind::Digits
| TokenKind::Duration
| t!("{") | t!("[")
)
}

View file

@ -42,6 +42,25 @@ macro_rules! expected {
}};
}
/// Pops the next token, checks that it is the desired glued variant and then returns its value.
/// This will panic if the token was not correct or the value was already eaten; the parser must
/// ensure neither can happen.
macro_rules! pop_glued {
($parser:expr, $variant:ident) => {{
let token = $parser.pop_peek();
debug_assert!(matches!(
token.kind,
$crate::syn::token::TokenKind::Glued($crate::syn::token::Glued::$variant)
));
let $crate::syn::parser::GluedValue::$variant(x) =
::std::mem::take(&mut $parser.glued_value)
else {
panic!("Glued value was already taken, while the glue token still in the token buffer.")
};
x
}};
}
/// A macro for indicating that the parser encountered a token which it didn't expect.
macro_rules! expected_whitespace {
($parser:expr, $($kind:tt)*) => {{
@ -136,6 +155,7 @@ pub(crate) use enter_object_recursion;
pub(crate) use enter_query_recursion;
pub(crate) use expected;
pub(crate) use expected_whitespace;
pub(crate) use pop_glued;
pub(crate) use unexpected;
#[cfg(test)]

View file

@ -35,28 +35,29 @@
//! whitespace tokens which might have been skipped. Implementers must be careful not to call a
//! function which requires whitespace tokens when they may already have been skipped.
//!
//! ## Token Gluing
//! ## Compound tokens and token gluing.
//!
//! Tokens produced by the lexer are in some places more fine-grained than normal. Numbers,
//! identifiers and strand-like productions can be made up of multiple smaller tokens. A
//! floating point number for example can be made up of at most three digits tokens, a dot token,
//! an exponent token, a number suffix token and two `-` or `+` tokens. Whenever these tokens
//! are required the parser calls a `glue_` method which takes the current peeked token and
//! replaces it with a more complex glued-together token if possible.
//! SurrealQL has a bunch of tokens which have complex rules for when they are allowed and the
//! value they contain. Such tokens are named compound tokens, and examples include a javascript
//! body, strand-like tokens, regex, numbers, etc.
//!
//! ## Use of reblessive
//! These tokens need to be manually requested from the lexer with the [`Lexer::lex_compound`]
//! function.
//!
//! This parser uses reblessive to be able to parse deep without overflowing the stack. This means
//! that all functions which might recurse, i.e. in some paths can call themselves again, are async
//! functions taking an argument from reblessive to call recursive functions without using more stack
//! with each depth.
//! This manual requesting of tokens leads to a problem when used in conjunction with peeking. Take
//! for instance the production `{ "foo": "bar"}`. `"foo"` is a compound token so when initially
//! encountered the lexer only returns a `"` token, and that token then needs to be collected into
//! the full strand token. However the parser needs to figure out if we are parsing an object
//! or a block so it needs to look past the compound token to see if the next token is `:`. This is
//! where gluing comes in. Calling `Parser::glue` checks if the next token could start a compound
//! token and combines them into a single token. This can only be done in places where we know that,
//! if we encountered a leading token of a compound token, it will result in the 'default' compound token.
use self::token_buffer::TokenBuffer;
use crate::{
sql,
sql::{self, Datetime, Duration, Strand, Uuid},
syn::{
error::{bail, SyntaxError},
lexer::Lexer,
lexer::{compound::NumberKind, Lexer},
token::{t, Span, Token, TokenKind},
},
};
@ -66,6 +67,7 @@ mod basic;
mod builtin;
mod expression;
mod function;
mod glue;
mod idiom;
mod json;
mod kind;
@ -77,9 +79,7 @@ mod thing;
mod token;
mod token_buffer;
pub(crate) use mac::{
enter_object_recursion, enter_query_recursion, expected_whitespace, unexpected,
};
pub(crate) use mac::{enter_object_recursion, enter_query_recursion, unexpected};
#[cfg(test)]
pub mod test;
@ -102,12 +102,24 @@ pub enum PartialResult<T> {
},
}
#[derive(Default)]
pub enum GluedValue {
Duration(Duration),
Datetime(Datetime),
Uuid(Uuid),
Number(NumberKind),
Strand(Strand),
#[default]
None,
}
/// The SurrealQL parser.
pub struct Parser<'a> {
lexer: Lexer<'a>,
last_span: Span,
token_buffer: TokenBuffer<4>,
table_as_field: bool,
glued_value: GluedValue,
pub(crate) table_as_field: bool,
legacy_strands: bool,
flexible_record_id: bool,
object_recursion: usize,
@ -121,6 +133,7 @@ impl<'a> Parser<'a> {
lexer: Lexer::new(source),
last_span: Span::empty(),
token_buffer: TokenBuffer::new(),
glued_value: GluedValue::None,
table_as_field: false,
legacy_strands: false,
flexible_record_id: true,
@ -177,6 +190,7 @@ impl<'a> Parser<'a> {
lexer: self.lexer.change_source(source),
last_span: Span::empty(),
token_buffer: TokenBuffer::new(),
glued_value: GluedValue::None,
legacy_strands: self.legacy_strands,
flexible_record_id: self.flexible_record_id,
table_as_field: false,
@ -263,7 +277,7 @@ impl<'a> Parser<'a> {
/// Returns the next n'th token without consuming it.
/// `peek_token_at(0)` is equivalent to `peek`.
pub fn peek_token_at(&mut self, at: u8) -> Token {
pub(crate) fn peek_token_at(&mut self, at: u8) -> Token {
for _ in self.token_buffer.len()..=at {
let r = loop {
let r = self.lexer.next_token();
@ -276,6 +290,10 @@ impl<'a> Parser<'a> {
self.token_buffer.at(at).unwrap()
}
pub fn peek1(&mut self) -> Token {
self.peek_token_at(1)
}
/// Returns the next n'th token without consuming it.
/// `peek_token_at(0)` is equivalent to `peek`.
pub fn peek_whitespace_token_at(&mut self, at: u8) -> Token {
@ -286,6 +304,10 @@ impl<'a> Parser<'a> {
self.token_buffer.at(at).unwrap()
}
pub fn peek_whitespace1(&mut self) -> Token {
self.peek_whitespace_token_at(1)
}
/// Returns the span of the next token if it was already peeked, otherwise returns the token of
/// the last consumed token.
pub fn recent_span(&mut self) -> Span {
@ -297,6 +319,10 @@ impl<'a> Parser<'a> {
self.last_span
}
pub fn assert_finished(&self) -> ParseResult<()> {
self.lexer.assert_finished()
}
/// Eat the next token if it is of the given kind.
/// Returns whether a token was eaten.
pub fn eat(&mut self, token: TokenKind) -> bool {
@ -334,12 +360,15 @@ impl<'a> Parser<'a> {
/// Checks if the next token is of the given kind. If it isn't it returns a UnclosedDelimiter
/// error.
fn expect_closing_delimiter(&mut self, kind: TokenKind, should_close: Span) -> ParseResult<()> {
if !self.eat(kind) {
bail!("Unexpected token, expected delimiter `{kind}`",
let peek = self.peek();
if peek.kind != kind {
bail!("Unexpected token `{}` expected delimiter `{kind}`",
peek.kind,
@self.recent_span(),
@should_close => "expected this delimiter to close"
);
}
self.pop_peek();
Ok(())
}
@ -375,7 +404,7 @@ impl<'a> Parser<'a> {
let res = ctx.run(|ctx| self.parse_stmt(ctx)).await;
let v = match res {
Err(e) => {
let peek = self.peek_whitespace_token_at(1);
let peek = self.peek_whitespace1();
if e.is_data_pending()
|| matches!(peek.kind, TokenKind::Eof | TokenKind::WhiteSpace)
{

View file

@ -7,7 +7,7 @@ use crate::{
syn::{
error::bail,
parser::{enter_object_recursion, mac::expected, ParseResult, Parser},
token::{t, Span, TokenKind},
token::{t, Glued, Span, TokenKind},
},
};
@ -29,11 +29,8 @@ impl Parser<'_> {
})
}
// glue possible complex tokens.
self.glue()?;
// Now check first if it can be an object.
if self.peek_token_at(1).kind == t!(":") {
if self.glue_and_peek1()?.kind == t!(":") {
enter_object_recursion!(this = self => {
return this.parse_object_or_geometry(ctx, start).await;
})
@ -51,7 +48,7 @@ impl Parser<'_> {
) -> ParseResult<Value> {
expected!(self, t!(":"));
// for it to be geometry the next value must be a strand like.
let (t!("\"") | t!("'")) = self.peek_kind() else {
let (t!("\"") | t!("'") | TokenKind::Glued(Glued::Strand)) = self.peek_kind() else {
return self
.parse_object_from_key(ctx, key, BTreeMap::new(), start)
.await
@ -166,7 +163,7 @@ impl Parser<'_> {
expected!(self, t!(":"));
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
// check for an object end, if it doesn't end it is not a geometry.
if !self.eat(t!(",")) {
@ -243,7 +240,7 @@ impl Parser<'_> {
// found coordinates field, next must be a coordinates value but we don't know
// which until we match type.
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
if !self.eat(t!(",")) {
// no comma object must end early.
@ -353,7 +350,7 @@ impl Parser<'_> {
// 'geometries' key can only happen in a GeometryCollection, so try to parse that.
expected!(self, t!(":"));
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
// if the object ends here, it is not a geometry.
if !self.eat(t!(",")) || self.peek_kind() == t!("}") {
@ -485,7 +482,7 @@ impl Parser<'_> {
.map(Value::Object);
}
expected!(self, t!(":"));
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
let comma = self.eat(t!(","));
if !self.eat(t!("}")) {
// the object didn't end, either an error or not a geometry.
@ -524,7 +521,7 @@ impl Parser<'_> {
mut map: BTreeMap<String, Value>,
start: Span,
) -> ParseResult<Object> {
let v = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let v = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
map.insert(key, v);
if !self.eat(t!(",")) {
self.expect_closing_delimiter(t!("}"), start)?;
@ -595,19 +592,15 @@ impl Parser<'_> {
async fn parse_object_entry(&mut self, ctx: &mut Stk) -> ParseResult<(String, Value)> {
let text = self.parse_object_key()?;
expected!(self, t!(":"));
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
Ok((text, value))
}
/// Parses the key of an object, i.e. `field` in the object `{ field: 1 }`.
pub fn parse_object_key(&mut self) -> ParseResult<String> {
let token = self.glue()?;
pub(super) fn parse_object_key(&mut self) -> ParseResult<String> {
let token = self.peek();
match token.kind {
TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_) => {
x if Self::kind_is_keyword_like(x) => {
self.pop_peek();
let str = self.lexer.reader.span(token.span);
// Lexer should ensure that the token is valid utf-8
@ -619,11 +612,11 @@ impl Parser<'_> {
let str = self.lexer.string.take().unwrap();
Ok(str)
}
t!("\"") | t!("'") | TokenKind::Strand => {
t!("\"") | t!("'") | TokenKind::Glued(Glued::Strand) => {
let str = self.next_token_value::<Strand>()?.0;
Ok(str)
}
TokenKind::Digits | TokenKind::Number(_) => {
TokenKind::Digits | TokenKind::Glued(Glued::Number) => {
let number = self.next_token_value::<Number>()?.to_string();
Ok(number)
}

View file

@ -1,21 +1,20 @@
use std::ops::Bound;
use geo::Point;
use reblessive::Stk;
use super::{ParseResult, Parser};
use super::{mac::pop_glued, ParseResult, Parser};
use crate::{
sql::{
Array, Closure, Dir, Function, Geometry, Ident, Idiom, Kind, Mock, Number, Param, Part,
Range, Script, Strand, Subquery, Table, Value,
Array, Closure, Dir, Duration, Function, Geometry, Ident, Idiom, Kind, Mock, Number, Param,
Part, Script, Strand, Subquery, Table, Value,
},
syn::{
error::bail,
lexer::compound,
parser::{
enter_object_recursion, enter_query_recursion,
mac::{expected, expected_whitespace, unexpected},
mac::{expected, unexpected},
},
token::{self, t, DurationSuffix, Span, TokenKind},
token::{t, Glued, Span, TokenKind},
},
};
@ -23,32 +22,27 @@ impl Parser<'_> {
/// Parse a what primary.
///
/// What's are values which are more restricted in what expressions they can contain.
pub async fn parse_what_primary(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let peek = self.peek();
match peek.kind {
t!("..") => Ok(self.try_parse_range(ctx, None).await?.unwrap()),
pub(super) async fn parse_what_primary(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let token = self.peek();
match token.kind {
t!("r\"") => {
self.pop_peek();
let value = Value::Thing(self.parse_record_string(ctx, true).await?);
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
Ok(Value::Thing(self.parse_record_string(ctx, true).await?))
}
t!("r'") => {
self.pop_peek();
let value = Value::Thing(self.parse_record_string(ctx, false).await?);
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
Ok(Value::Thing(self.parse_record_string(ctx, false).await?))
}
t!("d\"") | t!("d'") => {
let value = Value::Datetime(self.next_token_value()?);
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
t!("d\"") | t!("d'") | TokenKind::Glued(Glued::Datetime) => {
Ok(Value::Datetime(self.next_token_value()?))
}
t!("u\"") | t!("u'") => {
let value = Value::Uuid(self.next_token_value()?);
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
t!("u\"") | t!("u'") | TokenKind::Glued(Glued::Uuid) => {
Ok(Value::Uuid(self.next_token_value()?))
}
t!("$param") => {
let value = Value::Param(self.next_token_value()?);
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
Ok(value)
}
t!("FUNCTION") => {
self.pop_peek();
@ -74,7 +68,7 @@ impl Parser<'_> {
expected!(self, t!(">"));
let start = expected!(self, t!("{")).span;
let block = self.parse_block(ctx, start).await?;
Ok(Value::Future(Box::new(crate::sql::Future(block))))
Ok(Value::Future(Box::new(super::sql::Future(block))))
}
t!("|") => {
let start = self.pop_peek().span;
@ -103,89 +97,25 @@ impl Parser<'_> {
let value = self.parse_model(ctx).await.map(|x| Value::Model(Box::new(x)))?;
Ok(self.try_parse_inline(ctx, &value).await?.unwrap_or(value))
}
x => {
if !Self::tokenkind_can_start_ident(x) {
unexpected!(self, peek, "a value")
}
// Combine possible multiple tokens into a single one. before scanning past it.
let span = self.glue()?.span;
let peek = self.peek_token_at(1);
x if Self::kind_is_identifier(x) => {
let peek = self.peek1();
match peek.kind {
t!("::") | t!("(") => {
self.pop_peek();
self.parse_builtin(ctx, span).await
self.parse_builtin(ctx, token.span).await
}
t!(":") => {
let str = self.next_token_value::<Ident>()?.0;
self.parse_thing_or_range(ctx, str).await
}
x => {
if x.has_data() {
// Consume the first identifier to ensure streaming works correctly.
self.pop_peek();
// x had data and possibly overwrote the data from token, This is
// always an invalid production so just return error.
unexpected!(self, peek, "a value");
} else {
Ok(Value::Table(self.next_token_value()?))
}
}
_ => Ok(Value::Table(self.next_token_value()?)),
}
}
_ => unexpected!(self, token, "an expression"),
}
}
pub async fn try_parse_range(
&mut self,
ctx: &mut Stk,
subject: Option<&Value>,
) -> ParseResult<Option<Value>> {
// The ">" can also mean a comparison.
// If the token after is not "..", then return
if self.peek_whitespace().kind == t!(">")
&& self.peek_whitespace_token_at(1).kind != t!("..")
{
return Ok(None);
}
let beg = if let Some(subject) = subject {
if self.eat_whitespace(t!(">")) {
expected_whitespace!(self, t!(".."));
Bound::Excluded(subject.to_owned())
} else {
if !self.eat_whitespace(t!("..")) {
return Ok(None);
}
Bound::Included(subject.to_owned())
}
} else {
if !self.eat_whitespace(t!("..")) {
return Ok(None);
}
Bound::Unbounded
};
let end = if self.eat_whitespace(t!("=")) {
let id = ctx.run(|ctx| self.parse_simple_value(ctx)).await?;
Bound::Included(id)
} else if Self::tokenkind_can_start_simple_value(self.peek_whitespace().kind) {
let id = ctx.run(|ctx| self.parse_simple_value(ctx)).await?;
Bound::Excluded(id)
} else {
Bound::Unbounded
};
Ok(Some(Value::Range(Box::new(Range {
beg,
end,
}))))
}
pub async fn try_parse_inline(
pub(super) async fn try_parse_inline(
&mut self,
ctx: &mut Stk,
subject: &Value,
@ -198,7 +128,7 @@ impl Parser<'_> {
break;
}
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
args.push(arg);
if !self.eat(t!(",")) {
@ -215,90 +145,104 @@ impl Parser<'_> {
}
}
pub fn parse_number_like_prime(&mut self) -> ParseResult<Value> {
let token = self.glue_numeric()?;
pub(super) fn parse_number_like_prime(&mut self) -> ParseResult<Value> {
let token = self.peek();
match token.kind {
TokenKind::Number(_) => self.next_token_value().map(Value::Number),
TokenKind::Duration => self.next_token_value().map(Value::Duration),
_ => unexpected!(self, token, "a value"),
TokenKind::Glued(Glued::Duration) => {
let duration = pop_glued!(self, Duration);
Ok(Value::Duration(duration))
}
TokenKind::Glued(Glued::Number) => {
let v = self.next_token_value()?;
Ok(Value::Number(v))
}
_ => {
self.pop_peek();
let value = self.lexer.lex_compound(token, compound::numeric)?;
let v = match value.value {
compound::Numeric::Number(x) => Value::Number(x),
compound::Numeric::Duration(x) => Value::Duration(Duration(x)),
};
Ok(v)
}
}
}
/// Parse an expressions
pub async fn parse_idiom_expression(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
pub(super) async fn parse_idiom_expression(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let token = self.peek();
let value = match token.kind {
t!("..") => self.try_parse_range(ctx, None).await?.unwrap(),
t!("NONE") => {
self.pop_peek();
let value = Value::None;
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::None
}
t!("NULL") => {
self.pop_peek();
let value = Value::Null;
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::Null
}
t!("true") => {
self.pop_peek();
let value = Value::Bool(true);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::Bool(true)
}
t!("false") => {
self.pop_peek();
let value = Value::Bool(false);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::Bool(false)
}
t!("<") => {
self.pop_peek();
// Casting should already have been parsed.
expected!(self, t!("FUTURE"));
self.expect_closing_delimiter(t!(">"), token.span)?;
let next = expected!(self, t!("{")).span;
let block = self.parse_block(ctx, next).await?;
Value::Future(Box::new(crate::sql::Future(block)))
let peek = self.peek_whitespace();
if peek.kind == t!("-") {
self.pop_peek();
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?;
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
} else if peek.kind == t!("->") {
self.pop_peek();
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?;
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
} else if self.eat(t!("FUTURE")) {
// Casting should already have been parsed.
self.expect_closing_delimiter(t!(">"), token.span)?;
let next = expected!(self, t!("{")).span;
let block = self.parse_block(ctx, next).await?;
Value::Future(Box::new(super::sql::Future(block)))
} else {
unexpected!(self, token, "expected either a `<-` or a future")
}
}
t!("r\"") => {
self.pop_peek();
let value = Value::Thing(self.parse_record_string(ctx, true).await?);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::Thing(self.parse_record_string(ctx, true).await?)
}
t!("r'") => {
self.pop_peek();
let value = Value::Thing(self.parse_record_string(ctx, false).await?);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::Thing(self.parse_record_string(ctx, false).await?)
}
t!("d\"") | t!("d'") => {
let value = Value::Datetime(self.next_token_value()?);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
t!("d\"") | t!("d'") | TokenKind::Glued(Glued::Datetime) => {
Value::Datetime(self.next_token_value()?)
}
t!("u\"") | t!("u'") => {
let value = Value::Uuid(self.next_token_value()?);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
t!("u\"") | t!("u'") | TokenKind::Glued(Glued::Uuid) => {
Value::Uuid(self.next_token_value()?)
}
t!("'") | t!("\"") | TokenKind::Strand => {
t!("'") | t!("\"") | TokenKind::Glued(Glued::Strand) => {
let s = self.next_token_value::<Strand>()?;
if self.legacy_strands {
if let Some(x) = self.reparse_legacy_strand(ctx, &s.0).await {
return Ok(x);
}
}
let value = Value::Strand(s);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
}
t!("+") | t!("-") | TokenKind::Number(_) | TokenKind::Digits | TokenKind::Duration => {
let value = self.parse_number_like_prime()?;
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::Strand(s)
}
t!("+")
| t!("-")
| TokenKind::Digits
| TokenKind::Glued(Glued::Number | Glued::Duration) => self.parse_number_like_prime()?,
TokenKind::NaN => {
self.pop_peek();
let value = Value::Number(Number::Float(f64::NAN));
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
Value::Number(Number::Float(f64::NAN))
}
t!("$param") => {
let value = Value::Param(self.next_token_value()?);
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
}
t!("FUNCTION") => {
self.pop_peek();
@ -310,26 +254,14 @@ impl Parser<'_> {
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Out)).await?;
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
}
t!("<->") => {
self.pop_peek();
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?;
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
}
t!("<-") => {
self.pop_peek();
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?;
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
}
t!("[") => {
self.pop_peek();
let value = self.parse_array(ctx, token.span).await.map(Value::Array)?;
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
self.parse_array(ctx, token.span).await.map(Value::Array)?
}
t!("{") => {
self.pop_peek();
let value = self.parse_object_like(ctx, token.span).await?;
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
}
t!("|") => {
self.pop_peek();
@ -349,8 +281,7 @@ impl Parser<'_> {
t!("(") => {
self.pop_peek();
let value = self.parse_inner_subquery_or_coordinate(ctx, token.span).await?;
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
}
t!("/") => self.next_token_value().map(Value::Regex)?,
t!("RETURN")
@ -373,10 +304,8 @@ impl Parser<'_> {
self.pop_peek();
self.parse_model(ctx).await.map(|x| Value::Model(Box::new(x)))?
}
_ => {
self.glue()?;
let peek = self.peek_token_at(1);
x if Self::kind_is_identifier(x) => {
let peek = self.peek1();
match peek.kind {
t!("::") | t!("(") => {
self.pop_peek();
@ -386,12 +315,8 @@ impl Parser<'_> {
let str = self.next_token_value::<Ident>()?.0;
self.parse_thing_or_range(ctx, str).await?
}
x => {
if x.has_data() {
// Pop the first identifier token so that streaming works correctly.
self.pop_peek();
unexpected!(self, peek, "a value");
} else if self.table_as_field {
_ => {
if self.table_as_field {
Value::Idiom(Idiom(vec![Part::Field(self.next_token_value()?)]))
} else {
Value::Table(self.next_token_value()?)
@ -399,10 +324,13 @@ impl Parser<'_> {
}
}
}
_ => {
unexpected!(self, token, "an expression")
}
};
// Parse the rest of the idiom if it is being continued.
if Self::continues_idiom(self.peek_kind()) {
if self.peek_continues_idiom() {
let value = match value {
Value::Idiom(Idiom(x)) => self.parse_remaining_value_idiom(ctx, x).await,
Value::Table(Table(x)) => {
@ -420,7 +348,7 @@ impl Parser<'_> {
///
/// # Parser state
/// Expects the starting `[` to already be eaten and its span passed as an argument.
pub async fn parse_array(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Array> {
pub(crate) async fn parse_array(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Array> {
let mut values = Vec::new();
enter_object_recursion!(this = self => {
loop {
@ -428,7 +356,7 @@ impl Parser<'_> {
break;
}
let value = ctx.run(|ctx| this.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| this.parse_value_inherit(ctx)).await?;
values.push(value);
if !this.eat(t!(",")) {
@ -445,7 +373,7 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects the starting `|` already be eaten and its span passed as an argument.
pub fn parse_mock(&mut self, start: Span) -> ParseResult<Mock> {
pub(super) fn parse_mock(&mut self, start: Span) -> ParseResult<Mock> {
let name = self.next_token_value::<Ident>()?.0;
expected!(self, t!(":"));
let from = self.next_token_value()?;
@ -458,7 +386,7 @@ impl Parser<'_> {
}
}
pub async fn parse_closure_or_mock(
pub(super) async fn parse_closure_or_mock(
&mut self,
ctx: &mut Stk,
start: Span,
@ -469,7 +397,7 @@ impl Parser<'_> {
}
}
pub async fn parse_closure(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Value> {
pub(super) async fn parse_closure(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Value> {
let mut args = Vec::new();
loop {
if self.eat(t!("|")) {
@ -499,7 +427,7 @@ impl Parser<'_> {
self.parse_closure_after_args(ctx, args).await
}
pub async fn parse_closure_after_args(
pub(super) async fn parse_closure_after_args(
&mut self,
ctx: &mut Stk,
args: Vec<(Ident, Kind)>,
@ -510,7 +438,7 @@ impl Parser<'_> {
let body = Value::Block(Box::new(ctx.run(|ctx| self.parse_block(ctx, start)).await?));
(returns, body)
} else {
let body = ctx.run(|ctx| self.parse_value(ctx)).await?;
let body = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
(None, body)
};
@ -539,7 +467,7 @@ impl Parser<'_> {
}
}
pub async fn parse_inner_subquery_or_coordinate(
pub(super) async fn parse_inner_subquery_or_coordinate(
&mut self,
ctx: &mut Stk,
start: Span,
@ -606,11 +534,9 @@ impl Parser<'_> {
let stmt = self.parse_rebuild_stmt()?;
Subquery::Rebuild(stmt)
}
TokenKind::Digits | TokenKind::Number(_) | t!("+") | t!("-") => {
let number_token = self.glue()?;
if matches!(self.peek_kind(), TokenKind::Number(_))
&& self.peek_token_at(1).kind == t!(",")
{
TokenKind::Digits | TokenKind::Glued(Glued::Number) | t!("+") | t!("-") => {
if self.glue_and_peek1()?.kind == t!(",") {
let number_span = self.peek().span;
let number = self.next_token_value::<Number>()?;
// eat ','
self.next();
@ -618,8 +544,8 @@ impl Parser<'_> {
if matches!(number, Number::Decimal(_))
|| matches!(number, Number::Float(x) if x.is_nan())
{
bail!("Unexpected token `dec` expecte a non-decimal, non-number",
@number_token.span => "Coordinate numbers can't be NaN or a decimal");
bail!("Unexpected token, expected a non-decimal, non-NaN, number",
@number_span => "Coordinate numbers can't be NaN or a decimal");
}
let x = number.as_float();
@ -627,12 +553,12 @@ impl Parser<'_> {
self.expect_closing_delimiter(t!(")"), start)?;
return Ok(Value::Geometry(Geometry::Point(Point::from((x, y)))));
} else {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
Subquery::Value(value)
}
}
_ => {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
Subquery::Value(value)
}
};
@ -650,7 +576,7 @@ impl Parser<'_> {
Ok(Value::Subquery(Box::new(res)))
}
pub async fn parse_inner_subquery(
pub(super) async fn parse_inner_subquery(
&mut self,
ctx: &mut Stk,
start: Option<Span>,
@ -718,7 +644,7 @@ impl Parser<'_> {
Subquery::Rebuild(stmt)
}
_ => {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
Subquery::Value(value)
}
};
@ -762,7 +688,11 @@ impl Parser<'_> {
/// Parses a strand with legacy rules, parsing to a record id, datetime or uuid if the string
/// matches.
pub async fn reparse_legacy_strand(&mut self, ctx: &mut Stk, text: &str) -> Option<Value> {
pub(super) async fn reparse_legacy_strand(
&mut self,
ctx: &mut Stk,
text: &str,
) -> Option<Value> {
if let Ok(x) = Parser::new(text.as_bytes()).parse_thing(ctx).await {
return Some(Value::Thing(x));
}
@ -783,7 +713,7 @@ impl Parser<'_> {
break;
}
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
args.push(arg);
if !self.eat(t!(",")) {
@ -792,146 +722,13 @@ impl Parser<'_> {
}
}
let token = expected!(self, t!("{"));
let mut span = self.lexer.lex_compound::<token::JavaScript>(token)?.span;
let mut span = self.lexer.lex_compound(token, compound::javascript)?.span;
// remove the starting `{` and ending `}`.
span.offset += 1;
span.len -= 2;
let body = self.lexer.span_str(span);
Ok(Function::Script(Script(body.to_string()), args))
}
/// Parse a simple singular value
pub async fn parse_simple_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
let token = self.peek();
let value = match token.kind {
t!("NONE") => {
self.pop_peek();
let value = Value::None;
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
}
t!("NULL") => {
self.pop_peek();
let value = Value::Null;
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
}
t!("true") => {
self.pop_peek();
let value = Value::Bool(true);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
}
t!("false") => {
self.pop_peek();
let value = Value::Bool(false);
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
}
t!("r\"") => {
self.pop_peek();
let thing = self.parse_record_string(ctx, true).await?;
Value::Thing(thing)
}
t!("r'") => {
self.pop_peek();
let thing = self.parse_record_string(ctx, false).await?;
Value::Thing(thing)
}
t!("d\"") | t!("d'") => {
let datetime = self.next_token_value()?;
Value::Datetime(datetime)
}
t!("u\"") | t!("u'") => {
let uuid = self.next_token_value()?;
Value::Uuid(uuid)
}
t!("'") | t!("\"") | TokenKind::Strand => {
let s = self.next_token_value::<Strand>()?;
if self.legacy_strands {
if let Some(x) = self.reparse_legacy_strand(ctx, &s.0).await {
return Ok(x);
}
}
Value::Strand(s)
}
t!("+") | t!("-") | TokenKind::Number(_) | TokenKind::Digits | TokenKind::Duration => {
self.parse_number_like_prime()?
}
TokenKind::NaN => {
self.pop_peek();
Value::Number(Number::Float(f64::NAN))
}
t!("$param") => {
let value = Value::Param(self.next_token_value()?);
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
}
t!("[") => {
self.pop_peek();
self.parse_array(ctx, token.span).await.map(Value::Array)?
}
t!("{") => {
self.pop_peek();
let value = self.parse_object_like(ctx, token.span).await?;
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
}
t!("(") => {
self.pop_peek();
let value = self.parse_inner_subquery_or_coordinate(ctx, token.span).await?;
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
}
_ => {
self.glue()?;
let peek = self.peek_token_at(1);
if peek.kind.has_data() {
unexpected!(self, peek, "a value");
} else if self.table_as_field {
Value::Idiom(Idiom(vec![Part::Field(self.next_token_value()?)]))
} else {
Value::Table(self.next_token_value()?)
}
}
};
Ok(value)
}
pub fn tokenkind_can_start_simple_value(t: TokenKind) -> bool {
matches!(
t,
t!("NONE")
| t!("NULL") | t!("true")
| t!("false")
| t!("r\"") | t!("r'")
| t!("d\"") | t!("d'")
| t!("u\"") | t!("u'")
| t!("\"") | t!("'")
| t!("+") | t!("-")
| TokenKind::Number(_)
| TokenKind::Digits
| TokenKind::Duration
| TokenKind::NaN
| t!("$param")
| t!("[") | t!("{")
| t!("(") | TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::Identifier
| TokenKind::Exponent
| TokenKind::DatetimeChars(_)
| TokenKind::NumberSuffix(_)
| TokenKind::DurationSuffix(
// All except Micro unicode
DurationSuffix::Nano
| DurationSuffix::Micro
| DurationSuffix::Milli
| DurationSuffix::Second
| DurationSuffix::Minute
| DurationSuffix::Hour
| DurationSuffix::Day
| DurationSuffix::Week
| DurationSuffix::Year
)
)
}
}
#[cfg(test)]

View file

@ -15,7 +15,7 @@ use crate::{
};
impl Parser<'_> {
pub async fn parse_alter_stmt(&mut self, ctx: &mut Stk) -> ParseResult<AlterStatement> {
pub(crate) async fn parse_alter_stmt(&mut self, ctx: &mut Stk) -> ParseResult<AlterStatement> {
let next = self.next();
match next.kind {
t!("TABLE") => self.parse_alter_table(ctx).await.map(AlterStatement::Table),
@ -23,7 +23,10 @@ impl Parser<'_> {
}
}
pub async fn parse_alter_table(&mut self, ctx: &mut Stk) -> ParseResult<AlterTableStatement> {
pub(crate) async fn parse_alter_table(
&mut self,
ctx: &mut Stk,
) -> ParseResult<AlterTableStatement> {
let if_exists = if self.eat(t!("IF")) {
expected!(self, t!("EXISTS"));
true

View file

@ -9,7 +9,10 @@ use crate::{
};
impl Parser<'_> {
pub async fn parse_create_stmt(&mut self, ctx: &mut Stk) -> ParseResult<CreateStatement> {
pub(crate) async fn parse_create_stmt(
&mut self,
ctx: &mut Stk,
) -> ParseResult<CreateStatement> {
let only = self.eat(t!("ONLY"));
let what = Values(self.parse_what_list(ctx).await?);
let data = self.try_parse_data(ctx).await?;

View file

@ -31,12 +31,13 @@ use crate::{
};
impl Parser<'_> {
pub async fn parse_define_stmt(&mut self, ctx: &mut Stk) -> ParseResult<DefineStatement> {
pub(crate) async fn parse_define_stmt(
&mut self,
ctx: &mut Stk,
) -> ParseResult<DefineStatement> {
let next = self.next();
match next.kind {
t!("NAMESPACE") | t!("ns") => {
self.parse_define_namespace().map(DefineStatement::Namespace)
}
t!("NAMESPACE") => self.parse_define_namespace().map(DefineStatement::Namespace),
t!("DATABASE") => self.parse_define_database().map(DefineStatement::Database),
t!("FUNCTION") => self.parse_define_function(ctx).await.map(DefineStatement::Function),
t!("USER") => self.parse_define_user().map(DefineStatement::User),
@ -59,7 +60,7 @@ impl Parser<'_> {
}
}
pub fn parse_define_namespace(&mut self) -> ParseResult<DefineNamespaceStatement> {
pub(crate) fn parse_define_namespace(&mut self) -> ParseResult<DefineNamespaceStatement> {
let (if_not_exists, overwrite) = if self.eat(t!("IF")) {
expected!(self, t!("NOT"));
expected!(self, t!("EXISTS"));
@ -330,12 +331,12 @@ impl Parser<'_> {
t!("SIGNUP") => {
self.pop_peek();
ac.signup =
Some(stk.run(|stk| self.parse_value(stk)).await?);
Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
t!("SIGNIN") => {
self.pop_peek();
ac.signin =
Some(stk.run(|stk| self.parse_value(stk)).await?);
Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
_ => break,
}
@ -370,7 +371,7 @@ impl Parser<'_> {
}
t!("AUTHENTICATE") => {
self.pop_peek();
res.authenticate = Some(stk.run(|stk| self.parse_value(stk)).await?);
res.authenticate = Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
t!("DURATION") => {
self.pop_peek();
@ -576,11 +577,11 @@ impl Parser<'_> {
}
t!("SIGNUP") => {
self.pop_peek();
ac.signup = Some(stk.run(|stk| self.parse_value(stk)).await?);
ac.signup = Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
t!("SIGNIN") => {
self.pop_peek();
ac.signin = Some(stk.run(|stk| self.parse_value(stk)).await?);
ac.signin = Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
_ => break,
}
@ -614,7 +615,7 @@ impl Parser<'_> {
match self.peek_kind() {
t!("VALUE") => {
self.pop_peek();
res.value = ctx.run(|ctx| self.parse_value(ctx)).await?;
res.value = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
}
t!("COMMENT") => {
self.pop_peek();
@ -753,13 +754,13 @@ impl Parser<'_> {
match self.peek_kind() {
t!("WHEN") => {
self.pop_peek();
res.when = ctx.run(|ctx| self.parse_value(ctx)).await?;
res.when = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
}
t!("THEN") => {
self.pop_peek();
res.then = Values(vec![ctx.run(|ctx| self.parse_value(ctx)).await?]);
res.then = Values(vec![ctx.run(|ctx| self.parse_value_table(ctx)).await?]);
while self.eat(t!(",")) {
res.then.0.push(ctx.run(|ctx| self.parse_value(ctx)).await?)
res.then.0.push(ctx.run(|ctx| self.parse_value_table(ctx)).await?)
}
}
t!("COMMENT") => {
@ -812,15 +813,15 @@ impl Parser<'_> {
}
t!("VALUE") => {
self.pop_peek();
res.value = Some(ctx.run(|ctx| self.parse_value(ctx)).await?);
res.value = Some(ctx.run(|ctx| self.parse_value_field(ctx)).await?);
}
t!("ASSERT") => {
self.pop_peek();
res.assert = Some(ctx.run(|ctx| self.parse_value(ctx)).await?);
res.assert = Some(ctx.run(|ctx| self.parse_value_field(ctx)).await?);
}
t!("DEFAULT") => {
self.pop_peek();
res.default = Some(ctx.run(|ctx| self.parse_value(ctx)).await?);
res.default = Some(ctx.run(|ctx| self.parse_value_field(ctx)).await?);
}
t!("PERMISSIONS") => {
self.pop_peek();

View file

@ -9,7 +9,10 @@ use crate::{
};
impl Parser<'_> {
pub async fn parse_delete_stmt(&mut self, ctx: &mut Stk) -> ParseResult<DeleteStatement> {
pub(crate) async fn parse_delete_stmt(
&mut self,
ctx: &mut Stk,
) -> ParseResult<DeleteStatement> {
self.eat(t!("FROM"));
let only = self.eat(t!("ONLY"));
let what = Values(self.parse_what_list(ctx).await?);

View file

@ -12,8 +12,8 @@ use crate::{
};
impl Parser<'_> {
pub async fn parse_if_stmt(&mut self, ctx: &mut Stk) -> ParseResult<IfelseStatement> {
let condition = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
pub(crate) async fn parse_if_stmt(&mut self, ctx: &mut Stk) -> ParseResult<IfelseStatement> {
let condition = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
let mut res = IfelseStatement {
exprs: Vec::new(),
@ -23,7 +23,7 @@ impl Parser<'_> {
let next = self.next();
match next.kind {
t!("THEN") => {
let body = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let body = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
self.eat(t!(";"));
res.exprs.push((condition, body));
self.parse_worded_tail(ctx, &mut res).await?;
@ -50,13 +50,13 @@ impl Parser<'_> {
t!("END") => return Ok(()),
t!("ELSE") => {
if self.eat(t!("IF")) {
let condition = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let condition = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
expected!(self, t!("THEN"));
let body = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let body = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
self.eat(t!(";"));
res.exprs.push((condition, body));
} else {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
self.eat(t!(";"));
expected!(self, t!("END"));
res.close = Some(value);
@ -78,7 +78,7 @@ impl Parser<'_> {
t!("ELSE") => {
self.pop_peek();
if self.eat(t!("IF")) {
let condition = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let condition = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
let span = expected!(self, t!("{")).span;
let body = self.parse_block(ctx, span).await?;
res.exprs.push((condition, body.into()));

View file

@ -1,8 +1,9 @@
use reblessive::Stk;
use crate::{
sql::{statements::InsertStatement, Data, Value},
sql::{statements::InsertStatement, Data, Idiom, Subquery, Value},
syn::{
error::bail,
parser::{mac::expected, ParseResult, Parser},
token::t,
},
@ -31,43 +32,7 @@ impl Parser<'_> {
None
};
let data = match self.peek_kind() {
t!("(") => {
let start = self.pop_peek().span;
let fields = self.parse_idiom_list(ctx).await?;
self.expect_closing_delimiter(t!(")"), start)?;
expected!(self, t!("VALUES"));
let start = expected!(self, t!("(")).span;
let mut values = vec![ctx.run(|ctx| self.parse_value(ctx)).await?];
while self.eat(t!(",")) {
values.push(ctx.run(|ctx| self.parse_value(ctx)).await?);
}
self.expect_closing_delimiter(t!(")"), start)?;
let mut values = vec![values];
while self.eat(t!(",")) {
let start = expected!(self, t!("(")).span;
let mut inner_values = vec![ctx.run(|ctx| self.parse_value(ctx)).await?];
while self.eat(t!(",")) {
inner_values.push(ctx.run(|ctx| self.parse_value(ctx)).await?);
}
values.push(inner_values);
self.expect_closing_delimiter(t!(")"), start)?;
}
Data::ValuesExpression(
values
.into_iter()
.map(|row| fields.iter().cloned().zip(row).collect())
.collect(),
)
}
_ => {
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
Data::SingleExpression(value)
}
};
let data = self.parse_insert_values(ctx).await?;
let update = if self.eat(t!("ON")) {
Some(self.parse_insert_update(ctx).await?)
@ -91,19 +56,122 @@ impl Parser<'_> {
})
}
fn extract_idiom(subquery: Subquery) -> Option<Idiom> {
let Subquery::Value(Value::Idiom(idiom)) = subquery else {
return None;
};
Some(idiom)
}
async fn parse_insert_values(&mut self, ctx: &mut Stk) -> ParseResult<Data> {
let token = self.peek();
// not a `(`, so it can't be `(a,b) VALUES (c,d)`
if token.kind != t!("(") {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
return Ok(Data::SingleExpression(value));
}
// might still be a subquery `(select foo from ...`
self.pop_peek();
let before = self.peek().span;
let backup = self.table_as_field;
self.table_as_field = true;
let subquery = self.parse_inner_subquery(ctx, None).await?;
self.table_as_field = backup;
let subquery_span = before.covers(self.last_span());
let mut idioms = Vec::new();
let select_span = if !self.eat(t!(",")) {
// not a comma so it might be a single (a) VALUES (b) or a subquery
self.expect_closing_delimiter(t!(")"), token.span)?;
let select_span = token.span.covers(self.last_span());
if !self.eat(t!("VALUES")) {
// found a subquery
return Ok(Data::SingleExpression(Value::Subquery(Box::new(subquery))));
}
// found a values expression, so the subquery must be an idiom
let Some(idiom) = Self::extract_idiom(subquery) else {
bail!("Invalid value, expected an idiom in INSERT VALUES statement.",
@subquery_span => "Here only idioms are allowed")
};
idioms.push(idiom);
select_span
} else {
// found a values expression, so the subquery must be an idiom
let Some(idiom) = Self::extract_idiom(subquery) else {
bail!("Invalid value, expected an idiom in INSERT VALUES statement.",
@subquery_span => "Here only idioms are allowed")
};
idioms.push(idiom);
loop {
idioms.push(self.parse_plain_idiom(ctx).await?);
if !self.eat(t!(",")) {
break;
}
}
self.expect_closing_delimiter(t!(")"), token.span)?;
expected!(self, t!("VALUES"));
token.span.covers(self.last_span())
};
let mut insertions = Vec::new();
loop {
let mut values = Vec::new();
let start = expected!(self, t!("(")).span;
loop {
values.push(self.parse_value_table(ctx).await?);
if !self.eat(t!(",")) {
break;
}
}
self.expect_closing_delimiter(t!(")"), start)?;
let span = start.covers(self.last_span());
if values.len() != idioms.len() {
bail!("Invalid numbers of values to insert, found {} value(s) but selector requires {} value(s).",
values.len(), idioms.len(),
@span,
@select_span => "This selector has {} field(s)",idioms.len()
);
}
insertions.push(values);
if !self.eat(t!(",")) {
break;
}
}
Ok(Data::ValuesExpression(
insertions.into_iter().map(|row| idioms.iter().cloned().zip(row).collect()).collect(),
))
}
async fn parse_insert_update(&mut self, ctx: &mut Stk) -> ParseResult<Data> {
expected!(self, t!("DUPLICATE"));
expected!(self, t!("KEY"));
expected!(self, t!("UPDATE"));
let l = self.parse_plain_idiom(ctx).await?;
let o = self.parse_assigner()?;
let r = ctx.run(|ctx| self.parse_value(ctx)).await?;
let r = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let mut data = vec![(l, o, r)];
while self.eat(t!(",")) {
let l = self.parse_plain_idiom(ctx).await?;
let o = self.parse_assigner()?;
let r = ctx.run(|ctx| self.parse_value(ctx)).await?;
let r = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
data.push((l, o, r))
}

View file

@ -12,8 +12,9 @@ use crate::sql::statements::{
KillStatement, LiveStatement, OptionStatement, SetStatement, ThrowStatement,
};
use crate::sql::{Fields, Ident, Param};
use crate::syn::lexer::compound;
use crate::syn::parser::enter_query_recursion;
use crate::syn::token::{t, TokenKind};
use crate::syn::token::{t, Glued, TokenKind};
use crate::{
sql::{
statements::{
@ -42,7 +43,7 @@ mod update;
mod upsert;
impl Parser<'_> {
pub async fn parse_stmt_list(&mut self, ctx: &mut Stk) -> ParseResult<Statements> {
pub(super) async fn parse_stmt_list(&mut self, ctx: &mut Stk) -> ParseResult<Statements> {
let mut res = Vec::new();
loop {
match self.peek_kind() {
@ -61,7 +62,7 @@ impl Parser<'_> {
}
let token = self.peek();
if Self::token_kind_starts_statement(token.kind) {
if Self::kind_starts_statement(token.kind) {
// user likely forgot a semicolon.
unexpected!(self,token,"the query to end", => "maybe forgot a semicolon after the previous statement?");
}
@ -74,38 +75,6 @@ impl Parser<'_> {
Ok(Statements(res))
}
fn token_kind_starts_statement(kind: TokenKind) -> bool {
matches!(
kind,
t!("ACCESS")
| t!("ALTER")
| t!("ANALYZE")
| t!("BEGIN")
| t!("BREAK")
| t!("CANCEL")
| t!("COMMIT")
| t!("CONTINUE")
| t!("CREATE")
| t!("DEFINE")
| t!("DELETE")
| t!("FOR") | t!("IF")
| t!("INFO") | t!("INSERT")
| t!("KILL") | t!("LIVE")
| t!("OPTION")
| t!("REBUILD")
| t!("RETURN")
| t!("RELATE")
| t!("REMOVE")
| t!("SELECT")
| t!("LET") | t!("SHOW")
| t!("SLEEP")
| t!("THROW")
| t!("UPDATE")
| t!("UPSERT")
| t!("USE")
)
}
pub(super) async fn parse_stmt(&mut self, ctx: &mut Stk) -> ParseResult<Statement> {
enter_query_recursion!(this = self => {
this.parse_stmt_inner(ctx).await
@ -245,7 +214,7 @@ impl Parser<'_> {
}
_ => {
// TODO: Provide information about keywords.
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
Ok(Self::refine_stmt_value(value))
}
}
@ -334,7 +303,7 @@ impl Parser<'_> {
}
_ => {
// TODO: Provide information about keywords.
let v = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let v = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
Ok(Self::refine_entry_value(v))
}
}
@ -474,7 +443,7 @@ impl Parser<'_> {
fn parse_use_stmt(&mut self) -> ParseResult<UseStatement> {
let peek = self.peek();
let (ns, db) = match peek.kind {
t!("NAMESPACE") | t!("ns") => {
t!("NAMESPACE") => {
self.pop_peek();
let ns = self.next_token_value::<Ident>()?.0;
let db = self
@ -502,10 +471,10 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `FOR` to already be consumed.
pub async fn parse_for_stmt(&mut self, stk: &mut Stk) -> ParseResult<ForeachStatement> {
pub(super) async fn parse_for_stmt(&mut self, stk: &mut Stk) -> ParseResult<ForeachStatement> {
let param = self.next_token_value()?;
expected!(self, t!("IN"));
let range = stk.run(|stk| self.parse_value(stk)).await?;
let range = stk.run(|stk| self.parse_value_inherit(stk)).await?;
let span = expected!(self, t!("{")).span;
let block = self.parse_block(stk, span).await?;
@ -520,12 +489,12 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `INFO` to already be consumed.
pub(crate) fn parse_info_stmt(&mut self) -> ParseResult<InfoStatement> {
pub(super) fn parse_info_stmt(&mut self) -> ParseResult<InfoStatement> {
expected!(self, t!("FOR"));
let next = self.next();
let mut stmt = match next.kind {
t!("ROOT") => InfoStatement::Root(false),
t!("NAMESPACE") | t!("ns") => InfoStatement::Ns(false),
t!("NAMESPACE") => InfoStatement::Ns(false),
t!("DATABASE") => InfoStatement::Db(false, None),
t!("TABLE") => {
let ident = self.next_token_value()?;
@ -561,10 +530,12 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `KILL` to already be consumed.
pub(crate) fn parse_kill_stmt(&mut self) -> ParseResult<KillStatement> {
pub(super) fn parse_kill_stmt(&mut self) -> ParseResult<KillStatement> {
let peek = self.peek();
let id = match peek.kind {
t!("u\"") | t!("u'") => self.next_token_value().map(Value::Uuid)?,
t!("u\"") | t!("u'") | TokenKind::Glued(Glued::Uuid) => {
self.next_token_value().map(Value::Uuid)?
}
t!("$param") => self.next_token_value().map(Value::Param)?,
_ => unexpected!(self, peek, "a UUID or a parameter"),
};
@ -577,7 +548,7 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `LIVE` to already be consumed.
pub(crate) async fn parse_live_stmt(&mut self, stk: &mut Stk) -> ParseResult<LiveStatement> {
pub(super) async fn parse_live_stmt(&mut self, stk: &mut Stk) -> ParseResult<LiveStatement> {
expected!(self, t!("SELECT"));
let expr = match self.peek_kind() {
@ -602,7 +573,7 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `OPTION` to already be consumed.
pub(crate) fn parse_option_stmt(&mut self) -> ParseResult<OptionStatement> {
pub(super) fn parse_option_stmt(&mut self) -> ParseResult<OptionStatement> {
let name = self.next_token_value()?;
let what = if self.eat(t!("=")) {
let next = self.next();
@ -620,7 +591,7 @@ impl Parser<'_> {
})
}
pub fn parse_rebuild_stmt(&mut self) -> ParseResult<RebuildStatement> {
pub(super) fn parse_rebuild_stmt(&mut self) -> ParseResult<RebuildStatement> {
let next = self.next();
let res = match next.kind {
t!("INDEX") => {
@ -650,11 +621,11 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `RETURN` to already be consumed.
pub(crate) async fn parse_return_stmt(
pub(super) async fn parse_return_stmt(
&mut self,
ctx: &mut Stk,
) -> ParseResult<OutputStatement> {
let what = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let what = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
let fetch = self.try_parse_fetch(ctx).await?;
Ok(OutputStatement {
what,
@ -671,7 +642,7 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `LET` to already be consumed.
pub(crate) async fn parse_let_stmt(&mut self, ctx: &mut Stk) -> ParseResult<SetStatement> {
pub(super) async fn parse_let_stmt(&mut self, ctx: &mut Stk) -> ParseResult<SetStatement> {
let name = self.next_token_value::<Param>()?.0 .0;
let kind = if self.eat(t!(":")) {
Some(self.parse_inner_kind(ctx).await?)
@ -679,7 +650,7 @@ impl Parser<'_> {
None
};
expected!(self, t!("="));
let what = self.parse_value(ctx).await?;
let what = self.parse_value_inherit(ctx).await?;
Ok(SetStatement {
name,
what,
@ -691,7 +662,7 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `SHOW` to already be consumed.
pub(crate) fn parse_show_stmt(&mut self) -> ParseResult<ShowStatement> {
pub(super) fn parse_show_stmt(&mut self) -> ParseResult<ShowStatement> {
expected!(self, t!("CHANGES"));
expected!(self, t!("FOR"));
@ -709,10 +680,17 @@ impl Parser<'_> {
let next = self.peek();
let since = match next.kind {
TokenKind::Digits | TokenKind::Number(_) => {
ShowSince::Versionstamp(self.next_token_value()?)
TokenKind::Digits => {
self.pop_peek();
let int = self.lexer.lex_compound(next, compound::integer)?.value;
ShowSince::Versionstamp(int)
}
t!("d\"") | t!("d'") => ShowSince::Timestamp(self.next_token_value()?),
TokenKind::Glued(_) => {
// This invariant can be upheld within this function: just make sure not to call
// glue here; the `next()` before this peek should eat any glued value.
panic!("A glued number token would truncate the timestamp so no gluing is allowed before this production.");
}
_ => unexpected!(self, next, "a version stamp or a date-time"),
};
@ -729,7 +707,7 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `SLEEP` to already be consumed.
pub(crate) fn parse_sleep_stmt(&mut self) -> ParseResult<SleepStatement> {
pub(super) fn parse_sleep_stmt(&mut self) -> ParseResult<SleepStatement> {
let duration = self.next_token_value()?;
Ok(SleepStatement {
duration,
@ -740,8 +718,8 @@ impl Parser<'_> {
///
/// # Parser State
/// Expects `THROW` to already be consumed.
pub(crate) async fn parse_throw_stmt(&mut self, ctx: &mut Stk) -> ParseResult<ThrowStatement> {
let error = self.parse_value_field(ctx).await?;
pub(super) async fn parse_throw_stmt(&mut self, ctx: &mut Stk) -> ParseResult<ThrowStatement> {
let error = self.parse_value_inherit(ctx).await?;
Ok(ThrowStatement {
error,
})

View file

@ -37,7 +37,7 @@ impl Parser<'_> {
loop {
let idiom = self.parse_plain_idiom(ctx).await?;
let operator = self.parse_assigner()?;
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
set_list.push((idiom, operator, value));
if !self.eat(t!(",")) {
break;
@ -52,19 +52,19 @@ impl Parser<'_> {
}
t!("PATCH") => {
self.pop_peek();
Data::PatchExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::PatchExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
t!("MERGE") => {
self.pop_peek();
Data::MergeExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::MergeExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
t!("REPLACE") => {
self.pop_peek();
Data::ReplaceExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::ReplaceExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
t!("CONTENT") => {
self.pop_peek();
Data::ContentExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::ContentExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
_ => return Ok(None),
};
@ -378,7 +378,7 @@ impl Parser<'_> {
pub fn parse_base(&mut self, scope_allowed: bool) -> ParseResult<Base> {
let next = self.next();
match next.kind {
t!("NAMESPACE") | t!("ns") => Ok(Base::Ns),
t!("NAMESPACE") => Ok(Base::Ns),
t!("DATABASE") => Ok(Base::Db),
t!("ROOT") => Ok(Base::Root),
t!("SCOPE") => {
@ -444,28 +444,26 @@ impl Parser<'_> {
})
}
pub fn convert_distance(&mut self, k: &DistanceKind) -> ParseResult<Distance> {
let dist = match k {
DistanceKind::Chebyshev => Distance::Chebyshev,
DistanceKind::Cosine => Distance::Cosine,
DistanceKind::Euclidean => Distance::Euclidean,
DistanceKind::Manhattan => Distance::Manhattan,
DistanceKind::Hamming => Distance::Hamming,
DistanceKind::Jaccard => Distance::Jaccard,
DistanceKind::Minkowski => {
let distance = self.next_token_value()?;
Distance::Minkowski(distance)
}
DistanceKind::Pearson => Distance::Pearson,
};
Ok(dist)
}
pub fn parse_distance(&mut self) -> ParseResult<Distance> {
let next = self.next();
match next.kind {
TokenKind::Distance(k) => self.convert_distance(&k),
TokenKind::Distance(k) => {
let dist = match k {
DistanceKind::Chebyshev => Distance::Chebyshev,
DistanceKind::Cosine => Distance::Cosine,
DistanceKind::Euclidean => Distance::Euclidean,
DistanceKind::Manhattan => Distance::Manhattan,
DistanceKind::Hamming => Distance::Hamming,
DistanceKind::Jaccard => Distance::Jaccard,
DistanceKind::Minkowski => {
let distance = self.next_token_value()?;
Distance::Minkowski(distance)
}
DistanceKind::Pearson => Distance::Pearson,
};
Ok(dist)
}
_ => unexpected!(self, next, "a distance measure"),
}
}

View file

@ -4,7 +4,7 @@ use crate::{
sql::{statements::RelateStatement, Subquery, Value},
syn::{
parser::{
mac::{expected, unexpected},
mac::{expected, expected_whitespace, unexpected},
ParseResult, Parser,
},
token::t,
@ -39,14 +39,18 @@ impl Parser<'_> {
let next = self.next();
let is_o = match next.kind {
t!("->") => true,
t!("<-") => false,
t!("<") => {
expected_whitespace!(self, t!("-"));
false
}
_ => unexpected!(self, next, "a relation arrow"),
};
let kind = self.parse_relate_kind(stk).await?;
if is_o {
expected!(self, t!("->"))
expected!(self, t!("->"));
} else {
expected!(self, t!("<-"))
expected!(self, t!("<"));
expected_whitespace!(self, t!("-"));
};
let second = self.parse_relate_value(stk).await?;
if is_o {
@ -109,8 +113,7 @@ impl Parser<'_> {
}
pub async fn parse_thing_or_table(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
self.glue()?;
if self.peek_token_at(1).kind == t!(":") {
if self.peek_whitespace1().kind == t!(":") {
self.parse_thing(ctx).await.map(Value::Thing)
} else {
self.next_token_value().map(Value::Table)

View file

@ -23,7 +23,7 @@ impl Parser<'_> {
pub async fn parse_remove_stmt(&mut self, ctx: &mut Stk) -> ParseResult<RemoveStatement> {
let next = self.next();
let res = match next.kind {
t!("NAMESPACE") | t!("ns") => {
t!("NAMESPACE") => {
let if_exists = if self.eat(t!("IF")) {
expected!(self, t!("EXISTS"));
true

View file

@ -35,9 +35,9 @@ impl Parser<'_> {
let only = self.eat(t!("ONLY"));
let mut what = vec![stk.run(|ctx| self.parse_value(ctx)).await?];
let mut what = vec![stk.run(|ctx| self.parse_value_table(ctx)).await?];
while self.eat(t!(",")) {
what.push(stk.run(|ctx| self.parse_value(ctx)).await?);
what.push(stk.run(|ctx| self.parse_value_table(ctx)).await?);
}
let what = Values(what);
@ -217,7 +217,7 @@ impl Parser<'_> {
return Ok(None);
}
self.eat(t!("BY"));
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
Ok(Some(Limit(value)))
}
@ -226,7 +226,7 @@ impl Parser<'_> {
return Ok(None);
}
self.eat(t!("AT"));
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
Ok(Some(Start(value)))
}

View file

@ -0,0 +1,11 @@
use crate::syn::parser::mac::test_parse;
#[test]
fn object_with_negative() {
test_parse!(parse_json, r#"{"foo": -1 }"#).unwrap();
}
#[test]
fn array_with_negative() {
test_parse!(parse_json, r#"[-1]"#).unwrap();
}

View file

@ -3,6 +3,7 @@ use crate::{
syn::parser::mac::test_parse,
};
mod json;
mod limit;
mod stmt;
mod streaming;
@ -58,6 +59,20 @@ fn escaped_params() {
test_parse!(parse_query, src).unwrap();
}
#[test]
fn missed_qoute_caused_panic() {
let src = r#"{"id:0,"method":"query","params"["SLEEP 30s"]}"#;
test_parse!(parse_query, src).unwrap_err();
}
#[test]
fn query_object() {
let src = r#"{"id":0,"method":"query","params":["SLEEP 30s"]}"#;
test_parse!(parse_query, src).inspect_err(|e| eprintln!("{}", e.render_on(src))).unwrap();
}
#[test]
fn escaped_params_backtick() {
test_parse!(

View file

@ -90,7 +90,7 @@ pub fn parse_continue() {
fn parse_create() {
let res = test_parse!(
parse_stmt,
"CREATE ONLY foo SET bar = 3, foo +?= 4 RETURN VALUE foo AS bar TIMEOUT 1s PARALLEL"
"CREATE ONLY foo SET bar = 3, foo +?= baz RETURN VALUE foo AS bar TIMEOUT 1s PARALLEL"
)
.unwrap();
assert_eq!(
@ -107,7 +107,7 @@ fn parse_create() {
(
Idiom(vec![Part::Field(Ident("foo".to_owned()))]),
Operator::Ext,
Value::Number(Number::Int(4))
Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))]))
),
])),
output: Some(Output::Fields(Fields(
@ -205,7 +205,7 @@ fn parse_define_function() {
(Ident("b".to_string()), Kind::Array(Box::new(Kind::Bool), Some(3)))
],
block: Block(vec![Entry::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_string()))])),
what: Value::Table(Table("a".to_string())),
fetch: None,
})]),
comment: Some(Strand("test".to_string())),
@ -1709,16 +1709,10 @@ fn parse_if() {
res,
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))]))
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))]))
)
(Value::Table(Table("foo".to_owned())), Value::Table(Table("bar".to_owned()))),
(Value::Table(Table("faz".to_owned())), Value::Table(Table("baz".to_owned())))
],
close: Some(Value::Idiom(Idiom(vec![Part::Field(Ident("baq".to_owned()))])))
close: Some(Value::Table(Table("baq".to_owned())))
})
)
}
@ -1732,20 +1726,20 @@ fn parse_if_block() {
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("bar".to_owned()))
])))]))),
Value::Table(Table("foo".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"bar".to_owned()
)),)]))),
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("baz".to_owned()))
])))]))),
Value::Table(Table("faz".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baz".to_owned()
)),)]))),
)
],
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(
vec![Part::Field(Ident("baq".to_owned()))]
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baq".to_owned()
)))])))),
})
)
@ -2078,6 +2072,51 @@ fn parse_insert() {
)
}
#[test]
fn parse_insert_select() {
let res = test_parse!(parse_stmt, r#"INSERT IGNORE INTO bar (select foo from baz)"#).unwrap();
assert_eq!(
res,
Statement::Insert(InsertStatement {
into: Some(Value::Table(Table("bar".to_owned()))),
data: Data::SingleExpression(Value::Subquery(Box::new(Subquery::Select(
SelectStatement {
expr: Fields(
vec![Field::Single {
expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_string()))])),
alias: None
}],
false
),
omit: None,
only: false,
what: Values(vec![Value::Table(Table("baz".to_string()))]),
with: None,
cond: None,
split: None,
group: None,
order: None,
limit: None,
start: None,
fetch: None,
version: None,
timeout: None,
parallel: false,
explain: None,
tempfiles: false
}
)))),
ignore: true,
update: None,
output: None,
version: None,
timeout: None,
parallel: false,
relation: false,
}),
)
}
#[test]
fn parse_kill() {
let res = test_parse!(parse_stmt, r#"KILL $param"#).unwrap();
@ -2154,7 +2193,7 @@ fn parse_return() {
assert_eq!(
res,
Statement::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("RETRUN".to_owned()))])),
what: Value::Table(Table("RETRUN".to_owned())),
fetch: Some(Fetchs(vec![Fetch(Value::Idiom(Idiom(vec![Part::Field(
Ident("RETURN".to_owned()).to_owned()
)])))])),

View file

@ -192,7 +192,7 @@ fn statements() -> Vec<Statement> {
(Ident("b".to_string()), Kind::Array(Box::new(Kind::Bool), Some(3))),
],
block: Block(vec![Entry::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_string()))])),
what: Value::Table(Table("a".to_string())),
fetch: None,
})]),
comment: Some(Strand("test".to_string())),
@ -440,34 +440,28 @@ fn statements() -> Vec<Statement> {
}),
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])),
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))])),
),
(Value::Table(Table("foo".to_owned())), Value::Table(Table("bar".to_owned()))),
(Value::Table(Table("faz".to_owned())), Value::Table(Table("baz".to_owned()))),
],
close: Some(Value::Idiom(Idiom(vec![Part::Field(Ident("baq".to_owned()))]))),
close: Some(Value::Table(Table("baq".to_owned()))),
}),
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("bar".to_owned())),
])))]))),
Value::Table(Table("foo".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"bar".to_owned(),
)))]))),
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("baz".to_owned())),
])))]))),
Value::Table(Table("faz".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baz".to_owned(),
)))]))),
),
],
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(
vec![Part::Field(Ident("baq".to_owned()))],
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baq".to_owned(),
)))])))),
}),
Statement::Info(InfoStatement::Root(false)),
@ -612,7 +606,7 @@ fn statements() -> Vec<Statement> {
id: Value::Uuid(Uuid(uuid::uuid!("e72bee20-f49b-11ec-b939-0242ac120002"))),
}),
Statement::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("RETRUN".to_owned()))])),
what: Value::Table(Table("RETRUN".to_owned())),
fetch: Some(Fetchs(vec![Fetch(Value::Idiom(Idiom(vec![Part::Field(
Ident("RETURN".to_owned()).to_owned(),
)])))])),

View file

@ -11,12 +11,17 @@ use crate::{
#[test]
fn parse_coordinate() {
test_parse!(parse_value, "(1.88, -18.0)").unwrap();
test_parse!(parse_value_table, "(1.88, -18.0)").unwrap();
}
#[test]
fn parse_like_operator() {
test_parse!(parse_value, "a ~ b").unwrap();
test_parse!(parse_value_table, "a ~ b").unwrap();
}
#[test]
fn parse_range_operator() {
test_parse!(parse_value_table, "1..2").unwrap();
}
#[test]
@ -88,7 +93,7 @@ fn parse_large_depth_record_id() {
#[test]
fn parse_recursive_record_string() {
let res = test_parse!(parse_value, r#" r"a:[r"b:{c: r"d:1"}"]" "#).unwrap();
let res = test_parse!(parse_value_table, r#" r"a:[r"b:{c: r"d:1"}"]" "#).unwrap();
assert_eq!(
res,
Value::Thing(Thing {
@ -109,7 +114,7 @@ fn parse_recursive_record_string() {
#[test]
fn parse_record_string_2() {
let res = test_parse!(parse_value, r#" r'a:["foo"]' "#).unwrap();
let res = test_parse!(parse_value_table, r#" r'a:["foo"]' "#).unwrap();
assert_eq!(
res,
Value::Thing(Thing {
@ -121,64 +126,69 @@ fn parse_record_string_2() {
#[test]
fn parse_i64() {
let res = test_parse!(parse_value, r#" -9223372036854775808 "#).unwrap();
let res = test_parse!(parse_value_table, r#" -9223372036854775808 "#).unwrap();
assert_eq!(res, Value::Number(Number::Int(i64::MIN)));
let res = test_parse!(parse_value, r#" 9223372036854775807 "#).unwrap();
let res = test_parse!(parse_value_table, r#" 9223372036854775807 "#).unwrap();
assert_eq!(res, Value::Number(Number::Int(i64::MAX)));
}
#[test]
fn constant_lowercase() {
let out = test_parse!(parse_value, r#" math::pi "#).unwrap();
let out = test_parse!(parse_value_table, r#" math::pi "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathPi));
let out = test_parse!(parse_value, r#" math::inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" math::inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathInf));
let out = test_parse!(parse_value, r#" math::neg_inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" math::neg_inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathNegInf));
}
#[test]
fn constant_uppercase() {
let out = test_parse!(parse_value, r#" MATH::PI "#).unwrap();
let out = test_parse!(parse_value_table, r#" MATH::PI "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathPi));
let out = test_parse!(parse_value, r#" MATH::INF "#).unwrap();
let out = test_parse!(parse_value_table, r#" MATH::INF "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathInf));
let out = test_parse!(parse_value, r#" MATH::NEG_INF "#).unwrap();
let out = test_parse!(parse_value_table, r#" MATH::NEG_INF "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathNegInf));
}
#[test]
fn constant_mixedcase() {
let out = test_parse!(parse_value, r#" MaTh::Pi "#).unwrap();
let out = test_parse!(parse_value_table, r#" MaTh::Pi "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathPi));
let out = test_parse!(parse_value, r#" MaTh::Inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" MaTh::Inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathInf));
let out = test_parse!(parse_value, r#" MaTh::Neg_Inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" MaTh::Neg_Inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathNegInf));
}
#[test]
fn scientific_decimal() {
let res = test_parse!(parse_value, r#" 9.7e-7dec "#).unwrap();
let res = test_parse!(parse_value_table, r#" 9.7e-7dec "#).unwrap();
assert!(matches!(res, Value::Number(Number::Decimal(_))));
assert_eq!(res.to_string(), "0.00000097dec")
}
#[test]
fn scientific_number() {
let res = test_parse!(parse_value, r#" 9.7e-5"#).unwrap();
let res = test_parse!(parse_value_table, r#" 9.7e-5"#).unwrap();
assert!(matches!(res, Value::Number(Number::Float(_))));
assert_eq!(res.to_string(), "0.000097f")
}
#[test]
fn empty_string() {
test_parse!(parse_value, "").unwrap_err();
fn datetime_error() {
test_parse!(parse_value_table, r#" d"2001-01-01T01:01:01.9999999999" "#).unwrap_err();
}
#[test]
fn empty_string() {
test_parse!(parse_value_table, "").unwrap_err();
}
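The shift from `parse_value` to `parse_value_table` throughout these tests mirrors how the two entry points treat a bare identifier. A minimal sketch of the difference, assuming the same `test_parse!` macro and the `parse_value_field` entry point used elsewhere in this change:

#[test]
fn bare_ident_entry_points() {
    // In table position a lone identifier parses as a Table value...
    let res = test_parse!(parse_value_table, "foo").unwrap();
    assert_eq!(res, Value::Table(Table("foo".to_owned())));
    // ...while in field position it parses as an idiom path.
    let res = test_parse!(parse_value_field, "foo").unwrap();
    assert_eq!(res, Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])));
}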

View file

@ -8,14 +8,19 @@ use crate::{
},
syn::{
error::bail,
lexer::compound,
parser::mac::{expected, expected_whitespace, unexpected},
token::{t, TokenKind},
token::{t, Glued, TokenKind},
},
};
use std::{cmp::Ordering, ops::Bound};
impl Parser<'_> {
pub async fn parse_record_string(&mut self, ctx: &mut Stk, double: bool) -> ParseResult<Thing> {
pub(crate) async fn parse_record_string(
&mut self,
ctx: &mut Stk,
double: bool,
) -> ParseResult<Thing> {
let thing = self.parse_thing(ctx).await?;
debug_assert!(self.last_span().is_followed_by(&self.peek_whitespace().span));
@ -28,15 +33,7 @@ impl Parser<'_> {
Ok(thing)
}
fn kind_cast_start_id(kind: TokenKind) -> bool {
Self::tokenkind_can_start_ident(kind)
|| matches!(
kind,
TokenKind::Digits | t!("{") | t!("[") | t!("+") | t!("-") | t!("u'") | t!("u\"")
)
}
pub async fn parse_thing_or_range(
pub(crate) async fn parse_thing_or_range(
&mut self,
stk: &mut Stk,
ident: String,
@ -49,7 +46,7 @@ impl Parser<'_> {
let end = if self.eat_whitespace(t!("=")) {
let id = stk.run(|stk| self.parse_id(stk)).await?;
Bound::Included(id)
} else if Self::kind_cast_start_id(self.peek_whitespace().kind) {
} else if Self::kind_starts_record_id_key(self.peek_whitespace().kind) {
let id = stk.run(|stk| self.parse_id(stk)).await?;
Bound::Excluded(id)
} else {
@ -65,7 +62,7 @@ impl Parser<'_> {
}
// Didn't eat range yet so we need to parse the id.
let beg = if Self::kind_cast_start_id(self.peek_whitespace().kind) {
let beg = if Self::kind_starts_record_id_key(self.peek_whitespace().kind) {
let v = stk.run(|stk| self.parse_id(stk)).await?;
// check for exclusive
@ -84,7 +81,7 @@ impl Parser<'_> {
let end = if self.eat_whitespace(t!("=")) {
let id = stk.run(|stk| self.parse_id(stk)).await?;
Bound::Included(id)
} else if Self::kind_cast_start_id(self.peek_whitespace().kind) {
} else if Self::kind_starts_record_id_key(self.peek_whitespace().kind) {
let id = stk.run(|stk| self.parse_id(stk)).await?;
Bound::Excluded(id)
} else {
@ -125,10 +122,10 @@ impl Parser<'_> {
}
/// Parse a range
pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
pub(crate) async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
// Check for beginning id
let beg = if Self::tokenkind_can_start_ident(self.peek_whitespace().kind) {
let v = ctx.run(|ctx| self.parse_value(ctx)).await?;
let beg = if Self::kind_is_identifier(self.peek_whitespace().kind) {
let v = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
if self.eat_whitespace(t!(">")) {
Bound::Excluded(v)
@ -144,8 +141,8 @@ impl Parser<'_> {
let inclusive = self.eat_whitespace(t!("="));
// parse ending id.
let end = if Self::tokenkind_can_start_ident(self.peek_whitespace().kind) {
let v = ctx.run(|ctx| self.parse_value(ctx)).await?;
let end = if Self::kind_is_identifier(self.peek_whitespace().kind) {
let v = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
if inclusive {
Bound::Included(v)
} else {
@ -161,12 +158,12 @@ impl Parser<'_> {
})
}
pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
pub(crate) async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
let ident = self.next_token_value::<Ident>()?.0;
self.parse_thing_from_ident(ctx, ident).await
}
pub async fn parse_thing_from_ident(
pub(crate) async fn parse_thing_from_ident(
&mut self,
ctx: &mut Stk,
ident: String,
@ -181,7 +178,7 @@ impl Parser<'_> {
})
}
pub async fn parse_id(&mut self, stk: &mut Stk) -> ParseResult<Id> {
pub(crate) async fn parse_id(&mut self, stk: &mut Stk) -> ParseResult<Id> {
let token = self.peek_whitespace();
match token.kind {
t!("u'") | t!("u\"") => Ok(Id::Uuid(self.next_token_value()?)),
@ -208,12 +205,12 @@ impl Parser<'_> {
let next = self.peek_whitespace();
match next.kind {
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
t!(".") => {
// TODO(delskayn) explain that record ids can't have mantissas,
// exponents or a number suffix
unexpected!(self, next, "an integer");
unexpected!(self, next, "an integer", => "Numeric Record-id keys can only be integers");
}
x if Self::tokenkind_continues_ident(x) => {
x if Self::kind_is_identifier(x) => {
let span = token.span.covers(next.span);
bail!("Unexpected token `{x}` expected an integer", @span);
}
@ -230,51 +227,27 @@ impl Parser<'_> {
}
t!("-") => {
self.pop_peek();
// starting with a - so it must be a number
let digits_token = self.peek_whitespace();
match digits_token.kind {
TokenKind::Digits => {}
_ => unexpected!(self, digits_token, "an integer"),
}
let next = self.peek_whitespace();
match next.kind {
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
// TODO(delskayn) explain that record ids can't have mantissas,
// exponents or a number suffix
unexpected!(self, next, "an integer");
}
x if Self::tokenkind_continues_ident(x) => {
let span = token.span.covers(next.span);
bail!("Unexpected token `{x}` expected an integer", @span);
}
// allowed
_ => {}
}
let digits_str = self.lexer.span_str(digits_token.span);
if let Ok(number) = digits_str.parse::<u64>() {
let token = expected!(self, TokenKind::Digits);
if let Ok(number) = self.lexer.lex_compound(token, compound::integer::<u64>) {
// Parse to u64 and check whether the value equals `-i64::MIN`, going via
// u64 because `-i64::MIN` doesn't fit in an i64
match number.cmp(&((i64::MAX as u64) + 1)) {
Ordering::Less => Ok(Id::Number(-(number as i64))),
match number.value.cmp(&((i64::MAX as u64) + 1)) {
Ordering::Less => Ok(Id::Number(-(number.value as i64))),
Ordering::Equal => Ok(Id::Number(i64::MIN)),
Ordering::Greater => Ok(Id::String(format!("-{}", digits_str))),
Ordering::Greater => {
Ok(Id::String(format!("-{}", self.lexer.span_str(number.span))))
}
}
} else {
Ok(Id::String(format!("-{}", digits_str)))
Ok(Id::String(format!("-{}", self.lexer.span_str(token.span))))
}
}
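// Worked sketch of the boundary handled above: `-i64::MIN` cannot be stored
// as a positive i64, so the digits are parsed as u64 and compared against
// `i64::MAX as u64 + 1` before negating (hypothetical helper, illustration
// only, not part of the parser):
//
// fn negate_digits(digits: u64) -> Option<i64> {
//     match digits.cmp(&((i64::MAX as u64) + 1)) {
//         std::cmp::Ordering::Less => Some(-(digits as i64)),
//         std::cmp::Ordering::Equal => Some(i64::MIN),
//         // too large even for i64::MIN; kept as a string id instead
//         std::cmp::Ordering::Greater => None,
//     }
// }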
TokenKind::Digits => {
let next = self.peek_whitespace_token_at(1);
if Self::tokenkind_can_start_ident(next.kind) {
let glued = self.glue_ident(self.flexible_record_id)?;
if let TokenKind::Identifier = glued.kind {
self.pop_peek();
return Ok(Id::String(self.lexer.string.take().unwrap()));
} else {
unexpected!(self, glued, "a record-id id")
if self.flexible_record_id {
let next = self.peek_whitespace1();
if Self::kind_is_identifier(next.kind) {
let ident = self.parse_flexible_ident()?.0;
return Ok(Id::String(ident));
}
}
@ -287,16 +260,23 @@ impl Parser<'_> {
Ok(Id::String(digits_str.to_owned()))
}
}
TokenKind::Duration if self.flexible_record_id => {
self.lexer.duration = None;
TokenKind::Glued(Glued::Duration) if self.flexible_record_id => {
let slice = self.lexer.reader.span(token.span);
if slice.iter().any(|x| *x > 0b0111_1111) {
if slice.iter().any(|x| !x.is_ascii()) {
unexpected!(self, token, "an identifier");
}
// Should be valid utf-8 as it was already parsed by the lexer
let text = String::from_utf8(slice.to_vec()).unwrap();
Ok(Id::String(text))
}
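// Sketch of why the ASCII check above matters: a glued duration like `1µs`
// contains the non-ASCII `µ`, which cannot appear in a plain identifier,
// whereas something like `1s` re-reads cleanly as the flexible id "1s".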
TokenKind::Glued(_) => {
// If we glue before parsing a record id, an input like `123s456z` would
// return an error, as it is an invalid duration, even though it is a valid
// flexible record-id identifier.
// So calling glue before using that token to create a record id is not allowed.
panic!(
"Gluing tokens used in parsing a record id would result in improper parsing"
)
}
t!("ULID") => {
self.pop_peek();
// TODO: error message about how to use `ulid` as an identifier.
@ -317,8 +297,11 @@ impl Parser<'_> {
Ok(Id::Generate(Gen::Rand))
}
_ => {
self.glue_ident(self.flexible_record_id)?;
let ident = self.next_token_value::<Ident>()?.0;
let ident = if self.flexible_record_id {
self.parse_flexible_ident()?.0
} else {
self.next_token_value::<Ident>()?.0
};
Ok(Id::String(ident))
}
}

View file

@ -1,21 +1,43 @@
//! Implements token gluing logic.
use crate::syn::token::{t, Glued, TokenKind};
use crate::{
sql::duration::{
SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK, SECONDS_PER_YEAR,
},
syn::{
error::{bail, error},
parser::{mac::unexpected, ParseResult, Parser},
token::{t, DurationSuffix, NumberKind, NumberSuffix, Token, TokenKind},
},
};
use std::time::Duration as StdDuration;
use super::Parser;
impl Parser<'_> {
/// Returns true if the next token can start a statement.
pub(super) fn kind_starts_statement(kind: TokenKind) -> bool {
matches!(
kind,
t!("ACCESS")
| t!("ALTER")
| t!("ANALYZE")
| t!("BEGIN")
| t!("BREAK")
| t!("CANCEL")
| t!("COMMIT")
| t!("CONTINUE")
| t!("CREATE")
| t!("DEFINE")
| t!("DELETE")
| t!("FOR") | t!("IF")
| t!("INFO") | t!("INSERT")
| t!("KILL") | t!("LIVE")
| t!("OPTION")
| t!("REBUILD")
| t!("RETURN")
| t!("RELATE")
| t!("REMOVE")
| t!("SELECT")
| t!("LET") | t!("SHOW")
| t!("SLEEP")
| t!("THROW")
| t!("UPDATE")
| t!("UPSERT")
| t!("USE")
)
}
/// Returns if a token kind is keyword-like.
pub fn tokenkind_can_start_ident(t: TokenKind) -> bool {
pub(super) fn kind_is_keyword_like(t: TokenKind) -> bool {
matches!(
t,
TokenKind::Keyword(_)
@ -23,27 +45,11 @@ impl Parser<'_> {
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::Identifier
| TokenKind::Exponent
| TokenKind::DatetimeChars(_)
| TokenKind::NumberSuffix(_)
| TokenKind::DurationSuffix(
// All except Micro unicode
DurationSuffix::Nano
| DurationSuffix::Micro
| DurationSuffix::Milli
| DurationSuffix::Second
| DurationSuffix::Minute
| DurationSuffix::Hour
| DurationSuffix::Day
| DurationSuffix::Week
| DurationSuffix::Year
)
)
}
/// Returns if a token kind can continue an identifier.
pub fn tokenkind_continues_ident(t: TokenKind) -> bool {
/// Returns if a token kind can start an identifier.
pub(super) fn kind_is_identifier(t: TokenKind) -> bool {
matches!(
t,
TokenKind::Keyword(_)
@ -52,478 +58,61 @@ impl Parser<'_> {
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::Identifier
| TokenKind::DatetimeChars(_)
| TokenKind::Exponent
| TokenKind::NumberSuffix(_)
)
}
pub(super) fn kind_starts_record_id_key(kind: TokenKind) -> bool {
Self::kind_is_identifier(kind)
|| matches!(
kind,
TokenKind::Digits
| t!("{") | t!("[")
| t!("+") | t!("-")
| t!("u'") | t!("u\"")
| t!("'") | t!("\"")
| TokenKind::Glued(Glued::Uuid | Glued::Strand)
)
}
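// Examples (sketch): a record-id key may start with an identifier
// (`foo:bar`), digits (`foo:123`), a sign (`foo:-9`), an object or array
// (`foo:{ a: 1 }`, `foo:[1]`), or a uuid/strand quote (`foo:u"..."`,
// `foo:"..."`).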
pub(super) fn kind_starts_subquery(kind: TokenKind) -> bool {
matches!(
kind,
t!("RETURN")
| t!("SELECT")
| t!("CREATE")
| t!("UPSERT")
| t!("UPDATE")
| t!("DELETE")
| t!("RELATE")
| t!("DEFINE")
| t!("REMOVE")
| t!("REBUILD")
| t!("IF")
)
}
pub(super) fn kind_starts_prime_value(kind: TokenKind) -> bool {
matches!(
kind,
t!("+")
| t!("-") | t!("u'")
| t!("u\"") | t!("d'")
| t!("d\"") | t!("r'")
| t!("r\"") | t!("'")
| t!("\"") | TokenKind::Digits
| TokenKind::NaN
| TokenKind::DurationSuffix(
// All except Micro unicode
DurationSuffix::Nano
| DurationSuffix::Micro
| DurationSuffix::Milli
| DurationSuffix::Second
| DurationSuffix::Minute
| DurationSuffix::Hour
| DurationSuffix::Day
| DurationSuffix::Week
)
)
| t!("true") | t!("false")
| t!("fn") | t!("ml")
| t!("(") | t!("{")
| t!("/") | t!("|")
| t!("||") | t!("<")
| t!("$param")
| t!("..") | TokenKind::Glued(_)
) || Self::kind_starts_subquery(kind)
|| Self::kind_is_identifier(kind)
}
/// Returns if the peeked token can be a identifier.
pub fn peek_continues_ident(&mut self) -> bool {
Self::tokenkind_can_start_ident(self.peek_kind())
}
/// Glue an token and immediately consume it.
pub fn glue_next(&mut self) -> ParseResult<Token> {
self.glue()?;
Ok(self.next())
}
/// Glues the next token together, returning its value; doesn't consume the token.
pub fn glue(&mut self) -> ParseResult<Token> {
let token = self.peek();
match token.kind {
TokenKind::Exponent
| TokenKind::NumberSuffix(_)
| TokenKind::DurationSuffix(_)
| TokenKind::VectorType(_)
| TokenKind::DatetimeChars(_) => self.glue_ident(false),
TokenKind::Digits => self.glue_numeric(),
t!("\"") | t!("'") => {
self.pop_peek();
let t = self.lexer.relex_strand(token);
let TokenKind::Strand = t.kind else {
unexpected!(self, t, "a strand")
};
self.prepend_token(t);
Ok(t)
}
t!("+") | t!("-") => {
if let TokenKind::Digits = self.peek_whitespace_token_at(1).kind {
self.glue_number()
} else {
Ok(token)
}
}
_ => Ok(token),
}
}
/// Glues all subsequent tokens that can make up an ident together into a single string.
pub fn glue_ident(&mut self, flexible: bool) -> ParseResult<Token> {
let start = self.peek();
let mut token_buffer = match start.kind {
TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
self.pop_peek();
self.lexer.span_str(start.span).to_owned()
}
TokenKind::Digits if flexible => {
self.pop_peek();
self.lexer.span_str(start.span).to_owned()
}
TokenKind::DurationSuffix(x) if x.can_be_ident() => {
self.pop_peek();
self.lexer.span_str(start.span).to_owned()
}
TokenKind::DatetimeChars(_) | TokenKind::VectorType(_) => {
self.pop_peek();
self.lexer.span_str(start.span).to_owned()
}
_ => return Ok(start),
};
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);
let mut prev = start;
loop {
let p = self.peek_whitespace();
match p.kind {
// These token kinds always complete an ident; no more identifier parts can
// follow after this.
TokenKind::Identifier => {
self.pop_peek();
let buffer = self.lexer.string.take().unwrap();
token_buffer.push_str(&buffer);
prev = p;
break;
}
TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::NumberSuffix(_) => {
self.pop_peek();
let str = self.lexer.span_str(p.span);
token_buffer.push_str(str);
prev = p;
break;
}
// These tokens might have some more parts following them
TokenKind::Exponent | TokenKind::DatetimeChars(_) | TokenKind::Digits => {
self.pop_peek();
let str = self.lexer.span_str(p.span);
token_buffer.push_str(str);
prev = p;
}
TokenKind::DurationSuffix(suffix) => {
self.pop_peek();
if !suffix.can_be_ident() {
bail!("Invalid identifier containing non-ascii characters", @p.span);
}
token_buffer.push_str(suffix.as_str());
prev = p;
}
_ => break,
}
}
let token = Token {
kind: TokenKind::Identifier,
span: start.span.covers(prev.span),
};
self.lexer.string = Some(token_buffer);
self.prepend_token(token);
Ok(token)
}
pub fn glue_numeric(&mut self) -> ParseResult<Token> {
let peek = self.peek();
match peek.kind {
TokenKind::Digits => {
if matches!(self.peek_whitespace_token_at(1).kind, TokenKind::DurationSuffix(_)) {
return self.glue_duration();
}
self.glue_number()
}
t!("+") | t!("-") => self.glue_number(),
_ => Ok(peek),
}
}
pub fn glue_number(&mut self) -> ParseResult<Token> {
let start = self.peek();
match start.kind {
t!("+") | t!("-") => {
self.pop_peek();
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);
let n = self.peek_whitespace();
if n.kind != TokenKind::Digits {
unexpected!(self, start, "a number")
}
self.pop_peek();
}
TokenKind::Digits => {
self.pop_peek();
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);
}
_ => return Ok(start),
};
let mut kind = NumberKind::Integer;
// Check for mantissa
if let t!(".") = self.peek_whitespace().kind {
self.pop_peek();
let next = self.peek_whitespace();
if next.kind != TokenKind::Digits {
unexpected!(self, next, "digits after the dot");
}
self.pop_peek();
kind = NumberKind::Float;
}
// Check for exponent
if let TokenKind::Exponent = self.peek_whitespace().kind {
self.pop_peek();
let exponent_token = self.peek_whitespace();
match exponent_token.kind {
t!("+") | t!("-") => {
self.pop_peek();
let exponent_token = self.peek_whitespace();
if exponent_token.kind != TokenKind::Digits {
unexpected!(self, exponent_token, "digits after the exponent")
}
}
TokenKind::Digits => {}
_ => unexpected!(self, exponent_token, "digits after the exponent"),
}
self.pop_peek();
kind = NumberKind::Float;
}
// Check for number suffix
let suffix_token = self.peek_whitespace();
if let TokenKind::NumberSuffix(suffix) = suffix_token.kind {
self.pop_peek();
match suffix {
NumberSuffix::Float => {
kind = NumberKind::Float;
}
NumberSuffix::Decimal => {
kind = NumberKind::Decimal;
}
}
}
// Check that no ident-like tokens follow
let next = self.peek_whitespace();
if Self::tokenkind_continues_ident(next.kind) {
unexpected!(self, next, "number to end")
}
let token = Token {
kind: TokenKind::Number(kind),
span: start.span.covers(self.last_span()),
};
self.prepend_token(token);
Ok(token)
}
pub fn glue_duration(&mut self) -> ParseResult<Token> {
let mut duration = StdDuration::ZERO;
let start = self.peek();
match start.kind {
TokenKind::Digits => {
self.pop_peek();
}
_ => return Ok(start),
};
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);
let mut cur = start;
loop {
let p = self.peek_whitespace();
let suffix = match p.kind {
TokenKind::DurationSuffix(x) => x,
_ => unexpected!(self, p, "a duration suffix"),
};
self.pop_peek();
let digits_str = self.lexer.span_str(cur.span);
let digits_value: u64 = digits_str
.parse()
.map_err(|e| error!("Failed to parse duration digits: {e}",@cur.span))?;
let addition = match suffix {
DurationSuffix::Nano => StdDuration::from_nanos(digits_value),
DurationSuffix::Micro | DurationSuffix::MicroUnicode => {
StdDuration::from_micros(digits_value)
}
DurationSuffix::Milli => StdDuration::from_millis(digits_value),
DurationSuffix::Second => StdDuration::from_secs(digits_value),
DurationSuffix::Minute => {
let minutes =
digits_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(minutes)
}
DurationSuffix::Hour => {
let hours = digits_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(hours)
}
DurationSuffix::Day => {
let days = digits_value.checked_mul(SECONDS_PER_DAY).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(days)
}
DurationSuffix::Week => {
let weeks = digits_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(weeks)
}
DurationSuffix::Year => {
let years = digits_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(years)
}
};
duration = duration.checked_add(addition).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
match self.peek_whitespace().kind {
TokenKind::Digits => {
cur = self.pop_peek();
}
x if Parser::tokenkind_continues_ident(x) => {
let span = start.span.covers(p.span);
bail!("Invalid token, expected duration, but token contained invalid characters", @span)
}
_ => break,
}
}
let span = start.span.covers(cur.span);
let token = Token {
kind: TokenKind::Duration,
span,
};
self.lexer.duration = Some(duration);
self.prepend_token(token);
Ok(token)
}
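// The unit conversions above guard against u64 overflow with `checked_mul`
// before building a Duration; a standalone sketch of the same guard
// (hypothetical helper, illustration only):
//
// fn weeks_to_duration(weeks: u64) -> Option<std::time::Duration> {
//     const SECONDS_PER_WEEK: u64 = 7 * 24 * 60 * 60;
//     weeks.checked_mul(SECONDS_PER_WEEK).map(std::time::Duration::from_secs)
// }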
/// Glues the next tokens which would make up a float together into a single buffer.
/// Returns an error if the tokens would form an invalid float.
pub fn glue_float(&mut self) -> ParseResult<Token> {
let start = self.peek();
match start.kind {
t!("+") | t!("-") => {
self.pop_peek();
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);
let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
let span = start.span.covers(digits_token.span);
bail!("Unexpected token `{}` expected a floating point number",digits_token.kind,@span);
}
self.pop_peek();
}
TokenKind::Digits => {
self.pop_peek();
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);
}
TokenKind::NumberSuffix(NumberSuffix::Float) => {
return Ok(start);
}
_ => return Ok(start),
}
// check for mantissa
if let t!(".") = self.peek_whitespace().kind {
self.pop_peek();
let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token, "a floating point number")
}
self.pop_peek();
};
// check for exponent
if let TokenKind::Exponent = self.peek_whitespace().kind {
self.pop_peek();
let mut digits_token = self.peek_whitespace();
if let t!("+") | t!("-") = digits_token.kind {
self.pop_peek();
digits_token = self.peek_whitespace();
}
if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token, "a floating point number")
}
self.pop_peek();
}
// check for number suffix
let token = self.peek_whitespace();
if let TokenKind::NumberSuffix(suffix) = token.kind {
match suffix {
NumberSuffix::Float => {
self.pop_peek();
}
NumberSuffix::Decimal => {
unexpected!(self, token, "a floating point number")
}
}
}
let t = self.peek_whitespace();
if Self::tokenkind_continues_ident(t.kind) {
unexpected!(self, t, "a floating point number to end")
}
let span = start.span.covers(self.last_span());
let token = Token {
kind: TokenKind::Number(NumberKind::Float),
span,
};
self.prepend_token(token);
Ok(token)
}
pub fn glue_plain_strand(&mut self) -> ParseResult<Token> {
let start = self.peek();
match start.kind {
t!("\"") | t!("'") => {}
_ => return Ok(start),
};
let token = self.lexer.relex_strand(start);
self.prepend_token(token);
Ok(token)
pub(super) fn kind_starts_expression(kind: TokenKind) -> bool {
matches!(kind, t!("..") | t!("<") | t!("->")) | Self::kind_starts_prime_value(kind)
}
}

View file

@ -1,5 +1,6 @@
use crate::syn::token::Token;
#[derive(Debug)]
pub struct TokenBuffer<const S: usize> {
buffer: [Token; S],
write: u8,

View file

@ -9,7 +9,7 @@ use crate::syn::token::{t, TokenKind};
impl Parse<Self> for Value {
fn parse(val: &str) -> Self {
super::value(val).unwrap()
super::value_field(val).inspect_err(|e| eprintln!("{e}")).unwrap()
}
}
@ -19,7 +19,11 @@ impl Parse<Self> for Array {
let mut stack = Stack::new();
let start = parser.peek().span;
assert!(parser.eat(t!("[")));
stack.enter(|ctx| async move { parser.parse_array(ctx, start).await }).finish().unwrap()
stack
.enter(|ctx| async move { parser.parse_array(ctx, start).await })
.finish()
.map_err(|e| e.render_on(val))
.unwrap()
}
}
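// Mapping the error through `render_on` before unwrapping means a failing
// test prints the offending source annotated with line and column markers
// rather than a bare Debug value of the error.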
@ -54,7 +58,11 @@ impl Parse<Self> for Expression {
fn parse(val: &str) -> Self {
let mut parser = Parser::new(val.as_bytes());
let mut stack = Stack::new();
let value = stack.enter(|ctx| parser.parse_value_field(ctx)).finish().unwrap();
let value = stack
.enter(|ctx| parser.parse_value_table(ctx))
.finish()
.map_err(|e| e.render_on(val))
.unwrap();
if let Value::Expression(x) = value {
return *x;
}

View file

@ -63,39 +63,6 @@ macro_rules! t {
}
};
("ns") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Nano)
};
("us") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Micro)
};
("µs") => {
$crate::syn::token::TokenKind::DurationSuffix(
$crate::syn::token::DurationSuffix::MicroUnicode,
)
};
("ms") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Milli)
};
("s") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Second)
};
("m") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Minute)
};
("h") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Hour)
};
("d") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Day)
};
("w") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Week)
};
("y") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Year)
};
("f") => {
$crate::syn::token::TokenKind::NumberSuffix($crate::syn::token::NumberSuffix::Float)
};
@ -140,12 +107,6 @@ macro_rules! t {
(":") => {
$crate::syn::token::TokenKind::Colon
};
("<-") => {
$crate::syn::token::TokenKind::ArrowLeft
};
("<->") => {
$crate::syn::token::TokenKind::BiArrow
};
("->") => {
$crate::syn::token::TokenKind::ArrowRight
};

View file

@ -256,47 +256,6 @@ impl VectorTypeKind {
}
}
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum DurationSuffix {
Nano,
Micro,
MicroUnicode,
Milli,
Second,
Minute,
Hour,
Day,
Week,
Year,
}
impl DurationSuffix {
pub fn can_be_ident(&self) -> bool {
!matches!(self, DurationSuffix::MicroUnicode)
}
pub fn as_str(&self) -> &'static str {
match self {
DurationSuffix::Nano => "ns",
DurationSuffix::Micro => "us",
DurationSuffix::MicroUnicode => "µs",
DurationSuffix::Milli => "ms",
DurationSuffix::Second => "s",
DurationSuffix::Minute => "m",
DurationSuffix::Hour => "h",
DurationSuffix::Day => "d",
DurationSuffix::Week => "w",
DurationSuffix::Year => "y",
}
}
}
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum NumberSuffix {
Float,
Decimal,
}
impl Algorithm {
pub fn as_str(&self) -> &'static str {
match self {
@ -349,16 +308,24 @@ impl QouteKind {
}
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum NumberKind {
Decimal,
Float,
Integer,
pub enum Glued {
Number,
Duration,
Strand,
Datetime,
Uuid,
}
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum DatetimeChars {
T,
Z,
impl Glued {
fn as_str(&self) -> &'static str {
match self {
Glued::Number => "a number",
Glued::Strand => "a strand",
Glued::Uuid => "a uuid",
Glued::Datetime => "a datetime",
Glued::Duration => "a duration",
}
}
}
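// In practice (sketch): when the parser has to look past a possible compound
// token, the lexed result is pushed back into the token buffer as, e.g.,
// TokenKind::Glued(Glued::Duration), so the token is not re-lexed and its
// value is not lost on the next peek.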
/// The type of token
@ -376,13 +343,6 @@ pub enum TokenKind {
CloseDelim(Delim),
/// a token denoting the opening of a string, i.e. `r"`
Qoute(QouteKind),
/// Not produced by the lexer but only the result of token gluing.
Number(NumberKind),
/// Not produced by the lexer but only the result of token gluing.
Duration,
/// Not produced by the lexer but only the result of token gluing.
Strand,
Regex,
/// A parameter like `$name`.
Parameter,
Identifier,
@ -398,10 +358,6 @@ pub enum TokenKind {
Dollar,
/// `->`
ArrowRight,
/// `<-`
ArrowLeft,
/// `<->`
BiArrow,
/// '/'
ForwardSlash,
/// `.`
@ -422,22 +378,17 @@ pub enum TokenKind {
Vert,
/// `@`
At,
/// A token which could not be properly lexed.
Invalid,
/// A token which indicates the end of the file.
Eof,
/// A token consisting of one or more ASCII digits.
Digits,
/// An identifier-like token which matches a duration suffix.
DurationSuffix(DurationSuffix),
/// A part of a datetime-like token, such as the `T` or `Z` characters.
DatetimeChars(DatetimeChars),
/// An identifier-like token which matches an exponent.
Exponent,
/// An identifier-like token which matches a number suffix.
NumberSuffix(NumberSuffix),
/// The Not-A-Number number token.
NaN,
/// A compound token which has been glued together and then put back into the
/// token buffer. This is required in places where we need to look past possible compound tokens.
Glued(Glued),
/// A token which could not be properly lexed.
Invalid,
}
impl fmt::Display for TokenKind {
@ -451,7 +402,7 @@ const _TOKEN_KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::<TokenKind>()];
impl TokenKind {
pub fn has_data(&self) -> bool {
matches!(self, TokenKind::Identifier | TokenKind::Duration)
matches!(self, TokenKind::Identifier | TokenKind::Glued(_))
}
fn algorithm_as_str(alg: Algorithm) -> &'static str {
@ -486,20 +437,14 @@ impl TokenKind {
TokenKind::CloseDelim(Delim::Paren) => ")",
TokenKind::CloseDelim(Delim::Brace) => "}",
TokenKind::CloseDelim(Delim::Bracket) => "]",
TokenKind::DurationSuffix(x) => x.as_str(),
TokenKind::Strand => "a strand",
TokenKind::Parameter => "a parameter",
TokenKind::Number(_) => "a number",
TokenKind::Identifier => "an identifier",
TokenKind::Regex => "a regex",
TokenKind::LeftChefron => "<",
TokenKind::RightChefron => ">",
TokenKind::Star => "*",
TokenKind::Dollar => "$",
TokenKind::Question => "?",
TokenKind::ArrowRight => "->",
TokenKind::ArrowLeft => "<-",
TokenKind::BiArrow => "<->",
TokenKind::ForwardSlash => "/",
TokenKind::Dot => ".",
TokenKind::DotDot => "..",
@ -514,13 +459,10 @@ impl TokenKind {
TokenKind::Eof => "Eof",
TokenKind::WhiteSpace => "whitespace",
TokenKind::Qoute(x) => x.as_str(),
TokenKind::Duration => "a duration",
TokenKind::Digits => "a number",
TokenKind::NaN => "NaN",
TokenKind::Glued(x) => x.as_str(),
// below are small broken up tokens which are most of the time identifiers.
TokenKind::DatetimeChars(_) => "an identifier",
TokenKind::Exponent => "an identifier",
TokenKind::NumberSuffix(_) => "an identifier",
}
}
}