Move some of the handling of complex tokens back to the lexer. (#4708)
Co-authored-by: Tobie Morgan Hitchcock <tobie@surrealdb.com>
Co-authored-by: Dmitrii Blaginin <dmitrii@blaginin.me>
parent 4e2b2b9e30
commit 1a1278fc3a
60 changed files with 2411 additions and 2513 deletions
@@ -20,7 +20,7 @@ impl Tokens {
}
}

-pub(super) fn get_token_string<'a>(&'a self, t: &'a Token) -> Result<&str, Error> {
+pub(super) fn get_token_string<'a>(&'a self, t: &'a Token) -> Result<&'a str, Error> {
t.get_str(&self.i)
}
@@ -157,7 +157,7 @@ impl Token {
}
}

-pub(super) fn get_str<'a>(&'a self, i: &'a str) -> Result<&str, Error> {
+pub(super) fn get_str<'a>(&'a self, i: &'a str) -> Result<&'a str, Error> {
match self {
Token::Ref {
bytes,
@@ -7,8 +7,11 @@ use std::fmt;
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
+#[non_exhaustive]
pub enum Dir {
/// `<-`
In,
/// `->`
Out,
+/// `<->`
+Both,
}
@@ -137,7 +137,7 @@ impl fmt::Display for Part {
// ------------------------------

pub trait Next<'a> {
-fn next(&'a self) -> &[Part];
+fn next(&'a self) -> &'a [Part];
}

impl<'a> Next<'a> for &'a [Part] {
@@ -152,7 +152,7 @@ impl<'a> Next<'a> for &'a [Part] {
// ------------------------------

pub trait NextMethod<'a> {
-fn next_method(&'a self) -> &[Part];
+fn next_method(&'a self) -> &'a [Part];
}

impl<'a> NextMethod<'a> for &'a [Part] {
@@ -95,6 +95,14 @@ impl SyntaxError {
self
}

+pub fn with_cause<T: Display>(mut self, t: T) -> Self {
+self.diagnostic = Box::new(Diagnostic {
+kind: DiagnosticKind::Cause(t.to_string()),
+next: Some(self.diagnostic),
+});
+self
+}
+
pub fn render_on(&self, source: &str) -> RenderedError {
let mut res = RenderedError {
errors: Vec::new(),
@@ -170,6 +170,11 @@ impl fmt::Display for Snippet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+// extra spacing for the line number
let spacing = self.location.line.ilog10() as usize + 1;
+for _ in 0..spacing {
+f.write_str(" ")?;
+}
writeln!(f, "--> [{}:{}]", self.location.line, self.location.column)?;
+
for _ in 0..spacing {
f.write_str(" ")?;
}
@@ -301,6 +306,7 @@ mod test {

let error_string = format!("{}", error);
let expected = r#"some_error
--> [4:10]
|
4 | ...hallo error...
| ^^^^^ this is wrong
@@ -4,12 +4,12 @@ use crate::syn::{
unicode::{byte, chars},
Lexer,
},
-token::{t, DatetimeChars, Token, TokenKind},
+token::{t, Token, TokenKind},
};

impl<'a> Lexer<'a> {
/// Eats a single line comment.
-pub fn eat_single_line_comment(&mut self) {
+pub(super) fn eat_single_line_comment(&mut self) {
loop {
let Some(byte) = self.reader.next() else {
break;
@@ -45,7 +45,7 @@ impl<'a> Lexer<'a> {
}

/// Eats a multi line comment and returns an error if `*/` would be missing.
-pub fn eat_multi_line_comment(&mut self) -> Result<(), SyntaxError> {
+pub(super) fn eat_multi_line_comment(&mut self) -> Result<(), SyntaxError> {
let start_span = self.current_span();
loop {
let Some(byte) = self.reader.next() else {
@@ -64,7 +64,7 @@ impl<'a> Lexer<'a> {
}

/// Eat whitespace like spaces tables and new-lines.
-pub fn eat_whitespace(&mut self) {
+pub(super) fn eat_whitespace(&mut self) {
loop {
let Some(byte) = self.reader.peek() else {
return;
@@ -100,8 +100,17 @@ impl<'a> Lexer<'a> {
}
}

+/// Lex digits tokens
+pub(super) fn lex_digits(&mut self) -> Token {
+while let Some(b'0'..=b'9' | b'_') = self.reader.peek() {
+self.reader.next();
+}
+
+self.finish_token(TokenKind::Digits)
+}
+
/// Lex the next token, starting from the given byte.
-pub fn lex_ascii(&mut self, byte: u8) -> Token {
+pub(super) fn lex_ascii(&mut self, byte: u8) -> Token {
let kind = match byte {
b'{' => t!("{"),
b'}' => t!("}"),
@@ -190,16 +199,6 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("<|")
}
-Some(b'-') => {
-self.reader.next();
-match self.reader.peek() {
-Some(b'>') => {
-self.reader.next();
-t!("<->")
-}
-_ => t!("<-"),
-}
-}
_ => t!("<"),
},
b'>' => match self.reader.peek() {
@@ -328,75 +327,10 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("d'")
}
-Some(b'e') => {
-self.reader.next();
-
-let Some(b'c') = self.reader.peek() else {
-self.scratch.push('d');
-return self.lex_ident_from_next_byte(b'e');
-};
-
-self.reader.next();
-
-if self.reader.peek().map(|x| x.is_ascii_alphanumeric()).unwrap_or(false) {
-self.scratch.push('d');
-self.scratch.push('e');
-return self.lex_ident_from_next_byte(b'c');
-}
-
-t!("dec")
-}
-Some(x) if !x.is_ascii_alphabetic() => {
-t!("d")
-}
-None => {
-t!("d")
-}
-_ => {
-return self.lex_ident_from_next_byte(b'd');
-}
-},
-b'f' => match self.reader.peek() {
-Some(x) if !x.is_ascii_alphanumeric() => {
-t!("f")
-}
-None => t!("f"),
-_ => {
-return self.lex_ident_from_next_byte(b'f');
-}
-},
-b'n' => match self.reader.peek() {
-Some(b's') => {
-self.reader.next();
-if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
-self.scratch.push('n');
-return self.lex_ident_from_next_byte(b's');
-}
-t!("ns")
-}
-_ => {
-return self.lex_ident_from_next_byte(b'n');
-}
-},
-b'm' => match self.reader.peek() {
-Some(b's') => {
-self.reader.next();
-if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
-self.scratch.push('m');
-return self.lex_ident_from_next_byte(b's');
-}
-t!("ms")
-}
-Some(x) if !x.is_ascii_alphabetic() => {
-t!("m")
-}
-None => {
-t!("m")
-}
-_ => {
-return self.lex_ident_from_next_byte(b'm');
-}
-},
b's' => match self.reader.peek() {
Some(b'"') => {
self.reader.next();
@@ -406,32 +340,10 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("'")
}
-Some(x) if x.is_ascii_alphabetic() => {
+_ => {
return self.lex_ident_from_next_byte(b's');
}
-_ => t!("s"),
},
-b'h' => {
-if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
-return self.lex_ident_from_next_byte(b'h');
-} else {
-t!("h")
-}
-}
-b'w' => {
-if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
-return self.lex_ident_from_next_byte(b'w');
-} else {
-t!("w")
-}
-}
-b'y' => {
-if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
-return self.lex_ident_from_next_byte(b'y');
-} else {
-t!("y")
-}
-}
b'u' => match self.reader.peek() {
Some(b'"') => {
self.reader.next();
@@ -441,14 +353,6 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("u'")
}
-Some(b's') => {
-self.reader.next();
-if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
-self.scratch.push('u');
-return self.lex_ident_from_next_byte(b's');
-}
-t!("us")
-}
_ => {
return self.lex_ident_from_next_byte(b'u');
}
@@ -466,24 +370,6 @@ impl<'a> Lexer<'a> {
return self.lex_ident_from_next_byte(b'r');
}
},
-b'Z' => match self.reader.peek() {
-Some(x) if x.is_ascii_alphabetic() => {
-return self.lex_ident_from_next_byte(b'Z');
-}
-_ => TokenKind::DatetimeChars(DatetimeChars::Z),
-},
-b'T' => match self.reader.peek() {
-Some(x) if x.is_ascii_alphabetic() => {
-return self.lex_ident_from_next_byte(b'T');
-}
-_ => TokenKind::DatetimeChars(DatetimeChars::T),
-},
-b'e' => {
-return self.lex_exponent(b'e');
-}
-b'E' => {
-return self.lex_exponent(b'E');
-}
b'0'..=b'9' => return self.lex_digits(),
b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
return self.lex_ident_from_next_byte(byte);
@@ -8,7 +8,7 @@ impl<'a> Lexer<'a> {
/// lex non-ascii characters.
///
/// Should only be called after determining that the byte is not a valid ascii character.
-pub fn lex_char(&mut self, byte: u8) -> Token {
+pub(super) fn lex_char(&mut self, byte: u8) -> Token {
let c = match self.reader.complete_char(byte) {
Ok(x) => x,
Err(e) => return self.invalid_token(e.into()),
@@ -28,20 +28,6 @@ impl<'a> Lexer<'a> {
'⊄' => t!("⊄"),
'×' => t!("×"),
'÷' => t!("÷"),
-'µ' => {
-let Some(b's') = self.reader.peek() else {
-let err = error!("Invalid token `µ` expected token to be followed by `s`", @self.current_span());
-return self.invalid_token(err);
-};
-self.reader.next();
-
-if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
-let err = error!("Invalid token `µ` expected token to be followed by `s`", @self.current_span());
-return self.invalid_token(err);
-}
-
-t!("µs")
-}
x => {
let err = error!("Invalid token `{x}`", @self.current_span());
return self.invalid_token(err);
core/src/syn/lexer/compound/datetime.rs (new file, 195 lines)
@@ -0,0 +1,195 @@
use std::ops::RangeInclusive;

use chrono::{DateTime, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};

use crate::syn::{
error::{bail, error, SyntaxError},
lexer::Lexer,
token::{t, Token},
};

pub fn datetime(lexer: &mut Lexer, start: Token) -> Result<DateTime<Utc>, SyntaxError> {
let double = match start.kind {
t!("d\"") => true,
t!("d'") => false,
x => panic!("Invalid start token of datetime compound: {x}"),
};
let datetime = datetime_inner(lexer)?;
if double {
lexer.expect('"')?;
} else {
lexer.expect('\'')?;
}
Ok(datetime)
}

/// Lexes a datetime without the surrounding `'` or `"`
pub fn datetime_inner(lexer: &mut Lexer) -> Result<DateTime<Utc>, SyntaxError> {
let date_start = lexer.reader.offset();

let year_neg = lexer.eat(b'-');
if !year_neg {
lexer.eat(b'+');
}

let year = parse_datetime_digits(lexer, 4, 0..=9999)?;
lexer.expect('-')?;
let month = parse_datetime_digits(lexer, 2, 1..=12)?;
lexer.expect('-')?;
let day = parse_datetime_digits(lexer, 2, 1..=31)?;

let year = if year_neg {
-(year as i32)
} else {
year as i32
};

let date = NaiveDate::from_ymd_opt(year, month as u32, day as u32).ok_or_else(
|| error!("Invalid DateTime date: date outside of valid range", @lexer.span_since(date_start)),
)?;

if !lexer.eat_when(|x| x == b'T') {
let time = NaiveTime::default();
let date_time = NaiveDateTime::new(date, time);

let datetime =
Utc.fix().from_local_datetime(&date_time).earliest().unwrap().with_timezone(&Utc);

return Ok(datetime);
}

let time_start = lexer.reader.offset();

let hour = parse_datetime_digits(lexer, 2, 0..=24)?;
lexer.expect(':')?;
let minute = parse_datetime_digits(lexer, 2, 0..=59)?;
lexer.expect(':')?;
let second = parse_datetime_digits(lexer, 2, 0..=60)?;

let nanos_start = lexer.reader.offset();
let nanos = if lexer.eat(b'.') {
let mut number = 0u32;
let mut count = 0;

loop {
let Some(d) = lexer.reader.peek() else {
break;
};
if !d.is_ascii_digit() {
break;
}

if count == 9 {
bail!("Invalid datetime nanoseconds, expected no more then 9 digits", @lexer.span_since(nanos_start))
}

lexer.reader.next();
number *= 10;
number += (d - b'0') as u32;
count += 1;
}

if count == 0 {
bail!("Invalid datetime nanoseconds, expected at least a single digit", @lexer.span_since(nanos_start))
}

// if digits are missing they count as 0's
for _ in count..9 {
number *= 10;
}

number
} else {
0
};

let time = NaiveTime::from_hms_nano_opt(hour as u32, minute as u32, second as u32, nanos)
.ok_or_else(
|| error!("Invalid DateTime time: time outside of valid range", @lexer.span_since(time_start)),
)?;

let timezone_start = lexer.reader.offset();
let timezone = match lexer.reader.peek() {
Some(b'-') => {
lexer.reader.next();
let (hour, minute) = parse_timezone(lexer)?;
// The range checks on the digits ensure that the offset can't exceed 23:59 so below
// unwraps won't panic.
FixedOffset::west_opt((hour * 3600 + minute * 60) as i32).unwrap()
}
Some(b'+') => {
lexer.reader.next();
let (hour, minute) = parse_timezone(lexer)?;

// The range checks on the digits ensure that the offset can't exceed 23:59 so below
// unwraps won't panic.
FixedOffset::east_opt((hour * 3600 + minute * 60) as i32).unwrap()
}
Some(b'Z') => {
lexer.reader.next();
Utc.fix()
}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
bail!("Invalid datetime timezone, expected `Z` or a timezone offset, found {char}",@lexer.span_since(timezone_start));
}
None => {
bail!("Invalid end of file, expected datetime to finish",@lexer.span_since(time_start));
}
};

let date_time = NaiveDateTime::new(date, time);

let datetime = timezone
.from_local_datetime(&date_time)
.earliest()
// this should never panic with a fixed offset.
.unwrap()
.with_timezone(&Utc);

Ok(datetime)
}

fn parse_timezone(lexer: &mut Lexer) -> Result<(u32, u32), SyntaxError> {
let hour = parse_datetime_digits(lexer, 2, 0..=23)? as u32;
lexer.expect(':')?;
let minute = parse_datetime_digits(lexer, 2, 0..=59)? as u32;

Ok((hour, minute))
}

fn parse_datetime_digits(
lexer: &mut Lexer,
count: usize,
range: RangeInclusive<usize>,
) -> Result<usize, SyntaxError> {
let start = lexer.reader.offset();

let mut value = 0usize;

for _ in 0..count {
let offset = lexer.reader.offset();
match lexer.reader.next() {
Some(x) if x.is_ascii_digit() => {
value *= 10;
value += (x - b'0') as usize;
}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
let span = lexer.span_since(offset);
bail!("Invalid datetime, expected digit character found `{char}`", @span);
}
None => {
bail!("Expected end of file, expected datetime digit character", @lexer.current_span());
}
}
}

if !range.contains(&value) {
let span = lexer.span_since(start);
bail!("Invalid datetime digit section, section not within allowed range",
@span => "This section must be within {}..={}",range.start(),range.end());
}

Ok(value)
}
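
Note: the fractional-second branch above accepts between one and nine digits and treats any unwritten trailing digits as zeros. A minimal standalone sketch of that arithmetic (the helper name is ours, not part of this commit):

    // ".5" -> 500_000_000 ns, ".123456789" -> 123_456_789 ns
    fn pad_nanos(digits: &str) -> Option<u32> {
        if digits.is_empty() || digits.len() > 9 || !digits.bytes().all(|b| b.is_ascii_digit()) {
            return None; // the lexer demands 1..=9 digit characters
        }
        let mut number: u32 = 0;
        for d in digits.bytes() {
            number = number * 10 + u32::from(d - b'0');
        }
        for _ in digits.len()..9 {
            number *= 10; // missing digits count as zeros
        }
        Some(number)
    }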
core/src/syn/lexer/compound/ident.rs (new file, 25 lines)
@@ -0,0 +1,25 @@
use crate::syn::{
error::{bail, SyntaxError},
lexer::{unicode::is_identifier_continue, Lexer},
token::{Token, TokenKind},
};
use std::mem;

pub fn flexible_ident(lexer: &mut Lexer, start: Token) -> Result<String, SyntaxError> {
match start.kind {
TokenKind::Digits => {
let mut res = lexer.span_str(start.span).to_owned();
while let Some(x) = lexer.reader.peek() {
if is_identifier_continue(x) {
lexer.reader.next();
res.push(x as char);
} else {
break;
}
}
Ok(res)
}
TokenKind::Identifier => Ok(mem::take(&mut lexer.string).unwrap()),
x => bail!("Unexpected token {x}, expected flexible identifier", @start.span),
}
}
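
Note: `flexible_ident` lets a token that lexed as plain digits keep absorbing identifier characters, so input such as `123abc` comes back as the string "123abc"; a buffered `Identifier` token returns its string as-is, and anything else is a syntax error. A hypothetical call site:

    // start.kind is TokenKind::Digits or TokenKind::Identifier
    let name = compound::flexible_ident(&mut lexer, start)?; // e.g. "123abc"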
@@ -1,25 +1,19 @@
use crate::syn::{
error::{MessageKind, SyntaxError},
lexer::unicode::chars::JS_LINE_TERIMATORS,
-token::{t, CompoundToken, JavaScript, Span, TokenKind},
+token::{t, Token},
};

-use super::{CompoundValue, Lexer};
+use super::Lexer;

-impl CompoundValue for JavaScript {
-const START: TokenKind = t!("{");
-
-fn relex(lexer: &mut Lexer, _: Span) -> Result<CompoundToken<Self>, SyntaxError> {
-let span = lex_js_function_body_inner(lexer)?;
-Ok(CompoundToken {
-value: JavaScript,
-span,
-})
-}
+pub fn javascript(lexer: &mut Lexer, start: Token) -> Result<(), SyntaxError> {
+assert_eq!(start.kind, t!("{"), "Invalid start of JavaScript compound token");
+lex_js_function_body_inner(lexer)?;
+Ok(())
}

/// Lex the body of a js function.
-fn lex_js_function_body_inner(lexer: &mut Lexer) -> Result<Span, SyntaxError> {
+fn lex_js_function_body_inner(lexer: &mut Lexer) -> Result<(), SyntaxError> {
let mut block_depth = 1;
loop {
let Some(byte) = lexer.reader.next() else {
@@ -61,7 +55,7 @@ fn lex_js_function_body_inner(lexer: &mut Lexer) -> Result<Span, SyntaxError> {
}
}

-Ok(lexer.advance_span())
+Ok(())
}

/// lex a js string with the given delimiter.
@@ -1,85 +1,57 @@
-use crate::sql::Regex;
use crate::syn::{
-error::{bail, error, SyntaxError},
+error::SyntaxError,
lexer::Lexer,
-token::{t, CompoundToken, Span, Token, TokenKind},
+token::{Span, Token},
};

+mod datetime;
+mod ident;
+mod js;
+mod number;
+mod regex;
+mod strand;
+mod uuid;
+
-pub trait CompoundValue: Sized {
-/// The token which indicates the start of this compound token.
-const START: TokenKind;
+pub use datetime::{datetime, datetime_inner};
+pub use ident::flexible_ident;
+pub use js::javascript;
+pub use number::{
+duration, float, integer, number, numeric, numeric_kind, NumberKind, Numeric, NumericKind,
+};
+pub use regex::regex;
+pub use strand::strand;
+pub use uuid::uuid;

-/// Lex the start of this span to a more complex type of token.
-fn relex(lexer: &mut Lexer, start_span: Span) -> Result<CompoundToken<Self>, SyntaxError>;
+#[derive(Debug)]
+pub struct CompoundToken<T> {
+pub value: T,
+pub span: Span,
}

impl<'a> Lexer<'a> {
-pub fn lex_compound<T: CompoundValue>(
+/// Lex a more complex token from the start token.
+/// The start token should already be consumed.
+pub fn lex_compound<F, R>(
&mut self,
start: Token,
-) -> Result<CompoundToken<T>, SyntaxError> {
+f: F,
+) -> Result<CompoundToken<R>, SyntaxError>
+where
+F: Fn(&mut Self, Token) -> Result<R, SyntaxError>,
+{
assert_eq!(
-start.kind,
-T::START,
-"Invalid start of compound token, expected {} got {}",
-T::START,
-start.kind
-);
-assert_eq!(
-start.span.offset + 1,
+start.span.offset + start.span.len,
self.last_offset,
-"Tried to parse compound when lexer already ate past the start token"
+"The start token given to compound was not the last token consumed."
);

-self.last_offset = start.span.offset;
-
-T::relex(self, start.span)
-}
-}
+let res = f(self, start)?;

-impl CompoundValue for Regex {
-const START: TokenKind = t!("/");
-// re-lexes a `/` token to a regex token.
-fn relex(lexer: &mut Lexer, _: Span) -> Result<CompoundToken<Regex>, SyntaxError> {
-loop {
-match lexer.reader.next() {
-Some(b'\\') => {
-// We can't just eat all bytes after a \ because a byte might be non-ascii.
-lexer.eat(b'/');
-}
-Some(b'/') => break,
-Some(x) => {
-if !x.is_ascii() {
-if let Err(e) = lexer.reader.complete_char(x) {
-let span = lexer.advance_span();
-bail!("Invalid token: {e}", @span);
-}
-}
-}
-None => {
-let span = lexer.advance_span();
-return Err(
-error!("Failed to lex regex, unexpected eof", @span).with_data_pending()
-);
-}
-}
-}
-
-// successfully parsed the regex, time to structure it.
-let span = lexer.advance_span();
-// +1 offset to move over the first `/` -2 len to remove the last `/`
-let mut inner_span = span;
-debug_assert!(inner_span.len > 2);
-inner_span.offset += 1;
-inner_span.len -= 2;
-
-let str = lexer.span_str(inner_span);
-let regex = str.parse().map_err(|e| error!("Invalid regex: {e}", @span))?;
Ok(CompoundToken {
-value: regex,
-span,
+value: res,
+span: self.advance_span(),
})
}
}
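
Note: `lex_compound` now takes the re-lexing function as a plain closure argument instead of dispatching through the removed `CompoundValue` trait, and its debug assertion checks the full span of the start token rather than a single byte. A call site would presumably look like this (a sketch, not a line from this commit):

    // `start` must be the token the lexer produced last.
    let CompoundToken { value, span } = lexer.lex_compound(start, compound::duration)?;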
core/src/syn/lexer/compound/number.rs (new file, 405 lines)
@@ -0,0 +1,405 @@
use std::{
borrow::Cow,
num::{ParseFloatError, ParseIntError},
str::FromStr,
time::Duration,
};

use rust_decimal::Decimal;

use crate::{
sql::{
duration::{
SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK,
SECONDS_PER_YEAR,
},
Number,
},
syn::{
error::{bail, error, SyntaxError},
lexer::Lexer,
token::{t, Span, Token, TokenKind},
},
};

pub enum Numeric {
Number(Number),
Duration(Duration),
}

/// Like numeric but holds of parsing the a number into a specific value.
pub enum NumericKind {
Number(NumberKind),
Duration(Duration),
}

pub enum NumberKind {
Integer,
Float,
Decimal,
}

enum DurationSuffix {
Nano,
Micro,
Milli,
Second,
Minute,
Hour,
Day,
Week,
Year,
}

fn prepare_number_str(str: &str) -> Cow<str> {
if str.contains('_') {
Cow::Owned(str.chars().filter(|x| *x != '_').collect())
} else {
Cow::Borrowed(str)
}
}

/// Tokens which can start with digits: Number or Duration.
/// Like numeric but holds of parsing the a number into a specific value.
pub fn numeric_kind(lexer: &mut Lexer, start: Token) -> Result<NumericKind, SyntaxError> {
match start.kind {
t!("-") | t!("+") => number_kind(lexer, start).map(NumericKind::Number),
TokenKind::Digits => match lexer.reader.peek() {
Some(b'n' | b'm' | b's' | b'h' | b'y' | b'd' | b'w' | b'u') => {
duration(lexer, start).map(NumericKind::Duration)
}
Some(x) if !x.is_ascii() => duration(lexer, start).map(NumericKind::Duration),
_ => number_kind(lexer, start).map(NumericKind::Number),
},
x => {
bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number",@start.span)
}
}
}

/// Tokens which can start with digits: Number or Duration.
pub fn numeric(lexer: &mut Lexer, start: Token) -> Result<Numeric, SyntaxError> {
match start.kind {
t!("-") | t!("+") => number(lexer, start).map(Numeric::Number),
TokenKind::Digits => match lexer.reader.peek() {
Some(b'n' | b'm' | b's' | b'h' | b'y' | b'd' | b'w') => {
duration(lexer, start).map(Numeric::Duration)
}
Some(x) if !x.is_ascii() => duration(lexer, start).map(Numeric::Duration),
_ => number(lexer, start).map(Numeric::Number),
},
x => {
bail!("Unexpected token `{x}`, expected a numeric value, either a duration or number",@start.span)
}
}
}

pub fn number_kind(lexer: &mut Lexer, start: Token) -> Result<NumberKind, SyntaxError> {
let offset = start.span.offset as usize;
match start.kind {
t!("-") | t!("+") => {
eat_digits1(lexer, offset)?;
}
TokenKind::Digits => {}
x => bail!("Unexpected start token for integer: {x}",@start.span),
}

let mut kind = NumberKind::Integer;

let before_mantissa = lexer.reader.offset();
// need to test for digit.. which is a range not a floating point number.
if lexer.reader.peek1() != Some(b'.') && lexer.eat(b'.') {
eat_digits1(lexer, before_mantissa)?;
kind = NumberKind::Float;
}

let before_exponent = lexer.reader.offset();
if lexer.eat(b'e') || lexer.eat(b'E') {
if !lexer.eat(b'-') {
lexer.eat(b'+');
}

eat_digits1(lexer, before_exponent)?;
kind = NumberKind::Float;
}

if !lexer.eat(b'f') {
if lexer.eat(b'd') {
lexer.expect('e')?;
lexer.expect('c')?;
kind = NumberKind::Decimal;
}
} else {
kind = NumberKind::Float;
}

if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found unexpected character `{char}` after number token", @lexer.current_span())
}
Ok(kind)
}

pub fn number(lexer: &mut Lexer, start: Token) -> Result<Number, SyntaxError> {
let kind = number_kind(lexer, start)?;
let span = lexer.current_span();
let number_str = prepare_number_str(lexer.span_str(span));
match kind {
NumberKind::Integer => number_str
.parse()
.map(Number::Int)
.map_err(|e| error!("Failed to parse number: {e}", @lexer.current_span())),
NumberKind::Float => {
let number_str = number_str.trim_end_matches('f');
number_str
.parse()
.map(Number::Float)
.map_err(|e| error!("Failed to parse number: {e}", @lexer.current_span()))
}
NumberKind::Decimal => {
let number_str = number_str.trim_end_matches("dec");
let decimal = if number_str.contains(['e', 'E']) {
Decimal::from_scientific(number_str)
.map_err(|e| error!("Failed to parser decimal: {e}", @lexer.current_span()))?
} else {
Decimal::from_str(number_str)
.map_err(|e| error!("Failed to parser decimal: {e}", @lexer.current_span()))?
};
Ok(Number::Decimal(decimal))
}
}
}

/// Generic integer parsing method,
/// works for all unsigned integers.
pub fn integer<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
I: FromStr<Err = ParseIntError>,
{
let offset = start.span.offset as usize;
match start.kind {
t!("-") | t!("+") => {
eat_digits1(lexer, offset)?;
}
TokenKind::Digits => {}
x => bail!("Unexpected token {x}, expected integer",@start.span),
};

if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found unexpected character `{char} after integer token", @lexer.current_span())
}

let last_offset = lexer.reader.offset();
let peek = lexer.reader.peek();
if peek == Some(b'.') {
let is_mantissa = lexer.reader.peek1().map(|x| x.is_ascii_digit()).unwrap_or(false);
if is_mantissa {
let span = Span {
offset: last_offset as u32,
len: 1,
};
bail!("Unexpected character `.` starting float, only integers are allowed here", @span)
}
}

if peek == Some(b'e') || peek == Some(b'E') {
bail!("Unexpected character `{}` only integers are allowed here",peek.unwrap() as char, @lexer.current_span())
}

let span = lexer.current_span();
let str = prepare_number_str(lexer.span_str(span));
str.parse().map_err(|e| error!("Invalid integer: {e}", @span))
}

/// Generic integer parsing method,
/// works for all unsigned integers.
pub fn float<I>(lexer: &mut Lexer, start: Token) -> Result<I, SyntaxError>
where
I: FromStr<Err = ParseFloatError>,
{
let offset = start.span.offset as usize;
match start.kind {
t!("-") | t!("+") => {
eat_digits1(lexer, offset)?;
}
TokenKind::Digits => {}
x => bail!("Unexpected token {x}, expected floating point number",@start.span),
};

let before_mantissa = lexer.reader.offset();
if lexer.eat(b'.') {
eat_digits1(lexer, before_mantissa)?;
}

let before_exponent = lexer.reader.offset();
if lexer.eat(b'e') || lexer.eat(b'E') {
if !lexer.eat(b'-') {
lexer.eat(b'+');
}

eat_digits1(lexer, before_exponent)?;
}

let number_span = lexer.current_span();

lexer.eat(b'f');

if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found invalid character `{char}` after number token", @lexer.current_span())
}

let str = prepare_number_str(lexer.span_str(number_span));
str.parse().map_err(|e| error!("Invalid floating point number: {e}", @lexer.current_span()))
}

pub fn duration(lexer: &mut Lexer, start: Token) -> Result<Duration, SyntaxError> {
match start.kind {
TokenKind::Digits => {}
x => bail!("Unexpected token {x}, expected duration", @start.span),
}

let mut duration = Duration::ZERO;

let mut number_span = start.span;
loop {
let suffix = lex_duration_suffix(lexer)?;

let numeric_string = prepare_number_str(lexer.span_str(number_span));
let numeric_value: u64 = numeric_string.parse().map_err(
|e| error!("Invalid token, failed to parse duration digits: {e}",@lexer.current_span()),
)?;

let addition = match suffix {
DurationSuffix::Nano => Duration::from_nanos(numeric_value),
DurationSuffix::Micro => Duration::from_micros(numeric_value),
DurationSuffix::Milli => Duration::from_millis(numeric_value),
DurationSuffix::Second => Duration::from_secs(numeric_value),
DurationSuffix::Minute => {
let minutes = numeric_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(
|| error!("Invalid duartion, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(minutes)
}
DurationSuffix::Hour => {
let hours = numeric_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(
|| error!("Invalid duartion, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(hours)
}
DurationSuffix::Day => {
let day = numeric_value.checked_mul(SECONDS_PER_DAY).ok_or_else(
|| error!("Invalid duartion, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(day)
}
DurationSuffix::Week => {
let week = numeric_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(
|| error!("Invalid duartion, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(week)
}
DurationSuffix::Year => {
let year = numeric_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(
|| error!("Invalid duartion, value overflowed maximum allowed value", @lexer.current_span()),
)?;
Duration::from_secs(year)
}
};

duration = duration.checked_add(addition).ok_or_else(
|| error!("Invalid duration, value overflowed maximum allowed value", @lexer.current_span()),
)?;

match lexer.reader.peek() {
Some(x) if x.is_ascii_digit() => {
let before = lexer.reader.offset();
eat_digits(lexer);
number_span = lexer.span_since(before);
}
_ => break,
}
}

Ok(duration)
}

fn lex_duration_suffix(lexer: &mut Lexer) -> Result<DurationSuffix, SyntaxError> {
let suffix = match lexer.reader.next() {
Some(b'n') => {
lexer.expect('s')?;
DurationSuffix::Nano
}
Some(b'u') => {
lexer.expect('s')?;
DurationSuffix::Micro
}
Some(b'm') => {
if lexer.eat(b's') {
DurationSuffix::Milli
} else {
DurationSuffix::Minute
}
}
Some(b's') => DurationSuffix::Second,
Some(b'h') => DurationSuffix::Hour,
Some(b'd') => DurationSuffix::Day,
Some(b'w') => DurationSuffix::Week,
Some(b'y') => DurationSuffix::Year,
// Start byte of 'µ'
Some(0xC2) => {
if !lexer.eat(0xB5) {
let char = lexer.reader.complete_char(0xC2)?;
bail!("Invalid duration token, expected a duration suffix found `{char}`",@lexer.current_span())
}
lexer.expect('s')?;
DurationSuffix::Micro
}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
bail!("Invalid duration token, expected a duration suffix found `{char}`",@lexer.current_span())
}
None => {
bail!("Unexpected end of file, expected a duration suffix",@lexer.current_span())
}
};

if has_ident_after(lexer) {
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Invalid token, found invalid character `{char}` after duration suffix", @lexer.current_span())
}

Ok(suffix)
}

fn has_ident_after(lexer: &mut Lexer) -> bool {
match lexer.reader.peek() {
Some(x) => !x.is_ascii() || x.is_ascii_alphabetic(),
None => false,
}
}

fn eat_digits1(lexer: &mut Lexer, start: usize) -> Result<(), SyntaxError> {
match lexer.reader.peek() {
Some(x) if x.is_ascii_digit() => {}
Some(x) => {
let char = lexer.reader.convert_to_char(x)?;
bail!("Invalid number token, expected a digit, found: {char}", @lexer.span_since(start));
}
None => {
bail!("Unexpected end of file, expected a number token digit", @lexer.span_since(start));
}
}

eat_digits(lexer);
Ok(())
}

fn eat_digits(lexer: &mut Lexer) {
while lexer.eat_when(|x| x.is_ascii_digit() || x == b'_') {}
}
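
Note: every duration unit above is reduced to seconds through the `SECONDS_PER_*` constants using `checked_mul`, so an overflowing literal becomes an error instead of silently wrapping. The arithmetic for `90m`, written out under the assumption that `SECONDS_PER_MINUTE` is 60:

    let minutes: u64 = 90;
    let secs = minutes.checked_mul(60).expect("duration overflow"); // SECONDS_PER_MINUTE
    let total = std::time::Duration::from_secs(secs); // 5400s, i.e. 1h30m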
core/src/syn/lexer/compound/regex.rs (new file, 50 lines)
@@ -0,0 +1,50 @@
use regex::Regex;

use crate::syn::{
error::{bail, error, SyntaxError},
lexer::Lexer,
token::{t, Token},
};

pub fn regex(lexer: &mut Lexer, start: Token) -> Result<Regex, SyntaxError> {
assert_eq!(start.kind, t!("/"), "Invalid start token of regex compound");
lexer.scratch.clear();

loop {
match lexer.reader.next() {
Some(b'\\') => {
// We can't just eat all bytes after a \ because a byte might be non-ascii.
if lexer.eat(b'/') {
lexer.scratch.push('/');
} else {
lexer.scratch.push('\\');
}
}
Some(b'/') => break,
Some(x) => {
if !x.is_ascii() {
match lexer.reader.complete_char(x) {
Err(e) => {
let span = lexer.current_span();
bail!("Invalid token: {e}", @span);
}
Ok(x) => {
lexer.scratch.push(x);
}
}
} else {
lexer.scratch.push(x as char);
}
}
None => {
let span = lexer.current_span();
bail!("Failed to lex regex, unexpected eof", @span);
}
}
}

let span = lexer.current_span();
let regex = lexer.scratch.parse().map_err(|e| error!("Invalid regex: {e}", @span))?;
lexer.scratch.clear();
Ok(regex)
}
core/src/syn/lexer/compound/strand.rs (new file, 100 lines)
@@ -0,0 +1,100 @@
use std::mem;

use crate::syn::{
error::{bail, error, SyntaxError},
lexer::{unicode::chars, Lexer},
token::{t, Token},
};

pub fn strand(lexer: &mut Lexer, start: Token) -> Result<String, SyntaxError> {
let is_double = match start.kind {
t!("\"") => true,
t!("'") => false,
_ => panic!("Invalid start of strand compound token"),
};

loop {
let Some(x) = lexer.reader.next() else {
lexer.scratch.clear();
let err =
error!("Unexpected end of file, expected strand to end",@lexer.current_span());
return Err(err.with_data_pending());
};

if x.is_ascii() {
match x {
b'\'' if !is_double => {
let res = mem::take(&mut lexer.scratch);
return Ok(res);
}
b'"' if is_double => {
let res = mem::take(&mut lexer.scratch);
return Ok(res);
}
b'\0' => {
bail!("Invalid null byte in source, null bytes are not valid SurrealQL characters",@lexer.current_span());
}
b'\\' => {
// Handle escape sequences.
let Some(next) = lexer.reader.next() else {
lexer.scratch.clear();
let err = error!("Unexpected end of file, expected strand to end",@lexer.current_span());
return Err(err.with_data_pending());
};
match next {
b'\\' => {
lexer.scratch.push('\\');
}
b'\'' if !is_double => {
lexer.scratch.push('\'');
}
b'\"' if is_double => {
lexer.scratch.push('\"');
}
b'/' => {
lexer.scratch.push('/');
}
b'b' => {
lexer.scratch.push(chars::BS);
}
b'f' => {
lexer.scratch.push(chars::FF);
}
b'n' => {
lexer.scratch.push(chars::LF);
}
b'r' => {
lexer.scratch.push(chars::CR);
}
b't' => {
lexer.scratch.push(chars::TAB);
}
x => match lexer.reader.convert_to_char(x) {
Ok(char) => {
let valid_escape = if is_double {
'"'
} else {
'\''
};
bail!("Invalid escape character `{char}`, valid characters are `\\`, `{valid_escape}`, `/`, `b`, `f`, `n`, `r`, or `t`", @lexer.current_span());
}
Err(e) => {
lexer.scratch.clear();
return Err(e.into());
}
},
}
}
x => lexer.scratch.push(x as char),
}
} else {
match lexer.reader.complete_char(x) {
Ok(x) => lexer.scratch.push(x),
Err(e) => {
lexer.scratch.clear();
return Err(e.into());
}
}
}
}
}
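
Note: the escape arm above recognizes exactly `\\`, the active quote, `/`, `b`, `f`, `n`, `r`, and `t`; anything else is rejected with an error. A condensed, hypothetical view of that table (quote escapes depend on the active delimiter, so they are omitted):

    fn unescape(c: u8) -> Option<char> {
        Some(match c {
            b'\\' => '\\',
            b'/' => '/',
            b'b' => '\u{08}', // chars::BS
            b'f' => '\u{0C}', // chars::FF
            b'n' => '\n',     // chars::LF
            b'r' => '\r',     // chars::CR
            b't' => '\t',     // chars::TAB
            _ => return None,
        })
    }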
core/src/syn/lexer/compound/uuid.rs (new file, 85 lines)
@@ -0,0 +1,85 @@
use crate::syn::{
error::{bail, SyntaxError},
lexer::Lexer,
token::{t, Token},
};
use uuid::Uuid;

pub fn uuid(lexer: &mut Lexer, start: Token) -> Result<Uuid, SyntaxError> {
let double = match start.kind {
t!("u\"") => true,
t!("u'") => false,
x => panic!("Invalid start token of uuid compound: {x}"),
};

let mut uuid_buffer = [0u8; 16];
// number of bytes is 4-2-2-2-6

eat_uuid_hex(lexer, &mut uuid_buffer[0..4])?;

lexer.expect('-')?;

eat_uuid_hex(lexer, &mut uuid_buffer[4..6])?;

lexer.expect('-')?;

eat_uuid_hex(lexer, &mut uuid_buffer[6..8])?;

lexer.expect('-')?;

eat_uuid_hex(lexer, &mut uuid_buffer[8..10])?;

lexer.expect('-')?;

eat_uuid_hex(lexer, &mut uuid_buffer[10..16])?;

if double {
lexer.expect('"')?;
} else {
lexer.expect('\'')?;
}

Ok(Uuid::from_bytes(uuid_buffer))
}

fn eat_uuid_hex(lexer: &mut Lexer, buffer: &mut [u8]) -> Result<(), SyntaxError> {
// the amounts of character required is twice the buffer len.
// since every character is half a byte.
for x in buffer {
let a = eat_hex_character(lexer)?;
let b = eat_hex_character(lexer)?;
*x = (a << 4) | b;
}

Ok(())
}

fn eat_hex_character(lexer: &mut Lexer) -> Result<u8, SyntaxError> {
fn ascii_to_hex(b: u8) -> Option<u8> {
if b.is_ascii_digit() {
return Some(b - b'0');
}

if (b'a'..=b'f').contains(&b) {
return Some(b - (b'a' - 10));
}

if (b'A'..=b'F').contains(&b) {
return Some(b - (b'A' - 10));
}

None
}

let Some(peek) = lexer.reader.peek() else {
bail!("Unexpected end of file, expected UUID token to finish",@lexer.current_span());
};
let Some(res) = ascii_to_hex(peek) else {
lexer.advance_span();
let char = lexer.reader.next().unwrap();
let char = lexer.reader.convert_to_char(char)?;
bail!("Unexpected character `{char}` expected hexidecimal digit",@lexer.current_span());
};
lexer.reader.next();
Ok(res)
}
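
Note: `eat_uuid_hex` consumes two hex characters per output byte and packs the first into the high nibble: for the input pair `a5`, `ascii_to_hex` yields 10 and 5, and `(10 << 4) | 5` is `0xA5`. A minimal mirror of that packing:

    fn pack_nibbles(high: u8, low: u8) -> u8 {
        (high << 4) | low // pack_nibbles(0xA, 0x5) == 0xA5
    }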
@@ -8,18 +8,14 @@ use crate::syn::{
token::{Token, TokenKind},
};

-use super::unicode::chars;
-
-fn is_identifier_continue(x: u8) -> bool {
-matches!(x, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
-}
+use super::unicode::{chars, is_identifier_continue};

impl<'a> Lexer<'a> {
/// Lex a parameter in the form of `$[a-zA-Z0-9_]*`
///
/// # Lexer State
/// Expected the lexer to have already eaten the param starting `$`
-pub fn lex_param(&mut self) -> Token {
+pub(super) fn lex_param(&mut self) -> Token {
debug_assert_eq!(self.scratch, "");
loop {
if let Some(x) = self.reader.peek() {
@@ -34,7 +30,7 @@ impl<'a> Lexer<'a> {
}
}

-pub fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
+pub(super) fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
debug_assert_eq!(self.scratch, "");
match self.lex_surrounded_ident_err(is_backtick) {
Ok(_) => self.finish_token(TokenKind::Parameter),
@@ -51,7 +47,7 @@ impl<'a> Lexer<'a> {
///
/// When calling the caller should already know that the token can't be any other token covered
/// by `[a-zA-Z0-9_]*`.
-pub fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
+pub(super) fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
debug_assert!(matches!(start, b'a'..=b'z' | b'A'..=b'Z' | b'_'));
self.scratch.push(start as char);
self.lex_ident()
@@ -60,7 +56,7 @@ impl<'a> Lexer<'a> {
/// Lex a not surrounded identfier.
///
/// The scratch should contain only identifier valid chars.
-pub fn lex_ident(&mut self) -> Token {
+pub(super) fn lex_ident(&mut self) -> Token {
loop {
if let Some(x) = self.reader.peek() {
if is_identifier_continue(x) {
@@ -90,7 +86,7 @@ impl<'a> Lexer<'a> {
}

/// Lex an ident which is surround by delimiters.
-pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
+pub(super) fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
match self.lex_surrounded_ident_err(is_backtick) {
Ok(_) => self.finish_token(TokenKind::Identifier),
Err(e) => {
@@ -101,7 +97,10 @@ impl<'a> Lexer<'a> {
}

/// Lex an ident surrounded either by `⟨⟩` or `\`\``
-pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), SyntaxError> {
+pub(super) fn lex_surrounded_ident_err(
+&mut self,
+is_backtick: bool,
+) -> Result<(), SyntaxError> {
loop {
let Some(x) = self.reader.next() else {
let end_char = if is_backtick {
@@ -1,25 +1,18 @@
-use std::time::Duration;
-
-use chrono::{DateTime, Utc};
-
mod byte;
mod char;
+pub mod compound;
mod ident;
pub mod keywords;
-mod number;
mod reader;
-mod strand;
mod unicode;

-mod compound;
#[cfg(test)]
mod test;

pub use reader::{BytesReader, CharError};
-use uuid::Uuid;

use crate::syn::{
-error::SyntaxError,
+error::{bail, SyntaxError},
token::{Span, Token, TokenKind},
};
@@ -38,7 +31,7 @@ use crate::syn::{
#[non_exhaustive]
pub struct Lexer<'a> {
/// The reader for reading the source bytes.
-pub reader: BytesReader<'a>,
+pub(super) reader: BytesReader<'a>,
/// The one past the last character of the previous token.
last_offset: u32,
/// A buffer used to build the value of tokens which can't be read straight from the source.
@@ -58,11 +51,8 @@ pub struct Lexer<'a> {
// The parser can, depending on position in syntax, decide to parse a number in a variety of
// different precisions or formats. The only way to support all is to delay parsing the
// actual number value to when the parser can decide on a format.
-pub string: Option<String>,
-pub duration: Option<Duration>,
-pub datetime: Option<DateTime<Utc>>,
-pub uuid: Option<Uuid>,
-pub error: Option<SyntaxError>,
+pub(super) string: Option<String>,
+pub(super) error: Option<SyntaxError>,
}

impl<'a> Lexer<'a> {
@@ -78,9 +68,6 @@ impl<'a> Lexer<'a> {
scratch: String::new(),
string: None,
error: None,
-duration: None,
-datetime: None,
-uuid: None,
}
}
@@ -109,9 +96,6 @@ impl<'a> Lexer<'a> {
scratch: self.scratch,
string: self.string,
error: self.error,
-duration: self.duration,
-datetime: self.datetime,
-uuid: self.uuid,
}
}
@@ -150,7 +134,7 @@ impl<'a> Lexer<'a> {
}

// Returns the span for the current token being lexed.
-pub fn current_span(&self) -> Span {
+pub(crate) fn current_span(&self) -> Span {
// We make sure that the source is no longer then u32::MAX so this can't overflow.
let new_offset = self.reader.offset() as u32;
let len = new_offset - self.last_offset;
@@ -160,6 +144,15 @@ impl<'a> Lexer<'a> {
}
}

+pub(crate) fn span_since(&self, offset: usize) -> Span {
+let new_offset = self.reader.offset() as u32;
+let len = new_offset - offset as u32;
+Span {
+offset: offset as u32,
+len,
+}
+}
+
fn advance_span(&mut self) -> Span {
let span = self.current_span();
self.last_offset = self.reader.offset() as u32;
@@ -181,7 +174,7 @@ impl<'a> Lexer<'a> {
/// # Warning
/// Moving the lexer into a state where the next byte is within a multibyte character will
/// result in spurious errors.
-pub fn backup_before(&mut self, span: Span) {
+pub(crate) fn backup_before(&mut self, span: Span) {
self.reader.backup(span.offset as usize);
self.last_offset = span.offset;
}
@@ -191,7 +184,7 @@ impl<'a> Lexer<'a> {
/// # Warning
/// Moving the lexer into a state where the next byte is within a multibyte character will
/// result in spurious errors.
-pub fn backup_after(&mut self, span: Span) {
+pub(crate) fn backup_after(&mut self, span: Span) {
let offset = span.offset + span.len;
self.reader.backup(offset as usize);
self.last_offset = offset;
@@ -201,7 +194,7 @@ impl<'a> Lexer<'a> {
/// Otherwise returns false.
///
/// Also returns false if there is no next character.
-pub fn eat(&mut self, byte: u8) -> bool {
+fn eat(&mut self, byte: u8) -> bool {
if self.reader.peek() == Some(byte) {
self.reader.next();
true
@@ -214,7 +207,7 @@ impl<'a> Lexer<'a> {
/// and returns true. Otherwise returns false.
///
/// Also returns false if there is no next character.
-pub fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
+fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
let Some(x) = self.reader.peek() else {
return false;
};
@@ -226,6 +219,30 @@ impl<'a> Lexer<'a> {
}
}

+fn expect(&mut self, c: char) -> Result<(), SyntaxError> {
+match self.reader.peek() {
+Some(x) => {
+let offset = self.reader.offset() as u32;
+self.reader.next();
+let char = self.reader.convert_to_char(x)?;
+if char == c {
+return Ok(());
+}
+let len = self.reader.offset() as u32 - offset;
+bail!(
+"Unexpected character `{char}` expected `{c}`",
+@Span {
+offset,
+len
+}
+)
+}
+None => {
+bail!("Unexpected end of file, expected character `{c}`", @self.current_span())
+}
+}
+}
+
/// Returns the string for a given span of the source.
/// Will panic if the given span was not valid for the source, or invalid utf8
pub fn span_str(&self, span: Span) -> &'a str {
@@ -237,6 +254,20 @@ impl<'a> Lexer<'a> {
pub fn span_bytes(&self, span: Span) -> &'a [u8] {
self.reader.span(span)
}

+/// Returns an error if not all bytes were consumed.
+pub fn assert_finished(&self) -> Result<(), SyntaxError> {
+if !self.reader.is_empty() {
+let offset = self.reader.offset() as u32;
+let len = self.reader.remaining().len() as u32;
+let span = Span {
+offset,
+len,
+};
+bail!("Trailing characters", @span)
+}
+Ok(())
+}
}

impl Iterator for Lexer<'_> {
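
Note: the new `expect` and `assert_finished` helpers give the compound functions a uniform way to demand a closing delimiter and to reject trailing input when a value is parsed standalone. A sketch of the latter in use (our example, not a line from this commit):

    let value = compound::duration(&mut lexer, start)?;
    lexer.assert_finished()?; // errors with "Trailing characters" if input remains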
@@ -1,24 +0,0 @@
use crate::syn::token::{Token, TokenKind};

use super::Lexer;

impl Lexer<'_> {
pub fn lex_digits(&mut self) -> Token {
while let Some(b'0'..=b'9' | b'_') = self.reader.peek() {
self.reader.next();
}

self.finish_token(TokenKind::Digits)
}

pub fn lex_exponent(&mut self, start: u8) -> Token {
if let Some(x) = self.reader.peek() {
if x.is_ascii_alphabetic() || x == b'_' {
self.scratch.push(start as char);
return self.lex_ident();
}
};

self.finish_token(TokenKind::Exponent)
}
}
@@ -91,6 +91,11 @@ impl<'a> BytesReader<'a> {
self.remaining().first().copied()
}

+#[inline]
+pub fn peek1(&self) -> Option<u8> {
+self.remaining().get(1).copied()
+}
+
#[inline]
pub fn span(&self, span: Span) -> &'a [u8] {
&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
@@ -1,103 +0,0 @@
//! Lexing of strand like characters.

use std::mem;

use crate::syn::{
error::error,
token::{QouteKind, Token, TokenKind},
};

use super::{unicode::chars, Lexer};

impl<'a> Lexer<'a> {
/// Lex a plain strand with either single or double quotes.
pub fn relex_strand(&mut self, token: Token) -> Token {
let is_double = match token.kind {
TokenKind::Qoute(QouteKind::Plain) => false,
TokenKind::Qoute(QouteKind::PlainDouble) => true,
x => panic!("invalid token kind, '{:?}' is not allowed for re-lexing strands", x),
};

self.last_offset = token.span.offset;

loop {
let Some(x) = self.reader.next() else {
self.scratch.clear();
return self.eof_token();
};

if x.is_ascii() {
match x {
b'\'' if !is_double => {
self.string = Some(mem::take(&mut self.scratch));
return self.finish_token(TokenKind::Strand);
}
b'"' if is_double => {
self.string = Some(mem::take(&mut self.scratch));
return self.finish_token(TokenKind::Strand);
}
b'\0' => {
let err = error!("Invalid null byte in source, null bytes are not valid SurrealQL characters",@self.current_span());
return self.invalid_token(err);
}
b'\\' => {
// Handle escape sequences.
let Some(next) = self.reader.next() else {
self.scratch.clear();
return self.eof_token();
};
match next {
b'\\' => {
self.scratch.push('\\');
}
b'\'' if !is_double => {
self.scratch.push('\'');
}
b'\"' if is_double => {
self.scratch.push('\"');
}
b'/' => {
self.scratch.push('/');
}
b'b' => {
self.scratch.push(chars::BS);
}
b'f' => {
self.scratch.push(chars::FF);
}
b'n' => {
self.scratch.push(chars::LF);
}
b'r' => {
self.scratch.push(chars::CR);
}
b't' => {
self.scratch.push(chars::TAB);
}
x => match self.reader.convert_to_char(x) {
Ok(char) => {
let valid_escape = if is_double {
'"'
} else {
'\''
};
let err = error!("Invalid escape character `{char}`, valid characters are `\\`, `{valid_escape}`, `/`, `b`, `f`, `n`, `r`, or `t`", @self.current_span());
return self.invalid_token(err);
}
Err(e) => {
return self.invalid_token(e.into());
}
},
}
}
x => self.scratch.push(x as char),
}
} else {
match self.reader.complete_char(x) {
Ok(x) => self.scratch.push(x),
Err(e) => return self.invalid_token(e.into()),
}
}
}
}
}
@ -1,4 +1,4 @@
|
|||
use crate::syn::token::{t, DurationSuffix, TokenKind};
|
||||
use crate::syn::token::{t, TokenKind};
|
||||
|
||||
macro_rules! test_case(
|
||||
($source:expr => [$($token:expr),*$(,)?]) => {
|
||||
|
@ -40,7 +40,7 @@ fn operators() {
|
|||
"# => [
|
||||
t!("-"), t!(" "), t!("+"),t!(" "), t!("/"),t!(" "), t!("*"),t!(" "), t!("!"),t!(" "), t!("**"), t!(" "),
|
||||
|
||||
t!("<"), t!(" "), t!(">"), t!(" "), t!("<="), t!(" "), t!(">="), t!(" "), t!("<-"), t!(" "), t!("<->"), t!(" "), t!("->"), t!(" "),
|
||||
t!("<"), t!(" "), t!(">"), t!(" "), t!("<="), t!(" "), t!(">="), t!(" "), t!("<"), t!("-"), t!(" "), t!("<"), t!("->"), t!(" "), t!("->"), t!(" "),
|
||||
|
||||
t!("="), t!(" "), t!("=="), t!(" "), t!("-="), t!(" "), t!("+="), t!(" "), t!("!="), t!(" "), t!("+?="), t!(" "),
|
||||
|
||||
|
@ -110,171 +110,6 @@ fn identifiers() {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn numbers() {
|
||||
test_case! {
|
||||
r#"123123+32010230.123012031+33043030dec+33043030f+303e10dec+"#
|
||||
=> [
|
||||
TokenKind::Digits, // 123123
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 32010230
|
||||
t!("."),
|
||||
TokenKind::Digits, // 123012031
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 33043030
|
||||
t!("dec"),
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 33043030
|
||||
t!("f"),
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 303
|
||||
TokenKind::Exponent , // e
|
||||
TokenKind::Digits, // 10
|
||||
t!("dec"),
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+123129decs+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 123129
|
||||
TokenKind::Identifier, // decs
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+39349fs+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 39349
|
||||
TokenKind::Identifier, // fs
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+394393df+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 394393
|
||||
TokenKind::Identifier, // df
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+32932932def+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 32932932
|
||||
TokenKind::Identifier, // def
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+329239329z+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Digits, // 329239329
|
||||
TokenKind::Identifier, // z
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn duration() {
|
||||
test_case! {
|
||||
r#"
|
||||
1ns+1µs+1us+1ms+1s+1m+1h+1w+1y
|
||||
|
||||
1nsa+1ans+1aus+1usa+1ams+1msa+1am+1ma+1ah+1ha+1aw+1wa+1ay+1ya+1µsa
|
||||
"#
|
||||
=> [
|
||||
t!(" "),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Nano),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::MicroUnicode),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Micro),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Milli),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Second),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Minute),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Hour),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Week),
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::DurationSuffix(DurationSuffix::Year),
|
||||
|
||||
t!(" "),
|
||||
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Digits,
|
||||
TokenKind::Invalid,
|
||||
TokenKind::Identifier,
|
||||
t!(" "),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keyword() {
|
||||
test_case! {
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
//! Unicode related utilities.
|
||||
|
||||
pub fn is_identifier_continue(x: u8) -> bool {
|
||||
matches!(x, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
|
||||
}
|
||||
|
||||
/// Character constants
|
||||
pub mod chars {
|
||||
// Character tabulation
|
||||
|
|
|
@ -18,6 +18,7 @@ pub trait Parse<T> {
|
|||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
use lexer::{compound, Lexer};
|
||||
use parser::Parser;
|
||||
use reblessive::Stack;
|
||||
use token::t;
|
||||
|
@ -52,12 +53,28 @@ pub fn parse(input: &str) -> Result<Query, Error> {
|
|||
/// Parses a SurrealQL [`Value`].
|
||||
#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))]
|
||||
pub fn value(input: &str) -> Result<Value, Error> {
|
||||
debug!("parsing value, input = {input}");
|
||||
let mut parser = Parser::new(input.as_bytes());
|
||||
let mut stack = Stack::new();
|
||||
stack
|
||||
.enter(|stk| parser.parse_value_table(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
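A usage sketch for this entry point; the `crate::syn::value` call path and the object-shaped input are assumptions for illustration:

let v = crate::syn::value("{ a: 1, b: [1, 2] }").expect("valid SurrealQL value");
assert!(matches!(v, crate::sql::Value::Object(_)));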
|
||||
|
||||
/// Parses a SurrealQL [`Value`].
|
||||
#[cfg(test)]
|
||||
#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))]
|
||||
pub(crate) fn value_field(input: &str) -> Result<Value, Error> {
|
||||
debug!("parsing value, input = {input}");
|
||||
let mut parser = Parser::new(input.as_bytes());
|
||||
let mut stack = Stack::new();
|
||||
stack
|
||||
.enter(|stk| parser.parse_value_field(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -70,8 +87,9 @@ pub fn value_legacy_strand(input: &str) -> Result<Value, Error> {
|
|||
let mut stack = Stack::new();
|
||||
parser.allow_legacy_strand(true);
|
||||
stack
|
||||
.enter(|stk| parser.parse_value(stk))
|
||||
.enter(|stk| parser.parse_value_table(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -85,6 +103,7 @@ pub fn json(input: &str) -> Result<Value, Error> {
|
|||
stack
|
||||
.enter(|stk| parser.parse_json(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -99,6 +118,7 @@ pub fn json_legacy_strand(input: &str) -> Result<Value, Error> {
|
|||
stack
|
||||
.enter(|stk| parser.parse_json(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -111,6 +131,7 @@ pub fn subquery(input: &str) -> Result<Subquery, Error> {
|
|||
stack
|
||||
.enter(|stk| parser.parse_full_subquery(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -120,10 +141,12 @@ pub fn subquery(input: &str) -> Result<Subquery, Error> {
|
|||
pub fn idiom(input: &str) -> Result<Idiom, Error> {
|
||||
debug!("parsing idiom, input = {input}");
|
||||
let mut parser = Parser::new(input.as_bytes());
|
||||
parser.table_as_field = true;
|
||||
let mut stack = Stack::new();
|
||||
stack
|
||||
.enter(|stk| parser.parse_plain_idiom(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -131,8 +154,12 @@ pub fn idiom(input: &str) -> Result<Idiom, Error> {
|
|||
/// Parse a datetime without enclosing delimiters from a string.
|
||||
pub fn datetime_raw(input: &str) -> Result<Datetime, Error> {
|
||||
debug!("parsing datetime, input = {input}");
|
||||
let mut parser = Parser::new(input.as_bytes());
|
||||
parser.parse_inner_datetime().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery)
|
||||
let mut lexer = Lexer::new(input.as_bytes());
|
||||
let res = compound::datetime_inner(&mut lexer);
|
||||
if let Err(e) = lexer.assert_finished() {
|
||||
return Err(Error::InvalidQuery(e.render_on(input)));
|
||||
}
|
||||
res.map(Datetime).map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery)
|
||||
}
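A usage sketch for the rewritten function, assuming an RFC 3339 style input (the format `compound::datetime_inner` accepts):

let dt = datetime_raw("2012-04-23T18:25:43.511Z").expect("valid datetime");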
|
||||
|
||||
/// Parse a duration from a string.
|
||||
|
@ -141,6 +168,7 @@ pub fn duration(input: &str) -> Result<Duration, Error> {
|
|||
let mut parser = Parser::new(input.as_bytes());
|
||||
parser
|
||||
.next_token_value::<Duration>()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -153,6 +181,7 @@ pub fn range(input: &str) -> Result<Range, Error> {
|
|||
stack
|
||||
.enter(|stk| parser.parse_range(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -165,6 +194,7 @@ pub fn thing(input: &str) -> Result<Thing, Error> {
|
|||
stack
|
||||
.enter(|stk| parser.parse_thing(stk))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
@ -183,6 +213,7 @@ pub fn block(input: &str) -> Result<Block, Error> {
|
|||
stack
|
||||
.enter(|stk| parser.parse_block(stk, start))
|
||||
.finish()
|
||||
.and_then(|e| parser.assert_finished().map(|_| e))
|
||||
.map_err(|e| e.render_on(input))
|
||||
.map_err(Error::InvalidQuery)
|
||||
}
|
||||
|
|
|
@ -1,181 +0,0 @@
|
|||
use std::ops::RangeInclusive;
|
||||
|
||||
use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
|
||||
|
||||
use crate::{
|
||||
sql::Datetime,
|
||||
syn::{
|
||||
error::{bail, error},
|
||||
parser::{expected_whitespace, unexpected, ParseResult, Parser},
|
||||
token::{t, DatetimeChars, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
impl Parser<'_> {
|
||||
pub fn parse_datetime(&mut self) -> ParseResult<Datetime> {
|
||||
let start = self.peek();
|
||||
let double = match start.kind {
|
||||
t!("d\"") => true,
|
||||
t!("d'") => false,
|
||||
x => bail!("Expected a datetime found {}",x, @start.span),
|
||||
};
|
||||
|
||||
self.pop_peek();
|
||||
|
||||
let datetime = self.parse_inner_datetime()?;
|
||||
|
||||
if double {
|
||||
expected_whitespace!(self, t!("\""));
|
||||
} else {
|
||||
expected_whitespace!(self, t!("'"));
|
||||
}
|
||||
|
||||
Ok(datetime)
|
||||
}
|
||||
|
||||
/// Parses the datetime without surrounding quotes
|
||||
pub fn parse_inner_datetime(&mut self) -> ParseResult<Datetime> {
|
||||
let start_date = self.peek_whitespace().span;
|
||||
|
||||
let year_neg = self.eat_whitespace(t!("-"));
|
||||
if !year_neg {
|
||||
self.eat_whitespace(t!("+"));
|
||||
}
|
||||
|
||||
let year = self.parse_datetime_digits(4, 0..=9999)?;
|
||||
expected_whitespace!(self, t!("-"));
|
||||
let month = self.parse_datetime_digits(2, 1..=12)?;
|
||||
expected_whitespace!(self, t!("-"));
|
||||
let day = self.parse_datetime_digits(2, 1..=31)?;
|
||||
|
||||
let date_span = start_date.covers(self.last_span());
|
||||
|
||||
let year = if year_neg {
|
||||
-(year as i32)
|
||||
} else {
|
||||
year as i32
|
||||
};
|
||||
|
||||
let date = NaiveDate::from_ymd_opt(year, month as u32, day as u32).ok_or_else(
|
||||
|| error!("Invalid DateTime date: date outside of valid range", @date_span),
|
||||
)?;
|
||||
|
||||
if !self.eat(TokenKind::DatetimeChars(DatetimeChars::T)) {
|
||||
let time = NaiveTime::default();
|
||||
let date_time = NaiveDateTime::new(date, time);
|
||||
|
||||
let datetime =
|
||||
Utc.fix().from_local_datetime(&date_time).earliest().unwrap().with_timezone(&Utc);
|
||||
|
||||
return Ok(Datetime(datetime));
|
||||
}
|
||||
|
||||
let start_time = self.peek_whitespace().span;
|
||||
|
||||
let hour = self.parse_datetime_digits(2, 0..=24)?;
|
||||
expected_whitespace!(self, t!(":"));
|
||||
let minute = self.parse_datetime_digits(2, 0..=59)?;
|
||||
expected_whitespace!(self, t!(":"));
|
||||
let second = self.parse_datetime_digits(2, 0..=59)?;
|
||||
|
||||
let nanos = if self.eat_whitespace(t!(".")) {
|
||||
let digits_token = expected_whitespace!(self, TokenKind::Digits);
|
||||
let slice = self.lexer.span_bytes(digits_token.span);
|
||||
|
||||
if slice.len() > 9 {
|
||||
bail!("Invalid DateTime nanoseconds, too many nanosecond digits",
|
||||
@digits_token.span => "This section contains more then 9 digits");
|
||||
}
|
||||
|
||||
let mut number = 0u32;
|
||||
for i in 0..9 {
|
||||
let Some(c) = slice.get(i).copied() else {
|
||||
// If digits are missing they are counted as 0's
|
||||
for _ in i..9 {
|
||||
number *= 10;
|
||||
}
|
||||
break;
|
||||
};
|
||||
number *= 10;
|
||||
number += (c - b'0') as u32;
|
||||
}
|
||||
|
||||
number
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let time_span = start_time.covers(self.last_span());
|
||||
|
||||
let time = NaiveTime::from_hms_nano_opt(hour as u32, minute as u32, second as u32, nanos)
|
||||
.ok_or_else(
|
||||
|| error!("Invalid DateTime time: time outside of valid range", @time_span),
|
||||
)?;
|
||||
|
||||
let peek = self.peek_whitespace();
|
||||
let timezone = match peek.kind {
|
||||
t!("+") => self.parse_datetime_timezone(false)?,
|
||||
t!("-") => self.parse_datetime_timezone(true)?,
|
||||
TokenKind::DatetimeChars(DatetimeChars::Z) => {
|
||||
self.pop_peek();
|
||||
Utc.fix()
|
||||
}
|
||||
_ => unexpected!(self, peek, "`Z` or a timezone"),
|
||||
};
|
||||
|
||||
let date_time = NaiveDateTime::new(date, time);
|
||||
|
||||
let datetime = timezone
|
||||
.from_local_datetime(&date_time)
|
||||
.earliest()
|
||||
// this should never panic with a fixed offset.
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
Ok(Datetime(datetime))
|
||||
}
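To make the fractional-second handling above concrete: missing trailing digits count as zeros, so `.5` yields 500_000_000 nanoseconds. A minimal sketch of the same padding rule:

fn fraction_to_nanos(digits: &[u8]) -> u32 {
    // Equivalent to the loop above: read up to 9 digits, treating any
    // missing trailing digits as zeros.
    let mut number = 0u32;
    for i in 0..9 {
        number *= 10;
        if let Some(&c) = digits.get(i) {
            number += (c - b'0') as u32;
        }
    }
    number
}
// fraction_to_nanos(b"5") == 500_000_000
// fraction_to_nanos(b"123456789") == 123_456_789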
|
||||
|
||||
fn parse_datetime_timezone(&mut self, neg: bool) -> ParseResult<FixedOffset> {
|
||||
self.pop_peek();
|
||||
let hour = self.parse_datetime_digits(2, 0..=23)?;
|
||||
expected_whitespace!(self, t!(":"));
|
||||
let minute = self.parse_datetime_digits(2, 0..=59)?;
|
||||
|
||||
// The range checks on the digits ensure that the offset can't exceed 23:59, so the
|
||||
// unwraps below won't panic.
|
||||
if neg {
|
||||
Ok(FixedOffset::west_opt((hour * 3600 + minute * 60) as i32).unwrap())
|
||||
} else {
|
||||
Ok(FixedOffset::east_opt((hour * 3600 + minute * 60) as i32).unwrap())
|
||||
}
|
||||
}
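A worked example of the offset arithmetic above:

// `+05:30` => FixedOffset::east_opt(5 * 3600 + 30 * 60), i.e. 19_800 seconds
// east of UTC; `-08:00` => FixedOffset::west_opt(8 * 3600), i.e. 28_800 seconds west.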
|
||||
|
||||
fn parse_datetime_digits(
|
||||
&mut self,
|
||||
len: usize,
|
||||
range: RangeInclusive<usize>,
|
||||
) -> ParseResult<usize> {
|
||||
let t = self.peek_whitespace();
|
||||
match t.kind {
|
||||
TokenKind::Digits => {}
|
||||
_ => unexpected!(self, t, "datetime digits"),
|
||||
}
|
||||
|
||||
let digits_str = self.lexer.span_str(t.span);
|
||||
if digits_str.len() != len {
|
||||
bail!("Datetime digits section not the correct length, needs to be {len} characters",
|
||||
@t.span => "This section has a length of {}", digits_str.len());
|
||||
}
|
||||
|
||||
self.pop_peek();
|
||||
|
||||
// This should always parse as it has been validated by the lexer.
|
||||
let value = digits_str.parse().unwrap();
|
||||
|
||||
if !range.contains(&value) {
|
||||
bail!("Datetime digits section outside of valid range of {}..={}", range.start(),range.end(), @t.span);
|
||||
}
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
}
|
|
@ -1,30 +1,31 @@
|
|||
use crate::{
|
||||
sql::{language::Language, Datetime, Duration, Ident, Param, Regex, Strand, Table, Uuid},
|
||||
syn::{
|
||||
lexer::compound,
|
||||
parser::{mac::unexpected, ParseResult, Parser},
|
||||
token::{t, QouteKind, TokenKind},
|
||||
token::{self, t, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
mod datetime;
|
||||
use super::mac::pop_glued;
|
||||
|
||||
mod number;
|
||||
mod uuid;
|
||||
|
||||
/// A trait for parsing single tokens with a specific value.
|
||||
pub trait TokenValue: Sized {
|
||||
pub(crate) trait TokenValue: Sized {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self>;
|
||||
}
|
||||
|
||||
impl TokenValue for Ident {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
let token = parser.glue_ident(false)?;
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
TokenKind::Identifier => {
|
||||
parser.pop_peek();
|
||||
let str = parser.lexer.string.take().unwrap();
|
||||
Ok(Ident(str))
|
||||
}
|
||||
TokenKind::Keyword(_) | TokenKind::Language(_) | TokenKind::Algorithm(_) => {
|
||||
x if Parser::kind_is_keyword_like(x) => {
|
||||
let s = parser.pop_peek().span;
|
||||
Ok(Ident(parser.lexer.span_str(s).to_owned()))
|
||||
}
|
||||
|
@ -75,11 +76,13 @@ impl TokenValue for Param {
|
|||
|
||||
impl TokenValue for Duration {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
let token = parser.glue_duration()?;
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
TokenKind::Duration => {
|
||||
TokenKind::Glued(token::Glued::Duration) => Ok(pop_glued!(parser, Duration)),
|
||||
TokenKind::Digits => {
|
||||
parser.pop_peek();
|
||||
Ok(Duration(parser.lexer.duration.unwrap()))
|
||||
let v = parser.lexer.lex_compound(token, compound::duration)?.value;
|
||||
Ok(Duration(v))
|
||||
}
|
||||
_ => unexpected!(parser, token, "a duration"),
|
||||
}
|
||||
|
@ -88,7 +91,16 @@ impl TokenValue for Duration {
|
|||
|
||||
impl TokenValue for Datetime {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parser.parse_datetime()
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
TokenKind::Glued(token::Glued::Datetime) => Ok(pop_glued!(parser, Datetime)),
|
||||
t!("d\"") | t!("d'") => {
|
||||
parser.pop_peek();
|
||||
let v = parser.lexer.lex_compound(token, compound::datetime)?.value;
|
||||
Ok(Datetime(v))
|
||||
}
|
||||
_ => unexpected!(parser, token, "a datetime"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -96,17 +108,11 @@ impl TokenValue for Strand {
|
|||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
TokenKind::Qoute(QouteKind::Plain | QouteKind::PlainDouble) => {
|
||||
TokenKind::Glued(token::Glued::Strand) => Ok(pop_glued!(parser, Strand)),
|
||||
t!("\"") | t!("'") => {
|
||||
parser.pop_peek();
|
||||
let t = parser.lexer.relex_strand(token);
|
||||
let TokenKind::Strand = t.kind else {
|
||||
unexpected!(parser, t, "a strand")
|
||||
};
|
||||
Ok(Strand(parser.lexer.string.take().unwrap()))
|
||||
}
|
||||
TokenKind::Strand => {
|
||||
parser.pop_peek();
|
||||
Ok(Strand(parser.lexer.string.take().unwrap()))
|
||||
let v = parser.lexer.lex_compound(token, compound::strand)?.value;
|
||||
Ok(Strand(v))
|
||||
}
|
||||
_ => unexpected!(parser, token, "a strand"),
|
||||
}
|
||||
|
@ -115,7 +121,16 @@ impl TokenValue for Strand {
|
|||
|
||||
impl TokenValue for Uuid {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parser.parse_uuid()
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
TokenKind::Glued(token::Glued::Uuid) => Ok(pop_glued!(parser, Uuid)),
|
||||
t!("u\"") | t!("u'") => {
|
||||
parser.pop_peek();
|
||||
let v = parser.lexer.lex_compound(token, compound::uuid)?.value;
|
||||
Ok(Uuid(v))
|
||||
}
|
||||
_ => unexpected!(parser, token, "a uuid"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -124,8 +139,9 @@ impl TokenValue for Regex {
|
|||
let peek = parser.peek();
|
||||
match peek.kind {
|
||||
t!("/") => {
|
||||
let pop = parser.pop_peek();
|
||||
Ok(parser.lexer.lex_compound(pop)?.value)
|
||||
parser.pop_peek();
|
||||
let v = parser.lexer.lex_compound(peek, compound::regex)?.value;
|
||||
Ok(Regex(v))
|
||||
}
|
||||
_ => unexpected!(parser, peek, "a regex"),
|
||||
}
|
||||
|
@ -134,9 +150,40 @@ impl TokenValue for Regex {
|
|||
|
||||
impl Parser<'_> {
|
||||
/// Parse a token value from the next token in the parser.
|
||||
pub fn next_token_value<V: TokenValue>(&mut self) -> ParseResult<V> {
|
||||
pub(crate) fn next_token_value<V: TokenValue>(&mut self) -> ParseResult<V> {
|
||||
V::from_token(self)
|
||||
}
|
||||
|
||||
pub(crate) fn parse_flexible_ident(&mut self) -> ParseResult<Ident> {
|
||||
let token = self.next();
|
||||
match token.kind {
|
||||
TokenKind::Digits => {
|
||||
let peek = self.peek_whitespace();
|
||||
let span = match peek.kind {
|
||||
x if Self::kind_is_keyword_like(x) => {
|
||||
self.pop_peek();
|
||||
token.span.covers(peek.span)
|
||||
}
|
||||
TokenKind::Identifier => {
|
||||
self.pop_peek();
|
||||
token.span.covers(peek.span)
|
||||
}
|
||||
_ => token.span,
|
||||
};
|
||||
Ok(Ident(self.lexer.span_str(span).to_owned()))
|
||||
}
|
||||
TokenKind::Identifier => {
|
||||
let str = self.lexer.string.take().unwrap();
|
||||
Ok(Ident(str))
|
||||
}
|
||||
x if Self::kind_is_keyword_like(x) => {
|
||||
Ok(Ident(self.lexer.span_str(token.span).to_owned()))
|
||||
}
|
||||
_ => {
|
||||
unexpected!(self, token, "an identifier");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -159,8 +206,8 @@ mod test {
|
|||
|
||||
assert_eq!(
|
||||
r,
|
||||
sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Idiom(
|
||||
sql::Idiom(vec![sql::Part::Field(sql::Ident(ident.to_string()))])
|
||||
sql::Query(sql::Statements(vec![sql::Statement::Value(sql::Value::Table(
|
||||
sql::Table(ident.to_string())
|
||||
))]))
|
||||
)
|
||||
}
|
||||
|
|
|
@ -1,71 +1,35 @@
|
|||
use std::{mem, num::ParseIntError, str::FromStr};
|
||||
|
||||
use rust_decimal::Decimal;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
num::{ParseFloatError, ParseIntError},
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
sql::Number,
|
||||
syn::{
|
||||
error::error,
|
||||
parser::{mac::unexpected, ParseResult, Parser},
|
||||
token::{t, NumberKind, TokenKind},
|
||||
error::{bail, error},
|
||||
lexer::compound::{self, NumberKind},
|
||||
parser::{mac::unexpected, GluedValue, ParseResult, Parser},
|
||||
token::{self, t, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
use super::TokenValue;
|
||||
|
||||
fn prepare_number_str(str: &str) -> Cow<str> {
|
||||
if str.contains('_') {
|
||||
Cow::Owned(str.chars().filter(|x| *x != '_').collect())
|
||||
} else {
|
||||
Cow::Borrowed(str)
|
||||
}
|
||||
}
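This keeps the borrowed path allocation-free and only copies when a `_` digit separator is present; for example:

// prepare_number_str("1_000_000").as_ref() == "1000000"  (allocates)
// prepare_number_str("42").as_ref() == "42"              (borrows, no allocation)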
|
||||
|
||||
/// Generic integer parsing method,
|
||||
/// works for all unsigned integers.
|
||||
fn parse_integer<I>(parser: &mut Parser<'_>) -> ParseResult<I>
|
||||
where
|
||||
I: FromStr<Err = ParseIntError>,
|
||||
{
|
||||
let mut peek = parser.peek();
|
||||
|
||||
if let t!("-") = peek.kind {
|
||||
unexpected!(parser,peek,"an integer", => "only positive integers are allowed here")
|
||||
}
|
||||
|
||||
if let t!("+") = peek.kind {
|
||||
peek = parser.peek_whitespace();
|
||||
}
|
||||
|
||||
match peek.kind {
|
||||
TokenKind::Digits => {
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
t!("+") | TokenKind::Digits => {
|
||||
parser.pop_peek();
|
||||
assert!(!parser.has_peek());
|
||||
|
||||
let p = parser.peek_whitespace();
|
||||
match p.kind {
|
||||
t!(".") => {
|
||||
unexpected!(parser, p, "an integer")
|
||||
}
|
||||
t!("dec") => {
|
||||
unexpected!(parser, p, "an integer", => "decimal numbers not supported here")
|
||||
}
|
||||
x if Parser::tokenkind_continues_ident(x) => {
|
||||
unexpected!(parser, p, "an integer")
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// remove the possible "f" number suffix and any '_' characters
|
||||
let res = prepare_number_str(parser.lexer.span_str(peek.span))
|
||||
.parse()
|
||||
.map_err(|e| error!("Failed to parse integer: {e}", @peek.span))?;
|
||||
Ok(res)
|
||||
Ok(parser.lexer.lex_compound(token, compound::integer)?.value)
|
||||
}
|
||||
_ => unexpected!(parser, peek, "an integer"),
|
||||
t!("-") => {
|
||||
bail!("Unexpected token `-`", @token.span => "Only positive integers allowed here")
|
||||
}
|
||||
_ => unexpected!(parser, token, "an unsigned integer"),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,90 +57,68 @@ impl TokenValue for u8 {
|
|||
}
|
||||
}
|
||||
|
||||
/// Generic float parsing method,
|
||||
/// works for both f32 and f64
|
||||
fn parse_float<F>(parser: &mut Parser<'_>) -> ParseResult<F>
|
||||
where
|
||||
F: FromStr<Err = ParseFloatError>,
|
||||
{
|
||||
let peek = parser.peek();
|
||||
// find initial digits
|
||||
match peek.kind {
|
||||
TokenKind::NaN => return Ok("NaN".parse().unwrap()),
|
||||
TokenKind::Digits | t!("+") | t!("-") => {}
|
||||
_ => unexpected!(parser, peek, "a floating point number"),
|
||||
};
|
||||
let float_token = parser.glue_float()?;
|
||||
match float_token.kind {
|
||||
TokenKind::Number(NumberKind::Float) => {
|
||||
parser.pop_peek();
|
||||
}
|
||||
_ => unexpected!(parser, float_token, "a floating point number"),
|
||||
};
|
||||
|
||||
let span = parser.lexer.span_str(float_token.span);
|
||||
|
||||
// remove the possible "f" number suffix and any '_' characters
|
||||
prepare_number_str(span.strip_suffix('f').unwrap_or(span))
|
||||
.parse()
|
||||
.map_err(|e| error!("Failed to parser floating point number: {e}", @float_token.span))
|
||||
}
|
||||
|
||||
impl TokenValue for f32 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_float(parser)
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
t!("+") | t!("-") | TokenKind::Digits => {
|
||||
parser.pop_peek();
|
||||
Ok(parser.lexer.lex_compound(token, compound::float)?.value)
|
||||
}
|
||||
_ => unexpected!(parser, token, "a floating point number"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for f64 {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
parse_float(parser)
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
t!("+") | t!("-") | TokenKind::Digits => {
|
||||
parser.pop_peek();
|
||||
Ok(parser.lexer.lex_compound(token, compound::float)?.value)
|
||||
}
|
||||
_ => unexpected!(parser, token, "a floating point number"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TokenValue for Number {
|
||||
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
|
||||
let number = parser.glue_number()?;
|
||||
let number_kind = match number.kind {
|
||||
TokenKind::NaN => {
|
||||
let token = parser.peek();
|
||||
match token.kind {
|
||||
TokenKind::Glued(token::Glued::Number) => {
|
||||
parser.pop_peek();
|
||||
return Ok(Number::Float(f64::NAN));
|
||||
}
|
||||
TokenKind::Number(x) => x,
|
||||
_ => unexpected!(parser, number, "a number"),
|
||||
};
|
||||
|
||||
parser.pop_peek();
|
||||
let span = parser.lexer.span_str(number.span);
|
||||
|
||||
match number_kind {
|
||||
NumberKind::Decimal => {
|
||||
let str = prepare_number_str(span.strip_suffix("dec").unwrap_or(span));
|
||||
let decimal = if str.contains('e') {
|
||||
Decimal::from_scientific(str.as_ref())
|
||||
.map_err(|e| error!("Failed to parser decimal: {e}", @number.span))?
|
||||
} else {
|
||||
Decimal::from_str(str.as_ref())
|
||||
.map_err(|e| error!("Failed to parser decimal: {e}", @number.span))?
|
||||
let GluedValue::Number(x) = mem::take(&mut parser.glued_value) else {
|
||||
panic!("Glued token was next but glued value was not of the correct value");
|
||||
};
|
||||
|
||||
Ok(Number::Decimal(decimal))
|
||||
let number_str = parser.lexer.span_str(token.span);
|
||||
match x {
|
||||
NumberKind::Integer => number_str
|
||||
.parse()
|
||||
.map(Number::Int)
|
||||
.map_err(|e| error!("Failed to parse number: {e}", @token.span)),
|
||||
NumberKind::Float => number_str
|
||||
.parse()
|
||||
.map(Number::Float)
|
||||
.map_err(|e| error!("Failed to parse number: {e}", @token.span)),
|
||||
NumberKind::Decimal => {
|
||||
let decimal = if number_str.contains(['e', 'E']) {
|
||||
Decimal::from_scientific(number_str)
|
||||
.map_err(|e| error!("Failed to parser decimal: {e}", @token.span))?
|
||||
} else {
|
||||
Decimal::from_str(number_str)
|
||||
.map_err(|e| error!("Failed to parser decimal: {e}", @token.span))?
|
||||
};
|
||||
Ok(Number::Decimal(decimal))
|
||||
}
|
||||
}
|
||||
}
|
||||
NumberKind::Float => {
|
||||
let float =
|
||||
prepare_number_str(span.strip_suffix('f').unwrap_or(span)).parse().map_err(
|
||||
|e| error!("Failed to parser floating point number: {e}", @number.span),
|
||||
)?;
|
||||
|
||||
Ok(Number::Float(float))
|
||||
}
|
||||
NumberKind::Integer => {
|
||||
let integer = prepare_number_str(span.strip_suffix('f').unwrap_or(span))
|
||||
.parse()
|
||||
.map_err(|e| error!("Failed to parse integer: {e}", @number.span))?;
|
||||
|
||||
Ok(Number::Int(integer))
|
||||
t!("+") | t!("-") | TokenKind::Digits => {
|
||||
parser.pop_peek();
|
||||
Ok(parser.lexer.lex_compound(token, compound::number)?.value)
|
||||
}
|
||||
_ => unexpected!(parser, token, "a number"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,191 +0,0 @@
|
|||
use crate::{
|
||||
sql::Uuid,
|
||||
syn::{
|
||||
error::bail,
|
||||
parser::{
|
||||
mac::{expected_whitespace, unexpected},
|
||||
ParseResult, Parser,
|
||||
},
|
||||
token::{t, DurationSuffix, NumberSuffix, TokenKind, VectorTypeKind},
|
||||
},
|
||||
};
|
||||
|
||||
impl Parser<'_> {
|
||||
/// Parses a uuid strand.
|
||||
pub fn parse_uuid(&mut self) -> ParseResult<Uuid> {
|
||||
let quote_token = self.peek();
|
||||
|
||||
let double = match quote_token.kind {
|
||||
t!("u\"") => true,
|
||||
t!("u'") => false,
|
||||
_ => unexpected!(self, quote_token, "a uuid"),
|
||||
};
|
||||
|
||||
self.pop_peek();
|
||||
|
||||
// number of bytes is 4-2-2-2-6
|
||||
|
||||
let mut uuid_buffer = [0u8; 16];
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[0..4])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[4..6])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[6..8])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[8..10])?;
|
||||
|
||||
expected_whitespace!(self, t!("-"));
|
||||
|
||||
self.eat_uuid_hex(&mut uuid_buffer[10..16])?;
|
||||
|
||||
if double {
|
||||
expected_whitespace!(self, t!("\""));
|
||||
} else {
|
||||
expected_whitespace!(self, t!("'"));
|
||||
}
|
||||
|
||||
Ok(Uuid(uuid::Uuid::from_bytes(uuid_buffer)))
|
||||
}
|
||||
|
||||
/// Eats a uuid hex section, enough to fill the given buffer with bytes.
|
||||
fn eat_uuid_hex(&mut self, buffer: &mut [u8]) -> ParseResult<()> {
|
||||
// A function to convert a hex digit to its number representation.
|
||||
fn ascii_to_hex(b: u8) -> Option<u8> {
|
||||
if b.is_ascii_digit() {
|
||||
return Some(b - b'0');
|
||||
}
|
||||
|
||||
if (b'a'..=b'f').contains(&b) {
|
||||
return Some(b - (b'a' - 10));
|
||||
}
|
||||
|
||||
if (b'A'..=b'F').contains(&b) {
|
||||
return Some(b - (b'A' - 10));
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
// the amount of characters required is twice the buffer length,
|
||||
// since every character is half a byte.
|
||||
let required_len = buffer.len() * 2;
|
||||
|
||||
// The next token should be digits or an identifier
|
||||
// If it is digits an identifier might be after it.
|
||||
let start_token = self.peek_whitespace();
|
||||
let mut cur = start_token;
|
||||
loop {
|
||||
let next = self.peek_whitespace();
|
||||
match next.kind {
|
||||
TokenKind::Identifier => {
|
||||
cur = self.pop_peek();
|
||||
break;
|
||||
}
|
||||
TokenKind::Exponent
|
||||
| TokenKind::Digits
|
||||
| TokenKind::DurationSuffix(DurationSuffix::Day)
|
||||
| TokenKind::NumberSuffix(NumberSuffix::Float) => {
|
||||
cur = self.pop_peek();
|
||||
}
|
||||
TokenKind::Language(_)
|
||||
| TokenKind::Keyword(_)
|
||||
| TokenKind::VectorType(VectorTypeKind::F64 | VectorTypeKind::F32) => {
|
||||
// there are some keyword and language tokens which could be part of the
|
||||
// hex section.
|
||||
if !self.lexer.span_bytes(next.span).iter().all(|x| x.is_ascii_hexdigit()) {
|
||||
bail!("Invalid UUID section, invalid hex character in section", @next.span)
|
||||
}
|
||||
cur = self.pop_peek();
|
||||
break;
|
||||
}
|
||||
t!("-") | t!("\"") | t!("'") => break,
|
||||
_ => {
|
||||
bail!("Invalid UUID section, invalid hex character in section", @next.span)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Get the span that covered all eaten tokens.
|
||||
let digits_span = start_token.span.covers(cur.span);
|
||||
let digits_bytes = self.lexer.span_str(digits_span).as_bytes();
|
||||
|
||||
// for error handling, the invalid hex character error should be reported first, before
|
||||
// the incorrect-segment-length error, even if both apply.
|
||||
if !digits_bytes.iter().all(|x| x.is_ascii_hexdigit()) {
|
||||
bail!("Unexpected characters in UUID token, expected UUID hex digits", @digits_span);
|
||||
}
|
||||
|
||||
if digits_bytes.len() != required_len {
|
||||
bail!("Unexpected characters in UUID token, invalid length of hex digits are",
|
||||
@digits_span => "this has `{}` character where `{}` are required", digits_bytes.len(), required_len);
|
||||
}
|
||||
|
||||
// write into the buffer
|
||||
for (i, b) in buffer.iter_mut().enumerate() {
|
||||
*b = ascii_to_hex(digits_bytes[i * 2]).unwrap() << 4
|
||||
| ascii_to_hex(digits_bytes[i * 2 + 1]).unwrap();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
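For reference, the byte-packing loop in `eat_uuid_hex` combines two hex nibbles per output byte. A minimal sketch of the same rule (hypothetical helper):

fn pack_byte(high_nibble: u8, low_nibble: u8) -> u8 {
    // e.g. pack_byte(0xf, 0x0) == 0xf0, matching
    // `ascii_to_hex(b'f').unwrap() << 4 | ascii_to_hex(b'0').unwrap()`.
    high_nibble << 4 | low_nibble
}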
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::syn::parser::Parser;
|
||||
|
||||
#[test]
|
||||
fn uuid_parsing() {
|
||||
fn assert_uuid_parses(s: &str) {
|
||||
let uuid_str = format!("u'{s}'");
|
||||
let mut parser = Parser::new(uuid_str.as_bytes());
|
||||
let uuid = parser.parse_uuid().unwrap();
|
||||
assert_eq!(uuid::Uuid::parse_str(s).unwrap(), *uuid);
|
||||
}
|
||||
|
||||
assert_uuid_parses("0531956f-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("0531956d-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("0531956e-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("0531956a-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195f1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195d1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195e1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("053195a1-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("f0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("d0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("e0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("a0531951-20ec-4575-bb68-3e6b49d813fa");
|
||||
assert_uuid_parses("b98839b9-0471-4dbb-aae0-14780e848f32");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_uuid_characters() {
|
||||
let hex_characters =
|
||||
[b'0', b'a', b'b', b'c', b'd', b'e', b'f', b'A', b'B', b'C', b'D', b'E', b'F'];
|
||||
|
||||
let mut uuid_string: Vec<u8> = "u'0531956f-20ec-4575-bb68-3e6b49d813fa'".to_string().into();
|
||||
|
||||
fn assert_uuid_parses(s: &[u8]) {
|
||||
let mut parser = Parser::new(s);
|
||||
parser.parse_uuid().unwrap();
|
||||
}
|
||||
|
||||
for i in hex_characters.iter() {
|
||||
for j in hex_characters.iter() {
|
||||
for k in hex_characters.iter() {
|
||||
uuid_string[3] = *i;
|
||||
uuid_string[4] = *j;
|
||||
uuid_string[5] = *k;
|
||||
|
||||
assert_uuid_parses(&uuid_string)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -3,10 +3,7 @@ use crate::{
|
|||
sql::{Constant, Function, Value},
|
||||
syn::{
|
||||
error::MessageKind,
|
||||
parser::{
|
||||
mac::{expected, unexpected},
|
||||
SyntaxError,
|
||||
},
|
||||
parser::{mac::expected, unexpected, SyntaxError},
|
||||
token::{t, Span},
|
||||
},
|
||||
};
|
||||
|
@ -464,12 +461,12 @@ pub(crate) static PATHS: phf::Map<UniCase<&'static str>, PathKind> = phf_map! {
|
|||
|
||||
impl Parser<'_> {
|
||||
/// Parse a builtin path.
|
||||
pub async fn parse_builtin(&mut self, stk: &mut Stk, start: Span) -> ParseResult<Value> {
|
||||
pub(super) async fn parse_builtin(&mut self, stk: &mut Stk, start: Span) -> ParseResult<Value> {
|
||||
let mut last_span = start;
|
||||
while self.eat(t!("::")) {
|
||||
let t = self.glue_ident(false)?;
|
||||
if !Self::tokenkind_can_start_ident(t.kind) {
|
||||
unexpected!(self, t, "an identifier")
|
||||
let peek = self.peek();
|
||||
if !Self::kind_is_identifier(peek.kind) {
|
||||
unexpected!(self, peek, "an identifier")
|
||||
}
|
||||
self.pop_peek();
|
||||
last_span = self.last_span();
|
||||
|
@ -518,7 +515,7 @@ impl Parser<'_> {
|
|||
}
|
||||
|
||||
/// Parse a call to a builtin function.
|
||||
pub async fn parse_builtin_function(
|
||||
pub(super) async fn parse_builtin_function(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
name: String,
|
||||
|
@ -530,7 +527,7 @@ impl Parser<'_> {
|
|||
break;
|
||||
}
|
||||
|
||||
let arg = stk.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let arg = stk.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
args.push(arg);
|
||||
|
||||
if !self.eat(t!(",")) {
|
||||
|
|
|
@ -1,26 +1,49 @@
|
|||
//! This module defines the Pratt parser for operators.
|
||||
|
||||
use std::ops::Bound;
|
||||
|
||||
use reblessive::Stk;
|
||||
|
||||
use super::mac::unexpected;
|
||||
use super::mac::{expected_whitespace, unexpected};
|
||||
use crate::sql::Range;
|
||||
use crate::sql::{value::TryNeg, Cast, Expression, Number, Operator, Value};
|
||||
use crate::syn::error::bail;
|
||||
use crate::syn::token::Token;
|
||||
use crate::syn::token::{self, Token};
|
||||
use crate::syn::{
|
||||
parser::{mac::expected, ParseResult, Parser},
|
||||
token::{t, TokenKind},
|
||||
};
|
||||
|
||||
/// An enum which defines how strongly an operator binds its operands.
|
||||
///
|
||||
/// If a binding power is higher, the operator is more likely to directly operate on its
|
||||
/// neighbours.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
|
||||
pub enum BindingPower {
|
||||
Base,
|
||||
Or,
|
||||
And,
|
||||
Equality,
|
||||
Relation,
|
||||
AddSub,
|
||||
MulDiv,
|
||||
Power,
|
||||
Cast,
|
||||
Range,
|
||||
Nullish,
|
||||
Unary,
|
||||
}
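Because the enum derives `Ord`, operator precedence falls out of declaration order: later variants bind tighter. A sketch of a test one could write to pin this down (not part of the patch):

#[test]
fn binding_power_follows_declaration_order() {
    // `derive(PartialOrd, Ord)` orders variants by declaration position.
    assert!(BindingPower::MulDiv > BindingPower::AddSub);
    assert!(BindingPower::And > BindingPower::Or);
    assert!(BindingPower::Unary > BindingPower::Base);
}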
|
||||
|
||||
impl Parser<'_> {
|
||||
/// Parses a generic value.
|
||||
///
|
||||
/// A generic loose ident like `foo` in for example `foo.bar` can be two different values
|
||||
/// depending on context: a table or a field of the current document. This function parses loose
|
||||
/// idents as a table, see [`parse_value_field`] for parsing loose idents as fields
|
||||
pub async fn parse_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
pub async fn parse_value_table(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let old = self.table_as_field;
|
||||
self.table_as_field = false;
|
||||
let res = self.pratt_parse_expr(ctx, 0).await;
|
||||
let res = self.pratt_parse_expr(ctx, BindingPower::Base).await;
|
||||
self.table_as_field = old;
|
||||
res
|
||||
}
|
||||
|
@ -30,16 +53,23 @@ impl Parser<'_> {
|
|||
/// A generic loose ident like `foo` in for example `foo.bar` can be two different values
|
||||
/// depending on context: a table or a field of the current document. This function parses loose
|
||||
/// idents as a field, see [`parse_value`] for parsing loose idents as table
|
||||
pub async fn parse_value_field(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
pub(crate) async fn parse_value_field(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let old = self.table_as_field;
|
||||
self.table_as_field = true;
|
||||
let res = self.pratt_parse_expr(ctx, 0).await;
|
||||
let res = self.pratt_parse_expr(ctx, BindingPower::Base).await;
|
||||
self.table_as_field = old;
|
||||
res
|
||||
}
|
||||
|
||||
/// Parses a generic value.
|
||||
///
|
||||
/// Inherits how loose identifiers are parsed from its caller.
|
||||
pub(super) async fn parse_value_inherit(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
self.pratt_parse_expr(ctx, BindingPower::Base).await
|
||||
}
|
||||
|
||||
/// Parse an assigner operator.
|
||||
pub fn parse_assigner(&mut self) -> ParseResult<Operator> {
|
||||
pub(super) fn parse_assigner(&mut self) -> ParseResult<Operator> {
|
||||
let token = self.next();
|
||||
match token.kind {
|
||||
t!("=") => Ok(Operator::Equal),
|
||||
|
@ -56,19 +86,15 @@ impl Parser<'_> {
|
|||
/// more likely to operate directly on its neighbours. For example `*` has a higher binding power
|
||||
/// than `-` resulting in 1 - 2 * 3 being parsed as 1 - (2 * 3).
|
||||
///
|
||||
/// This returns two numbers: the binding power of the left neighbour and the right neighbour.
|
||||
/// If the left number is lower than the right it is left associative: i.e. '1 op 2 op 3' will
|
||||
/// be parsed as '(1 op 2) op 3'. If the right number is lower the operator is right
|
||||
/// associative: i.e. '1 op 2 op 3' will be parsed as '1 op (2 op 3)'. For example: `+=` is
|
||||
/// right associative so `a += b += 3` will be parsed as `a += (b += 3)` while `+` is left
|
||||
/// associative and will be parsed as `(a + b) + c`.
|
||||
fn infix_binding_power(token: TokenKind) -> Option<(u8, u8)> {
|
||||
/// All operators in SurrealQL which are parsed by the functions in this module are left
|
||||
/// associative or have no defined associativity.
|
||||
fn infix_binding_power(&mut self, token: TokenKind) -> Option<BindingPower> {
|
||||
// TODO: Look at ordering of operators.
|
||||
match token {
|
||||
// assignment operators have the lowest binding power.
|
||||
//t!("+=") | t!("-=") | t!("+?=") => Some((2, 1)),
|
||||
t!("||") | t!("OR") => Some((3, 4)),
|
||||
t!("&&") | t!("AND") => Some((5, 6)),
|
||||
t!("||") | t!("OR") => Some(BindingPower::Or),
|
||||
t!("&&") | t!("AND") => Some(BindingPower::And),
|
||||
|
||||
// Equality operators have same binding power.
|
||||
t!("=")
|
||||
|
@ -81,11 +107,26 @@ impl Parser<'_> {
|
|||
| t!("!~")
|
||||
| t!("*~")
|
||||
| t!("?~")
|
||||
| t!("@") => Some((7, 8)),
|
||||
| t!("@") => Some(BindingPower::Equality),
|
||||
|
||||
t!("<")
|
||||
| t!("<=")
|
||||
| t!(">")
|
||||
t!("<") => {
|
||||
let peek = self.peek_whitespace1();
|
||||
if matches!(peek.kind, t!("-") | t!("->") | t!("..")) {
|
||||
return None;
|
||||
}
|
||||
Some(BindingPower::Relation)
|
||||
}
|
||||
|
||||
t!(">") => {
|
||||
if self.peek_whitespace1().kind == t!("..") {
|
||||
return Some(BindingPower::Range);
|
||||
}
|
||||
Some(BindingPower::Relation)
|
||||
}
|
||||
|
||||
t!("..") => Some(BindingPower::Range),
|
||||
|
||||
t!("<=")
|
||||
| t!(">=")
|
||||
| t!("∋")
|
||||
| t!("CONTAINS")
|
||||
|
@ -111,37 +152,49 @@ impl Parser<'_> {
|
|||
| t!("INTERSECTS")
|
||||
| t!("NOT")
|
||||
| t!("IN")
|
||||
| t!("<|") => Some((9, 10)),
|
||||
| t!("<|") => Some(BindingPower::Relation),
|
||||
|
||||
t!("+") | t!("-") => Some((11, 12)),
|
||||
t!("*") | t!("×") | t!("/") | t!("÷") | t!("%") => Some((13, 14)),
|
||||
t!("**") => Some((15, 16)),
|
||||
t!("?:") | t!("??") => Some((17, 18)),
|
||||
t!("+") | t!("-") => Some(BindingPower::AddSub),
|
||||
t!("*") | t!("×") | t!("/") | t!("÷") | t!("%") => Some(BindingPower::MulDiv),
|
||||
t!("**") => Some(BindingPower::Power),
|
||||
t!("?:") | t!("??") => Some(BindingPower::Nullish),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn prefix_binding_power(&mut self, token: TokenKind) -> Option<((), u8)> {
|
||||
fn prefix_binding_power(&mut self, token: TokenKind) -> Option<BindingPower> {
|
||||
match token {
|
||||
t!("!") | t!("+") | t!("-") => Some(((), 19)),
|
||||
t!("!") | t!("+") | t!("-") => Some(BindingPower::Unary),
|
||||
t!("..") => Some(BindingPower::Range),
|
||||
t!("<") => {
|
||||
if self.peek_token_at(1).kind != t!("FUTURE") {
|
||||
Some(((), 20))
|
||||
} else {
|
||||
None
|
||||
let peek = self.peek1();
|
||||
if matches!(peek.kind, t!("-") | t!("->") | t!("FUTURE")) {
|
||||
return None;
|
||||
}
|
||||
Some(BindingPower::Cast)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_prefix_op(&mut self, ctx: &mut Stk, min_bp: u8) -> ParseResult<Value> {
|
||||
async fn parse_prefix_op(&mut self, ctx: &mut Stk, min_bp: BindingPower) -> ParseResult<Value> {
|
||||
let token = self.peek();
|
||||
let operator = match token.kind {
|
||||
t!("+") => {
|
||||
// +123 is a single number token, so parse it as such
|
||||
let p = self.peek_whitespace_token_at(1);
|
||||
let p = self.peek_whitespace1();
|
||||
if matches!(p.kind, TokenKind::Digits) {
|
||||
// This is a bit of an annoying special case.
|
||||
// The problem is that `+` and `-` can be a prefix operator and the start
|
||||
// of a number token.
|
||||
// To figure out which it is we need to peek the next whitespace token.
|
||||
// This eats the digits that the lexer needs to lex the number. So we need
|
||||
// to backup before the digits token was consumed, clear the digits token from
|
||||
// the token buffer so it isn't popped after parsing the number and then lex the
|
||||
// number.
|
||||
self.lexer.backup_before(p.span);
|
||||
self.token_buffer.clear();
|
||||
self.token_buffer.push(token);
|
||||
return self.next_token_value::<Number>().map(Value::Number);
|
||||
}
|
||||
self.pop_peek();
|
||||
|
@ -150,8 +203,19 @@ impl Parser<'_> {
|
|||
}
|
||||
t!("-") => {
|
||||
// -123 is a single number token, so parse it as such
|
||||
let p = self.peek_whitespace_token_at(1);
|
||||
let p = self.peek_whitespace1();
|
||||
if matches!(p.kind, TokenKind::Digits) {
|
||||
// This is a bit of an annoying special case.
|
||||
// The problem is that `+` and `-` can be a prefix operator and the start
|
||||
// of a number token.
|
||||
// To figure out which it is we need to peek the next whitespace token.
|
||||
// This eats the digits that the lexer needs to lex the number. So we need
|
||||
// to backup before the digits token was consumed, clear the digits token from
|
||||
// the token buffer so it isn't popped after parsing the number and then lex the
|
||||
// number.
|
||||
self.lexer.backup_before(p.span);
|
||||
self.token_buffer.clear();
|
||||
self.token_buffer.push(token);
|
||||
return self.next_token_value::<Number>().map(Value::Number);
|
||||
}
|
||||
|
||||
|
@ -166,10 +230,11 @@ impl Parser<'_> {
|
|||
t!("<") => {
|
||||
self.pop_peek();
|
||||
let kind = self.parse_kind(ctx, token.span).await?;
|
||||
let value = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
|
||||
let value = ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Cast)).await?;
|
||||
let cast = Cast(kind, value);
|
||||
return Ok(Value::Cast(Box::new(cast)));
|
||||
}
|
||||
t!("..") => return self.parse_prefix_range(ctx).await,
|
||||
// should be unreachable as we previously checked if the token was a prefix op.
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
@ -200,17 +265,16 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_knn(&mut self, token: Token) -> ParseResult<Operator> {
|
||||
pub(super) fn parse_knn(&mut self, token: Token) -> ParseResult<Operator> {
|
||||
let amount = self.next_token_value()?;
|
||||
let op = if self.eat(t!(",")) {
|
||||
let token = self.peek();
|
||||
match token.kind {
|
||||
TokenKind::Distance(ref k) => {
|
||||
self.pop_peek();
|
||||
let d = self.convert_distance(k).map(Some)?;
|
||||
TokenKind::Distance(_) => {
|
||||
let d = self.parse_distance().map(Some)?;
|
||||
Operator::Knn(amount, d)
|
||||
}
|
||||
TokenKind::Digits | TokenKind::Number(_) => {
|
||||
TokenKind::Digits | TokenKind::Glued(token::Glued::Number) => {
|
||||
let ef = self.next_token_value()?;
|
||||
Operator::Ann(amount, ef)
|
||||
}
|
||||
|
@ -226,10 +290,43 @@ impl Parser<'_> {
|
|||
Ok(op)
|
||||
}
|
||||
|
||||
fn expression_is_relation(value: &Value) -> bool {
|
||||
if let Value::Expression(x) = value {
|
||||
return Self::operator_is_relation(x.operator());
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn operator_is_relation(operator: &Operator) -> bool {
|
||||
matches!(
|
||||
operator,
|
||||
Operator::Equal
|
||||
| Operator::NotEqual
|
||||
| Operator::AllEqual
|
||||
| Operator::AnyEqual
|
||||
| Operator::NotLike
|
||||
| Operator::AllLike
|
||||
| Operator::AnyLike
|
||||
| Operator::Like
|
||||
| Operator::Contain
|
||||
| Operator::NotContain
|
||||
| Operator::NotInside
|
||||
| Operator::ContainAll
|
||||
| Operator::ContainNone
|
||||
| Operator::AllInside
|
||||
| Operator::AnyInside
|
||||
| Operator::NoneInside
|
||||
| Operator::Outside
|
||||
| Operator::Intersects
|
||||
| Operator::Inside
|
||||
| Operator::Knn(_, _)
|
||||
)
|
||||
}
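For illustration: with these helpers, `1 <= 2 <= 3` parses `1 <= 2` first, then sees another relational operator while the left-hand side is already a relational expression and bails with the error in `parse_infix_op` below; `(1 <= 2) <= 3` passes because the parenthesized left-hand side no longer surfaces as a bare relational expression.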
|
||||
|
||||
async fn parse_infix_op(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
min_bp: u8,
|
||||
min_bp: BindingPower,
|
||||
lhs: Value,
|
||||
) -> ParseResult<Value> {
|
||||
let token = self.next();
|
||||
|
@ -261,7 +358,6 @@ impl Parser<'_> {
|
|||
t!("<=") => Operator::LessThanOrEqual,
|
||||
t!("<") => Operator::LessThan,
|
||||
t!(">=") => Operator::MoreThanOrEqual,
|
||||
t!(">") => Operator::MoreThan,
|
||||
t!("**") => Operator::Pow,
|
||||
t!("+") => Operator::Add,
|
||||
t!("-") => Operator::Sub,
|
||||
|
@ -294,10 +390,30 @@ impl Parser<'_> {
|
|||
t!("IN") => Operator::Inside,
|
||||
t!("<|") => self.parse_knn(token)?,
|
||||
|
||||
t!(">") => {
|
||||
if self.peek_whitespace().kind == t!("..") {
|
||||
self.pop_peek();
|
||||
return self.parse_infix_range(ctx, true, lhs).await;
|
||||
}
|
||||
Operator::MoreThan
|
||||
}
|
||||
t!("..") => {
|
||||
return self.parse_infix_range(ctx, false, lhs).await;
|
||||
}
|
||||
|
||||
// should be unreachable as we previously checked if the token was an infix op.
|
||||
x => unreachable!("found non-operator token {x:?}"),
|
||||
};
|
||||
let before = self.recent_span();
|
||||
let rhs = ctx.run(|ctx| self.pratt_parse_expr(ctx, min_bp)).await?;
|
||||
|
||||
if Self::operator_is_relation(&operator) && Self::expression_is_relation(&lhs) {
|
||||
let span = before.covers(self.recent_span());
|
||||
// 1 >= 2 >= 3 has no defined associativity and is often a mistake.
|
||||
bail!("Chaining relational operators have no defined associativity.",
|
||||
@span => "Use parens, '()', to specify which operator must be evaluated first")
|
||||
}
|
||||
|
||||
Ok(Value::Expression(Box::new(Expression::Binary {
|
||||
l: lhs,
|
||||
o: operator,
|
||||
|
@ -305,19 +421,113 @@ impl Parser<'_> {
|
|||
})))
|
||||
}
|
||||
|
||||
async fn parse_infix_range(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
exclusive: bool,
|
||||
lhs: Value,
|
||||
) -> ParseResult<Value> {
|
||||
let inclusive = self.eat_whitespace(t!("="));
|
||||
|
||||
let before = self.recent_span();
|
||||
let peek = self.peek_whitespace();
|
||||
let rhs = if inclusive {
|
||||
// ..= must be followed by an expression.
|
||||
if peek.kind == TokenKind::WhiteSpace {
|
||||
bail!("Unexpected whitespace, expected inclusive range to be immediately followed by a expression",
|
||||
@peek.span => "Whitespace between a range and it's operands is dissallowed")
|
||||
}
|
||||
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
|
||||
} else if Self::kind_starts_expression(peek.kind) {
|
||||
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
|
||||
} else {
|
||||
return Ok(Value::Range(Box::new(Range {
|
||||
beg: if exclusive {
|
||||
Bound::Excluded(lhs)
|
||||
} else {
|
||||
Bound::Included(lhs)
|
||||
},
|
||||
end: Bound::Unbounded,
|
||||
})));
|
||||
};
|
||||
|
||||
if matches!(lhs, Value::Range(_)) {
|
||||
let span = before.covers(self.recent_span());
|
||||
// a..b..c is ambiguous, so throw an error
|
||||
bail!("Chaining range operators has no specified associativity",
|
||||
@span => "use parens, '()', to specify which operator must be evaluated first")
|
||||
}
|
||||
|
||||
Ok(Value::Range(Box::new(Range {
|
||||
beg: if exclusive {
|
||||
Bound::Excluded(lhs)
|
||||
} else {
|
||||
Bound::Included(lhs)
|
||||
},
|
||||
end: if inclusive {
|
||||
Bound::Included(rhs)
|
||||
} else {
|
||||
Bound::Excluded(rhs)
|
||||
},
|
||||
})))
|
||||
}
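For reference, the bound combinations handled above (the exclusive begin comes from the `>..` form handled in `parse_infix_op`):

// 1..5    => Included(1) .. Excluded(5)
// 1..=5   => Included(1) .. Included(5)
// 1>..5   => Excluded(1) .. Excluded(5)
// 1>..=5  => Excluded(1) .. Included(5)
// 1..     => Included(1) .. Unbounded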
|
||||
|
||||
async fn parse_prefix_range(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
expected_whitespace!(self, t!(".."));
|
||||
let inclusive = self.eat_whitespace(t!("="));
|
||||
let before = self.recent_span();
|
||||
let peek = self.peek_whitespace();
|
||||
let rhs = if inclusive {
|
||||
// ..= must be followed by an expression.
|
||||
if peek.kind == TokenKind::WhiteSpace {
|
||||
bail!("Unexpected whitespace, expected inclusive range to be immediately followed by a expression",
|
||||
@peek.span => "Whitespace between a range and it's operands is dissallowed")
|
||||
}
|
||||
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
|
||||
} else if Self::kind_starts_expression(peek.kind) {
|
||||
ctx.run(|ctx| self.pratt_parse_expr(ctx, BindingPower::Range)).await?
|
||||
} else {
|
||||
return Ok(Value::Range(Box::new(Range {
|
||||
beg: Bound::Unbounded,
|
||||
end: Bound::Unbounded,
|
||||
})));
|
||||
};
|
||||
|
||||
if matches!(rhs, Value::Range(_)) {
|
||||
let span = before.covers(self.recent_span());
|
||||
// a..b..c is ambiguous, so throw an error
|
||||
bail!("Chaining range operators has no specified associativity",
|
||||
@span => "use parens, '()', to specify which operator must be evaluated first")
|
||||
}
|
||||
|
||||
let range = Range {
|
||||
beg: Bound::Unbounded,
|
||||
end: if inclusive {
|
||||
Bound::Included(rhs)
|
||||
} else {
|
||||
Bound::Excluded(rhs)
|
||||
},
|
||||
};
|
||||
Ok(Value::Range(Box::new(range)))
|
||||
}
|
||||
|
||||
/// The Pratt parsing loop.
|
||||
/// Parses expressions according to binding power.
|
||||
async fn pratt_parse_expr(&mut self, ctx: &mut Stk, min_bp: u8) -> ParseResult<Value> {
|
||||
async fn pratt_parse_expr(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
min_bp: BindingPower,
|
||||
) -> ParseResult<Value> {
|
||||
let peek = self.peek();
|
||||
let mut lhs = if let Some(((), r_bp)) = self.prefix_binding_power(peek.kind) {
|
||||
self.parse_prefix_op(ctx, r_bp).await?
|
||||
let mut lhs = if let Some(bp) = self.prefix_binding_power(peek.kind) {
|
||||
self.parse_prefix_op(ctx, bp).await?
|
||||
} else {
|
||||
self.parse_idiom_expression(ctx).await?
|
||||
};
|
||||
|
||||
loop {
|
||||
let token = self.peek();
|
||||
let Some((l_bp, r_bp)) = Self::infix_binding_power(token.kind) else {
|
||||
let Some(bp) = self.infix_binding_power(token.kind) else {
|
||||
// explain that assignment operators can't be used in normal expressions.
|
||||
if let t!("+=") | t!("*=") | t!("-=") | t!("+?=") = token.kind {
|
||||
unexpected!(self,token,"an operator",
|
||||
|
@ -326,11 +536,11 @@ impl Parser<'_> {
|
|||
break;
|
||||
};
|
||||
|
||||
if l_bp < min_bp {
|
||||
if bp <= min_bp {
|
||||
break;
|
||||
}
|
||||
|
||||
lhs = self.parse_infix_op(ctx, r_bp, lhs).await?;
|
||||
lhs = self.parse_infix_op(ctx, bp, lhs).await?;
|
||||
}
|
||||
|
||||
Ok(lhs)
|
||||
|
@ -422,6 +632,23 @@ mod test {
|
|||
assert_eq!(sql, format!("{}", out));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expression_left_associative() {
|
||||
let sql = "1 - 1 - 1";
|
||||
let out = Value::parse(sql);
|
||||
let one = Value::Number(Number::Int(1));
|
||||
let expected = Value::Expression(Box::new(Expression::Binary {
|
||||
l: Value::Expression(Box::new(Expression::Binary {
|
||||
l: one.clone(),
|
||||
o: Operator::Sub,
|
||||
r: one.clone(),
|
||||
})),
|
||||
o: Operator::Sub,
|
||||
r: one,
|
||||
}));
|
||||
assert_eq!(expected, out);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_expression() {
|
||||
let sql = "<future> { 5 + 10 }";
|
||||
|
|
|
@ -15,7 +15,7 @@ impl Parser<'_> {
|
|||
/// Parse a custom function call
|
||||
///
|
||||
/// Expects `fn` to already be called.
|
||||
pub async fn parse_custom_function(&mut self, ctx: &mut Stk) -> ParseResult<Function> {
|
||||
pub(super) async fn parse_custom_function(&mut self, ctx: &mut Stk) -> ParseResult<Function> {
|
||||
expected!(self, t!("::"));
|
||||
let mut name = self.next_token_value::<Ident>()?.0;
|
||||
while self.eat(t!("::")) {
|
||||
|
@ -27,7 +27,7 @@ impl Parser<'_> {
|
|||
Ok(Function::Custom(name, args))
|
||||
}
|
||||
|
||||
pub async fn parse_function_args(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
|
||||
pub(super) async fn parse_function_args(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
|
||||
let start = self.last_span();
|
||||
let mut args = Vec::new();
|
||||
loop {
|
||||
|
@ -35,7 +35,7 @@ impl Parser<'_> {
|
|||
break;
|
||||
}
|
||||
|
||||
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
args.push(arg);
|
||||
|
||||
if !self.eat(t!(",")) {
|
||||
|
@ -49,7 +49,7 @@ impl Parser<'_> {
|
|||
/// Parse a model invocation
|
||||
///
|
||||
/// Expects `ml` to already be called.
|
||||
pub async fn parse_model(&mut self, ctx: &mut Stk) -> ParseResult<Model> {
|
||||
pub(super) async fn parse_model(&mut self, ctx: &mut Stk) -> ParseResult<Model> {
|
||||
expected!(self, t!("::"));
|
||||
let mut name = self.next_token_value::<Ident>()?.0;
|
||||
while self.eat(t!("::")) {
|
||||
|
@ -101,7 +101,7 @@ impl Parser<'_> {
|
|||
break;
|
||||
}
|
||||
|
||||
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
args.push(arg);
|
||||
|
||||
if !self.eat(t!(",")) {
|
||||
|
|
71
core/src/syn/parser/glue.rs
Normal file
|
@ -0,0 +1,71 @@
|
|||
//! Implements token gluing logic.
|
||||
|
||||
use crate::{
|
||||
sql::{Datetime, Duration, Strand, Uuid},
|
||||
syn::{
|
||||
lexer::compound,
|
||||
token::{t, Glued, Token, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
use super::{GluedValue, ParseResult, Parser};
|
||||
|
||||
impl Parser<'_> {
|
||||
/// Glues the next token and returns the token after.
|
||||
pub(super) fn glue_and_peek1(&mut self) -> ParseResult<Token> {
|
||||
let token = self.peek();
|
||||
match token.kind {
|
||||
TokenKind::Glued(_) => return Ok(self.peek1()),
|
||||
t!("+") | t!("-") | TokenKind::Digits => {
|
||||
self.pop_peek();
|
||||
let value = self.lexer.lex_compound(token, compound::numeric_kind)?;
|
||||
match value.value {
|
||||
compound::NumericKind::Number(x) => {
|
||||
self.glued_value = GluedValue::Number(x);
|
||||
self.prepend_token(Token {
|
||||
span: value.span,
|
||||
kind: TokenKind::Glued(Glued::Number),
|
||||
});
|
||||
}
|
||||
compound::NumericKind::Duration(x) => {
|
||||
self.glued_value = GluedValue::Duration(Duration(x));
|
||||
self.prepend_token(Token {
|
||||
span: value.span,
|
||||
kind: TokenKind::Glued(Glued::Duration),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
t!("\"") | t!("'") => {
|
||||
self.pop_peek();
|
||||
let value = self.lexer.lex_compound(token, compound::strand)?;
|
||||
self.glued_value = GluedValue::Strand(Strand(value.value));
|
||||
self.prepend_token(Token {
|
||||
span: value.span,
|
||||
kind: TokenKind::Glued(Glued::Strand),
|
||||
});
|
||||
return Ok(self.peek1());
|
||||
}
|
||||
t!("d\"") | t!("d'") => {
|
||||
self.pop_peek();
|
||||
let value = self.lexer.lex_compound(token, compound::datetime)?;
|
||||
self.glued_value = GluedValue::Datetime(Datetime(value.value));
|
||||
self.prepend_token(Token {
|
||||
span: value.span,
|
||||
kind: TokenKind::Glued(Glued::Datetime),
|
||||
});
|
||||
}
|
||||
t!("u\"") | t!("u'") => {
|
||||
self.pop_peek();
|
||||
let value = self.lexer.lex_compound(token, compound::uuid)?;
|
||||
self.glued_value = GluedValue::Uuid(Uuid(value.value));
|
||||
self.prepend_token(Token {
|
||||
span: value.span,
|
||||
kind: TokenKind::Glued(Glued::Uuid),
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Ok(self.peek1())
|
||||
}
|
||||
}
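To see why gluing is needed at all, here is a toy model of `glue_and_peek1` (everything below, `Tok`, `ToyParser` and the hard-coded `"foo"`, is invented for illustration and is not the real parser API). A leading quote token is replaced in the lookahead buffer by a glued marker while the lexed value is parked out-of-band, so the parser can peek one token further, for example at the `:` that distinguishes an object from a block, without losing the strand's contents.

#[derive(Debug, Clone, PartialEq)]
enum Tok {
    Quote,       // `"` as initially returned by the lexer
    GluedStrand, // marker left in the buffer after gluing
    Colon,
    Eof,
}

struct ToyParser {
    buffer: Vec<Tok>, // lookahead buffer, index 0 = next token
    glued_value: Option<String>,
}

impl ToyParser {
    // Mirrors the shape of `glue_and_peek1`: glue the next token if it
    // starts a compound production, then return the token after it.
    fn glue_and_peek1(&mut self) -> Tok {
        if self.buffer.first() == Some(&Tok::Quote) {
            // The real parser calls `self.lexer.lex_compound(token, compound::strand)`
            // here; this sketch just pretends the lexer produced "foo".
            self.buffer[0] = Tok::GluedStrand;
            self.glued_value = Some("foo".to_string());
        }
        self.buffer.get(1).cloned().unwrap_or(Tok::Eof)
    }
}

fn main() {
    let mut p = ToyParser {
        buffer: vec![Tok::Quote, Tok::Colon],
        glued_value: None,
    };
    // `{ "foo": ...` : the token after the glued strand is `:`, so this
    // is an object, and the strand's value is still available.
    assert_eq!(p.glue_and_peek1(), Tok::Colon);
    assert_eq!(p.glued_value.as_deref(), Some("foo"));
}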
|
|
@ -7,18 +7,26 @@ use crate::{
|
|||
},
|
||||
syn::{
|
||||
error::bail,
|
||||
token::{t, Span, TokenKind},
|
||||
token::{t, Glued, Span, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
use super::{mac::unexpected, ParseResult, Parser};
|
||||
|
||||
impl Parser<'_> {
|
||||
pub(super) fn peek_continues_idiom(&mut self) -> bool {
|
||||
let peek = self.peek().kind;
|
||||
if matches!(peek, t!("->") | t!("[") | t!(".") | t!("...")) {
|
||||
return true;
|
||||
}
|
||||
peek == t!("<") && self.peek1().kind == t!("-")
|
||||
}
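With this commit `<-` and `<->` are no longer matched as single parser tokens here; an incoming edge is recognised as `<` followed, with no intervening whitespace, by `-` or `->`, which is what the `peek_whitespace1` checks below implement. A character-level sketch of that rule (the `continues_as_graph` helper is invented for illustration):

fn continues_as_graph(src: &str) -> bool {
    let mut chars = src.chars();
    // `<` must be immediately followed by `-`; `<->` also starts with `<-`.
    chars.next() == Some('<') && chars.next() == Some('-')
}

fn main() {
    assert!(continues_as_graph("<-knows"));
    assert!(continues_as_graph("<->knows"));
    assert!(!continues_as_graph("< -knows")); // whitespace splits the operator
    assert!(!continues_as_graph("<5"));       // an ordinary comparison
}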
|
||||
|
||||
/// Parse fields of a selecting query: `foo, bar` in `SELECT foo, bar FROM baz`.
|
||||
///
|
||||
/// # Parser State
|
||||
/// Expects the next tokens to be of a field set.
|
||||
pub async fn parse_fields(&mut self, ctx: &mut Stk) -> ParseResult<Fields> {
|
||||
pub(super) async fn parse_fields(&mut self, ctx: &mut Stk) -> ParseResult<Fields> {
|
||||
if self.eat(t!("VALUE")) {
|
||||
let expr = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let alias = if self.eat(t!("AS")) {
|
||||
|
@ -60,7 +68,7 @@ impl Parser<'_> {
|
|||
}
|
||||
|
||||
/// Parses a list of idioms separated by a `,`
|
||||
pub async fn parse_idiom_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Idiom>> {
|
||||
pub(super) async fn parse_idiom_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Idiom>> {
|
||||
let mut res = vec![self.parse_plain_idiom(ctx).await?];
|
||||
while self.eat(t!(",")) {
|
||||
res.push(self.parse_plain_idiom(ctx).await?);
|
||||
|
@ -72,7 +80,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// This function differs from [`Parser::parse_remaining_value_idiom`] in how it handles graph
|
||||
/// parsing. Graphs inside a plain idiom will remain a normal graph production.
|
||||
pub(crate) async fn parse_remaining_idiom(
|
||||
pub(super) async fn parse_remaining_idiom(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
start: Vec<Part>,
|
||||
|
@ -98,15 +106,21 @@ impl Parser<'_> {
|
|||
let graph = stk.run(|stk| self.parse_graph(stk, Dir::Out)).await?;
|
||||
res.push(Part::Graph(graph))
|
||||
}
|
||||
t!("<->") => {
|
||||
self.pop_peek();
|
||||
let graph = stk.run(|stk| self.parse_graph(stk, Dir::Both)).await?;
|
||||
res.push(Part::Graph(graph))
|
||||
}
|
||||
t!("<-") => {
|
||||
self.pop_peek();
|
||||
let graph = stk.run(|stk| self.parse_graph(stk, Dir::In)).await?;
|
||||
res.push(Part::Graph(graph))
|
||||
t!("<") => {
|
||||
let peek = self.peek_whitespace1();
|
||||
if peek.kind == t!("-") {
|
||||
self.pop_peek();
|
||||
self.pop_peek();
|
||||
let graph = stk.run(|stk| self.parse_graph(stk, Dir::In)).await?;
|
||||
res.push(Part::Graph(graph))
|
||||
} else if peek.kind == t!("->") {
|
||||
self.pop_peek();
|
||||
self.pop_peek();
|
||||
let graph = stk.run(|stk| self.parse_graph(stk, Dir::Both)).await?;
|
||||
res.push(Part::Graph(graph))
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
t!("..") => {
|
||||
bail!("Unexpected token `{}` expected and idiom",t!(".."),
|
||||
|
@ -124,7 +138,7 @@ impl Parser<'_> {
|
|||
/// This function differs from [`Parser::parse_remaining_value_idiom`] in how it handles graph
|
||||
/// parsing. When parsing an idiom-like production which can be a value, the initial start value
|
||||
/// might need to be changed to an Edge depending on what is parsed next.
|
||||
pub(crate) async fn parse_remaining_value_idiom(
|
||||
pub(super) async fn parse_remaining_value_idiom(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Vec<Part>,
|
||||
|
@ -155,16 +169,22 @@ impl Parser<'_> {
|
|||
return Ok(x);
|
||||
}
|
||||
}
|
||||
t!("<->") => {
|
||||
self.pop_peek();
|
||||
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::Both).await? {
|
||||
return Ok(x);
|
||||
}
|
||||
}
|
||||
t!("<-") => {
|
||||
self.pop_peek();
|
||||
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::In).await? {
|
||||
return Ok(x);
|
||||
t!("<") => {
|
||||
let peek = self.peek_whitespace1();
|
||||
if peek.kind == t!("-") {
|
||||
self.pop_peek();
|
||||
self.pop_peek();
|
||||
|
||||
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::In).await? {
|
||||
return Ok(x);
|
||||
}
|
||||
} else if peek.kind == t!("->") {
|
||||
self.pop_peek();
|
||||
self.pop_peek();
|
||||
|
||||
if let Some(x) = self.parse_graph_idiom(ctx, &mut res, Dir::Both).await? {
|
||||
return Ok(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
t!("..") => {
|
||||
|
@ -198,7 +218,7 @@ impl Parser<'_> {
|
|||
};
|
||||
let value = Value::Edges(Box::new(edge));
|
||||
|
||||
if !Self::continues_idiom(self.peek_kind()) {
|
||||
if !self.peek_continues_idiom() {
|
||||
return Ok(Some(value));
|
||||
}
|
||||
res[0] = Part::Start(value);
|
||||
|
@ -213,11 +233,6 @@ impl Parser<'_> {
|
|||
Ok(None)
|
||||
}
|
||||
|
||||
/// Returns whether the token kind could continue an idiom
|
||||
pub fn continues_idiom(kind: TokenKind) -> bool {
|
||||
matches!(kind, t!("->") | t!("<->") | t!("<-") | t!("[") | t!(".") | t!("..."))
|
||||
}
|
||||
|
||||
/// Parse an idiom which can only start with a graph or an identifier.
|
||||
/// Other expressions are not allowed as the start of this idiom.
|
||||
pub async fn parse_plain_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
|
||||
|
@ -227,14 +242,15 @@ impl Parser<'_> {
|
|||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Out)).await?;
|
||||
Part::Graph(graph)
|
||||
}
|
||||
t!("<->") => {
|
||||
self.pop_peek();
|
||||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?;
|
||||
Part::Graph(graph)
|
||||
}
|
||||
t!("<-") => {
|
||||
self.pop_peek();
|
||||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?;
|
||||
t!("<") => {
|
||||
let t = self.pop_peek();
|
||||
let graph = if self.eat_whitespace(t!("-")) {
|
||||
ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?
|
||||
} else if self.eat_whitespace(t!("->")) {
|
||||
ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?
|
||||
} else {
|
||||
unexpected!(self, t, "either `<-` `<->` or `->`")
|
||||
};
|
||||
Part::Graph(graph)
|
||||
}
|
||||
_ => Part::Field(self.next_token_value()?),
|
||||
|
@ -244,7 +260,7 @@ impl Parser<'_> {
|
|||
}
|
||||
|
||||
/// Parse the part after the `.` in an idiom
|
||||
pub async fn parse_dot_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
|
||||
pub(super) async fn parse_dot_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
|
||||
let res = match self.peek_kind() {
|
||||
t!("*") => {
|
||||
self.pop_peek();
|
||||
|
@ -265,12 +281,16 @@ impl Parser<'_> {
|
|||
};
|
||||
Ok(res)
|
||||
}
|
||||
pub async fn parse_function_part(&mut self, ctx: &mut Stk, name: Ident) -> ParseResult<Part> {
|
||||
pub(super) async fn parse_function_part(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
name: Ident,
|
||||
) -> ParseResult<Part> {
|
||||
let args = self.parse_function_args(ctx).await?;
|
||||
Ok(Part::Method(name.0, args))
|
||||
}
|
||||
/// Parse the part after the `.{` in an idiom
|
||||
pub async fn parse_destructure_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
|
||||
pub(super) async fn parse_destructure_part(&mut self, ctx: &mut Stk) -> ParseResult<Part> {
|
||||
let start = self.last_span();
|
||||
let mut destructured: Vec<DestructurePart> = Vec::new();
|
||||
loop {
|
||||
|
@ -311,7 +331,11 @@ impl Parser<'_> {
|
|||
Ok(Part::Destructure(destructured))
|
||||
}
|
||||
/// Parse the part after the `[` in an idiom
|
||||
pub async fn parse_bracket_part(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Part> {
|
||||
pub(super) async fn parse_bracket_part(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Span,
|
||||
) -> ParseResult<Part> {
|
||||
let peek = self.peek();
|
||||
let res = match peek.kind {
|
||||
t!("*") => {
|
||||
|
@ -322,11 +346,11 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
Part::Last
|
||||
}
|
||||
t!("+") | TokenKind::Digits | TokenKind::Number(_) => {
|
||||
t!("+") | TokenKind::Digits | TokenKind::Glued(Glued::Number) => {
|
||||
Part::Index(self.next_token_value()?)
|
||||
}
|
||||
t!("-") => {
|
||||
if let TokenKind::Digits = self.peek_whitespace_token_at(1).kind {
|
||||
if let TokenKind::Digits = self.peek_whitespace1().kind {
|
||||
unexpected!(self, peek,"$, * or a number", => "An index can't be negative.");
|
||||
}
|
||||
unexpected!(self, peek, "$, * or a number");
|
||||
|
@ -347,20 +371,11 @@ impl Parser<'_> {
|
|||
Ok(res)
|
||||
}
|
||||
|
||||
/// Parse a list of basic idioms separated by a ','
|
||||
pub async fn parse_basic_idiom_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Idiom>> {
|
||||
let mut res = vec![self.parse_basic_idiom(ctx).await?];
|
||||
while self.eat(t!(",")) {
|
||||
res.push(self.parse_basic_idiom(ctx).await?);
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
/// Parse a basic idiom.
|
||||
///
|
||||
/// Basic idioms differ from normal idioms in that they are more restrictive.
|
||||
/// Flatten, graphs, conditions and indexing by param are not allowed.
|
||||
pub async fn parse_basic_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
|
||||
pub(super) async fn parse_basic_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
|
||||
let start = self.next_token_value::<Ident>()?;
|
||||
let mut parts = vec![Part::Field(start)];
|
||||
loop {
|
||||
|
@ -382,12 +397,12 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
Part::Last
|
||||
}
|
||||
TokenKind::Digits | t!("+") | TokenKind::Number(_) => {
|
||||
TokenKind::Digits | t!("+") | TokenKind::Glued(Glued::Number) => {
|
||||
let number = self.next_token_value()?;
|
||||
Part::Index(number)
|
||||
}
|
||||
t!("-") => {
|
||||
let peek_digit = self.peek_whitespace_token_at(1);
|
||||
let peek_digit = self.peek_whitespace1();
|
||||
if let TokenKind::Digits = peek_digit.kind {
|
||||
let span = self.recent_span().covers(peek_digit.span);
|
||||
bail!("Unexpected token `-` expected $, *, or a number", @span => "an index can't be negative");
|
||||
|
@ -411,7 +426,7 @@ impl Parser<'_> {
|
|||
/// Basic idioms differ from local idioms in that they are more restrictive.
|
||||
/// Only field, all and number indexing is allowed. Flatten is also allowed but only at the
|
||||
/// end.
|
||||
pub async fn parse_local_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
|
||||
pub(super) async fn parse_local_idiom(&mut self, ctx: &mut Stk) -> ParseResult<Idiom> {
|
||||
let start = self.next_token_value()?;
|
||||
let mut parts = vec![Part::Field(start)];
|
||||
loop {
|
||||
|
@ -429,12 +444,12 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
Part::All
|
||||
}
|
||||
TokenKind::Digits | t!("+") | TokenKind::Number(_) => {
|
||||
TokenKind::Digits | t!("+") | TokenKind::Glued(Glued::Number) => {
|
||||
let number = self.next_token_value()?;
|
||||
Part::Index(number)
|
||||
}
|
||||
t!("-") => {
|
||||
let peek_digit = self.peek_whitespace_token_at(1);
|
||||
let peek_digit = self.peek_whitespace1();
|
||||
if let TokenKind::Digits = peek_digit.kind {
|
||||
let span = self.recent_span().covers(peek_digit.span);
|
||||
bail!("Unexpected token `-` expected $, *, or a number", @span => "an index can't be negative");
|
||||
|
@ -468,7 +483,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser state
|
||||
/// Expects to be at the start of a what list.
|
||||
pub async fn parse_what_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
|
||||
pub(super) async fn parse_what_list(&mut self, ctx: &mut Stk) -> ParseResult<Vec<Value>> {
|
||||
let mut res = vec![self.parse_what_value(ctx).await?];
|
||||
while self.eat(t!(",")) {
|
||||
res.push(self.parse_what_value(ctx).await?)
|
||||
|
@ -480,9 +495,9 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser state
|
||||
/// Expects to be at the start of a what value
|
||||
pub async fn parse_what_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
pub(super) async fn parse_what_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let start = self.parse_what_primary(ctx).await?;
|
||||
if start.can_start_idiom() && Self::continues_idiom(self.peek_kind()) {
|
||||
if start.can_start_idiom() && self.peek_continues_idiom() {
|
||||
let start = match start {
|
||||
Value::Table(Table(x)) => vec![Part::Field(Ident(x))],
|
||||
Value::Idiom(Idiom(x)) => x,
|
||||
|
@ -501,7 +516,7 @@ impl Parser<'_> {
|
|||
/// # Parser state
|
||||
/// Expects to just have eaten a direction (e.g. <-, <->, or ->) and be at the field-like part
|
||||
/// of the graph
|
||||
pub async fn parse_graph(&mut self, ctx: &mut Stk, dir: Dir) -> ParseResult<Graph> {
|
||||
pub(super) async fn parse_graph(&mut self, ctx: &mut Stk, dir: Dir) -> ParseResult<Graph> {
|
||||
let token = self.peek();
|
||||
match token.kind {
|
||||
t!("?") => {
|
||||
|
@ -519,7 +534,7 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
Tables::default()
|
||||
}
|
||||
x if Self::tokenkind_can_start_ident(x) => {
|
||||
x if Self::kind_is_identifier(x) => {
|
||||
// The following function should always succeed here,
|
||||
// returning an error here would be a bug, so unwrap.
|
||||
let table = self.next_token_value().unwrap();
|
||||
|
@ -550,7 +565,7 @@ impl Parser<'_> {
|
|||
..Default::default()
|
||||
})
|
||||
}
|
||||
x if Self::tokenkind_can_start_ident(x) => {
|
||||
x if Self::kind_is_identifier(x) => {
|
||||
// The following function should always succeed here,
|
||||
// returning an error here would be a bug, so unwrap.
|
||||
let table = self.next_token_value().unwrap();
|
||||
|
@ -568,7 +583,7 @@ impl Parser<'_> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::sql::{Expression, Id, Number, Object, Param, Strand, Thing};
|
||||
use crate::sql::{Expression, Id, Number, Object, Operator, Param, Strand, Thing};
|
||||
use crate::syn::Parse;
|
||||
|
||||
use super::*;
|
||||
|
@ -710,7 +725,11 @@ mod tests {
|
|||
Value::from(Idiom(vec![
|
||||
Part::from("test"),
|
||||
Part::from("temp"),
|
||||
Part::Where(Value::from(Expression::parse("test = true"))),
|
||||
Part::Where(Value::Expression(Box::new(Expression::Binary {
|
||||
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
|
||||
o: Operator::Equal,
|
||||
r: Value::Bool(true)
|
||||
}))),
|
||||
Part::from("text")
|
||||
]))
|
||||
);
|
||||
|
@ -726,7 +745,11 @@ mod tests {
|
|||
Value::from(Idiom(vec![
|
||||
Part::from("test"),
|
||||
Part::from("temp"),
|
||||
Part::Where(Value::from(Expression::parse("test = true"))),
|
||||
Part::Where(Value::Expression(Box::new(Expression::Binary {
|
||||
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
|
||||
o: Operator::Equal,
|
||||
r: Value::Bool(true)
|
||||
}))),
|
||||
Part::from("text")
|
||||
]))
|
||||
);
|
||||
|
@ -868,7 +891,11 @@ mod tests {
|
|||
out,
|
||||
Value::from(Idiom(vec![
|
||||
Part::Start(Value::from(Object::default())),
|
||||
Part::Where(Value::from(Expression::parse("test = true")))
|
||||
Part::Where(Value::Expression(Box::new(Expression::Binary {
|
||||
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
|
||||
o: Operator::Equal,
|
||||
r: Value::Bool(true)
|
||||
}))),
|
||||
]))
|
||||
);
|
||||
}
|
||||
|
@ -882,7 +909,11 @@ mod tests {
|
|||
out,
|
||||
Value::from(Idiom(vec![
|
||||
Part::Start(Value::from(Object::default())),
|
||||
Part::Where(Value::from(Expression::parse("test = true")))
|
||||
Part::Where(Value::Expression(Box::new(Expression::Binary {
|
||||
l: Value::Idiom(Idiom(vec![Part::Field(Ident("test".to_string()))])),
|
||||
o: Operator::Equal,
|
||||
r: Value::Bool(true)
|
||||
}))),
|
||||
]))
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,14 +3,15 @@ use std::collections::BTreeMap;
|
|||
use reblessive::Stk;
|
||||
|
||||
use crate::{
|
||||
sql::{Array, Ident, Object, Strand, Value},
|
||||
sql::{Array, Duration, Ident, Object, Strand, Value},
|
||||
syn::{
|
||||
parser::mac::expected,
|
||||
token::{t, QouteKind, Span, TokenKind},
|
||||
lexer::compound::{self, Numeric},
|
||||
parser::mac::{expected, pop_glued},
|
||||
token::{t, Glued, Span, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
use super::{mac::unexpected, ParseResult, Parser};
|
||||
use super::{ParseResult, Parser};
|
||||
|
||||
impl Parser<'_> {
|
||||
pub async fn parse_json(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
|
@ -36,7 +37,7 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
self.parse_json_array(ctx, token.span).await.map(Value::Array)
|
||||
}
|
||||
TokenKind::Qoute(QouteKind::Plain | QouteKind::PlainDouble) => {
|
||||
t!("\"") | t!("'") => {
|
||||
let strand: Strand = self.next_token_value()?;
|
||||
if self.legacy_strands {
|
||||
if let Some(x) = self.reparse_legacy_strand(ctx, &strand.0).await {
|
||||
|
@ -45,14 +46,22 @@ impl Parser<'_> {
|
|||
}
|
||||
Ok(Value::Strand(strand))
|
||||
}
|
||||
TokenKind::Digits | TokenKind::Number(_) => {
|
||||
let peek = self.glue()?;
|
||||
match peek.kind {
|
||||
TokenKind::Duration => Ok(Value::Duration(self.next_token_value()?)),
|
||||
TokenKind::Number(_) => Ok(Value::Number(self.next_token_value()?)),
|
||||
_ => unexpected!(self, peek, "a number"),
|
||||
t!("-") | t!("+") | TokenKind::Digits => {
|
||||
self.pop_peek();
|
||||
let compound = self.lexer.lex_compound(token, compound::numeric)?;
|
||||
match compound.value {
|
||||
Numeric::Duration(x) => Ok(Value::Duration(Duration(x))),
|
||||
Numeric::Number(x) => Ok(Value::Number(x)),
|
||||
}
|
||||
}
|
||||
TokenKind::Glued(Glued::Strand) => {
|
||||
let glued = pop_glued!(self, Strand);
|
||||
Ok(Value::Strand(glued))
|
||||
}
|
||||
TokenKind::Glued(Glued::Duration) => {
|
||||
let glued = pop_glued!(self, Duration);
|
||||
Ok(Value::Duration(glued))
|
||||
}
|
||||
_ => {
|
||||
let ident = self.next_token_value::<Ident>()?.0;
|
||||
self.parse_thing_from_ident(ctx, ident).await.map(Value::Thing)
|
||||
|
|
|
@ -3,10 +3,11 @@ use std::collections::BTreeMap;
|
|||
use reblessive::Stk;
|
||||
|
||||
use crate::{
|
||||
sql::{kind::Literal, Kind, Strand},
|
||||
sql::{kind::Literal, Duration, Kind, Strand},
|
||||
syn::{
|
||||
lexer::compound,
|
||||
parser::mac::expected,
|
||||
token::{t, Keyword, Span, TokenKind},
|
||||
token::{t, Glued, Keyword, Span, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -17,14 +18,14 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// expects the first `<` to already be eaten
|
||||
pub async fn parse_kind(&mut self, ctx: &mut Stk, delim: Span) -> ParseResult<Kind> {
|
||||
pub(super) async fn parse_kind(&mut self, ctx: &mut Stk, delim: Span) -> ParseResult<Kind> {
|
||||
let kind = self.parse_inner_kind(ctx).await?;
|
||||
self.expect_closing_delimiter(t!(">"), delim)?;
|
||||
Ok(kind)
|
||||
}
|
||||
|
||||
/// Parse an inner kind, a kind without enclosing `<` `>`.
|
||||
pub async fn parse_inner_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
|
||||
pub(super) async fn parse_inner_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
|
||||
match self.parse_inner_single_kind(ctx).await? {
|
||||
Kind::Any => Ok(Kind::Any),
|
||||
Kind::Option(k) => Ok(Kind::Option(k)),
|
||||
|
@ -45,7 +46,7 @@ impl Parser<'_> {
|
|||
}
|
||||
|
||||
/// Parse a single inner kind, a kind without enclosing `<` `>`.
|
||||
pub async fn parse_inner_single_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
|
||||
pub(super) async fn parse_inner_single_kind(&mut self, ctx: &mut Stk) -> ParseResult<Kind> {
|
||||
match self.peek_kind() {
|
||||
t!("ANY") => {
|
||||
self.pop_peek();
|
||||
|
@ -170,17 +171,22 @@ impl Parser<'_> {
|
|||
async fn parse_literal_kind(&mut self, ctx: &mut Stk) -> ParseResult<Literal> {
|
||||
let peek = self.peek();
|
||||
match peek.kind {
|
||||
t!("'") | t!("\"") | TokenKind::Strand => {
|
||||
t!("'") | t!("\"") | TokenKind::Glued(Glued::Strand) => {
|
||||
let s = self.next_token_value::<Strand>()?;
|
||||
Ok(Literal::String(s))
|
||||
}
|
||||
t!("+") | t!("-") | TokenKind::Number(_) | TokenKind::Digits | TokenKind::Duration => {
|
||||
let token = self.glue_numeric()?;
|
||||
match token.kind {
|
||||
TokenKind::Number(_) => self.next_token_value().map(Literal::Number),
|
||||
TokenKind::Duration => self.next_token_value().map(Literal::Duration),
|
||||
_ => unexpected!(self, token, "a value"),
|
||||
}
|
||||
t!("+") | t!("-") | TokenKind::Glued(Glued::Number) => {
|
||||
self.next_token_value().map(Literal::Number)
|
||||
}
|
||||
TokenKind::Glued(Glued::Duration) => self.next_token_value().map(Literal::Duration),
|
||||
TokenKind::Digits => {
|
||||
self.pop_peek();
|
||||
let compound = self.lexer.lex_compound(peek, compound::numeric)?;
|
||||
let v = match compound.value {
|
||||
compound::Numeric::Number(x) => Literal::Number(x),
|
||||
compound::Numeric::Duration(x) => Literal::Duration(Duration(x)),
|
||||
};
|
||||
Ok(v)
|
||||
}
|
||||
t!("{") => {
|
||||
self.pop_peek();
|
||||
|
@ -212,11 +218,9 @@ impl Parser<'_> {
|
|||
matches!(
|
||||
t,
|
||||
t!("'")
|
||||
| t!("\"") | TokenKind::Strand
|
||||
| t!("+") | t!("-")
|
||||
| TokenKind::Number(_)
|
||||
| t!("\"") | t!("+")
|
||||
| t!("-") | TokenKind::Glued(Glued::Duration | Glued::Strand | Glued::Number)
|
||||
| TokenKind::Digits
|
||||
| TokenKind::Duration
|
||||
| t!("{") | t!("[")
|
||||
)
|
||||
}
|
||||
|
|
|
@ -42,6 +42,25 @@ macro_rules! expected {
|
|||
}};
|
||||
}
|
||||
|
||||
/// Pops the last token, checks if it is the desired glue value and then returns the value.
|
||||
/// This will panic if the token was not correct or the value was already eaten, both of which the
|
||||
/// parser should make sure to uphold.
|
||||
macro_rules! pop_glued {
|
||||
($parser:expr, $variant:ident) => {{
|
||||
let token = $parser.pop_peek();
|
||||
debug_assert!(matches!(
|
||||
token.kind,
|
||||
$crate::syn::token::TokenKind::Glued($crate::syn::token::Glued::$variant)
|
||||
));
|
||||
let $crate::syn::parser::GluedValue::$variant(x) =
|
||||
::std::mem::take(&mut $parser.glued_value)
|
||||
else {
|
||||
panic!("Glued value was already taken, while the glue token still in the token buffer.")
|
||||
};
|
||||
x
|
||||
}};
|
||||
}
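A runnable sketch of the mechanism the macro drains, assuming only the standard library: the parser's `GluedValue` slot is modelled by a hypothetical `Slot` enum whose `#[default]` unit variant lets `std::mem::take` move the value out while leaving `None` behind. That is also why taking the same glued value twice is a parser bug, which the macro turns into a panic.

#[derive(Default, Debug, PartialEq)]
enum Slot {
    Strand(String),
    #[default]
    None,
}

// Hand-written equivalent of what `pop_glued!(parser, Strand)` does to
// the value slot once the token kind has been checked.
fn pop_strand(slot: &mut Slot) -> String {
    let Slot::Strand(x) = std::mem::take(slot) else {
        panic!("glued value was already taken")
    };
    x
}

fn main() {
    let mut slot = Slot::Strand("foo".to_owned());
    assert_eq!(pop_strand(&mut slot), "foo");
    assert_eq!(slot, Slot::None); // a second pop would now panic
}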
|
||||
|
||||
/// A macro for indicating that the parser encountered a token which it didn't expect.
|
||||
macro_rules! expected_whitespace {
|
||||
($parser:expr, $($kind:tt)*) => {{
|
||||
|
@ -136,6 +155,7 @@ pub(crate) use enter_object_recursion;
|
|||
pub(crate) use enter_query_recursion;
|
||||
pub(crate) use expected;
|
||||
pub(crate) use expected_whitespace;
|
||||
pub(crate) use pop_glued;
|
||||
pub(crate) use unexpected;
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
@ -35,28 +35,29 @@
|
|||
//! whitespace tokens which might have been skipped. Implementers must be careful not to call
|
||||
//! functions which require whitespace tokens when they may already have been skipped.
|
||||
//!
|
||||
//! ## Token Gluing
|
||||
//! ## Compound tokens and token gluing.
|
||||
//!
|
||||
//! Tokens produced from the lexer are in some places more fine-grained than normal. Numbers,
|
||||
//! Identifiers and strand-like productions could be made up from multiple smaller tokens. A
|
||||
//! floating point number for example can be at most made up from a 3 digits token, a dot token,
|
||||
//! an exponent token and number suffix token and two `-` or `+` tokens. Whenever these tokens
|
||||
//! are required the parser calls a `glue_` method which will take the current peeked token and
|
||||
//! replace it with a more complex glued together token if possible.
|
||||
//! SurrealQL has a bunch of tokens which have complex rules for when they are allowed and the
|
||||
//! value they contain. Such tokens are named compound tokens, and examples include a javascript
|
||||
//! body, strand-like tokens, regex, numbers, etc.
|
||||
//!
|
||||
//! ## Use of reblessive
|
||||
//! These tokens need to be manually requested from the lexer with the [`Lexer::lex_compound`]
|
||||
//! function.
|
||||
//!
|
||||
//! This parser uses reblessive to be able to parse deep without overflowing the stack. This means
|
||||
//! that all functions which might recurse, i.e. in some paths can call themselves again, are async
|
||||
//! functions taking arguments from reblessive to call recursive functions without using more stack
|
||||
//! with each depth.
|
||||
|
||||
//! This manual requesting of tokens leads to problems when used in conjunction with peeking. Take
|
||||
//! for instance the production `{ "foo": "bar"}`. `"foo"` is a compound token so when initially
|
||||
//! encountered the lexer only returns a `"` token and then that token needs to be collected into
|
||||
//! the full strand token. However the parser needs to figure out if we are parsing an object
|
||||
//! or a block so it needs to look past the compound token to see if the next token is `:`. This is
|
||||
//! where gluing comes in. Calling `Parser::glue` checks if the next token could start a compound
|
||||
//! token and combines them into a single token. This can only be done in places where we know that, if
|
||||
//! we encountered a leading token of a compound token, it will result in the 'default' compound token.
|
||||
use self::token_buffer::TokenBuffer;
|
||||
use crate::{
|
||||
sql,
|
||||
sql::{self, Datetime, Duration, Strand, Uuid},
|
||||
syn::{
|
||||
error::{bail, SyntaxError},
|
||||
lexer::Lexer,
|
||||
lexer::{compound::NumberKind, Lexer},
|
||||
token::{t, Span, Token, TokenKind},
|
||||
},
|
||||
};
|
||||
|
@ -66,6 +67,7 @@ mod basic;
|
|||
mod builtin;
|
||||
mod expression;
|
||||
mod function;
|
||||
mod glue;
|
||||
mod idiom;
|
||||
mod json;
|
||||
mod kind;
|
||||
|
@ -77,9 +79,7 @@ mod thing;
|
|||
mod token;
|
||||
mod token_buffer;
|
||||
|
||||
pub(crate) use mac::{
|
||||
enter_object_recursion, enter_query_recursion, expected_whitespace, unexpected,
|
||||
};
|
||||
pub(crate) use mac::{enter_object_recursion, enter_query_recursion, unexpected};
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test;
|
||||
|
@ -102,12 +102,24 @@ pub enum PartialResult<T> {
|
|||
},
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub enum GluedValue {
|
||||
Duration(Duration),
|
||||
Datetime(Datetime),
|
||||
Uuid(Uuid),
|
||||
Number(NumberKind),
|
||||
Strand(Strand),
|
||||
#[default]
|
||||
None,
|
||||
}
|
||||
|
||||
/// The SurrealQL parser.
|
||||
pub struct Parser<'a> {
|
||||
lexer: Lexer<'a>,
|
||||
last_span: Span,
|
||||
token_buffer: TokenBuffer<4>,
|
||||
table_as_field: bool,
|
||||
glued_value: GluedValue,
|
||||
pub(crate) table_as_field: bool,
|
||||
legacy_strands: bool,
|
||||
flexible_record_id: bool,
|
||||
object_recursion: usize,
|
||||
|
@ -121,6 +133,7 @@ impl<'a> Parser<'a> {
|
|||
lexer: Lexer::new(source),
|
||||
last_span: Span::empty(),
|
||||
token_buffer: TokenBuffer::new(),
|
||||
glued_value: GluedValue::None,
|
||||
table_as_field: false,
|
||||
legacy_strands: false,
|
||||
flexible_record_id: true,
|
||||
|
@ -177,6 +190,7 @@ impl<'a> Parser<'a> {
|
|||
lexer: self.lexer.change_source(source),
|
||||
last_span: Span::empty(),
|
||||
token_buffer: TokenBuffer::new(),
|
||||
glued_value: GluedValue::None,
|
||||
legacy_strands: self.legacy_strands,
|
||||
flexible_record_id: self.flexible_record_id,
|
||||
table_as_field: false,
|
||||
|
@ -263,7 +277,7 @@ impl<'a> Parser<'a> {
|
|||
|
||||
/// Returns the next n'th token without consuming it.
|
||||
/// `peek_token_at(0)` is equivalent to `peek`.
|
||||
pub fn peek_token_at(&mut self, at: u8) -> Token {
|
||||
pub(crate) fn peek_token_at(&mut self, at: u8) -> Token {
|
||||
for _ in self.token_buffer.len()..=at {
|
||||
let r = loop {
|
||||
let r = self.lexer.next_token();
|
||||
|
@ -276,6 +290,10 @@ impl<'a> Parser<'a> {
|
|||
self.token_buffer.at(at).unwrap()
|
||||
}
|
||||
|
||||
pub fn peek1(&mut self) -> Token {
|
||||
self.peek_token_at(1)
|
||||
}
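A minimal model of this on-demand lookahead, with a hypothetical `Peeker` over a plain iterator of numeric token ids standing in for the real `Lexer` plus `TokenBuffer<4>` pair: tokens are pulled into the buffer until index `at` exists, so peeking at any depth consumes nothing.

use std::collections::VecDeque;

struct Peeker<I: Iterator<Item = u32>> {
    lexer: I,
    buffer: VecDeque<u32>,
}

impl<I: Iterator<Item = u32>> Peeker<I> {
    fn peek_token_at(&mut self, at: usize) -> Option<u32> {
        // Fill the buffer up to and including index `at`.
        while self.buffer.len() <= at {
            self.buffer.push_back(self.lexer.next()?);
        }
        self.buffer.get(at).copied()
    }
}

fn main() {
    let mut p = Peeker {
        lexer: [10u32, 20, 30].into_iter(),
        buffer: VecDeque::new(),
    };
    assert_eq!(p.peek_token_at(1), Some(20)); // buffers two tokens
    assert_eq!(p.peek_token_at(0), Some(10)); // still there, nothing consumed
}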
|
||||
|
||||
/// Returns the next n'th token without consuming it.
|
||||
/// `peek_token_at(0)` is equivalent to `peek`.
|
||||
pub fn peek_whitespace_token_at(&mut self, at: u8) -> Token {
|
||||
|
@ -286,6 +304,10 @@ impl<'a> Parser<'a> {
|
|||
self.token_buffer.at(at).unwrap()
|
||||
}
|
||||
|
||||
pub fn peek_whitespace1(&mut self) -> Token {
|
||||
self.peek_whitespace_token_at(1)
|
||||
}
|
||||
|
||||
/// Returns the span of the next token if it was already peeked, otherwise returns the span of
|
||||
/// the last consumed token.
|
||||
pub fn recent_span(&mut self) -> Span {
|
||||
|
@ -297,6 +319,10 @@ impl<'a> Parser<'a> {
|
|||
self.last_span
|
||||
}
|
||||
|
||||
pub fn assert_finished(&self) -> ParseResult<()> {
|
||||
self.lexer.assert_finished()
|
||||
}
|
||||
|
||||
/// Eat the next token if it is of the given kind.
|
||||
/// Returns whether a token was eaten.
|
||||
pub fn eat(&mut self, token: TokenKind) -> bool {
|
||||
|
@ -334,12 +360,15 @@ impl<'a> Parser<'a> {
|
|||
/// Checks if the next token is of the given kind. If it isn't, it returns an UnclosedDelimiter
|
||||
/// error.
|
||||
fn expect_closing_delimiter(&mut self, kind: TokenKind, should_close: Span) -> ParseResult<()> {
|
||||
if !self.eat(kind) {
|
||||
bail!("Unexpected token, expected delimiter `{kind}`",
|
||||
let peek = self.peek();
|
||||
if peek.kind != kind {
|
||||
bail!("Unexpected token `{}` expected delimiter `{kind}`",
|
||||
peek.kind,
|
||||
@self.recent_span(),
|
||||
@should_close => "expected this delimiter to close"
|
||||
);
|
||||
}
|
||||
self.pop_peek();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -375,7 +404,7 @@ impl<'a> Parser<'a> {
|
|||
let res = ctx.run(|ctx| self.parse_stmt(ctx)).await;
|
||||
let v = match res {
|
||||
Err(e) => {
|
||||
let peek = self.peek_whitespace_token_at(1);
|
||||
let peek = self.peek_whitespace1();
|
||||
if e.is_data_pending()
|
||||
|| matches!(peek.kind, TokenKind::Eof | TokenKind::WhiteSpace)
|
||||
{
|
||||
|
|
|
@ -7,7 +7,7 @@ use crate::{
|
|||
syn::{
|
||||
error::bail,
|
||||
parser::{enter_object_recursion, mac::expected, ParseResult, Parser},
|
||||
token::{t, Span, TokenKind},
|
||||
token::{t, Glued, Span, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -29,11 +29,8 @@ impl Parser<'_> {
|
|||
})
|
||||
}
|
||||
|
||||
// glue possible complex tokens.
|
||||
self.glue()?;
|
||||
|
||||
// Now check first if it can be an object.
|
||||
if self.peek_token_at(1).kind == t!(":") {
|
||||
if self.glue_and_peek1()?.kind == t!(":") {
|
||||
enter_object_recursion!(this = self => {
|
||||
return this.parse_object_or_geometry(ctx, start).await;
|
||||
})
|
||||
|
@ -51,7 +48,7 @@ impl Parser<'_> {
|
|||
) -> ParseResult<Value> {
|
||||
expected!(self, t!(":"));
|
||||
// for it to be geometry the next value must be strand-like.
|
||||
let (t!("\"") | t!("'")) = self.peek_kind() else {
|
||||
let (t!("\"") | t!("'") | TokenKind::Glued(Glued::Strand)) = self.peek_kind() else {
|
||||
return self
|
||||
.parse_object_from_key(ctx, key, BTreeMap::new(), start)
|
||||
.await
|
||||
|
@ -166,7 +163,7 @@ impl Parser<'_> {
|
|||
|
||||
expected!(self, t!(":"));
|
||||
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
|
||||
// check for an object end, if it doesn't end it is not a geometry.
|
||||
if !self.eat(t!(",")) {
|
||||
|
@ -243,7 +240,7 @@ impl Parser<'_> {
|
|||
|
||||
// found coordinates field, next must be a coordinates value but we don't know
|
||||
// which until we match type.
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
|
||||
if !self.eat(t!(",")) {
|
||||
// no comma object must end early.
|
||||
|
@ -353,7 +350,7 @@ impl Parser<'_> {
|
|||
// 'geometries' key can only happen in a GeometryCollection, so try to parse that.
|
||||
expected!(self, t!(":"));
|
||||
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
|
||||
// if the object ends here, it is not a geometry.
|
||||
if !self.eat(t!(",")) || self.peek_kind() == t!("}") {
|
||||
|
@ -485,7 +482,7 @@ impl Parser<'_> {
|
|||
.map(Value::Object);
|
||||
}
|
||||
expected!(self, t!(":"));
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
let comma = self.eat(t!(","));
|
||||
if !self.eat(t!("}")) {
|
||||
// the object didn't end, either an error or not a geometry.
|
||||
|
@ -524,7 +521,7 @@ impl Parser<'_> {
|
|||
mut map: BTreeMap<String, Value>,
|
||||
start: Span,
|
||||
) -> ParseResult<Object> {
|
||||
let v = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let v = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
map.insert(key, v);
|
||||
if !self.eat(t!(",")) {
|
||||
self.expect_closing_delimiter(t!("}"), start)?;
|
||||
|
@ -595,19 +592,15 @@ impl Parser<'_> {
|
|||
async fn parse_object_entry(&mut self, ctx: &mut Stk) -> ParseResult<(String, Value)> {
|
||||
let text = self.parse_object_key()?;
|
||||
expected!(self, t!(":"));
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
Ok((text, value))
|
||||
}
|
||||
|
||||
/// Parses the key of an object, i.e. `field` in the object `{ field: 1 }`.
|
||||
pub fn parse_object_key(&mut self) -> ParseResult<String> {
|
||||
let token = self.glue()?;
|
||||
pub(super) fn parse_object_key(&mut self) -> ParseResult<String> {
|
||||
let token = self.peek();
|
||||
match token.kind {
|
||||
TokenKind::Keyword(_)
|
||||
| TokenKind::Language(_)
|
||||
| TokenKind::Algorithm(_)
|
||||
| TokenKind::Distance(_)
|
||||
| TokenKind::VectorType(_) => {
|
||||
x if Self::kind_is_keyword_like(x) => {
|
||||
self.pop_peek();
|
||||
let str = self.lexer.reader.span(token.span);
|
||||
// Lexer should ensure that the token is valid utf-8
|
||||
|
@ -619,11 +612,11 @@ impl Parser<'_> {
|
|||
let str = self.lexer.string.take().unwrap();
|
||||
Ok(str)
|
||||
}
|
||||
t!("\"") | t!("'") | TokenKind::Strand => {
|
||||
t!("\"") | t!("'") | TokenKind::Glued(Glued::Strand) => {
|
||||
let str = self.next_token_value::<Strand>()?.0;
|
||||
Ok(str)
|
||||
}
|
||||
TokenKind::Digits | TokenKind::Number(_) => {
|
||||
TokenKind::Digits | TokenKind::Glued(Glued::Number) => {
|
||||
let number = self.next_token_value::<Number>()?.to_string();
|
||||
Ok(number)
|
||||
}
|
||||
|
|
|
@ -1,21 +1,20 @@
|
|||
use std::ops::Bound;
|
||||
|
||||
use geo::Point;
|
||||
use reblessive::Stk;
|
||||
|
||||
use super::{ParseResult, Parser};
|
||||
use super::{mac::pop_glued, ParseResult, Parser};
|
||||
use crate::{
|
||||
sql::{
|
||||
Array, Closure, Dir, Function, Geometry, Ident, Idiom, Kind, Mock, Number, Param, Part,
|
||||
Range, Script, Strand, Subquery, Table, Value,
|
||||
Array, Closure, Dir, Duration, Function, Geometry, Ident, Idiom, Kind, Mock, Number, Param,
|
||||
Part, Script, Strand, Subquery, Table, Value,
|
||||
},
|
||||
syn::{
|
||||
error::bail,
|
||||
lexer::compound,
|
||||
parser::{
|
||||
enter_object_recursion, enter_query_recursion,
|
||||
mac::{expected, expected_whitespace, unexpected},
|
||||
mac::{expected, unexpected},
|
||||
},
|
||||
token::{self, t, DurationSuffix, Span, TokenKind},
|
||||
token::{t, Glued, Span, TokenKind},
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -23,32 +22,27 @@ impl Parser<'_> {
|
|||
/// Parse a what primary.
|
||||
///
|
||||
/// Whats are values which are more restricted in what expressions they can contain.
|
||||
pub async fn parse_what_primary(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let peek = self.peek();
|
||||
match peek.kind {
|
||||
t!("..") => Ok(self.try_parse_range(ctx, None).await?.unwrap()),
|
||||
pub(super) async fn parse_what_primary(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let token = self.peek();
|
||||
match token.kind {
|
||||
t!("r\"") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Thing(self.parse_record_string(ctx, true).await?);
|
||||
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
|
||||
Ok(Value::Thing(self.parse_record_string(ctx, true).await?))
|
||||
}
|
||||
t!("r'") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Thing(self.parse_record_string(ctx, false).await?);
|
||||
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
|
||||
Ok(Value::Thing(self.parse_record_string(ctx, false).await?))
|
||||
}
|
||||
t!("d\"") | t!("d'") => {
|
||||
let value = Value::Datetime(self.next_token_value()?);
|
||||
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
|
||||
t!("d\"") | t!("d'") | TokenKind::Glued(Glued::Datetime) => {
|
||||
Ok(Value::Datetime(self.next_token_value()?))
|
||||
}
|
||||
t!("u\"") | t!("u'") => {
|
||||
let value = Value::Uuid(self.next_token_value()?);
|
||||
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
|
||||
t!("u\"") | t!("u'") | TokenKind::Glued(Glued::Uuid) => {
|
||||
Ok(Value::Uuid(self.next_token_value()?))
|
||||
}
|
||||
t!("$param") => {
|
||||
let value = Value::Param(self.next_token_value()?);
|
||||
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
|
||||
Ok(self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value))
|
||||
Ok(value)
|
||||
}
|
||||
t!("FUNCTION") => {
|
||||
self.pop_peek();
|
||||
|
@ -74,7 +68,7 @@ impl Parser<'_> {
|
|||
expected!(self, t!(">"));
|
||||
let start = expected!(self, t!("{")).span;
|
||||
let block = self.parse_block(ctx, start).await?;
|
||||
Ok(Value::Future(Box::new(crate::sql::Future(block))))
|
||||
Ok(Value::Future(Box::new(crate::sql::Future(block))))
|
||||
}
|
||||
t!("|") => {
|
||||
let start = self.pop_peek().span;
|
||||
|
@ -103,89 +97,25 @@ impl Parser<'_> {
|
|||
let value = self.parse_model(ctx).await.map(|x| Value::Model(Box::new(x)))?;
|
||||
Ok(self.try_parse_inline(ctx, &value).await?.unwrap_or(value))
|
||||
}
|
||||
x => {
|
||||
if !Self::tokenkind_can_start_ident(x) {
|
||||
unexpected!(self, peek, "a value")
|
||||
}
|
||||
|
||||
// Combine possible multiple tokens into a single one. before scanning past it.
|
||||
let span = self.glue()?.span;
|
||||
|
||||
let peek = self.peek_token_at(1);
|
||||
x if Self::kind_is_identifier(x) => {
|
||||
let peek = self.peek1();
|
||||
match peek.kind {
|
||||
t!("::") | t!("(") => {
|
||||
self.pop_peek();
|
||||
self.parse_builtin(ctx, span).await
|
||||
self.parse_builtin(ctx, token.span).await
|
||||
}
|
||||
t!(":") => {
|
||||
let str = self.next_token_value::<Ident>()?.0;
|
||||
self.parse_thing_or_range(ctx, str).await
|
||||
}
|
||||
x => {
|
||||
if x.has_data() {
|
||||
// Consume the first identifier to ensure streaming works correctly.
|
||||
self.pop_peek();
|
||||
// x had data and possibly overwrote the data from token, This is
|
||||
// always an invalid production so just return error.
|
||||
unexpected!(self, peek, "a value");
|
||||
} else {
|
||||
Ok(Value::Table(self.next_token_value()?))
|
||||
}
|
||||
}
|
||||
_ => Ok(Value::Table(self.next_token_value()?)),
|
||||
}
|
||||
}
|
||||
_ => unexpected!(self, token, "an expression"),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn try_parse_range(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
subject: Option<&Value>,
|
||||
) -> ParseResult<Option<Value>> {
|
||||
// The ">" can also mean a comparison.
|
||||
// If the token after is not "..", then return
|
||||
if self.peek_whitespace().kind == t!(">")
|
||||
&& self.peek_whitespace_token_at(1).kind != t!("..")
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let beg = if let Some(subject) = subject {
|
||||
if self.eat_whitespace(t!(">")) {
|
||||
expected_whitespace!(self, t!(".."));
|
||||
Bound::Excluded(subject.to_owned())
|
||||
} else {
|
||||
if !self.eat_whitespace(t!("..")) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Bound::Included(subject.to_owned())
|
||||
}
|
||||
} else {
|
||||
if !self.eat_whitespace(t!("..")) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
Bound::Unbounded
|
||||
};
|
||||
|
||||
let end = if self.eat_whitespace(t!("=")) {
|
||||
let id = ctx.run(|ctx| self.parse_simple_value(ctx)).await?;
|
||||
Bound::Included(id)
|
||||
} else if Self::tokenkind_can_start_simple_value(self.peek_whitespace().kind) {
|
||||
let id = ctx.run(|ctx| self.parse_simple_value(ctx)).await?;
|
||||
Bound::Excluded(id)
|
||||
} else {
|
||||
Bound::Unbounded
|
||||
};
|
||||
|
||||
Ok(Some(Value::Range(Box::new(Range {
|
||||
beg,
|
||||
end,
|
||||
}))))
|
||||
}
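The bound selection above reduces to a small decision table. A sketch under the assumption that the operator has already been split into flags (`bounds` and its arguments are invented for illustration): a plain `..` keeps the subject as an included lower bound, the `>` prefix excludes it, and a trailing `=` turns the upper bound from excluded to included.

use std::ops::Bound;

fn bounds(
    subject: Option<i64>,
    exclusive_start: bool, // saw `>..` instead of `..`
    inclusive_end: bool,   // saw `..=` instead of `..`
    end: Option<i64>,
) -> (Bound<i64>, Bound<i64>) {
    let beg = match subject {
        Some(s) if exclusive_start => Bound::Excluded(s),
        Some(s) => Bound::Included(s),
        None => Bound::Unbounded,
    };
    let end = match end {
        Some(e) if inclusive_end => Bound::Included(e),
        Some(e) => Bound::Excluded(e),
        None => Bound::Unbounded,
    };
    (beg, end)
}

fn main() {
    // `1..5` covers [1, 5)
    assert_eq!(bounds(Some(1), false, false, Some(5)), (Bound::Included(1), Bound::Excluded(5)));
    // `1>..=5` covers (1, 5]
    assert_eq!(bounds(Some(1), true, true, Some(5)), (Bound::Excluded(1), Bound::Included(5)));
    // `..` is unbounded on both sides
    assert_eq!(bounds(None, false, false, None), (Bound::Unbounded, Bound::Unbounded));
}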
|
||||
|
||||
pub async fn try_parse_inline(
|
||||
pub(super) async fn try_parse_inline(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
subject: &Value,
|
||||
|
@ -198,7 +128,7 @@ impl Parser<'_> {
|
|||
break;
|
||||
}
|
||||
|
||||
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
args.push(arg);
|
||||
|
||||
if !self.eat(t!(",")) {
|
||||
|
@ -215,90 +145,104 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_number_like_prime(&mut self) -> ParseResult<Value> {
|
||||
let token = self.glue_numeric()?;
|
||||
pub(super) fn parse_number_like_prime(&mut self) -> ParseResult<Value> {
|
||||
let token = self.peek();
|
||||
match token.kind {
|
||||
TokenKind::Number(_) => self.next_token_value().map(Value::Number),
|
||||
TokenKind::Duration => self.next_token_value().map(Value::Duration),
|
||||
_ => unexpected!(self, token, "a value"),
|
||||
TokenKind::Glued(Glued::Duration) => {
|
||||
let duration = pop_glued!(self, Duration);
|
||||
Ok(Value::Duration(duration))
|
||||
}
|
||||
TokenKind::Glued(Glued::Number) => {
|
||||
let v = self.next_token_value()?;
|
||||
Ok(Value::Number(v))
|
||||
}
|
||||
_ => {
|
||||
self.pop_peek();
|
||||
let value = self.lexer.lex_compound(token, compound::numeric)?;
|
||||
let v = match value.value {
|
||||
compound::Numeric::Number(x) => Value::Number(x),
|
||||
compound::Numeric::Duration(x) => Value::Duration(Duration(x)),
|
||||
};
|
||||
Ok(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse an expressions
|
||||
pub async fn parse_idiom_expression(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
pub(super) async fn parse_idiom_expression(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let token = self.peek();
|
||||
let value = match token.kind {
|
||||
t!("..") => self.try_parse_range(ctx, None).await?.unwrap(),
|
||||
t!("NONE") => {
|
||||
self.pop_peek();
|
||||
let value = Value::None;
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::None
|
||||
}
|
||||
t!("NULL") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Null;
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::Null
|
||||
}
|
||||
t!("true") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Bool(true);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::Bool(true)
|
||||
}
|
||||
t!("false") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Bool(false);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::Bool(false)
|
||||
}
|
||||
t!("<") => {
|
||||
self.pop_peek();
|
||||
// Casting should already have been parsed.
|
||||
expected!(self, t!("FUTURE"));
|
||||
self.expect_closing_delimiter(t!(">"), token.span)?;
|
||||
let next = expected!(self, t!("{")).span;
|
||||
let block = self.parse_block(ctx, next).await?;
|
||||
Value::Future(Box::new(crate::sql::Future(block)))
|
||||
let peek = self.peek_whitespace();
|
||||
if peek.kind == t!("-") {
|
||||
self.pop_peek();
|
||||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?;
|
||||
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
|
||||
} else if peek.kind == t!("->") {
|
||||
self.pop_peek();
|
||||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?;
|
||||
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
|
||||
} else if self.eat(t!("FUTURE")) {
|
||||
// Casting should already have been parsed.
|
||||
self.expect_closing_delimiter(t!(">"), token.span)?;
|
||||
let next = expected!(self, t!("{")).span;
|
||||
let block = self.parse_block(ctx, next).await?;
|
||||
Value::Future(Box::new(crate::sql::Future(block)))
|
||||
} else {
|
||||
unexpected!(self, token, "expected either a `<-` or a future")
|
||||
}
|
||||
}
|
||||
t!("r\"") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Thing(self.parse_record_string(ctx, true).await?);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::Thing(self.parse_record_string(ctx, true).await?)
|
||||
}
|
||||
t!("r'") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Thing(self.parse_record_string(ctx, false).await?);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::Thing(self.parse_record_string(ctx, false).await?)
|
||||
}
|
||||
t!("d\"") | t!("d'") => {
|
||||
let value = Value::Datetime(self.next_token_value()?);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
t!("d\"") | t!("d'") | TokenKind::Glued(Glued::Datetime) => {
|
||||
Value::Datetime(self.next_token_value()?)
|
||||
}
|
||||
t!("u\"") | t!("u'") => {
|
||||
let value = Value::Uuid(self.next_token_value()?);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
t!("u\"") | t!("u'") | TokenKind::Glued(Glued::Uuid) => {
|
||||
Value::Uuid(self.next_token_value()?)
|
||||
}
|
||||
t!("'") | t!("\"") | TokenKind::Strand => {
|
||||
t!("'") | t!("\"") | TokenKind::Glued(Glued::Strand) => {
|
||||
let s = self.next_token_value::<Strand>()?;
|
||||
if self.legacy_strands {
|
||||
if let Some(x) = self.reparse_legacy_strand(ctx, &s.0).await {
|
||||
return Ok(x);
|
||||
}
|
||||
}
|
||||
let value = Value::Strand(s);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
}
|
||||
t!("+") | t!("-") | TokenKind::Number(_) | TokenKind::Digits | TokenKind::Duration => {
|
||||
let value = self.parse_number_like_prime()?;
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::Strand(s)
|
||||
}
|
||||
t!("+")
|
||||
| t!("-")
|
||||
| TokenKind::Digits
|
||||
| TokenKind::Glued(Glued::Number | Glued::Duration) => self.parse_number_like_prime()?,
|
||||
TokenKind::NaN => {
|
||||
self.pop_peek();
|
||||
let value = Value::Number(Number::Float(f64::NAN));
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
Value::Number(Number::Float(f64::NAN))
|
||||
}
|
||||
t!("$param") => {
|
||||
let value = Value::Param(self.next_token_value()?);
|
||||
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
|
||||
}
|
||||
t!("FUNCTION") => {
|
||||
self.pop_peek();
|
||||
|
@ -310,26 +254,14 @@ impl Parser<'_> {
|
|||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Out)).await?;
|
||||
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
|
||||
}
|
||||
t!("<->") => {
|
||||
self.pop_peek();
|
||||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::Both)).await?;
|
||||
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
|
||||
}
|
||||
t!("<-") => {
|
||||
self.pop_peek();
|
||||
let graph = ctx.run(|ctx| self.parse_graph(ctx, Dir::In)).await?;
|
||||
Value::Idiom(Idiom(vec![Part::Graph(graph)]))
|
||||
}
|
||||
t!("[") => {
|
||||
self.pop_peek();
|
||||
let value = self.parse_array(ctx, token.span).await.map(Value::Array)?;
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
self.parse_array(ctx, token.span).await.map(Value::Array)?
|
||||
}
|
||||
t!("{") => {
|
||||
self.pop_peek();
|
||||
let value = self.parse_object_like(ctx, token.span).await?;
|
||||
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
|
||||
}
|
||||
t!("|") => {
|
||||
self.pop_peek();
|
||||
|
@ -349,8 +281,7 @@ impl Parser<'_> {
|
|||
t!("(") => {
|
||||
self.pop_peek();
|
||||
let value = self.parse_inner_subquery_or_coordinate(ctx, token.span).await?;
|
||||
let value = self.try_parse_inline(ctx, &value).await?.unwrap_or(value);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
|
||||
}
|
||||
t!("/") => self.next_token_value().map(Value::Regex)?,
|
||||
t!("RETURN")
|
||||
|
@ -373,10 +304,8 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
self.parse_model(ctx).await.map(|x| Value::Model(Box::new(x)))?
|
||||
}
|
||||
_ => {
|
||||
self.glue()?;
|
||||
|
||||
let peek = self.peek_token_at(1);
|
||||
x if Self::kind_is_identifier(x) => {
|
||||
let peek = self.peek1();
|
||||
match peek.kind {
|
||||
t!("::") | t!("(") => {
|
||||
self.pop_peek();
|
||||
|
@ -386,12 +315,8 @@ impl Parser<'_> {
|
|||
let str = self.next_token_value::<Ident>()?.0;
|
||||
self.parse_thing_or_range(ctx, str).await?
|
||||
}
|
||||
x => {
|
||||
if x.has_data() {
|
||||
// Pop the first identifier token so that streaming works correctly.
|
||||
self.pop_peek();
|
||||
unexpected!(self, peek, "a value");
|
||||
} else if self.table_as_field {
|
||||
_ => {
|
||||
if self.table_as_field {
|
||||
Value::Idiom(Idiom(vec![Part::Field(self.next_token_value()?)]))
|
||||
} else {
|
||||
Value::Table(self.next_token_value()?)
|
||||
|
@ -399,10 +324,13 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
unexpected!(self, token, "an expression")
|
||||
}
|
||||
};
|
||||
|
||||
// Parse the rest of the idiom if it is being continued.
|
||||
if Self::continues_idiom(self.peek_kind()) {
|
||||
if self.peek_continues_idiom() {
|
||||
let value = match value {
|
||||
Value::Idiom(Idiom(x)) => self.parse_remaining_value_idiom(ctx, x).await,
|
||||
Value::Table(Table(x)) => {
|
||||
|
@ -420,7 +348,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser state
|
||||
/// Expects the starting `[` to already be eaten and its span passed as an argument.
|
||||
pub async fn parse_array(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Array> {
|
||||
pub(crate) async fn parse_array(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Array> {
|
||||
let mut values = Vec::new();
|
||||
enter_object_recursion!(this = self => {
|
||||
loop {
|
||||
|
@ -428,7 +356,7 @@ impl Parser<'_> {
|
|||
break;
|
||||
}
|
||||
|
||||
let value = ctx.run(|ctx| this.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| this.parse_value_inherit(ctx)).await?;
|
||||
values.push(value);
|
||||
|
||||
if !this.eat(t!(",")) {
|
||||
|
@ -445,7 +373,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects the starting `|` to already be eaten and its span passed as an argument.
|
||||
pub fn parse_mock(&mut self, start: Span) -> ParseResult<Mock> {
|
||||
pub(super) fn parse_mock(&mut self, start: Span) -> ParseResult<Mock> {
|
||||
let name = self.next_token_value::<Ident>()?.0;
|
||||
expected!(self, t!(":"));
|
||||
let from = self.next_token_value()?;
|
||||
|
@ -458,7 +386,7 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
pub async fn parse_closure_or_mock(
|
||||
pub(super) async fn parse_closure_or_mock(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Span,
|
||||
|
@ -469,7 +397,7 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
pub async fn parse_closure(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Value> {
|
||||
pub(super) async fn parse_closure(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Value> {
|
||||
let mut args = Vec::new();
|
||||
loop {
|
||||
if self.eat(t!("|")) {
|
||||
|
@ -499,7 +427,7 @@ impl Parser<'_> {
|
|||
self.parse_closure_after_args(ctx, args).await
|
||||
}
|
||||
|
||||
pub async fn parse_closure_after_args(
|
||||
pub(super) async fn parse_closure_after_args(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
args: Vec<(Ident, Kind)>,
|
||||
|
@ -510,7 +438,7 @@ impl Parser<'_> {
|
|||
let body = Value::Block(Box::new(ctx.run(|ctx| self.parse_block(ctx, start)).await?));
|
||||
(returns, body)
|
||||
} else {
|
||||
let body = ctx.run(|ctx| self.parse_value(ctx)).await?;
|
||||
let body = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
(None, body)
|
||||
};
|
||||
|
||||
|
@ -539,7 +467,7 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
pub async fn parse_inner_subquery_or_coordinate(
|
||||
pub(super) async fn parse_inner_subquery_or_coordinate(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Span,
|
||||
|
@ -606,11 +534,9 @@ impl Parser<'_> {
|
|||
let stmt = self.parse_rebuild_stmt()?;
|
||||
Subquery::Rebuild(stmt)
|
||||
}
|
||||
TokenKind::Digits | TokenKind::Number(_) | t!("+") | t!("-") => {
|
||||
let number_token = self.glue()?;
|
||||
if matches!(self.peek_kind(), TokenKind::Number(_))
|
||||
&& self.peek_token_at(1).kind == t!(",")
|
||||
{
|
||||
TokenKind::Digits | TokenKind::Glued(Glued::Number) | t!("+") | t!("-") => {
|
||||
if self.glue_and_peek1()?.kind == t!(",") {
|
||||
let number_span = self.peek().span;
|
||||
let number = self.next_token_value::<Number>()?;
|
||||
// eat ','
|
||||
self.next();
|
||||
|
@ -618,8 +544,8 @@ impl Parser<'_> {
|
|||
if matches!(number, Number::Decimal(_))
|
||||
|| matches!(number, Number::Float(x) if x.is_nan())
|
||||
{
|
||||
bail!("Unexpected token `dec` expecte a non-decimal, non-number",
|
||||
@number_token.span => "Coordinate numbers can't be NaN or a decimal");
|
||||
bail!("Unexpected token, expected a non-decimal, non-NaN, number",
|
||||
@number_span => "Coordinate numbers can't be NaN or a decimal");
|
||||
}
|
||||
|
||||
let x = number.as_float();
|
||||
|
@ -627,12 +553,12 @@ impl Parser<'_> {
|
|||
self.expect_closing_delimiter(t!(")"), start)?;
|
||||
return Ok(Value::Geometry(Geometry::Point(Point::from((x, y)))));
|
||||
} else {
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
Subquery::Value(value)
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
Subquery::Value(value)
|
||||
}
|
||||
};
|
||||
|
@ -650,7 +576,7 @@ impl Parser<'_> {
|
|||
Ok(Value::Subquery(Box::new(res)))
|
||||
}
|
||||
|
||||
pub async fn parse_inner_subquery(
|
||||
pub(super) async fn parse_inner_subquery(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
start: Option<Span>,
|
||||
|
@ -718,7 +644,7 @@ impl Parser<'_> {
|
|||
Subquery::Rebuild(stmt)
|
||||
}
|
||||
_ => {
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
Subquery::Value(value)
|
||||
}
|
||||
};
|
||||
|
@ -762,7 +688,11 @@ impl Parser<'_> {
|
|||
|
||||
/// Parses a strand with legacy rules, parsing to a record id, datetime or uuid if the string
|
||||
/// matches.
|
||||
pub async fn reparse_legacy_strand(&mut self, ctx: &mut Stk, text: &str) -> Option<Value> {
|
||||
pub(super) async fn reparse_legacy_strand(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
text: &str,
|
||||
) -> Option<Value> {
|
||||
if let Ok(x) = Parser::new(text.as_bytes()).parse_thing(ctx).await {
|
||||
return Some(Value::Thing(x));
|
||||
}
|
||||
|
@ -783,7 +713,7 @@ impl Parser<'_> {
|
|||
break;
|
||||
}
|
||||
|
||||
let arg = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let arg = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
args.push(arg);
|
||||
|
||||
if !self.eat(t!(",")) {
|
||||
|
@ -792,146 +722,13 @@ impl Parser<'_> {
|
|||
}
|
||||
}
|
||||
let token = expected!(self, t!("{"));
|
||||
let mut span = self.lexer.lex_compound::<token::JavaScript>(token)?.span;
|
||||
let mut span = self.lexer.lex_compound(token, compound::javascript)?.span;
|
||||
// remove the starting `{` and ending `}`.
|
||||
span.offset += 1;
|
||||
span.len -= 2;
|
||||
let body = self.lexer.span_str(span);
|
||||
Ok(Function::Script(Script(body.to_string()), args))
|
||||
}
|
||||
|
||||
/// Parse a simple singular value
|
||||
pub async fn parse_simple_value(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
|
||||
let token = self.peek();
|
||||
let value = match token.kind {
|
||||
t!("NONE") => {
|
||||
self.pop_peek();
|
||||
let value = Value::None;
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
}
|
||||
t!("NULL") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Null;
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
}
|
||||
t!("true") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Bool(true);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
}
|
||||
t!("false") => {
|
||||
self.pop_peek();
|
||||
let value = Value::Bool(false);
|
||||
self.try_parse_range(ctx, Some(&value)).await?.unwrap_or(value)
|
||||
}
|
||||
t!("r\"") => {
|
||||
self.pop_peek();
|
||||
let thing = self.parse_record_string(ctx, true).await?;
|
||||
Value::Thing(thing)
|
||||
}
|
||||
t!("r'") => {
|
||||
self.pop_peek();
|
||||
let thing = self.parse_record_string(ctx, false).await?;
|
||||
Value::Thing(thing)
|
||||
}
|
||||
t!("d\"") | t!("d'") => {
|
||||
let datetime = self.next_token_value()?;
|
||||
Value::Datetime(datetime)
|
||||
}
|
||||
t!("u\"") | t!("u'") => {
|
||||
let uuid = self.next_token_value()?;
|
||||
Value::Uuid(uuid)
|
||||
}
|
||||
t!("'") | t!("\"") | TokenKind::Strand => {
|
||||
let s = self.next_token_value::<Strand>()?;
|
||||
if self.legacy_strands {
|
||||
if let Some(x) = self.reparse_legacy_strand(ctx, &s.0).await {
|
||||
return Ok(x);
|
||||
}
|
||||
}
|
||||
Value::Strand(s)
|
||||
}
|
||||
t!("+") | t!("-") | TokenKind::Number(_) | TokenKind::Digits | TokenKind::Duration => {
|
||||
self.parse_number_like_prime()?
|
||||
}
|
||||
TokenKind::NaN => {
|
||||
self.pop_peek();
|
||||
Value::Number(Number::Float(f64::NAN))
|
||||
}
|
||||
t!("$param") => {
|
||||
let value = Value::Param(self.next_token_value()?);
|
||||
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
|
||||
}
|
||||
t!("[") => {
|
||||
self.pop_peek();
|
||||
self.parse_array(ctx, token.span).await.map(Value::Array)?
|
||||
}
|
||||
t!("{") => {
|
||||
self.pop_peek();
|
||||
let value = self.parse_object_like(ctx, token.span).await?;
|
||||
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
|
||||
}
|
||||
t!("(") => {
|
||||
self.pop_peek();
|
||||
let value = self.parse_inner_subquery_or_coordinate(ctx, token.span).await?;
|
||||
self.try_parse_inline(ctx, &value).await?.unwrap_or(value)
|
||||
}
|
||||
_ => {
|
||||
self.glue()?;
|
||||
let peek = self.peek_token_at(1);
|
||||
if peek.kind.has_data() {
|
||||
unexpected!(self, peek, "a value");
|
||||
} else if self.table_as_field {
|
||||
Value::Idiom(Idiom(vec![Part::Field(self.next_token_value()?)]))
|
||||
} else {
|
||||
Value::Table(self.next_token_value()?)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
Ok(value)
|
||||
}
|
||||
|
||||
pub fn tokenkind_can_start_simple_value(t: TokenKind) -> bool {
|
||||
matches!(
|
||||
t,
|
||||
t!("NONE")
|
||||
| t!("NULL") | t!("true")
|
||||
| t!("false")
|
||||
| t!("r\"") | t!("r'")
|
||||
| t!("d\"") | t!("d'")
|
||||
| t!("u\"") | t!("u'")
|
||||
| t!("\"") | t!("'")
|
||||
| t!("+") | t!("-")
|
||||
| TokenKind::Number(_)
|
||||
| TokenKind::Digits
|
||||
| TokenKind::Duration
|
||||
| TokenKind::NaN
|
||||
| t!("$param")
|
||||
| t!("[") | t!("{")
|
||||
| t!("(") | TokenKind::Keyword(_)
|
||||
| TokenKind::Language(_)
|
||||
| TokenKind::Algorithm(_)
|
||||
| TokenKind::Distance(_)
|
||||
| TokenKind::VectorType(_)
|
||||
| TokenKind::Identifier
|
||||
| TokenKind::Exponent
|
||||
| TokenKind::DatetimeChars(_)
|
||||
| TokenKind::NumberSuffix(_)
|
||||
| TokenKind::DurationSuffix(
|
||||
// All except Micro unicode
|
||||
DurationSuffix::Nano
|
||||
| DurationSuffix::Micro
|
||||
| DurationSuffix::Milli
|
||||
| DurationSuffix::Second
|
||||
| DurationSuffix::Minute
|
||||
| DurationSuffix::Hour
|
||||
| DurationSuffix::Day
|
||||
| DurationSuffix::Week
|
||||
| DurationSuffix::Year
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
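
The script-body hunk above relies on a small piece of span arithmetic: the compound lexer returns a span that still covers the opening `{` and closing `}`, so the parser bumps the offset by one and shrinks the length by two before slicing out the body text. A minimal sketch of that trim in isolation, assuming a toy `Span` type rather than SurrealDB's own:

#[derive(Clone, Copy, Debug, PartialEq)]
struct Span {
    offset: usize, // start of the span in the source text
    len: usize,    // number of bytes covered
}

/// Drop the surrounding `{` and `}` from a braced span, as the
/// script parser above does before taking `span_str`.
fn trim_braces(mut span: Span) -> Span {
    span.offset += 1; // skip the opening `{`
    span.len -= 2; // and drop both delimiters from the length
    span
}

fn main() {
    let src = "{ return 1; }";
    let span = Span { offset: 0, len: src.len() };
    let inner = trim_braces(span);
    assert_eq!(&src[inner.offset..inner.offset + inner.len], " return 1; ");
}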

@ -15,7 +15,7 @@ use crate::{
};

impl Parser<'_> {
pub async fn parse_alter_stmt(&mut self, ctx: &mut Stk) -> ParseResult<AlterStatement> {
pub(crate) async fn parse_alter_stmt(&mut self, ctx: &mut Stk) -> ParseResult<AlterStatement> {
let next = self.next();
match next.kind {
t!("TABLE") => self.parse_alter_table(ctx).await.map(AlterStatement::Table),

@ -23,7 +23,10 @@ impl Parser<'_> {
}
}

pub async fn parse_alter_table(&mut self, ctx: &mut Stk) -> ParseResult<AlterTableStatement> {
pub(crate) async fn parse_alter_table(
&mut self,
ctx: &mut Stk,
) -> ParseResult<AlterTableStatement> {
let if_exists = if self.eat(t!("IF")) {
expected!(self, t!("EXISTS"));
true

@ -9,7 +9,10 @@ use crate::{
};

impl Parser<'_> {
pub async fn parse_create_stmt(&mut self, ctx: &mut Stk) -> ParseResult<CreateStatement> {
pub(crate) async fn parse_create_stmt(
&mut self,
ctx: &mut Stk,
) -> ParseResult<CreateStatement> {
let only = self.eat(t!("ONLY"));
let what = Values(self.parse_what_list(ctx).await?);
let data = self.try_parse_data(ctx).await?;

@ -31,12 +31,13 @@ use crate::{
};

impl Parser<'_> {
pub async fn parse_define_stmt(&mut self, ctx: &mut Stk) -> ParseResult<DefineStatement> {
pub(crate) async fn parse_define_stmt(
&mut self,
ctx: &mut Stk,
) -> ParseResult<DefineStatement> {
let next = self.next();
match next.kind {
t!("NAMESPACE") | t!("ns") => {
self.parse_define_namespace().map(DefineStatement::Namespace)
}
t!("NAMESPACE") => self.parse_define_namespace().map(DefineStatement::Namespace),
t!("DATABASE") => self.parse_define_database().map(DefineStatement::Database),
t!("FUNCTION") => self.parse_define_function(ctx).await.map(DefineStatement::Function),
t!("USER") => self.parse_define_user().map(DefineStatement::User),

@ -59,7 +60,7 @@ impl Parser<'_> {
}
}

pub fn parse_define_namespace(&mut self) -> ParseResult<DefineNamespaceStatement> {
pub(crate) fn parse_define_namespace(&mut self) -> ParseResult<DefineNamespaceStatement> {
let (if_not_exists, overwrite) = if self.eat(t!("IF")) {
expected!(self, t!("NOT"));
expected!(self, t!("EXISTS"));

@ -330,12 +331,12 @@ impl Parser<'_> {
t!("SIGNUP") => {
self.pop_peek();
ac.signup =
Some(stk.run(|stk| self.parse_value(stk)).await?);
Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
t!("SIGNIN") => {
self.pop_peek();
ac.signin =
Some(stk.run(|stk| self.parse_value(stk)).await?);
Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
_ => break,
}

@ -370,7 +371,7 @@ impl Parser<'_> {
}
t!("AUTHENTICATE") => {
self.pop_peek();
res.authenticate = Some(stk.run(|stk| self.parse_value(stk)).await?);
res.authenticate = Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
t!("DURATION") => {
self.pop_peek();

@ -576,11 +577,11 @@ impl Parser<'_> {
}
t!("SIGNUP") => {
self.pop_peek();
ac.signup = Some(stk.run(|stk| self.parse_value(stk)).await?);
ac.signup = Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
t!("SIGNIN") => {
self.pop_peek();
ac.signin = Some(stk.run(|stk| self.parse_value(stk)).await?);
ac.signin = Some(stk.run(|stk| self.parse_value_table(stk)).await?);
}
_ => break,
}

@ -614,7 +615,7 @@ impl Parser<'_> {
match self.peek_kind() {
t!("VALUE") => {
self.pop_peek();
res.value = ctx.run(|ctx| self.parse_value(ctx)).await?;
res.value = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
}
t!("COMMENT") => {
self.pop_peek();

@ -753,13 +754,13 @@ impl Parser<'_> {
match self.peek_kind() {
t!("WHEN") => {
self.pop_peek();
res.when = ctx.run(|ctx| self.parse_value(ctx)).await?;
res.when = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
}
t!("THEN") => {
self.pop_peek();
res.then = Values(vec![ctx.run(|ctx| self.parse_value(ctx)).await?]);
res.then = Values(vec![ctx.run(|ctx| self.parse_value_table(ctx)).await?]);
while self.eat(t!(",")) {
res.then.0.push(ctx.run(|ctx| self.parse_value(ctx)).await?)
res.then.0.push(ctx.run(|ctx| self.parse_value_table(ctx)).await?)
}
}
t!("COMMENT") => {

@ -812,15 +813,15 @@ impl Parser<'_> {
}
t!("VALUE") => {
self.pop_peek();
res.value = Some(ctx.run(|ctx| self.parse_value(ctx)).await?);
res.value = Some(ctx.run(|ctx| self.parse_value_field(ctx)).await?);
}
t!("ASSERT") => {
self.pop_peek();
res.assert = Some(ctx.run(|ctx| self.parse_value(ctx)).await?);
res.assert = Some(ctx.run(|ctx| self.parse_value_field(ctx)).await?);
}
t!("DEFAULT") => {
self.pop_peek();
res.default = Some(ctx.run(|ctx| self.parse_value(ctx)).await?);
res.default = Some(ctx.run(|ctx| self.parse_value_field(ctx)).await?);
}
t!("PERMISSIONS") => {
self.pop_peek();

@ -9,7 +9,10 @@ use crate::{
};

impl Parser<'_> {
pub async fn parse_delete_stmt(&mut self, ctx: &mut Stk) -> ParseResult<DeleteStatement> {
pub(crate) async fn parse_delete_stmt(
&mut self,
ctx: &mut Stk,
) -> ParseResult<DeleteStatement> {
self.eat(t!("FROM"));
let only = self.eat(t!("ONLY"));
let what = Values(self.parse_what_list(ctx).await?);

@ -12,8 +12,8 @@ use crate::{
};

impl Parser<'_> {
pub async fn parse_if_stmt(&mut self, ctx: &mut Stk) -> ParseResult<IfelseStatement> {
let condition = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
pub(crate) async fn parse_if_stmt(&mut self, ctx: &mut Stk) -> ParseResult<IfelseStatement> {
let condition = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;

let mut res = IfelseStatement {
exprs: Vec::new(),

@ -23,7 +23,7 @@ impl Parser<'_> {
let next = self.next();
match next.kind {
t!("THEN") => {
let body = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let body = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
self.eat(t!(";"));
res.exprs.push((condition, body));
self.parse_worded_tail(ctx, &mut res).await?;

@ -50,13 +50,13 @@ impl Parser<'_> {
t!("END") => return Ok(()),
t!("ELSE") => {
if self.eat(t!("IF")) {
let condition = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let condition = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
expected!(self, t!("THEN"));
let body = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let body = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
self.eat(t!(";"));
res.exprs.push((condition, body));
} else {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
self.eat(t!(";"));
expected!(self, t!("END"));
res.close = Some(value);

@ -78,7 +78,7 @@ impl Parser<'_> {
t!("ELSE") => {
self.pop_peek();
if self.eat(t!("IF")) {
let condition = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let condition = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
let span = expected!(self, t!("{")).span;
let body = self.parse_block(ctx, span).await?;
res.exprs.push((condition, body.into()));

@ -1,8 +1,9 @@
use reblessive::Stk;

use crate::{
sql::{statements::InsertStatement, Data, Value},
sql::{statements::InsertStatement, Data, Idiom, Subquery, Value},
syn::{
error::bail,
parser::{mac::expected, ParseResult, Parser},
token::t,
},

@ -31,43 +32,7 @@ impl Parser<'_> {
None
};

let data = match self.peek_kind() {
t!("(") => {
let start = self.pop_peek().span;
let fields = self.parse_idiom_list(ctx).await?;
self.expect_closing_delimiter(t!(")"), start)?;
expected!(self, t!("VALUES"));

let start = expected!(self, t!("(")).span;
let mut values = vec![ctx.run(|ctx| self.parse_value(ctx)).await?];
while self.eat(t!(",")) {
values.push(ctx.run(|ctx| self.parse_value(ctx)).await?);
}
self.expect_closing_delimiter(t!(")"), start)?;

let mut values = vec![values];
while self.eat(t!(",")) {
let start = expected!(self, t!("(")).span;
let mut inner_values = vec![ctx.run(|ctx| self.parse_value(ctx)).await?];
while self.eat(t!(",")) {
inner_values.push(ctx.run(|ctx| self.parse_value(ctx)).await?);
}
values.push(inner_values);
self.expect_closing_delimiter(t!(")"), start)?;
}

Data::ValuesExpression(
values
.into_iter()
.map(|row| fields.iter().cloned().zip(row).collect())
.collect(),
)
}
_ => {
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
Data::SingleExpression(value)
}
};
let data = self.parse_insert_values(ctx).await?;

let update = if self.eat(t!("ON")) {
Some(self.parse_insert_update(ctx).await?)

@ -91,19 +56,122 @@ impl Parser<'_> {
})
}

fn extract_idiom(subquery: Subquery) -> Option<Idiom> {
let Subquery::Value(Value::Idiom(idiom)) = subquery else {
return None;
};

Some(idiom)
}

async fn parse_insert_values(&mut self, ctx: &mut Stk) -> ParseResult<Data> {
let token = self.peek();
// not a `(` so it cant be `(a,b) VALUES (c,d)`
if token.kind != t!("(") {
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
return Ok(Data::SingleExpression(value));
}

// might still be a subquery `(select foo from ...`
self.pop_peek();
let before = self.peek().span;
let backup = self.table_as_field;
self.table_as_field = true;
let subquery = self.parse_inner_subquery(ctx, None).await?;
self.table_as_field = backup;
let subquery_span = before.covers(self.last_span());

let mut idioms = Vec::new();
let select_span = if !self.eat(t!(",")) {
// not a comma so it might be a single (a) VALUES (b) or a subquery
self.expect_closing_delimiter(t!(")"), token.span)?;
let select_span = token.span.covers(self.last_span());

if !self.eat(t!("VALUES")) {
// found a subquery
return Ok(Data::SingleExpression(Value::Subquery(Box::new(subquery))));
}

// found an values expression, so subquery must be an idiom
let Some(idiom) = Self::extract_idiom(subquery) else {
bail!("Invalid value, expected an idiom in INSERT VALUES statement.",
@subquery_span => "Here only idioms are allowed")
};

idioms.push(idiom);
select_span
} else {
// found an values expression, so subquery must be an idiom
let Some(idiom) = Self::extract_idiom(subquery) else {
bail!("Invalid value, expected an idiom in INSERT VALUES statement.",
@subquery_span => "Here only idioms are allowed")
};

idioms.push(idiom);

loop {
idioms.push(self.parse_plain_idiom(ctx).await?);

if !self.eat(t!(",")) {
break;
}
}

self.expect_closing_delimiter(t!(")"), token.span)?;

expected!(self, t!("VALUES"));

token.span.covers(self.last_span())
};

let mut insertions = Vec::new();
loop {
let mut values = Vec::new();
let start = expected!(self, t!("(")).span;
loop {
values.push(self.parse_value_table(ctx).await?);

if !self.eat(t!(",")) {
break;
}
}

self.expect_closing_delimiter(t!(")"), start)?;
let span = start.covers(self.last_span());

if values.len() != idioms.len() {
bail!("Invalid numbers of values to insert, found {} value(s) but selector requires {} value(s).",
values.len(), idioms.len(),
@span,
@select_span => "This selector has {} field(s)",idioms.len()
);
}

insertions.push(values);

if !self.eat(t!(",")) {
break;
}
}

Ok(Data::ValuesExpression(
insertions.into_iter().map(|row| idioms.iter().cloned().zip(row).collect()).collect(),
))
}

async fn parse_insert_update(&mut self, ctx: &mut Stk) -> ParseResult<Data> {
expected!(self, t!("DUPLICATE"));
expected!(self, t!("KEY"));
expected!(self, t!("UPDATE"));
let l = self.parse_plain_idiom(ctx).await?;
let o = self.parse_assigner()?;
let r = ctx.run(|ctx| self.parse_value(ctx)).await?;
let r = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
let mut data = vec![(l, o, r)];

while self.eat(t!(",")) {
let l = self.parse_plain_idiom(ctx).await?;
let o = self.parse_assigner()?;
let r = ctx.run(|ctx| self.parse_value(ctx)).await?;
let r = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
data.push((l, o, r))
}
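
The rewritten `parse_insert_values` above settles a real grammar ambiguity: after `INSERT INTO foo` an opening `(` may begin a field tuple such as `(a, b) VALUES (1, 2)` or a whole subquery such as `(SELECT foo FROM baz)`, and only the tokens at and after the closing `)` decide which. A rough stand-alone sketch of that decide-after-the-group idea follows; the `classify` helper and `InsertData` enum are invented for illustration and are not SurrealDB's types:

#[derive(Debug, PartialEq)]
enum InsertData {
    /// The group was a field list: `(a, b) VALUES (...)`.
    Fields(Vec<String>),
    /// The group was a single subquery expression: `(SELECT ...)`.
    Subquery(String),
}

/// `inner` is the text between the parentheses and `after` the text that
/// follows the closing `)`. As in the parser above, a following `VALUES`
/// keyword means the group must be reinterpreted as a field list;
/// otherwise it stays a subquery.
fn classify(inner: &str, after: &str) -> InsertData {
    if after.trim_start().to_uppercase().starts_with("VALUES") {
        InsertData::Fields(inner.split(',').map(|s| s.trim().to_string()).collect())
    } else {
        InsertData::Subquery(inner.to_string())
    }
}

fn main() {
    assert_eq!(
        classify("a, b", " VALUES (1, 2)"),
        InsertData::Fields(vec!["a".into(), "b".into()])
    );
    assert_eq!(
        classify("SELECT foo FROM baz", ""),
        InsertData::Subquery("SELECT foo FROM baz".into())
    );
}

The real parser does the same one level up: it first parses the group as a subquery with `table_as_field` enabled, then downgrades it to an idiom via `extract_idiom` only once a `,` or `VALUES` proves it was a selector.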
@ -12,8 +12,9 @@ use crate::sql::statements::{
|
|||
KillStatement, LiveStatement, OptionStatement, SetStatement, ThrowStatement,
|
||||
};
|
||||
use crate::sql::{Fields, Ident, Param};
|
||||
use crate::syn::lexer::compound;
|
||||
use crate::syn::parser::enter_query_recursion;
|
||||
use crate::syn::token::{t, TokenKind};
|
||||
use crate::syn::token::{t, Glued, TokenKind};
|
||||
use crate::{
|
||||
sql::{
|
||||
statements::{
|
||||
|
@ -42,7 +43,7 @@ mod update;
|
|||
mod upsert;
|
||||
|
||||
impl Parser<'_> {
|
||||
pub async fn parse_stmt_list(&mut self, ctx: &mut Stk) -> ParseResult<Statements> {
|
||||
pub(super) async fn parse_stmt_list(&mut self, ctx: &mut Stk) -> ParseResult<Statements> {
|
||||
let mut res = Vec::new();
|
||||
loop {
|
||||
match self.peek_kind() {
|
||||
|
@ -61,7 +62,7 @@ impl Parser<'_> {
|
|||
}
|
||||
|
||||
let token = self.peek();
|
||||
if Self::token_kind_starts_statement(token.kind) {
|
||||
if Self::kind_starts_statement(token.kind) {
|
||||
// user likely forgot a semicolon.
|
||||
unexpected!(self,token,"the query to end", => "maybe forgot a semicolon after the previous statement?");
|
||||
}
|
||||
|
@ -74,38 +75,6 @@ impl Parser<'_> {
|
|||
Ok(Statements(res))
|
||||
}
|
||||
|
||||
fn token_kind_starts_statement(kind: TokenKind) -> bool {
|
||||
matches!(
|
||||
kind,
|
||||
t!("ACCESS")
|
||||
| t!("ALTER")
|
||||
| t!("ANALYZE")
|
||||
| t!("BEGIN")
|
||||
| t!("BREAK")
|
||||
| t!("CANCEL")
|
||||
| t!("COMMIT")
|
||||
| t!("CONTINUE")
|
||||
| t!("CREATE")
|
||||
| t!("DEFINE")
|
||||
| t!("DELETE")
|
||||
| t!("FOR") | t!("IF")
|
||||
| t!("INFO") | t!("INSERT")
|
||||
| t!("KILL") | t!("LIVE")
|
||||
| t!("OPTION")
|
||||
| t!("REBUILD")
|
||||
| t!("RETURN")
|
||||
| t!("RELATE")
|
||||
| t!("REMOVE")
|
||||
| t!("SELECT")
|
||||
| t!("LET") | t!("SHOW")
|
||||
| t!("SLEEP")
|
||||
| t!("THROW")
|
||||
| t!("UPDATE")
|
||||
| t!("UPSERT")
|
||||
| t!("USE")
|
||||
)
|
||||
}
|
||||
|
||||
pub(super) async fn parse_stmt(&mut self, ctx: &mut Stk) -> ParseResult<Statement> {
|
||||
enter_query_recursion!(this = self => {
|
||||
this.parse_stmt_inner(ctx).await
|
||||
|
@ -245,7 +214,7 @@ impl Parser<'_> {
|
|||
}
|
||||
_ => {
|
||||
// TODO: Provide information about keywords.
|
||||
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let value = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
|
||||
Ok(Self::refine_stmt_value(value))
|
||||
}
|
||||
}
|
||||
|
@ -334,7 +303,7 @@ impl Parser<'_> {
|
|||
}
|
||||
_ => {
|
||||
// TODO: Provide information about keywords.
|
||||
let v = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let v = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
Ok(Self::refine_entry_value(v))
|
||||
}
|
||||
}
|
||||
|
@ -474,7 +443,7 @@ impl Parser<'_> {
|
|||
fn parse_use_stmt(&mut self) -> ParseResult<UseStatement> {
|
||||
let peek = self.peek();
|
||||
let (ns, db) = match peek.kind {
|
||||
t!("NAMESPACE") | t!("ns") => {
|
||||
t!("NAMESPACE") => {
|
||||
self.pop_peek();
|
||||
let ns = self.next_token_value::<Ident>()?.0;
|
||||
let db = self
|
||||
|
@ -502,10 +471,10 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `FOR` to already be consumed.
|
||||
pub async fn parse_for_stmt(&mut self, stk: &mut Stk) -> ParseResult<ForeachStatement> {
|
||||
pub(super) async fn parse_for_stmt(&mut self, stk: &mut Stk) -> ParseResult<ForeachStatement> {
|
||||
let param = self.next_token_value()?;
|
||||
expected!(self, t!("IN"));
|
||||
let range = stk.run(|stk| self.parse_value(stk)).await?;
|
||||
let range = stk.run(|stk| self.parse_value_inherit(stk)).await?;
|
||||
|
||||
let span = expected!(self, t!("{")).span;
|
||||
let block = self.parse_block(stk, span).await?;
|
||||
|
@ -520,12 +489,12 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `INFO` to already be consumed.
|
||||
pub(crate) fn parse_info_stmt(&mut self) -> ParseResult<InfoStatement> {
|
||||
pub(super) fn parse_info_stmt(&mut self) -> ParseResult<InfoStatement> {
|
||||
expected!(self, t!("FOR"));
|
||||
let next = self.next();
|
||||
let mut stmt = match next.kind {
|
||||
t!("ROOT") => InfoStatement::Root(false),
|
||||
t!("NAMESPACE") | t!("ns") => InfoStatement::Ns(false),
|
||||
t!("NAMESPACE") => InfoStatement::Ns(false),
|
||||
t!("DATABASE") => InfoStatement::Db(false, None),
|
||||
t!("TABLE") => {
|
||||
let ident = self.next_token_value()?;
|
||||
|
@ -561,10 +530,12 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `KILL` to already be consumed.
|
||||
pub(crate) fn parse_kill_stmt(&mut self) -> ParseResult<KillStatement> {
|
||||
pub(super) fn parse_kill_stmt(&mut self) -> ParseResult<KillStatement> {
|
||||
let peek = self.peek();
|
||||
let id = match peek.kind {
|
||||
t!("u\"") | t!("u'") => self.next_token_value().map(Value::Uuid)?,
|
||||
t!("u\"") | t!("u'") | TokenKind::Glued(Glued::Uuid) => {
|
||||
self.next_token_value().map(Value::Uuid)?
|
||||
}
|
||||
t!("$param") => self.next_token_value().map(Value::Param)?,
|
||||
_ => unexpected!(self, peek, "a UUID or a parameter"),
|
||||
};
|
||||
|
@ -577,7 +548,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `LIVE` to already be consumed.
|
||||
pub(crate) async fn parse_live_stmt(&mut self, stk: &mut Stk) -> ParseResult<LiveStatement> {
|
||||
pub(super) async fn parse_live_stmt(&mut self, stk: &mut Stk) -> ParseResult<LiveStatement> {
|
||||
expected!(self, t!("SELECT"));
|
||||
|
||||
let expr = match self.peek_kind() {
|
||||
|
@ -602,7 +573,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `OPTION` to already be consumed.
|
||||
pub(crate) fn parse_option_stmt(&mut self) -> ParseResult<OptionStatement> {
|
||||
pub(super) fn parse_option_stmt(&mut self) -> ParseResult<OptionStatement> {
|
||||
let name = self.next_token_value()?;
|
||||
let what = if self.eat(t!("=")) {
|
||||
let next = self.next();
|
||||
|
@ -620,7 +591,7 @@ impl Parser<'_> {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn parse_rebuild_stmt(&mut self) -> ParseResult<RebuildStatement> {
|
||||
pub(super) fn parse_rebuild_stmt(&mut self) -> ParseResult<RebuildStatement> {
|
||||
let next = self.next();
|
||||
let res = match next.kind {
|
||||
t!("INDEX") => {
|
||||
|
@ -650,11 +621,11 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `RETURN` to already be consumed.
|
||||
pub(crate) async fn parse_return_stmt(
|
||||
pub(super) async fn parse_return_stmt(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
) -> ParseResult<OutputStatement> {
|
||||
let what = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
|
||||
let what = ctx.run(|ctx| self.parse_value_inherit(ctx)).await?;
|
||||
let fetch = self.try_parse_fetch(ctx).await?;
|
||||
Ok(OutputStatement {
|
||||
what,
|
||||
|
@ -671,7 +642,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `LET` to already be consumed.
|
||||
pub(crate) async fn parse_let_stmt(&mut self, ctx: &mut Stk) -> ParseResult<SetStatement> {
|
||||
pub(super) async fn parse_let_stmt(&mut self, ctx: &mut Stk) -> ParseResult<SetStatement> {
|
||||
let name = self.next_token_value::<Param>()?.0 .0;
|
||||
let kind = if self.eat(t!(":")) {
|
||||
Some(self.parse_inner_kind(ctx).await?)
|
||||
|
@ -679,7 +650,7 @@ impl Parser<'_> {
|
|||
None
|
||||
};
|
||||
expected!(self, t!("="));
|
||||
let what = self.parse_value(ctx).await?;
|
||||
let what = self.parse_value_inherit(ctx).await?;
|
||||
Ok(SetStatement {
|
||||
name,
|
||||
what,
|
||||
|
@ -691,7 +662,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `SHOW` to already be consumed.
|
||||
pub(crate) fn parse_show_stmt(&mut self) -> ParseResult<ShowStatement> {
|
||||
pub(super) fn parse_show_stmt(&mut self) -> ParseResult<ShowStatement> {
|
||||
expected!(self, t!("CHANGES"));
|
||||
expected!(self, t!("FOR"));
|
||||
|
||||
|
@ -709,10 +680,17 @@ impl Parser<'_> {
|
|||
|
||||
let next = self.peek();
|
||||
let since = match next.kind {
|
||||
TokenKind::Digits | TokenKind::Number(_) => {
|
||||
ShowSince::Versionstamp(self.next_token_value()?)
|
||||
TokenKind::Digits => {
|
||||
self.pop_peek();
|
||||
let int = self.lexer.lex_compound(next, compound::integer)?.value;
|
||||
ShowSince::Versionstamp(int)
|
||||
}
|
||||
t!("d\"") | t!("d'") => ShowSince::Timestamp(self.next_token_value()?),
|
||||
TokenKind::Glued(_) => {
|
||||
// This panic can be upheld within this function, just make sure you don't call
|
||||
// glue here and the `next()` before this peek should eat any glued value.
|
||||
panic!("A glued number token would truncate the timestamp so no gluing is allowed before this production.");
|
||||
}
|
||||
_ => unexpected!(self, next, "a version stamp or a date-time"),
|
||||
};
|
||||
|
||||
|
@ -729,7 +707,7 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `SLEEP` to already be consumed.
|
||||
pub(crate) fn parse_sleep_stmt(&mut self) -> ParseResult<SleepStatement> {
|
||||
pub(super) fn parse_sleep_stmt(&mut self) -> ParseResult<SleepStatement> {
|
||||
let duration = self.next_token_value()?;
|
||||
Ok(SleepStatement {
|
||||
duration,
|
||||
|
@ -740,8 +718,8 @@ impl Parser<'_> {
|
|||
///
|
||||
/// # Parser State
|
||||
/// Expects `THROW` to already be consumed.
|
||||
pub(crate) async fn parse_throw_stmt(&mut self, ctx: &mut Stk) -> ParseResult<ThrowStatement> {
|
||||
let error = self.parse_value_field(ctx).await?;
|
||||
pub(super) async fn parse_throw_stmt(&mut self, ctx: &mut Stk) -> ParseResult<ThrowStatement> {
|
||||
let error = self.parse_value_inherit(ctx).await?;
|
||||
Ok(ThrowStatement {
|
||||
error,
|
||||
})
|
||||
|
|
|
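
One detail in the `SHOW CHANGES ... SINCE` hunk above is worth spelling out: the digits are re-lexed on demand through `lex_compound(next, compound::integer)` rather than taken from a pre-glued number token, because (as the panic message states) a glued number token would already have consumed characters in a way that can truncate the value. A small stand-alone analogue of this lex-the-raw-span-on-demand pattern, with entirely hypothetical names:

/// A bare-bones stand-in for a lexer token that only keeps the raw
/// source slice it covered; no interpretation has happened yet.
struct RawToken<'a> {
    text: &'a str,
}

/// Interpret a digits token as a u64 versionstamp only once the parser
/// knows that this is what the digits must mean. Because nothing was
/// glued beforehand, the slice is still exactly what the user wrote.
fn lex_integer(token: &RawToken) -> Result<u64, std::num::ParseIntError> {
    token.text.parse::<u64>()
}

fn main() {
    let tok = RawToken { text: "1700000000" };
    assert_eq!(lex_integer(&tok).unwrap(), 1_700_000_000);
}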

@ -37,7 +37,7 @@ impl Parser<'_> {
loop {
let idiom = self.parse_plain_idiom(ctx).await?;
let operator = self.parse_assigner()?;
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
set_list.push((idiom, operator, value));
if !self.eat(t!(",")) {
break;

@ -52,19 +52,19 @@ impl Parser<'_> {
}
t!("PATCH") => {
self.pop_peek();
Data::PatchExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::PatchExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
t!("MERGE") => {
self.pop_peek();
Data::MergeExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::MergeExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
t!("REPLACE") => {
self.pop_peek();
Data::ReplaceExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::ReplaceExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
t!("CONTENT") => {
self.pop_peek();
Data::ContentExpression(ctx.run(|ctx| self.parse_value(ctx)).await?)
Data::ContentExpression(ctx.run(|ctx| self.parse_value_field(ctx)).await?)
}
_ => return Ok(None),
};

@ -378,7 +378,7 @@ impl Parser<'_> {
pub fn parse_base(&mut self, scope_allowed: bool) -> ParseResult<Base> {
let next = self.next();
match next.kind {
t!("NAMESPACE") | t!("ns") => Ok(Base::Ns),
t!("NAMESPACE") => Ok(Base::Ns),
t!("DATABASE") => Ok(Base::Db),
t!("ROOT") => Ok(Base::Root),
t!("SCOPE") => {

@ -444,28 +444,26 @@ impl Parser<'_> {
})
}

pub fn convert_distance(&mut self, k: &DistanceKind) -> ParseResult<Distance> {
let dist = match k {
DistanceKind::Chebyshev => Distance::Chebyshev,
DistanceKind::Cosine => Distance::Cosine,
DistanceKind::Euclidean => Distance::Euclidean,
DistanceKind::Manhattan => Distance::Manhattan,
DistanceKind::Hamming => Distance::Hamming,
DistanceKind::Jaccard => Distance::Jaccard,

DistanceKind::Minkowski => {
let distance = self.next_token_value()?;
Distance::Minkowski(distance)
}
DistanceKind::Pearson => Distance::Pearson,
};
Ok(dist)
}

pub fn parse_distance(&mut self) -> ParseResult<Distance> {
let next = self.next();
match next.kind {
TokenKind::Distance(k) => self.convert_distance(&k),
TokenKind::Distance(k) => {
let dist = match k {
DistanceKind::Chebyshev => Distance::Chebyshev,
DistanceKind::Cosine => Distance::Cosine,
DistanceKind::Euclidean => Distance::Euclidean,
DistanceKind::Manhattan => Distance::Manhattan,
DistanceKind::Hamming => Distance::Hamming,
DistanceKind::Jaccard => Distance::Jaccard,

DistanceKind::Minkowski => {
let distance = self.next_token_value()?;
Distance::Minkowski(distance)
}
DistanceKind::Pearson => Distance::Pearson,
};
Ok(dist)
}
_ => unexpected!(self, next, "a distance measure"),
}
}

@ -4,7 +4,7 @@ use crate::{
sql::{statements::RelateStatement, Subquery, Value},
syn::{
parser::{
mac::{expected, unexpected},
mac::{expected, expected_whitespace, unexpected},
ParseResult, Parser,
},
token::t,

@ -39,14 +39,18 @@ impl Parser<'_> {
let next = self.next();
let is_o = match next.kind {
t!("->") => true,
t!("<-") => false,
t!("<") => {
expected_whitespace!(self, t!("-"));
false
}
_ => unexpected!(self, next, "a relation arrow"),
};
let kind = self.parse_relate_kind(stk).await?;
if is_o {
expected!(self, t!("->"))
expected!(self, t!("->"));
} else {
expected!(self, t!("<-"))
expected!(self, t!("<"));
expected_whitespace!(self, t!("-"));
};
let second = self.parse_relate_value(stk).await?;
if is_o {

@ -109,8 +113,7 @@ impl Parser<'_> {
}

pub async fn parse_thing_or_table(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
self.glue()?;
if self.peek_token_at(1).kind == t!(":") {
if self.peek_whitespace1().kind == t!(":") {
self.parse_thing(ctx).await.map(Value::Thing)
} else {
self.next_token_value().map(Value::Table)

@ -23,7 +23,7 @@ impl Parser<'_> {
pub async fn parse_remove_stmt(&mut self, ctx: &mut Stk) -> ParseResult<RemoveStatement> {
let next = self.next();
let res = match next.kind {
t!("NAMESPACE") | t!("ns") => {
t!("NAMESPACE") => {
let if_exists = if self.eat(t!("IF")) {
expected!(self, t!("EXISTS"));
true

@ -35,9 +35,9 @@ impl Parser<'_> {

let only = self.eat(t!("ONLY"));

let mut what = vec![stk.run(|ctx| self.parse_value(ctx)).await?];
let mut what = vec![stk.run(|ctx| self.parse_value_table(ctx)).await?];
while self.eat(t!(",")) {
what.push(stk.run(|ctx| self.parse_value(ctx)).await?);
what.push(stk.run(|ctx| self.parse_value_table(ctx)).await?);
}
let what = Values(what);

@ -217,7 +217,7 @@ impl Parser<'_> {
return Ok(None);
}
self.eat(t!("BY"));
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
Ok(Some(Limit(value)))
}

@ -226,7 +226,7 @@ impl Parser<'_> {
return Ok(None);
}
self.eat(t!("AT"));
let value = ctx.run(|ctx| self.parse_value(ctx)).await?;
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
Ok(Some(Start(value)))
}

core/src/syn/parser/test/json.rs (new file, 11 lines)

@ -0,0 +1,11 @@
use crate::syn::parser::mac::test_parse;

#[test]
fn object_with_negative() {
test_parse!(parse_json, r#"{"foo": -1 }"#).unwrap();
}

#[test]
fn array_with_negative() {
test_parse!(parse_json, r#"[-1]"#).unwrap();
}

@ -3,6 +3,7 @@ use crate::{
syn::parser::mac::test_parse,
};

mod json;
mod limit;
mod stmt;
mod streaming;

@ -58,6 +59,20 @@ fn escaped_params() {
test_parse!(parse_query, src).unwrap();
}

#[test]
fn missed_qoute_caused_panic() {
let src = r#"{"id:0,"method":"query","params"["SLEEP 30s"]}"#;

test_parse!(parse_query, src).unwrap_err();
}

#[test]
fn query_object() {
let src = r#"{"id":0,"method":"query","params":["SLEEP 30s"]}"#;

test_parse!(parse_query, src).inspect_err(|e| eprintln!("{}", e.render_on(src))).unwrap();
}

#[test]
fn escaped_params_backtick() {
test_parse!(

@ -90,7 +90,7 @@ pub fn parse_continue() {
fn parse_create() {
let res = test_parse!(
parse_stmt,
"CREATE ONLY foo SET bar = 3, foo +?= 4 RETURN VALUE foo AS bar TIMEOUT 1s PARALLEL"
"CREATE ONLY foo SET bar = 3, foo +?= baz RETURN VALUE foo AS bar TIMEOUT 1s PARALLEL"
)
.unwrap();
assert_eq!(

@ -107,7 +107,7 @@ fn parse_create() {
(
Idiom(vec![Part::Field(Ident("foo".to_owned()))]),
Operator::Ext,
Value::Number(Number::Int(4))
Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))]))
),
])),
output: Some(Output::Fields(Fields(

@ -205,7 +205,7 @@ fn parse_define_function() {
(Ident("b".to_string()), Kind::Array(Box::new(Kind::Bool), Some(3)))
],
block: Block(vec![Entry::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_string()))])),
what: Value::Table(Table("a".to_string())),
fetch: None,
})]),
comment: Some(Strand("test".to_string())),

@ -1709,16 +1709,10 @@ fn parse_if() {
res,
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))]))
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))]))
)
(Value::Table(Table("foo".to_owned())), Value::Table(Table("bar".to_owned()))),
(Value::Table(Table("faz".to_owned())), Value::Table(Table("baz".to_owned())))
],
close: Some(Value::Idiom(Idiom(vec![Part::Field(Ident("baq".to_owned()))])))
close: Some(Value::Table(Table("baq".to_owned())))
})
)
}

@ -1732,20 +1726,20 @@ fn parse_if_block() {
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("bar".to_owned()))
])))]))),
Value::Table(Table("foo".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"bar".to_owned()
)),)]))),
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("baz".to_owned()))
])))]))),
Value::Table(Table("faz".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baz".to_owned()
)),)]))),
)
],
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(
vec![Part::Field(Ident("baq".to_owned()))]
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baq".to_owned()
)))])))),
})
)

@ -2078,6 +2072,51 @@ fn parse_insert() {
)
}

#[test]
fn parse_insert_select() {
let res = test_parse!(parse_stmt, r#"INSERT IGNORE INTO bar (select foo from baz)"#).unwrap();
assert_eq!(
res,
Statement::Insert(InsertStatement {
into: Some(Value::Table(Table("bar".to_owned()))),
data: Data::SingleExpression(Value::Subquery(Box::new(Subquery::Select(
SelectStatement {
expr: Fields(
vec![Field::Single {
expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_string()))])),
alias: None
}],
false
),
omit: None,
only: false,
what: Values(vec![Value::Table(Table("baz".to_string()))]),
with: None,
cond: None,
split: None,
group: None,
order: None,
limit: None,
start: None,
fetch: None,
version: None,
timeout: None,
parallel: false,
explain: None,
tempfiles: false
}
)))),
ignore: true,
update: None,
output: None,
version: None,
timeout: None,
parallel: false,
relation: false,
}),
)
}

#[test]
fn parse_kill() {
let res = test_parse!(parse_stmt, r#"KILL $param"#).unwrap();

@ -2154,7 +2193,7 @@ fn parse_return() {
assert_eq!(
res,
Statement::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("RETRUN".to_owned()))])),
what: Value::Table(Table("RETRUN".to_owned())),
fetch: Some(Fetchs(vec![Fetch(Value::Idiom(Idiom(vec![Part::Field(
Ident("RETURN".to_owned()).to_owned()
)])))])),

@ -192,7 +192,7 @@ fn statements() -> Vec<Statement> {
(Ident("b".to_string()), Kind::Array(Box::new(Kind::Bool), Some(3))),
],
block: Block(vec![Entry::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_string()))])),
what: Value::Table(Table("a".to_string())),
fetch: None,
})]),
comment: Some(Strand("test".to_string())),

@ -440,34 +440,28 @@ fn statements() -> Vec<Statement> {
}),
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])),
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))])),
),
(Value::Table(Table("foo".to_owned())), Value::Table(Table("bar".to_owned()))),
(Value::Table(Table("faz".to_owned())), Value::Table(Table("baz".to_owned()))),
],
close: Some(Value::Idiom(Idiom(vec![Part::Field(Ident("baq".to_owned()))]))),
close: Some(Value::Table(Table("baq".to_owned()))),
}),
Statement::Ifelse(IfelseStatement {
exprs: vec![
(
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("bar".to_owned())),
])))]))),
Value::Table(Table("foo".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"bar".to_owned(),
)))]))),
),
(
Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![
Part::Field(Ident("baz".to_owned())),
])))]))),
Value::Table(Table("faz".to_owned())),
Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baz".to_owned(),
)))]))),
),
],
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(
vec![Part::Field(Ident("baq".to_owned()))],
close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Table(Table(
"baq".to_owned(),
)))])))),
}),
Statement::Info(InfoStatement::Root(false)),

@ -612,7 +606,7 @@ fn statements() -> Vec<Statement> {
id: Value::Uuid(Uuid(uuid::uuid!("e72bee20-f49b-11ec-b939-0242ac120002"))),
}),
Statement::Output(OutputStatement {
what: Value::Idiom(Idiom(vec![Part::Field(Ident("RETRUN".to_owned()))])),
what: Value::Table(Table("RETRUN".to_owned())),
fetch: Some(Fetchs(vec![Fetch(Value::Idiom(Idiom(vec![Part::Field(
Ident("RETURN".to_owned()).to_owned(),
)])))])),

@ -11,12 +11,17 @@ use crate::{

#[test]
fn parse_coordinate() {
test_parse!(parse_value, "(1.88, -18.0)").unwrap();
test_parse!(parse_value_table, "(1.88, -18.0)").unwrap();
}

#[test]
fn parse_like_operator() {
test_parse!(parse_value, "a ~ b").unwrap();
test_parse!(parse_value_table, "a ~ b").unwrap();
}

#[test]
fn parse_range_operator() {
test_parse!(parse_value_table, "1..2").unwrap();
}

#[test]

@ -88,7 +93,7 @@ fn parse_large_depth_record_id() {

#[test]
fn parse_recursive_record_string() {
let res = test_parse!(parse_value, r#" r"a:[r"b:{c: r"d:1"}"]" "#).unwrap();
let res = test_parse!(parse_value_table, r#" r"a:[r"b:{c: r"d:1"}"]" "#).unwrap();
assert_eq!(
res,
Value::Thing(Thing {

@ -109,7 +114,7 @@ fn parse_recursive_record_string() {

#[test]
fn parse_record_string_2() {
let res = test_parse!(parse_value, r#" r'a:["foo"]' "#).unwrap();
let res = test_parse!(parse_value_table, r#" r'a:["foo"]' "#).unwrap();
assert_eq!(
res,
Value::Thing(Thing {

@ -121,64 +126,69 @@ fn parse_record_string_2() {

#[test]
fn parse_i64() {
let res = test_parse!(parse_value, r#" -9223372036854775808 "#).unwrap();
let res = test_parse!(parse_value_table, r#" -9223372036854775808 "#).unwrap();
assert_eq!(res, Value::Number(Number::Int(i64::MIN)));

let res = test_parse!(parse_value, r#" 9223372036854775807 "#).unwrap();
let res = test_parse!(parse_value_table, r#" 9223372036854775807 "#).unwrap();
assert_eq!(res, Value::Number(Number::Int(i64::MAX)));
}

#[test]
fn constant_lowercase() {
let out = test_parse!(parse_value, r#" math::pi "#).unwrap();
let out = test_parse!(parse_value_table, r#" math::pi "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathPi));

let out = test_parse!(parse_value, r#" math::inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" math::inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathInf));

let out = test_parse!(parse_value, r#" math::neg_inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" math::neg_inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathNegInf));
}

#[test]
fn constant_uppercase() {
let out = test_parse!(parse_value, r#" MATH::PI "#).unwrap();
let out = test_parse!(parse_value_table, r#" MATH::PI "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathPi));

let out = test_parse!(parse_value, r#" MATH::INF "#).unwrap();
let out = test_parse!(parse_value_table, r#" MATH::INF "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathInf));

let out = test_parse!(parse_value, r#" MATH::NEG_INF "#).unwrap();
let out = test_parse!(parse_value_table, r#" MATH::NEG_INF "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathNegInf));
}

#[test]
fn constant_mixedcase() {
let out = test_parse!(parse_value, r#" MaTh::Pi "#).unwrap();
let out = test_parse!(parse_value_table, r#" MaTh::Pi "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathPi));

let out = test_parse!(parse_value, r#" MaTh::Inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" MaTh::Inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathInf));

let out = test_parse!(parse_value, r#" MaTh::Neg_Inf "#).unwrap();
let out = test_parse!(parse_value_table, r#" MaTh::Neg_Inf "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathNegInf));
}

#[test]
fn scientific_decimal() {
let res = test_parse!(parse_value, r#" 9.7e-7dec "#).unwrap();
let res = test_parse!(parse_value_table, r#" 9.7e-7dec "#).unwrap();
assert!(matches!(res, Value::Number(Number::Decimal(_))));
assert_eq!(res.to_string(), "0.00000097dec")
}

#[test]
fn scientific_number() {
let res = test_parse!(parse_value, r#" 9.7e-5"#).unwrap();
let res = test_parse!(parse_value_table, r#" 9.7e-5"#).unwrap();
assert!(matches!(res, Value::Number(Number::Float(_))));
assert_eq!(res.to_string(), "0.000097f")
}

#[test]
fn empty_string() {
test_parse!(parse_value, "").unwrap_err();
fn datetime_error() {
test_parse!(parse_value_table, r#" d"2001-01-01T01:01:01.9999999999" "#).unwrap_err();
}

#[test]
fn empty_string() {
test_parse!(parse_value_table, "").unwrap_err();
}
@ -8,14 +8,19 @@ use crate::{
|
|||
},
|
||||
syn::{
|
||||
error::bail,
|
||||
lexer::compound,
|
||||
parser::mac::{expected, expected_whitespace, unexpected},
|
||||
token::{t, TokenKind},
|
||||
token::{t, Glued, TokenKind},
|
||||
},
|
||||
};
|
||||
use std::{cmp::Ordering, ops::Bound};
|
||||
|
||||
impl Parser<'_> {
|
||||
pub async fn parse_record_string(&mut self, ctx: &mut Stk, double: bool) -> ParseResult<Thing> {
|
||||
pub(crate) async fn parse_record_string(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
double: bool,
|
||||
) -> ParseResult<Thing> {
|
||||
let thing = self.parse_thing(ctx).await?;
|
||||
|
||||
debug_assert!(self.last_span().is_followed_by(&self.peek_whitespace().span));
|
||||
|
@ -28,15 +33,7 @@ impl Parser<'_> {
|
|||
Ok(thing)
|
||||
}
|
||||
|
||||
fn kind_cast_start_id(kind: TokenKind) -> bool {
|
||||
Self::tokenkind_can_start_ident(kind)
|
||||
|| matches!(
|
||||
kind,
|
||||
TokenKind::Digits | t!("{") | t!("[") | t!("+") | t!("-") | t!("u'") | t!("u\"")
|
||||
)
|
||||
}
|
||||
|
||||
pub async fn parse_thing_or_range(
|
||||
pub(crate) async fn parse_thing_or_range(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
ident: String,
|
||||
|
@ -49,7 +46,7 @@ impl Parser<'_> {
|
|||
let end = if self.eat_whitespace(t!("=")) {
|
||||
let id = stk.run(|stk| self.parse_id(stk)).await?;
|
||||
Bound::Included(id)
|
||||
} else if Self::kind_cast_start_id(self.peek_whitespace().kind) {
|
||||
} else if Self::kind_starts_record_id_key(self.peek_whitespace().kind) {
|
||||
let id = stk.run(|stk| self.parse_id(stk)).await?;
|
||||
Bound::Excluded(id)
|
||||
} else {
|
||||
|
@ -65,7 +62,7 @@ impl Parser<'_> {
|
|||
}
|
||||
|
||||
// Didn't eat range yet so we need to parse the id.
|
||||
let beg = if Self::kind_cast_start_id(self.peek_whitespace().kind) {
|
||||
let beg = if Self::kind_starts_record_id_key(self.peek_whitespace().kind) {
|
||||
let v = stk.run(|stk| self.parse_id(stk)).await?;
|
||||
|
||||
// check for exclusive
|
||||
|
@ -84,7 +81,7 @@ impl Parser<'_> {
|
|||
let end = if self.eat_whitespace(t!("=")) {
|
||||
let id = stk.run(|stk| self.parse_id(stk)).await?;
|
||||
Bound::Included(id)
|
||||
} else if Self::kind_cast_start_id(self.peek_whitespace().kind) {
|
||||
} else if Self::kind_starts_record_id_key(self.peek_whitespace().kind) {
|
||||
let id = stk.run(|stk| self.parse_id(stk)).await?;
|
||||
Bound::Excluded(id)
|
||||
} else {
|
||||
|
@ -125,10 +122,10 @@ impl Parser<'_> {
|
|||
}
|
||||
|
||||
/// Parse an range
|
||||
pub async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
|
||||
pub(crate) async fn parse_range(&mut self, ctx: &mut Stk) -> ParseResult<Range> {
|
||||
// Check for beginning id
|
||||
let beg = if Self::tokenkind_can_start_ident(self.peek_whitespace().kind) {
|
||||
let v = ctx.run(|ctx| self.parse_value(ctx)).await?;
|
||||
let beg = if Self::kind_is_identifier(self.peek_whitespace().kind) {
|
||||
let v = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
|
||||
|
||||
if self.eat_whitespace(t!(">")) {
|
||||
Bound::Excluded(v)
|
||||
|
@ -144,8 +141,8 @@ impl Parser<'_> {
|
|||
let inclusive = self.eat_whitespace(t!("="));
|
||||
|
||||
// parse ending id.
|
||||
let end = if Self::tokenkind_can_start_ident(self.peek_whitespace().kind) {
|
||||
let v = ctx.run(|ctx| self.parse_value(ctx)).await?;
|
||||
let end = if Self::kind_is_identifier(self.peek_whitespace().kind) {
|
||||
let v = ctx.run(|ctx| self.parse_value_table(ctx)).await?;
|
||||
if inclusive {
|
||||
Bound::Included(v)
|
||||
} else {
|
||||
|
@ -161,12 +158,12 @@ impl Parser<'_> {
|
|||
})
|
||||
}
|
||||
|
||||
pub async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
|
||||
pub(crate) async fn parse_thing(&mut self, ctx: &mut Stk) -> ParseResult<Thing> {
|
||||
let ident = self.next_token_value::<Ident>()?.0;
|
||||
self.parse_thing_from_ident(ctx, ident).await
|
||||
}
|
||||
|
||||
pub async fn parse_thing_from_ident(
|
||||
pub(crate) async fn parse_thing_from_ident(
|
||||
&mut self,
|
||||
ctx: &mut Stk,
|
||||
ident: String,
|
||||
|
@ -181,7 +178,7 @@ impl Parser<'_> {
|
|||
})
|
||||
}
|
||||
|
||||
pub async fn parse_id(&mut self, stk: &mut Stk) -> ParseResult<Id> {
|
||||
pub(crate) async fn parse_id(&mut self, stk: &mut Stk) -> ParseResult<Id> {
|
||||
let token = self.peek_whitespace();
|
||||
match token.kind {
|
||||
t!("u'") | t!("u\"") => Ok(Id::Uuid(self.next_token_value()?)),
|
||||
|
@ -208,12 +205,12 @@ impl Parser<'_> {
|
|||
|
||||
let next = self.peek_whitespace();
|
||||
match next.kind {
|
||||
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
|
||||
t!(".") => {
|
||||
// TODO(delskayn) explain that record-id's cant have matissas,
|
||||
// exponents or a number suffix
|
||||
unexpected!(self, next, "an integer");
|
||||
unexpected!(self, next, "an integer", => "Numeric Record-id keys can only be integers");
|
||||
}
|
||||
x if Self::tokenkind_continues_ident(x) => {
|
||||
x if Self::kind_is_identifier(x) => {
|
||||
let span = token.span.covers(next.span);
|
||||
bail!("Unexpected token `{x}` expected an integer", @span);
|
||||
}
|
||||
|
@ -230,51 +227,27 @@ impl Parser<'_> {
}
t!("-") => {
self.pop_peek();
// starting with a - so it must be a number
let digits_token = self.peek_whitespace();
match digits_token.kind {
TokenKind::Digits => {}
_ => unexpected!(self, digits_token, "an integer"),
}

let next = self.peek_whitespace();
match next.kind {
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
// TODO(delskayn) explain that record-ids can't have mantissas,
// exponents or a number suffix
unexpected!(self, next, "an integer");
}
x if Self::tokenkind_continues_ident(x) => {
let span = token.span.covers(next.span);
bail!("Unexpected token `{x}` expected an integer", @span);
}
// allowed
_ => {}
}

let digits_str = self.lexer.span_str(digits_token.span);
if let Ok(number) = digits_str.parse::<u64>() {
let token = expected!(self, TokenKind::Digits);
if let Ok(number) = self.lexer.lex_compound(token, compound::integer::<u64>) {
// Parse to u64 and check if the value is equal to `-i64::MIN` via u64 as
// `-i64::MIN` doesn't fit in an i64
match number.cmp(&((i64::MAX as u64) + 1)) {
Ordering::Less => Ok(Id::Number(-(number as i64))),
match number.value.cmp(&((i64::MAX as u64) + 1)) {
Ordering::Less => Ok(Id::Number(-(number.value as i64))),
Ordering::Equal => Ok(Id::Number(i64::MIN)),
Ordering::Greater => Ok(Id::String(format!("-{}", digits_str))),
Ordering::Greater => {
Ok(Id::String(format!("-{}", self.lexer.span_str(number.span))))
}
}
} else {
Ok(Id::String(format!("-{}", digits_str)))
Ok(Id::String(format!("-{}", self.lexer.span_str(token.span))))
}
}
TokenKind::Digits => {
let next = self.peek_whitespace_token_at(1);

if Self::tokenkind_can_start_ident(next.kind) {
let glued = self.glue_ident(self.flexible_record_id)?;
if let TokenKind::Identifier = glued.kind {
self.pop_peek();
return Ok(Id::String(self.lexer.string.take().unwrap()));
} else {
unexpected!(self, glued, "a record-id id")
if self.flexible_record_id {
let next = self.peek_whitespace1();
if Self::kind_is_identifier(next.kind) {
let ident = self.parse_flexible_ident()?.0;
return Ok(Id::String(ident));
}
}
}

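The negative-key handling above parses the digits as a `u64` because the absolute value of `i64::MIN`, i.e. `(i64::MAX as u64) + 1`, does not fit in an `i64` before negation. A standalone sketch of that comparison (illustrative function, not the parser's API):

use std::cmp::Ordering;

// Sketch: parse the digits of `-<digits>` as u64, then compare against
// 2^63 so that exactly i64::MIN is still representable; anything larger
// falls back to a string key in the real parser.
fn negative_id(digits: &str) -> Option<i64> {
    let value: u64 = digits.parse().ok()?;
    match value.cmp(&((i64::MAX as u64) + 1)) {
        Ordering::Less => Some(-(value as i64)),
        Ordering::Equal => Some(i64::MIN),
        Ordering::Greater => None, // too large even for i64::MIN
    }
}

fn main() {
    assert_eq!(negative_id("9223372036854775808"), Some(i64::MIN));
    assert_eq!(negative_id("9223372036854775809"), None);
}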
@ -287,16 +260,23 @@ impl Parser<'_> {
Ok(Id::String(digits_str.to_owned()))
}
}
TokenKind::Duration if self.flexible_record_id => {
self.lexer.duration = None;
TokenKind::Glued(Glued::Duration) if self.flexible_record_id => {
let slice = self.lexer.reader.span(token.span);
if slice.iter().any(|x| *x > 0b0111_1111) {
if slice.iter().any(|x| !x.is_ascii()) {
unexpected!(self, token, "an identifier");
}
// Should be valid utf-8 as it was already parsed by the lexer
let text = String::from_utf8(slice.to_vec()).unwrap();
Ok(Id::String(text))
}
TokenKind::Glued(_) => {
// If we glue before parsing a record id, a token like 123s456z would return an error,
// as it is an invalid duration, even though it is a valid flexible record-id identifier.
// So calling glue before using that token to create a record id is not allowed.
panic!(
"Gluing tokens used in parsing a record id would result in improper parsing"
)
}
t!("ULID") => {
self.pop_peek();
// TODO: error message about how to use `ulid` as an identifier.
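The `Glued(Glued::Duration)` arm above reuses a duration-shaped token (for example `1w2d`) as a flexible record-id string, provided it is pure ASCII; ASCII bytes are always valid UTF-8, so the conversion cannot fail. A minimal standalone sketch of that check (hypothetical helper, not the parser's API):

// Sketch: accept a duration-shaped byte slice as a record-id string only
// if it is pure ASCII.
fn duration_token_as_id(slice: &[u8]) -> Option<String> {
    if slice.iter().any(|b| !b.is_ascii()) {
        return None; // e.g. a `µs` suffix cannot round-trip as an identifier
    }
    Some(String::from_utf8(slice.to_vec()).unwrap())
}

fn main() {
    assert_eq!(duration_token_as_id(b"1w2d").as_deref(), Some("1w2d"));
    assert_eq!(duration_token_as_id("1\u{b5}s".as_bytes()), None);
}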
@ -317,8 +297,11 @@ impl Parser<'_> {
Ok(Id::Generate(Gen::Rand))
}
_ => {
self.glue_ident(self.flexible_record_id)?;
let ident = self.next_token_value::<Ident>()?.0;
let ident = if self.flexible_record_id {
self.parse_flexible_ident()?.0
} else {
self.next_token_value::<Ident>()?.0
};
Ok(Id::String(ident))
}
}

@ -1,21 +1,43 @@
//! Implements token gluing logic.
use crate::syn::token::{t, Glued, TokenKind};

use crate::{
sql::duration::{
SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK, SECONDS_PER_YEAR,
},
syn::{
error::{bail, error},
parser::{mac::unexpected, ParseResult, Parser},
token::{t, DurationSuffix, NumberKind, NumberSuffix, Token, TokenKind},
},
};

use std::time::Duration as StdDuration;
use super::Parser;

impl Parser<'_> {
/// Returns true if the next token can start a statement.
pub(super) fn kind_starts_statement(kind: TokenKind) -> bool {
matches!(
kind,
t!("ACCESS")
| t!("ALTER")
| t!("ANALYZE")
| t!("BEGIN")
| t!("BREAK")
| t!("CANCEL")
| t!("COMMIT")
| t!("CONTINUE")
| t!("CREATE")
| t!("DEFINE")
| t!("DELETE")
| t!("FOR") | t!("IF")
| t!("INFO") | t!("INSERT")
| t!("KILL") | t!("LIVE")
| t!("OPTION")
| t!("REBUILD")
| t!("RETURN")
| t!("RELATE")
| t!("REMOVE")
| t!("SELECT")
| t!("LET") | t!("SHOW")
| t!("SLEEP")
| t!("THROW")
| t!("UPDATE")
| t!("UPSERT")
| t!("USE")
)
}

/// Returns if a token kind can start an identifier.
pub fn tokenkind_can_start_ident(t: TokenKind) -> bool {
pub(super) fn kind_is_keyword_like(t: TokenKind) -> bool {
matches!(
t,
TokenKind::Keyword(_)
@ -23,27 +45,11 @@ impl Parser<'_> {
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::Identifier
| TokenKind::Exponent
| TokenKind::DatetimeChars(_)
| TokenKind::NumberSuffix(_)
| TokenKind::DurationSuffix(
// All except Micro unicode
DurationSuffix::Nano
| DurationSuffix::Micro
| DurationSuffix::Milli
| DurationSuffix::Second
| DurationSuffix::Minute
| DurationSuffix::Hour
| DurationSuffix::Day
| DurationSuffix::Week
| DurationSuffix::Year
)
)
}

/// Returns if a token kind can continue an identifier.
pub fn tokenkind_continues_ident(t: TokenKind) -> bool {
/// Returns if a token kind can start an identifier.
pub(super) fn kind_is_identifier(t: TokenKind) -> bool {
matches!(
t,
TokenKind::Keyword(_)
@ -52,478 +58,61 @@ impl Parser<'_> {
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::Identifier
| TokenKind::DatetimeChars(_)
| TokenKind::Exponent
| TokenKind::NumberSuffix(_)
)
}

pub(super) fn kind_starts_record_id_key(kind: TokenKind) -> bool {
Self::kind_is_identifier(kind)
|| matches!(
kind,
TokenKind::Digits
| t!("{") | t!("[")
| t!("+") | t!("-")
| t!("u'") | t!("u\"")
| t!("'") | t!("\"")
| TokenKind::Glued(Glued::Uuid | Glued::Strand)
)
}

pub(super) fn kind_starts_subquery(kind: TokenKind) -> bool {
matches!(
kind,
t!("RETURN")
| t!("SELECT")
| t!("CREATE")
| t!("UPSERT")
| t!("UPDATE")
| t!("DELETE")
| t!("RELATE")
| t!("DEFINE")
| t!("REMOVE")
| t!("REBUILD")
| t!("IF")
)
}

pub(super) fn kind_starts_prime_value(kind: TokenKind) -> bool {
matches!(
kind,
t!("+")
| t!("-") | t!("u'")
| t!("u\"") | t!("d'")
| t!("d\"") | t!("r'")
| t!("r\"") | t!("'")
| t!("\"") | TokenKind::Digits
| TokenKind::NaN
| TokenKind::DurationSuffix(
// All except Micro unicode
DurationSuffix::Nano
| DurationSuffix::Micro
| DurationSuffix::Milli
| DurationSuffix::Second
| DurationSuffix::Minute
| DurationSuffix::Hour
| DurationSuffix::Day
| DurationSuffix::Week
)
)
| t!("true") | t!("false")
| t!("fn") | t!("ml")
| t!("(") | t!("{")
| t!("/") | t!("|")
| t!("||") | t!("<")
| t!("$param")
| t!("..") | TokenKind::Glued(_)
) || Self::kind_starts_subquery(kind)
|| Self::kind_is_identifier(kind)
}

/// Returns if the peeked token can be an identifier.
pub fn peek_continues_ident(&mut self) -> bool {
Self::tokenkind_can_start_ident(self.peek_kind())
}
/// Glues a token and immediately consumes it.
pub fn glue_next(&mut self) -> ParseResult<Token> {
self.glue()?;
Ok(self.next())
}

/// Glues the next token together, returning its value; doesn't consume the token.
pub fn glue(&mut self) -> ParseResult<Token> {
let token = self.peek();
match token.kind {
TokenKind::Exponent
| TokenKind::NumberSuffix(_)
| TokenKind::DurationSuffix(_)
| TokenKind::VectorType(_)
| TokenKind::DatetimeChars(_) => self.glue_ident(false),
TokenKind::Digits => self.glue_numeric(),
t!("\"") | t!("'") => {
self.pop_peek();
let t = self.lexer.relex_strand(token);
let TokenKind::Strand = t.kind else {
unexpected!(self, t, "a strand")
};
self.prepend_token(t);
Ok(t)
}
t!("+") | t!("-") => {
if let TokenKind::Digits = self.peek_whitespace_token_at(1).kind {
self.glue_number()
} else {
Ok(token)
}
}
_ => Ok(token),
}
}
/// Glues all consecutive tokens which can make up an ident into a single string.
pub fn glue_ident(&mut self, flexible: bool) -> ParseResult<Token> {
let start = self.peek();

let mut token_buffer = match start.kind {
TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
self.pop_peek();

self.lexer.span_str(start.span).to_owned()
}
TokenKind::Digits if flexible => {
self.pop_peek();
self.lexer.span_str(start.span).to_owned()
}
TokenKind::DurationSuffix(x) if x.can_be_ident() => {
self.pop_peek();

self.lexer.span_str(start.span).to_owned()
}
TokenKind::DatetimeChars(_) | TokenKind::VectorType(_) => {
self.pop_peek();

self.lexer.span_str(start.span).to_owned()
}
_ => return Ok(start),
};

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);

let mut prev = start;
loop {
let p = self.peek_whitespace();
match p.kind {
// These token kinds always complete an ident; no more identifier parts can follow
// after this.
TokenKind::Identifier => {
self.pop_peek();
let buffer = self.lexer.string.take().unwrap();
token_buffer.push_str(&buffer);
prev = p;
break;
}
TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::Distance(_)
| TokenKind::VectorType(_)
| TokenKind::NumberSuffix(_) => {
self.pop_peek();
let str = self.lexer.span_str(p.span);
token_buffer.push_str(str);

prev = p;

break;
}
// These tokens might have some more parts following them
TokenKind::Exponent | TokenKind::DatetimeChars(_) | TokenKind::Digits => {
self.pop_peek();
let str = self.lexer.span_str(p.span);
token_buffer.push_str(str);

prev = p;
}
TokenKind::DurationSuffix(suffix) => {
self.pop_peek();
if !suffix.can_be_ident() {
bail!("Invalid identifier containing non-ascii characters", @p.span);
}
token_buffer.push_str(suffix.as_str());
prev = p;
}
_ => break,
}
}

let token = Token {
kind: TokenKind::Identifier,
span: start.span.covers(prev.span),
};

self.lexer.string = Some(token_buffer);
self.prepend_token(token);

Ok(token)
}
pub fn glue_numeric(&mut self) -> ParseResult<Token> {
let peek = self.peek();
match peek.kind {
TokenKind::Digits => {
if matches!(self.peek_whitespace_token_at(1).kind, TokenKind::DurationSuffix(_)) {
return self.glue_duration();
}
self.glue_number()
}
t!("+") | t!("-") => self.glue_number(),
_ => Ok(peek),
}
}

pub fn glue_number(&mut self) -> ParseResult<Token> {
let start = self.peek();

match start.kind {
t!("+") | t!("-") => {
self.pop_peek();

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);

let n = self.peek_whitespace();

if n.kind != TokenKind::Digits {
unexpected!(self, start, "a number")
}

self.pop_peek();
}
TokenKind::Digits => {
self.pop_peek();
debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing\n {:?}@{:?} => {:?}@{:?}",
start.kind,
start.span,
self.peek_whitespace().kind,
self.peek_whitespace().span
);
}
_ => return Ok(start),
};

let mut kind = NumberKind::Integer;

// Check for mantissa
if let t!(".") = self.peek_whitespace().kind {
self.pop_peek();
let next = self.peek_whitespace();
if next.kind != TokenKind::Digits {
unexpected!(self, next, "digits after the dot");
}
self.pop_peek();
kind = NumberKind::Float;
}

// Check for exponent
if let TokenKind::Exponent = self.peek_whitespace().kind {
self.pop_peek();
let exponent_token = self.peek_whitespace();
match exponent_token.kind {
t!("+") | t!("-") => {
self.pop_peek();
let exponent_token = self.peek_whitespace();
if exponent_token.kind != TokenKind::Digits {
unexpected!(self, exponent_token, "digits after the exponent")
}
}
TokenKind::Digits => {}
_ => unexpected!(self, exponent_token, "digits after the exponent"),
}
self.pop_peek();
kind = NumberKind::Float;
}

// Check for number suffix
let suffix_token = self.peek_whitespace();
if let TokenKind::NumberSuffix(suffix) = suffix_token.kind {
self.pop_peek();
match suffix {
NumberSuffix::Float => {
kind = NumberKind::Float;
}
NumberSuffix::Decimal => {
kind = NumberKind::Decimal;
}
}
}

// Check that no ident-like tokens follow
let next = self.peek_whitespace();
if Self::tokenkind_continues_ident(next.kind) {
unexpected!(self, next, "number to end")
}

let token = Token {
kind: TokenKind::Number(kind),
span: start.span.covers(self.last_span()),
};

self.prepend_token(token);

Ok(token)
}
pub fn glue_duration(&mut self) -> ParseResult<Token> {
let mut duration = StdDuration::ZERO;

let start = self.peek();
match start.kind {
TokenKind::Digits => {
self.pop_peek();
}
_ => return Ok(start),
};

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);

let mut cur = start;
loop {
let p = self.peek_whitespace();

let suffix = match p.kind {
TokenKind::DurationSuffix(x) => x,
_ => unexpected!(self, p, "a duration suffix"),
};

self.pop_peek();

let digits_str = self.lexer.span_str(cur.span);
let digits_value: u64 = digits_str
.parse()
.map_err(|e| error!("Failed to parse duration digits: {e}",@cur.span))?;

let addition = match suffix {
DurationSuffix::Nano => StdDuration::from_nanos(digits_value),
DurationSuffix::Micro | DurationSuffix::MicroUnicode => {
StdDuration::from_micros(digits_value)
}
DurationSuffix::Milli => StdDuration::from_millis(digits_value),
DurationSuffix::Second => StdDuration::from_secs(digits_value),
DurationSuffix::Minute => {
let minutes =
digits_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(minutes)
}
DurationSuffix::Hour => {
let hours = digits_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(hours)
}
DurationSuffix::Day => {
let days = digits_value.checked_mul(SECONDS_PER_DAY).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(days)
}
DurationSuffix::Week => {
let weeks = digits_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(weeks)
}
DurationSuffix::Year => {
let years = digits_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(years)
}
};

duration = duration.checked_add(addition).ok_or_else(|| {
let span = start.span.covers(p.span);
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;

match self.peek_whitespace().kind {
TokenKind::Digits => {
cur = self.pop_peek();
}
x if Parser::tokenkind_continues_ident(x) => {
let span = start.span.covers(p.span);
bail!("Invalid token, expected duration, but token contained invalid characters", @span)
}
_ => break,
}
}

let span = start.span.covers(cur.span);
let token = Token {
kind: TokenKind::Duration,
span,
};

self.lexer.duration = Some(duration);
self.prepend_token(token);

Ok(token)
}
/// Glues the next tokens which would make up a float together into a single buffer.
/// Returns an error if the tokens would make up an invalid float.
pub fn glue_float(&mut self) -> ParseResult<Token> {
let start = self.peek();

match start.kind {
t!("+") | t!("-") => {
self.pop_peek();

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);

let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
let span = start.span.covers(digits_token.span);
bail!("Unexpected token `{}` expected a floating point number",digits_token.kind,@span);
}
self.pop_peek();
}
TokenKind::Digits => {
self.pop_peek();

debug_assert!(
start.is_followed_by(&self.peek_whitespace()),
"a whitespace token was eaten where eating it would disturb parsing"
);
}
TokenKind::NumberSuffix(NumberSuffix::Float) => {
return Ok(start);
}
_ => return Ok(start),
}

// check for mantissa
if let t!(".") = self.peek_whitespace().kind {
self.pop_peek();
let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token, "a floating point number")
}
self.pop_peek();
};

// check for exponent
if let TokenKind::Exponent = self.peek_whitespace().kind {
self.pop_peek();
let mut digits_token = self.peek_whitespace();

if let t!("+") | t!("-") = digits_token.kind {
self.pop_peek();
digits_token = self.peek_whitespace();
}

if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token, "a floating point number")
}
self.pop_peek();
}

// check for number suffix
let token = self.peek_whitespace();
if let TokenKind::NumberSuffix(suffix) = token.kind {
match suffix {
NumberSuffix::Float => {
self.pop_peek();
}
NumberSuffix::Decimal => {
unexpected!(self, token, "a floating point number")
}
}
}

let t = self.peek_whitespace();
if Self::tokenkind_continues_ident(t.kind) {
unexpected!(self, t, "a floating point number to end")
}

let span = start.span.covers(self.last_span());
let token = Token {
kind: TokenKind::Number(NumberKind::Float),
span,
};

self.prepend_token(token);

Ok(token)
}
pub fn glue_plain_strand(&mut self) -> ParseResult<Token> {
let start = self.peek();
match start.kind {
t!("\"") | t!("'") => {}
_ => return Ok(start),
};

let token = self.lexer.relex_strand(start);
self.prepend_token(token);
Ok(token)
pub(super) fn kind_starts_expression(kind: TokenKind) -> bool {
matches!(kind, t!("..") | t!("<") | t!("->")) | Self::kind_starts_prime_value(kind)
}
}

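The glue_duration logic removed above converted each digits-plus-suffix pair with overflow-checked arithmetic before adding it to the running total (that work now lives in the lexer's compound lexing). A minimal standalone sketch of the checked-accumulation pattern, using an illustrative constant and helper:

use std::time::Duration;

const SECONDS_PER_MINUTE: u64 = 60;

// Sketch: convert a unit count with checked multiplication, then add it to
// the running total with checked addition, so an overflow becomes a None
// (an error span in the real parser) instead of a wrap-around.
fn add_minutes(total: Duration, minutes: u64) -> Option<Duration> {
    let secs = minutes.checked_mul(SECONDS_PER_MINUTE)?;
    total.checked_add(Duration::from_secs(secs))
}

fn main() {
    let d = add_minutes(Duration::ZERO, 90).unwrap();
    assert_eq!(d, Duration::from_secs(5400));
    assert!(add_minutes(Duration::ZERO, u64::MAX).is_none()); // overflow caught
}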
@ -1,5 +1,6 @@
use crate::syn::token::Token;

#[derive(Debug)]
pub struct TokenBuffer<const S: usize> {
buffer: [Token; S],
write: u8,

@ -9,7 +9,7 @@ use crate::syn::token::{t, TokenKind};

impl Parse<Self> for Value {
fn parse(val: &str) -> Self {
super::value(val).unwrap()
super::value_field(val).inspect_err(|e| eprintln!("{e}")).unwrap()
}
}

@ -19,7 +19,11 @@ impl Parse<Self> for Array {
let mut stack = Stack::new();
let start = parser.peek().span;
assert!(parser.eat(t!("[")));
stack.enter(|ctx| async move { parser.parse_array(ctx, start).await }).finish().unwrap()
stack
.enter(|ctx| async move { parser.parse_array(ctx, start).await })
.finish()
.map_err(|e| e.render_on(val))
.unwrap()
}
}

@ -54,7 +58,11 @@ impl Parse<Self> for Expression {
fn parse(val: &str) -> Self {
let mut parser = Parser::new(val.as_bytes());
let mut stack = Stack::new();
let value = stack.enter(|ctx| parser.parse_value_field(ctx)).finish().unwrap();
let value = stack
.enter(|ctx| parser.parse_value_table(ctx))
.finish()
.map_err(|e| e.render_on(val))
.unwrap();
if let Value::Expression(x) = value {
return *x;
}

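The test helpers above now call `e.render_on(val)` before unwrapping, so a failing test prints the rendered source snippet rather than an opaque debug dump. A sketch of the same pattern factored into a generic helper (hypothetical helper; any error-to-report conversion works in place of `render_on`):

// Hypothetical generic wrapper around the pattern used above: render the
// error against its source before unwrapping, so failures show a snippet.
fn must_parse<T, E, R: std::fmt::Debug>(
    result: Result<T, E>,
    source: &str,
    render: impl FnOnce(E, &str) -> R,
) -> T {
    result.map_err(|e| render(e, source)).unwrap()
}

fn main() {
    let ok: Result<i32, &str> = Ok(1);
    assert_eq!(must_parse(ok, "1", |e, src| format!("{e} in {src:?}")), 1);
}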
@ -63,39 +63,6 @@ macro_rules! t {
}
};

("ns") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Nano)
};
("us") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Micro)
};
("µs") => {
$crate::syn::token::TokenKind::DurationSuffix(
$crate::syn::token::DurationSuffix::MicroUnicode,
)
};
("ms") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Milli)
};
("s") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Second)
};
("m") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Minute)
};
("h") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Hour)
};
("d") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Day)
};
("w") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Week)
};
("y") => {
$crate::syn::token::TokenKind::DurationSuffix($crate::syn::token::DurationSuffix::Year)
};

("f") => {
$crate::syn::token::TokenKind::NumberSuffix($crate::syn::token::NumberSuffix::Float)
};

@ -140,12 +107,6 @@ macro_rules! t {
(":") => {
$crate::syn::token::TokenKind::Colon
};
("<-") => {
$crate::syn::token::TokenKind::ArrowLeft
};
("<->") => {
$crate::syn::token::TokenKind::BiArrow
};
("->") => {
$crate::syn::token::TokenKind::ArrowRight
};

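The duration-suffix and arrow arms above are removed because the lexer now produces those tokens directly. For reference, a minimal standalone sketch of the literal-to-variant mapping pattern the `t!` macro is built on (toy names, not the real macro):

// A toy macro mapping token literals to enum variants, mirroring how `t!`
// expands a string literal to a TokenKind path.
#[derive(Debug, PartialEq)]
enum Kind {
    Colon,
    ArrowRight,
}

macro_rules! tk {
    (":") => {
        Kind::Colon
    };
    ("->") => {
        Kind::ArrowRight
    };
}

fn main() {
    assert_eq!(tk!("->"), Kind::ArrowRight);
    assert_eq!(tk!(":"), Kind::Colon);
}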
@ -256,47 +256,6 @@ impl VectorTypeKind {
}
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum DurationSuffix {
Nano,
Micro,
MicroUnicode,
Milli,
Second,
Minute,
Hour,
Day,
Week,
Year,
}

impl DurationSuffix {
pub fn can_be_ident(&self) -> bool {
!matches!(self, DurationSuffix::MicroUnicode)
}

pub fn as_str(&self) -> &'static str {
match self {
DurationSuffix::Nano => "ns",
DurationSuffix::Micro => "us",
DurationSuffix::MicroUnicode => "µs",
DurationSuffix::Milli => "ms",
DurationSuffix::Second => "s",
DurationSuffix::Minute => "m",
DurationSuffix::Hour => "h",
DurationSuffix::Day => "d",
DurationSuffix::Week => "w",
DurationSuffix::Year => "y",
}
}
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum NumberSuffix {
Float,
Decimal,
}

impl Algorithm {
pub fn as_str(&self) -> &'static str {
match self {
@ -349,16 +308,24 @@ impl QouteKind {
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum NumberKind {
Decimal,
Float,
Integer,
pub enum Glued {
Number,
Duration,
Strand,
Datetime,
Uuid,
}

#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub enum DatetimeChars {
T,
Z,
impl Glued {
fn as_str(&self) -> &'static str {
match self {
Glued::Number => "a number",
Glued::Strand => "a strand",
Glued::Uuid => "a uuid",
Glued::Datetime => "a datetime",
Glued::Duration => "a duration",
}
}
}

/// The type of token

@ -376,13 +343,6 @@ pub enum TokenKind {
CloseDelim(Delim),
/// a token denoting the opening of a string, i.e. `r"`
Qoute(QouteKind),
/// Not produced by the lexer but only the result of token gluing.
Number(NumberKind),
/// Not produced by the lexer but only the result of token gluing.
Duration,
/// Not produced by the lexer but only the result of token gluing.
Strand,
Regex,
/// A parameter like `$name`.
Parameter,
Identifier,

@ -398,10 +358,6 @@ pub enum TokenKind {
Dollar,
/// `->`
ArrowRight,
/// `<-`
ArrowLeft,
/// `<->`
BiArrow,
/// '/'
ForwardSlash,
/// `.`

@ -422,22 +378,17 @@ pub enum TokenKind {
Vert,
/// `@`
At,
/// A token which could not be properly lexed.
Invalid,
/// A token which indicates the end of the file.
Eof,
/// A token consisting of one or more ASCII digits.
Digits,
/// An identifier-like token which matches a duration suffix.
DurationSuffix(DurationSuffix),
/// A part of a datetime-like token.
DatetimeChars(DatetimeChars),
/// An identifier-like token which matches an exponent.
Exponent,
/// An identifier-like token which matches a number suffix.
NumberSuffix(NumberSuffix),
/// The Not-A-Number number token.
NaN,
/// A compound token which has been glued together and then put back into the
/// token buffer. This is required in places where we need to look past possible compound tokens.
Glued(Glued),
/// A token which could not be properly lexed.
Invalid,
}

impl fmt::Display for TokenKind {

@ -451,7 +402,7 @@ const _TOKEN_KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::<TokenKind>()];

impl TokenKind {
pub fn has_data(&self) -> bool {
matches!(self, TokenKind::Identifier | TokenKind::Duration)
matches!(self, TokenKind::Identifier | TokenKind::Glued(_))
}

fn algorithm_as_str(alg: Algorithm) -> &'static str {

@ -486,20 +437,14 @@ impl TokenKind {
TokenKind::CloseDelim(Delim::Paren) => ")",
TokenKind::CloseDelim(Delim::Brace) => "}",
TokenKind::CloseDelim(Delim::Bracket) => "]",
TokenKind::DurationSuffix(x) => x.as_str(),
TokenKind::Strand => "a strand",
TokenKind::Parameter => "a parameter",
TokenKind::Number(_) => "a number",
TokenKind::Identifier => "an identifier",
TokenKind::Regex => "a regex",
TokenKind::LeftChefron => "<",
TokenKind::RightChefron => ">",
TokenKind::Star => "*",
TokenKind::Dollar => "$",
TokenKind::Question => "?",
TokenKind::ArrowRight => "->",
TokenKind::ArrowLeft => "<-",
TokenKind::BiArrow => "<->",
TokenKind::ForwardSlash => "/",
TokenKind::Dot => ".",
TokenKind::DotDot => "..",

@ -514,13 +459,10 @@ impl TokenKind {
TokenKind::Eof => "Eof",
TokenKind::WhiteSpace => "whitespace",
TokenKind::Qoute(x) => x.as_str(),
TokenKind::Duration => "a duration",
TokenKind::Digits => "a number",
TokenKind::NaN => "NaN",
TokenKind::Glued(x) => x.as_str(),
// below are small broken-up tokens which are most of the time identifiers.
TokenKind::DatetimeChars(_) => "an identifier",
TokenKind::Exponent => "an identifier",
TokenKind::NumberSuffix(_) => "an identifier",
}
}
}
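The `_TOKEN_KIND_SIZE_ASSERT` constant named in the hunk header above is a compile-time size guard. A minimal standalone sketch of the pattern with a toy enum:

// The two array lengths must match for this to compile, so the build fails
// if the enum ever grows beyond the expected two bytes.
#[allow(dead_code)]
#[derive(Clone, Copy)]
enum Kind {
    A(u8),
    B,
}

const _KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::<Kind>()];

fn main() {
    println!("size of Kind: {} bytes", std::mem::size_of::<Kind>());
}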