Overhaul the parser error type. (#4652)

This commit is contained in:
Mees Delzenne 2024-09-03 13:53:16 +02:00 committed by GitHub
parent 329bb74040
commit b0cde2a40a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
45 changed files with 1106 additions and 1370 deletions

View file

@ -39,7 +39,7 @@ pub async fn v1_to_2_id_uuid(tx: Arc<Transaction>) -> Result<(), Error> {
// We suffix the last id with a null byte, to prevent scanning it twice (which would result in an infinite loop)
beg.clone_from(keys.last().unwrap());
beg.extend_from_slice(&[b'\0']);
beg.extend_from_slice(b"\0");
for key in keys.iter() {
// Check if the id is affected

View file

@ -1,4 +1,4 @@
use super::token::Span;
use crate::syn::token::Span;
use std::ops::Range;
/// A human readable location inside a string.

48
core/src/syn/error/mac.rs Normal file
View file

@ -0,0 +1,48 @@
/// Macro to create a parser error.
///
/// This creates an error with a message first and then a number of spans, possibly with a label.
///
/// # Example
///
/// ```ignore
/// let text = "As we all know 1+1 is 3.";
/// if text.contains("3"){
/// let span = Span::empty(); // just imagine there is an actual span here.
/// error!("1 + 1 should be {}",1+1, @span => "you're wrong here!");
/// // This will return the following error when rendered:
/// // Error: 1 + 1 should be 2
/// // |
/// // 1 | As we all know 1+1 is 3.
/// // | ^ you're wrong here!
/// }
/// ```
macro_rules! error {
// Entry rule: build the base `SyntaxError` from the format message, then
// attach every listed `@span` (optionally with a formatted label) by
// recursing into the internal `#label` rules below.
($format:literal $(, $expr:expr)*
$(, @ $span:expr $(=> $label_format:literal $(, $label_expr:expr)* $(,)? )? )*
) => {{
let __error: $crate::syn::error::SyntaxError = $crate::syn::error::SyntaxError::new(format_args!($format $(, $expr)*));
$(
$crate::syn::error::error!(#label __error, $span $(=> $label_format$(, $label_expr)* )?);
)*
__error
}};
// Internal rule: attach a span with a formatted label message.
// Rebinds `$name` so the entry rule's `__error` accumulates each span.
(#label $name:ident, $span:expr => $label_format:literal $(, $label_expr:expr)* ) => {
let $name = $name.with_labeled_span($span,$crate::syn::error::MessageKind::Error, format_args!($label_format $(, $label_expr)*));
};
// Internal rule: attach a bare span without a label.
(#label $name:ident, $span:expr ) => {
let $name = $crate::syn::error::SyntaxError::with_span($name,$span, $crate::syn::error::MessageKind::Error);
};
}
/// Similar to [`error`] but immediately returns the error.
///
/// Expands to `return Err(error!(...))`, so it can only be used inside a
/// function returning a `Result` with a `SyntaxError`-convertible error type.
macro_rules! bail {
($($t:tt)*) => {
return Err($crate::syn::error::error!($($t)*))
};
}
pub(crate) use bail;
pub(crate) use error;

135
core/src/syn/error/mod.rs Normal file
View file

@ -0,0 +1,135 @@
use crate::syn::token::Span;
use std::fmt::Display;
mod location;
mod mac;
mod render;
pub use location::Location;
pub(crate) use mac::{bail, error};
pub use render::{RenderedError, Snippet};
/// The kind of a message attached to a span in a [`SyntaxError`].
#[derive(Debug, Clone, Copy)]
pub enum MessageKind {
// A hint on how the spanned code could be fixed.
Suggestion,
// A plain error message.
Error,
}
/// The payload of a single diagnostic in the error chain.
#[derive(Debug)]
enum DiagnosticKind {
// A plain textual cause message.
Cause(String),
// A region of the source this error points at, with an optional label.
Span {
kind: MessageKind,
span: Span,
label: Option<String>,
},
}
/// A single node in the singly-linked chain of diagnostics that make up a
/// [`SyntaxError`]. The head is the most recently attached diagnostic;
/// `next` points towards the original cause.
#[derive(Debug)]
pub struct Diagnostic {
kind: DiagnosticKind,
next: Option<Box<Diagnostic>>,
}
/// A parsing error.
#[derive(Debug)]
pub struct SyntaxError {
// Head of the diagnostic chain; later-added context wraps earlier causes.
diagnostic: Box<Diagnostic>,
// Whether this error might be resolved by supplying more source data.
data_pending: bool,
}
impl SyntaxError {
/// Create a new parse error.
pub fn new<T>(message: T) -> Self
where
T: Display,
{
let diagnostic = Diagnostic {
kind: DiagnosticKind::Cause(message.to_string()),
next: None,
};
Self {
diagnostic: Box::new(diagnostic),
data_pending: false,
}
}
/// Returns whether this error is possibly the result of missing data.
pub fn is_data_pending(&self) -> bool {
self.data_pending
}
/// Indicate that this error might be the result of missing data and could be resolved with
/// more data.
pub fn with_data_pending(mut self) -> Self {
self.data_pending = true;
self
}
pub fn with_span(mut self, span: Span, kind: MessageKind) -> Self {
self.diagnostic = Box::new(Diagnostic {
kind: DiagnosticKind::Span {
kind,
span,
label: None,
},
next: Some(self.diagnostic),
});
self
}
pub fn with_labeled_span<T: Display>(
mut self,
span: Span,
kind: MessageKind,
label: T,
) -> Self {
self.diagnostic = Box::new(Diagnostic {
kind: DiagnosticKind::Span {
kind,
span,
label: Some(label.to_string()),
},
next: Some(self.diagnostic),
});
self
}
pub fn render_on(&self, source: &str) -> RenderedError {
let mut res = RenderedError {
errors: Vec::new(),
snippets: Vec::new(),
};
Self::render_on_inner(&self.diagnostic, source, &mut res);
res
}
pub fn render_on_bytes(&self, source: &[u8]) -> RenderedError {
let source = String::from_utf8_lossy(source);
self.render_on(&source)
}
fn render_on_inner(diagnostic: &Diagnostic, source: &str, res: &mut RenderedError) {
if let Some(ref x) = diagnostic.next {
Self::render_on_inner(x, source, res);
}
match diagnostic.kind {
DiagnosticKind::Cause(ref x) => res.errors.push(x.clone()),
DiagnosticKind::Span {
ref span,
ref label,
ref kind,
} => {
let locations = Location::range_of_span(source, *span);
let snippet = Snippet::from_source_location_range(
source,
locations,
label.as_ref().map(|x| x.as_str()),
*kind,
);
res.snippets.push(snippet)
}
}
}
}

View file

@ -1,19 +1,31 @@
use std::{fmt, ops::Range};
//! Module for rendering errors onto source code.
use super::common::Location;
use std::{cmp::Ordering, fmt, ops::Range};
use super::{Location, MessageKind};
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct RenderedError {
pub text: String,
pub errors: Vec<String>,
pub snippets: Vec<Snippet>,
}
impl fmt::Display for RenderedError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(f, "{}", self.text)?;
for s in self.snippets.iter() {
writeln!(f, "{}", s)?;
match self.errors.len().cmp(&1) {
Ordering::Equal => writeln!(f, "{}", self.errors[0])?,
Ordering::Greater => {
writeln!(f, "- {}", self.errors[0])?;
writeln!(f, "caused by:")?;
for e in &self.errors[2..] {
writeln!(f, " - {}", e)?
}
}
Ordering::Less => {}
}
for s in &self.snippets {
writeln!(f, "{s}")?;
}
Ok(())
}
@ -21,7 +33,6 @@ impl fmt::Display for RenderedError {
/// Whether the snippet was truncated.
#[derive(Clone, Copy, Eq, PartialEq, Debug)]
#[non_exhaustive]
pub enum Truncation {
/// The snippet wasn't truncated
None,
@ -35,7 +46,6 @@ pub enum Truncation {
/// A piece of the source code with a location and an optional explanation.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct Snippet {
/// The part of the original source code,
source: String,
@ -48,7 +58,11 @@ pub struct Snippet {
/// The amount of characters that are part of area to be pointed to.
length: usize,
/// A possible explanation for this snippet.
explain: Option<String>,
label: Option<String>,
/// The kind of snippet,
// Unused for now but could in the future be used to color snippets.
#[allow(dead_code)]
kind: MessageKind,
}
impl Snippet {
@ -61,6 +75,7 @@ impl Snippet {
source: &str,
location: Location,
explain: Option<&'static str>,
kind: MessageKind,
) -> Self {
let line = source.split('\n').nth(location.line - 1).unwrap();
let (line, truncation, offset) = Self::truncate_line(line, location.column - 1);
@ -71,14 +86,16 @@ impl Snippet {
location,
offset,
length: 1,
explain: explain.map(|x| x.into()),
label: explain.map(|x| x.into()),
kind,
}
}
pub fn from_source_location_range(
source: &str,
location: Range<Location>,
explain: Option<&'static str>,
explain: Option<&str>,
kind: MessageKind,
) -> Self {
let line = source.split('\n').nth(location.start.line - 1).unwrap();
let (line, truncation, offset) = Self::truncate_line(line, location.start.column - 1);
@ -93,7 +110,8 @@ impl Snippet {
location: location.start,
offset,
length,
explain: explain.map(|x| x.into()),
label: explain.map(|x| x.into()),
kind,
}
}
@ -189,7 +207,7 @@ impl fmt::Display for Snippet {
write!(f, "^")?;
}
write!(f, " ")?;
if let Some(ref explain) = self.explain {
if let Some(ref explain) = self.label {
write!(f, "{explain}")?;
}
Ok(())
@ -199,7 +217,7 @@ impl fmt::Display for Snippet {
#[cfg(test)]
mod test {
use super::{RenderedError, Snippet, Truncation};
use crate::syn::common::Location;
use crate::syn::error::{Location, MessageKind};
#[test]
fn truncate_whitespace() {
@ -209,7 +227,7 @@ mod test {
let location = Location::of_in(error, source);
let snippet = Snippet::from_source_location(source, location, None);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::None);
assert_eq!(snippet.offset, 0);
assert_eq!(snippet.source.as_str(), "$");
@ -223,7 +241,7 @@ mod test {
let location = Location::of_in(error, source);
let snippet = Snippet::from_source_location(source, location, None);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::Start);
assert_eq!(snippet.offset, 10);
assert_eq!(snippet.source.as_str(), "aaaaaaaaa $");
@ -237,7 +255,7 @@ mod test {
let location = Location::of_in(error, source);
let snippet = Snippet::from_source_location(source, location, None);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::End);
assert_eq!(snippet.offset, 2);
assert_eq!(
@ -254,7 +272,7 @@ mod test {
let location = Location::of_in(error, source);
let snippet = Snippet::from_source_location(source, location, None);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::Both);
assert_eq!(snippet.offset, 10);
assert_eq!(
@ -266,7 +284,7 @@ mod test {
#[test]
fn render() {
let error = RenderedError {
text: "some_error".to_string(),
errors: vec!["some_error".to_string()],
snippets: vec![Snippet {
source: "hallo error".to_owned(),
truncation: Truncation::Both,
@ -276,7 +294,8 @@ mod test {
},
offset: 6,
length: 5,
explain: Some("this is wrong".to_owned()),
label: Some("this is wrong".to_owned()),
kind: MessageKind::Error,
}],
};

View file

@ -1,13 +1,12 @@
use crate::syn::{
error::{bail, error, SyntaxError},
lexer::{
unicode::{byte, chars},
Error, Lexer,
Lexer,
},
token::{t, DatetimeChars, Token, TokenKind},
};
use super::CharError;
impl<'a> Lexer<'a> {
/// Eats a single line comment.
pub fn eat_single_line_comment(&mut self) {
@ -46,14 +45,15 @@ impl<'a> Lexer<'a> {
}
/// Eats a multi line comment and returns an error if `*/` would be missing.
pub fn eat_multi_line_comment(&mut self) -> Result<(), Error> {
pub fn eat_multi_line_comment(&mut self) -> Result<(), SyntaxError> {
let start_span = self.current_span();
loop {
let Some(byte) = self.reader.next() else {
return Err(Error::UnexpectedEof);
bail!("Unexpected end of file, expected multi-line comment to end.", @start_span => "Comment starts here.");
};
if let b'*' = byte {
let Some(byte) = self.reader.peek() else {
return Err(Error::UnexpectedEof);
bail!("Unexpected end of file, expected multi-line comment to end.", @start_span => "Comment starts here.");
};
if b'/' == byte {
self.reader.next();
@ -100,35 +100,6 @@ impl<'a> Lexer<'a> {
}
}
// re-lexes a `/` token to a regex token.
pub fn relex_regex(&mut self, token: Token) -> Token {
debug_assert_eq!(token.kind, t!("/"));
debug_assert_eq!(token.span.offset + 1, self.last_offset);
debug_assert_eq!(token.span.len, 1);
self.last_offset = token.span.offset;
loop {
match self.reader.next() {
Some(b'\\') => {
if let Some(b'/') = self.reader.peek() {
self.reader.next();
}
}
Some(b'/') => break,
Some(x) => {
if !x.is_ascii() {
if let Err(e) = self.reader.complete_char(x) {
return self.invalid_token(e.into());
}
}
}
None => return self.invalid_token(Error::UnexpectedEof),
}
}
self.finish_token(TokenKind::Regex)
}
/// Lex the next token, starting from the given byte.
pub fn lex_ascii(&mut self, byte: u8) -> Token {
let kind = match byte {
@ -162,7 +133,10 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("&&")
}
_ => return self.invalid_token(Error::ExpectedEnd('&')),
_ => {
let error = error!("Invalid token `&`, single `&` are not a valid token, did you mean `&&`?",@self.current_span());
return self.invalid_token(error);
}
},
b'.' => match self.reader.peek() {
Some(b'.') => {
@ -263,7 +237,10 @@ impl<'a> Lexer<'a> {
self.reader.next();
t!("+?=")
}
_ => return self.invalid_token(Error::ExpectedEnd('=')),
_ => {
let error = error!("Invalid token `+?` did you maybe mean `+?=` ?", @self.current_span());
return self.invalid_token(error);
}
}
}
_ => t!("+"),
@ -326,8 +303,7 @@ impl<'a> Lexer<'a> {
self.reader.next();
match self.reader.complete_char(x) {
Ok('⟨') => return self.lex_surrounded_param(false),
Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
Err(e) => return self.invalid_token(e.into()),
_ => {
self.reader.backup(backup);
t!("$")
@ -513,7 +489,10 @@ impl<'a> Lexer<'a> {
return self.lex_ident_from_next_byte(byte);
}
//b'0'..=b'9' => return self.lex_number(byte),
x => return self.invalid_token(Error::UnexpectedCharacter(x as char)),
x => {
let err = error!("Invalid token `{}`", x as char, @self.current_span());
return self.invalid_token(err);
}
};
self.finish_token(kind)

View file

@ -1,10 +1,9 @@
use crate::syn::{
lexer::{CharError, Lexer},
error::error,
lexer::Lexer,
token::{t, Token},
};
use super::Error;
impl<'a> Lexer<'a> {
/// lex non-ascii characters.
///
@ -12,8 +11,7 @@ impl<'a> Lexer<'a> {
pub fn lex_char(&mut self, byte: u8) -> Token {
let c = match self.reader.complete_char(byte) {
Ok(x) => x,
Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
Err(e) => return self.invalid_token(e.into()),
};
let kind = match c {
'⟨' => return self.lex_surrounded_ident(false),
@ -32,17 +30,22 @@ impl<'a> Lexer<'a> {
'÷' => t!("÷"),
'µ' => {
let Some(b's') = self.reader.peek() else {
return self.invalid_token(Error::UnexpectedCharacter('µ'));
let err = error!("Invalid token `µ` expected token to be followed by `s`", @self.current_span());
return self.invalid_token(err);
};
self.reader.next();
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
return self.invalid_token(Error::UnexpectedCharacter('µ'));
let err = error!("Invalid token `µ` expected token to be followed by `s`", @self.current_span());
return self.invalid_token(err);
}
t!("µs")
}
x => return self.invalid_token(Error::UnexpectedCharacter(x)),
x => {
let err = error!("Invalid token `{x}`", @self.current_span());
return self.invalid_token(err);
}
};
self.finish_token(kind)
}

View file

@ -0,0 +1,120 @@
use crate::syn::{
error::{MessageKind, SyntaxError},
lexer::unicode::chars::JS_LINE_TERIMATORS,
token::{t, CompoundToken, JavaScript, Span, TokenKind},
};
use super::{CompoundValue, Lexer};
// A JavaScript function body is lexed as one compound token, started by the
// opening `{` of the body.
impl CompoundValue for JavaScript {
const START: TokenKind = t!("{");
// Consumes the rest of the `{ ... }` body (the opening `{` was already
// eaten as the start token) and returns a compound token covering it.
fn relex(lexer: &mut Lexer, _: Span) -> Result<CompoundToken<Self>, SyntaxError> {
let span = lex_js_function_body_inner(lexer)?;
Ok(CompoundToken {
value: JavaScript,
span,
})
}
}
/// Lex the body of a js function.
///
/// Expects the opening `{` to have been consumed already and eats input up to
/// and including the matching closing `}`. String literals and comments are
/// skipped so braces inside them do not affect the depth count.
fn lex_js_function_body_inner(lexer: &mut Lexer) -> Result<Span, SyntaxError> {
// Depth starts at 1: the body's opening `{` was already consumed.
let mut block_depth = 1;
loop {
let Some(byte) = lexer.reader.next() else {
let span = lexer.advance_span();
// Running out of input mid-body may simply mean not all data has
// arrived yet, hence `with_data_pending`.
return Err(SyntaxError::new(format_args!(
"Invalid JavaScript function, encountered unexpected eof"
))
.with_span(span, MessageKind::Error)
.with_data_pending());
};
match byte {
// Skip string literals so braces inside them are ignored.
b'`' => lex_js_string(lexer, b'`')?,
b'\'' => lex_js_string(lexer, b'\'')?,
b'\"' => lex_js_string(lexer, b'\"')?,
// `//` or `/*` starts a comment; a lone `/` is left as-is.
b'/' => match lexer.reader.peek() {
Some(b'/') => {
lexer.reader.next();
lex_js_single_comment(lexer)?;
}
Some(b'*') => {
lexer.reader.next();
lex_js_multi_comment(lexer)?
}
_ => {}
},
b'{' => {
block_depth += 1;
}
b'}' => {
block_depth -= 1;
// Matching brace for the body's opening `{`: done.
if block_depth == 0 {
break;
}
}
x if !x.is_ascii() => {
// Validate multi-byte sequences; errors on invalid UTF-8.
lexer.reader.complete_char(x)?;
}
_ => {}
}
}
Ok(lexer.advance_span())
}
/// lex a js string with the given delimiter.
///
/// Consumes input up to and including the next unescaped `enclosing_byte`.
/// Fails on end of input (marked data-pending) or invalid UTF-8.
fn lex_js_string(lexer: &mut Lexer, enclosing_byte: u8) -> Result<(), SyntaxError> {
loop {
let Some(byte) = lexer.reader.next() else {
let span = lexer.advance_span();
// An unterminated string might be completed by more data.
return Err(SyntaxError::new(format_args!(
"Invalid JavaScript function, encountered unexpected eof"
))
.with_span(span, MessageKind::Error)
.with_data_pending());
};
if byte == enclosing_byte {
return Ok(());
}
if byte == b'\\' {
// Skip the escaped byte so an escaped delimiter doesn't end the string.
lexer.reader.next();
}
// check for invalid characters.
lexer.reader.convert_to_char(byte)?;
}
}
/// Lex a single-line js comment.
///
/// Consumes input until a JavaScript line terminator is found or the input
/// ends; both cases terminate the comment successfully.
fn lex_js_single_comment(lexer: &mut Lexer) -> Result<(), SyntaxError> {
while let Some(byte) = lexer.reader.next() {
// Also validates the byte as (part of) a character; errors on invalid UTF-8.
let c = lexer.reader.convert_to_char(byte)?;
if JS_LINE_TERIMATORS.contains(&c) {
break;
}
}
Ok(())
}
/// lex a multi line js comment.
///
/// Consumes input up to and including the closing `*/`. Fails on end of
/// input (marked data-pending) or invalid UTF-8.
fn lex_js_multi_comment(lexer: &mut Lexer) -> Result<(), SyntaxError> {
loop {
let Some(byte) = lexer.reader.next() else {
let span = lexer.advance_span();
// An unterminated comment might be completed by more data.
return Err(SyntaxError::new(format_args!(
"Invalid JavaScript function, encountered unexpected eof"
))
.with_span(span, MessageKind::Error)
.with_data_pending());
};
if byte == b'*' && lexer.reader.peek() == Some(b'/') {
// Consume the `/` of the closing `*/`.
lexer.reader.next();
return Ok(());
}
// check for invalid characters.
lexer.reader.convert_to_char(byte)?;
}
}

View file

@ -0,0 +1,85 @@
use crate::sql::Regex;
use crate::syn::{
error::{bail, error, SyntaxError},
lexer::Lexer,
token::{t, CompoundToken, Span, Token, TokenKind},
};
mod js;
/// A value that is lexed as a single compound token by re-lexing the source
/// after an initial start token (e.g. regexes and JavaScript bodies).
pub trait CompoundValue: Sized {
/// The token which indicates the start of this compound token.
const START: TokenKind;
/// Lex the start of this span to a more complex type of token.
fn relex(lexer: &mut Lexer, start_span: Span) -> Result<CompoundToken<Self>, SyntaxError>;
}
impl<'a> Lexer<'a> {
/// Re-lex the source starting at the previously produced `start` token
/// into a compound token of type `T`.
///
/// # Panics
/// Panics if `start` is not of kind `T::START`, or if the lexer has
/// already consumed input past the start token.
pub fn lex_compound<T: CompoundValue>(
&mut self,
start: Token,
) -> Result<CompoundToken<T>, SyntaxError> {
assert_eq!(
start.kind,
T::START,
"Invalid start of compound token, expected {} got {}",
T::START,
start.kind
);
assert_eq!(
start.span.offset + 1,
self.last_offset,
"Tried to parse compound when lexer already ate past the start token"
);
// Rewind the token start so the compound token's span includes the
// start token itself.
self.last_offset = start.span.offset;
T::relex(self, start.span)
}
}
impl CompoundValue for Regex {
const START: TokenKind = t!("/");

/// Re-lexes a `/` token into a regex token.
///
/// Consumes bytes until the closing unescaped `/`, then parses the text
/// between the delimiters as a [`Regex`]. An unterminated regex yields a
/// data-pending error; an invalid pattern yields a parse error.
fn relex(lexer: &mut Lexer, _: Span) -> Result<CompoundToken<Regex>, SyntaxError> {
loop {
match lexer.reader.next() {
Some(b'\\') => {
// We can't just eat all bytes after a \ because a byte might be non-ascii.
lexer.eat(b'/');
}
Some(b'/') => break,
Some(x) => {
if !x.is_ascii() {
// Validate multi-byte sequences; errors on invalid UTF-8.
if let Err(e) = lexer.reader.complete_char(x) {
let span = lexer.advance_span();
bail!("Invalid token: {e}", @span);
}
}
}
None => {
// Unterminated regex: more data might complete it.
let span = lexer.advance_span();
return Err(
error!("Failed to lex regex, unexpected eof", @span).with_data_pending()
);
}
}
}
// successfully parsed the regex, time to structure it.
let span = lexer.advance_span();
// +1 offset to move over the first `/` -2 len to remove the last `/`
let mut inner_span = span;
// The span always covers at least the two `/` delimiters.
// BUG FIX: this asserted `> 2` before, which panicked in debug builds on
// the empty regex `//` (span length exactly 2).
debug_assert!(inner_span.len >= 2);
inner_span.offset += 1;
inner_span.len -= 2;
let str = lexer.span_str(inner_span);
let regex = str.parse().map_err(|e| error!("Invalid regex: {e}", @span))?;
Ok(CompoundToken {
value: regex,
span,
})
}
}

View file

@ -3,7 +3,8 @@ use std::mem;
use unicase::UniCase;
use crate::syn::{
lexer::{keywords::KEYWORDS, Error, Lexer},
error::{error, SyntaxError},
lexer::{keywords::KEYWORDS, Lexer},
token::{Token, TokenKind},
};
@ -100,7 +101,7 @@ impl<'a> Lexer<'a> {
}
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), Error> {
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), SyntaxError> {
loop {
let Some(x) = self.reader.next() else {
let end_char = if is_backtick {
@ -108,7 +109,8 @@ impl<'a> Lexer<'a> {
} else {
'⟩'
};
return Err(Error::ExpectedEnd(end_char));
let error = error!("Unexpected end of file, expected identifier to end with `{end_char}`", @self.current_span());
return Err(error.with_data_pending());
};
if x.is_ascii() {
match x {
@ -118,7 +120,8 @@ impl<'a> Lexer<'a> {
}
b'\0' => {
// null bytes not allowed
return Err(Error::UnexpectedCharacter('\0'));
let err = error!("Invalid null byte in source, null bytes are not valid SurrealQL characters",@self.current_span());
return Err(err);
}
b'\\' if is_backtick => {
// handle escape sequences.
@ -130,7 +133,8 @@ impl<'a> Lexer<'a> {
} else {
'⟩'
};
return Err(Error::ExpectedEnd(end_char));
let error = error!("Unexpected end of file, expected identifier to end with `{end_char}`", @self.current_span());
return Err(error.with_data_pending());
};
match next {
b'\\' => {
@ -158,12 +162,16 @@ impl<'a> Lexer<'a> {
self.scratch.push(chars::TAB);
}
_ => {
let char = if x.is_ascii() {
x as char
let char = self.reader.convert_to_char(x)?;
let error = if !is_backtick {
if char == '⟩' {
self.scratch.push(char);
}
error!("Invalid escape character `{x}` for identifier, valid characters are `⟩`, `\\`, ```, `/`, `b`, `f`, `n`, `r`, or `t`", @self.current_span())
} else {
self.reader.complete_char(x)?
error!("Invalid escape character `{x}` for identifier, valid characters are `\\`, ```, `/`, `b`, `f`, `n`, `r`, or `t`", @self.current_span())
};
return Err(Error::InvalidEscapeCharacter(char));
return Err(error);
}
}
}

View file

@ -1,97 +0,0 @@
use crate::syn::token::Span;
use super::{unicode::chars::JS_LINE_TERIMATORS, Error, Lexer};
impl Lexer<'_> {
/// Lex the body of a js function.
///
/// This function will never be called while lexing normally.
pub fn lex_js_function_body(&mut self) -> Result<String, (Error, Span)> {
// Pair any inner error with the span lexed so far for reporting.
self.lex_js_function_body_inner().map_err(|e| (e, self.current_span()))
}
/// Lex the body of a js function.
///
/// Expects the opening `{` to be consumed already; eats up to and
/// including the matching `}` and returns the body text without it.
fn lex_js_function_body_inner(&mut self) -> Result<String, Error> {
// Depth starts at 1: the body's opening `{` was already consumed.
let mut block_depth = 1;
loop {
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
match byte {
// Skip string literals so braces inside them are ignored.
b'`' => self.lex_js_string(b'`')?,
b'\'' => self.lex_js_string(b'\'')?,
b'\"' => self.lex_js_string(b'\"')?,
// `//` or `/*` starts a comment; a lone `/` is left as-is.
b'/' => match self.reader.peek() {
Some(b'/') => {
self.reader.next();
self.lex_js_single_comment()?;
}
Some(b'*') => {
self.reader.next();
self.lex_js_multi_comment()?
}
_ => {}
},
b'{' => {
block_depth += 1;
}
b'}' => {
block_depth -= 1;
if block_depth == 0 {
break;
}
}
x if !x.is_ascii() => {
// check for invalid characters.
self.reader.complete_char(x)?;
}
_ => {}
}
}
let mut span = self.current_span();
// remove the `}` from the source text;
span.len -= 1;
// lexer ensures that it is valid utf8
let source = String::from_utf8(self.reader.span(span).to_vec()).unwrap();
Ok(source)
}
/// lex a js string with the given delimiter.
///
/// Consumes up to and including the next unescaped `enclosing_byte`.
fn lex_js_string(&mut self, enclosing_byte: u8) -> Result<(), Error> {
loop {
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
if byte == enclosing_byte {
return Ok(());
}
if byte == b'\\' {
// Skip the escaped byte so an escaped delimiter doesn't end the string.
self.reader.next();
}
// check for invalid characters.
self.reader.convert_to_char(byte)?;
}
}
/// lex a single line js comment.
///
/// Terminates at a JS line terminator or at end of input.
fn lex_js_single_comment(&mut self) -> Result<(), Error> {
loop {
let Some(byte) = self.reader.next() else {
return Ok(());
};
let char = self.reader.convert_to_char(byte)?;
if JS_LINE_TERIMATORS.contains(&char) {
return Ok(());
}
}
}
/// lex a multi line js comment.
///
/// Consumes up to and including the closing `*/`; errors at end of input.
fn lex_js_multi_comment(&mut self) -> Result<(), Error> {
loop {
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
if byte == b'*' && self.reader.peek() == Some(b'/') {
self.reader.next();
return Ok(());
}
// check for invalid characters.
self.reader.convert_to_char(byte)?;
}
}
}

View file

@ -1,53 +1,27 @@
use std::time::Duration;
use chrono::{DateTime, Utc};
use thiserror::Error;
mod byte;
mod char;
mod ident;
mod js;
pub mod keywords;
mod number;
mod reader;
mod strand;
mod unicode;
mod compound;
#[cfg(test)]
mod test;
pub use reader::{BytesReader, CharError};
use uuid::Uuid;
use crate::syn::token::{Span, Token, TokenKind};
/// A error returned by the lexer when an invalid token is encountered.
///
/// Can be retrieved from the `Lexer::error` field whenever it returned a [`TokenKind::Invalid`]
/// token.
#[derive(Error, Debug)]
#[non_exhaustive]
pub enum Error {
#[error("Lexer encountered unexpected character {0:?}")]
UnexpectedCharacter(char),
#[error("invalid escape character {0:?}")]
InvalidEscapeCharacter(char),
#[error("Lexer encountered unexpected end of source characters")]
UnexpectedEof,
#[error("source was not valid utf-8")]
InvalidUtf8,
#[error("expected next character to be '{0}'")]
ExpectedEnd(char),
}
impl From<CharError> for Error {
fn from(value: CharError) -> Self {
match value {
CharError::Eof => Self::UnexpectedEof,
CharError::Unicode => Self::InvalidUtf8,
}
}
}
use crate::syn::{
error::SyntaxError,
token::{Span, Token, TokenKind},
};
/// The SurrealQL lexer.
/// Takes a slice of bytes and turns it into tokens. The lexer is designed with possible invalid utf-8
@ -88,7 +62,7 @@ pub struct Lexer<'a> {
pub duration: Option<Duration>,
pub datetime: Option<DateTime<Utc>>,
pub uuid: Option<Uuid>,
pub error: Option<Error>,
pub error: Option<SyntaxError>,
}
impl<'a> Lexer<'a> {
@ -170,7 +144,7 @@ impl<'a> Lexer<'a> {
}
/// Return an invalid token.
fn invalid_token(&mut self, error: Error) -> Token {
fn invalid_token(&mut self, error: SyntaxError) -> Token {
self.error = Some(error);
self.finish_token(TokenKind::Invalid)
}
@ -186,16 +160,19 @@ impl<'a> Lexer<'a> {
}
}
// Returns the span of the token currently being lexed and marks the current
// reader position as the start of the next token.
fn advance_span(&mut self) -> Span {
let span = self.current_span();
// The source is limited to u32::MAX bytes elsewhere, so this cast cannot
// overflow — TODO confirm against the length check in the parser entry.
self.last_offset = self.reader.offset() as u32;
span
}
/// Builds a token from an TokenKind.
///
/// Attaches a span to the token and returns, updates the new offset.
fn finish_token(&mut self, kind: TokenKind) -> Token {
let span = self.current_span();
// We make sure that the source is no longer then u32::MAX so this can't overflow.
self.last_offset = self.reader.offset() as u32;
Token {
kind,
span,
span: self.advance_span(),
}
}
@ -248,6 +225,18 @@ impl<'a> Lexer<'a> {
false
}
}
/// Returns the string for a given span of the source.
/// Will panic if the given span was not valid for the source, or invalid utf8
pub fn span_str(&self, span: Span) -> &'a str {
std::str::from_utf8(self.span_bytes(span)).expect("invalid span segment for source")
}
/// Returns the bytes for a given span of the source.
/// Will panic if the given span was not valid for the source.
pub fn span_bytes(&self, span: Span) -> &'a [u8] {
self.reader.span(span)
}
}
impl Iterator for Lexer<'_> {

View file

@ -1,6 +1,9 @@
use thiserror::Error;
use crate::syn::token::Span;
use crate::syn::{
error::{error, SyntaxError},
token::Span,
};
use std::fmt;
#[derive(Error, Debug)]
@ -12,6 +15,17 @@ pub enum CharError {
Unicode,
}
impl From<CharError> for SyntaxError {
/// Converts a low-level character decoding error into a [`SyntaxError`].
fn from(value: CharError) -> Self {
// BUG FIX: the previous message read "Invalid, non valid UTF-8 bytes, in
// source", which was garbled English.
let e = SyntaxError::new("Source contains invalid UTF-8 bytes");
if let CharError::Eof = value {
// A truncated multi-byte sequence could be completed by more data.
e.with_data_pending()
} else {
e
}
}
}
#[derive(Clone)]
#[non_exhaustive]
pub struct BytesReader<'a> {

View file

@ -2,9 +2,12 @@
use std::mem;
use crate::syn::token::{QouteKind, Token, TokenKind};
use crate::syn::{
error::error,
token::{QouteKind, Token, TokenKind},
};
use super::{unicode::chars, Error, Lexer};
use super::{unicode::chars, Lexer};
impl<'a> Lexer<'a> {
/// Lex a plain strand with either single or double quotes.
@ -34,7 +37,8 @@ impl<'a> Lexer<'a> {
return self.finish_token(TokenKind::Strand);
}
b'\0' => {
return self.invalid_token(Error::UnexpectedCharacter('\0'));
let err = error!("Invalid null byte in source, null bytes are not valid SurrealQL characters",@self.current_span());
return self.invalid_token(err);
}
b'\\' => {
// Handle escape sequences.
@ -70,17 +74,20 @@ impl<'a> Lexer<'a> {
b't' => {
self.scratch.push(chars::TAB);
}
x => {
let char = if x.is_ascii() {
x as char
} else {
match self.reader.complete_char(x) {
Ok(x) => x,
Err(e) => return self.invalid_token(e.into()),
}
};
return self.invalid_token(Error::InvalidEscapeCharacter(char));
}
x => match self.reader.convert_to_char(x) {
Ok(char) => {
let valid_escape = if is_double {
'"'
} else {
'\''
};
let err = error!("Invalid escape character `{char}`, valid characters are `\\`, `{valid_escape}`, `/`, `b`, `f`, `n`, `r`, or `t`", @self.current_span());
return self.invalid_token(err);
}
Err(e) => {
return self.invalid_token(e.into());
}
},
}
}
x => self.scratch.push(x as char),

View file

@ -10,7 +10,7 @@ macro_rules! test_case(
let span = std::str::from_utf8(lexer.reader.span(next.span)).unwrap_or("invalid utf8");
if let TokenKind::Invalid = next.kind{
let error = lexer.error.take().unwrap();
assert_eq!(next.kind, $token, "{} = {}:{} => {}",span, i, stringify!($token), error);
assert_eq!(next.kind, $token, "{} = {}:{} => {:?}",span, i, stringify!($token), error);
}else{
assert_eq!(next.kind, $token, "{} = {}:{}", span, i, stringify!($token));
}

View file

@ -5,7 +5,6 @@ use crate::{
sql::{Block, Datetime, Duration, Idiom, Query, Range, Subquery, Thing, Value},
};
pub mod common;
pub mod error;
pub mod lexer;
pub mod parser;
@ -177,7 +176,8 @@ pub fn block(input: &str) -> Result<Block, Error> {
let mut parser = Parser::new(input.as_bytes());
let mut stack = Stack::new();
match parser.peek_kind() {
let token = parser.peek();
match token.kind {
t!("{") => {
let start = parser.pop_peek().span;
stack
@ -187,14 +187,9 @@ pub fn block(input: &str) -> Result<Block, Error> {
.map_err(Error::InvalidQuery)
}
found => Err(Error::InvalidQuery(
crate::syn::parser::ParseError::new(
crate::syn::parser::ParseErrorKind::Unexpected {
expected: "{",
found,
},
parser.last_span(),
)
.render_on(input),
error::SyntaxError::new(format_args!("Unexpected token `{found}` expected `{{`"))
.with_span(token.span, error::MessageKind::Error)
.render_on(input),
)),
}
}

View file

@ -5,10 +5,8 @@ use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone,
use crate::{
sql::Datetime,
syn::{
parser::{
mac::{expected_whitespace, unexpected},
ParseError, ParseErrorKind, ParseResult, Parser,
},
error::{bail, error},
parser::{expected_whitespace, unexpected, ParseResult, Parser},
token::{t, DatetimeChars, TokenKind},
},
};
@ -19,7 +17,7 @@ impl Parser<'_> {
let double = match start.kind {
t!("d\"") => true,
t!("d'") => false,
x => unexpected!(self, x, "a datetime"),
x => bail!("Expected a datetime found {}",x, @start.span),
};
self.pop_peek();
@ -58,8 +56,9 @@ impl Parser<'_> {
year as i32
};
let date = NaiveDate::from_ymd_opt(year, month as u32, day as u32)
.ok_or_else(|| ParseError::new(ParseErrorKind::InvalidDatetimeDate, date_span))?;
let date = NaiveDate::from_ymd_opt(year, month as u32, day as u32).ok_or_else(
|| error!("Invalid DateTime date: date outside of valid range", @date_span),
)?;
if !self.eat(TokenKind::DatetimeChars(DatetimeChars::T)) {
let time = NaiveTime::default();
@ -81,13 +80,11 @@ impl Parser<'_> {
let nanos = if self.eat_whitespace(t!(".")) {
let digits_token = expected_whitespace!(self, TokenKind::Digits);
let slice = self.span_bytes(digits_token.span);
let slice = self.lexer.span_bytes(digits_token.span);
if slice.len() > 9 {
return Err(ParseError::new(
ParseErrorKind::TooManyNanosecondsDatetime,
digits_token.span,
));
bail!("Invalid DateTime nanoseconds, too many nanosecond digits",
@digits_token.span => "This section contains more then 9 digits");
}
let mut number = 0u32;
@ -110,9 +107,10 @@ impl Parser<'_> {
let time_span = start_time.covers(self.last_span());
let time =
NaiveTime::from_hms_nano_opt(hour as u32, minute as u32, second as u32, nanos)
.ok_or_else(|| ParseError::new(ParseErrorKind::InvalidDatetimeTime, time_span))?;
let time = NaiveTime::from_hms_nano_opt(hour as u32, minute as u32, second as u32, nanos)
.ok_or_else(
|| error!("Invalid DateTime time: time outside of valid range", @time_span),
)?;
let peek = self.peek_whitespace();
let timezone = match peek.kind {
@ -122,7 +120,7 @@ impl Parser<'_> {
self.pop_peek();
Utc.fix()
}
x => unexpected!(self, x, "`Z` or a timezone"),
_ => unexpected!(self, peek, "`Z` or a timezone"),
};
let date_time = NaiveDateTime::new(date, time);
@ -160,17 +158,13 @@ impl Parser<'_> {
let t = self.peek_whitespace();
match t.kind {
TokenKind::Digits => {}
x => unexpected!(self, x, "datetime digits"),
_ => unexpected!(self, t, "datetime digits"),
}
let digits_str = self.span_str(t.span);
let digits_str = self.lexer.span_str(t.span);
if digits_str.len() != len {
return Err(ParseError::new(
ParseErrorKind::InvalidDatetimePart {
len,
},
t.span,
));
bail!("Datetime digits section not the correct length, needs to be {len} characters",
@t.span => "This section has a length of {}", digits_str.len());
}
self.pop_peek();
@ -179,12 +173,7 @@ impl Parser<'_> {
let value = digits_str.parse().unwrap();
if !range.contains(&value) {
return Err(ParseError::new(
ParseErrorKind::OutrangeDatetimePart {
range,
},
t.span,
));
bail!("Datetime digits section outside of valid range of {}..={}", range.start(),range.end(), @t.span);
}
Ok(value)

View file

@ -1,7 +1,7 @@
use crate::{
sql::{language::Language, Datetime, Duration, Ident, Param, Regex, Strand, Table, Uuid},
syn::{
parser::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser},
parser::{mac::unexpected, ParseResult, Parser},
token::{t, QouteKind, TokenKind},
},
};
@ -17,7 +17,8 @@ pub trait TokenValue: Sized {
impl TokenValue for Ident {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
match parser.glue_ident(false)?.kind {
let token = parser.glue_ident(false)?;
match token.kind {
TokenKind::Identifier => {
parser.pop_peek();
let str = parser.lexer.string.take().unwrap();
@ -25,10 +26,10 @@ impl TokenValue for Ident {
}
TokenKind::Keyword(_) | TokenKind::Language(_) | TokenKind::Algorithm(_) => {
let s = parser.pop_peek().span;
Ok(Ident(parser.span_str(s).to_owned()))
Ok(Ident(parser.lexer.span_str(s).to_owned()))
}
x => {
unexpected!(parser, x, "an identifier");
_ => {
unexpected!(parser, token, "an identifier");
}
}
}
@ -42,7 +43,8 @@ impl TokenValue for Table {
impl TokenValue for Language {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
match parser.peek_kind() {
let peek = parser.peek();
match peek.kind {
TokenKind::Language(x) => {
parser.pop_peek();
Ok(x)
@ -52,32 +54,34 @@ impl TokenValue for Language {
parser.pop_peek();
Ok(Language::Norwegian)
}
x => unexpected!(parser, x, "a language"),
_ => unexpected!(parser, peek, "a language"),
}
}
}
impl TokenValue for Param {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
match parser.peek_kind() {
let peek = parser.peek();
match peek.kind {
TokenKind::Parameter => {
parser.pop_peek();
let param = parser.lexer.string.take().unwrap();
Ok(Param(Ident(param)))
}
x => unexpected!(parser, x, "a parameter"),
_ => unexpected!(parser, peek, "a parameter"),
}
}
}
impl TokenValue for Duration {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
match parser.glue_duration()?.kind {
let token = parser.glue_duration()?;
match token.kind {
TokenKind::Duration => {
parser.pop_peek();
Ok(Duration(parser.lexer.duration.unwrap()))
}
x => unexpected!(parser, x, "a duration"),
_ => unexpected!(parser, token, "a duration"),
}
}
}
@ -96,7 +100,7 @@ impl TokenValue for Strand {
parser.pop_peek();
let t = parser.lexer.relex_strand(token);
let TokenKind::Strand = t.kind else {
unexpected!(parser, t.kind, "a strand")
unexpected!(parser, t, "a strand")
};
Ok(Strand(parser.lexer.string.take().unwrap()))
}
@ -104,7 +108,7 @@ impl TokenValue for Strand {
parser.pop_peek();
Ok(Strand(parser.lexer.string.take().unwrap()))
}
x => unexpected!(parser, x, "a strand"),
_ => unexpected!(parser, token, "a strand"),
}
}
}
@ -117,24 +121,13 @@ impl TokenValue for Uuid {
impl TokenValue for Regex {
fn from_token(parser: &mut Parser<'_>) -> ParseResult<Self> {
match parser.peek().kind {
let peek = parser.peek();
match peek.kind {
t!("/") => {
let pop = parser.pop_peek();
assert!(!parser.has_peek());
let token = parser.lexer.relex_regex(pop);
let mut span = token.span;
// remove the starting and ending `/` characters.
span.offset += 1;
span.len -= 2;
let regex = parser
.span_str(span)
.parse()
.map_err(|e| ParseError::new(ParseErrorKind::InvalidRegex(e), token.span))?;
Ok(regex)
Ok(parser.lexer.lex_compound(pop)?.value)
}
x => unexpected!(parser, x, "a regex"),
_ => unexpected!(parser, peek, "a regex"),
}
}
}

View file

@ -8,7 +8,8 @@ use std::{
use crate::{
sql::Number,
syn::{
parser::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser},
error::error,
parser::{mac::unexpected, ParseResult, Parser},
token::{t, NumberKind, TokenKind},
},
};
@ -32,7 +33,7 @@ where
let mut peek = parser.peek();
if let t!("-") = peek.kind {
unexpected!(parser,t!("-"),"an integer" => "only positive integers are allowed here")
unexpected!(parser,peek,"an integer", => "only positive integers are allowed here")
}
if let t!("+") = peek.kind {
@ -47,25 +48,24 @@ where
let p = parser.peek_whitespace();
match p.kind {
t!(".") => {
unexpected!(parser, p.kind, "an integer")
unexpected!(parser, p, "an integer")
}
t!("dec") => {
unexpected!(parser, p.kind, "an integer" => "decimal numbers not supported here")
unexpected!(parser, p, "an integer", => "decimal numbers not supported here")
}
x if Parser::tokenkind_continues_ident(x) => {
unexpected!(parser, p.kind, "an integer")
unexpected!(parser, p, "an integer")
}
_ => {}
}
// remove the possible "f" number suffix and any '_' characters
let res = prepare_number_str(parser.span_str(peek.span))
let res = prepare_number_str(parser.lexer.span_str(peek.span))
.parse()
.map_err(ParseErrorKind::InvalidInteger)
.map_err(|e| ParseError::new(e, peek.span))?;
.map_err(|e| error!("Failed to parse integer: {e}", @peek.span))?;
Ok(res)
}
x => unexpected!(parser, x, "an integer"),
_ => unexpected!(parser, peek, "an integer"),
}
}
@ -104,23 +104,22 @@ where
match peek.kind {
TokenKind::NaN => return Ok("NaN".parse().unwrap()),
TokenKind::Digits | t!("+") | t!("-") => {}
x => unexpected!(parser, x, "a floating point number"),
_ => unexpected!(parser, peek, "a floating point number"),
};
let float_token = parser.glue_float()?;
match float_token.kind {
TokenKind::Number(NumberKind::Float) => {
parser.pop_peek();
}
x => unexpected!(parser, x, "a floating point number"),
_ => unexpected!(parser, float_token, "a floating point number"),
};
let span = parser.span_str(float_token.span);
let span = parser.lexer.span_str(float_token.span);
// remove the possible "f" number suffix and any '_' characters
prepare_number_str(span.strip_suffix('f').unwrap_or(span))
.parse()
.map_err(ParseErrorKind::InvalidFloat)
.map_err(|e| ParseError::new(e, float_token.span))
.map_err(|e| error!("Failed to parser floating point number: {e}", @float_token.span))
}
impl TokenValue for f32 {
@ -144,38 +143,37 @@ impl TokenValue for Number {
return Ok(Number::Float(f64::NAN));
}
TokenKind::Number(x) => x,
x => unexpected!(parser, x, "a number"),
_ => unexpected!(parser, number, "a number"),
};
parser.pop_peek();
let span = parser.span_str(number.span);
let span = parser.lexer.span_str(number.span);
match number_kind {
NumberKind::Decimal => {
let str = prepare_number_str(span.strip_suffix("dec").unwrap_or(span));
let decimal = if str.contains('e') {
Decimal::from_scientific(str.as_ref()).map_err(|e| {
ParseError::new(ParseErrorKind::InvalidDecimal(e), number.span)
})?
Decimal::from_scientific(str.as_ref())
.map_err(|e| error!("Failed to parser decimal: {e}", @number.span))?
} else {
Decimal::from_str(str.as_ref()).map_err(|e| {
ParseError::new(ParseErrorKind::InvalidDecimal(e), number.span)
})?
Decimal::from_str(str.as_ref())
.map_err(|e| error!("Failed to parser decimal: {e}", @number.span))?
};
Ok(Number::Decimal(decimal))
}
NumberKind::Float => {
let float = prepare_number_str(span.strip_suffix('f').unwrap_or(span))
.parse()
.map_err(|e| ParseError::new(ParseErrorKind::InvalidFloat(e), number.span))?;
let float =
prepare_number_str(span.strip_suffix('f').unwrap_or(span)).parse().map_err(
|e| error!("Failed to parser floating point number: {e}", @number.span),
)?;
Ok(Number::Float(float))
}
NumberKind::Integer => {
let integer = prepare_number_str(span.strip_suffix('f').unwrap_or(span))
.parse()
.map_err(|e| ParseError::new(ParseErrorKind::InvalidInteger(e), number.span))?;
.map_err(|e| error!("Failed to parse integer: {e}", @number.span))?;
Ok(Number::Int(integer))
}

View file

@ -1,9 +1,10 @@
use crate::{
sql::Uuid,
syn::{
error::bail,
parser::{
mac::{expected_whitespace, unexpected},
ParseError, ParseErrorKind, ParseResult, Parser,
ParseResult, Parser,
},
token::{t, DurationSuffix, NumberSuffix, TokenKind, VectorTypeKind},
},
@ -17,7 +18,7 @@ impl Parser<'_> {
let double = match quote_token.kind {
t!("u\"") => true,
t!("u'") => false,
x => unexpected!(self, x, "a uuid"),
_ => unexpected!(self, quote_token, "a uuid"),
};
self.pop_peek();
@ -97,40 +98,32 @@ impl Parser<'_> {
| TokenKind::VectorType(VectorTypeKind::F64 | VectorTypeKind::F32) => {
// there are some keywords and languages keywords which could be part of the
// hex section.
if !self.span_bytes(next.span).iter().all(|x| x.is_ascii_hexdigit()) {
unexpected!(self, TokenKind::Identifier, "UUID hex digits");
if !self.lexer.span_bytes(next.span).iter().all(|x| x.is_ascii_hexdigit()) {
bail!("Invalid UUID section, invalid hex character in section", @next.span)
}
cur = self.pop_peek();
break;
}
t!("-") | t!("\"") | t!("'") => break,
_ => unexpected!(self, TokenKind::Identifier, "UUID hex digits"),
_ => {
bail!("Invalid UUID section, invalid hex character in section", @next.span)
}
}
}
// Get the span that covered all eaten tokens.
let digits_span = start_token.span.covers(cur.span);
let digits_bytes = self.span_str(digits_span).as_bytes();
let digits_bytes = self.lexer.span_str(digits_span).as_bytes();
// for error handling, the incorrect hex character should be returned first, before
// returning the not correct length for segment error even if both are valid.
if !digits_bytes.iter().all(|x| x.is_ascii_hexdigit()) {
return Err(ParseError::new(
ParseErrorKind::Unexpected {
found: TokenKind::Strand,
expected: "UUID hex digits",
},
digits_span,
));
bail!("Unexpected characters in UUID token, expected UUID hex digits", @digits_span);
}
if digits_bytes.len() != required_len {
return Err(ParseError::new(
ParseErrorKind::InvalidUuidPart {
len: required_len,
},
digits_span,
));
bail!("Unexpected characters in UUID token, invalid length of hex digits are",
@digits_span => "this has `{}` character where `{}` are required", digits_bytes.len(), required_len);
}
// write into the buffer

View file

@ -2,9 +2,10 @@ use super::{ParseResult, Parser};
use crate::{
sql::{Constant, Function, Value},
syn::{
error::MessageKind,
parser::{
mac::{expected, unexpected},
ParseError, ParseErrorKind,
SyntaxError,
},
token::{t, Span},
},
@ -466,15 +467,15 @@ impl Parser<'_> {
let mut last_span = start;
while self.eat(t!("::")) {
let t = self.glue_ident(false)?;
if !t.kind.can_be_identifier() {
unexpected!(self, t.kind, "an identifier")
if !Self::tokenkind_can_start_ident(t.kind) {
unexpected!(self, t, "an identifier")
}
self.pop_peek();
last_span = self.last_span();
}
let span = start.covers(last_span);
let str = self.span_str(span);
let str = self.lexer.span_str(span);
match PATHS.get_entry(&UniCase::ascii(str)) {
Some((_, PathKind::Constant(x))) => Ok(Value::Constant(x.clone())),
@ -497,23 +498,20 @@ impl Parser<'_> {
})
.map(|x| x.into_inner());
if cut_off == MAX_LEVENSTHEIN_CUT_OFF {
// couldn't find a value which lowered the cut off,
// any suggestion probably will be nonsensical so don't give any.
return Err(ParseError::new(
ParseErrorKind::InvalidPath {
possibly: None,
},
span,
));
if let Some(possibly) = possibly {
// If we couldn't find a value which lowered the cut off,
// any suggestion probably will be nonsensical so give an suggestion when the
// the cut_off was lowered.
if cut_off < MAX_LEVENSTHEIN_CUT_OFF {
return Err(SyntaxError::new(format_args!(
"Invalid function/constant path, did you maybe mean `{possibly}`"
))
.with_span(span, MessageKind::Error));
}
}
Err(ParseError::new(
ParseErrorKind::InvalidPath {
possibly,
},
span,
))
Err(SyntaxError::new("Invalid function/constant path")
.with_span(span, MessageKind::Error))
}
}
}

View file

@ -1,405 +0,0 @@
use crate::syn::{
common::Location,
error::{RenderedError, Snippet},
lexer::Error as LexError,
token::{Span, TokenKind},
};
use std::{
fmt::Write,
num::{ParseFloatError, ParseIntError},
ops::RangeInclusive,
};
/// The reason a numeric value could not be interpreted as an integer.
// NOTE(review): not referenced by the rendering code in this file; presumably
// used by number conversion elsewhere in the parser — confirm before extending.
#[derive(Debug)]
#[non_exhaustive]
pub enum IntErrorKind {
	// A float value was found where an integer was required — TODO confirm.
	FloatToInt,
	// A decimal value was found where an integer was required — TODO confirm.
	DecimalToInt,
	// The value does not fit in the target integer type — TODO confirm.
	IntegerOverflow,
}
/// Which clause an idiom was missing from in a statement selection.
///
/// Rendered by the `ParseErrorKind::MissingField` arm as
/// "Missing {group|split|order} idiom `…` in statement selection".
#[derive(Debug)]
#[non_exhaustive]
pub enum MissingKind {
	// Missing from the group clause of the selection.
	Group,
	// Missing from the split clause of the selection.
	Split,
	// Missing from the order clause of the selection.
	Order,
}
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseErrorKind {
	/// The parser encountered an unexpected token.
	Unexpected {
		found: TokenKind,
		expected: &'static str,
	},
	/// The parser encountered an unexpected token, with an extra explanation
	/// rendered as a label on the error snippet.
	UnexpectedExplain {
		found: TokenKind,
		expected: &'static str,
		explain: &'static str,
	},
	/// The query ended before the expected token was found.
	UnexpectedEof {
		expected: &'static str,
	},
	/// An error for an unclosed delimiter with a span of the token which should be closed.
	UnclosedDelimiter {
		expected: TokenKind,
		should_close: Span,
	},
	/// An error for parsing an integer.
	InvalidInteger(ParseIntError),
	/// An error for parsing a float.
	InvalidFloat(ParseFloatError),
	/// An error for parsing a decimal.
	InvalidDecimal(rust_decimal::Error),
	/// An invalid identifier was encountered.
	InvalidIdent,
	/// A keyword was found which is not allowed to start a statement in this
	/// position; `disallowed` is the span of the offending keyword.
	DisallowedStatement {
		found: TokenKind,
		expected: TokenKind,
		disallowed: Span,
	},
	/// The parser encountered an token which could not be lexed correctly.
	InvalidToken(LexError),
	/// Matched a path which was invalid; `possibly` optionally carries a
	/// suggestion rendered as a "did you maybe mean" hint.
	InvalidPath {
		possibly: Option<&'static str>,
	},
	/// An error for parsing a regex.
	InvalidRegex(regex::Error),
	/// A group/split/order idiom was missing from a statement selection;
	/// `field` is the span where the idiom was expected.
	MissingField {
		field: Span,
		idiom: String,
		kind: MissingKind,
	},
	/// A UUID hex section did not have the required length of `len` characters.
	InvalidUuidPart {
		len: usize,
	},
	/// A datetime digits section did not have the required length of `len` characters.
	InvalidDatetimePart {
		len: usize,
	},
	/// A datetime digits section was outside its valid inclusive range.
	OutrangeDatetimePart {
		range: RangeInclusive<usize>,
	},
	/// A datetime had more than 9 nanosecond digits.
	TooManyNanosecondsDatetime,
	/// A datetime date was outside the valid date range.
	InvalidDatetimeDate,
	/// A datetime time was outside the valid time range.
	InvalidDatetimeTime,
	/// Parsing exceeded the depth limit for objects.
	ExceededObjectDepthLimit,
	/// Parsing exceeded the depth limit for queries.
	ExceededQueryDepthLimit,
	/// A duration exceeded the maximum allowed value.
	DurationOverflow,
	/// Whitespace appeared in a position where it is disallowed.
	NoWhitespace,
}
/// A parsing error.
#[derive(Debug)]
#[non_exhaustive]
pub struct ParseError {
	/// What kind of error was encountered.
	pub kind: ParseErrorKind,
	/// The span of source text the error applies to.
	pub at: Span,
}
impl ParseError {
/// Create a new parse error.
pub fn new(kind: ParseErrorKind, at: Span) -> Self {
ParseError {
kind,
at,
}
}
pub fn render_on(&self, source: &str) -> RenderedError {
Self::render_on_inner(source, &self.kind, self.at)
}
/// Create a rendered error from the string this error was generated from.
pub fn render_on_inner(source: &str, kind: &ParseErrorKind, at: Span) -> RenderedError {
match kind {
ParseErrorKind::Unexpected {
found,
expected,
} => {
let text = format!("Unexpected token '{}' expected {}", found.as_str(), expected);
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::UnexpectedExplain {
found,
expected,
explain,
} => {
let text = format!("Unexpected token '{}' expected {}", found.as_str(), expected);
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, Some(explain));
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::UnexpectedEof {
expected,
} => {
let text = format!("Query ended early, expected {}", expected);
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::UnclosedDelimiter {
expected,
should_close,
} => {
let text = format!("Expected closing delimiter '{}'", expected.as_str());
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
let locations = Location::range_of_span(source, *should_close);
let close_snippet = Snippet::from_source_location_range(
source,
locations,
Some("Expected this delimiter to close"),
);
RenderedError {
text,
snippets: vec![snippet, close_snippet],
}
}
ParseErrorKind::DisallowedStatement {
found,
expected,
disallowed,
} => {
let text = format!(
"Unexpected token '{}' expected '{}'",
found.as_str(),
expected.as_str()
);
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
let locations = Location::range_of_span(source, *disallowed);
let dissallowed_snippet = Snippet::from_source_location_range(
source,
locations,
Some("this keyword is not allowed to start a statement in this position"),
);
RenderedError {
text,
snippets: vec![snippet, dissallowed_snippet],
}
}
ParseErrorKind::InvalidToken(e) => {
let text = e.to_string();
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidPath {
possibly,
} => {
let mut text = "Invalid function path".to_owned();
if let Some(p) = possibly {
// writing into a string never causes an error.
write!(text, ", did you maybe mean `{}`", p).unwrap();
}
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(
source,
locations,
Some("This path does not exist."),
);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidInteger(ref error) => {
let text = format!("failed to parse integer, {error}");
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidFloat(ref error) => {
let text = format!("failed to parse floating point, {error}");
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidDecimal(ref error) => {
let text = format!("failed to parse decimal number, {error}");
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidRegex(ref error) => {
let text = format!("failed to parse regex, {error}");
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::NoWhitespace => {
let text = "Whitespace is dissallowed in this position";
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::ExceededObjectDepthLimit => {
let text = "Parsing exceeded the depth limit for objects";
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::ExceededQueryDepthLimit => {
let text = "Parsing exceeded the depth limit for queries";
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::MissingField {
field,
idiom,
kind,
} => {
let text = match kind {
MissingKind::Group => {
format!("Missing group idiom `{idiom}` in statement selection")
}
MissingKind::Split => {
format!("Missing split idiom `{idiom}` in statement selection")
}
MissingKind::Order => {
format!("Missing order idiom `{idiom}` in statement selection")
}
};
let locations = Location::range_of_span(source, at);
let snippet_error = Snippet::from_source_location_range(source, locations, None);
let locations = Location::range_of_span(source, *field);
let snippet_hint = Snippet::from_source_location_range(
source,
locations,
Some("Idiom missing here"),
);
RenderedError {
text: text.to_string(),
snippets: vec![snippet_error, snippet_hint],
}
}
ParseErrorKind::DurationOverflow => {
let text = "Duration specified exceeds maximum allowed value";
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidIdent => {
let text = "Duration specified exceeds maximum allowed value";
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text: text.to_string(),
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidUuidPart {
len,
} => {
let text = format!(
"Uuid hex section not the correct length, needs to be {len} characters"
);
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidDatetimePart {
len,
} => {
let text = format!(
"Datetime digits section not the correct length, needs to be {len} characters"
);
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::OutrangeDatetimePart {
range,
} => {
let text = format!(
"Datetime digits not within valid range {}..={}",
range.start(),
range.end()
);
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::TooManyNanosecondsDatetime => {
let text = "Too many digits in Datetime nanoseconds".to_owned();
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(
source,
locations,
Some("Nanoseconds can at most be 9 characters"),
);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidDatetimeDate => {
let text = "Invalid Datetime date".to_owned();
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseErrorKind::InvalidDatetimeTime => {
let text = "Datetime time outside of valid time range".to_owned();
let locations = Location::range_of_span(source, at);
let snippet = Snippet::from_source_location_range(source, locations, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
}
}
}

View file

@ -3,11 +3,11 @@
use reblessive::Stk;
use super::mac::unexpected;
use super::ParseError;
use crate::sql::{value::TryNeg, Cast, Expression, Number, Operator, Value};
use crate::syn::error::bail;
use crate::syn::token::Token;
use crate::syn::{
parser::{mac::expected, ParseErrorKind, ParseResult, Parser},
parser::{mac::expected, ParseResult, Parser},
token::{t, TokenKind},
};
@ -40,12 +40,13 @@ impl Parser<'_> {
/// Parse a assigner operator.
pub fn parse_assigner(&mut self) -> ParseResult<Operator> {
match self.next().kind {
let token = self.next();
match token.kind {
t!("=") => Ok(Operator::Equal),
t!("+=") => Ok(Operator::Inc),
t!("-=") => Ok(Operator::Dec),
t!("+?=") => Ok(Operator::Ext),
x => unexpected!(self, x, "an assign operator"),
_ => unexpected!(self, token, "an assign operator"),
}
}
@ -202,25 +203,20 @@ impl Parser<'_> {
pub fn parse_knn(&mut self, token: Token) -> ParseResult<Operator> {
let amount = self.next_token_value()?;
let op = if self.eat(t!(",")) {
match self.peek_kind(){
let token = self.peek();
match token.kind {
TokenKind::Distance(ref k) => {
self.pop_peek();
let d = self.convert_distance(k).map(Some)?;
Operator::Knn(amount, d)
},
}
TokenKind::Digits | TokenKind::Number(_) => {
let ef = self.next_token_value()?;
Operator::Ann(amount, ef)
}
_ => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: token.kind,
expected: "a distance or an integer",
explain: "The NN operator accepts either a distance for brute force operation, or an EF value for approximate operations",
},
token.span,
))
bail!("Unexpected token {} expected a distance of an integer", token.kind,
@token.span => "The NN operator accepts either a distance or an EF value (integer)")
}
}
} else {
@ -324,14 +320,8 @@ impl Parser<'_> {
let Some((l_bp, r_bp)) = Self::infix_binding_power(token.kind) else {
// explain that assignment operators can't be used in normal expressions.
if let t!("+=") | t!("*=") | t!("-=") | t!("+?=") = token.kind {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: token.kind,
expected: "an operator",
explain: "assignement operator are only allowed in SET and DUPLICATE KEY UPDATE statements",
},
token.span,
));
unexpected!(self,token,"an operator",
=> "assignment operators are only allowed in SET and DUPLICATE KEY UPDATE clauses")
}
break;
};

View file

@ -3,10 +3,8 @@ use reblessive::Stk;
use crate::{
sql::{Function, Ident, Model, Value},
syn::{
parser::{
mac::{expected, expected_whitespace, unexpected},
ParseError, ParseErrorKind,
},
error::error,
parser::mac::{expected, expected_whitespace, unexpected},
token::{t, TokenKind},
},
};
@ -62,36 +60,36 @@ impl Parser<'_> {
let token = self.next();
let major: u32 = match token.kind {
TokenKind::Digits => std::str::from_utf8(self.lexer.reader.span(token.span))
.unwrap()
TokenKind::Digits => self
.lexer
.span_str(token.span)
.parse()
.map_err(ParseErrorKind::InvalidInteger)
.map_err(|e| ParseError::new(e, token.span))?,
x => unexpected!(self, x, "an integer"),
.map_err(|e| error!("Failed to parse model version: {e}", @token.span))?,
_ => unexpected!(self, token, "an integer"),
};
expected_whitespace!(self, t!("."));
let token = self.next_whitespace();
let minor: u32 = match token.kind {
TokenKind::Digits => std::str::from_utf8(self.lexer.reader.span(token.span))
.unwrap()
TokenKind::Digits => self
.lexer
.span_str(token.span)
.parse()
.map_err(ParseErrorKind::InvalidInteger)
.map_err(|e| ParseError::new(e, token.span))?,
x => unexpected!(self, x, "an integer"),
.map_err(|e| error!("Failed to parse model version: {e}", @token.span))?,
_ => unexpected!(self, token, "an integer"),
};
expected_whitespace!(self, t!("."));
let token = self.next_whitespace();
let patch: u32 = match token.kind {
TokenKind::Digits => std::str::from_utf8(self.lexer.reader.span(token.span))
.unwrap()
TokenKind::Digits => self
.lexer
.span_str(token.span)
.parse()
.map_err(ParseErrorKind::InvalidInteger)
.map_err(|e| ParseError::new(e, token.span))?,
x => unexpected!(self, x, "an integer"),
.map_err(|e| error!("Failed to parse model version: {e}", @token.span))?,
_ => unexpected!(self, token, "an integer"),
};
self.expect_closing_delimiter(t!(">"), start)?;

View file

@ -5,10 +5,13 @@ use crate::{
part::DestructurePart, Dir, Edges, Field, Fields, Graph, Ident, Idiom, Part, Table, Tables,
Value,
},
syn::token::{t, Span, TokenKind},
syn::{
error::bail,
token::{t, Span, TokenKind},
},
};
use super::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser};
use super::{mac::unexpected, ParseResult, Parser};
impl Parser<'_> {
/// Parse fields of a selecting query: `foo, bar` in `SELECT foo, bar FROM baz`.
@ -106,14 +109,8 @@ impl Parser<'_> {
res.push(Part::Graph(graph))
}
t!("..") => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: t!(".."),
expected: "an idiom",
explain: "Did you maybe mean the flatten operator `...`",
},
self.last_span(),
))
bail!("Unexpected token `{}` expected and idiom",t!(".."),
@self.last_span() => "Did you maybe intent to use the flatten operator `...`");
}
_ => break,
}
@ -171,14 +168,8 @@ impl Parser<'_> {
}
}
t!("..") => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: t!(".."),
expected: "an idiom",
explain: "Did you maybe mean the flatten operator `...`",
},
self.last_span(),
))
bail!("Unexpected token `{}` expected and idiom",t!(".."),
@self.last_span() => "Did you maybe intent to use the flatten operator `...`");
}
_ => break,
}
@ -301,13 +292,7 @@ impl Parser<'_> {
Part::All => DestructurePart::All(field),
Part::Destructure(v) => DestructurePart::Destructure(field, v),
_ => {
return Err(ParseError::new(
ParseErrorKind::Unexpected {
found,
expected: "a star or a destructuring",
},
self.last_span(),
))
bail!("Unexpected token `{}` expected a `*` or a destructuring", found, @self.last_span());
}
}
}
@ -327,7 +312,8 @@ impl Parser<'_> {
}
/// Parse the part after the `[` in a idiom
pub async fn parse_bracket_part(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Part> {
let res = match self.peek_kind() {
let peek = self.peek();
let res = match peek.kind {
t!("*") => {
self.pop_peek();
Part::All
@ -341,9 +327,9 @@ impl Parser<'_> {
}
t!("-") => {
if let TokenKind::Digits = self.peek_whitespace_token_at(1).kind {
unexpected!(self, t!("-"),"$, * or a number" => "an index can't be negative");
unexpected!(self, peek,"$, * or a number", => "An index can't be negative.");
}
unexpected!(self, t!("-"), "$, * or a number");
unexpected!(self, peek, "$, * or a number");
}
t!("?") | t!("WHERE") => {
self.pop_peek();
@ -386,7 +372,8 @@ impl Parser<'_> {
}
t!("[") => {
self.pop_peek();
let res = match self.peek_kind() {
let peek = self.peek();
let res = match peek.kind {
t!("*") => {
self.pop_peek();
Part::All
@ -403,11 +390,11 @@ impl Parser<'_> {
let peek_digit = self.peek_whitespace_token_at(1);
if let TokenKind::Digits = peek_digit.kind {
let span = self.recent_span().covers(peek_digit.span);
unexpected!(@ span, self, t!("-"),"$, * or a number" => "an index can't be negative");
bail!("Unexpected token `-` expected $, *, or a number", @span => "an index can't be negative");
}
unexpected!(self, t!("-"), "$, * or a number");
unexpected!(self, peek, "$, * or a number");
}
x => unexpected!(self, x, "$, * or a number"),
_ => unexpected!(self, peek, "$, * or a number"),
};
self.expect_closing_delimiter(t!("]"), token.span)?;
res
@ -436,7 +423,8 @@ impl Parser<'_> {
}
t!("[") => {
self.pop_peek();
let res = match self.peek_kind() {
let token = self.peek();
let res = match token.kind {
t!("*") => {
self.pop_peek();
Part::All
@ -449,11 +437,11 @@ impl Parser<'_> {
let peek_digit = self.peek_whitespace_token_at(1);
if let TokenKind::Digits = peek_digit.kind {
let span = self.recent_span().covers(peek_digit.span);
unexpected!(@ span, self, t!("-"),"$, * or a number" => "an index can't be negative");
bail!("Unexpected token `-` expected $, *, or a number", @span => "an index can't be negative");
}
unexpected!(self, t!("-"), "$, * or a number");
unexpected!(self, token, "$, * or a number");
}
x => unexpected!(self, x, "$, * or a number"),
_ => unexpected!(self, token, "$, * or a number"),
};
self.expect_closing_delimiter(t!("]"), token.span)?;
res
@ -465,18 +453,12 @@ impl Parser<'_> {
}
if self.eat(t!("...")) {
let span = self.last_span();
parts.push(Part::Flatten);
if let t!(".") | t!("[") = self.peek_kind() {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: t!("..."),
expected: "local idiom to end.",
explain: "Flattening can only be done at the end of a local idiom.",
},
span,
));
let token = self.peek();
if let t!(".") | t!("[") = token.kind {
bail!("Unexpected token `...` expected a local idiom to end.",
@token.span => "Flattening can only be done at the end of a local idiom")
}
parts.push(Part::Flatten);
}
Ok(Idiom(parts))
@ -520,7 +502,8 @@ impl Parser<'_> {
/// Expects to just have eaten a direction (e.g. <-, <->, or ->) and be at the field like part
/// of the graph
pub async fn parse_graph(&mut self, ctx: &mut Stk, dir: Dir) -> ParseResult<Graph> {
match self.peek_kind() {
let token = self.peek();
match token.kind {
t!("?") => {
self.pop_peek();
Ok(Graph {
@ -530,12 +513,13 @@ impl Parser<'_> {
}
t!("(") => {
let span = self.pop_peek().span;
let what = match self.peek_kind() {
let token = self.peek();
let what = match token.kind {
t!("?") => {
self.pop_peek();
Tables::default()
}
x if x.can_be_identifier() => {
x if Self::tokenkind_can_start_ident(x) => {
// The following function should always succeed here,
// returning an error here would be a bug, so unwrap.
let table = self.next_token_value().unwrap();
@ -545,7 +529,7 @@ impl Parser<'_> {
}
tables
}
x => unexpected!(self, x, "`?` or an identifier"),
_ => unexpected!(self, token, "`?` or an identifier"),
};
let cond = self.try_parse_condition(ctx).await?;
@ -566,7 +550,7 @@ impl Parser<'_> {
..Default::default()
})
}
x if x.can_be_identifier() => {
x if Self::tokenkind_can_start_ident(x) => {
// The following function should always succeed here,
// returning an error here would be a bug, so unwrap.
let table = self.next_token_value().unwrap();
@ -577,7 +561,7 @@ impl Parser<'_> {
..Default::default()
})
}
x => unexpected!(self, x, "`?`, `(` or an identifier"),
_ => unexpected!(self, token, "`?`, `(` or an identifier"),
}
}
}

View file

@ -50,7 +50,7 @@ impl Parser<'_> {
match peek.kind {
TokenKind::Duration => Ok(Value::Duration(self.next_token_value()?)),
TokenKind::Number(_) => Ok(Value::Number(self.next_token_value()?)),
x => unexpected!(self, x, "a number"),
_ => unexpected!(self, peek, "a number"),
}
}
_ => {

View file

@ -75,7 +75,8 @@ impl Parser<'_> {
return Ok(Kind::Literal(literal));
}
match self.next().kind {
let next = self.next();
match next.kind {
t!("BOOL") => Ok(Kind::Bool),
t!("NULL") => Ok(Kind::Null),
t!("BYTES") => Ok(Kind::Bytes),
@ -139,13 +140,14 @@ impl Parser<'_> {
Ok(Kind::Set(Box::new(Kind::Any), None))
}
}
x => unexpected!(self, x, "a kind name"),
_ => unexpected!(self, next, "a kind name"),
}
}
/// Parse the kind of gemoetry
fn parse_geometry_kind(&mut self) -> ParseResult<String> {
match self.next().kind {
let next = self.next();
match next.kind {
TokenKind::Keyword(
x @ (Keyword::Feature
| Keyword::Point
@ -156,13 +158,14 @@ impl Parser<'_> {
| Keyword::MultiPolygon
| Keyword::Collection),
) => Ok(x.as_str().to_ascii_lowercase()),
x => unexpected!(self, x, "a geometry kind name"),
_ => unexpected!(self, next, "a geometry kind name"),
}
}
/// Parse a literal kind
async fn parse_literal_kind(&mut self, ctx: &mut Stk) -> ParseResult<Literal> {
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("'") | t!("\"") | TokenKind::Strand => {
let s = self.next_token_value::<Strand>()?;
Ok(Literal::String(s))
@ -172,7 +175,7 @@ impl Parser<'_> {
match token.kind {
TokenKind::Number(_) => self.next_token_value().map(Literal::Number),
TokenKind::Duration => self.next_token_value().map(Literal::Duration),
x => unexpected!(self, x, "a value"),
_ => unexpected!(self, token, "a value"),
}
}
t!("{") => {
@ -197,7 +200,7 @@ impl Parser<'_> {
}
Ok(Literal::Array(arr))
}
_ => unexpected!(self, self.peek().kind, "a literal kind"),
_ => unexpected!(self, peek, "a literal kind"),
}
}

View file

@ -1,106 +1,43 @@
/// A macro for requiring a certain token to be next, returning an error otherwise..
/// A macro for returning an error when a unexpected token was found.
///
/// This macro handles a variety of situations, including errors related to invalid tokens and
/// unexpected `EOF` or whitespace.
///
/// This macro takes a reference to the parser, the token which was unexpected and a expression
/// which explains what should be expected instead.
///
/// This macro attaches the span from the token as an error span to the error.
macro_rules! unexpected {
(@ $span:expr, $parser:expr, $found:expr, $expected:expr $(=> $explain:expr)?) => {{
unexpected!(@@withSpan, $span, $parser,$found, $expected $(=> $explain)?)
}};
($parser:expr, $found:expr, $expected:expr $(=> $explain:expr)?) => {{
let span = $parser.recent_span();
unexpected!(@@withSpan, span, $parser,$found, $expected $(=> $explain)?)
}};
(@@withSpan, $span:expr, $parser:expr, $found:expr, $expected:expr) => {
match $found {
($parser:expr, $found:expr, $expected:expr $(, @$span:expr)? $(, $($t:tt)* )?) => {{
let __found: $crate::syn::token::Token = $found;
match __found.kind{
$crate::syn::token::TokenKind::Invalid => {
let error = $parser.lexer.error.take().unwrap();
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::InvalidToken(error),
$span
));
return Err($parser.lexer.error.take().unwrap());
}
$crate::syn::token::TokenKind::Eof => {
let expected = $expected;
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::UnexpectedEof {
expected,
},
$span
));
let error = $crate::syn::error::error!("Unexpected end of file, expected {}",$expected, @__found.span $( $($t)* )?);
return Err(error.with_data_pending())
}
$crate::syn::token::TokenKind::WhiteSpace => {
$crate::syn::error::bail!("Unexpected whitespace, expected token {} to continue",$expected, @__found.span$( $($t)* )?)
}
x => {
let expected = $expected;
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::Unexpected {
found: x,
expected,
},
$span
));
$crate::syn::error::bail!("Unexpected token {}, expected {}",x,$expected, @__found.span$( $($t)* )?)
}
}
};
}};
(@@withSpan, $span:expr, $parser:expr, $found:expr, $expected:expr => $explain:expr) => {
match $found {
$crate::syn::token::TokenKind::Invalid => {
let error = $parser.lexer.error.take().unwrap();
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::InvalidToken(error),
$span
));
}
$crate::syn::token::TokenKind::Eof => {
let expected = $expected;
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::UnexpectedEof {
expected,
},
$span
));
}
x => {
let expected = $expected;
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::UnexpectedExplain {
found: x,
expected,
explain: $explain,
},
$span
));
}
}
};
}
/// A macro for indicating that the parser encountered an token which it didn't expect.
/// A macro for asserting that the next token should be of the given type, returns the token if
/// this is the case otherwise it returns an error.
macro_rules! expected {
($parser:expr, $($kind:tt)*) => {{
let token = $parser.next();
match token.kind {
$($kind)* => token,
$crate::syn::parser::TokenKind::Invalid => {
let error = $parser.lexer.error.take().unwrap();
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::InvalidToken(error),
$parser.recent_span(),
));
}
x => {
let expected = $($kind)*.as_str();
let kind = if let $crate::syn::token::TokenKind::Eof = x {
$crate::syn::parser::ParseErrorKind::UnexpectedEof {
expected,
}
} else {
$crate::syn::parser::ParseErrorKind::Unexpected {
found: x,
expected,
}
};
return Err($crate::syn::parser::ParseError::new(kind, $parser.last_span()));
}
let token: crate::syn::token::Token = $parser.next();
if let $($kind)* = token.kind{
token
}else{
$crate::syn::parser::unexpected!($parser,token, $($kind)*)
}
}};
}
@ -108,38 +45,16 @@ macro_rules! expected {
/// A macro for indicating that the parser encountered an token which it didn't expect.
macro_rules! expected_whitespace {
($parser:expr, $($kind:tt)*) => {{
let token = $parser.next_whitespace();
match token.kind {
$($kind)* => token,
$crate::syn::parser::TokenKind::Invalid => {
let error = $parser.lexer.error.take().unwrap();
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::InvalidToken(error),
$parser.recent_span(),
));
}
x => {
let expected = $($kind)*.as_str();
let kind = if let $crate::syn::token::TokenKind::Eof = x {
$crate::syn::parser::ParseErrorKind::UnexpectedEof {
expected,
}
} else {
$crate::syn::parser::ParseErrorKind::Unexpected {
found: x,
expected,
}
};
return Err($crate::syn::parser::ParseError::new(kind, $parser.last_span()));
}
let token: crate::syn::token::Token = $parser.next_whitespace();
if let $($kind)* = token.kind{
token
}else{
$crate::syn::parser::unexpected!($parser,token, $($kind)*)
}
}};
}
#[cfg(test)]
#[doc(hidden)]
#[macro_export]
macro_rules! test_parse {
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:expr) => {{
let mut parser = $crate::syn::parser::Parser::new($t.as_bytes());
@ -148,15 +63,11 @@ macro_rules! test_parse {
}};
}
#[doc(hidden)]
#[macro_export]
macro_rules! enter_object_recursion {
($name:ident = $this:expr => { $($t:tt)* }) => {{
if $this.object_recursion == 0 {
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::ExceededObjectDepthLimit,
$this.last_span(),
));
return Err($crate::syn::parser::SyntaxError::new("Exceeded query recursion depth limit")
.with_span($this.last_span(), $crate::syn::error::MessageKind::Error))
}
struct Dropper<'a, 'b>(&'a mut $crate::syn::parser::Parser<'b>);
impl Drop for Dropper<'_, '_> {
@ -186,14 +97,11 @@ macro_rules! enter_object_recursion {
}};
}
#[macro_export]
macro_rules! enter_query_recursion {
($name:ident = $this:expr => { $($t:tt)* }) => {{
if $this.query_recursion == 0 {
return Err($crate::syn::parser::ParseError::new(
$crate::syn::parser::ParseErrorKind::ExceededQueryDepthLimit,
$this.last_span(),
));
return Err($crate::syn::parser::SyntaxError::new("Exceeded query recursion depth limit")
.with_span($this.last_span(), $crate::syn::error::MessageKind::Error))
}
struct Dropper<'a, 'b>(&'a mut $crate::syn::parser::Parser<'b>);
impl Drop for Dropper<'_, '_> {
@ -224,9 +132,11 @@ macro_rules! enter_query_recursion {
}};
}
pub(super) use expected;
pub(super) use expected_whitespace;
pub(super) use unexpected;
pub(crate) use enter_object_recursion;
pub(crate) use enter_query_recursion;
pub(crate) use expected;
pub(crate) use expected_whitespace;
pub(crate) use unexpected;
#[cfg(test)]
pub(super) use test_parse;
pub(crate) use test_parse;

View file

@ -55,7 +55,8 @@ use self::token_buffer::TokenBuffer;
use crate::{
sql,
syn::{
lexer::{Error as LexError, Lexer},
error::{bail, SyntaxError},
lexer::Lexer,
token::{t, Span, Token, TokenKind},
},
};
@ -63,7 +64,6 @@ use reblessive::Stk;
mod basic;
mod builtin;
mod error;
mod expression;
mod function;
mod idiom;
@ -77,38 +77,32 @@ mod thing;
mod token;
mod token_buffer;
pub(crate) use mac::{
enter_object_recursion, enter_query_recursion, expected_whitespace, unexpected,
};
#[cfg(test)]
pub mod test;
pub use error::{IntErrorKind, ParseError, ParseErrorKind};
/// The result returned by most parser function.
pub type ParseResult<T> = Result<T, ParseError>;
pub type ParseResult<T> = Result<T, SyntaxError>;
/// A result of trying to parse a possibly partial query.
#[derive(Debug)]
#[non_exhaustive]
pub enum PartialResult<T> {
/// The parser couldn't be sure that it has finished a full value.
Pending {
/// The value that was parsed.
/// This will not always be an error, if optional keywords after the end of a statement
/// where missing this will still parse that statement in full.
possible_value: Result<T, ParseError>,
/// number of bytes used for parsing the above statement.
MoreData,
Ok {
value: T,
used: usize,
},
/// The parser is sure that it doesn't need more data to return either an error or a value.
Ready {
/// The value the parser is sure the query should return.
value: Result<T, ParseError>,
/// number of bytes used
Err {
err: SyntaxError,
used: usize,
},
}
/// The SurrealQL parser.
#[non_exhaustive]
pub struct Parser<'a> {
lexer: Lexer<'a>,
last_span: Span,
@ -337,29 +331,14 @@ impl<'a> Parser<'a> {
self.token_buffer.push_front(token);
}
/// Returns the string for a given span of the source.
/// Will panic if the given span was not valid for the source, or invalid utf8
fn span_str(&self, span: Span) -> &'a str {
std::str::from_utf8(self.span_bytes(span)).expect("invalid span segment for source")
}
/// Returns the string for a given span of the source.
/// Will panic if the given span was not valid for the source, or invalid utf8
fn span_bytes(&self, span: Span) -> &'a [u8] {
self.lexer.reader.span(span)
}
/// Checks if the next token is of the given kind. If it isn't it returns a UnclosedDelimiter
/// error.
fn expect_closing_delimiter(&mut self, kind: TokenKind, should_close: Span) -> ParseResult<()> {
if !self.eat(kind) {
return Err(ParseError::new(
ParseErrorKind::UnclosedDelimiter {
expected: kind,
should_close,
},
self.recent_span(),
));
bail!("Unexpected token, expected delimiter `{kind}`",
@self.recent_span(),
@should_close => "expected this delimiter to close"
);
}
Ok(())
}
@ -394,59 +373,29 @@ impl<'a> Parser<'a> {
while self.eat(t!(";")) {}
let res = ctx.run(|ctx| self.parse_stmt(ctx)).await;
match res {
Err(ParseError {
kind: ParseErrorKind::UnexpectedEof {
..
},
..
})
| Err(ParseError {
kind: ParseErrorKind::InvalidToken(LexError::UnexpectedEof),
..
}) => {
return PartialResult::Pending {
possible_value: res,
let v = match res {
Err(e) => {
let peek = self.peek_whitespace_token_at(1);
if e.is_data_pending()
|| matches!(peek.kind, TokenKind::Eof | TokenKind::WhiteSpace)
{
return PartialResult::MoreData;
}
return PartialResult::Err {
err: e,
used: self.lexer.reader.offset(),
};
}
Err(ParseError {
kind: ParseErrorKind::Unexpected {
..
},
at,
..
}) => {
// Ensure the we are sure that the last token was fully parsed.
self.backup_after(at);
let peek = self.peek_whitespace();
if peek.kind != TokenKind::Eof && peek.kind != TokenKind::WhiteSpace {
// if there is a next token or we ate whitespace after the eof we can be sure
// that the error is not the result of a token only being partially present.
return PartialResult::Ready {
value: res,
used: self.lexer.reader.offset(),
};
}
}
_ => {}
Ok(x) => x,
};
let colon = self.next();
if colon.kind != t!(";") {
return PartialResult::Pending {
possible_value: res,
if self.eat(t!(";")) {
return PartialResult::Ok {
value: v,
used: self.lexer.reader.offset(),
};
}
// Might have peeked more tokens past the final ";" so backup to after the semi-colon.
self.backup_after(colon.span);
let used = self.lexer.reader.offset();
PartialResult::Ready {
value: res,
used,
}
PartialResult::MoreData
}
}

View file

@ -4,10 +4,10 @@ use geo_types::{LineString, MultiLineString, MultiPoint, MultiPolygon, Point, Po
use reblessive::Stk;
use crate::{
enter_object_recursion,
sql::{Block, Geometry, Number, Object, Strand, Value},
syn::{
parser::{mac::expected, ParseError, ParseErrorKind, ParseResult, Parser},
error::bail,
parser::{enter_object_recursion, mac::expected, ParseResult, Parser},
token::{t, Span, TokenKind},
},
};
@ -476,13 +476,10 @@ impl Parser<'_> {
if !self.eat(t!("}")) {
// the object didn't end, either an error or not a geometry.
if !comma {
return Err(ParseError::new(
ParseErrorKind::UnclosedDelimiter {
expected: t!("}"),
should_close: start,
},
self.last_span(),
));
bail!("Unexpected token, expected delimiter `}}`",
@self.recent_span(),
@start => "expected this delimiter to close"
);
}
return self
@ -692,7 +689,7 @@ impl Parser<'_> {
let number = self.next_token_value::<Number>()?.to_string();
Ok(number)
}
x => unexpected!(self, x, "an object key"),
_ => unexpected!(self, token, "an object key"),
}
}
}

View file

@ -5,17 +5,17 @@ use reblessive::Stk;
use super::{ParseResult, Parser};
use crate::{
enter_object_recursion, enter_query_recursion,
sql::{
Array, Closure, Dir, Function, Geometry, Ident, Idiom, Kind, Mock, Number, Param, Part,
Range, Script, Strand, Subquery, Table, Value,
},
syn::{
error::bail,
parser::{
enter_object_recursion, enter_query_recursion,
mac::{expected, expected_whitespace, unexpected},
ParseError, ParseErrorKind,
},
token::{t, DurationSuffix, Span, TokenKind},
token::{self, t, DurationSuffix, Span, TokenKind},
},
};
@ -24,7 +24,8 @@ impl Parser<'_> {
///
/// What's are values which are more restricted in what expressions they can contain.
pub async fn parse_what_primary(&mut self, ctx: &mut Stk) -> ParseResult<Value> {
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("..") => Ok(self.try_parse_range(ctx, None).await?.unwrap()),
t!("r\"") => {
self.pop_peek();
@ -103,13 +104,15 @@ impl Parser<'_> {
Ok(self.try_parse_inline(ctx, &value).await?.unwrap_or(value))
}
x => {
if !self.peek_can_start_ident() {
unexpected!(self, x, "a value")
if !Self::tokenkind_can_start_ident(x) {
unexpected!(self, peek, "a value")
}
// Combine possible multiple tokens into a single one. before scanning past it.
let span = self.glue()?.span;
match self.peek_token_at(1).kind {
let peek = self.peek_token_at(1);
match peek.kind {
t!("::") | t!("(") => {
self.pop_peek();
self.parse_builtin(ctx, span).await
@ -120,9 +123,11 @@ impl Parser<'_> {
}
x => {
if x.has_data() {
// Consume the first identifier to ensure streaming works correctly.
self.pop_peek();
// x had data and possibly overwrote the data from token, This is
// always an invalid production so just return error.
unexpected!(self, x, "a value");
unexpected!(self, peek, "a value");
} else {
Ok(Value::Table(self.next_token_value()?))
}
@ -215,7 +220,7 @@ impl Parser<'_> {
match token.kind {
TokenKind::Number(_) => self.next_token_value().map(Value::Number),
TokenKind::Duration => self.next_token_value().map(Value::Duration),
x => unexpected!(self, x, "a value"),
_ => unexpected!(self, token, "a value"),
}
}
@ -371,7 +376,8 @@ impl Parser<'_> {
_ => {
self.glue()?;
match self.peek_token_at(1).kind {
let peek = self.peek_token_at(1);
match peek.kind {
t!("::") | t!("(") => {
self.pop_peek();
self.parse_builtin(ctx, token.span).await?
@ -382,7 +388,9 @@ impl Parser<'_> {
}
x => {
if x.has_data() {
unexpected!(self, x, "a value");
// Pop the first identifier token so that streaming works correctly.
self.pop_peek();
unexpected!(self, peek, "a value");
} else if self.table_as_field {
Value::Idiom(Idiom(vec![Part::Field(self.next_token_value()?)]))
} else {
@ -607,29 +615,13 @@ impl Parser<'_> {
// eat ','
self.next();
match number {
Number::Decimal(_) => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: TokenKind::Digits,
expected: "a non-decimal, non-nan number",
explain: "coordinate numbers can't be NaN or a decimal",
},
number_token.span,
));
}
Number::Float(x) if x.is_nan() => {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: TokenKind::Digits,
expected: "a non-decimal, non-nan number",
explain: "coordinate numbers can't be NaN or a decimal",
},
number_token.span,
));
}
_ => {}
if matches!(number, Number::Decimal(_))
|| matches!(number, Number::Float(x) if x.is_nan())
{
bail!("Unexpected token `dec` expecte a non-decimal, non-number",
@number_token.span => "Coordinate numbers can't be NaN or a decimal");
}
let x = number.as_float();
let y = self.next_token_value::<f64>()?;
self.expect_closing_delimiter(t!(")"), start)?;
@ -644,19 +636,13 @@ impl Parser<'_> {
Subquery::Value(value)
}
};
if self.peek_kind() != t!(")") && Self::starts_disallowed_subquery_statement(peek.kind) {
let token = self.peek();
if token.kind != t!(")") && Self::starts_disallowed_subquery_statement(peek.kind) {
if let Subquery::Value(Value::Idiom(Idiom(ref idiom))) = res {
if idiom.len() == 1 {
// we parsed a single idiom and the next token was a dissallowed statement so
// it is likely that the used meant to use an invalid statement.
return Err(ParseError::new(
ParseErrorKind::DisallowedStatement {
found: self.peek_kind(),
expected: t!(")"),
disallowed: peek.span,
},
self.recent_span(),
));
bail!("Unexpected token `{}` expected `)`",peek.kind,
@token.span,
@peek.span => "This is a reserved keyword here and can't be an identifier");
}
}
}
@ -737,20 +723,15 @@ impl Parser<'_> {
}
};
if let Some(start) = start {
if self.peek_kind() != t!(")") && Self::starts_disallowed_subquery_statement(peek.kind)
{
let token = self.peek();
if token.kind != t!(")") && Self::starts_disallowed_subquery_statement(peek.kind) {
if let Subquery::Value(Value::Idiom(Idiom(ref idiom))) = res {
if idiom.len() == 1 {
// we parsed a single idiom and the next token was a dissallowed statement so
// it is likely that the used meant to use an invalid statement.
return Err(ParseError::new(
ParseErrorKind::DisallowedStatement {
found: self.peek_kind(),
expected: t!(")"),
disallowed: peek.span,
},
self.recent_span(),
));
bail!("Unexpected token `{}` expected `)`",peek.kind,
@token.span,
@peek.span => "This is a reserved keyword here and can't be an identifier");
}
}
}
@ -806,12 +787,13 @@ impl Parser<'_> {
break;
}
}
expected!(self, t!("{"));
let body = self
.lexer
.lex_js_function_body()
.map_err(|(e, span)| ParseError::new(ParseErrorKind::InvalidToken(e), span))?;
Ok(Function::Script(Script(body), args))
let token = expected!(self, t!("{"));
let mut span = self.lexer.lex_compound::<token::JavaScript>(token)?.span;
// remove the starting `{` and ending `}`.
span.offset += 1;
span.len -= 2;
let body = self.lexer.span_str(span);
Ok(Function::Script(Script(body.to_string()), args))
}
/// Parse a simple singular value
@ -892,9 +874,9 @@ impl Parser<'_> {
}
_ => {
self.glue()?;
let x = self.peek_token_at(1).kind;
if x.has_data() {
unexpected!(self, x, "a value");
let peek = self.peek_token_at(1);
if peek.kind.has_data() {
unexpected!(self, peek, "a value");
} else if self.table_as_field {
Value::Idiom(Idiom(vec![Part::Field(self.next_token_value()?)]))
} else {

View file

@ -16,9 +16,10 @@ use crate::{
impl Parser<'_> {
pub async fn parse_alter_stmt(&mut self, ctx: &mut Stk) -> ParseResult<AlterStatement> {
match self.next().kind {
let next = self.next();
match next.kind {
t!("TABLE") => self.parse_alter_table(ctx).await.map(AlterStatement::Table),
x => unexpected!(self, x, "a alter statement keyword"),
_ => unexpected!(self, next, "a alter statement keyword"),
}
}
@ -56,7 +57,8 @@ impl Parser<'_> {
}
t!("TYPE") => {
self.pop_peek();
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("NORMAL") => {
self.pop_peek();
res.kind = Some(TableType::Normal);
@ -69,7 +71,7 @@ impl Parser<'_> {
self.pop_peek();
res.kind = Some(TableType::Any);
}
x => unexpected!(self, x, "`NORMAL`, `RELATION`, or `ANY`"),
_ => unexpected!(self, peek, "`NORMAL`, `RELATION`, or `ANY`"),
}
}
t!("SCHEMALESS") => {

View file

@ -32,7 +32,8 @@ use crate::{
impl Parser<'_> {
pub async fn parse_define_stmt(&mut self, ctx: &mut Stk) -> ParseResult<DefineStatement> {
match self.next().kind {
let next = self.next();
match next.kind {
t!("NAMESPACE") | t!("ns") => {
self.parse_define_namespace().map(DefineStatement::Namespace)
}
@ -54,7 +55,7 @@ impl Parser<'_> {
}
t!("ANALYZER") => self.parse_define_analyzer().map(DefineStatement::Analyzer),
t!("ACCESS") => self.parse_define_access(ctx).await.map(DefineStatement::Access),
x => unexpected!(self, x, "a define statement keyword"),
_ => unexpected!(self, next, "a define statement keyword"),
}
}
@ -243,11 +244,12 @@ impl Parser<'_> {
match self.peek_kind() {
t!("TOKEN") => {
self.pop_peek();
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("NONE") => {
// Currently, SurrealDB does not accept tokens without expiration.
// For this reason, some token duration must be set.
unexpected!(self, t!("NONE"), "a token duration");
unexpected!(self, peek, "a token duration");
}
_ => res.set_token_duration(Some(self.next_token_value()?)),
}
@ -308,20 +310,17 @@ impl Parser<'_> {
}
t!("TYPE") => {
self.pop_peek();
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("JWT") => {
self.pop_peek();
res.kind = AccessType::Jwt(self.parse_jwt()?);
}
t!("RECORD") => {
self.pop_peek();
let token = self.pop_peek();
// The record access type can only be defined at the database level
if !matches!(res.base, Base::Db) {
unexpected!(
self,
t!("RECORD"),
"a valid access type at this level"
);
unexpected!(self, token, "a valid access type at this level");
}
let mut ac = access_type::RecordAccess {
..Default::default()
@ -352,7 +351,7 @@ impl Parser<'_> {
if !*EXPERIMENTAL_BEARER_ACCESS {
unexpected!(
self,
t!("BEARER"),
peek,
"the experimental bearer access feature to be enabled"
);
}
@ -389,13 +388,14 @@ impl Parser<'_> {
}
t!("TOKEN") => {
self.pop_peek();
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("NONE") => {
// Currently, SurrealDB does not accept tokens without expiration.
// For this reason, some token duration must be set.
// In the future, allowing issuing tokens without expiration may be useful.
// Tokens issued by access methods can be consumed by third parties that support it.
unexpected!(self, t!("NONE"), "a token duration");
unexpected!(self, peek, "a token duration");
}
_ => res.duration.token = Some(self.next_token_value()?),
}
@ -463,7 +463,8 @@ impl Parser<'_> {
// This matches the display format of the legacy statement
t!("TYPE") => {
self.pop_peek();
match self.next().kind {
let next = self.next();
match next.kind {
TokenKind::Algorithm(alg) => {
expected!(self, t!("VALUE"));
ac.jwt.verify = access_type::JwtAccessVerify::Key(
@ -481,7 +482,7 @@ impl Parser<'_> {
},
);
}
x => unexpected!(self, x, "a token algorithm or 'JWKS'"),
_ => unexpected!(self, next, "a token algorithm or 'JWKS'"),
}
}
_ => break,
@ -505,7 +506,8 @@ impl Parser<'_> {
// This matches the display format of the legacy statement
t!("TYPE") => {
self.pop_peek();
match self.next().kind {
let next = self.next();
match next.kind {
TokenKind::Algorithm(alg) => {
expected!(self, t!("VALUE"));
ac.verify = access_type::JwtAccessVerify::Key(
@ -523,7 +525,7 @@ impl Parser<'_> {
},
);
}
x => unexpected!(self, x, "a token algorithm or 'JWKS'"),
_ => unexpected!(self, next, "a token algorithm or 'JWKS'"),
}
}
_ => break,
@ -661,7 +663,8 @@ impl Parser<'_> {
}
t!("TYPE") => {
self.pop_peek();
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("NORMAL") => {
self.pop_peek();
kind = Some(TableType::Normal);
@ -674,7 +677,7 @@ impl Parser<'_> {
self.pop_peek();
kind = Some(TableType::Any);
}
x => unexpected!(self, x, "`NORMAL`, `RELATION`, or `ANY`"),
_ => unexpected!(self, peek, "`NORMAL`, `RELATION`, or `ANY`"),
}
}
t!("SCHEMALESS") => {
@ -698,7 +701,8 @@ impl Parser<'_> {
}
t!("AS") => {
self.pop_peek();
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("(") => {
let open = self.pop_peek().span;
res.view = Some(self.parse_view(ctx).await?);
@ -707,7 +711,7 @@ impl Parser<'_> {
t!("SELECT") => {
res.view = Some(self.parse_view(ctx).await?);
}
x => unexpected!(self, x, "`SELECT`"),
_ => unexpected!(self, peek, "`SELECT`"),
}
}
_ => break,
@ -1124,7 +1128,8 @@ impl Parser<'_> {
self.pop_peek();
let mut filters = Vec::new();
loop {
match self.next().kind {
let next = self.next();
match next.kind {
t!("ASCII") => {
filters.push(Filter::Ascii);
}
@ -1156,7 +1161,7 @@ impl Parser<'_> {
self.expect_closing_delimiter(t!(")"), open_span)?;
filters.push(Filter::Snowball(language))
}
x => unexpected!(self, x, "a filter"),
_ => unexpected!(self, next, "a filter"),
}
if !self.eat(t!(",")) {
break;
@ -1169,12 +1174,13 @@ impl Parser<'_> {
let mut tokenizers = Vec::new();
loop {
let tokenizer = match self.next().kind {
let next = self.next();
let tokenizer = match next.kind {
t!("BLANK") => Tokenizer::Blank,
t!("CAMEL") => Tokenizer::Camel,
t!("CLASS") => Tokenizer::Class,
t!("PUNCT") => Tokenizer::Punct,
x => unexpected!(self, x, "a tokenizer"),
_ => unexpected!(self, next, "a tokenizer"),
};
tokenizers.push(tokenizer);
if !self.eat(t!(",")) {
@ -1252,35 +1258,40 @@ impl Parser<'_> {
..Default::default()
};
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("ALGORITHM") => {
self.pop_peek();
match self.next().kind {
TokenKind::Algorithm(alg) => match self.next().kind {
t!("KEY") => {
let key = self.next_token_value::<Strand>()?.0;
res.verify = access_type::JwtAccessVerify::Key(
access_type::JwtAccessVerifyKey {
alg,
key: key.to_owned(),
},
);
let next = self.next();
match next.kind {
TokenKind::Algorithm(alg) => {
let next = self.next();
match next.kind {
t!("KEY") => {
let key = self.next_token_value::<Strand>()?.0;
res.verify = access_type::JwtAccessVerify::Key(
access_type::JwtAccessVerifyKey {
alg,
key: key.to_owned(),
},
);
// Currently, issuer and verifier must use the same algorithm.
iss.alg = alg;
// Currently, issuer and verifier must use the same algorithm.
iss.alg = alg;
// If the algorithm is symmetric, the issuer and verifier keys are the same.
// For asymmetric algorithms, the key needs to be explicitly defined.
if alg.is_symmetric() {
iss.key = key;
// Since all the issuer data is known, it can already be assigned.
// Cloning allows updating the original with any explicit issuer data.
res.issue = Some(iss.clone());
// If the algorithm is symmetric, the issuer and verifier keys are the same.
// For asymmetric algorithms, the key needs to be explicitly defined.
if alg.is_symmetric() {
iss.key = key;
// Since all the issuer data is known, it can already be assigned.
// Cloning allows updating the original with any explicit issuer data.
res.issue = Some(iss.clone());
}
}
_ => unexpected!(self, next, "a key"),
}
x => unexpected!(self, x, "a key"),
},
x => unexpected!(self, x, "a valid algorithm"),
}
_ => unexpected!(self, next, "a valid algorithm"),
}
}
t!("URL") => {
@ -1290,30 +1301,32 @@ impl Parser<'_> {
url,
});
}
x => unexpected!(self, x, "`ALGORITHM`, or `URL`"),
_ => unexpected!(self, peek, "`ALGORITHM`, or `URL`"),
}
if self.eat(t!("WITH")) {
expected!(self, t!("ISSUER"));
loop {
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("ALGORITHM") => {
self.pop_peek();
match self.next().kind {
let next = self.next();
match next.kind {
TokenKind::Algorithm(alg) => {
// If an algorithm is already defined, a different value is not expected.
if let JwtAccessVerify::Key(ref ver) = res.verify {
if alg != ver.alg {
unexpected!(
self,
t!("ALGORITHM"),
next,
"a compatible algorithm or no algorithm"
);
}
}
iss.alg = alg;
}
x => unexpected!(self, x, "a valid algorithm"),
_ => unexpected!(self, next, "a valid algorithm"),
}
}
t!("KEY") => {
@ -1322,7 +1335,7 @@ impl Parser<'_> {
// If the algorithm is symmetric and a key is already defined, a different key is not expected.
if let JwtAccessVerify::Key(ref ver) = res.verify {
if ver.alg.is_symmetric() && key != ver.key {
unexpected!(self, t!("KEY"), "a symmetric key or no key");
unexpected!(self, peek, "a symmetric key or no key");
}
}
iss.key = key;

View file

@ -33,7 +33,7 @@ impl Parser<'_> {
res.exprs.push((condition, body.into()));
self.parse_bracketed_tail(ctx, &mut res).await?;
}
x => unexpected!(self, x, "THEN or '{'"),
_ => unexpected!(self, next, "THEN or '{'"),
}
Ok(res)
@ -45,7 +45,8 @@ impl Parser<'_> {
res: &mut IfelseStatement,
) -> ParseResult<()> {
loop {
match self.next().kind {
let next = self.next();
match next.kind {
t!("END") => return Ok(()),
t!("ELSE") => {
if self.eat(t!("IF")) {
@ -62,7 +63,7 @@ impl Parser<'_> {
return Ok(());
}
}
x => unexpected!(self, x, "if to end"),
_ => unexpected!(self, next, "if to end"),
}
}
}

View file

@ -1,7 +1,6 @@
use reblessive::Stk;
use crate::cnf::EXPERIMENTAL_BEARER_ACCESS;
use crate::enter_query_recursion;
use crate::sql::block::Entry;
use crate::sql::statements::rebuild::{RebuildIndexStatement, RebuildStatement};
use crate::sql::statements::show::{ShowSince, ShowStatement};
@ -13,7 +12,7 @@ use crate::sql::statements::{
KillStatement, LiveStatement, OptionStatement, SetStatement, ThrowStatement,
};
use crate::sql::{Fields, Ident, Param};
use crate::syn::parser::{ParseError, ParseErrorKind};
use crate::syn::parser::enter_query_recursion;
use crate::syn::token::{t, TokenKind};
use crate::{
sql::{
@ -61,17 +60,10 @@ impl Parser<'_> {
break;
}
if Self::token_kind_starts_statement(self.peek_kind()) {
let token = self.peek();
if Self::token_kind_starts_statement(token.kind) {
// user likely forgot a semicolon.
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: self.peek_kind(),
expected: "the query to end",
explain:
"maybe forgot a semicolon after the previous statement?",
},
self.recent_span(),
));
unexpected!(self,token,"the query to end", => "maybe forgot a semicolon after the previous statement?");
}
expected!(self, t!("eof"));
@ -118,7 +110,7 @@ impl Parser<'_> {
if !*EXPERIMENTAL_BEARER_ACCESS {
unexpected!(
self,
t!("ACCESS"),
token,
"the experimental bearer access feature to be enabled"
);
}
@ -385,7 +377,8 @@ impl Parser<'_> {
fn parse_access(&mut self) -> ParseResult<AccessStatement> {
let ac = self.next_token_value()?;
let base = self.eat(t!("ON")).then(|| self.parse_base(false)).transpose()?;
match self.peek_kind() {
let peek = self.peek();
match peek.kind {
t!("GRANT") => {
self.pop_peek();
// TODO(gguillemas): Implement rest of the syntax.
@ -416,7 +409,7 @@ impl Parser<'_> {
}))
}
// TODO(gguillemas): Implement rest of the statements.
x => unexpected!(self, x, "an implemented statement"),
_ => unexpected!(self, peek, "an implemented statement"),
}
}
@ -469,7 +462,8 @@ impl Parser<'_> {
/// # Parser State
/// Expects `USE` to already be consumed.
fn parse_use_stmt(&mut self) -> ParseResult<UseStatement> {
let (ns, db) = match self.peek_kind() {
let peek = self.peek();
let (ns, db) = match peek.kind {
t!("NAMESPACE") | t!("ns") => {
self.pop_peek();
let ns = self.next_token_value::<Ident>()?.0;
@ -485,7 +479,7 @@ impl Parser<'_> {
let db = self.next_token_value::<Ident>()?;
(None, Some(db.0))
}
x => unexpected!(self, x, "either DATABASE or NAMESPACE"),
_ => unexpected!(self, peek, "either DATABASE or NAMESPACE"),
};
Ok(UseStatement {
@ -518,7 +512,8 @@ impl Parser<'_> {
/// Expects `INFO` to already be consumed.
pub(crate) fn parse_info_stmt(&mut self) -> ParseResult<InfoStatement> {
expected!(self, t!("FOR"));
let mut stmt = match self.next().kind {
let next = self.next();
let mut stmt = match next.kind {
t!("ROOT") => InfoStatement::Root(false),
t!("NAMESPACE") | t!("ns") => InfoStatement::Ns(false),
t!("DATABASE") => InfoStatement::Db(false),
@ -538,7 +533,7 @@ impl Parser<'_> {
let table = self.next_token_value()?;
InfoStatement::Index(index, table, false)
}
x => unexpected!(self, x, "an info target"),
_ => unexpected!(self, next, "an info target"),
};
if self.peek_kind() == t!("STRUCTURE") {
@ -553,10 +548,11 @@ impl Parser<'_> {
/// # Parser State
/// Expects `KILL` to already be consumed.
pub(crate) fn parse_kill_stmt(&mut self) -> ParseResult<KillStatement> {
let id = match self.peek_kind() {
let peek = self.peek();
let id = match peek.kind {
t!("u\"") | t!("u'") => self.next_token_value().map(Value::Uuid)?,
t!("$param") => self.next_token_value().map(Value::Param)?,
x => unexpected!(self, x, "a UUID or a parameter"),
_ => unexpected!(self, peek, "a UUID or a parameter"),
};
Ok(KillStatement {
id,
@ -595,10 +591,11 @@ impl Parser<'_> {
pub(crate) fn parse_option_stmt(&mut self) -> ParseResult<OptionStatement> {
let name = self.next_token_value()?;
let what = if self.eat(t!("=")) {
match self.next().kind {
let next = self.next();
match next.kind {
t!("true") => true,
t!("false") => false,
x => unexpected!(self, x, "either 'true' or 'false'"),
_ => unexpected!(self, next, "either 'true' or 'false'"),
}
} else {
true
@ -610,7 +607,8 @@ impl Parser<'_> {
}
pub fn parse_rebuild_stmt(&mut self) -> ParseResult<RebuildStatement> {
let res = match self.next().kind {
let next = self.next();
let res = match next.kind {
t!("INDEX") => {
let if_exists = if self.eat(t!("IF")) {
expected!(self, t!("EXISTS"));
@ -629,7 +627,7 @@ impl Parser<'_> {
if_exists,
})
}
x => unexpected!(self, x, "a rebuild statement keyword"),
_ => unexpected!(self, next, "a rebuild statement keyword"),
};
Ok(res)
}
@ -683,13 +681,14 @@ impl Parser<'_> {
expected!(self, t!("CHANGES"));
expected!(self, t!("FOR"));
let table = match self.next().kind {
let next = self.next();
let table = match next.kind {
t!("TABLE") => {
let table = self.next_token_value()?;
Some(table)
}
t!("DATABASE") => None,
x => unexpected!(self, x, "`TABLE` or `DATABASE`"),
_ => unexpected!(self, next, "`TABLE` or `DATABASE`"),
};
expected!(self, t!("SINCE"));
@ -700,7 +699,7 @@ impl Parser<'_> {
ShowSince::Versionstamp(self.next_token_value()?)
}
t!("d\"") | t!("d'") => ShowSince::Timestamp(self.next_token_value()?),
x => unexpected!(self, x, "a version stamp or a date-time"),
_ => unexpected!(self, next, "a version stamp or a date-time"),
};
let limit = self.eat(t!("LIMIT")).then(|| self.next_token_value()).transpose()?;

View file

@ -3,6 +3,7 @@
use reblessive::Stk;
use crate::sql::Fetch;
use crate::syn::error::bail;
use crate::{
sql::{
changefeed::ChangeFeed,
@ -12,14 +13,19 @@ use crate::{
},
syn::{
parser::{
error::MissingKind,
mac::{expected, unexpected},
ParseError, ParseErrorKind, ParseResult, Parser,
ParseResult, Parser,
},
token::{t, DistanceKind, Span, TokenKind, VectorTypeKind},
},
};
pub(crate) enum MissingKind {
Split,
Order,
Group,
}
impl Parser<'_> {
/// Parses a data production if the next token is a data keyword.
/// Otherwise returns None
@ -153,7 +159,7 @@ impl Parser<'_> {
Ok(Some(Cond(v)))
}
pub fn check_idiom<'a>(
pub(crate) fn check_idiom<'a>(
kind: MissingKind,
fields: &'a Fields,
field_span: Span,
@ -193,16 +199,33 @@ impl Parser<'_> {
}
}
found.ok_or_else(|| {
ParseError::new(
ParseErrorKind::MissingField {
field: field_span,
idiom: idiom.to_string(),
kind,
},
idiom_span,
)
})
let Some(found) = found else {
match kind {
MissingKind::Split => {
bail!(
"Missing split idiom `{idiom}` in statement selection",
@idiom_span,
@field_span => "Idiom missing here",
)
}
MissingKind::Order => {
bail!(
"Missing order idiom `{idiom}` in statement selection",
@idiom_span,
@field_span => "Idiom missing here",
)
}
MissingKind::Group => {
bail!(
"Missing group idiom `{idiom}` in statement selection",
@idiom_span,
@field_span => "Idiom missing here",
)
}
};
};
Ok(found)
}
pub async fn try_parse_group(
@ -253,7 +276,8 @@ impl Parser<'_> {
stk: &mut Stk,
permissive: bool,
) -> ParseResult<Permissions> {
match self.next().kind {
let next = self.next();
match next.kind {
t!("NONE") => Ok(Permissions::none()),
t!("FULL") => Ok(Permissions::full()),
t!("FOR") => {
@ -270,7 +294,7 @@ impl Parser<'_> {
}
Ok(permission)
}
x => unexpected!(self, x, "'NONE', 'FULL' or 'FOR'"),
_ => unexpected!(self, next, "'NONE', 'FULL' or 'FOR'"),
}
}
@ -291,7 +315,8 @@ impl Parser<'_> {
let mut delete = false;
loop {
match self.next().kind {
let next = self.next();
match next.kind {
t!("SELECT") => {
select = true;
}
@ -304,7 +329,7 @@ impl Parser<'_> {
t!("DELETE") => {
delete = true;
}
x => unexpected!(self, x, "'SELECT', 'CREATE', 'UPDATE' or 'DELETE'"),
_ => unexpected!(self, next, "'SELECT', 'CREATE', 'UPDATE' or 'DELETE'"),
}
if !self.eat(t!(",")) {
break;
@ -334,11 +359,12 @@ impl Parser<'_> {
///
/// Expects the parser to just have eaten either `SELECT`, `CREATE`, `UPDATE` or `DELETE`.
pub async fn parse_permission_value(&mut self, stk: &mut Stk) -> ParseResult<Permission> {
match self.next().kind {
let next = self.next();
match next.kind {
t!("NONE") => Ok(Permission::None),
t!("FULL") => Ok(Permission::Full),
t!("WHERE") => Ok(Permission::Specific(self.parse_value_field(stk).await?)),
x => unexpected!(self, x, "'NONE', 'FULL', or 'WHERE'"),
_ => unexpected!(self, next, "'NONE', 'FULL', or 'WHERE'"),
}
}
@ -350,22 +376,23 @@ impl Parser<'_> {
/// # Parser state
/// Expects the next keyword to be a base.
pub fn parse_base(&mut self, scope_allowed: bool) -> ParseResult<Base> {
match self.next().kind {
let next = self.next();
match next.kind {
t!("NAMESPACE") | t!("ns") => Ok(Base::Ns),
t!("DATABASE") => Ok(Base::Db),
t!("ROOT") => Ok(Base::Root),
t!("SCOPE") => {
if !scope_allowed {
unexpected!(self, t!("SCOPE"), "a scope is not allowed here");
unexpected!(self, next, "a scope is not allowed here");
}
let name = self.next_token_value()?;
Ok(Base::Sc(name))
}
x => {
_ => {
if scope_allowed {
unexpected!(self, x, "'NAMEPSPACE', 'DATABASE', 'ROOT' or 'SCOPE'")
unexpected!(self, next, "'NAMEPSPACE', 'DATABASE', 'ROOT' or 'SCOPE'")
} else {
unexpected!(self, x, "'NAMEPSPACE', 'DATABASE' or 'ROOT'")
unexpected!(self, next, "'NAMEPSPACE', 'DATABASE' or 'ROOT'")
}
}
}
@ -436,14 +463,16 @@ impl Parser<'_> {
}
pub fn parse_distance(&mut self) -> ParseResult<Distance> {
match self.next().kind {
let next = self.next();
match next.kind {
TokenKind::Distance(k) => self.convert_distance(&k),
x => unexpected!(self, x, "a distance measure"),
_ => unexpected!(self, next, "a distance measure"),
}
}
pub fn parse_vector_type(&mut self) -> ParseResult<VectorType> {
match self.next().kind {
let next = self.next();
match next.kind {
TokenKind::VectorType(x) => Ok(match x {
VectorTypeKind::F64 => VectorType::F64,
VectorTypeKind::F32 => VectorType::F32,
@ -451,7 +480,7 @@ impl Parser<'_> {
VectorTypeKind::I32 => VectorType::I32,
VectorTypeKind::I16 => VectorType::I16,
}),
x => unexpected!(self, x, "a vector type"),
_ => unexpected!(self, next, "a vector type"),
}
}

View file

@ -36,10 +36,11 @@ impl Parser<'_> {
pub async fn parse_relation(&mut self, stk: &mut Stk) -> ParseResult<(Value, Value, Value)> {
let first = self.parse_relate_value(stk).await?;
let is_o = match self.next().kind {
let next = self.next();
let is_o = match next.kind {
t!("->") => true,
t!("<-") => false,
x => unexpected!(self, x, "a relation arrow"),
_ => unexpected!(self, next, "a relation arrow"),
};
let kind = self.parse_relate_kind(stk).await?;
if is_o {

View file

@ -21,7 +21,8 @@ use crate::{
impl Parser<'_> {
pub async fn parse_remove_stmt(&mut self, ctx: &mut Stk) -> ParseResult<RemoveStatement> {
let res = match self.next().kind {
let next = self.next();
let res = match next.kind {
t!("NAMESPACE") | t!("ns") => {
let if_exists = if self.eat(t!("IF")) {
expected!(self, t!("EXISTS"));
@ -198,7 +199,7 @@ impl Parser<'_> {
if_exists,
})
}
x => unexpected!(self, x, "a remove statement keyword"),
_ => unexpected!(self, next, "a remove statement keyword"),
};
Ok(res)
}

View file

@ -7,7 +7,6 @@ use crate::{
},
syn::{
parser::{
error::MissingKind,
mac::{expected, unexpected},
ParseResult, Parser,
},
@ -15,6 +14,8 @@ use crate::{
},
};
use super::parts::MissingKind;
impl Parser<'_> {
pub(crate) async fn parse_select_stmt(
&mut self,
@ -86,7 +87,8 @@ impl Parser<'_> {
if !self.eat(t!("WITH")) {
return Ok(None);
}
let with = match self.next().kind {
let next = self.next();
let with = match next.kind {
t!("NOINDEX") => With::NoIndex,
t!("NO") => {
expected!(self, t!("INDEX"));
@ -99,7 +101,7 @@ impl Parser<'_> {
}
With::Index(index)
}
x => unexpected!(self, x, "`NO`, `NOINDEX` or `INDEX`"),
_ => unexpected!(self, next, "`NO`, `NOINDEX` or `INDEX`"),
};
Ok(Some(with))
}

View file

@ -739,28 +739,41 @@ fn test_streaming() {
let mut parser = Parser::new(&[]);
let mut stack = Stack::new();
for i in 0..source_bytes.len() {
for i in 0..(source_bytes.len() - 1) {
let partial_source = &source_bytes[source_start..i];
//let src = String::from_utf8_lossy(partial_source);
//println!("{}:{}", i, src);
parser = parser.change_source(partial_source);
parser.reset();
match stack.enter(|stk| parser.parse_partial_statement(stk)).finish() {
PartialResult::Pending {
..
} => {
continue;
}
PartialResult::Ready {
PartialResult::MoreData => continue,
PartialResult::Ok {
value,
used,
} => {
//println!("USED: {}", used);
let value = value.unwrap();
assert_eq!(value, expected[current_stmt]);
current_stmt += 1;
source_start += used;
}
PartialResult::Err {
err,
..
} => {
panic!("Streaming test returned an error: {}", err.render_on_bytes(partial_source))
}
}
}
let partial_source = &source_bytes[source_start..];
parser = parser.change_source(partial_source);
parser.reset();
match stack.enter(|stk| parser.parse_stmt(stk)).finish() {
Ok(value) => {
assert_eq!(value, expected[current_stmt]);
current_stmt += 1;
}
Err(e) => {
panic!("Streaming test returned an error: {}", e.render_on_bytes(partial_source))
}
}

View file

@ -4,13 +4,11 @@ use super::{ParseResult, Parser};
use crate::{
sql::{
id::{range::IdRange, Gen},
Id, Ident, Range, Thing, Value,
Id, Ident, Param, Range, Thing, Value,
},
syn::{
parser::{
mac::{expected, expected_whitespace, unexpected},
ParseError, ParseErrorKind,
},
error::bail,
parser::mac::{expected, expected_whitespace, unexpected},
token::{t, TokenKind},
},
};
@ -102,23 +100,19 @@ impl Parser<'_> {
} else {
let id = match beg {
Bound::Unbounded => {
if self.peek_whitespace().kind == t!("$param") {
return Err(ParseError::new(
ParseErrorKind::UnexpectedExplain {
found: t!("$param"),
expected: "a record-id id",
explain: "you can create a record-id from a param with the function 'type::thing'",
},
self.recent_span(),
));
let token = self.peek_whitespace();
if token.kind == t!("$param") {
let param = self.next_token_value::<Param>()?;
bail!("Unexpected token `$param` expected a record-id key",
@token.span => "Record-id's can be create from a param with `type::thing(\"{}\",{})`", ident,param);
}
// we haven't matched anythong so far so we still want any type of id.
unexpected!(self, self.peek_whitespace().kind, "a record-id id")
// we haven't matched anything so far so we still want any type of id.
unexpected!(self, token, "a record-id key")
}
Bound::Excluded(_) => {
// we have matched a bounded id but we don't see an range operator.
unexpected!(self, self.peek_whitespace().kind, "the range operator `..`")
unexpected!(self, self.peek_whitespace(), "the range operator `..`")
}
// We previously converted the `Id` value to `Value` so it's safe to unwrap here.
Bound::Included(v) => v,
@ -209,7 +203,7 @@ impl Parser<'_> {
let digits_token = self.peek_whitespace();
match digits_token.kind {
TokenKind::Digits => {}
x => unexpected!(self, x, "an integer"),
_ => unexpected!(self, digits_token, "an integer"),
}
let next = self.peek_whitespace();
@ -217,17 +211,17 @@ impl Parser<'_> {
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
// TODO(delskayn) explain that record-id's can't have mantissas,
// exponents or a number suffix
unexpected!(self, next.kind, "an integer");
unexpected!(self, next, "an integer");
}
x if Self::tokenkind_continues_ident(x) => {
let span = token.span.covers(next.span);
unexpected!(@span, self, x, "an integer");
bail!("Unexpected token `{x}` expected an integer", @span);
}
// allowed
_ => {}
}
let digits_str = self.span_str(digits_token.span);
let digits_str = self.lexer.span_str(digits_token.span);
if let Ok(number) = digits_str.parse() {
Ok(Id::Number(number))
} else {
@ -240,7 +234,7 @@ impl Parser<'_> {
let digits_token = self.peek_whitespace();
match digits_token.kind {
TokenKind::Digits => {}
x => unexpected!(self, x, "an integer"),
_ => unexpected!(self, digits_token, "an integer"),
}
let next = self.peek_whitespace();
@ -248,17 +242,17 @@ impl Parser<'_> {
t!(".") | TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
// TODO(delskayn) explain that record-id's can't have mantissas,
// exponents or a number suffix
unexpected!(self, next.kind, "an integer");
unexpected!(self, next, "an integer");
}
x if Self::tokenkind_continues_ident(x) => {
let span = token.span.covers(next.span);
unexpected!(@span, self, x, "an integer");
bail!("Unexpected token `{x}` expected an integer", @span);
}
// allowed
_ => {}
}
let digits_str = self.span_str(digits_token.span);
let digits_str = self.lexer.span_str(digits_token.span);
if let Ok(number) = digits_str.parse::<u64>() {
// Parse to u64 and check if the value is equal to `-i64::MIN` via u64 as
// `-i64::MIN` doesn't fit in an i64
@ -280,13 +274,13 @@ impl Parser<'_> {
self.pop_peek();
return Ok(Id::String(self.lexer.string.take().unwrap()));
} else {
unexpected!(self, glued.kind, "a record-id id")
unexpected!(self, glued, "a record-id id")
}
}
self.pop_peek();
let digits_str = self.span_str(token.span);
let digits_str = self.lexer.span_str(token.span);
if let Ok(number) = digits_str.parse::<i64>() {
Ok(Id::Number(number))
} else {
@ -297,7 +291,7 @@ impl Parser<'_> {
self.lexer.duration = None;
let slice = self.lexer.reader.span(token.span);
if slice.iter().any(|x| *x > 0b0111_1111) {
unexpected!(self, token.kind, "a identifier");
unexpected!(self, token, "a identifier");
}
// Should be valid utf-8 as it was already parsed by the lexer
let text = String::from_utf8(slice.to_vec()).unwrap();

View file

@ -5,7 +5,8 @@ use crate::{
SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK, SECONDS_PER_YEAR,
},
syn::{
parser::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser},
error::{bail, error},
parser::{mac::unexpected, ParseResult, Parser},
token::{t, DurationSuffix, NumberKind, NumberSuffix, Token, TokenKind},
},
};
@ -64,11 +65,6 @@ impl Parser<'_> {
)
}
/// Returns if the peeked token can be a identifier.
pub fn peek_can_start_ident(&mut self) -> bool {
Self::tokenkind_can_start_ident(self.peek_kind())
}
/// Returns if the peeked token can be an identifier.
pub fn peek_continues_ident(&mut self) -> bool {
Self::tokenkind_can_start_ident(self.peek_kind())
@ -94,7 +90,7 @@ impl Parser<'_> {
self.pop_peek();
let t = self.lexer.relex_strand(token);
let TokenKind::Strand = t.kind else {
unexpected!(self, t.kind, "a strand")
unexpected!(self, t, "a strand")
};
self.prepend_token(t);
Ok(t)
@ -118,21 +114,21 @@ impl Parser<'_> {
TokenKind::Exponent | TokenKind::NumberSuffix(_) => {
self.pop_peek();
self.span_str(start.span).to_owned()
self.lexer.span_str(start.span).to_owned()
}
TokenKind::Digits if flexible => {
self.pop_peek();
self.span_str(start.span).to_owned()
self.lexer.span_str(start.span).to_owned()
}
TokenKind::DurationSuffix(x) if x.can_be_ident() => {
self.pop_peek();
self.span_str(start.span).to_owned()
self.lexer.span_str(start.span).to_owned()
}
TokenKind::DatetimeChars(_) | TokenKind::VectorType(_) => {
self.pop_peek();
self.span_str(start.span).to_owned()
self.lexer.span_str(start.span).to_owned()
}
_ => return Ok(start),
};
@ -166,7 +162,7 @@ impl Parser<'_> {
| TokenKind::VectorType(_)
| TokenKind::NumberSuffix(_) => {
self.pop_peek();
let str = self.span_str(p.span);
let str = self.lexer.span_str(p.span);
token_buffer.push_str(str);
prev = p;
@ -176,7 +172,7 @@ impl Parser<'_> {
// These tokens might have some more parts following them
TokenKind::Exponent | TokenKind::DatetimeChars(_) | TokenKind::Digits => {
self.pop_peek();
let str = self.span_str(p.span);
let str = self.lexer.span_str(p.span);
token_buffer.push_str(str);
prev = p;
@ -184,7 +180,7 @@ impl Parser<'_> {
TokenKind::DurationSuffix(suffix) => {
self.pop_peek();
if !suffix.can_be_ident() {
return Err(ParseError::new(ParseErrorKind::InvalidIdent, p.span));
bail!("Invalid identifier containing non-ascii characters", @p.span);
}
token_buffer.push_str(suffix.as_str());
prev = p;
@ -237,7 +233,7 @@ impl Parser<'_> {
let n = self.peek_whitespace();
if n.kind != TokenKind::Digits {
unexpected!(self, start.kind, "a number")
unexpected!(self, start, "a number")
}
self.pop_peek();
@ -263,7 +259,7 @@ impl Parser<'_> {
self.pop_peek();
let next = self.peek_whitespace();
if next.kind != TokenKind::Digits {
unexpected!(self, next.kind, "digits after the dot");
unexpected!(self, next, "digits after the dot");
}
self.pop_peek();
kind = NumberKind::Float;
@ -278,11 +274,11 @@ impl Parser<'_> {
self.pop_peek();
let exponent_token = self.peek_whitespace();
if exponent_token.kind != TokenKind::Digits {
unexpected!(self, exponent_token.kind, "digits after the exponent")
unexpected!(self, exponent_token, "digits after the exponent")
}
}
TokenKind::Digits => {}
x => unexpected!(self, x, "digits after the exponent"),
_ => unexpected!(self, exponent_token, "digits after the exponent"),
}
self.pop_peek();
kind = NumberKind::Float;
@ -305,7 +301,7 @@ impl Parser<'_> {
// Check that no ident-like identifiers follow
let next = self.peek_whitespace();
if Self::tokenkind_continues_ident(next.kind) {
unexpected!(self, next.kind, "number to end")
unexpected!(self, next, "number to end")
}
let token = Token {
@ -340,16 +336,15 @@ impl Parser<'_> {
let suffix = match p.kind {
TokenKind::DurationSuffix(x) => x,
x => unexpected!(self, x, "a duration suffix"),
_ => unexpected!(self, p, "a duration suffix"),
};
self.pop_peek();
let digits_str = self.span_str(cur.span);
let digits_str = self.lexer.span_str(cur.span);
let digits_value: u64 = digits_str
.parse()
.map_err(ParseErrorKind::InvalidInteger)
.map_err(|e| ParseError::new(e, p.span))?;
.map_err(|e| error!("Failed to parse duration digits: {e}",@cur.span))?;
let addition = match suffix {
DurationSuffix::Nano => StdDuration::from_nanos(digits_value),
@ -362,35 +357,35 @@ impl Parser<'_> {
let minutes =
digits_value.checked_mul(SECONDS_PER_MINUTE).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(minutes)
}
DurationSuffix::Hour => {
let hours = digits_value.checked_mul(SECONDS_PER_HOUR).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(hours)
}
DurationSuffix::Day => {
let days = digits_value.checked_mul(SECONDS_PER_DAY).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(days)
}
DurationSuffix::Week => {
let weeks = digits_value.checked_mul(SECONDS_PER_WEEK).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(weeks)
}
DurationSuffix::Year => {
let years = digits_value.checked_mul(SECONDS_PER_YEAR).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
StdDuration::from_secs(years)
}
@ -398,7 +393,7 @@ impl Parser<'_> {
duration = duration.checked_add(addition).ok_or_else(|| {
let span = start.span.covers(p.span);
ParseError::new(ParseErrorKind::DurationOverflow, span)
error!("Invalid duration, value overflowed maximum allowed value", @span)
})?;
match self.peek_whitespace().kind {
@ -407,7 +402,7 @@ impl Parser<'_> {
}
x if Parser::tokenkind_continues_ident(x) => {
let span = start.span.covers(p.span);
unexpected!(@span, self, x, "a duration")
bail!("Invalid token, expected duration, but token contained invalid characters", @span)
}
_ => break,
}
@ -442,7 +437,7 @@ impl Parser<'_> {
let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
let span = start.span.covers(digits_token.span);
unexpected!(@span, self,digits_token.kind, "a floating point number")
bail!("Unexpected token `{}` expected a floating point number",digits_token.kind,@span);
}
self.pop_peek();
}
@ -465,7 +460,7 @@ impl Parser<'_> {
self.pop_peek();
let digits_token = self.peek_whitespace();
if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token.kind, "a floating point number")
unexpected!(self, digits_token, "a floating point number")
}
self.pop_peek();
};
@ -481,26 +476,27 @@ impl Parser<'_> {
}
if TokenKind::Digits != digits_token.kind {
unexpected!(self, digits_token.kind, "a floating point number")
unexpected!(self, digits_token, "a floating point number")
}
self.pop_peek();
}
// check for exponent
if let TokenKind::NumberSuffix(suffix) = self.peek_whitespace().kind {
let token = self.peek_whitespace();
if let TokenKind::NumberSuffix(suffix) = token.kind {
match suffix {
NumberSuffix::Float => {
self.pop_peek();
}
NumberSuffix::Decimal => {
unexpected!(self, t!("dec"), "a floating point number")
unexpected!(self, token, "a floating point number")
}
}
}
let t = self.peek_whitespace();
if Self::tokenkind_continues_ident(t.kind) {
unexpected!(self, t.kind, "a floating point number to end")
unexpected!(self, t, "a floating point number to end")
}
let span = start.span.covers(self.last_span());

View file

@ -1,6 +1,5 @@
use crate::syn::token::Token;
#[non_exhaustive]
pub struct TokenBuffer<const S: usize> {
buffer: [Token; S],
write: u8,

View file

@ -1,6 +1,6 @@
//! Module specifying the token representation of the parser.
use std::hash::Hash;
use std::{fmt, hash::Hash};
mod keyword;
pub(crate) use keyword::keyword_t;
@ -440,6 +440,12 @@ pub enum TokenKind {
NaN,
}
impl fmt::Display for TokenKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.as_str())
}
}
/// An assertion statically checking that the size of Tokenkind remains two bytes
const _TOKEN_KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::<TokenKind>()];
@ -448,19 +454,6 @@ impl TokenKind {
matches!(self, TokenKind::Identifier | TokenKind::Duration)
}
pub fn can_be_identifier(&self) -> bool {
matches!(
self,
TokenKind::Identifier
| TokenKind::Keyword(_)
| TokenKind::Language(_)
| TokenKind::Algorithm(_)
| TokenKind::DatetimeChars(_)
| TokenKind::VectorType(_)
| TokenKind::Distance(_),
)
}
fn algorithm_as_str(alg: Algorithm) -> &'static str {
match alg {
Algorithm::EdDSA => "EDDSA",
@ -533,7 +526,6 @@ impl TokenKind {
}
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
#[non_exhaustive]
pub struct Token {
pub kind: TokenKind,
pub span: Span,
@ -565,3 +557,13 @@ impl Token {
self.span.follows_from(&other.span)
}
}
/// A token which is made up of more complex inner parts.
#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
pub struct CompoundToken<T> {
pub value: T,
pub span: Span,
}
/// A compound token which lexes a javascript function body.
pub struct JavaScript;

View file

@ -88,7 +88,7 @@ async fn main() -> surrealdb::Result<()> {
let students: Vec<StudentClasses> = results.take(0)?;
// Use the result as you see fit. In this case we are simply pretty printing it.
dbg!(students);
println!("Students = {:?}", students);
Ok(())
}