Remove nom, add recursion limits with arrays. ()

Co-authored-by: Tobie Morgan Hitchcock <tobie@surrealdb.com>
This commit is contained in:
Mees Delzenne 2024-05-15 11:12:06 +02:00 committed by GitHub
parent 97ce910832
commit 75f48af276
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 55 additions and 598 deletions

View file

@ -1,131 +0,0 @@
use crate::sql::comment::comment;
use crate::sql::comment::{mightbespace, shouldbespace};
use crate::sql::error::IResult;
use crate::sql::operator::{assigner, binary};
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::character::complete::char;
use nom::character::complete::multispace1;
use nom::combinator::peek;
use nom::combinator::{eof, value};
use nom::sequence::preceded;
/// Checks, without consuming any input, that what follows a number literal is
/// one of the accepted terminators (whitespace, an operator, a comment, a
/// closing delimiter, a quote, a separator, a range `..`, or end of input).
pub fn number(i: &str) -> IResult<&str, ()> {
    peek(alt((
        value((), multispace1), // 1 + 1
        value((), binary),      // 1+1
        value((), assigner),    // 1=1
        value((), comment),     // 1/*comment*/
        // Single-character terminators, tried in order:
        // `(1)`, `a[1]`, `{k: 1}`, `"`, `'`, `SET a = 1;`, `[1, 2]`
        value(
            (),
            alt((
                char(')'),
                char(']'),
                char('}'),
                char('"'),
                char('\''),
                char(';'),
                char(','),
            )),
        ),
        value((), tag("..")), // thing:1..2
        value((), eof),       // SET a = 1
    )))(i)
}
/// Checks, without consuming any input, that what follows an identifier is
/// one of the accepted terminators (whitespace, an operator, a comment, a
/// delimiter, a path/index continuation, or end of input).
pub fn ident(i: &str) -> IResult<&str, ()> {
    peek(alt((
        value((), multispace1), // a + 1
        value((), binary),      // a+1
        value((), assigner),    // a+=1
        value((), comment),     // a/*comment*/
        // Single-character terminators, tried in order:
        // `(a)`, `foo[a]`, `{k: a}`, `SET k = a;`, `[a, b]`, `a.k`, `a…`, `a[0]`
        value(
            (),
            alt((
                char(')'),
                char(']'),
                char('}'),
                char(';'),
                char(','),
                char('.'),
                char('…'),
                char('['),
            )),
        ),
        value((), eof), // SET k = a
    )))(i)
}
/// Checks, without consuming any input, that what follows a keyword-like
/// literal (none, false, etc.) is one of the accepted terminators.
pub fn keyword(i: &str) -> IResult<&str, ()> {
    peek(alt((
        value((), multispace1), // false || true
        value((), binary),      // false||true
        value((), comment),     // false/*comment*/
        // Single-character terminators, tried in order:
        // `(false)`, `[WHERE k = false]`, `{k: false}`, `SET a = false;`, `[false, true]`
        value((), alt((char(')'), char(']'), char('}'), char(';'), char(',')))),
        value((), eof), // SET a = false
    )))(i)
}
/// Checks, without consuming any input, that what follows a duration literal
/// is one of the accepted terminators (whitespace, an operator, a comment, a
/// closing delimiter, a separator, a `.`, or end of input).
pub fn duration(i: &str) -> IResult<&str, ()> {
    peek(alt((
        value((), multispace1),
        value((), binary),
        value((), assigner),
        value((), comment),
        // Single-character terminators, tried in order.
        value(
            (),
            alt((
                char(')'),
                char(']'),
                char('}'),
                char(';'),
                char(','),
                char('.'),
            )),
        ),
        value((), eof),
    )))(i)
}
/// Checks, without consuming any input, that a field list is followed by
/// whitespace and one of `FROM` / `TIMEOUT` / `PARALLEL` (case-insensitive),
/// by a `;`, or by end of input.
pub fn field(i: &str) -> IResult<&str, ()> {
    // The clause keyword must be separated from the field list by whitespace.
    let next_clause = preceded(
        shouldbespace,
        alt((
            tag_no_case("FROM"),
            tag_no_case("TIMEOUT"),
            tag_no_case("PARALLEL"),
        )),
    );
    peek(alt((
        value((), next_clause),
        value((), char(';')),
        value((), eof),
    )))(i)
}
/// Checks, without consuming any input, that a subquery is followed either by
/// whitespace and one of `THEN` / `ELSE` / `END` (case-insensitive), or by
/// optional whitespace and a closing token (`;`, `,`, `}`, `)`, `]`) or end
/// of input.
pub fn subquery(i: &str) -> IResult<&str, ()> {
    /// Optional whitespace followed by end of input or a closing token.
    fn closing(i: &str) -> IResult<&str, ()> {
        let (rest, _) = mightbespace(i)?;
        alt((
            value((), eof),
            value((), char(';')),
            value((), char(',')),
            value((), char('}')),
            value((), char(')')),
            value((), char(']')),
        ))(rest)
    }

    // The branch keywords all require the same leading whitespace, so they
    // share a single `preceded`; they are still tried in THEN, ELSE, END order.
    let branch_keyword = preceded(
        shouldbespace,
        alt((
            tag_no_case("THEN"),
            tag_no_case("ELSE"),
            tag_no_case("END"),
        )),
    );
    peek(alt((value((), branch_keyword), closing)))(i)
}
pub fn query(i: &str) -> IResult<&str, ()> {
peek(alt((
value((), preceded(shouldbespace, tag_no_case("THEN"))),
value((), preceded(shouldbespace, tag_no_case("ELSE"))),
value((), preceded(shouldbespace, tag_no_case("END"))),
|i| {
let (i, _) = mightbespace(i)?;
alt((
value((), eof),
value((), char(';')),
value((), char(',')),
value((), char('}')),
value((), char(')')),
value((), char(']')),
))(i)
},
)))(i)
}

View file

@ -2,9 +2,6 @@ use std::{fmt, ops::Range};
use super::common::Location;
mod nom_error;
pub use nom_error::ParseError;
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct RenderedError {

View file

@ -1,434 +0,0 @@
use crate::syn::{
common::Location,
error::{RenderedError, Snippet},
};
use nom::error::ErrorKind;
use nom::error::FromExternalError;
use nom::error::ParseError as NomParseError;
use std::fmt::Write;
use std::num::ParseFloatError;
use std::num::ParseIntError;
use std::ops::Bound;
use thiserror::Error;
/// Error produced while parsing a query, generic over the input type `I`.
///
/// Every variant carries the input at which parsing failed (see `tried()`),
/// and `render_on` turns an error over `&str` into a human readable message
/// with source snippets.
#[derive(Error, Debug, Clone)]
#[non_exhaustive]
pub enum ParseError<I> {
/// Generic failure at the given input; this is what `from_error_kind`
/// produces for plain nom errors.
Base(I),
/// Failure where the parser can name what it expected to find.
Expected {
tried: I,
expected: &'static str,
},
/// Failure with an explanation that is attached to the rendered snippet.
Explained {
tried: I,
explained: &'static str,
},
/// Failure carrying both an explanation for the snippet and the expected
/// input for the message.
ExplainedExpected {
tried: I,
explained: &'static str,
expected: &'static str,
},
/// A delimiter opened at `opened` was not closed by the time the parser
/// reached `tried`.
MissingDelimiter {
opened: I,
tried: I,
},
/// Rendered as "Exceeded maximum parse depth".
ExcessiveDepth(I),
/// A field found in the SELECT clause that is neither an aggregate function
/// nor present in the GROUP BY expression; the string is the field.
Field(I, String),
/// A field found in the SPLIT ON clause that is not present in the SELECT
/// expression; the string is the field.
Split(I, String),
/// A field found in the ORDER BY clause that is not present in the SELECT
/// expression; the string is the field.
Order(I, String),
/// A field found in the GROUP BY clause that is not present in the SELECT
/// expression; the string is the field.
Group(I, String),
/// An invalid role; the string is the role as written.
Role(I, String),
/// The input could not be parsed as an integer.
ParseInt {
tried: I,
error: ParseIntError,
},
/// The input could not be parsed as a float.
ParseFloat {
tried: I,
error: ParseFloatError,
},
/// The input could not be parsed as a decimal.
ParseDecimal {
tried: I,
error: rust_decimal::Error,
},
/// The input could not be parsed as a regex.
ParseRegex {
tried: I,
error: regex::Error,
},
/// The input could not be parsed as an integer within the given bounds.
RangeError {
tried: I,
lower: Bound<u32>,
upper: Bound<u32>,
},
/// Rendered as "Invalid unicode escape code."
InvalidUnicode {
tried: I,
},
/// The path at `tried` is not a member of `parent`.
InvalidPath {
tried: I,
parent: I,
},
}
impl<I: Clone> ParseError<I> {
    /// Returns a clone of the input at which the parser failed.
    ///
    /// Every variant stores that input, either as its first tuple field or as
    /// its `tried` field.
    pub fn tried(&self) -> I {
        let failed_at = match self {
            // Tuple variants: the failing input is the first field.
            Self::Base(input)
            | Self::ExcessiveDepth(input)
            | Self::Field(input, _)
            | Self::Split(input, _)
            | Self::Order(input, _)
            | Self::Group(input, _)
            | Self::Role(input, _) => input,
            // Struct variants: the failing input is the `tried` field.
            Self::Expected { tried, .. }
            | Self::Explained { tried, .. }
            | Self::ExplainedExpected { tried, .. }
            | Self::MissingDelimiter { tried, .. }
            | Self::ParseInt { tried, .. }
            | Self::ParseFloat { tried, .. }
            | Self::ParseDecimal { tried, .. }
            | Self::ParseRegex { tried, .. }
            | Self::RangeError { tried, .. }
            | Self::InvalidUnicode { tried }
            | Self::InvalidPath { tried, .. } => tried,
        };
        failed_at.clone()
    }
}
impl ParseError<&str> {
/// Returns the error represented as a pretty printed string formatted on the original source
/// text.
pub fn render_on(&self, input: &str) -> RenderedError {
match self {
ParseError::Base(i) => {
let location = Location::of_in(i, input);
let text = format!(
"Failed to parse query at line {} column {}",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::Expected {
tried,
expected,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!(
"Failed to parse query at line {} column {} expected {}",
location.line, location.column, expected
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::Explained {
tried,
explained,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!(
"Failed to parse query at line {} column {}",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, Some(*explained));
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::ExplainedExpected {
tried,
expected,
explained,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!(
"Failed to parse query at line {} column {} expected {}",
location.line, location.column, expected
);
let snippet = Snippet::from_source_location(input, location, Some(*explained));
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::InvalidPath {
tried,
parent,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!(
"Path is not a member of {parent} at line {} column {}",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::MissingDelimiter {
tried,
opened,
} => {
let location = Location::of_in(tried, input);
let text = format!(
"Missing closing delimiter at line {} column {}",
location.line, location.column
);
let error_snippet = Snippet::from_source_location(input, location, None);
let location = Location::of_in(opened, input);
let open_snippet = Snippet::from_source_location(
input,
location,
Some("expected this delimiter to be closed"),
);
RenderedError {
text,
snippets: vec![error_snippet, open_snippet],
}
}
ParseError::ExcessiveDepth(tried) => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!(
"Exceeded maximum parse depth at line {} column {}",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::Field(tried, f) => {
let location = Location::of_in(tried, input);
let text = format!(
"Found '{f}' in SELECT clause at line {} column {}, but field is not an aggregate function, and is not present in GROUP BY expression",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::Split(tried, f) => {
let location = Location::of_in(tried, input);
let text = format!(
"Found '{f}' in SPLIT ON clause at line {} column {}, but field is is not present in SELECT expression",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::Order(tried, f) => {
let location = Location::of_in(tried, input);
let text = format!(
"Found '{f}' in ORDER BY clause at line {} column {}, but field is is not present in SELECT expression",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::Group(tried, f) => {
let location = Location::of_in(tried, input);
let text = format!(
"Found '{f}' in GROUP BY clause at line {} column {}, but field is is not present in SELECT expression",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::Role(tried, r) => {
let location = Location::of_in(tried, input);
let text = format!(
"Invalid role '{r}' at line {} column {}.",
location.line, location.column
);
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::ParseInt {
tried,
error,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!("Failed to parse '{tried}' as an integer: {error}.");
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::ParseFloat {
tried,
error,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!("Failed to parse '{tried}' as a float: {error}.");
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::ParseDecimal {
tried,
error,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!("Failed to parse '{tried}' as decimal: {error}.");
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::ParseRegex {
tried,
error,
} => {
let location = Location::of_in(tried, input);
// Writing to a string can't return an error.
let text = format!("Failed to parse '{tried}' as a regex: {error}.");
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::RangeError {
tried,
lower,
upper,
} => {
let location = Location::of_in(tried, input);
let mut text =
format!("Failed to parse '{tried}' as a bounded integer with bounds");
// Writing to a string can't return an error.
match lower {
Bound::Included(x) => write!(&mut text, "[{}", x).unwrap(),
Bound::Excluded(x) => write!(&mut text, "({}", x).unwrap(),
Bound::Unbounded => {}
}
write!(&mut text, "...").unwrap();
match upper {
Bound::Included(x) => write!(&mut text, "{}]", x).unwrap(),
Bound::Excluded(x) => write!(&mut text, "{})", x).unwrap(),
Bound::Unbounded => {}
}
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
ParseError::InvalidUnicode {
tried,
} => {
let location = Location::of_in(tried, input);
let text = "Invalid unicode escape code.".to_string();
let snippet = Snippet::from_source_location(input, location, None);
RenderedError {
text,
snippets: vec![snippet],
}
}
}
}
}
/// Wraps an integer-parsing failure raised at `input` as
/// `ParseError::ParseInt`; the nom error kind is ignored.
impl<I> FromExternalError<I, ParseIntError> for ParseError<I> {
    fn from_external_error(input: I, _kind: ErrorKind, e: ParseIntError) -> Self {
        let (tried, error) = (input, e);
        Self::ParseInt {
            tried,
            error,
        }
    }
}
/// Wraps a float-parsing failure raised at `input` as
/// `ParseError::ParseFloat`; the nom error kind is ignored.
impl<I> FromExternalError<I, ParseFloatError> for ParseError<I> {
    fn from_external_error(input: I, _kind: ErrorKind, e: ParseFloatError) -> Self {
        let (tried, error) = (input, e);
        Self::ParseFloat {
            tried,
            error,
        }
    }
}
/// Wraps a regex compilation failure raised at `input` as
/// `ParseError::ParseRegex`; the nom error kind is ignored.
impl<I> FromExternalError<I, regex::Error> for ParseError<I> {
    fn from_external_error(input: I, _kind: ErrorKind, e: regex::Error) -> Self {
        let (tried, error) = (input, e);
        Self::ParseRegex {
            tried,
            error,
        }
    }
}
impl<I> NomParseError<I> for ParseError<I> {
fn from_error_kind(input: I, _: ErrorKind) -> Self {
Self::Base(input)
}
fn append(_: I, _: ErrorKind, other: Self) -> Self {
other
}
}

View file

@ -3,7 +3,7 @@ use reblessive::Stk;
use super::{ParseResult, Parser};
use crate::{
enter_query_recursion,
enter_object_recursion, enter_query_recursion,
sql::{
Array, Dir, Function, Geometry, Ident, Idiom, Mock, Part, Script, Strand, Subquery, Table,
Value,
@ -303,19 +303,21 @@ impl Parser<'_> {
/// Expects the starting `[` to already be eaten and its span passed as an argument.
pub async fn parse_array(&mut self, ctx: &mut Stk, start: Span) -> ParseResult<Array> {
let mut values = Vec::new();
loop {
if self.eat(t!("]")) {
break;
}
enter_object_recursion!(this = self => {
loop {
if this.eat(t!("]")) {
break;
}
let value = ctx.run(|ctx| self.parse_value_field(ctx)).await?;
values.push(value);
let value = ctx.run(|ctx| this.parse_value_field(ctx)).await?;
values.push(value);
if !self.eat(t!(",")) {
self.expect_closing_delimiter(t!("]"), start)?;
break;
if !this.eat(t!(",")) {
this.expect_closing_delimiter(t!("]"), start)?;
break;
}
}
}
});
Ok(Array(values))
}

View file

@ -45,6 +45,29 @@ fn object_depth() {
.expect_err("recursion limit of 5 didn't trigger on 6 deep object");
}
/// Nested arrays count against the object recursion limit: with a limit of 5,
/// five nested arrays must parse and six must be rejected.
#[test]
fn array_depth() {
    let mut stack = Stack::new();

    // Exactly at the limit: must parse.
    let source = r#"
        RETURN [ [ [ [ [ ] ] ] ] ]
    "#;
    let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5);
    stack
        .enter(|stk| parser.parse_query(stk))
        .finish()
        .expect("recursion limit of 5 couldn't parse 5 deep array");

    // One level past the limit: must fail.
    let source = r#"
        RETURN [ [ [ [ [ [ ] ] ] ] ] ]
    "#;
    let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5);
    stack
        .enter(|stk| parser.parse_query(stk))
        .finish()
        .expect_err("recursion limit of 5 didn't trigger on 6 deep array");
}
#[test]
fn object_depth_succeed_then_fail() {
let mut stack = Stack::new();
@ -59,16 +82,16 @@ fn object_depth_succeed_then_fail() {
}
}
};
RETURN {
a: {
b: {
c: {
d: {
}
RETURN {
a: {
b: {
c: {
d: {
}
}
}
};
}
};
"#;
let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5);
@ -89,18 +112,18 @@ fn object_depth_succeed_then_fail() {
}
}
};
RETURN {
a: {
b: {
c: {
d: {
e: {
}
RETURN {
a: {
b: {
c: {
d: {
e: {
}
}
}
}
};
}
};
"#;
let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5);
@ -116,7 +139,7 @@ fn query_depth_subquery() {
let source = r#"
RETURN select (select (select ( select foo from bar ) from bar ) from bar) from bar
"#;
"#;
let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5);
stack
.enter(|stk| parser.parse_query(stk))
@ -125,7 +148,7 @@ fn query_depth_subquery() {
let source = r#"
RETURN select (select (select ( select (select foo from bar) from bar ) from bar ) from bar) from bar
"#;
"#;
let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5);
stack
.enter(|stk| parser.parse_query(stk))
@ -179,7 +202,7 @@ fn query_depth_if() {
let mut stack = Stack::new();
let source = r#"
IF IF IF IF IF true THEN false END { false } { false } { false } { false }
IF IF IF IF IF true THEN false END { false } { false } { false } { false }
"#;
let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5);
stack
@ -188,7 +211,7 @@ fn query_depth_if() {
.expect("recursion limit of 5 couldn't parse 5 deep query");
let source = r#"
IF IF IF IF IF IF true THEN false END { false } { false } { false } { false } { false }
IF IF IF IF IF IF true THEN false END { false } { false } { false } { false } { false }
"#;
let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5);
stack