From 1528da9b9568bbba4d43013048b4d125bffe9bf5 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Mon, 25 Mar 2024 20:14:17 +0100 Subject: [PATCH] Implement limits for parsing depth in the new parser. (#3762) --- core/src/syn/v2/parser/error.rs | 20 +++ core/src/syn/v2/parser/mac.rs | 77 +++++++++++ core/src/syn/v2/parser/mod.rs | 2 +- core/src/syn/v2/parser/object.rs | 13 +- core/src/syn/v2/parser/prime.rs | 37 ++++- core/src/syn/v2/parser/stmt/mod.rs | 13 ++ core/src/syn/v2/parser/test/limit.rs | 198 +++++++++++++++++++++++++++ core/src/syn/v2/parser/test/mod.rs | 1 + 8 files changed, 351 insertions(+), 10 deletions(-) create mode 100644 core/src/syn/v2/parser/test/limit.rs diff --git a/core/src/syn/v2/parser/error.rs b/core/src/syn/v2/parser/error.rs index 6c488462..8d29ec10 100644 --- a/core/src/syn/v2/parser/error.rs +++ b/core/src/syn/v2/parser/error.rs @@ -74,6 +74,8 @@ pub enum ParseErrorKind { idiom: String, kind: MissingKind, }, + ExceededObjectDepthLimit, + ExceededQueryDepthLimit, NoWhitespace, } @@ -246,6 +248,24 @@ impl ParseError { snippets: vec![snippet], } } + ParseErrorKind::ExceededObjectDepthLimit => { + let text = "Parsing exceeded the depth limit for objects"; + let locations = Location::range_of_span(source, at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text: text.to_string(), + snippets: vec![snippet], + } + } + ParseErrorKind::ExceededQueryDepthLimit => { + let text = "Parsing exceeded the depth limit for queries"; + let locations = Location::range_of_span(source, at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text: text.to_string(), + snippets: vec![snippet], + } + } ParseErrorKind::MissingField { field, idiom, diff --git a/core/src/syn/v2/parser/mac.rs b/core/src/syn/v2/parser/mac.rs index 0923dbbd..ecba3f94 100644 --- a/core/src/syn/v2/parser/mac.rs +++ b/core/src/syn/v2/parser/mac.rs @@ -74,6 +74,83 @@ macro_rules! 
test_parse {
 	}};
 }
 
+#[macro_export]
+macro_rules! enter_object_recursion {
+	($name:ident = $this:expr => { $($t:tt)* }) => {{
+		if $this.object_recursion == 0 {
+			return Err($crate::syn::v2::parser::ParseError::new(
+				$crate::syn::v2::parser::ParseErrorKind::ExceededObjectDepthLimit,
+				$this.last_span(),
+			));
+		}
+		struct Dropper<'a, 'b>(&'a mut $crate::syn::v2::parser::Parser<'b>);
+		impl Drop for Dropper<'_, '_> {
+			fn drop(&mut self) {
+				self.0.object_recursion += 1;
+			}
+		}
+		impl<'a> ::std::ops::Deref for Dropper<'_,'a>{
+			type Target = $crate::syn::v2::parser::Parser<'a>;
+
+			fn deref(&self) -> &Self::Target{
+				self.0
+			}
+		}
+
+		impl<'a> ::std::ops::DerefMut for Dropper<'_,'a>{
+			fn deref_mut(&mut self) -> &mut Self::Target{
+				self.0
+			}
+		}
+
+		$this.object_recursion -= 1;
+		let mut $name = Dropper($this);
+		{
+			$($t)*
+		}
+	}};
+}
+
+#[macro_export]
+macro_rules! enter_query_recursion {
+	($name:ident = $this:expr => { $($t:tt)* }) => {{
+		// `query_recursion` is the remaining depth budget; at zero the limit is spent,
+		// so fail before descending. The guard below hands the budget back on drop.
+		if $this.query_recursion == 0 {
+			return Err($crate::syn::v2::parser::ParseError::new(
+				$crate::syn::v2::parser::ParseErrorKind::ExceededQueryDepthLimit,
+				$this.last_span(),
+			));
+		}
+		struct Dropper<'a, 'b>(&'a mut $crate::syn::v2::parser::Parser<'b>);
+		impl Drop for Dropper<'_, '_> {
+			fn drop(&mut self) {
+				self.0.query_recursion += 1;
+			}
+		}
+		impl<'a> ::std::ops::Deref for Dropper<'_,'a>{
+			type Target = $crate::syn::v2::parser::Parser<'a>;
+
+			fn deref(&self) -> &Self::Target{
+				self.0
+			}
+		}
+
+		impl<'a> ::std::ops::DerefMut for Dropper<'_,'a>{
+			fn deref_mut(&mut self) -> &mut Self::Target{
+				self.0
+			}
+		}
+
+		$this.query_recursion -= 1;
+		#[allow(unused_mut)]
+		let mut $name = Dropper($this);
+		{
+			$($t)*
+		}
+	}};
+}
+
 pub(super) use expected;
 pub(super) use unexpected;
 
diff --git a/core/src/syn/v2/parser/mod.rs b/core/src/syn/v2/parser/mod.rs
index 5e81072e..f2227c40 100644
--- a/core/src/syn/v2/parser/mod.rs
+++ b/core/src/syn/v2/parser/mod.rs @@ -24,6 +24,7 @@ use crate::{ token::{t, Span, Token, TokenKind}, }, }; +use reblessive::Stk; mod basic; mod builtin; @@ -44,7 +45,6 @@ mod token_buffer; pub mod test; pub use error::{IntErrorKind, ParseError, ParseErrorKind}; -use reblessive::Stk; /// The result returned by most parser function. pub type ParseResult = Result; diff --git a/core/src/syn/v2/parser/object.rs b/core/src/syn/v2/parser/object.rs index a7106116..e076cbc1 100644 --- a/core/src/syn/v2/parser/object.rs +++ b/core/src/syn/v2/parser/object.rs @@ -4,6 +4,7 @@ use geo_types::{LineString, MultiLineString, MultiPoint, MultiPolygon, Point, Po use reblessive::Stk; use crate::{ + enter_object_recursion, sql::{Block, Geometry, Object, Strand, Value}, syn::v2::{ parser::{mac::expected, ParseError, ParseErrorKind, ParseResult, Parser}, @@ -24,12 +25,16 @@ impl Parser<'_> { ) -> ParseResult { if self.eat(t!("}")) { // empty object, just return - return Ok(Value::Object(Object::default())); + enter_object_recursion!(_this = self => { + return Ok(Value::Object(Object::default())); + }) } // Check first if it can be an object. if self.peek_token_at(1).kind == t!(":") { - return self.parse_object_or_geometry(ctx, start).await; + enter_object_recursion!(this = self => { + return this.parse_object_or_geometry(ctx, start).await; + }) } // not an object so instead parse as a block. @@ -582,7 +587,9 @@ impl Parser<'_> { /// # Parser state /// Expects the first `{` to already have been eaten. 
pub(super) async fn parse_object(&mut self, ctx: &mut Stk, start: Span) -> ParseResult { - self.parse_object_from_map(ctx, BTreeMap::new(), start).await + enter_object_recursion!(this = self => { + this.parse_object_from_map(ctx, BTreeMap::new(), start).await + }) } async fn parse_object_from_map( diff --git a/core/src/syn/v2/parser/prime.rs b/core/src/syn/v2/parser/prime.rs index b865b568..35500085 100644 --- a/core/src/syn/v2/parser/prime.rs +++ b/core/src/syn/v2/parser/prime.rs @@ -3,6 +3,7 @@ use reblessive::Stk; use super::{ParseResult, Parser}; use crate::{ + enter_query_recursion, sql::{ Array, Dir, Function, Geometry, Ident, Idiom, Mock, Part, Script, Strand, Subquery, Table, Value, @@ -221,9 +222,11 @@ impl Parser<'_> { self.parse_mock(token.span).map(Value::Mock)? } t!("IF") => { - self.pop_peek(); - let stmt = ctx.run(|ctx| self.parse_if_stmt(ctx)).await?; - Value::Subquery(Box::new(Subquery::Ifelse(stmt))) + enter_query_recursion!(this = self => { + this.pop_peek(); + let stmt = ctx.run(|ctx| this.parse_if_stmt(ctx)).await?; + Value::Subquery(Box::new(Subquery::Ifelse(stmt))) + }) } t!("(") => { self.pop_peek(); @@ -341,9 +344,11 @@ impl Parser<'_> { self.parse_inner_subquery(ctx, Some(peek.span)).await } t!("IF") => { - self.pop_peek(); - let if_stmt = ctx.run(|ctx| self.parse_if_stmt(ctx)).await?; - Ok(Subquery::Ifelse(if_stmt)) + enter_query_recursion!(this = self => { + this.pop_peek(); + let if_stmt = ctx.run(|ctx| this.parse_if_stmt(ctx)).await?; + Ok(Subquery::Ifelse(if_stmt)) + }) } _ => self.parse_inner_subquery(ctx, None).await, } @@ -353,6 +358,16 @@ impl Parser<'_> { &mut self, ctx: &mut Stk, start: Span, + ) -> ParseResult { + enter_query_recursion!(this = self => { + this.parse_inner_subquery_or_coordinate_inner(ctx,start).await + }) + } + + async fn parse_inner_subquery_or_coordinate_inner( + &mut self, + ctx: &mut Stk, + start: Span, ) -> ParseResult { let peek = self.peek(); let res = match peek.kind { @@ -489,6 +504,16 @@ impl 
Parser<'_> { &mut self, ctx: &mut Stk, start: Option, + ) -> ParseResult { + enter_query_recursion!(this = self => { + this.parse_inner_subquery_inner(ctx,start).await + }) + } + + async fn parse_inner_subquery_inner( + &mut self, + ctx: &mut Stk, + start: Option, ) -> ParseResult { let peek = self.peek(); let res = match peek.kind { diff --git a/core/src/syn/v2/parser/stmt/mod.rs b/core/src/syn/v2/parser/stmt/mod.rs index 8ef99c10..083f0caa 100644 --- a/core/src/syn/v2/parser/stmt/mod.rs +++ b/core/src/syn/v2/parser/stmt/mod.rs @@ -1,5 +1,6 @@ use reblessive::Stk; +use crate::enter_query_recursion; use crate::sql::block::Entry; use crate::sql::statements::show::{ShowSince, ShowStatement}; use crate::sql::statements::sleep::SleepStatement; @@ -92,6 +93,12 @@ impl Parser<'_> { } pub(super) async fn parse_stmt(&mut self, ctx: &mut Stk) -> ParseResult { + enter_query_recursion!(this = self => { + this.parse_stmt_inner(ctx).await + }) + } + + async fn parse_stmt_inner(&mut self, ctx: &mut Stk) -> ParseResult { let token = self.peek(); match token.kind { t!("ANALYZE") => { @@ -207,6 +214,12 @@ impl Parser<'_> { } pub(super) async fn parse_entry(&mut self, ctx: &mut Stk) -> ParseResult { + enter_query_recursion!(this = self => { + this.parse_entry_inner(ctx).await + }) + } + + async fn parse_entry_inner(&mut self, ctx: &mut Stk) -> ParseResult { let token = self.peek(); match token.kind { t!("BREAK") => { diff --git a/core/src/syn/v2/parser/test/limit.rs b/core/src/syn/v2/parser/test/limit.rs new file mode 100644 index 00000000..3097878a --- /dev/null +++ b/core/src/syn/v2/parser/test/limit.rs @@ -0,0 +1,198 @@ +use reblessive::Stack; + +use crate::syn::v2::parser::Parser; + +#[test] +fn object_depth() { + let mut stack = Stack::new(); + + let source = r#" + RETURN { + a: { + b: { + c: { + d: { + } + } + } + } + } + "#; + let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + 
.expect("recursion limit of 5 couldn't parse 5 deep object"); + + let source = r#" + RETURN { + a: { + b: { + c: { + d: { + e: { + } + } + } + } + } + } + "#; + let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect_err("recursion limit of 5 didn't trigger on 6 deep object"); +} + +#[test] +fn object_depth_succeed_then_fail() { + let mut stack = Stack::new(); + let source = r#" + RETURN { + a: { + b: { + c: { + d: { + } + } + } + } + }; + RETURN { + a: { + b: { + c: { + d: { + } + } + } + } + }; + "#; + + let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect("recursion limit of 5 couldn't parse 5 deep object"); + + let mut stack = Stack::new(); + let source = r#" + RETURN { + a: { + b: { + c: { + d: { + } + } + } + } + }; + RETURN { + a: { + b: { + c: { + d: { + e: { + } + } + } + } + } + }; + "#; + + let mut parser = Parser::new(source.as_bytes()).with_object_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect_err("recursion limit of 5 didn't trigger on 6 deep object"); +} + +#[test] +fn query_depth_subquery() { + let mut stack = Stack::new(); + + let source = r#" + RETURN select (select (select ( select foo from bar ) from bar ) from bar) from bar + "#; + let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect("recursion limit of 5 couldn't parse 5 deep query"); + + let source = r#" + RETURN select (select (select ( select (select foo from bar) from bar ) from bar ) from bar) from bar + "#; + let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect_err("recursion limit of 5 didn't trigger on 6 deep query"); +} + +#[test] +fn query_depth_block() { + let mut 
stack = Stack::new(); + + let source = r#" + { + { + { + { + RETURN "foo"; + } + } + } + } + "#; + let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect("recursion limit of 5 couldn't parse 5 deep query"); + + let source = r#" + { + { + { + { + { + RETURN "foo"; + } + } + } + } + } + "#; + let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect_err("recursion limit of 5 didn't trigger on 6 deep query"); +} + +#[test] +fn query_depth_if() { + let mut stack = Stack::new(); + + let source = r#" + IF IF IF IF IF true THEN false END { false } { false } { false } { false } + "#; + let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect("recursion limit of 5 couldn't parse 5 deep query"); + + let source = r#" + IF IF IF IF IF IF true THEN false END { false } { false } { false } { false } { false } + "#; + let mut parser = Parser::new(source.as_bytes()).with_query_recursion_limit(5); + stack + .enter(|stk| parser.parse_query(stk)) + .finish() + .expect_err("recursion limit of 5 didn't trigger on 6 deep query"); +} diff --git a/core/src/syn/v2/parser/test/mod.rs b/core/src/syn/v2/parser/test/mod.rs index 30be2080..fe12a78a 100644 --- a/core/src/syn/v2/parser/test/mod.rs +++ b/core/src/syn/v2/parser/test/mod.rs @@ -1,3 +1,4 @@ +mod limit; mod stmt; mod streaming; mod value;