From e0f357c18a62e8998ad03055cf4c37e1b7342c3d Mon Sep 17 00:00:00 2001 From: Mees Delzenne <DelSkayn@users.noreply.github.com> Date: Wed, 18 Sep 2024 22:00:27 +0200 Subject: [PATCH] Fix error message panic (#4820) --- core/src/sql/value/changed.rs | 2 +- core/src/syn/error/location.rs | 351 +++++++++---------------------- core/src/syn/error/render.rs | 53 +++-- core/src/syn/parser/mod.rs | 8 +- core/src/syn/parser/test/json.rs | 5 + 5 files changed, 147 insertions(+), 272 deletions(-) diff --git a/core/src/sql/value/changed.rs b/core/src/sql/value/changed.rs index b7c1ab05..abec87c9 100644 --- a/core/src/sql/value/changed.rs +++ b/core/src/sql/value/changed.rs @@ -64,7 +64,7 @@ mod tests { fn changed_remove() { let old = Value::parse("{ test: true, other: 'test' }"); let now = Value::parse("{ test: true }"); - let res = Value::parse("{ other: NONE }]"); + let res = Value::parse("{ other: NONE }"); assert_eq!(res, old.changed(&now)); } diff --git a/core/src/syn/error/location.rs b/core/src/syn/error/location.rs index db2893d1..7ee53fa7 100644 --- a/core/src/syn/error/location.rs +++ b/core/src/syn/error/location.rs @@ -12,281 +12,120 @@ pub struct Location { pub column: usize, } +/// Safety: b must be a substring of a. +unsafe fn str_offset(a: &str, b: &str) -> usize { + b.as_ptr().offset_from(a.as_ptr()) as usize +} + impl Location { - /// Returns the location of the start of substring in the larger input string. - /// - /// Assumption: substr must be a subslice of input. - pub fn of_in(substr: &str, input: &str) -> Self { - // Bytes of input before substr. - let offset = (substr.as_ptr() as usize) - .checked_sub(input.as_ptr() as usize) - .expect("tried to find location of substring in unrelated string"); - assert!(offset <= input.len(), "tried to find location of substring in unrelated string"); - // Bytes of input prior to line being iterated. - let mut bytes_prior = 0; - for (line_idx, (line, seperator_len)) in LineIterator::new(input).enumerate() { - let bytes_so_far = bytes_prior + line.len() + seperator_len.unwrap_or(0) as usize; - if bytes_so_far >= offset { - // found line. - let line_offset = offset - bytes_prior; + fn range_of_source_end(source: &str) -> Range<Self> { + let (line, column) = source + .lines() + .enumerate() + .last() + .map(|(idx, line)| { + let idx = idx + 1; + let line_idx = line.chars().count().max(1); + (idx, line_idx) + }) + .unwrap_or((0, 0)); - let column = if line_offset > line.len() { - // error is inside line terminator. - line.chars().count() + 1 - } else { - line[..line_offset].chars().count() - }; - // +1 because line and column are 1 index. - return Self { - line: line_idx + 1, - column: column + 1, - }; - } - bytes_prior = bytes_so_far; + Self { + line, + column, + }..Self { + line, + column: column + 1, } - unreachable!() } - - pub fn of_offset(source: &str, offset: usize) -> Self { - assert!(offset <= source.len(), "tried to find location of substring in unrelated string"); - - if offset == source.len() { - // Eof character - - let (last_line, column) = LineIterator::new(source) - .enumerate() - .last() - .map(|(idx, (l, _))| (idx, l.len())) - .unwrap_or((0, 0)); - return Self { - line: last_line + 1, - column: column + 1, - }; - } - - // Bytes of input prior to line being iterated. - let mut bytes_prior = 0; - for (line_idx, (line, seperator_len)) in LineIterator::new(source).enumerate() { - let bytes_so_far = bytes_prior + line.len() + seperator_len.unwrap_or(0) as usize; - if bytes_so_far >= offset { - // found line. - let line_offset = offset - bytes_prior; - - let column = if line_offset > line.len() { - // error is inside line terminator. - line.chars().count() + 1 - } else { - line[..line_offset].chars().count() - }; - // +1 because line and column are 1 index. - return Self { - line: line_idx + 1, - column: column + 1, - }; - } - bytes_prior = bytes_so_far; - } - unreachable!() - } - - pub fn of_span_start(source: &str, span: Span) -> Self { - // Bytes of input before substr. - - let offset = span.offset as usize; - Self::of_offset(source, offset) - } - - pub fn of_span_end(source: &str, span: Span) -> Self { - // Bytes of input before substr. - let offset = span.offset as usize + span.len as usize; - Self::of_offset(source, offset) - } - pub fn range_of_span(source: &str, span: Span) -> Range<Self> { - if source.len() == span.offset as usize { - // EOF span - let (line_idx, column) = LineIterator::new(source) - .map(|(l, _)| l.len()) - .enumerate() - .last() - .unwrap_or((0, 0)); - - return Self { - line: line_idx + 1, - column: column + 1, - }..Self { - line: line_idx + 1, - column: column + 2, - }; - } - - // Bytes of input before substr. - let offset = span.offset as usize; - let end = offset + span.len as usize; - - if span.len == 0 && source.len() == span.offset as usize { - // EOF span - let (last_line, column) = LineIterator::new(source) - .enumerate() - .last() - .map(|(idx, (l, _))| (idx, l.len())) - .unwrap_or((0, 0)); - return Self { - line: last_line + 1, - column, - }..Self { - line: last_line + 1, - column: column + 1, - }; + if source.len() <= span.offset as usize { + return Self::range_of_source_end(source); } + let mut prev_line = ""; + let mut lines = source.lines().enumerate().peekable(); // Bytes of input prior to line being iteratated. - let mut bytes_prior = 0; - let mut iterator = LineIterator::new(source).enumerate().peekable(); + let start_offset = span.offset as usize; let start = loop { - let Some((line_idx, (line, seperator_offset))) = iterator.peek() else { - panic!("tried to find location of span not belonging to string"); + let Some((line_idx, line)) = lines.peek().copied() else { + // Couldn't find the line, give up and return the last + return Self::range_of_source_end(source); }; - let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; - if bytes_so_far > offset { - // found line. - let line_offset = offset - bytes_prior; - let column = if line_offset > line.len() { - line.chars().count() + 1 - } else { - line[..line_offset.min(line.len())].chars().count() + // Safety: line originates from source so it is a substring so calling str_offset is + // valid. + let line_offset = unsafe { str_offset(source, line) }; + + if start_offset < line_offset { + // Span is inside the previous line terminator, point to the end of the line. + let len = prev_line.chars().count(); + break Self { + line: line_idx, + column: len + 1, }; - // +1 because line and column are 1 index. - if bytes_so_far >= end { - // end is on the same line, finish immediatly. - let line_offset = end - bytes_prior; - let end_column = line[..line_offset].chars().count(); - return Self { - line: line_idx + 1, - column: column + 1, - }..Self { - line: line_idx + 1, - column: end_column + 1, - }; - } else { - break Self { - line: line_idx + 1, - column: column + 1, - }; - } } - bytes_prior = bytes_so_far; - iterator.next(); + + if (line_offset..(line_offset + line.len())).contains(&start_offset) { + let column_offset = start_offset - line_offset; + let column = line + .char_indices() + .enumerate() + .find(|(_, (char_idx, _))| *char_idx >= column_offset) + .map(|(l, _)| l) + .unwrap_or_else(|| { + // give up, just point to the end. + line.chars().count() + }); + break Self { + line: line_idx + 1, + column: column + 1, + }; + } + + lines.next(); + prev_line = line; }; - loop { - let Some((line_idx, (line, seperator_offset))) = iterator.next() else { - panic!("tried to find location of span not belonging to string"); + let end_offset = span.offset as usize + span.len as usize; + let end = loop { + let Some((line_idx, line)) = lines.peek().copied() else { + // Couldn't find the line, give up and return the last + break Self::range_of_source_end(source).end; }; - let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; - if bytes_so_far >= end { - let line_offset = end - bytes_prior; - let column = if line_offset > line.len() { - line.chars().count() + 1 - } else { - line[..line_offset.min(line.len())].chars().count() + // Safety: line originates from source so it is a substring so calling str_offset is + // valid. + let line_offset = unsafe { str_offset(source, line) }; + + if end_offset < line_offset { + // Span is inside the previous line terminator, point to the end of the line. + let len = prev_line.chars().count(); + break Self { + line: line_idx, + column: len + 1, }; - return start..Self { + } + + if (line_offset..(line_offset + line.len())).contains(&end_offset) { + let column_offset = end_offset - line_offset; + let column = line + .char_indices() + .enumerate() + .find(|(_, (char_idx, _))| *char_idx >= column_offset) + .map(|(l, _)| l) + .unwrap_or_else(|| { + // give up, just point to the end. + line.chars().count() + }); + break Self { line: line_idx + 1, column: column + 1, }; } - bytes_prior = bytes_so_far; - } - } -} - -struct LineIterator<'a> { - current: &'a str, -} - -impl<'a> LineIterator<'a> { - pub fn new(s: &'a str) -> Self { - LineIterator { - current: s, - } - } -} - -impl<'a> Iterator for LineIterator<'a> { - type Item = (&'a str, Option<u8>); - - fn next(&mut self) -> Option<Self::Item> { - if self.current.is_empty() { - return None; - } - let bytes = self.current.as_bytes(); - for i in 0..bytes.len() { - match bytes[i] { - b'\r' => { - if let Some(b'\n') = bytes.get(i + 1) { - let res = &self.current[..i]; - self.current = &self.current[i + 2..]; - return Some((res, Some(2))); - } - let res = &self.current[..i]; - self.current = &self.current[i + 1..]; - return Some((res, Some(1))); - } - 0xb | 0xC | b'\n' => { - // vertical tab VT and form feed FF. - let res = &self.current[..i]; - self.current = &self.current[i + 1..]; - return Some((res, Some(1))); - } - 0xc2 => { - // next line NEL - if bytes.get(i + 1).copied() != Some(0x85) { - continue; - } - let res = &self.current[..i]; - self.current = &self.current[i + 2..]; - return Some((res, Some(2))); - } - 0xe2 => { - // line separator and paragraph seperator. - if bytes.get(i + 1).copied() != Some(0x80) { - continue; - } - let next_byte = bytes.get(i + 2).copied(); - if next_byte != Some(0xA8) && next_byte != Some(0xA9) { - continue; - } - - // vertical tab VT, next line NEL and form feed FF. - let res = &self.current[..i]; - self.current = &self.current[i + 3..]; - return Some((res, Some(3))); - } - _ => {} - } - } - Some((std::mem::take(&mut self.current), None)) - } -} - -#[cfg(test)] -mod test { - use super::LineIterator; - - #[test] - fn test_line_iterator() { - let lines = "foo\nbar\r\nfoo\rbar\u{000B}foo\u{000C}bar\u{0085}foo\u{2028}bar\u{2029}\n"; - let mut iterator = LineIterator::new(lines); - assert_eq!(iterator.next(), Some(("foo", Some(1)))); - assert_eq!(iterator.next(), Some(("bar", Some(2)))); - assert_eq!(iterator.next(), Some(("foo", Some(1)))); - assert_eq!(iterator.next(), Some(("bar", Some(1)))); - assert_eq!(iterator.next(), Some(("foo", Some(1)))); - assert_eq!(iterator.next(), Some(("bar", Some(2)))); - assert_eq!(iterator.next(), Some(("foo", Some(3)))); - assert_eq!(iterator.next(), Some(("bar", Some(3)))); - assert_eq!(iterator.next(), Some(("", Some(1)))); - assert_eq!(iterator.next(), None); + + lines.next(); + prev_line = line; + }; + + start..end } } diff --git a/core/src/syn/error/render.rs b/core/src/syn/error/render.rs index c2ffd75a..4f1cf799 100644 --- a/core/src/syn/error/render.rs +++ b/core/src/syn/error/render.rs @@ -222,17 +222,26 @@ impl fmt::Display for Snippet { #[cfg(test)] mod test { use super::{RenderedError, Snippet, Truncation}; - use crate::syn::error::{Location, MessageKind}; + use crate::syn::{ + error::{Location, MessageKind}, + token::Span, + }; #[test] fn truncate_whitespace() { let source = "\n\n\n\t $ \t"; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; - let error = &source[offset..]; - let location = Location::of_in(error, source); + let location = Location::range_of_span( + source, + Span { + offset: offset as u32, + len: 1, + }, + ); - let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); + let snippet = + Snippet::from_source_location(source, location.start, None, MessageKind::Error); assert_eq!(snippet.truncation, Truncation::None); assert_eq!(snippet.offset, 0); assert_eq!(snippet.source.as_str(), "$"); @@ -242,11 +251,17 @@ mod test { fn truncate_start() { let source = " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ \t"; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; - let error = &source[offset..]; - let location = Location::of_in(error, source); + let location = Location::range_of_span( + source, + Span { + offset: offset as u32, + len: 1, + }, + ); - let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); + let snippet = + Snippet::from_source_location(source, location.start, None, MessageKind::Error); assert_eq!(snippet.truncation, Truncation::Start); assert_eq!(snippet.offset, 10); assert_eq!(snippet.source.as_str(), "aaaaaaaaa $"); @@ -256,11 +271,17 @@ mod test { fn truncate_end() { let source = "\n\n a $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t"; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; - let error = &source[offset..]; - let location = Location::of_in(error, source); + let location = Location::range_of_span( + source, + Span { + offset: offset as u32, + len: 1, + }, + ); - let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); + let snippet = + Snippet::from_source_location(source, location.start, None, MessageKind::Error); assert_eq!(snippet.truncation, Truncation::End); assert_eq!(snippet.offset, 2); assert_eq!( @@ -273,11 +294,17 @@ mod test { fn truncate_both() { let source = "\n\n\n\n aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t"; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; - let error = &source[offset..]; - let location = Location::of_in(error, source); + let location = Location::range_of_span( + source, + Span { + offset: offset as u32, + len: 1, + }, + ); - let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); + let snippet = + Snippet::from_source_location(source, location.start, None, MessageKind::Error); assert_eq!(snippet.truncation, Truncation::Both); assert_eq!(snippet.offset, 10); assert_eq!( diff --git a/core/src/syn/parser/mod.rs b/core/src/syn/parser/mod.rs index 73840ed3..26f49430 100644 --- a/core/src/syn/parser/mod.rs +++ b/core/src/syn/parser/mod.rs @@ -319,8 +319,12 @@ impl<'a> Parser<'a> { self.last_span } - pub fn assert_finished(&self) -> ParseResult<()> { - self.lexer.assert_finished() + pub fn assert_finished(&mut self) -> ParseResult<()> { + let p = self.peek(); + if self.peek().kind != TokenKind::Eof { + bail!("Unexpected token `{}`, expected no more tokens",p.kind, @p.span); + } + Ok(()) } /// Eat the next token if it is of the given kind. diff --git a/core/src/syn/parser/test/json.rs b/core/src/syn/parser/test/json.rs index a464a6d4..3756b12a 100644 --- a/core/src/syn/parser/test/json.rs +++ b/core/src/syn/parser/test/json.rs @@ -5,6 +5,11 @@ fn object_with_negative() { test_parse!(parse_json, r#"{"foo": -1 }"#).unwrap(); } +#[test] +fn object_with_trailing_whitespace() { + test_parse!(parse_json, r#"{"foo": -1 }\n"#).unwrap(); +} + #[test] fn array_with_negative() { test_parse!(parse_json, r#"[-1]"#).unwrap();