From e0f357c18a62e8998ad03055cf4c37e1b7342c3d Mon Sep 17 00:00:00 2001
From: Mees Delzenne <DelSkayn@users.noreply.github.com>
Date: Wed, 18 Sep 2024 22:00:27 +0200
Subject: [PATCH] Fix error message panic (#4820)

---
 core/src/sql/value/changed.rs    |   2 +-
 core/src/syn/error/location.rs   | 351 +++++++++----------------------
 core/src/syn/error/render.rs     |  53 +++--
 core/src/syn/parser/mod.rs       |   8 +-
 core/src/syn/parser/test/json.rs |   5 +
 5 files changed, 147 insertions(+), 272 deletions(-)

diff --git a/core/src/sql/value/changed.rs b/core/src/sql/value/changed.rs
index b7c1ab05..abec87c9 100644
--- a/core/src/sql/value/changed.rs
+++ b/core/src/sql/value/changed.rs
@@ -64,7 +64,7 @@ mod tests {
 	fn changed_remove() {
 		let old = Value::parse("{ test: true, other: 'test' }");
 		let now = Value::parse("{ test: true }");
-		let res = Value::parse("{ other: NONE }]");
+		let res = Value::parse("{ other: NONE }");
 		assert_eq!(res, old.changed(&now));
 	}
 
diff --git a/core/src/syn/error/location.rs b/core/src/syn/error/location.rs
index db2893d1..7ee53fa7 100644
--- a/core/src/syn/error/location.rs
+++ b/core/src/syn/error/location.rs
@@ -12,281 +12,120 @@ pub struct Location {
 	pub column: usize,
 }
 
+/// Safety: b must be a substring of a.
+unsafe fn str_offset(a: &str, b: &str) -> usize {
+	b.as_ptr().offset_from(a.as_ptr()) as usize
+}
+
 impl Location {
-	/// Returns the location of the start of substring in the larger input string.
-	///
-	/// Assumption: substr must be a subslice of input.
-	pub fn of_in(substr: &str, input: &str) -> Self {
-		// Bytes of input before substr.
-		let offset = (substr.as_ptr() as usize)
-			.checked_sub(input.as_ptr() as usize)
-			.expect("tried to find location of substring in unrelated string");
-		assert!(offset <= input.len(), "tried to find location of substring in unrelated string");
-		// Bytes of input prior to line being iterated.
-		let mut bytes_prior = 0;
-		for (line_idx, (line, seperator_len)) in LineIterator::new(input).enumerate() {
-			let bytes_so_far = bytes_prior + line.len() + seperator_len.unwrap_or(0) as usize;
-			if bytes_so_far >= offset {
-				// found line.
-				let line_offset = offset - bytes_prior;
+	fn range_of_source_end(source: &str) -> Range<Self> {
+		let (line, column) = source
+			.lines()
+			.enumerate()
+			.last()
+			.map(|(idx, line)| {
+				let idx = idx + 1;
+				let line_idx = line.chars().count().max(1);
+				(idx, line_idx)
+			})
+			.unwrap_or((0, 0));
 
-				let column = if line_offset > line.len() {
-					// error is inside line terminator.
-					line.chars().count() + 1
-				} else {
-					line[..line_offset].chars().count()
-				};
-				// +1 because line and column are 1 index.
-				return Self {
-					line: line_idx + 1,
-					column: column + 1,
-				};
-			}
-			bytes_prior = bytes_so_far;
+		Self {
+			line,
+			column,
+		}..Self {
+			line,
+			column: column + 1,
 		}
-		unreachable!()
 	}
-
-	pub fn of_offset(source: &str, offset: usize) -> Self {
-		assert!(offset <= source.len(), "tried to find location of substring in unrelated string");
-
-		if offset == source.len() {
-			// Eof character
-
-			let (last_line, column) = LineIterator::new(source)
-				.enumerate()
-				.last()
-				.map(|(idx, (l, _))| (idx, l.len()))
-				.unwrap_or((0, 0));
-			return Self {
-				line: last_line + 1,
-				column: column + 1,
-			};
-		}
-
-		// Bytes of input prior to line being iterated.
-		let mut bytes_prior = 0;
-		for (line_idx, (line, seperator_len)) in LineIterator::new(source).enumerate() {
-			let bytes_so_far = bytes_prior + line.len() + seperator_len.unwrap_or(0) as usize;
-			if bytes_so_far >= offset {
-				// found line.
-				let line_offset = offset - bytes_prior;
-
-				let column = if line_offset > line.len() {
-					// error is inside line terminator.
-					line.chars().count() + 1
-				} else {
-					line[..line_offset].chars().count()
-				};
-				// +1 because line and column are 1 index.
-				return Self {
-					line: line_idx + 1,
-					column: column + 1,
-				};
-			}
-			bytes_prior = bytes_so_far;
-		}
-		unreachable!()
-	}
-
-	pub fn of_span_start(source: &str, span: Span) -> Self {
-		// Bytes of input before substr.
-
-		let offset = span.offset as usize;
-		Self::of_offset(source, offset)
-	}
-
-	pub fn of_span_end(source: &str, span: Span) -> Self {
-		// Bytes of input before substr.
-		let offset = span.offset as usize + span.len as usize;
-		Self::of_offset(source, offset)
-	}
-
 	pub fn range_of_span(source: &str, span: Span) -> Range<Self> {
-		if source.len() == span.offset as usize {
-			// EOF span
-			let (line_idx, column) = LineIterator::new(source)
-				.map(|(l, _)| l.len())
-				.enumerate()
-				.last()
-				.unwrap_or((0, 0));
-
-			return Self {
-				line: line_idx + 1,
-				column: column + 1,
-			}..Self {
-				line: line_idx + 1,
-				column: column + 2,
-			};
-		}
-
-		// Bytes of input before substr.
-		let offset = span.offset as usize;
-		let end = offset + span.len as usize;
-
-		if span.len == 0 && source.len() == span.offset as usize {
-			// EOF span
-			let (last_line, column) = LineIterator::new(source)
-				.enumerate()
-				.last()
-				.map(|(idx, (l, _))| (idx, l.len()))
-				.unwrap_or((0, 0));
-			return Self {
-				line: last_line + 1,
-				column,
-			}..Self {
-				line: last_line + 1,
-				column: column + 1,
-			};
+		if source.len() <= span.offset as usize {
+			return Self::range_of_source_end(source);
 		}
 
+		let mut prev_line = "";
+		let mut lines = source.lines().enumerate().peekable();
 		// Bytes of input prior to line being iteratated.
-		let mut bytes_prior = 0;
-		let mut iterator = LineIterator::new(source).enumerate().peekable();
+		let start_offset = span.offset as usize;
 		let start = loop {
-			let Some((line_idx, (line, seperator_offset))) = iterator.peek() else {
-				panic!("tried to find location of span not belonging to string");
+			let Some((line_idx, line)) = lines.peek().copied() else {
+				// Couldn't find the line, give up and return the last
+				return Self::range_of_source_end(source);
 			};
-			let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
-			if bytes_so_far > offset {
-				// found line.
-				let line_offset = offset - bytes_prior;
-				let column = if line_offset > line.len() {
-					line.chars().count() + 1
-				} else {
-					line[..line_offset.min(line.len())].chars().count()
+			// Safety: line originates from source so it is a substring so calling str_offset is
+			// valid.
+			let line_offset = unsafe { str_offset(source, line) };
+
+			if start_offset < line_offset {
+				// Span is inside the previous line terminator, point to the end of the line.
+				let len = prev_line.chars().count();
+				break Self {
+					line: line_idx,
+					column: len + 1,
 				};
-				// +1 because line and column are 1 index.
-				if bytes_so_far >= end {
-					// end is on the same line, finish immediatly.
-					let line_offset = end - bytes_prior;
-					let end_column = line[..line_offset].chars().count();
-					return Self {
-						line: line_idx + 1,
-						column: column + 1,
-					}..Self {
-						line: line_idx + 1,
-						column: end_column + 1,
-					};
-				} else {
-					break Self {
-						line: line_idx + 1,
-						column: column + 1,
-					};
-				}
 			}
-			bytes_prior = bytes_so_far;
-			iterator.next();
+
+			if (line_offset..(line_offset + line.len())).contains(&start_offset) {
+				let column_offset = start_offset - line_offset;
+				let column = line
+					.char_indices()
+					.enumerate()
+					.find(|(_, (char_idx, _))| *char_idx >= column_offset)
+					.map(|(l, _)| l)
+					.unwrap_or_else(|| {
+						// give up, just point to the end.
+						line.chars().count()
+					});
+				break Self {
+					line: line_idx + 1,
+					column: column + 1,
+				};
+			}
+
+			lines.next();
+			prev_line = line;
 		};
 
-		loop {
-			let Some((line_idx, (line, seperator_offset))) = iterator.next() else {
-				panic!("tried to find location of span not belonging to string");
+		let end_offset = span.offset as usize + span.len as usize;
+		let end = loop {
+			let Some((line_idx, line)) = lines.peek().copied() else {
+				// Couldn't find the line, give up and return the last
+				break Self::range_of_source_end(source).end;
 			};
-			let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
-			if bytes_so_far >= end {
-				let line_offset = end - bytes_prior;
-				let column = if line_offset > line.len() {
-					line.chars().count() + 1
-				} else {
-					line[..line_offset.min(line.len())].chars().count()
+			// Safety: line originates from source so it is a substring so calling str_offset is
+			// valid.
+			let line_offset = unsafe { str_offset(source, line) };
+
+			if end_offset < line_offset {
+				// Span is inside the previous line terminator, point to the end of the line.
+				let len = prev_line.chars().count();
+				break Self {
+					line: line_idx,
+					column: len + 1,
 				};
-				return start..Self {
+			}
+
+			if (line_offset..(line_offset + line.len())).contains(&end_offset) {
+				let column_offset = end_offset - line_offset;
+				let column = line
+					.char_indices()
+					.enumerate()
+					.find(|(_, (char_idx, _))| *char_idx >= column_offset)
+					.map(|(l, _)| l)
+					.unwrap_or_else(|| {
+						// give up, just point to the end.
+						line.chars().count()
+					});
+				break Self {
 					line: line_idx + 1,
 					column: column + 1,
 				};
 			}
-			bytes_prior = bytes_so_far;
-		}
-	}
-}
-
-struct LineIterator<'a> {
-	current: &'a str,
-}
-
-impl<'a> LineIterator<'a> {
-	pub fn new(s: &'a str) -> Self {
-		LineIterator {
-			current: s,
-		}
-	}
-}
-
-impl<'a> Iterator for LineIterator<'a> {
-	type Item = (&'a str, Option<u8>);
-
-	fn next(&mut self) -> Option<Self::Item> {
-		if self.current.is_empty() {
-			return None;
-		}
-		let bytes = self.current.as_bytes();
-		for i in 0..bytes.len() {
-			match bytes[i] {
-				b'\r' => {
-					if let Some(b'\n') = bytes.get(i + 1) {
-						let res = &self.current[..i];
-						self.current = &self.current[i + 2..];
-						return Some((res, Some(2)));
-					}
-					let res = &self.current[..i];
-					self.current = &self.current[i + 1..];
-					return Some((res, Some(1)));
-				}
-				0xb | 0xC | b'\n' => {
-					// vertical tab VT and form feed FF.
-					let res = &self.current[..i];
-					self.current = &self.current[i + 1..];
-					return Some((res, Some(1)));
-				}
-				0xc2 => {
-					// next line NEL
-					if bytes.get(i + 1).copied() != Some(0x85) {
-						continue;
-					}
-					let res = &self.current[..i];
-					self.current = &self.current[i + 2..];
-					return Some((res, Some(2)));
-				}
-				0xe2 => {
-					// line separator and paragraph seperator.
-					if bytes.get(i + 1).copied() != Some(0x80) {
-						continue;
-					}
-					let next_byte = bytes.get(i + 2).copied();
-					if next_byte != Some(0xA8) && next_byte != Some(0xA9) {
-						continue;
-					}
-
-					// vertical tab VT, next line NEL and form feed FF.
-					let res = &self.current[..i];
-					self.current = &self.current[i + 3..];
-					return Some((res, Some(3)));
-				}
-				_ => {}
-			}
-		}
-		Some((std::mem::take(&mut self.current), None))
-	}
-}
-
-#[cfg(test)]
-mod test {
-	use super::LineIterator;
-
-	#[test]
-	fn test_line_iterator() {
-		let lines = "foo\nbar\r\nfoo\rbar\u{000B}foo\u{000C}bar\u{0085}foo\u{2028}bar\u{2029}\n";
-		let mut iterator = LineIterator::new(lines);
-		assert_eq!(iterator.next(), Some(("foo", Some(1))));
-		assert_eq!(iterator.next(), Some(("bar", Some(2))));
-		assert_eq!(iterator.next(), Some(("foo", Some(1))));
-		assert_eq!(iterator.next(), Some(("bar", Some(1))));
-		assert_eq!(iterator.next(), Some(("foo", Some(1))));
-		assert_eq!(iterator.next(), Some(("bar", Some(2))));
-		assert_eq!(iterator.next(), Some(("foo", Some(3))));
-		assert_eq!(iterator.next(), Some(("bar", Some(3))));
-		assert_eq!(iterator.next(), Some(("", Some(1))));
-		assert_eq!(iterator.next(), None);
+
+			lines.next();
+			prev_line = line;
+		};
+
+		start..end
 	}
 }
diff --git a/core/src/syn/error/render.rs b/core/src/syn/error/render.rs
index c2ffd75a..4f1cf799 100644
--- a/core/src/syn/error/render.rs
+++ b/core/src/syn/error/render.rs
@@ -222,17 +222,26 @@ impl fmt::Display for Snippet {
 #[cfg(test)]
 mod test {
 	use super::{RenderedError, Snippet, Truncation};
-	use crate::syn::error::{Location, MessageKind};
+	use crate::syn::{
+		error::{Location, MessageKind},
+		token::Span,
+	};
 
 	#[test]
 	fn truncate_whitespace() {
 		let source = "\n\n\n\t      $     \t";
 		let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
-		let error = &source[offset..];
 
-		let location = Location::of_in(error, source);
+		let location = Location::range_of_span(
+			source,
+			Span {
+				offset: offset as u32,
+				len: 1,
+			},
+		);
 
-		let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
+		let snippet =
+			Snippet::from_source_location(source, location.start, None, MessageKind::Error);
 		assert_eq!(snippet.truncation, Truncation::None);
 		assert_eq!(snippet.offset, 0);
 		assert_eq!(snippet.source.as_str(), "$");
@@ -242,11 +251,17 @@ mod test {
 	fn truncate_start() {
 		let source = "     aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $     \t";
 		let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
-		let error = &source[offset..];
 
-		let location = Location::of_in(error, source);
+		let location = Location::range_of_span(
+			source,
+			Span {
+				offset: offset as u32,
+				len: 1,
+			},
+		);
 
-		let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
+		let snippet =
+			Snippet::from_source_location(source, location.start, None, MessageKind::Error);
 		assert_eq!(snippet.truncation, Truncation::Start);
 		assert_eq!(snippet.offset, 10);
 		assert_eq!(snippet.source.as_str(), "aaaaaaaaa $");
@@ -256,11 +271,17 @@ mod test {
 	fn truncate_end() {
 		let source = "\n\n  a $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa    \t";
 		let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
-		let error = &source[offset..];
 
-		let location = Location::of_in(error, source);
+		let location = Location::range_of_span(
+			source,
+			Span {
+				offset: offset as u32,
+				len: 1,
+			},
+		);
 
-		let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
+		let snippet =
+			Snippet::from_source_location(source, location.start, None, MessageKind::Error);
 		assert_eq!(snippet.truncation, Truncation::End);
 		assert_eq!(snippet.offset, 2);
 		assert_eq!(
@@ -273,11 +294,17 @@ mod test {
 	fn truncate_both() {
 		let source = "\n\n\n\n  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa   \t";
 		let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
-		let error = &source[offset..];
 
-		let location = Location::of_in(error, source);
+		let location = Location::range_of_span(
+			source,
+			Span {
+				offset: offset as u32,
+				len: 1,
+			},
+		);
 
-		let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
+		let snippet =
+			Snippet::from_source_location(source, location.start, None, MessageKind::Error);
 		assert_eq!(snippet.truncation, Truncation::Both);
 		assert_eq!(snippet.offset, 10);
 		assert_eq!(
diff --git a/core/src/syn/parser/mod.rs b/core/src/syn/parser/mod.rs
index 73840ed3..26f49430 100644
--- a/core/src/syn/parser/mod.rs
+++ b/core/src/syn/parser/mod.rs
@@ -319,8 +319,12 @@ impl<'a> Parser<'a> {
 		self.last_span
 	}
 
-	pub fn assert_finished(&self) -> ParseResult<()> {
-		self.lexer.assert_finished()
+	pub fn assert_finished(&mut self) -> ParseResult<()> {
+		let p = self.peek();
+		if self.peek().kind != TokenKind::Eof {
+			bail!("Unexpected token `{}`, expected no more tokens",p.kind, @p.span);
+		}
+		Ok(())
 	}
 
 	/// Eat the next token if it is of the given kind.
diff --git a/core/src/syn/parser/test/json.rs b/core/src/syn/parser/test/json.rs
index a464a6d4..3756b12a 100644
--- a/core/src/syn/parser/test/json.rs
+++ b/core/src/syn/parser/test/json.rs
@@ -5,6 +5,11 @@ fn object_with_negative() {
 	test_parse!(parse_json, r#"{"foo": -1 }"#).unwrap();
 }
 
+#[test]
+fn object_with_trailing_whitespace() {
+	test_parse!(parse_json, r#"{"foo": -1 }\n"#).unwrap();
+}
+
 #[test]
 fn array_with_negative() {
 	test_parse!(parse_json, r#"[-1]"#).unwrap();