Fix error message panic

This commit is contained in:
Mees Delzenne 2024-09-18 22:00:27 +02:00 committed by GitHub
parent 52aef7ccc4
commit e0f357c18a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 147 additions and 272 deletions
core/src

View file

@ -64,7 +64,7 @@ mod tests {
fn changed_remove() { fn changed_remove() {
let old = Value::parse("{ test: true, other: 'test' }"); let old = Value::parse("{ test: true, other: 'test' }");
let now = Value::parse("{ test: true }"); let now = Value::parse("{ test: true }");
let res = Value::parse("{ other: NONE }]"); let res = Value::parse("{ other: NONE }");
assert_eq!(res, old.changed(&now)); assert_eq!(res, old.changed(&now));
} }

View file

@ -12,281 +12,120 @@ pub struct Location {
pub column: usize, pub column: usize,
} }
/// Returns the byte offset of `b` measured from the start of `a`.
///
/// # Safety
///
/// `b` must be a substring of `a`.
unsafe fn str_offset(a: &str, b: &str) -> usize {
    let base = a.as_ptr();
    let inner = b.as_ptr();
    // SAFETY: the caller guarantees that `b` is a substring of `a`, so both pointers are
    // derived from the same string.
    let distance = unsafe { inner.offset_from(base) };
    distance as usize
}
impl Location { impl Location {
/// Returns the location of the start of substring in the larger input string. fn range_of_source_end(source: &str) -> Range<Self> {
/// let (line, column) = source
/// Assumption: substr must be a subslice of input. .lines()
pub fn of_in(substr: &str, input: &str) -> Self { .enumerate()
// Bytes of input before substr. .last()
let offset = (substr.as_ptr() as usize) .map(|(idx, line)| {
.checked_sub(input.as_ptr() as usize) let idx = idx + 1;
.expect("tried to find location of substring in unrelated string"); let line_idx = line.chars().count().max(1);
assert!(offset <= input.len(), "tried to find location of substring in unrelated string"); (idx, line_idx)
// Bytes of input prior to line being iterated. })
let mut bytes_prior = 0; .unwrap_or((0, 0));
for (line_idx, (line, seperator_len)) in LineIterator::new(input).enumerate() {
let bytes_so_far = bytes_prior + line.len() + seperator_len.unwrap_or(0) as usize;
if bytes_so_far >= offset {
// found line.
let line_offset = offset - bytes_prior;
let column = if line_offset > line.len() { Self {
// error is inside line terminator. line,
line.chars().count() + 1 column,
} else { }..Self {
line[..line_offset].chars().count() line,
}; column: column + 1,
// +1 because line and column are 1 index.
return Self {
line: line_idx + 1,
column: column + 1,
};
}
bytes_prior = bytes_so_far;
} }
unreachable!()
} }
/// Returns the 1-indexed line and column of the byte `offset` within `source`.
///
/// Columns are counted in characters, not bytes. An `offset` equal to `source.len()` is
/// treated as the end-of-file position and reported one column past the last character of
/// the last line.
///
/// # Panics
///
/// Panics if `offset` is larger than `source.len()`.
pub fn of_offset(source: &str, offset: usize) -> Self {
    assert!(offset <= source.len(), "tried to find location of substring in unrelated string");
    if offset == source.len() {
        // Eof character
        let (last_line, column) = LineIterator::new(source)
            .enumerate()
            .last()
            // Count characters rather than bytes so the column agrees with the non-EOF path
            // below when the last line contains multi-byte characters.
            .map(|(idx, (l, _))| (idx, l.chars().count()))
            .unwrap_or((0, 0));
        return Self {
            line: last_line + 1,
            column: column + 1,
        };
    }
    // Bytes of input prior to line being iterated.
    let mut bytes_prior = 0;
    for (line_idx, (line, separator_len)) in LineIterator::new(source).enumerate() {
        let bytes_so_far = bytes_prior + line.len() + usize::from(separator_len.unwrap_or(0));
        // NOTE(review): because of `>=`, an offset which is exactly the first byte after a
        // line terminator is still attributed to the line that terminator ends.
        if bytes_so_far >= offset {
            // found line.
            let line_offset = offset - bytes_prior;
            let column = if line_offset > line.len() {
                // error is inside line terminator.
                line.chars().count() + 1
            } else {
                // Count the characters that end at or before the offset. For offsets on a
                // character boundary this equals `line[..line_offset].chars().count()`, but
                // it does not panic when the offset points into a multi-byte character.
                line.char_indices()
                    .take_while(|(start, c)| start + c.len_utf8() <= line_offset)
                    .count()
            };
            // +1 because line and column are 1 index.
            return Self {
                line: line_idx + 1,
                column: column + 1,
            };
        }
        bytes_prior = bytes_so_far;
    }
    unreachable!()
}
/// Returns the location of the byte offset at which `span` starts within `source`.
///
/// Delegates to `of_offset` with `span.offset`.
pub fn of_span_start(source: &str, span: Span) -> Self {
    Self::of_offset(source, span.offset as usize)
}
/// Returns the location of the byte offset `span.offset + span.len` within `source`.
///
/// Delegates to `of_offset` with that offset.
pub fn of_span_end(source: &str, span: Span) -> Self {
    let start = span.offset as usize;
    let length = span.len as usize;
    Self::of_offset(source, start + length)
}
pub fn range_of_span(source: &str, span: Span) -> Range<Self> { pub fn range_of_span(source: &str, span: Span) -> Range<Self> {
if source.len() == span.offset as usize { if source.len() <= span.offset as usize {
// EOF span return Self::range_of_source_end(source);
let (line_idx, column) = LineIterator::new(source)
.map(|(l, _)| l.len())
.enumerate()
.last()
.unwrap_or((0, 0));
return Self {
line: line_idx + 1,
column: column + 1,
}..Self {
line: line_idx + 1,
column: column + 2,
};
}
// Bytes of input before substr.
let offset = span.offset as usize;
let end = offset + span.len as usize;
if span.len == 0 && source.len() == span.offset as usize {
// EOF span
let (last_line, column) = LineIterator::new(source)
.enumerate()
.last()
.map(|(idx, (l, _))| (idx, l.len()))
.unwrap_or((0, 0));
return Self {
line: last_line + 1,
column,
}..Self {
line: last_line + 1,
column: column + 1,
};
} }
let mut prev_line = "";
let mut lines = source.lines().enumerate().peekable();
// Bytes of input prior to line being iteratated. // Bytes of input prior to line being iteratated.
let mut bytes_prior = 0; let start_offset = span.offset as usize;
let mut iterator = LineIterator::new(source).enumerate().peekable();
let start = loop { let start = loop {
let Some((line_idx, (line, seperator_offset))) = iterator.peek() else { let Some((line_idx, line)) = lines.peek().copied() else {
panic!("tried to find location of span not belonging to string"); // Couldn't find the line, give up and return the last
return Self::range_of_source_end(source);
}; };
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; // Safety: line originates from source so it is a substring so calling str_offset is
if bytes_so_far > offset { // valid.
// found line. let line_offset = unsafe { str_offset(source, line) };
let line_offset = offset - bytes_prior;
let column = if line_offset > line.len() { if start_offset < line_offset {
line.chars().count() + 1 // Span is inside the previous line terminator, point to the end of the line.
} else { let len = prev_line.chars().count();
line[..line_offset.min(line.len())].chars().count() break Self {
line: line_idx,
column: len + 1,
}; };
// +1 because line and column are 1 index.
if bytes_so_far >= end {
// end is on the same line, finish immediatly.
let line_offset = end - bytes_prior;
let end_column = line[..line_offset].chars().count();
return Self {
line: line_idx + 1,
column: column + 1,
}..Self {
line: line_idx + 1,
column: end_column + 1,
};
} else {
break Self {
line: line_idx + 1,
column: column + 1,
};
}
} }
bytes_prior = bytes_so_far;
iterator.next(); if (line_offset..(line_offset + line.len())).contains(&start_offset) {
let column_offset = start_offset - line_offset;
let column = line
.char_indices()
.enumerate()
.find(|(_, (char_idx, _))| *char_idx >= column_offset)
.map(|(l, _)| l)
.unwrap_or_else(|| {
// give up, just point to the end.
line.chars().count()
});
break Self {
line: line_idx + 1,
column: column + 1,
};
}
lines.next();
prev_line = line;
}; };
loop { let end_offset = span.offset as usize + span.len as usize;
let Some((line_idx, (line, seperator_offset))) = iterator.next() else { let end = loop {
panic!("tried to find location of span not belonging to string"); let Some((line_idx, line)) = lines.peek().copied() else {
// Couldn't find the line, give up and return the last
break Self::range_of_source_end(source).end;
}; };
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; // Safety: line originates from source so it is a substring so calling str_offset is
if bytes_so_far >= end { // valid.
let line_offset = end - bytes_prior; let line_offset = unsafe { str_offset(source, line) };
let column = if line_offset > line.len() {
line.chars().count() + 1 if end_offset < line_offset {
} else { // Span is inside the previous line terminator, point to the end of the line.
line[..line_offset.min(line.len())].chars().count() let len = prev_line.chars().count();
break Self {
line: line_idx,
column: len + 1,
}; };
return start..Self { }
if (line_offset..(line_offset + line.len())).contains(&end_offset) {
let column_offset = end_offset - line_offset;
let column = line
.char_indices()
.enumerate()
.find(|(_, (char_idx, _))| *char_idx >= column_offset)
.map(|(l, _)| l)
.unwrap_or_else(|| {
// give up, just point to the end.
line.chars().count()
});
break Self {
line: line_idx + 1, line: line_idx + 1,
column: column + 1, column: column + 1,
}; };
} }
bytes_prior = bytes_so_far;
} lines.next();
} prev_line = line;
} };
/// Iterator over the lines of a string.
///
/// Each item is the text of a line without its terminator, paired with the byte length of
/// the terminator that ended it, or `None` when the line ran to the end of the input.
struct LineIterator<'a> {
    /// The part of the input which has not been yielded yet.
    current: &'a str,
}

impl<'a> LineIterator<'a> {
    /// Creates an iterator over the lines of `s`.
    pub fn new(s: &'a str) -> Self {
        Self {
            current: s,
        }
    }
}

impl<'a> Iterator for LineIterator<'a> {
    type Item = (&'a str, Option<u8>);

    fn next(&mut self) -> Option<Self::Item> {
        if self.current.is_empty() {
            return None;
        }
        let remaining = self.current;
        let bytes = remaining.as_bytes();
        // Find the first line terminator: its starting byte index and its length in bytes.
        let terminator = bytes.iter().enumerate().find_map(|(idx, byte)| {
            let second = bytes.get(idx + 1).copied();
            let len = match *byte {
                // carriage return followed by line feed counts as a single terminator.
                b'\r' if second == Some(b'\n') => 2u8,
                // lone carriage return, line feed, vertical tab VT and form feed FF.
                b'\r' | b'\n' | 0x0B | 0x0C => 1,
                // next line NEL, encoded as 0xC2 0x85.
                0xC2 if second == Some(0x85) => 2,
                // line separator and paragraph separator, encoded as 0xE2 0x80 0xA8/0xA9.
                0xE2 if second == Some(0x80)
                    && matches!(bytes.get(idx + 2).copied(), Some(0xA8 | 0xA9)) =>
                {
                    3
                }
                _ => return None,
            };
            Some((idx, len))
        });
        match terminator {
            Some((idx, len)) => {
                self.current = &remaining[idx + usize::from(len)..];
                Some((&remaining[..idx], Some(len)))
            }
            None => {
                // No terminator left: the rest of the input is the final line.
                self.current = "";
                Some((remaining, None))
            }
        }
    }
}
#[cfg(test)]
mod test {
use super::LineIterator;
#[test]
fn test_line_iterator() {
let lines = "foo\nbar\r\nfoo\rbar\u{000B}foo\u{000C}bar\u{0085}foo\u{2028}bar\u{2029}\n";
let mut iterator = LineIterator::new(lines);
assert_eq!(iterator.next(), Some(("foo", Some(1))));
assert_eq!(iterator.next(), Some(("bar", Some(2))));
assert_eq!(iterator.next(), Some(("foo", Some(1))));
assert_eq!(iterator.next(), Some(("bar", Some(1))));
assert_eq!(iterator.next(), Some(("foo", Some(1))));
assert_eq!(iterator.next(), Some(("bar", Some(2))));
assert_eq!(iterator.next(), Some(("foo", Some(3))));
assert_eq!(iterator.next(), Some(("bar", Some(3))));
assert_eq!(iterator.next(), Some(("", Some(1))));
assert_eq!(iterator.next(), None);
} }
} }

View file

@ -222,17 +222,26 @@ impl fmt::Display for Snippet {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::{RenderedError, Snippet, Truncation}; use super::{RenderedError, Snippet, Truncation};
use crate::syn::error::{Location, MessageKind}; use crate::syn::{
error::{Location, MessageKind},
token::Span,
};
#[test] #[test]
fn truncate_whitespace() { fn truncate_whitespace() {
let source = "\n\n\n\t $ \t"; let source = "\n\n\n\t $ \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source); let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::None); assert_eq!(snippet.truncation, Truncation::None);
assert_eq!(snippet.offset, 0); assert_eq!(snippet.offset, 0);
assert_eq!(snippet.source.as_str(), "$"); assert_eq!(snippet.source.as_str(), "$");
@ -242,11 +251,17 @@ mod test {
fn truncate_start() { fn truncate_start() {
let source = " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ \t"; let source = " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source); let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::Start); assert_eq!(snippet.truncation, Truncation::Start);
assert_eq!(snippet.offset, 10); assert_eq!(snippet.offset, 10);
assert_eq!(snippet.source.as_str(), "aaaaaaaaa $"); assert_eq!(snippet.source.as_str(), "aaaaaaaaa $");
@ -256,11 +271,17 @@ mod test {
fn truncate_end() { fn truncate_end() {
let source = "\n\n a $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t"; let source = "\n\n a $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source); let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::End); assert_eq!(snippet.truncation, Truncation::End);
assert_eq!(snippet.offset, 2); assert_eq!(snippet.offset, 2);
assert_eq!( assert_eq!(
@ -273,11 +294,17 @@ mod test {
fn truncate_both() { fn truncate_both() {
let source = "\n\n\n\n aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t"; let source = "\n\n\n\n aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0; let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source); let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error); let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::Both); assert_eq!(snippet.truncation, Truncation::Both);
assert_eq!(snippet.offset, 10); assert_eq!(snippet.offset, 10);
assert_eq!( assert_eq!(

View file

@ -319,8 +319,12 @@ impl<'a> Parser<'a> {
self.last_span self.last_span
} }
pub fn assert_finished(&self) -> ParseResult<()> { pub fn assert_finished(&mut self) -> ParseResult<()> {
self.lexer.assert_finished() let p = self.peek();
if self.peek().kind != TokenKind::Eof {
bail!("Unexpected token `{}`, expected no more tokens",p.kind, @p.span);
}
Ok(())
} }
/// Eat the next token if it is of the given kind. /// Eat the next token if it is of the given kind.

View file

@ -5,6 +5,11 @@ fn object_with_negative() {
test_parse!(parse_json, r#"{"foo": -1 }"#).unwrap(); test_parse!(parse_json, r#"{"foo": -1 }"#).unwrap();
} }
#[test]
fn object_with_trailing_whitespace() {
    // A regular string literal is used on purpose: inside a raw string `\n` is the two
    // characters `\` and `n` rather than a newline, so the input would not actually end in
    // whitespace.
    test_parse!(parse_json, "{\"foo\": -1 }\n").unwrap();
}
#[test] #[test]
fn array_with_negative() { fn array_with_negative() {
test_parse!(parse_json, r#"[-1]"#).unwrap(); test_parse!(parse_json, r#"[-1]"#).unwrap();