Fix error message panic ()

This commit is contained in:
Mees Delzenne 2024-09-18 22:00:27 +02:00 committed by GitHub
parent 52aef7ccc4
commit e0f357c18a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 147 additions and 272 deletions
core/src

View file

@ -64,7 +64,7 @@ mod tests {
fn changed_remove() {
let old = Value::parse("{ test: true, other: 'test' }");
let now = Value::parse("{ test: true }");
let res = Value::parse("{ other: NONE }]");
let res = Value::parse("{ other: NONE }");
assert_eq!(res, old.changed(&now));
}

View file

@ -12,281 +12,120 @@ pub struct Location {
pub column: usize,
}
/// Safety: b must be a substring of a.
unsafe fn str_offset(a: &str, b: &str) -> usize {
b.as_ptr().offset_from(a.as_ptr()) as usize
}
impl Location {
/// Returns the location of the start of substring in the larger input string.
///
/// Assumption: substr must be a subslice of input.
pub fn of_in(substr: &str, input: &str) -> Self {
// Bytes of input before substr.
let offset = (substr.as_ptr() as usize)
.checked_sub(input.as_ptr() as usize)
.expect("tried to find location of substring in unrelated string");
assert!(offset <= input.len(), "tried to find location of substring in unrelated string");
// Bytes of input prior to line being iterated.
let mut bytes_prior = 0;
for (line_idx, (line, seperator_len)) in LineIterator::new(input).enumerate() {
let bytes_so_far = bytes_prior + line.len() + seperator_len.unwrap_or(0) as usize;
if bytes_so_far >= offset {
// found line.
let line_offset = offset - bytes_prior;
fn range_of_source_end(source: &str) -> Range<Self> {
let (line, column) = source
.lines()
.enumerate()
.last()
.map(|(idx, line)| {
let idx = idx + 1;
let line_idx = line.chars().count().max(1);
(idx, line_idx)
})
.unwrap_or((0, 0));
let column = if line_offset > line.len() {
// error is inside line terminator.
line.chars().count() + 1
} else {
line[..line_offset].chars().count()
};
// +1 because line and column are 1 index.
return Self {
line: line_idx + 1,
column: column + 1,
};
}
bytes_prior = bytes_so_far;
Self {
line,
column,
}..Self {
line,
column: column + 1,
}
unreachable!()
}
pub fn of_offset(source: &str, offset: usize) -> Self {
assert!(offset <= source.len(), "tried to find location of substring in unrelated string");
if offset == source.len() {
// Eof character
let (last_line, column) = LineIterator::new(source)
.enumerate()
.last()
.map(|(idx, (l, _))| (idx, l.len()))
.unwrap_or((0, 0));
return Self {
line: last_line + 1,
column: column + 1,
};
}
// Bytes of input prior to line being iterated.
let mut bytes_prior = 0;
for (line_idx, (line, seperator_len)) in LineIterator::new(source).enumerate() {
let bytes_so_far = bytes_prior + line.len() + seperator_len.unwrap_or(0) as usize;
if bytes_so_far >= offset {
// found line.
let line_offset = offset - bytes_prior;
let column = if line_offset > line.len() {
// error is inside line terminator.
line.chars().count() + 1
} else {
line[..line_offset].chars().count()
};
// +1 because line and column are 1 index.
return Self {
line: line_idx + 1,
column: column + 1,
};
}
bytes_prior = bytes_so_far;
}
unreachable!()
}
pub fn of_span_start(source: &str, span: Span) -> Self {
// Bytes of input before substr.
let offset = span.offset as usize;
Self::of_offset(source, offset)
}
pub fn of_span_end(source: &str, span: Span) -> Self {
// Bytes of input before substr.
let offset = span.offset as usize + span.len as usize;
Self::of_offset(source, offset)
}
pub fn range_of_span(source: &str, span: Span) -> Range<Self> {
if source.len() == span.offset as usize {
// EOF span
let (line_idx, column) = LineIterator::new(source)
.map(|(l, _)| l.len())
.enumerate()
.last()
.unwrap_or((0, 0));
return Self {
line: line_idx + 1,
column: column + 1,
}..Self {
line: line_idx + 1,
column: column + 2,
};
}
// Bytes of input before substr.
let offset = span.offset as usize;
let end = offset + span.len as usize;
if span.len == 0 && source.len() == span.offset as usize {
// EOF span
let (last_line, column) = LineIterator::new(source)
.enumerate()
.last()
.map(|(idx, (l, _))| (idx, l.len()))
.unwrap_or((0, 0));
return Self {
line: last_line + 1,
column,
}..Self {
line: last_line + 1,
column: column + 1,
};
if source.len() <= span.offset as usize {
return Self::range_of_source_end(source);
}
let mut prev_line = "";
let mut lines = source.lines().enumerate().peekable();
// Bytes of input prior to line being iteratated.
let mut bytes_prior = 0;
let mut iterator = LineIterator::new(source).enumerate().peekable();
let start_offset = span.offset as usize;
let start = loop {
let Some((line_idx, (line, seperator_offset))) = iterator.peek() else {
panic!("tried to find location of span not belonging to string");
let Some((line_idx, line)) = lines.peek().copied() else {
// Couldn't find the line, give up and return the last
return Self::range_of_source_end(source);
};
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
if bytes_so_far > offset {
// found line.
let line_offset = offset - bytes_prior;
let column = if line_offset > line.len() {
line.chars().count() + 1
} else {
line[..line_offset.min(line.len())].chars().count()
// Safety: line originates from source so it is a substring so calling str_offset is
// valid.
let line_offset = unsafe { str_offset(source, line) };
if start_offset < line_offset {
// Span is inside the previous line terminator, point to the end of the line.
let len = prev_line.chars().count();
break Self {
line: line_idx,
column: len + 1,
};
// +1 because line and column are 1 index.
if bytes_so_far >= end {
// end is on the same line, finish immediatly.
let line_offset = end - bytes_prior;
let end_column = line[..line_offset].chars().count();
return Self {
line: line_idx + 1,
column: column + 1,
}..Self {
line: line_idx + 1,
column: end_column + 1,
};
} else {
break Self {
line: line_idx + 1,
column: column + 1,
};
}
}
bytes_prior = bytes_so_far;
iterator.next();
if (line_offset..(line_offset + line.len())).contains(&start_offset) {
let column_offset = start_offset - line_offset;
let column = line
.char_indices()
.enumerate()
.find(|(_, (char_idx, _))| *char_idx >= column_offset)
.map(|(l, _)| l)
.unwrap_or_else(|| {
// give up, just point to the end.
line.chars().count()
});
break Self {
line: line_idx + 1,
column: column + 1,
};
}
lines.next();
prev_line = line;
};
loop {
let Some((line_idx, (line, seperator_offset))) = iterator.next() else {
panic!("tried to find location of span not belonging to string");
let end_offset = span.offset as usize + span.len as usize;
let end = loop {
let Some((line_idx, line)) = lines.peek().copied() else {
// Couldn't find the line, give up and return the last
break Self::range_of_source_end(source).end;
};
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
if bytes_so_far >= end {
let line_offset = end - bytes_prior;
let column = if line_offset > line.len() {
line.chars().count() + 1
} else {
line[..line_offset.min(line.len())].chars().count()
// Safety: line originates from source so it is a substring so calling str_offset is
// valid.
let line_offset = unsafe { str_offset(source, line) };
if end_offset < line_offset {
// Span is inside the previous line terminator, point to the end of the line.
let len = prev_line.chars().count();
break Self {
line: line_idx,
column: len + 1,
};
return start..Self {
}
if (line_offset..(line_offset + line.len())).contains(&end_offset) {
let column_offset = end_offset - line_offset;
let column = line
.char_indices()
.enumerate()
.find(|(_, (char_idx, _))| *char_idx >= column_offset)
.map(|(l, _)| l)
.unwrap_or_else(|| {
// give up, just point to the end.
line.chars().count()
});
break Self {
line: line_idx + 1,
column: column + 1,
};
}
bytes_prior = bytes_so_far;
}
}
}
struct LineIterator<'a> {
current: &'a str,
}
impl<'a> LineIterator<'a> {
pub fn new(s: &'a str) -> Self {
LineIterator {
current: s,
}
}
}
impl<'a> Iterator for LineIterator<'a> {
type Item = (&'a str, Option<u8>);
fn next(&mut self) -> Option<Self::Item> {
if self.current.is_empty() {
return None;
}
let bytes = self.current.as_bytes();
for i in 0..bytes.len() {
match bytes[i] {
b'\r' => {
if let Some(b'\n') = bytes.get(i + 1) {
let res = &self.current[..i];
self.current = &self.current[i + 2..];
return Some((res, Some(2)));
}
let res = &self.current[..i];
self.current = &self.current[i + 1..];
return Some((res, Some(1)));
}
0xb | 0xC | b'\n' => {
// vertical tab VT and form feed FF.
let res = &self.current[..i];
self.current = &self.current[i + 1..];
return Some((res, Some(1)));
}
0xc2 => {
// next line NEL
if bytes.get(i + 1).copied() != Some(0x85) {
continue;
}
let res = &self.current[..i];
self.current = &self.current[i + 2..];
return Some((res, Some(2)));
}
0xe2 => {
// line separator and paragraph seperator.
if bytes.get(i + 1).copied() != Some(0x80) {
continue;
}
let next_byte = bytes.get(i + 2).copied();
if next_byte != Some(0xA8) && next_byte != Some(0xA9) {
continue;
}
// vertical tab VT, next line NEL and form feed FF.
let res = &self.current[..i];
self.current = &self.current[i + 3..];
return Some((res, Some(3)));
}
_ => {}
}
}
Some((std::mem::take(&mut self.current), None))
}
}
#[cfg(test)]
mod test {
use super::LineIterator;
#[test]
fn test_line_iterator() {
let lines = "foo\nbar\r\nfoo\rbar\u{000B}foo\u{000C}bar\u{0085}foo\u{2028}bar\u{2029}\n";
let mut iterator = LineIterator::new(lines);
assert_eq!(iterator.next(), Some(("foo", Some(1))));
assert_eq!(iterator.next(), Some(("bar", Some(2))));
assert_eq!(iterator.next(), Some(("foo", Some(1))));
assert_eq!(iterator.next(), Some(("bar", Some(1))));
assert_eq!(iterator.next(), Some(("foo", Some(1))));
assert_eq!(iterator.next(), Some(("bar", Some(2))));
assert_eq!(iterator.next(), Some(("foo", Some(3))));
assert_eq!(iterator.next(), Some(("bar", Some(3))));
assert_eq!(iterator.next(), Some(("", Some(1))));
assert_eq!(iterator.next(), None);
lines.next();
prev_line = line;
};
start..end
}
}

View file

@ -222,17 +222,26 @@ impl fmt::Display for Snippet {
#[cfg(test)]
mod test {
use super::{RenderedError, Snippet, Truncation};
use crate::syn::error::{Location, MessageKind};
use crate::syn::{
error::{Location, MessageKind},
token::Span,
};
#[test]
fn truncate_whitespace() {
let source = "\n\n\n\t $ \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source);
let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::None);
assert_eq!(snippet.offset, 0);
assert_eq!(snippet.source.as_str(), "$");
@ -242,11 +251,17 @@ mod test {
fn truncate_start() {
let source = " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source);
let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::Start);
assert_eq!(snippet.offset, 10);
assert_eq!(snippet.source.as_str(), "aaaaaaaaa $");
@ -256,11 +271,17 @@ mod test {
fn truncate_end() {
let source = "\n\n a $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source);
let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::End);
assert_eq!(snippet.offset, 2);
assert_eq!(
@ -273,11 +294,17 @@ mod test {
fn truncate_both() {
let source = "\n\n\n\n aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa $ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa \t";
let offset = source.char_indices().find(|(_, c)| *c == '$').unwrap().0;
let error = &source[offset..];
let location = Location::of_in(error, source);
let location = Location::range_of_span(
source,
Span {
offset: offset as u32,
len: 1,
},
);
let snippet = Snippet::from_source_location(source, location, None, MessageKind::Error);
let snippet =
Snippet::from_source_location(source, location.start, None, MessageKind::Error);
assert_eq!(snippet.truncation, Truncation::Both);
assert_eq!(snippet.offset, 10);
assert_eq!(

View file

@ -319,8 +319,12 @@ impl<'a> Parser<'a> {
self.last_span
}
pub fn assert_finished(&self) -> ParseResult<()> {
self.lexer.assert_finished()
pub fn assert_finished(&mut self) -> ParseResult<()> {
let p = self.peek();
if self.peek().kind != TokenKind::Eof {
bail!("Unexpected token `{}`, expected no more tokens",p.kind, @p.span);
}
Ok(())
}
/// Eat the next token if it is of the given kind.

View file

@ -5,6 +5,11 @@ fn object_with_negative() {
test_parse!(parse_json, r#"{"foo": -1 }"#).unwrap();
}
#[test]
fn object_with_trailing_whitespace() {
test_parse!(parse_json, r#"{"foo": -1 }\n"#).unwrap();
}
#[test]
fn array_with_negative() {
test_parse!(parse_json, r#"[-1]"#).unwrap();