Fix exponent numbers causing record ids to fail (#4049)

This commit is contained in:
Mees Delzenne 2024-05-16 16:13:34 +02:00 committed by GitHub
parent 47054b2891
commit 8077c15f41
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 52 additions and 29 deletions

View file

@ -50,7 +50,7 @@ impl Lexer<'_> {
match self.reader.peek() { match self.reader.peek() {
Some(b'd' | b'f') => { Some(b'd' | b'f') => {
// not an integer but parse anyway for error reporting. // not an integer but parse anyway for error reporting.
return self.lex_suffix(false, false); return self.lex_suffix(false, false, false);
} }
Some(x) if x.is_ascii_alphabetic() => return self.invalid_suffix_token(), Some(x) if x.is_ascii_alphabetic() => return self.invalid_suffix_token(),
_ => {} _ => {}
@ -96,7 +96,7 @@ impl Lexer<'_> {
return self.finish_number_token(NumberKind::Integer); return self.finish_number_token(NumberKind::Integer);
} }
} }
b'f' | b'd' => return self.lex_suffix(false, false), b'f' | b'd' => return self.lex_suffix(false, false, false),
// 0xc2 is the start byte of 'µ' // 0xc2 is the start byte of 'µ'
0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => { 0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
// duration suffix, switch to lexing duration. // duration suffix, switch to lexing duration.
@ -132,13 +132,13 @@ impl Lexer<'_> {
} }
/// Lex a number suffix, either 'f' or 'dec'. /// Lex a number suffix, either 'f' or 'dec'.
fn lex_suffix(&mut self, had_mantissa: bool, had_exponent: bool) -> Token { fn lex_suffix(&mut self, had_mantissa: bool, had_exponent: bool, had_operator: bool) -> Token {
match self.reader.peek() { match self.reader.peek() {
Some(b'f') => { Some(b'f') => {
// float suffix // float suffix
self.reader.next(); self.reader.next();
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) { if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
if self.flexible_ident && !had_mantissa { if self.flexible_ident && !had_mantissa && !had_operator {
self.scratch.push('f'); self.scratch.push('f');
self.lex_ident() self.lex_ident()
} else { } else {
@ -158,7 +158,7 @@ impl Lexer<'_> {
self.reader.next(); self.reader.next();
let checkpoint = self.reader.offset(); let checkpoint = self.reader.offset();
if !self.eat(b'e') { if !self.eat(b'e') {
if !had_mantissa && !had_exponent { if !had_mantissa && !had_exponent && !had_operator {
self.reader.backup(checkpoint - 1); self.reader.backup(checkpoint - 1);
return self.lex_duration(); return self.lex_duration();
} else if !had_mantissa && self.flexible_ident { } else if !had_mantissa && self.flexible_ident {
@ -215,7 +215,7 @@ impl Lexer<'_> {
self.scratch.push('e'); self.scratch.push('e');
return self.lex_exponent(true); return self.lex_exponent(true);
} }
b'f' | b'd' => return self.lex_suffix(true, false), b'f' | b'd' => return self.lex_suffix(true, false, false),
b'a'..=b'z' | b'A'..=b'Z' => { b'a'..=b'z' | b'A'..=b'Z' => {
// invalid token, random identifier characters immediately after number. // invalid token, random identifier characters immediately after number.
self.scratch.clear(); self.scratch.clear();
@ -230,33 +230,39 @@ impl Lexer<'_> {
/// Lexes the exponent of a number, i.e. `e10` in `1.1e10`; /// Lexes the exponent of a number, i.e. `e10` in `1.1e10`;
fn lex_exponent(&mut self, had_mantissa: bool) -> Token { fn lex_exponent(&mut self, had_mantissa: bool) -> Token {
loop { let mut had_operator = false;
match self.reader.peek() { let mut peek = self.reader.peek();
Some(x @ b'-' | x @ b'+') => {
if let Some(x @ b'-' | x @ b'+') = peek {
had_operator = true;
self.reader.next(); self.reader.next();
self.scratch.push(x as char); self.scratch.push(x as char);
peek = self.reader.peek();
} }
Some(x @ b'0'..=b'9') => {
if let Some(x @ b'0'..=b'9') = peek {
self.reader.next();
self.scratch.push(x as char); self.scratch.push(x as char);
break; } else {
} if self.flexible_ident && !had_mantissa && !had_operator {
_ => {
if self.flexible_ident && !had_mantissa {
return self.lex_ident(); return self.lex_ident();
} }
// random other character, expected at least one digit.
return self.invalid_token(LexError::Number(Error::DigitExpectedExponent)); return self.invalid_token(LexError::Number(Error::DigitExpectedExponent));
} }
}
}
self.reader.next();
loop { loop {
match self.reader.peek() { match self.reader.peek() {
Some(x @ (b'0'..=b'9' | b'_')) => { Some(x @ (b'0'..=b'9' | b'_')) => {
self.reader.next(); self.reader.next();
self.scratch.push(x as char); self.scratch.push(x as char);
} }
Some(b'f' | b'd') => return self.lex_suffix(had_mantissa, true), Some(b'f' | b'd') => return self.lex_suffix(had_mantissa, true, had_operator),
Some(x) if x.is_identifier_continue() => {
if self.flexible_ident && !had_operator && !had_mantissa {
return self.lex_ident();
}
return self.invalid_token(LexError::Number(Error::InvalidSuffix));
}
_ => { _ => {
let kind = if had_mantissa { let kind = if had_mantissa {
NumberKind::MantissaExponent NumberKind::MantissaExponent

View file

@ -277,15 +277,30 @@ impl Parser<'_> {
Ok(Id::String(text)) Ok(Id::String(text))
} }
} }
TokenKind::Number(NumberKind::Decimal | NumberKind::DecimalExponent) TokenKind::Number(NumberKind::Exponent) if self.flexible_record_id => {
if self.flexible_record_id => let text = self.lexer.string.take().unwrap();
{ if text.bytes().any(|x| !x.is_ascii_alphanumeric()) {
unexpected!(self, token.kind, "a identifier");
}
Ok(Id::String(text))
}
TokenKind::Number(NumberKind::Decimal) if self.flexible_record_id => {
let mut text = self.lexer.string.take().unwrap(); let mut text = self.lexer.string.take().unwrap();
text.push('d'); text.push('d');
text.push('e'); text.push('e');
text.push('c'); text.push('c');
Ok(Id::String(text)) Ok(Id::String(text))
} }
TokenKind::Number(NumberKind::DecimalExponent) if self.flexible_record_id => {
let mut text = self.lexer.string.take().unwrap();
if text.bytes().any(|x| !x.is_ascii_alphanumeric()) {
unexpected!(self, token.kind, "a identifier");
}
text.push('d');
text.push('e');
text.push('c');
Ok(Id::String(text))
}
TokenKind::Number(NumberKind::Float) if self.flexible_record_id => { TokenKind::Number(NumberKind::Float) if self.flexible_record_id => {
let mut text = self.lexer.string.take().unwrap(); let mut text = self.lexer.string.take().unwrap();
text.push('f'); text.push('f');
@ -565,5 +580,7 @@ mod tests {
assert_ident_parses_correctly("1ns"); assert_ident_parses_correctly("1ns");
assert_ident_parses_correctly("1ns1"); assert_ident_parses_correctly("1ns1");
assert_ident_parses_correctly("1ns1h"); assert_ident_parses_correctly("1ns1h");
assert_ident_parses_correctly("000e8");
assert_ident_parses_correctly("000e8bla");
} }
} }