Allow scientific notation for decimal numbers

This commit is contained in:
Mees Delzenne 2024-03-04 16:04:08 +01:00 committed by GitHub
parent 4471433a78
commit 654443c9c1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 91 additions and 30 deletions
core/src/syn

View file

@ -49,14 +49,21 @@ fn not_nan(i: &str) -> IResult<&str, Number> {
.map_err(Err::Failure)?;
Number::from(float)
}
Suffix::Decimal => Number::from(
Suffix::Decimal => Number::from(if v.contains(['e', 'E']) {
Decimal::from_scientific(v)
.map_err(|e| ParseError::ParseDecimal {
tried: v,
error: e,
})
.map_err(Err::Failure)?
} else {
Decimal::from_str(v)
.map_err(|e| ParseError::ParseDecimal {
tried: v,
error: e,
})
.map_err(Err::Failure)?,
),
.map_err(Err::Failure)?
}),
};
Ok((i, number))
}
@ -87,6 +94,8 @@ pub fn integer(i: &str) -> IResult<&str, i64> {
#[cfg(test)]
mod tests {
use rust_decimal::prelude::FromPrimitive;
use super::*;
use std::{cmp::Ordering, ops::Div};
@ -186,6 +195,15 @@ mod tests {
assert_eq!(sql, format!("{}", out));
}
/// A literal using an upper-case `E` exponent together with the `dec` suffix
/// must parse into a decimal and print back in plain (non-scientific) form.
#[test]
fn number_scientific_upper_decimal() {
    let input = "12345E-02dec";
    // `number` yields `(remaining input, parsed number)`; only the number matters here.
    let (_, parsed) = number(input).unwrap();
    assert_eq!("123.45dec", parsed.to_string());
    let expected = Number::Decimal(rust_decimal::Decimal::from_f64(123.45).unwrap());
    assert_eq!(parsed, expected);
}
#[test]
fn number_div_int() {
let res = Number::Int(3).div(Number::Int(2));

View file

@ -52,7 +52,7 @@ impl Lexer<'_> {
match self.reader.peek() {
Some(b'd' | b'f') => {
// not an integer but parse anyway for error reporting.
return self.lex_suffix(true);
return self.lex_suffix(false, true);
}
Some(x) if x.is_ascii_alphabetic() => return Err(self.invalid_suffix()),
_ => {}
@ -86,6 +86,12 @@ impl Lexer<'_> {
self.reader.next();
self.scratch.push(x as char);
}
b'e' | b'E' => {
// scientific notation
self.reader.next();
self.scratch.push('e');
return self.lex_exponent(false);
}
b'.' => {
// mantissa
let backup = self.reader.offset();
@ -101,7 +107,7 @@ impl Lexer<'_> {
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
}
}
b'f' | b'd' => return self.lex_suffix(true),
b'f' | b'd' => return self.lex_suffix(false, true),
// 0xc2 is the first byte of the UTF-8 encoding of 'µ'
0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
// duration suffix, switch to lexing duration.
@ -136,7 +142,7 @@ impl Lexer<'_> {
}
/// Lex a number suffix, either 'f' or 'dec'.
fn lex_suffix(&mut self, can_be_duration: bool) -> Result<Token, Error> {
fn lex_suffix(&mut self, had_exponent: bool, can_be_duration: bool) -> Result<Token, Error> {
match self.reader.peek() {
Some(b'f') => {
// float suffix
@ -169,7 +175,11 @@ impl Lexer<'_> {
Err(self.invalid_suffix())
} else {
self.string = Some(mem::take(&mut self.scratch));
Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal)))
if had_exponent {
Ok(self.finish_token(TokenKind::Number(NumberKind::DecimalExponent)))
} else {
Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal)))
}
}
}
_ => unreachable!(),
@ -200,7 +210,7 @@ impl Lexer<'_> {
b'_' => {
self.reader.next();
}
b'f' | b'd' => return self.lex_suffix(false),
b'f' | b'd' => return self.lex_suffix(false, false),
b'a'..=b'z' | b'A'..=b'Z' => {
// invalid token, random identifier characters immediately after number.
self.scratch.clear();
@ -216,20 +226,25 @@ impl Lexer<'_> {
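/// Lexes the exponent of a number, e.g. the `e10` in `1.1e10`.
/// The body starts at the optional sign or first digit, so the `e` itself is expected to be consumed already.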
fn lex_exponent(&mut self, had_mantissa: bool) -> Result<Token, Error> {
let mut atleast_one = false;
match self.reader.peek() {
Some(b'-' | b'+') => {}
Some(b'0'..=b'9') => {
atleast_one = true;
}
_ => {
// random other character, expected atleast one digit.
return Err(Error::DigitExpectedExponent);
loop {
match self.reader.peek() {
Some(x @ b'-' | x @ b'+') => {
self.reader.next();
self.scratch.push(x as char);
}
Some(x @ b'0'..=b'9') => {
self.scratch.push(x as char);
break;
}
_ => {
// Any other character: at least one digit is expected in the exponent.
return Err(Error::DigitExpectedExponent);
}
}
}
self.reader.next();
loop {
match self.reader.peek() {
match dbg!(self.reader.peek()) {
Some(x @ b'0'..=b'9') => {
self.reader.next();
self.scratch.push(x as char);
@ -237,19 +252,15 @@ impl Lexer<'_> {
Some(b'_') => {
self.reader.next();
}
Some(b'f' | b'd') => return self.lex_suffix(false),
Some(b'f' | b'd') => return self.lex_suffix(true, false),
_ => {
if atleast_one {
let kind = if had_mantissa {
NumberKind::MantissaExponent
} else {
NumberKind::Exponent
};
self.string = Some(mem::take(&mut self.scratch));
return Ok(self.finish_token(TokenKind::Number(kind)));
let kind = if had_mantissa {
NumberKind::MantissaExponent
} else {
return Err(Error::DigitExpectedExponent);
}
NumberKind::Exponent
};
self.string = Some(mem::take(&mut self.scratch));
return Ok(self.finish_token(TokenKind::Number(kind)));
}
}
}

View file

@ -117,7 +117,7 @@ fn identifiers() {
fn numbers() {
test_case! {
r#"
123123+32010230.123012031+33043030dec+33043030f+
123123+32010230.123012031+33043030dec+33043030f+303e10dec+
"#
=> [
@ -129,6 +129,8 @@ fn numbers() {
t!("+"),
TokenKind::Number(NumberKind::Float),
t!("+"),
TokenKind::Number(NumberKind::DecimalExponent),
t!("+"),
]
}

View file

@ -230,6 +230,19 @@ impl TokenValue for Number {
})?;
Ok(Number::Decimal(x))
}
TokenKind::Number(NumberKind::DecimalExponent) => {
let source = parser.lexer.string.take().unwrap();
// The lexer has already checked the shape of the literal; any remaining
// conversion failure is reported below as an invalid decimal.
let x = rust_decimal::Decimal::from_scientific(&source).map_err(|error| {
ParseError::new(
ParseErrorKind::InvalidDecimal {
error,
},
token.span,
)
})?;
Ok(Number::Decimal(x))
}
x => unexpected!(parser, x, "a number"),
}
}

View file

@ -311,6 +311,7 @@ fn statements() -> Vec<Statement> {
index: Index::MTree(MTreeParams {
dimension: 4,
distance: Distance::Minkowski(Number::Int(5)),
_distance: Default::default(),
capacity: 6,
doc_ids_order: 7,
doc_ids_cache: 8,

View file

@ -64,3 +64,17 @@ fn constant_mixedcase() {
let out = test_parse!(parse_value, r#" MaTh::Pi "#).unwrap();
assert_eq!(out, Value::Constant(Constant::MathPi));
}
/// A mantissa-plus-exponent literal carrying the `dec` suffix parses as a
/// decimal and is displayed without scientific notation.
#[test]
fn scientific_decimal() {
    let parsed = test_parse!(parse_value, r#" 9.7e-7dec "#).unwrap();
    // Check the variant first so a wrong number kind fails before the text comparison.
    assert!(matches!(parsed, Value::Number(Number::Decimal(_))));
    let rendered = parsed.to_string();
    assert_eq!(rendered, "0.00000097dec");
}
/// A mantissa-plus-exponent literal without a suffix parses as a float and is
/// displayed in plain notation with the `f` suffix.
#[test]
fn scientific_number() {
    let parsed = test_parse!(parse_value, r#" 9.7e-5"#).unwrap();
    // Check the variant first so a wrong number kind fails before the text comparison.
    assert!(matches!(parsed, Value::Number(Number::Float(_))));
    let rendered = parsed.to_string();
    assert_eq!(rendered, "0.000097f");
}

View file

@ -217,6 +217,8 @@ pub enum NumberKind {
Integer,
// A number with a decimal postfix.
Decimal,
// A number with an exponent and a decimal postfix.
DecimalExponent,
// A number with a float postfix.
Float,
// A number with a `.3` part.