Fix params not supporting delimiters (#4411)

This commit is contained in:
Mees Delzenne 2024-08-13 22:50:40 +02:00 committed by GitHub
parent 4181367b98
commit ee8e6f00d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 66 additions and 12 deletions

View file

@ -6,6 +6,8 @@ use crate::syn::{
token::{t, DatetimeChars, Token, TokenKind}, token::{t, DatetimeChars, Token, TokenKind},
}; };
use super::CharError;
impl<'a> Lexer<'a> { impl<'a> Lexer<'a> {
/// Eats a single line comment. /// Eats a single line comment.
pub fn eat_single_line_comment(&mut self) { pub fn eat_single_line_comment(&mut self) {
@ -312,13 +314,28 @@ impl<'a> Lexer<'a> {
} }
_ => t!(":"), _ => t!(":"),
}, },
b'$' => { b'$' => match self.reader.peek() {
if self.reader.peek().map(|x| x.is_ascii_alphabetic() || x == b'_').unwrap_or(false) Some(b'_') => return self.lex_param(),
{ Some(b'`') => {
return self.lex_param(); self.reader.next();
return self.lex_surrounded_param(true);
} }
t!("$") Some(x) if x.is_ascii_alphabetic() => return self.lex_param(),
} Some(x) if !x.is_ascii() => {
let backup = self.reader.offset();
self.reader.next();
match self.reader.complete_char(x) {
Ok('⟨') => return self.lex_surrounded_param(false),
Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
_ => {
self.reader.backup(backup);
t!("$")
}
}
}
_ => t!("$"),
},
b'#' => { b'#' => {
self.eat_single_line_comment(); self.eat_single_line_comment();
TokenKind::WhiteSpace TokenKind::WhiteSpace

View file

@ -33,6 +33,17 @@ impl<'a> Lexer<'a> {
} }
} }
/// Lexes a parameter whose name is enclosed in delimiters.
///
/// `is_backtick` is passed straight through to `lex_surrounded_ident_err`, which
/// consumes the delimited name. On success the token is finished as
/// `TokenKind::Parameter`; on failure the scratch buffer is cleared and an
/// invalid token carrying the error is returned instead.
pub fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
    // The scratch buffer is expected to be empty when lexing starts.
    debug_assert_eq!(self.scratch, "");
    if let Err(error) = self.lex_surrounded_ident_err(is_backtick) {
        // Drop any partially collected name before reporting the failure.
        self.scratch.clear();
        return self.invalid_token(error);
    }
    self.finish_token(TokenKind::Parameter)
}
/// Lex a non-surrounded identifier in the form of `[a-zA-Z0-9_]*` /// Lex a non-surrounded identifier in the form of `[a-zA-Z0-9_]*`
/// ///
/// The start byte should already be a valid byte of the identifier. /// The start byte should already be a valid byte of the identifier.
@ -80,7 +91,7 @@ impl<'a> Lexer<'a> {
/// Lex an ident which is surrounded by delimiters. /// Lex an ident which is surrounded by delimiters.
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token { pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
match self.lex_surrounded_ident_err(is_backtick) { match self.lex_surrounded_ident_err(is_backtick) {
Ok(x) => x, Ok(_) => self.finish_token(TokenKind::Identifier),
Err(e) => { Err(e) => {
self.scratch.clear(); self.scratch.clear();
self.invalid_token(e) self.invalid_token(e)
@ -89,7 +100,7 @@ impl<'a> Lexer<'a> {
} }
/// Lex an ident surrounded either by `⟨⟩` or `\`\`` /// Lex an ident surrounded either by `⟨⟩` or `\`\``
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<Token, Error> { pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), Error> {
loop { loop {
let Some(x) = self.reader.next() else { let Some(x) = self.reader.next() else {
let end_char = if is_backtick { let end_char = if is_backtick {
@ -103,7 +114,7 @@ impl<'a> Lexer<'a> {
match x { match x {
b'`' if is_backtick => { b'`' if is_backtick => {
self.string = Some(mem::take(&mut self.scratch)); self.string = Some(mem::take(&mut self.scratch));
return Ok(self.finish_token(TokenKind::Identifier)); return Ok(());
} }
b'\0' => { b'\0' => {
// null bytes not allowed // null bytes not allowed
@ -162,7 +173,7 @@ impl<'a> Lexer<'a> {
let c = self.reader.complete_char(x)?; let c = self.reader.complete_char(x)?;
if !is_backtick && c == '⟩' { if !is_backtick && c == '⟩' {
self.string = Some(mem::take(&mut self.scratch)); self.string = Some(mem::take(&mut self.scratch));
return Ok(self.finish_token(TokenKind::Identifier)); return Ok(());
} }
self.scratch.push(c); self.scratch.push(c);
} }

View file

@ -76,10 +76,12 @@ impl<'a> BytesReader<'a> {
pub fn peek(&self) -> Option<u8> { pub fn peek(&self) -> Option<u8> {
self.remaining().first().copied() self.remaining().first().copied()
} }
#[inline] #[inline]
pub fn span(&self, span: Span) -> &'a [u8] { pub fn span(&self, span: Span) -> &'a [u8] {
&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)] &self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
} }
#[inline] #[inline]
pub fn next_continue_byte(&mut self) -> Result<u8, CharError> { pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000; const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
@ -87,7 +89,7 @@ impl<'a> BytesReader<'a> {
let byte = self.next().ok_or(CharError::Eof)?; let byte = self.next().ok_or(CharError::Eof)?;
if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 { if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
return Err(CharError::Eof); return Err(CharError::Unicode);
} }
Ok(byte & CONTINUE_BYTE_MASK) Ok(byte & CONTINUE_BYTE_MASK)

View file

@ -140,7 +140,7 @@ macro_rules! expected_whitespace {
#[cfg(test)] #[cfg(test)]
#[macro_export] #[macro_export]
macro_rules! test_parse { macro_rules! test_parse {
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:literal) => {{ ($func:ident$( ( $($e:expr),* $(,)? ))? , $t:expr) => {{
let mut parser = $crate::syn::parser::Parser::new($t.as_bytes()); let mut parser = $crate::syn::parser::Parser::new($t.as_bytes());
let mut stack = reblessive::Stack::new(); let mut stack = reblessive::Stack::new();
stack.enter(|ctx| parser.$func(ctx,$($($e),*)*)).finish() stack.enter(|ctx| parser.$func(ctx,$($($e),*)*)).finish()

View file

@ -1,3 +1,5 @@
use nom::AsBytes;
use crate::{sql, syn::parser::mac::test_parse}; use crate::{sql, syn::parser::mac::test_parse};
mod limit; mod limit;
@ -11,3 +13,25 @@ fn multiple_semicolons() {
let expected = sql::Query(sql::Statements(vec![])); let expected = sql::Query(sql::Statements(vec![]));
assert_eq!(res, expected); assert_eq!(res, expected);
} }
/// Checks that a query using a `⟨…⟩`-delimited parameter name parses successfully.
///
/// Both the `LET` and the `RETURN` statement use the delimited form, mirroring
/// `escaped_params_backtick`, so the delimited-parameter path is exercised where
/// the parameter is declared and where it is read back.
#[test]
fn escaped_params() {
    let src = r#"LET $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩ = 5;
RETURN $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩"#;
    // Parsing must succeed; `unwrap` fails the test on any parse error.
    test_parse!(parse_query, src).unwrap();
}
/// Checks that a query using a backtick-delimited parameter name parses
/// successfully, both in the `LET` statement and in the `RETURN` statement.
#[test]
fn escaped_params_backtick() {
    let src = r#"LET $`R-_fYU8Wa31kg7tz0JI6Kme` = 5;
RETURN $`R-_fYU8Wa31kg7tz0JI6Kme`"#;
    let parsed = test_parse!(parse_query, src);
    // Any parse error fails the test here.
    parsed.unwrap();
}