Fix params not supporting delimiters (#4411)

This commit is contained in:
Mees Delzenne 2024-08-13 22:50:40 +02:00 committed by GitHub
parent 4181367b98
commit ee8e6f00d7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 66 additions and 12 deletions

View file

@ -6,6 +6,8 @@ use crate::syn::{
token::{t, DatetimeChars, Token, TokenKind},
};
use super::CharError;
impl<'a> Lexer<'a> {
/// Eats a single line comment.
pub fn eat_single_line_comment(&mut self) {
@ -312,13 +314,28 @@ impl<'a> Lexer<'a> {
}
_ => t!(":"),
},
b'$' => {
if self.reader.peek().map(|x| x.is_ascii_alphabetic() || x == b'_').unwrap_or(false)
{
return self.lex_param();
b'$' => match self.reader.peek() {
Some(b'_') => return self.lex_param(),
Some(b'`') => {
self.reader.next();
return self.lex_surrounded_param(true);
}
Some(x) if x.is_ascii_alphabetic() => return self.lex_param(),
Some(x) if !x.is_ascii() => {
let backup = self.reader.offset();
self.reader.next();
match self.reader.complete_char(x) {
Ok('⟨') => return self.lex_surrounded_param(false),
Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
_ => {
self.reader.backup(backup);
t!("$")
}
}
}
_ => t!("$"),
},
b'#' => {
self.eat_single_line_comment();
TokenKind::WhiteSpace

View file

@ -33,6 +33,17 @@ impl<'a> Lexer<'a> {
}
}
/// Lex a parameter whose name is wrapped in delimiters.
///
/// `is_backtick` is forwarded to `lex_surrounded_ident_err` and selects which
/// delimiter style is being lexed. The opening delimiter is expected to have
/// been consumed by the caller already.
///
/// Returns a `TokenKind::Parameter` token on success. On failure the scratch
/// buffer is cleared and an invalid token carrying the error is returned.
pub fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
    // The scratch buffer must be empty before the name is collected.
    debug_assert_eq!(self.scratch, "");
    if let Err(error) = self.lex_surrounded_ident_err(is_backtick) {
        // Drop any partially collected name so the next token starts clean.
        self.scratch.clear();
        return self.invalid_token(error);
    }
    self.finish_token(TokenKind::Parameter)
}
/// Lex a non-surrounded identifier in the form of `[a-zA-Z0-9_]*`
///
/// The start byte should already be a valid byte of the identifier.
@ -80,7 +91,7 @@ impl<'a> Lexer<'a> {
/// Lex an ident which is surrounded by delimiters.
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
match self.lex_surrounded_ident_err(is_backtick) {
Ok(x) => x,
Ok(_) => self.finish_token(TokenKind::Identifier),
Err(e) => {
self.scratch.clear();
self.invalid_token(e)
@ -89,7 +100,7 @@ impl<'a> Lexer<'a> {
}
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<Token, Error> {
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), Error> {
loop {
let Some(x) = self.reader.next() else {
let end_char = if is_backtick {
@ -103,7 +114,7 @@ impl<'a> Lexer<'a> {
match x {
b'`' if is_backtick => {
self.string = Some(mem::take(&mut self.scratch));
return Ok(self.finish_token(TokenKind::Identifier));
return Ok(());
}
b'\0' => {
// null bytes not allowed
@ -162,7 +173,7 @@ impl<'a> Lexer<'a> {
let c = self.reader.complete_char(x)?;
if !is_backtick && c == '⟩' {
self.string = Some(mem::take(&mut self.scratch));
return Ok(self.finish_token(TokenKind::Identifier));
return Ok(());
}
self.scratch.push(c);
}

View file

@ -76,10 +76,12 @@ impl<'a> BytesReader<'a> {
/// Returns a copy of the first byte of `self.remaining()`, or `None` when
/// nothing remains.
pub fn peek(&self) -> Option<u8> {
    let rest = self.remaining();
    rest.first().copied()
}
/// Returns the bytes of the underlying data covered by `span`, i.e. the
/// range starting at `span.offset` and running for `span.len` bytes.
///
/// # Panics
///
/// Panics if that range does not lie within the reader's data.
#[inline]
pub fn span(&self, span: Span) -> &'a [u8] {
    let start = span.offset as usize;
    let end = start + span.len as usize;
    &self.data[start..end]
}
#[inline]
pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
@ -87,7 +89,7 @@ impl<'a> BytesReader<'a> {
let byte = self.next().ok_or(CharError::Eof)?;
if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
return Err(CharError::Eof);
return Err(CharError::Unicode);
}
Ok(byte & CONTINUE_BYTE_MASK)

View file

@ -140,7 +140,7 @@ macro_rules! expected_whitespace {
#[cfg(test)]
#[macro_export]
macro_rules! test_parse {
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:literal) => {{
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:expr) => {{
let mut parser = $crate::syn::parser::Parser::new($t.as_bytes());
let mut stack = reblessive::Stack::new();
stack.enter(|ctx| parser.$func(ctx,$($($e),*)*)).finish()

View file

@ -1,3 +1,5 @@
use nom::AsBytes;
use crate::{sql, syn::parser::mac::test_parse};
mod limit;
@ -11,3 +13,25 @@ fn multiple_semicolons() {
let expected = sql::Query(sql::Statements(vec![]));
assert_eq!(res, expected);
}
/// A parameter whose name is wrapped in `⟨⟩` must parse both where it is
/// defined (`LET`) and where it is read back (`RETURN`).
///
/// The name deliberately contains `-`, which is not valid in an undelimited
/// parameter, so the query only parses as intended when the delimited form is
/// lexed as a single parameter token.
#[test]
fn escaped_params() {
    // Both occurrences use the delimited form; an undelimited `$R-_f...` would
    // lex as `$R` followed by `-`, not as the same parameter.
    let src = r#"LET $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩ = 5;
RETURN $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩"#;
    test_parse!(parse_query, src).unwrap();
}
/// A parameter whose name is wrapped in backticks must parse both where it is
/// defined (`LET`) and where it is read back (`RETURN`).
#[test]
fn escaped_params_backtick() {
    let src = r#"LET $`R-_fYU8Wa31kg7tz0JI6Kme` = 5;
RETURN $`R-_fYU8Wa31kg7tz0JI6Kme`"#;
    let parsed = test_parse!(parse_query, src);
    parsed.unwrap();
}