Fix params not supporting delimiters (#4411)
This commit is contained in:
parent
4181367b98
commit
ee8e6f00d7
5 changed files with 66 additions and 12 deletions
|
@ -6,6 +6,8 @@ use crate::syn::{
|
|||
token::{t, DatetimeChars, Token, TokenKind},
|
||||
};
|
||||
|
||||
use super::CharError;
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Eats a single line comment.
|
||||
pub fn eat_single_line_comment(&mut self) {
|
||||
|
@ -312,13 +314,28 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
_ => t!(":"),
|
||||
},
|
||||
b'$' => {
|
||||
if self.reader.peek().map(|x| x.is_ascii_alphabetic() || x == b'_').unwrap_or(false)
|
||||
{
|
||||
return self.lex_param();
|
||||
b'$' => match self.reader.peek() {
|
||||
Some(b'_') => return self.lex_param(),
|
||||
Some(b'`') => {
|
||||
self.reader.next();
|
||||
return self.lex_surrounded_param(true);
|
||||
}
|
||||
t!("$")
|
||||
}
|
||||
Some(x) if x.is_ascii_alphabetic() => return self.lex_param(),
|
||||
Some(x) if !x.is_ascii() => {
|
||||
let backup = self.reader.offset();
|
||||
self.reader.next();
|
||||
match self.reader.complete_char(x) {
|
||||
Ok('⟨') => return self.lex_surrounded_param(false),
|
||||
Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
|
||||
Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
|
||||
_ => {
|
||||
self.reader.backup(backup);
|
||||
t!("$")
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => t!("$"),
|
||||
},
|
||||
b'#' => {
|
||||
self.eat_single_line_comment();
|
||||
TokenKind::WhiteSpace
|
||||
|
|
|
@ -33,6 +33,17 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
|
||||
debug_assert_eq!(self.scratch, "");
|
||||
match self.lex_surrounded_ident_err(is_backtick) {
|
||||
Ok(_) => self.finish_token(TokenKind::Parameter),
|
||||
Err(e) => {
|
||||
self.scratch.clear();
|
||||
self.invalid_token(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a non-surrounded identifier in the form of `[a-zA-Z0-9_]*`
|
||||
///
|
||||
/// The start byte should already be a valid byte of the identifier.
|
||||
|
@ -80,7 +91,7 @@ impl<'a> Lexer<'a> {
|
|||
/// Lex an ident which is surrounded by delimiters.
|
||||
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
|
||||
match self.lex_surrounded_ident_err(is_backtick) {
|
||||
Ok(x) => x,
|
||||
Ok(_) => self.finish_token(TokenKind::Identifier),
|
||||
Err(e) => {
|
||||
self.scratch.clear();
|
||||
self.invalid_token(e)
|
||||
|
@ -89,7 +100,7 @@ impl<'a> Lexer<'a> {
|
|||
}
|
||||
|
||||
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
|
||||
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<Token, Error> {
|
||||
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), Error> {
|
||||
loop {
|
||||
let Some(x) = self.reader.next() else {
|
||||
let end_char = if is_backtick {
|
||||
|
@ -103,7 +114,7 @@ impl<'a> Lexer<'a> {
|
|||
match x {
|
||||
b'`' if is_backtick => {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Identifier));
|
||||
return Ok(());
|
||||
}
|
||||
b'\0' => {
|
||||
// null bytes not allowed
|
||||
|
@ -162,7 +173,7 @@ impl<'a> Lexer<'a> {
|
|||
let c = self.reader.complete_char(x)?;
|
||||
if !is_backtick && c == '⟩' {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Identifier));
|
||||
return Ok(());
|
||||
}
|
||||
self.scratch.push(c);
|
||||
}
|
||||
|
|
|
@ -76,10 +76,12 @@ impl<'a> BytesReader<'a> {
|
|||
pub fn peek(&self) -> Option<u8> {
|
||||
self.remaining().first().copied()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn span(&self, span: Span) -> &'a [u8] {
|
||||
&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
|
||||
const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
|
||||
|
@ -87,7 +89,7 @@ impl<'a> BytesReader<'a> {
|
|||
|
||||
let byte = self.next().ok_or(CharError::Eof)?;
|
||||
if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
|
||||
return Err(CharError::Eof);
|
||||
return Err(CharError::Unicode);
|
||||
}
|
||||
|
||||
Ok(byte & CONTINUE_BYTE_MASK)
|
||||
|
|
|
@ -140,7 +140,7 @@ macro_rules! expected_whitespace {
|
|||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_parse {
|
||||
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:literal) => {{
|
||||
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:expr) => {{
|
||||
let mut parser = $crate::syn::parser::Parser::new($t.as_bytes());
|
||||
let mut stack = reblessive::Stack::new();
|
||||
stack.enter(|ctx| parser.$func(ctx,$($($e),*)*)).finish()
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use nom::AsBytes;
|
||||
|
||||
use crate::{sql, syn::parser::mac::test_parse};
|
||||
|
||||
mod limit;
|
||||
|
@ -11,3 +13,25 @@ fn multiple_semicolons() {
|
|||
let expected = sql::Query(sql::Statements(vec![]));
|
||||
assert_eq!(res, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_params() {
|
||||
let src = r#"LET $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩ = 5;
|
||||
RETURN $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩"#;
|
||||
|
||||
for (idx, b) in src.as_bytes().iter().enumerate() {
|
||||
println!("{:0>4}: {:0>8b}", idx, b);
|
||||
}
|
||||
|
||||
test_parse!(parse_query, src).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_params_backtick() {
|
||||
test_parse!(
|
||||
parse_query,
|
||||
r#"LET $`R-_fYU8Wa31kg7tz0JI6Kme` = 5;
|
||||
RETURN $`R-_fYU8Wa31kg7tz0JI6Kme`"#
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue