Fix params not supporting delimiters (#4411)
This commit is contained in:
parent
4181367b98
commit
ee8e6f00d7
5 changed files with 66 additions and 12 deletions
|
@ -6,6 +6,8 @@ use crate::syn::{
|
||||||
token::{t, DatetimeChars, Token, TokenKind},
|
token::{t, DatetimeChars, Token, TokenKind},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use super::CharError;
|
||||||
|
|
||||||
impl<'a> Lexer<'a> {
|
impl<'a> Lexer<'a> {
|
||||||
/// Eats a single line comment.
|
/// Eats a single line comment.
|
||||||
pub fn eat_single_line_comment(&mut self) {
|
pub fn eat_single_line_comment(&mut self) {
|
||||||
|
@ -312,13 +314,28 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
_ => t!(":"),
|
_ => t!(":"),
|
||||||
},
|
},
|
||||||
b'$' => {
|
b'$' => match self.reader.peek() {
|
||||||
if self.reader.peek().map(|x| x.is_ascii_alphabetic() || x == b'_').unwrap_or(false)
|
Some(b'_') => return self.lex_param(),
|
||||||
{
|
Some(b'`') => {
|
||||||
return self.lex_param();
|
self.reader.next();
|
||||||
|
return self.lex_surrounded_param(true);
|
||||||
}
|
}
|
||||||
t!("$")
|
Some(x) if x.is_ascii_alphabetic() => return self.lex_param(),
|
||||||
}
|
Some(x) if !x.is_ascii() => {
|
||||||
|
let backup = self.reader.offset();
|
||||||
|
self.reader.next();
|
||||||
|
match self.reader.complete_char(x) {
|
||||||
|
Ok('⟨') => return self.lex_surrounded_param(false),
|
||||||
|
Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
|
||||||
|
Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
|
||||||
|
_ => {
|
||||||
|
self.reader.backup(backup);
|
||||||
|
t!("$")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => t!("$"),
|
||||||
|
},
|
||||||
b'#' => {
|
b'#' => {
|
||||||
self.eat_single_line_comment();
|
self.eat_single_line_comment();
|
||||||
TokenKind::WhiteSpace
|
TokenKind::WhiteSpace
|
||||||
|
|
|
@ -33,6 +33,17 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Lex a parameter whose name is wrapped in delimiters and produce a
/// `TokenKind::Parameter` token.
///
/// `is_backtick` is forwarded unchanged to `lex_surrounded_ident_err`; it
/// selects the backtick form (`true`) versus the `⟨⟩` form (`false`).
/// The visible callers in the `$` lexing arm consume the opening delimiter
/// before calling this method.
///
/// Returns the finished parameter token on success. If the surrounded
/// identifier cannot be lexed, the scratch buffer is cleared and an invalid
/// token carrying the error is returned instead.
pub fn lex_surrounded_param(&mut self, is_backtick: bool) -> Token {
|
||||||
|
// The scratch buffer is expected to be empty on entry; this is only
// checked when debug assertions are enabled.
debug_assert_eq!(self.scratch, "");
|
||||||
|
match self.lex_surrounded_ident_err(is_backtick) {
|
||||||
|
// The Ok payload is ignored; the token kind is chosen here.
Ok(_) => self.finish_token(TokenKind::Parameter),
|
||||||
|
Err(e) => {
|
||||||
|
// Discard any partially accumulated name before reporting the error.
self.scratch.clear();
|
||||||
|
self.invalid_token(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Lex a non-surrounded identifier in the form of `[a-zA-Z0-9_]*`
|
/// Lex a non-surrounded identifier in the form of `[a-zA-Z0-9_]*`
|
||||||
///
|
///
|
||||||
/// The start byte should already be a valid byte of the identifier.
|
/// The start byte should already be a valid byte of the identifier.
|
||||||
|
@ -80,7 +91,7 @@ impl<'a> Lexer<'a> {
|
||||||
/// Lex an ident which is surrounded by delimiters.
|
/// Lex an ident which is surrounded by delimiters.
|
||||||
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
|
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
|
||||||
match self.lex_surrounded_ident_err(is_backtick) {
|
match self.lex_surrounded_ident_err(is_backtick) {
|
||||||
Ok(x) => x,
|
Ok(_) => self.finish_token(TokenKind::Identifier),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
self.scratch.clear();
|
self.scratch.clear();
|
||||||
self.invalid_token(e)
|
self.invalid_token(e)
|
||||||
|
@ -89,7 +100,7 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
|
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
|
||||||
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<Token, Error> {
|
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<(), Error> {
|
||||||
loop {
|
loop {
|
||||||
let Some(x) = self.reader.next() else {
|
let Some(x) = self.reader.next() else {
|
||||||
let end_char = if is_backtick {
|
let end_char = if is_backtick {
|
||||||
|
@ -103,7 +114,7 @@ impl<'a> Lexer<'a> {
|
||||||
match x {
|
match x {
|
||||||
b'`' if is_backtick => {
|
b'`' if is_backtick => {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
return Ok(self.finish_token(TokenKind::Identifier));
|
return Ok(());
|
||||||
}
|
}
|
||||||
b'\0' => {
|
b'\0' => {
|
||||||
// null bytes not allowed
|
// null bytes not allowed
|
||||||
|
@ -162,7 +173,7 @@ impl<'a> Lexer<'a> {
|
||||||
let c = self.reader.complete_char(x)?;
|
let c = self.reader.complete_char(x)?;
|
||||||
if !is_backtick && c == '⟩' {
|
if !is_backtick && c == '⟩' {
|
||||||
self.string = Some(mem::take(&mut self.scratch));
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
return Ok(self.finish_token(TokenKind::Identifier));
|
return Ok(());
|
||||||
}
|
}
|
||||||
self.scratch.push(c);
|
self.scratch.push(c);
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,10 +76,12 @@ impl<'a> BytesReader<'a> {
|
||||||
pub fn peek(&self) -> Option<u8> {
|
pub fn peek(&self) -> Option<u8> {
|
||||||
self.remaining().first().copied()
|
self.remaining().first().copied()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn span(&self, span: Span) -> &'a [u8] {
|
pub fn span(&self, span: Span) -> &'a [u8] {
|
||||||
&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
|
&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
|
pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
|
||||||
const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
|
const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
|
||||||
|
@ -87,7 +89,7 @@ impl<'a> BytesReader<'a> {
|
||||||
|
|
||||||
let byte = self.next().ok_or(CharError::Eof)?;
|
let byte = self.next().ok_or(CharError::Eof)?;
|
||||||
if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
|
if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
|
||||||
return Err(CharError::Eof);
|
return Err(CharError::Unicode);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(byte & CONTINUE_BYTE_MASK)
|
Ok(byte & CONTINUE_BYTE_MASK)
|
||||||
|
|
|
@ -140,7 +140,7 @@ macro_rules! expected_whitespace {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
macro_rules! test_parse {
|
macro_rules! test_parse {
|
||||||
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:literal) => {{
|
($func:ident$( ( $($e:expr),* $(,)? ))? , $t:expr) => {{
|
||||||
let mut parser = $crate::syn::parser::Parser::new($t.as_bytes());
|
let mut parser = $crate::syn::parser::Parser::new($t.as_bytes());
|
||||||
let mut stack = reblessive::Stack::new();
|
let mut stack = reblessive::Stack::new();
|
||||||
stack.enter(|ctx| parser.$func(ctx,$($($e),*)*)).finish()
|
stack.enter(|ctx| parser.$func(ctx,$($($e),*)*)).finish()
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
use nom::AsBytes;
|
||||||
|
|
||||||
use crate::{sql, syn::parser::mac::test_parse};
|
use crate::{sql, syn::parser::mac::test_parse};
|
||||||
|
|
||||||
mod limit;
|
mod limit;
|
||||||
|
@ -11,3 +13,25 @@ fn multiple_semicolons() {
|
||||||
let expected = sql::Query(sql::Statements(vec![]));
|
let expected = sql::Query(sql::Statements(vec![]));
|
||||||
assert_eq!(res, expected);
|
assert_eq!(res, expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Regression test: a query that declares and then returns a parameter whose
/// name is written in the `$⟨…⟩` delimited form must parse without error.
/// The parse result is unwrapped, so any parse error fails the test.
#[test]
|
||||||
|
fn escaped_params() {
|
||||||
|
let src = r#"LET $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩ = 5;
|
||||||
|
RETURN $⟨R-_fYU8Wa31kg7tz0JI6Kme⟩"#;
|
||||||
|
|
||||||
|
// Dumps every byte of the source as `index: 8-digit binary`.
// NOTE(review): this looks like leftover debugging output; it does not
// influence the assertion below — consider removing.
for (idx, b) in src.as_bytes().iter().enumerate() {
|
||||||
|
println!("{:0>4}: {:0>8b}", idx, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The actual check: parsing the whole query must succeed.
test_parse!(parse_query, src).unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Regression test: a query that declares and then returns a parameter whose
/// name is written in the backtick-delimited form (``$`…` ``) must parse
/// without error. The parse result is unwrapped, so any parse error fails
/// the test.
#[test]
|
||||||
|
fn escaped_params_backtick() {
|
||||||
|
test_parse!(
|
||||||
|
parse_query,
|
||||||
|
r#"LET $`R-_fYU8Wa31kg7tz0JI6Kme` = 5;
|
||||||
|
RETURN $`R-_fYU8Wa31kg7tz0JI6Kme`"#
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue