From 1e5bd504b24801dc74a8d83fc42fbd9c2c6238b8 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Mon, 19 Feb 2024 16:07:39 +0100 Subject: [PATCH] Feature: some more error message improvements for the new parser. (#3416) --- core/src/sql/v1/value/value.rs | 2 +- core/src/syn/common.rs | 7 +- core/src/syn/v2/lexer/byte.rs | 3 +- core/src/syn/v2/lexer/ident.rs | 8 +- core/src/syn/v2/lexer/keywords.rs | 544 +++++++++++++------------- core/src/syn/v2/parser/basic.rs | 40 +- core/src/syn/v2/parser/error.rs | 112 ++++-- core/src/syn/v2/parser/expression.rs | 22 +- core/src/syn/v2/parser/mac.rs | 8 +- core/src/syn/v2/parser/mod.rs | 30 +- core/src/syn/v2/parser/prime.rs | 148 +++++-- core/src/syn/v2/parser/stmt/mod.rs | 39 +- core/src/syn/v2/parser/stmt/parts.rs | 128 ++++-- core/src/syn/v2/parser/stmt/select.rs | 98 +++-- core/src/syn/v2/parser/thing.rs | 23 +- core/src/syn/v2/token/mod.rs | 138 ++++--- 16 files changed, 856 insertions(+), 494 deletions(-) diff --git a/core/src/sql/v1/value/value.rs b/core/src/sql/v1/value/value.rs index 29b2e867..60babbfb 100644 --- a/core/src/sql/v1/value/value.rs +++ b/core/src/sql/v1/value/value.rs @@ -2861,7 +2861,7 @@ mod tests { assert_eq!(12, std::mem::size_of::()); assert_eq!(24, std::mem::size_of::()); assert_eq!(24, std::mem::size_of::()); - assert!(56 >= std::mem::size_of::()); + assert_eq!(56, std::mem::size_of::()); assert_eq!(24, std::mem::size_of::()); assert_eq!(24, std::mem::size_of::()); assert_eq!(24, std::mem::size_of::()); diff --git a/core/src/syn/common.rs b/core/src/syn/common.rs index e25e79b0..2e3a1601 100644 --- a/core/src/syn/common.rs +++ b/core/src/syn/common.rs @@ -99,13 +99,13 @@ impl Location { // Bytes of input prior to line being iteratated. 
let mut bytes_prior = 0; - let mut iterator = LineIterator::new(source).enumerate(); + let mut iterator = LineIterator::new(source).enumerate().peekable(); let start = loop { - let Some((line_idx, (line, seperator_offset))) = iterator.next() else { + let Some((line_idx, (line, seperator_offset))) = iterator.peek() else { panic!("tried to find location of span not belonging to string"); }; let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; - if bytes_so_far >= offset { + if bytes_so_far > offset { // found line. let line_offset = offset - bytes_prior; let column = if line_offset > line.len() { @@ -133,6 +133,7 @@ impl Location { } } bytes_prior = bytes_so_far; + iterator.next(); }; loop { diff --git a/core/src/syn/v2/lexer/byte.rs b/core/src/syn/v2/lexer/byte.rs index c45da866..affcab6d 100644 --- a/core/src/syn/v2/lexer/byte.rs +++ b/core/src/syn/v2/lexer/byte.rs @@ -52,10 +52,11 @@ impl<'a> Lexer<'a> { return Err(Error::UnexpectedEof); }; if let b'*' = byte { - let Some(byte) = self.reader.next() else { + let Some(byte) = self.reader.peek() else { return Err(Error::UnexpectedEof); }; if b'/' == byte { + self.reader.next(); self.set_whitespace_span(self.current_span()); self.skip_offset(); return Ok(()); diff --git a/core/src/syn/v2/lexer/ident.rs b/core/src/syn/v2/lexer/ident.rs index 665c441b..5b0c35d1 100644 --- a/core/src/syn/v2/lexer/ident.rs +++ b/core/src/syn/v2/lexer/ident.rs @@ -55,9 +55,11 @@ impl<'a> Lexer<'a> { // When finished parsing the identifier, try to match it to an keyword. // If there is one, return it as the keyword. Original identifier can be reconstructed // from the token. 
- if let Some(x) = KEYWORDS.get(&UniCase::ascii(&self.scratch)).copied().flatten() { - self.scratch.clear(); - return self.finish_token(x); + if let Some(x) = KEYWORDS.get(&UniCase::ascii(&self.scratch)).copied() { + if x != TokenKind::Identifier { + self.scratch.clear(); + return self.finish_token(x); + } } if self.scratch == "NaN" { diff --git a/core/src/syn/v2/lexer/keywords.rs b/core/src/syn/v2/lexer/keywords.rs index e44d2dfb..5d1250cf 100644 --- a/core/src/syn/v2/lexer/keywords.rs +++ b/core/src/syn/v2/lexer/keywords.rs @@ -6,290 +6,290 @@ use phf::phf_map; use unicase::UniCase; /// A map for mapping keyword strings to a tokenkind, -pub(crate) static KEYWORDS: phf::Map, Option> = phf_map! { +pub(crate) static KEYWORDS: phf::Map, TokenKind> = phf_map! { // Keywords - UniCase::ascii("AFTER") => Some(TokenKind::Keyword(Keyword::After)), - UniCase::ascii("ALL") => Some(TokenKind::Keyword(Keyword::All)), - UniCase::ascii("ANALYZE") => Some(TokenKind::Keyword(Keyword::Analyze)), - UniCase::ascii("ANALYZER") => Some(TokenKind::Keyword(Keyword::Analyzer)), - UniCase::ascii("AS") => Some(TokenKind::Keyword(Keyword::As)), - UniCase::ascii("ASCENDING") => Some(TokenKind::Keyword(Keyword::Ascending)), - UniCase::ascii("ASC") => Some(TokenKind::Keyword(Keyword::Ascending)), - UniCase::ascii("ASCII") => Some(TokenKind::Keyword(Keyword::Ascii)), - UniCase::ascii("ASSERT") => Some(TokenKind::Keyword(Keyword::Assert)), - UniCase::ascii("AT") => Some(TokenKind::Keyword(Keyword::At)), - UniCase::ascii("BEFORE") => Some(TokenKind::Keyword(Keyword::Before)), - UniCase::ascii("BEGIN") => Some(TokenKind::Keyword(Keyword::Begin)), - UniCase::ascii("BLANK") => Some(TokenKind::Keyword(Keyword::Blank)), - UniCase::ascii("BM25") => Some(TokenKind::Keyword(Keyword::Bm25)), - UniCase::ascii("BREAK") => Some(TokenKind::Keyword(Keyword::Break)), - UniCase::ascii("BY") => Some(TokenKind::Keyword(Keyword::By)), - UniCase::ascii("CAMEL") => Some(TokenKind::Keyword(Keyword::Camel)), - 
UniCase::ascii("CANCEL") => Some(TokenKind::Keyword(Keyword::Cancel)), - UniCase::ascii("CHANGEFEED") => Some(TokenKind::Keyword(Keyword::ChangeFeed)), - UniCase::ascii("CHANGES") => Some(TokenKind::Keyword(Keyword::Changes)), - UniCase::ascii("CAPACITY") => Some(TokenKind::Keyword(Keyword::Capacity)), - UniCase::ascii("CLASS") => Some(TokenKind::Keyword(Keyword::Class)), - UniCase::ascii("COMMENT") => Some(TokenKind::Keyword(Keyword::Comment)), - UniCase::ascii("COMMIT") => Some(TokenKind::Keyword(Keyword::Commit)), - UniCase::ascii("CONTENT") => Some(TokenKind::Keyword(Keyword::Content)), - UniCase::ascii("CONTINUE") => Some(TokenKind::Keyword(Keyword::Continue)), - UniCase::ascii("CREATE") => Some(TokenKind::Keyword(Keyword::Create)), - UniCase::ascii("DATABASE") => Some(TokenKind::Keyword(Keyword::Database)), - UniCase::ascii("DB") => Some(TokenKind::Keyword(Keyword::Database)), - UniCase::ascii("DEFAULT") => Some(TokenKind::Keyword(Keyword::Default)), - UniCase::ascii("DEFINE") => Some(TokenKind::Keyword(Keyword::Define)), - UniCase::ascii("DELETE") => Some(TokenKind::Keyword(Keyword::Delete)), - UniCase::ascii("DESCENDING") => Some(TokenKind::Keyword(Keyword::Descending)), - UniCase::ascii("DESC") => Some(TokenKind::Keyword(Keyword::Descending)), - UniCase::ascii("DIFF") => Some(TokenKind::Keyword(Keyword::Diff)), - UniCase::ascii("DIMENSION") => Some(TokenKind::Keyword(Keyword::Dimension)), - UniCase::ascii("DISTANCE") => Some(TokenKind::Keyword(Keyword::Distance)), - UniCase::ascii("DIST") => Some(TokenKind::Keyword(Keyword::Distance)), - UniCase::ascii("DOC_IDS_CACHE") => Some(TokenKind::Keyword(Keyword::DocIdsCache)), - UniCase::ascii("DOC_IDS_ORDER") => Some(TokenKind::Keyword(Keyword::DocIdsOrder)), - UniCase::ascii("DOC_LENGTHS_CACHE") => Some(TokenKind::Keyword(Keyword::DocLengthsCache)), - UniCase::ascii("DOC_LENGTHS_ORDER") => Some(TokenKind::Keyword(Keyword::DocLengthsOrder)), - UniCase::ascii("DROP") => Some(TokenKind::Keyword(Keyword::Drop)), - 
UniCase::ascii("DUPLICATE") => Some(TokenKind::Keyword(Keyword::Duplicate)), - UniCase::ascii("EDGENGRAM") => Some(TokenKind::Keyword(Keyword::Edgengram)), - UniCase::ascii("EVENT") => Some(TokenKind::Keyword(Keyword::Event)), - UniCase::ascii("ELSE") => Some(TokenKind::Keyword(Keyword::Else)), - UniCase::ascii("END") => Some(TokenKind::Keyword(Keyword::End)), - UniCase::ascii("EXISTS") => Some(TokenKind::Keyword(Keyword::Exists)), - UniCase::ascii("EXPLAIN") => Some(TokenKind::Keyword(Keyword::Explain)), - UniCase::ascii("false") => Some(TokenKind::Keyword(Keyword::False)), - UniCase::ascii("FETCH") => Some(TokenKind::Keyword(Keyword::Fetch)), - UniCase::ascii("FIELD") => Some(TokenKind::Keyword(Keyword::Field)), - UniCase::ascii("FIELDS") => Some(TokenKind::Keyword(Keyword::Fields)), - UniCase::ascii("COLUMNS") => Some(TokenKind::Keyword(Keyword::Fields)), - UniCase::ascii("FILTERS") => Some(TokenKind::Keyword(Keyword::Filters)), - UniCase::ascii("FLEXIBLE") => Some(TokenKind::Keyword(Keyword::Flexible)), - UniCase::ascii("FLEXI") => Some(TokenKind::Keyword(Keyword::Flexible)), - UniCase::ascii("FLEX") => Some(TokenKind::Keyword(Keyword::Flexible)), - UniCase::ascii("FOR") => Some(TokenKind::Keyword(Keyword::For)), - UniCase::ascii("FROM") => Some(TokenKind::Keyword(Keyword::From)), - UniCase::ascii("FULL") => Some(TokenKind::Keyword(Keyword::Full)), - UniCase::ascii("FUNCTION") => Some(TokenKind::Keyword(Keyword::Function)), - UniCase::ascii("GROUP") => Some(TokenKind::Keyword(Keyword::Group)), - UniCase::ascii("HIGHLIGHTS") => Some(TokenKind::Keyword(Keyword::Highlights)), - UniCase::ascii("IGNORE") => Some(TokenKind::Keyword(Keyword::Ignore)), - UniCase::ascii("INDEX") => Some(TokenKind::Keyword(Keyword::Index)), - UniCase::ascii("INFO") => Some(TokenKind::Keyword(Keyword::Info)), - UniCase::ascii("INSERT") => Some(TokenKind::Keyword(Keyword::Insert)), - UniCase::ascii("INTO") => Some(TokenKind::Keyword(Keyword::Into)), - UniCase::ascii("IF") => 
Some(TokenKind::Keyword(Keyword::If)), - UniCase::ascii("IS") => Some(TokenKind::Keyword(Keyword::Is)), - UniCase::ascii("KEY") => Some(TokenKind::Keyword(Keyword::Key)), - UniCase::ascii("KILL") => Some(TokenKind::Keyword(Keyword::Kill)), - UniCase::ascii("KNN") => Some(TokenKind::Keyword(Keyword::Knn)), - UniCase::ascii("LET") => Some(TokenKind::Keyword(Keyword::Let)), - UniCase::ascii("LIMIT") => Some(TokenKind::Keyword(Keyword::Limit)), - UniCase::ascii("LIVE") => Some(TokenKind::Keyword(Keyword::Live)), - UniCase::ascii("LOWERCASE") => Some(TokenKind::Keyword(Keyword::Lowercase)), - UniCase::ascii("MERGE") => Some(TokenKind::Keyword(Keyword::Merge)), - UniCase::ascii("MODEL") => Some(TokenKind::Keyword(Keyword::Model)), - UniCase::ascii("MTREE") => Some(TokenKind::Keyword(Keyword::MTree)), - UniCase::ascii("MTREE_CACHE") => Some(TokenKind::Keyword(Keyword::MTreeCache)), - UniCase::ascii("NAMESPACE") => Some(TokenKind::Keyword(Keyword::Namespace)), - UniCase::ascii("NS") => Some(TokenKind::Keyword(Keyword::Namespace)), - UniCase::ascii("NGRAM") => Some(TokenKind::Keyword(Keyword::Ngram)), - UniCase::ascii("NO") => Some(TokenKind::Keyword(Keyword::No)), - UniCase::ascii("NOINDEX") => Some(TokenKind::Keyword(Keyword::NoIndex)), - UniCase::ascii("NONE") => Some(TokenKind::Keyword(Keyword::None)), - UniCase::ascii("NULL") => Some(TokenKind::Keyword(Keyword::Null)), - UniCase::ascii("NUMERIC") => Some(TokenKind::Keyword(Keyword::Numeric)), - UniCase::ascii("OMIT") => Some(TokenKind::Keyword(Keyword::Omit)), - UniCase::ascii("ON") => Some(TokenKind::Keyword(Keyword::On)), - UniCase::ascii("ONLY") => Some(TokenKind::Keyword(Keyword::Only)), - UniCase::ascii("OPTION") => Some(TokenKind::Keyword(Keyword::Option)), - UniCase::ascii("ORDER") => Some(TokenKind::Keyword(Keyword::Order)), - UniCase::ascii("PARALLEL") => Some(TokenKind::Keyword(Keyword::Parallel)), - UniCase::ascii("PARAM") => Some(TokenKind::Keyword(Keyword::Param)), - UniCase::ascii("PASSHASH") => 
Some(TokenKind::Keyword(Keyword::Passhash)), - UniCase::ascii("PASSWORD") => Some(TokenKind::Keyword(Keyword::Password)), - UniCase::ascii("PATCH") => Some(TokenKind::Keyword(Keyword::Patch)), - UniCase::ascii("PERMISSIONS") => Some(TokenKind::Keyword(Keyword::Permissions)), - UniCase::ascii("POSTINGS_CACHE") => Some(TokenKind::Keyword(Keyword::PostingsCache)), - UniCase::ascii("POSTINGS_ORDER") => Some(TokenKind::Keyword(Keyword::PostingsOrder)), - UniCase::ascii("PUNCT") => Some(TokenKind::Keyword(Keyword::Punct)), - UniCase::ascii("READONLY") => Some(TokenKind::Keyword(Keyword::Readonly)), - UniCase::ascii("RELATE") => Some(TokenKind::Keyword(Keyword::Relate)), - UniCase::ascii("REMOVE") => Some(TokenKind::Keyword(Keyword::Remove)), - UniCase::ascii("REPLACE") => Some(TokenKind::Keyword(Keyword::Replace)), - UniCase::ascii("RETURN") => Some(TokenKind::Keyword(Keyword::Return)), - UniCase::ascii("ROLES") => Some(TokenKind::Keyword(Keyword::Roles)), - UniCase::ascii("ROOT") => Some(TokenKind::Keyword(Keyword::Root)), - UniCase::ascii("KV") => Some(TokenKind::Keyword(Keyword::Root)), - UniCase::ascii("SCHEMAFULL") => Some(TokenKind::Keyword(Keyword::Schemafull)), - UniCase::ascii("SCHEMAFUL") => Some(TokenKind::Keyword(Keyword::Schemafull)), - UniCase::ascii("SCHEMALESS") => Some(TokenKind::Keyword(Keyword::Schemaless)), - UniCase::ascii("SCOPE") => Some(TokenKind::Keyword(Keyword::Scope)), - UniCase::ascii("SC") => Some(TokenKind::Keyword(Keyword::Scope)), - UniCase::ascii("SEARCH") => Some(TokenKind::Keyword(Keyword::Search)), - UniCase::ascii("SELECT") => Some(TokenKind::Keyword(Keyword::Select)), - UniCase::ascii("SESSION") => Some(TokenKind::Keyword(Keyword::Session)), - UniCase::ascii("SET") => Some(TokenKind::Keyword(Keyword::Set)), - UniCase::ascii("SHOW") => Some(TokenKind::Keyword(Keyword::Show)), - UniCase::ascii("SIGNIN") => Some(TokenKind::Keyword(Keyword::Signin)), - UniCase::ascii("SIGNUP") => Some(TokenKind::Keyword(Keyword::Signup)), - 
UniCase::ascii("SINCE") => Some(TokenKind::Keyword(Keyword::Since)), - UniCase::ascii("SLEEP") => Some(TokenKind::Keyword(Keyword::Sleep)), - UniCase::ascii("SNOWBALL") => Some(TokenKind::Keyword(Keyword::Snowball)), - UniCase::ascii("SPLIT") => Some(TokenKind::Keyword(Keyword::Split)), - UniCase::ascii("START") => Some(TokenKind::Keyword(Keyword::Start)), - UniCase::ascii("TABLE") => Some(TokenKind::Keyword(Keyword::Table)), - UniCase::ascii("TB") => Some(TokenKind::Keyword(Keyword::Table)), - UniCase::ascii("TERMS_CACHE") => Some(TokenKind::Keyword(Keyword::TermsCache)), - UniCase::ascii("TERMS_ORDER") => Some(TokenKind::Keyword(Keyword::TermsOrder)), - UniCase::ascii("THEN") => Some(TokenKind::Keyword(Keyword::Then)), - UniCase::ascii("THROW") => Some(TokenKind::Keyword(Keyword::Throw)), - UniCase::ascii("TIMEOUT") => Some(TokenKind::Keyword(Keyword::Timeout)), - UniCase::ascii("TOKENIZERS") => Some(TokenKind::Keyword(Keyword::Tokenizers)), - UniCase::ascii("TOKEN") => Some(TokenKind::Keyword(Keyword::Token)), - UniCase::ascii("TRANSACTION") => Some(TokenKind::Keyword(Keyword::Transaction)), - UniCase::ascii("true") => Some(TokenKind::Keyword(Keyword::True)), - UniCase::ascii("TYPE") => Some(TokenKind::Keyword(Keyword::Type)), - UniCase::ascii("UNIQUE") => Some(TokenKind::Keyword(Keyword::Unique)), - UniCase::ascii("UNSET") => Some(TokenKind::Keyword(Keyword::Unset)), - UniCase::ascii("UPDATE") => Some(TokenKind::Keyword(Keyword::Update)), - UniCase::ascii("UPPERCASE") => Some(TokenKind::Keyword(Keyword::Uppercase)), - UniCase::ascii("USE") => Some(TokenKind::Keyword(Keyword::Use)), - UniCase::ascii("USER") => Some(TokenKind::Keyword(Keyword::User)), - UniCase::ascii("VALUE") => Some(TokenKind::Keyword(Keyword::Value)), - UniCase::ascii("VALUES") => Some(TokenKind::Keyword(Keyword::Values)), - UniCase::ascii("VERSION") => Some(TokenKind::Keyword(Keyword::Version)), - UniCase::ascii("VS") => Some(TokenKind::Keyword(Keyword::Vs)), - UniCase::ascii("WHEN") => 
Some(TokenKind::Keyword(Keyword::When)), - UniCase::ascii("WHERE") => Some(TokenKind::Keyword(Keyword::Where)), - UniCase::ascii("WITH") => Some(TokenKind::Keyword(Keyword::With)), - UniCase::ascii("ALLINSIDE") => Some(TokenKind::Keyword(Keyword::AllInside)), - UniCase::ascii("ANDKW") => Some(TokenKind::Keyword(Keyword::AndKw)), - UniCase::ascii("ANYINSIDE") => Some(TokenKind::Keyword(Keyword::AnyInside)), - UniCase::ascii("INSIDE") => Some(TokenKind::Keyword(Keyword::Inside)), - UniCase::ascii("INTERSECTS") => Some(TokenKind::Keyword(Keyword::Intersects)), - UniCase::ascii("NONEINSIDE") => Some(TokenKind::Keyword(Keyword::NoneInside)), - UniCase::ascii("NOTINSIDE") => Some(TokenKind::Keyword(Keyword::NotInside)), - UniCase::ascii("OR") => Some(TokenKind::Keyword(Keyword::OrKw)), - UniCase::ascii("OUTSIDE") => Some(TokenKind::Keyword(Keyword::Outside)), - UniCase::ascii("NOT") => Some(TokenKind::Keyword(Keyword::Not)), - UniCase::ascii("AND") => Some(TokenKind::Keyword(Keyword::And)), - UniCase::ascii("COLLATE") => Some(TokenKind::Keyword(Keyword::Collate)), - UniCase::ascii("CONTAINSALL") => Some(TokenKind::Keyword(Keyword::ContainsAll)), - UniCase::ascii("CONTAINSANY") => Some(TokenKind::Keyword(Keyword::ContainsAny)), - UniCase::ascii("CONTAINSNONE") => Some(TokenKind::Keyword(Keyword::ContainsNone)), - UniCase::ascii("CONTAINSNOT") => Some(TokenKind::Keyword(Keyword::ContainsNot)), - UniCase::ascii("CONTAINS") => Some(TokenKind::Keyword(Keyword::Contains)), - UniCase::ascii("IN") => Some(TokenKind::Keyword(Keyword::In)), + UniCase::ascii("AFTER") => TokenKind::Keyword(Keyword::After), + UniCase::ascii("ALL") => TokenKind::Keyword(Keyword::All), + UniCase::ascii("ANALYZE") => TokenKind::Keyword(Keyword::Analyze), + UniCase::ascii("ANALYZER") => TokenKind::Keyword(Keyword::Analyzer), + UniCase::ascii("AS") => TokenKind::Keyword(Keyword::As), + UniCase::ascii("ASCENDING") => TokenKind::Keyword(Keyword::Ascending), + UniCase::ascii("ASC") => 
TokenKind::Keyword(Keyword::Ascending), + UniCase::ascii("ASCII") => TokenKind::Keyword(Keyword::Ascii), + UniCase::ascii("ASSERT") => TokenKind::Keyword(Keyword::Assert), + UniCase::ascii("AT") => TokenKind::Keyword(Keyword::At), + UniCase::ascii("BEFORE") => TokenKind::Keyword(Keyword::Before), + UniCase::ascii("BEGIN") => TokenKind::Keyword(Keyword::Begin), + UniCase::ascii("BLANK") => TokenKind::Keyword(Keyword::Blank), + UniCase::ascii("BM25") => TokenKind::Keyword(Keyword::Bm25), + UniCase::ascii("BREAK") => TokenKind::Keyword(Keyword::Break), + UniCase::ascii("BY") => TokenKind::Keyword(Keyword::By), + UniCase::ascii("CAMEL") => TokenKind::Keyword(Keyword::Camel), + UniCase::ascii("CANCEL") => TokenKind::Keyword(Keyword::Cancel), + UniCase::ascii("CHANGEFEED") => TokenKind::Keyword(Keyword::ChangeFeed), + UniCase::ascii("CHANGES") => TokenKind::Keyword(Keyword::Changes), + UniCase::ascii("CAPACITY") => TokenKind::Keyword(Keyword::Capacity), + UniCase::ascii("CLASS") => TokenKind::Keyword(Keyword::Class), + UniCase::ascii("COMMENT") => TokenKind::Keyword(Keyword::Comment), + UniCase::ascii("COMMIT") => TokenKind::Keyword(Keyword::Commit), + UniCase::ascii("CONTENT") => TokenKind::Keyword(Keyword::Content), + UniCase::ascii("CONTINUE") => TokenKind::Keyword(Keyword::Continue), + UniCase::ascii("CREATE") => TokenKind::Keyword(Keyword::Create), + UniCase::ascii("DATABASE") => TokenKind::Keyword(Keyword::Database), + UniCase::ascii("DB") => TokenKind::Keyword(Keyword::Database), + UniCase::ascii("DEFAULT") => TokenKind::Keyword(Keyword::Default), + UniCase::ascii("DEFINE") => TokenKind::Keyword(Keyword::Define), + UniCase::ascii("DELETE") => TokenKind::Keyword(Keyword::Delete), + UniCase::ascii("DESCENDING") => TokenKind::Keyword(Keyword::Descending), + UniCase::ascii("DESC") => TokenKind::Keyword(Keyword::Descending), + UniCase::ascii("DIFF") => TokenKind::Keyword(Keyword::Diff), + UniCase::ascii("DIMENSION") => TokenKind::Keyword(Keyword::Dimension), + 
UniCase::ascii("DISTANCE") => TokenKind::Keyword(Keyword::Distance), + UniCase::ascii("DIST") => TokenKind::Keyword(Keyword::Distance), + UniCase::ascii("DOC_IDS_CACHE") => TokenKind::Keyword(Keyword::DocIdsCache), + UniCase::ascii("DOC_IDS_ORDER") => TokenKind::Keyword(Keyword::DocIdsOrder), + UniCase::ascii("DOC_LENGTHS_CACHE") => TokenKind::Keyword(Keyword::DocLengthsCache), + UniCase::ascii("DOC_LENGTHS_ORDER") => TokenKind::Keyword(Keyword::DocLengthsOrder), + UniCase::ascii("DROP") => TokenKind::Keyword(Keyword::Drop), + UniCase::ascii("DUPLICATE") => TokenKind::Keyword(Keyword::Duplicate), + UniCase::ascii("EDGENGRAM") => TokenKind::Keyword(Keyword::Edgengram), + UniCase::ascii("EVENT") => TokenKind::Keyword(Keyword::Event), + UniCase::ascii("ELSE") => TokenKind::Keyword(Keyword::Else), + UniCase::ascii("END") => TokenKind::Keyword(Keyword::End), + UniCase::ascii("EXISTS") => TokenKind::Keyword(Keyword::Exists), + UniCase::ascii("EXPLAIN") => TokenKind::Keyword(Keyword::Explain), + UniCase::ascii("false") => TokenKind::Keyword(Keyword::False), + UniCase::ascii("FETCH") => TokenKind::Keyword(Keyword::Fetch), + UniCase::ascii("FIELD") => TokenKind::Keyword(Keyword::Field), + UniCase::ascii("FIELDS") => TokenKind::Keyword(Keyword::Fields), + UniCase::ascii("COLUMNS") => TokenKind::Keyword(Keyword::Fields), + UniCase::ascii("FILTERS") => TokenKind::Keyword(Keyword::Filters), + UniCase::ascii("FLEXIBLE") => TokenKind::Keyword(Keyword::Flexible), + UniCase::ascii("FLEXI") => TokenKind::Keyword(Keyword::Flexible), + UniCase::ascii("FLEX") => TokenKind::Keyword(Keyword::Flexible), + UniCase::ascii("FOR") => TokenKind::Keyword(Keyword::For), + UniCase::ascii("FROM") => TokenKind::Keyword(Keyword::From), + UniCase::ascii("FULL") => TokenKind::Keyword(Keyword::Full), + UniCase::ascii("FUNCTION") => TokenKind::Keyword(Keyword::Function), + UniCase::ascii("GROUP") => TokenKind::Keyword(Keyword::Group), + UniCase::ascii("HIGHLIGHTS") => 
TokenKind::Keyword(Keyword::Highlights), + UniCase::ascii("IGNORE") => TokenKind::Keyword(Keyword::Ignore), + UniCase::ascii("INDEX") => TokenKind::Keyword(Keyword::Index), + UniCase::ascii("INFO") => TokenKind::Keyword(Keyword::Info), + UniCase::ascii("INSERT") => TokenKind::Keyword(Keyword::Insert), + UniCase::ascii("INTO") => TokenKind::Keyword(Keyword::Into), + UniCase::ascii("IF") => TokenKind::Keyword(Keyword::If), + UniCase::ascii("IS") => TokenKind::Keyword(Keyword::Is), + UniCase::ascii("KEY") => TokenKind::Keyword(Keyword::Key), + UniCase::ascii("KILL") => TokenKind::Keyword(Keyword::Kill), + UniCase::ascii("KNN") => TokenKind::Keyword(Keyword::Knn), + UniCase::ascii("LET") => TokenKind::Keyword(Keyword::Let), + UniCase::ascii("LIMIT") => TokenKind::Keyword(Keyword::Limit), + UniCase::ascii("LIVE") => TokenKind::Keyword(Keyword::Live), + UniCase::ascii("LOWERCASE") => TokenKind::Keyword(Keyword::Lowercase), + UniCase::ascii("MERGE") => TokenKind::Keyword(Keyword::Merge), + UniCase::ascii("MODEL") => TokenKind::Keyword(Keyword::Model), + UniCase::ascii("MTREE") => TokenKind::Keyword(Keyword::MTree), + UniCase::ascii("MTREE_CACHE") => TokenKind::Keyword(Keyword::MTreeCache), + UniCase::ascii("NAMESPACE") => TokenKind::Keyword(Keyword::Namespace), + UniCase::ascii("NS") => TokenKind::Keyword(Keyword::Namespace), + UniCase::ascii("NGRAM") => TokenKind::Keyword(Keyword::Ngram), + UniCase::ascii("NO") => TokenKind::Keyword(Keyword::No), + UniCase::ascii("NOINDEX") => TokenKind::Keyword(Keyword::NoIndex), + UniCase::ascii("NONE") => TokenKind::Keyword(Keyword::None), + UniCase::ascii("NULL") => TokenKind::Keyword(Keyword::Null), + UniCase::ascii("NUMERIC") => TokenKind::Keyword(Keyword::Numeric), + UniCase::ascii("OMIT") => TokenKind::Keyword(Keyword::Omit), + UniCase::ascii("ON") => TokenKind::Keyword(Keyword::On), + UniCase::ascii("ONLY") => TokenKind::Keyword(Keyword::Only), + UniCase::ascii("OPTION") => TokenKind::Keyword(Keyword::Option), + 
UniCase::ascii("ORDER") => TokenKind::Keyword(Keyword::Order), + UniCase::ascii("PARALLEL") => TokenKind::Keyword(Keyword::Parallel), + UniCase::ascii("PARAM") => TokenKind::Keyword(Keyword::Param), + UniCase::ascii("PASSHASH") => TokenKind::Keyword(Keyword::Passhash), + UniCase::ascii("PASSWORD") => TokenKind::Keyword(Keyword::Password), + UniCase::ascii("PATCH") => TokenKind::Keyword(Keyword::Patch), + UniCase::ascii("PERMISSIONS") => TokenKind::Keyword(Keyword::Permissions), + UniCase::ascii("POSTINGS_CACHE") => TokenKind::Keyword(Keyword::PostingsCache), + UniCase::ascii("POSTINGS_ORDER") => TokenKind::Keyword(Keyword::PostingsOrder), + UniCase::ascii("PUNCT") => TokenKind::Keyword(Keyword::Punct), + UniCase::ascii("READONLY") => TokenKind::Keyword(Keyword::Readonly), + UniCase::ascii("RELATE") => TokenKind::Keyword(Keyword::Relate), + UniCase::ascii("REMOVE") => TokenKind::Keyword(Keyword::Remove), + UniCase::ascii("REPLACE") => TokenKind::Keyword(Keyword::Replace), + UniCase::ascii("RETURN") => TokenKind::Keyword(Keyword::Return), + UniCase::ascii("ROLES") => TokenKind::Keyword(Keyword::Roles), + UniCase::ascii("ROOT") => TokenKind::Keyword(Keyword::Root), + UniCase::ascii("KV") => TokenKind::Keyword(Keyword::Root), + UniCase::ascii("SCHEMAFULL") => TokenKind::Keyword(Keyword::Schemafull), + UniCase::ascii("SCHEMAFUL") => TokenKind::Keyword(Keyword::Schemafull), + UniCase::ascii("SCHEMALESS") => TokenKind::Keyword(Keyword::Schemaless), + UniCase::ascii("SCOPE") => TokenKind::Keyword(Keyword::Scope), + UniCase::ascii("SC") => TokenKind::Keyword(Keyword::Scope), + UniCase::ascii("SEARCH") => TokenKind::Keyword(Keyword::Search), + UniCase::ascii("SELECT") => TokenKind::Keyword(Keyword::Select), + UniCase::ascii("SESSION") => TokenKind::Keyword(Keyword::Session), + UniCase::ascii("SET") => TokenKind::Keyword(Keyword::Set), + UniCase::ascii("SHOW") => TokenKind::Keyword(Keyword::Show), + UniCase::ascii("SIGNIN") => TokenKind::Keyword(Keyword::Signin), + 
UniCase::ascii("SIGNUP") => TokenKind::Keyword(Keyword::Signup), + UniCase::ascii("SINCE") => TokenKind::Keyword(Keyword::Since), + UniCase::ascii("SLEEP") => TokenKind::Keyword(Keyword::Sleep), + UniCase::ascii("SNOWBALL") => TokenKind::Keyword(Keyword::Snowball), + UniCase::ascii("SPLIT") => TokenKind::Keyword(Keyword::Split), + UniCase::ascii("START") => TokenKind::Keyword(Keyword::Start), + UniCase::ascii("TABLE") => TokenKind::Keyword(Keyword::Table), + UniCase::ascii("TB") => TokenKind::Keyword(Keyword::Table), + UniCase::ascii("TERMS_CACHE") => TokenKind::Keyword(Keyword::TermsCache), + UniCase::ascii("TERMS_ORDER") => TokenKind::Keyword(Keyword::TermsOrder), + UniCase::ascii("THEN") => TokenKind::Keyword(Keyword::Then), + UniCase::ascii("THROW") => TokenKind::Keyword(Keyword::Throw), + UniCase::ascii("TIMEOUT") => TokenKind::Keyword(Keyword::Timeout), + UniCase::ascii("TOKENIZERS") => TokenKind::Keyword(Keyword::Tokenizers), + UniCase::ascii("TOKEN") => TokenKind::Keyword(Keyword::Token), + UniCase::ascii("TRANSACTION") => TokenKind::Keyword(Keyword::Transaction), + UniCase::ascii("true") => TokenKind::Keyword(Keyword::True), + UniCase::ascii("TYPE") => TokenKind::Keyword(Keyword::Type), + UniCase::ascii("UNIQUE") => TokenKind::Keyword(Keyword::Unique), + UniCase::ascii("UNSET") => TokenKind::Keyword(Keyword::Unset), + UniCase::ascii("UPDATE") => TokenKind::Keyword(Keyword::Update), + UniCase::ascii("UPPERCASE") => TokenKind::Keyword(Keyword::Uppercase), + UniCase::ascii("USE") => TokenKind::Keyword(Keyword::Use), + UniCase::ascii("USER") => TokenKind::Keyword(Keyword::User), + UniCase::ascii("VALUE") => TokenKind::Keyword(Keyword::Value), + UniCase::ascii("VALUES") => TokenKind::Keyword(Keyword::Values), + UniCase::ascii("VERSION") => TokenKind::Keyword(Keyword::Version), + UniCase::ascii("VS") => TokenKind::Keyword(Keyword::Vs), + UniCase::ascii("WHEN") => TokenKind::Keyword(Keyword::When), + UniCase::ascii("WHERE") => TokenKind::Keyword(Keyword::Where), 
+ UniCase::ascii("WITH") => TokenKind::Keyword(Keyword::With), + UniCase::ascii("ALLINSIDE") => TokenKind::Keyword(Keyword::AllInside), + UniCase::ascii("ANDKW") => TokenKind::Keyword(Keyword::AndKw), + UniCase::ascii("ANYINSIDE") => TokenKind::Keyword(Keyword::AnyInside), + UniCase::ascii("INSIDE") => TokenKind::Keyword(Keyword::Inside), + UniCase::ascii("INTERSECTS") => TokenKind::Keyword(Keyword::Intersects), + UniCase::ascii("NONEINSIDE") => TokenKind::Keyword(Keyword::NoneInside), + UniCase::ascii("NOTINSIDE") => TokenKind::Keyword(Keyword::NotInside), + UniCase::ascii("OR") => TokenKind::Keyword(Keyword::OrKw), + UniCase::ascii("OUTSIDE") => TokenKind::Keyword(Keyword::Outside), + UniCase::ascii("NOT") => TokenKind::Keyword(Keyword::Not), + UniCase::ascii("AND") => TokenKind::Keyword(Keyword::And), + UniCase::ascii("COLLATE") => TokenKind::Keyword(Keyword::Collate), + UniCase::ascii("CONTAINSALL") => TokenKind::Keyword(Keyword::ContainsAll), + UniCase::ascii("CONTAINSANY") => TokenKind::Keyword(Keyword::ContainsAny), + UniCase::ascii("CONTAINSNONE") => TokenKind::Keyword(Keyword::ContainsNone), + UniCase::ascii("CONTAINSNOT") => TokenKind::Keyword(Keyword::ContainsNot), + UniCase::ascii("CONTAINS") => TokenKind::Keyword(Keyword::Contains), + UniCase::ascii("IN") => TokenKind::Keyword(Keyword::In), - UniCase::ascii("ANY") => Some(TokenKind::Keyword(Keyword::Any)), - UniCase::ascii("ARRAY") => Some(TokenKind::Keyword(Keyword::Array)), - UniCase::ascii("GEOMETRY") => Some(TokenKind::Keyword(Keyword::Geometry)), - UniCase::ascii("RECORD") => Some(TokenKind::Keyword(Keyword::Record)), - UniCase::ascii("FUTURE") => Some(TokenKind::Keyword(Keyword::Future)), - UniCase::ascii("BOOL") => Some(TokenKind::Keyword(Keyword::Bool)), - UniCase::ascii("BYTES") => Some(TokenKind::Keyword(Keyword::Bytes)), - UniCase::ascii("DATETIME") => Some(TokenKind::Keyword(Keyword::Datetime)), - UniCase::ascii("DECIMAL") => Some(TokenKind::Keyword(Keyword::Decimal)), - 
UniCase::ascii("DURATION") => Some(TokenKind::Keyword(Keyword::Duration)), - UniCase::ascii("FLOAT") => Some(TokenKind::Keyword(Keyword::Float)), - UniCase::ascii("fn") => Some(TokenKind::Keyword(Keyword::Fn)), - UniCase::ascii("ml") => Some(TokenKind::Keyword(Keyword::ML)), - UniCase::ascii("INT") => Some(TokenKind::Keyword(Keyword::Int)), - UniCase::ascii("NUMBER") => Some(TokenKind::Keyword(Keyword::Number)), - UniCase::ascii("OBJECT") => Some(TokenKind::Keyword(Keyword::Object)), - UniCase::ascii("STRING") => Some(TokenKind::Keyword(Keyword::String)), - UniCase::ascii("UUID") => Some(TokenKind::Keyword(Keyword::Uuid)), - UniCase::ascii("ULID") => Some(TokenKind::Keyword(Keyword::Ulid)), - UniCase::ascii("RAND") => Some(TokenKind::Keyword(Keyword::Rand)), - UniCase::ascii("FEATURE") => Some(TokenKind::Keyword(Keyword::Feature)), - UniCase::ascii("LINE") => Some(TokenKind::Keyword(Keyword::Line)), - UniCase::ascii("POINT") => Some(TokenKind::Keyword(Keyword::Point)), - UniCase::ascii("POLYGON") => Some(TokenKind::Keyword(Keyword::Polygon)), - UniCase::ascii("MULTIPOINT") => Some(TokenKind::Keyword(Keyword::MultiPoint)), - UniCase::ascii("MULTILINE") => Some(TokenKind::Keyword(Keyword::MultiLine)), - UniCase::ascii("MULTIPOLYGON") => Some(TokenKind::Keyword(Keyword::MultiPolygon)), - UniCase::ascii("COLLECTION") => Some(TokenKind::Keyword(Keyword::Collection)), + UniCase::ascii("ANY") => TokenKind::Keyword(Keyword::Any), + UniCase::ascii("ARRAY") => TokenKind::Keyword(Keyword::Array), + UniCase::ascii("GEOMETRY") => TokenKind::Keyword(Keyword::Geometry), + UniCase::ascii("RECORD") => TokenKind::Keyword(Keyword::Record), + UniCase::ascii("FUTURE") => TokenKind::Keyword(Keyword::Future), + UniCase::ascii("BOOL") => TokenKind::Keyword(Keyword::Bool), + UniCase::ascii("BYTES") => TokenKind::Keyword(Keyword::Bytes), + UniCase::ascii("DATETIME") => TokenKind::Keyword(Keyword::Datetime), + UniCase::ascii("DECIMAL") => TokenKind::Keyword(Keyword::Decimal), + 
UniCase::ascii("DURATION") => TokenKind::Keyword(Keyword::Duration), + UniCase::ascii("FLOAT") => TokenKind::Keyword(Keyword::Float), + UniCase::ascii("fn") => TokenKind::Keyword(Keyword::Fn), + UniCase::ascii("ml") => TokenKind::Keyword(Keyword::ML), + UniCase::ascii("INT") => TokenKind::Keyword(Keyword::Int), + UniCase::ascii("NUMBER") => TokenKind::Keyword(Keyword::Number), + UniCase::ascii("OBJECT") => TokenKind::Keyword(Keyword::Object), + UniCase::ascii("STRING") => TokenKind::Keyword(Keyword::String), + UniCase::ascii("UUID") => TokenKind::Keyword(Keyword::Uuid), + UniCase::ascii("ULID") => TokenKind::Keyword(Keyword::Ulid), + UniCase::ascii("RAND") => TokenKind::Keyword(Keyword::Rand), + UniCase::ascii("FEATURE") => TokenKind::Keyword(Keyword::Feature), + UniCase::ascii("LINE") => TokenKind::Keyword(Keyword::Line), + UniCase::ascii("POINT") => TokenKind::Keyword(Keyword::Point), + UniCase::ascii("POLYGON") => TokenKind::Keyword(Keyword::Polygon), + UniCase::ascii("MULTIPOINT") => TokenKind::Keyword(Keyword::MultiPoint), + UniCase::ascii("MULTILINE") => TokenKind::Keyword(Keyword::MultiLine), + UniCase::ascii("MULTIPOLYGON") => TokenKind::Keyword(Keyword::MultiPolygon), + UniCase::ascii("COLLECTION") => TokenKind::Keyword(Keyword::Collection), // Languages - UniCase::ascii("ARABIC") => Some(TokenKind::Language(Language::Arabic)), - UniCase::ascii("ARA") => Some(TokenKind::Language(Language::Arabic)), - UniCase::ascii("AR") => Some(TokenKind::Language(Language::Arabic)), - UniCase::ascii("DANISH") => Some(TokenKind::Language(Language::Danish)), - UniCase::ascii("DAN") => Some(TokenKind::Language(Language::Danish)), - UniCase::ascii("DA") => Some(TokenKind::Language(Language::Danish)), - UniCase::ascii("DUTCH") => Some(TokenKind::Language(Language::Dutch)), - UniCase::ascii("NLD") => Some(TokenKind::Language(Language::Dutch)), - UniCase::ascii("NL") => Some(TokenKind::Language(Language::Dutch)), - UniCase::ascii("ENGLISH") => 
Some(TokenKind::Language(Language::English)), - UniCase::ascii("ENG") => Some(TokenKind::Language(Language::English)), - UniCase::ascii("EN") => Some(TokenKind::Language(Language::English)), - UniCase::ascii("FRENCH") => Some(TokenKind::Language(Language::French)), - UniCase::ascii("FRA") => Some(TokenKind::Language(Language::French)), - UniCase::ascii("FR") => Some(TokenKind::Language(Language::French)), - UniCase::ascii("GERMAN") => Some(TokenKind::Language(Language::German)), - UniCase::ascii("DEU") => Some(TokenKind::Language(Language::German)), - UniCase::ascii("DE") => Some(TokenKind::Language(Language::German)), - UniCase::ascii("GREEK") => Some(TokenKind::Language(Language::Greek)), - UniCase::ascii("ELL") => Some(TokenKind::Language(Language::Greek)), - UniCase::ascii("EL") => Some(TokenKind::Language(Language::Greek)), - UniCase::ascii("HUNGARIAN") => Some(TokenKind::Language(Language::Hungarian)), - UniCase::ascii("HUN") => Some(TokenKind::Language(Language::Hungarian)), - UniCase::ascii("HU") => Some(TokenKind::Language(Language::Hungarian)), - UniCase::ascii("ITALIAN") => Some(TokenKind::Language(Language::Italian)), - UniCase::ascii("ITA") => Some(TokenKind::Language(Language::Italian)), - UniCase::ascii("IT") => Some(TokenKind::Language(Language::Italian)), - UniCase::ascii("NORWEGIAN") => Some(TokenKind::Language(Language::Norwegian)), - UniCase::ascii("NOR") => Some(TokenKind::Language(Language::Norwegian)), - UniCase::ascii("PORTUGUESE") => Some(TokenKind::Language(Language::Portuguese)), - UniCase::ascii("POR") => Some(TokenKind::Language(Language::Portuguese)), - UniCase::ascii("PT") => Some(TokenKind::Language(Language::Portuguese)), - UniCase::ascii("ROMANIAN") => Some(TokenKind::Language(Language::Romanian)), - UniCase::ascii("RON") => Some(TokenKind::Language(Language::Romanian)), - UniCase::ascii("RO") => Some(TokenKind::Language(Language::Romanian)), - UniCase::ascii("RUSSIAN") => Some(TokenKind::Language(Language::Russian)), - 
UniCase::ascii("RUS") => Some(TokenKind::Language(Language::Russian)), - UniCase::ascii("RU") => Some(TokenKind::Language(Language::Russian)), - UniCase::ascii("SPANISH") => Some(TokenKind::Language(Language::Spanish)), - UniCase::ascii("SPA") => Some(TokenKind::Language(Language::Spanish)), - UniCase::ascii("ES") => Some(TokenKind::Language(Language::Spanish)), - UniCase::ascii("SWEDISH") => Some(TokenKind::Language(Language::Swedish)), - UniCase::ascii("SWE") => Some(TokenKind::Language(Language::Swedish)), - UniCase::ascii("SV") => Some(TokenKind::Language(Language::Swedish)), - UniCase::ascii("TAMIL") => Some(TokenKind::Language(Language::Tamil)), - UniCase::ascii("TAM") => Some(TokenKind::Language(Language::Tamil)), - UniCase::ascii("TA") => Some(TokenKind::Language(Language::Tamil)), - UniCase::ascii("TURKISH") => Some(TokenKind::Language(Language::Turkish)), - UniCase::ascii("TUR") => Some(TokenKind::Language(Language::Turkish)), - UniCase::ascii("TR") => Some(TokenKind::Language(Language::Turkish)), + UniCase::ascii("ARABIC") => TokenKind::Language(Language::Arabic), + UniCase::ascii("ARA") => TokenKind::Language(Language::Arabic), + UniCase::ascii("AR") => TokenKind::Language(Language::Arabic), + UniCase::ascii("DANISH") => TokenKind::Language(Language::Danish), + UniCase::ascii("DAN") => TokenKind::Language(Language::Danish), + UniCase::ascii("DA") => TokenKind::Language(Language::Danish), + UniCase::ascii("DUTCH") => TokenKind::Language(Language::Dutch), + UniCase::ascii("NLD") => TokenKind::Language(Language::Dutch), + UniCase::ascii("NL") => TokenKind::Language(Language::Dutch), + UniCase::ascii("ENGLISH") => TokenKind::Language(Language::English), + UniCase::ascii("ENG") => TokenKind::Language(Language::English), + UniCase::ascii("EN") => TokenKind::Language(Language::English), + UniCase::ascii("FRENCH") => TokenKind::Language(Language::French), + UniCase::ascii("FRA") => TokenKind::Language(Language::French), + UniCase::ascii("FR") => 
TokenKind::Language(Language::French), + UniCase::ascii("GERMAN") => TokenKind::Language(Language::German), + UniCase::ascii("DEU") => TokenKind::Language(Language::German), + UniCase::ascii("DE") => TokenKind::Language(Language::German), + UniCase::ascii("GREEK") => TokenKind::Language(Language::Greek), + UniCase::ascii("ELL") => TokenKind::Language(Language::Greek), + UniCase::ascii("EL") => TokenKind::Language(Language::Greek), + UniCase::ascii("HUNGARIAN") => TokenKind::Language(Language::Hungarian), + UniCase::ascii("HUN") => TokenKind::Language(Language::Hungarian), + UniCase::ascii("HU") => TokenKind::Language(Language::Hungarian), + UniCase::ascii("ITALIAN") => TokenKind::Language(Language::Italian), + UniCase::ascii("ITA") => TokenKind::Language(Language::Italian), + UniCase::ascii("IT") => TokenKind::Language(Language::Italian), + UniCase::ascii("NORWEGIAN") => TokenKind::Language(Language::Norwegian), + UniCase::ascii("NOR") => TokenKind::Language(Language::Norwegian), + UniCase::ascii("PORTUGUESE") => TokenKind::Language(Language::Portuguese), + UniCase::ascii("POR") => TokenKind::Language(Language::Portuguese), + UniCase::ascii("PT") => TokenKind::Language(Language::Portuguese), + UniCase::ascii("ROMANIAN") => TokenKind::Language(Language::Romanian), + UniCase::ascii("RON") => TokenKind::Language(Language::Romanian), + UniCase::ascii("RO") => TokenKind::Language(Language::Romanian), + UniCase::ascii("RUSSIAN") => TokenKind::Language(Language::Russian), + UniCase::ascii("RUS") => TokenKind::Language(Language::Russian), + UniCase::ascii("RU") => TokenKind::Language(Language::Russian), + UniCase::ascii("SPANISH") => TokenKind::Language(Language::Spanish), + UniCase::ascii("SPA") => TokenKind::Language(Language::Spanish), + UniCase::ascii("ES") => TokenKind::Language(Language::Spanish), + UniCase::ascii("SWEDISH") => TokenKind::Language(Language::Swedish), + UniCase::ascii("SWE") => TokenKind::Language(Language::Swedish), + UniCase::ascii("SV") => 
TokenKind::Language(Language::Swedish), + UniCase::ascii("TAMIL") => TokenKind::Language(Language::Tamil), + UniCase::ascii("TAM") => TokenKind::Language(Language::Tamil), + UniCase::ascii("TA") => TokenKind::Language(Language::Tamil), + UniCase::ascii("TURKISH") => TokenKind::Language(Language::Turkish), + UniCase::ascii("TUR") => TokenKind::Language(Language::Turkish), + UniCase::ascii("TR") => TokenKind::Language(Language::Turkish), // Algorithms - UniCase::ascii("EDDSA") => Some(TokenKind::Algorithm(Algorithm::EdDSA)), - UniCase::ascii("ES256") => Some(TokenKind::Algorithm(Algorithm::Es256)), - UniCase::ascii("ES384") => Some(TokenKind::Algorithm(Algorithm::Es384)), - UniCase::ascii("ES512") => Some(TokenKind::Algorithm(Algorithm::Es512)), - UniCase::ascii("HS256") => Some(TokenKind::Algorithm(Algorithm::Hs256)), - UniCase::ascii("HS384") => Some(TokenKind::Algorithm(Algorithm::Hs384)), - UniCase::ascii("HS512") => Some(TokenKind::Algorithm(Algorithm::Hs512)), - UniCase::ascii("PS256") => Some(TokenKind::Algorithm(Algorithm::Ps256)), - UniCase::ascii("PS384") => Some(TokenKind::Algorithm(Algorithm::Ps384)), - UniCase::ascii("PS512") => Some(TokenKind::Algorithm(Algorithm::Ps512)), - UniCase::ascii("RS256") => Some(TokenKind::Algorithm(Algorithm::Rs256)), - UniCase::ascii("RS384") => Some(TokenKind::Algorithm(Algorithm::Rs384)), - UniCase::ascii("RS512") => Some(TokenKind::Algorithm(Algorithm::Rs512)), + UniCase::ascii("EDDSA") => TokenKind::Algorithm(Algorithm::EdDSA), + UniCase::ascii("ES256") => TokenKind::Algorithm(Algorithm::Es256), + UniCase::ascii("ES384") => TokenKind::Algorithm(Algorithm::Es384), + UniCase::ascii("ES512") => TokenKind::Algorithm(Algorithm::Es512), + UniCase::ascii("HS256") => TokenKind::Algorithm(Algorithm::Hs256), + UniCase::ascii("HS384") => TokenKind::Algorithm(Algorithm::Hs384), + UniCase::ascii("HS512") => TokenKind::Algorithm(Algorithm::Hs512), + UniCase::ascii("PS256") => TokenKind::Algorithm(Algorithm::Ps256), + 
UniCase::ascii("PS384") => TokenKind::Algorithm(Algorithm::Ps384), + UniCase::ascii("PS512") => TokenKind::Algorithm(Algorithm::Ps512), + UniCase::ascii("RS256") => TokenKind::Algorithm(Algorithm::Rs256), + UniCase::ascii("RS384") => TokenKind::Algorithm(Algorithm::Rs384), + UniCase::ascii("RS512") => TokenKind::Algorithm(Algorithm::Rs512), UniCase::ascii("JWKS") => jwks_token_kind(), // Necessary because `phf_map!` doesn't support `cfg` attributes // Distance - UniCase::ascii("EUCLIDEAN") => Some(TokenKind::Distance(DistanceKind::Euclidean)), - UniCase::ascii("MANHATTAN") => Some(TokenKind::Distance(DistanceKind::Manhattan)), - UniCase::ascii("HAMMING") => Some(TokenKind::Distance(DistanceKind::Hamming)), - UniCase::ascii("MINKOWSKI") => Some(TokenKind::Distance(DistanceKind::Minkowski)), + UniCase::ascii("EUCLIDEAN") => TokenKind::Distance(DistanceKind::Euclidean), + UniCase::ascii("MANHATTAN") => TokenKind::Distance(DistanceKind::Manhattan), + UniCase::ascii("HAMMING") => TokenKind::Distance(DistanceKind::Hamming), + UniCase::ascii("MINKOWSKI") => TokenKind::Distance(DistanceKind::Minkowski), }; -const fn jwks_token_kind() -> Option { +const fn jwks_token_kind() -> TokenKind { #[cfg(feature = "jwks")] - let token = Some(TokenKind::Algorithm(Algorithm::Jwks)); + let token = TokenKind::Algorithm(Algorithm::Jwks); #[cfg(not(feature = "jwks"))] - let token = None; + let token = TokenKind::Identifier; token } diff --git a/core/src/syn/v2/parser/basic.rs b/core/src/syn/v2/parser/basic.rs index ebe12880..03e6c572 100644 --- a/core/src/syn/v2/parser/basic.rs +++ b/core/src/syn/v2/parser/basic.rs @@ -140,7 +140,32 @@ impl TokenValue for f32 { })?; Ok(number) } - x => unexpected!(parser, x, "an floating point"), + x => unexpected!(parser, x, "a floating point number"), + } + } +} + +impl TokenValue for f64 { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Number(NumberKind::NaN) => Ok(f64::NAN), + 
TokenKind::Number( + NumberKind::Integer + | NumberKind::Float + | NumberKind::Mantissa + | NumberKind::MantissaExponent, + ) => { + let number = parser.lexer.string.take().unwrap().parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidFloat { + error: e, + }, + token.span, + ) + })?; + Ok(number) + } + x => unexpected!(parser, x, "a floating point number"), } } } @@ -283,6 +308,19 @@ impl Parser<'_> { res } + pub fn parse_signed_float(&mut self) -> ParseResult { + let neg = self.eat(t!("-")); + if !neg { + self.eat(t!("+")); + } + let res: f64 = self.next_token_value()?; + if neg { + Ok(-res) + } else { + Ok(res) + } + } + /// Parse a token value from the given token. pub fn token_value(&mut self, token: Token) -> ParseResult { V::from_token(self, token) diff --git a/core/src/syn/v2/parser/error.rs b/core/src/syn/v2/parser/error.rs index 7080031a..6c488462 100644 --- a/core/src/syn/v2/parser/error.rs +++ b/core/src/syn/v2/parser/error.rs @@ -18,6 +18,13 @@ pub enum IntErrorKind { IntegerOverflow, } +#[derive(Debug)] +pub enum MissingKind { + Group, + Split, + Order, +} + #[derive(Debug)] pub enum ParseErrorKind { /// The parser encountered an unexpected token. @@ -51,17 +58,23 @@ pub enum ParseErrorKind { InvalidDecimal { error: rust_decimal::Error, }, - DisallowedStatement, + DisallowedStatement { + found: TokenKind, + expected: TokenKind, + disallowed: Span, + }, /// The parser encountered an token which could not be lexed correctly. InvalidToken(LexError), /// Matched a path which was invalid. InvalidPath { possibly: Option<&'static str>, }, + MissingField { + field: Span, + idiom: String, + kind: MissingKind, + }, NoWhitespace, - /// A path in the parser which was not yet finished. - /// Should eventually be removed. - Todo, } /// A parsing error. 
@@ -79,16 +92,19 @@ impl ParseError { at, } } + pub fn render_on(&self, source: &str) -> RenderedError { + Self::render_on_inner(source, &self.kind, self.at) + } /// Create a rendered error from the string this error was generated from. - pub fn render_on(&self, source: &str) -> RenderedError { - match &self.kind { + pub fn render_on_inner(source: &str, kind: &ParseErrorKind, at: Span) -> RenderedError { + match &kind { ParseErrorKind::Unexpected { found, expected, } => { let text = format!("Unexpected token '{}' expected {}", found.as_str(), expected); - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); RenderedError { text, @@ -101,7 +117,7 @@ impl ParseError { explain, } => { let text = format!("Unexpected token '{}' expected {}", found.as_str(), expected); - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, Some(explain)); RenderedError { text, @@ -112,7 +128,7 @@ impl ParseError { expected, } => { let text = format!("Query ended early, expected {}", expected); - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); RenderedError { text, @@ -123,8 +139,8 @@ impl ParseError { expected, should_close, } => { - let text = format!("Expected closing delimiter {}", expected.as_str()); - let locations = Location::range_of_span(source, self.at); + let text = format!("Expected closing delimiter '{}'", expected.as_str()); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); let locations = Location::range_of_span(source, *should_close); let close_snippet = Snippet::from_source_location_range( @@ 
-137,27 +153,32 @@ impl ParseError { snippets: vec![snippet, close_snippet], } } - ParseErrorKind::DisallowedStatement => { - let text = "This statement is not allowed in this location".to_owned(); - let locations = Location::range_of_span(source, self.at); + ParseErrorKind::DisallowedStatement { + found, + expected, + disallowed, + } => { + let text = format!( + "Unexpected token '{}' expected '{}'", + found.as_str(), + expected.as_str() + ); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); + let locations = Location::range_of_span(source, *disallowed); + let dissallowed_snippet = Snippet::from_source_location_range( + source, + locations, + Some("this keyword is not allowed to start a statement in this position"), + ); RenderedError { text, - snippets: vec![snippet], + snippets: vec![snippet, dissallowed_snippet], } } ParseErrorKind::InvalidToken(e) => { let text = e.to_string(); - let locations = Location::range_of_span(source, self.at); - let snippet = Snippet::from_source_location_range(source, locations, None); - RenderedError { - text, - snippets: vec![snippet], - } - } - ParseErrorKind::Todo => { - let text = "Parser hit not yet implemented path".to_string(); - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); RenderedError { text, @@ -167,12 +188,12 @@ impl ParseError { ParseErrorKind::InvalidPath { possibly, } => { - let mut text = "Invalid path".to_owned(); + let mut text = "Invalid function path".to_owned(); if let Some(p) = possibly { // writing into a string never causes an error. 
write!(text, ", did you maybe mean `{}`", p).unwrap(); } - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range( source, locations, @@ -187,7 +208,7 @@ impl ParseError { ref error, } => { let text = format!("failed to parse integer, {error}"); - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); RenderedError { text: text.to_string(), @@ -198,7 +219,7 @@ impl ParseError { ref error, } => { let text = format!("failed to parse floating point, {error}"); - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); RenderedError { text: text.to_string(), @@ -209,7 +230,7 @@ impl ParseError { ref error, } => { let text = format!("failed to parse decimal number, {error}"); - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); RenderedError { text: text.to_string(), @@ -218,13 +239,42 @@ impl ParseError { } ParseErrorKind::NoWhitespace => { let text = "Whitespace is dissallowed in this position"; - let locations = Location::range_of_span(source, self.at); + let locations = Location::range_of_span(source, at); let snippet = Snippet::from_source_location_range(source, locations, None); RenderedError { text: text.to_string(), snippets: vec![snippet], } } + ParseErrorKind::MissingField { + field, + idiom, + kind, + } => { + let text = match kind { + MissingKind::Group => { + format!("Missing group idiom `{idiom}` in statement selection") + } + MissingKind::Split => { + format!("Missing split idiom `{idiom}` in statement selection") + } + MissingKind::Order => { + 
format!("Missing order idiom `{idiom}` in statement selection") + } + }; + let locations = Location::range_of_span(source, at); + let snippet_error = Snippet::from_source_location_range(source, locations, None); + let locations = Location::range_of_span(source, *field); + let snippet_hint = Snippet::from_source_location_range( + source, + locations, + Some("Idiom missing here"), + ); + RenderedError { + text: text.to_string(), + snippets: vec![snippet_error, snippet_hint], + } + } } } } diff --git a/core/src/syn/v2/parser/expression.rs b/core/src/syn/v2/parser/expression.rs index bffbdec9..4bacb800 100644 --- a/core/src/syn/v2/parser/expression.rs +++ b/core/src/syn/v2/parser/expression.rs @@ -1,8 +1,10 @@ //! This module defines the pratt parser for operators. + use super::mac::unexpected; +use super::ParseError; use crate::sql::{value::TryNeg, Cast, Expression, Number, Operator, Value}; use crate::syn::v2::{ - parser::{mac::expected, ParseResult, Parser}, + parser::{mac::expected, ParseErrorKind, ParseResult, Parser}, token::{t, NumberKind, TokenKind}, }; use std::cmp::Ordering; @@ -60,9 +62,8 @@ impl Parser<'_> { fn infix_binding_power(token: TokenKind) -> Option<(u8, u8)> { // TODO: Look at ordering of operators. match token { - // assigment operators have the lowes binding power. - t!("+=") | t!("-=") | t!("+?=") => Some((2, 1)), - + // assignment operators have the lowest binding power. + //t!("+=") | t!("-=") | t!("+?=") => Some((2, 1)), t!("||") | t!("OR") => Some((3, 4)), t!("&&") | t!("AND") => Some((5, 6)), @@ -261,7 +262,7 @@ impl Parser<'_> { } // should be unreachable as we previously check if the token was a prefix op. 
- _ => unreachable!(), + x => unreachable!("found non-operator token {x:?}"), }; let rhs = self.pratt_parse_expr(min_bp)?; Ok(Value::Expression(Box::new(Expression::Binary { @@ -284,6 +285,17 @@ impl Parser<'_> { loop { let token = self.peek(); let Some((l_bp, r_bp)) = Self::infix_binding_power(token.kind) else { + // explain that assignment operators can't be used in normal expressions. + if let t!("+=") | t!("*=") | t!("-=") | t!("+?=") = token.kind { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: token.kind, + expected: "an operator", + explain: "assignment operators are only allowed in SET and DUPLICATE KEY UPDATE statements", + }, + token.span, + )); + } break; }; diff --git a/core/src/syn/v2/parser/mac.rs b/core/src/syn/v2/parser/mac.rs index 4f2c9df1..8ca11fc3 100644 --- a/core/src/syn/v2/parser/mac.rs +++ b/core/src/syn/v2/parser/mac.rs @@ -6,7 +6,7 @@ macro_rules! unexpected { let error = $parser.lexer.error.take().unwrap(); return Err($crate::syn::v2::parser::ParseError::new( $crate::syn::v2::parser::ParseErrorKind::InvalidToken(error), - $parser.last_span(), + $parser.recent_span(), )); } $crate::syn::v2::token::TokenKind::Eof => { @@ -15,7 +15,7 @@ macro_rules! unexpected { $crate::syn::v2::parser::ParseErrorKind::UnexpectedEof { expected, }, - $parser.last_span(), + $parser.recent_span(), )); } x => { @@ -25,7 +25,7 @@ macro_rules! unexpected { found: x, expected, }, - $parser.last_span(), + $parser.recent_span(), )); } } @@ -42,7 +42,7 @@ macro_rules! 
expected { let error = $parser.lexer.error.take().unwrap(); return Err($crate::syn::v2::parser::ParseError::new( $crate::syn::v2::parser::ParseErrorKind::InvalidToken(error), - $parser.last_span(), + $parser.recent_span(), )); } x => { diff --git a/core/src/syn/v2/parser/mod.rs b/core/src/syn/v2/parser/mod.rs index d73df9da..7bcc5df3 100644 --- a/core/src/syn/v2/parser/mod.rs +++ b/core/src/syn/v2/parser/mod.rs @@ -21,7 +21,6 @@ use crate::{ sql, syn::v2::{ lexer::{Error as LexError, Lexer}, - parser::mac::expected, token::{t, Span, Token, TokenKind}, }, }; @@ -163,10 +162,15 @@ impl<'a> Parser<'a> { /// Returns the span of the next token if it was already peeked, otherwise returns the token of /// the last consumed token. - pub fn last_span(&mut self) -> Span { + pub fn recent_span(&mut self) -> Span { self.token_buffer.first().map(|x| x.span).unwrap_or(self.last_span) } + /// returns the token of the last consumed token. + pub fn last_span(&mut self) -> Span { + self.last_span + } + /// Eat the next token if it is of the given kind. /// Returns whether a token was eaten. pub fn eat(&mut self, token: TokenKind) -> bool { @@ -187,7 +191,7 @@ impl<'a> Parser<'a> { expected: kind, should_close, }, - self.last_span(), + self.recent_span(), )); } Ok(()) @@ -214,24 +218,8 @@ impl<'a> Parser<'a> { /// /// This is the primary entry point of the parser. pub fn parse_query(&mut self) -> ParseResult { - // eat possible empty statements. - while self.eat(t!(";")) {} - - let mut statements = vec![self.parse_stmt()?]; - - while self.eat(t!(";")) { - // eat possible empty statements. - while self.eat(t!(";")) {} - - if let TokenKind::Eof = self.peek().kind { - break; - }; - - statements.push(self.parse_stmt()?); - } - - expected!(self, TokenKind::Eof); - Ok(sql::Query(sql::Statements(statements))) + let statements = self.parse_stmt_list()?; + Ok(sql::Query(statements)) } /// Parse a single statement. 
diff --git a/core/src/syn/v2/parser/prime.rs b/core/src/syn/v2/parser/prime.rs index 457689b8..08030b3c 100644 --- a/core/src/syn/v2/parser/prime.rs +++ b/core/src/syn/v2/parser/prime.rs @@ -3,8 +3,8 @@ use geo::Point; use super::{ParseResult, Parser}; use crate::{ sql::{ - Array, Dir, Function, Geometry, Ident, Idiom, Mock, Number, Part, Script, Strand, Subquery, - Table, Value, + Array, Dir, Function, Geometry, Ident, Idiom, Mock, Part, Script, Strand, Subquery, Table, + Value, }, syn::v2::{ lexer::Lexer, @@ -12,7 +12,7 @@ use crate::{ mac::{expected, unexpected}, ParseError, ParseErrorKind, }, - token::{t, Span, TokenKind}, + token::{t, NumberKind, Span, TokenKind}, }, }; @@ -322,6 +322,7 @@ impl Parser<'_> { match peek.kind { t!("(") => { self.pop_peek(); + self.parse_inner_subquery(Some(peek.span)) } t!("IF") => { @@ -334,8 +335,8 @@ impl Parser<'_> { } pub fn parse_inner_subquery_or_coordinate(&mut self, start: Span) -> ParseResult { - let next = self.peek(); - let res = match next.kind { + let peek = self.peek(); + let res = match peek.kind { t!("RETURN") => { self.pop_peek(); let stmt = self.parse_return_stmt()?; @@ -376,40 +377,98 @@ impl Parser<'_> { let stmt = self.parse_remove_stmt()?; Subquery::Remove(stmt) } + t!("+") | t!("-") => { + // handle possible coordinate in the shape of ([-+]?number,[-+]?number) + if let TokenKind::Number(kind) = self.peek_token_at(1).kind { + // take the value so we don't overwrite it if the next token happens to be a + // strand or an ident, both of which are invalid syntax. 
+ let number_value = self.lexer.string.take().unwrap(); + if self.peek_token_at(2).kind == t!(",") { + match kind { + NumberKind::Decimal | NumberKind::NaN => { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: TokenKind::Number(kind), + expected: "a non-decimal, non-nan number", + explain: "coordinate numbers can't be NaN or a decimal", + }, + peek.span, + )); + } + _ => {} + } + + self.lexer.string = Some(number_value); + let a = self.parse_signed_float()?; + self.next(); + let b = self.parse_signed_float()?; + self.expect_closing_delimiter(t!(")"), start)?; + return Ok(Value::Geometry(Geometry::Point(Point::from((a, b))))); + } + self.lexer.string = Some(number_value); + } + Subquery::Value(self.parse_value_field()?) + } + TokenKind::Number(kind) => { + // handle possible coordinate in the shape of ([-+]?number,[-+]?number) + // take the value so we don't overwrite it if the next token happens to be a + // strand or an ident, both of which are invalid syntax. + let number_value = self.lexer.string.take().unwrap(); + if self.peek_token_at(1).kind == t!(",") { + match kind { + NumberKind::Decimal | NumberKind::NaN => { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: TokenKind::Number(kind), + expected: "a non-decimal, non-nan number", + explain: "coordinate numbers can't be NaN or a decimal", + }, + peek.span, + )); + } + _ => {} + } + self.pop_peek(); + // was a comma, put the strand back for code reuse. + self.lexer.string = Some(number_value); + let a = self.token_value::(peek)?; + // eat the comma. + self.next(); + let b = self.parse_signed_float()?; + self.expect_closing_delimiter(t!(")"), start)?; + return Ok(Value::Geometry(Geometry::Point(Point::from((a, b))))); + } + self.lexer.string = Some(number_value); + Subquery::Value(self.parse_value_field()?) 
+ } _ => { let value = self.parse_value_field()?; Subquery::Value(value) } }; - match res { - Subquery::Value(Value::Number(x)) => { - if self.eat(t!(",")) { - // TODO: Fix number parsing. - let b = self.next_token_value::()?; - - let a: f64 = x - .try_into() - .map_err(|_| ParseError::new(ParseErrorKind::Todo, next.span))?; - let b: f64 = b - .try_into() - .map_err(|_| ParseError::new(ParseErrorKind::Todo, next.span))?; - - self.expect_closing_delimiter(t!(")"), start)?; - Ok(Value::Geometry(Geometry::Point(Point::from((a, b))))) - } else { - self.expect_closing_delimiter(t!(")"), start)?; - Ok(Value::Subquery(Box::new(Subquery::Value(Value::Number(x))))) + if self.peek_kind() != t!(")") && Self::starts_disallowed_subquery_statement(peek.kind) { + if let Subquery::Value(Value::Idiom(Idiom(ref idiom))) = res { + if idiom.len() == 1 { + // we parsed a single idiom and the next token was a disallowed statement so + // it is likely that the user meant to use an invalid statement. + return Err(ParseError::new( + ParseErrorKind::DisallowedStatement { + found: self.peek_kind(), + expected: t!(")"), + disallowed: peek.span, + }, + self.recent_span(), + )); + } } - x => { - self.expect_closing_delimiter(t!(")"), start)?; - Ok(Value::Subquery(Box::new(x))) - } } + self.expect_closing_delimiter(t!(")"), start)?; + Ok(Value::Subquery(Box::new(res))) } pub fn parse_inner_subquery(&mut self, start: Option) -> ParseResult { - let res = match self.peek().kind { + let peek = self.peek(); + let res = match peek.kind { t!("RETURN") => { self.pop_peek(); let stmt = self.parse_return_stmt()?; @@ -456,11 +515,44 @@ impl Parser<'_> { } }; if let Some(start) = start { + if self.peek_kind() != t!(")") && Self::starts_disallowed_subquery_statement(peek.kind) + { + if let Subquery::Value(Value::Idiom(Idiom(ref idiom))) = res { + if idiom.len() == 1 { + // we parsed a single idiom and the next token was a disallowed statement so + // it is likely that the user meant to use an invalid 
statement. + return Err(ParseError::new( + ParseErrorKind::DisallowedStatement { + found: self.peek_kind(), + expected: t!(")"), + disallowed: peek.span, + }, + self.recent_span(), + )); + } + } + } + self.expect_closing_delimiter(t!(")"), start)?; } Ok(res) } + fn starts_disallowed_subquery_statement(kind: TokenKind) -> bool { + matches!( + kind, + t!("ANALYZE") + | t!("BEGIN") | t!("BREAK") + | t!("CANCEL") | t!("COMMIT") + | t!("CONTINUE") | t!("FOR") + | t!("INFO") | t!("KILL") + | t!("LIVE") | t!("OPTION") + | t!("LET") | t!("SHOW") + | t!("SLEEP") | t!("THROW") + | t!("USE") + ) + } + /// Parses a strand with legacy rules, parsing to a record id, datetime or uuid if the string /// matches. pub fn parse_legacy_strand(&mut self) -> ParseResult { diff --git a/core/src/syn/v2/parser/stmt/mod.rs b/core/src/syn/v2/parser/stmt/mod.rs index 3c759c51..d9292a6e 100644 --- a/core/src/syn/v2/parser/stmt/mod.rs +++ b/core/src/syn/v2/parser/stmt/mod.rs @@ -5,6 +5,7 @@ use crate::sql::statements::{ KillStatement, LiveStatement, OptionStatement, SetStatement, ThrowStatement, }; use crate::sql::{Fields, Ident, Param}; +use crate::syn::v2::parser::{ParseError, ParseErrorKind}; use crate::syn::v2::token::{t, TokenKind}; use crate::{ sql::{ @@ -43,8 +44,24 @@ impl Parser<'_> { let stmt = self.parse_stmt()?; res.push(stmt); if !self.eat(t!(";")) { + if self.eat(t!("eof")) { + break; + } + + if Self::token_kind_starts_statement(self.peek_kind()) { + // user likely forgot a semicolon. 
+ return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: self.peek_kind(), + expected: "the query to end", + explain: + "maybe forgot a semicolon after the previous statement?", + }, + self.recent_span(), + )); + } + expected!(self, t!("eof")); - break; } } } @@ -52,6 +69,26 @@ impl Parser<'_> { Ok(Statements(res)) } + fn token_kind_starts_statement(kind: TokenKind) -> bool { + matches!( + kind, + t!("ANALYZE") + | t!("BEGIN") | t!("BREAK") + | t!("CANCEL") | t!("COMMIT") + | t!("CONTINUE") | t!("CREATE") + | t!("DEFINE") | t!("DELETE") + | t!("FOR") | t!("IF") + | t!("INFO") | t!("INSERT") + | t!("KILL") | t!("LIVE") + | t!("OPTION") | t!("RETURN") + | t!("RELATE") | t!("REMOVE") + | t!("SELECT") | t!("LET") + | t!("SHOW") | t!("SLEEP") + | t!("THROW") | t!("UPDATE") + | t!("USE") + ) + } + pub(super) fn parse_stmt(&mut self) -> ParseResult { let token = self.peek(); match token.kind { diff --git a/core/src/syn/v2/parser/stmt/parts.rs b/core/src/syn/v2/parser/stmt/parts.rs index e05eadd7..86b9c2ec 100644 --- a/core/src/syn/v2/parser/stmt/parts.rs +++ b/core/src/syn/v2/parser/stmt/parts.rs @@ -2,15 +2,17 @@ use crate::{ sql::{ - changefeed::ChangeFeed, index::Distance, Base, Cond, Data, Duration, Fetch, Fetchs, Group, - Groups, Ident, Operator, Output, Permission, Permissions, Tables, Timeout, View, + changefeed::ChangeFeed, index::Distance, Base, Cond, Data, Duration, Fetch, Fetchs, Field, + Fields, Group, Groups, Ident, Idiom, Output, Permission, Permissions, Tables, Timeout, + Value, View, }, syn::v2::{ parser::{ + error::MissingKind, mac::{expected, unexpected}, - ParseResult, Parser, + ParseError, ParseErrorKind, ParseResult, Parser, }, - token::{t, DistanceKind, TokenKind}, + token::{t, DistanceKind, Span, TokenKind}, }, }; @@ -24,13 +26,7 @@ impl Parser<'_> { let mut set_list = Vec::new(); loop { let idiom = self.parse_plain_idiom()?; - let operator = match self.next().kind { - t!("=") => Operator::Equal, - t!("+=") => Operator::Inc, - 
t!("-=") => Operator::Dec, - t!("+?=") => Operator::Ext, - x => unexpected!(self, x, "a assign operator"), - }; + let operator = self.parse_assigner()?; let value = self.parse_value()?; set_list.push((idiom, operator, value)); if !self.eat(t!(",")) { @@ -121,34 +117,94 @@ impl Parser<'_> { Ok(Some(Cond(v))) } - pub fn try_parse_group(&mut self) -> ParseResult> { + pub fn check_idiom<'a>( + kind: MissingKind, + fields: &'a Fields, + field_span: Span, + idiom: &Idiom, + idiom_span: Span, + ) -> ParseResult<&'a Field> { + let mut found = None; + for field in fields.iter() { + let Field::Single { + expr, + alias, + } = field + else { + unreachable!() + }; + + if let Some(alias) = alias { + if idiom == alias { + found = Some(field); + break; + } + } + + match expr { + Value::Idiom(x) => { + if idiom == x { + found = Some(field); + break; + } + } + v => { + if *idiom == v.to_idiom() { + found = Some(field); + break; + } + } + } + } + + found.ok_or_else(|| { + ParseError::new( + ParseErrorKind::MissingField { + field: field_span, + idiom: idiom.to_string(), + kind, + }, + idiom_span, + ) + }) + } + + pub fn try_parse_group( + &mut self, + fields: &Fields, + fields_span: Span, + ) -> ParseResult> { if !self.eat(t!("GROUP")) { return Ok(None); } - let res = match self.peek_kind() { - t!("ALL") => { - self.pop_peek(); - Groups(Vec::new()) - } - t!("BY") => { - self.pop_peek(); - let mut groups = Groups(vec![Group(self.parse_basic_idiom()?)]); - while self.eat(t!(",")) { - groups.0.push(Group(self.parse_basic_idiom()?)); - } - groups - } - _ => { - let mut groups = Groups(vec![Group(self.parse_basic_idiom()?)]); - while self.eat(t!(",")) { - groups.0.push(Group(self.parse_basic_idiom()?)); - } - groups - } - }; + if self.eat(t!("ALL")) { + return Ok(Some(Groups(Vec::new()))); + } - Ok(Some(res)) + self.eat(t!("BY")); + + let has_all = fields.contains(&Field::All); + + let before = self.peek().span; + let group = self.parse_basic_idiom()?; + let group_span = 
before.covers(self.last_span()); + if !has_all { + Self::check_idiom(MissingKind::Group, fields, fields_span, &group, group_span)?; + } + + let mut groups = Groups(vec![Group(group)]); + while self.eat(t!(",")) { + let before = self.peek().span; + let group = self.parse_basic_idiom()?; + let group_span = before.covers(self.last_span()); + if !has_all { + Self::check_idiom(MissingKind::Group, fields, fields_span, &group, group_span)?; + } + groups.0.push(Group(group)); + } + + Ok(Some(groups)) } /// Parse a permissions production @@ -287,7 +343,9 @@ impl Parser<'_> { /// parens. Expects the next keyword to be `SELECT`. pub fn parse_view(&mut self) -> ParseResult { expected!(self, t!("SELECT")); + let before_fields = self.peek().span; let fields = self.parse_fields()?; + let fields_span = before_fields.covers(self.recent_span()); expected!(self, t!("FROM")); let mut from = vec![self.next_token_value()?]; while self.eat(t!(",")) { @@ -295,7 +353,7 @@ impl Parser<'_> { } let cond = self.try_parse_condition()?; - let group = self.try_parse_group()?; + let group = self.try_parse_group(&fields, fields_span)?; Ok(View { expr: fields, diff --git a/core/src/syn/v2/parser/stmt/select.rs b/core/src/syn/v2/parser/stmt/select.rs index 48ed7a07..e661d124 100644 --- a/core/src/syn/v2/parser/stmt/select.rs +++ b/core/src/syn/v2/parser/stmt/select.rs @@ -1,21 +1,23 @@ use crate::{ sql::{ - statements::SelectStatement, Explain, Ident, Idioms, Limit, Order, Orders, Split, Splits, - Start, Values, Version, With, + statements::SelectStatement, Explain, Field, Fields, Ident, Idioms, Limit, Order, Orders, + Split, Splits, Start, Values, Version, With, }, syn::v2::{ parser::{ + error::MissingKind, mac::{expected, unexpected}, ParseResult, Parser, }, - token::t, + token::{t, Span}, }, }; impl Parser<'_> { pub(crate) fn parse_select_stmt(&mut self) -> ParseResult { - // + let before = self.peek().span; let expr = self.parse_fields()?; + let fields_span = before.covers(self.last_span()); let 
omit = self.eat(t!("OMIT")).then(|| self.parse_idiom_list()).transpose()?.map(Idioms); @@ -31,9 +33,9 @@ impl Parser<'_> { let with = self.try_parse_with()?; let cond = self.try_parse_condition()?; - let split = self.try_parse_split()?; - let group = self.try_parse_group()?; - let order = self.try_parse_orders()?; + let split = self.try_parse_split(&expr, fields_span)?; + let group = self.try_parse_group(&expr, fields_span)?; + let order = self.try_parse_orders(&expr, fields_span)?; let (limit, start) = if let t!("START") = self.peek_kind() { let start = self.try_parse_start()?; let limit = self.try_parse_limit()?; @@ -91,49 +93,83 @@ impl Parser<'_> { Ok(Some(with)) } - fn try_parse_split(&mut self) -> ParseResult> { + fn try_parse_split( + &mut self, + fields: &Fields, + fields_span: Span, + ) -> ParseResult> { if !self.eat(t!("SPLIT")) { return Ok(None); } self.eat(t!("ON")); - let mut res = vec![Split(self.parse_basic_idiom()?)]; + let has_all = fields.contains(&Field::All); + + let before = self.peek().span; + let split = self.parse_basic_idiom()?; + let split_span = before.covers(self.last_span()); + if !has_all { + Self::check_idiom(MissingKind::Split, fields, fields_span, &split, split_span)?; + } + + let mut res = vec![Split(split)]; while self.eat(t!(",")) { - res.push(Split(self.parse_basic_idiom()?)); + let before = self.peek().span; + let split = self.parse_basic_idiom()?; + let split_span = before.covers(self.last_span()); + if !has_all { + Self::check_idiom(MissingKind::Split, fields, fields_span, &split, split_span)?; + } + res.push(Split(split)) } Ok(Some(Splits(res))) } - fn try_parse_orders(&mut self) -> ParseResult> { + fn try_parse_orders( + &mut self, + fields: &Fields, + fields_span: Span, + ) -> ParseResult> { if !self.eat(t!("ORDER")) { return Ok(None); } self.eat(t!("BY")); - let orders = match self.peek_kind() { - t!("RAND") => { - self.pop_peek(); - let start = expected!(self, t!("(")).span; - self.expect_closing_delimiter(t!(")"), 
start)?; - vec![Order { - order: Default::default(), - random: true, - collate: false, - numeric: false, - direction: true, - }] - } - _ => { - let mut orders = vec![self.parse_order()?]; - while self.eat(t!(",")) { - orders.push(self.parse_order()?); - } - orders - } + if let t!("RAND") = self.peek_kind() { + self.pop_peek(); + let start = expected!(self, t!("(")).span; + self.expect_closing_delimiter(t!(")"), start)?; + return Ok(Some(Orders(vec![Order { + order: Default::default(), + random: true, + collate: false, + numeric: false, + direction: true, + }]))); }; + let has_all = fields.contains(&Field::All); + + let before = self.recent_span(); + let order = self.parse_order()?; + let order_span = before.covers(self.last_span()); + if !has_all { + Self::check_idiom(MissingKind::Order, fields, fields_span, &order, order_span)?; + } + + let mut orders = vec![order]; + while self.eat(t!(",")) { + let before = self.recent_span(); + let order = self.parse_order()?; + let order_span = before.covers(self.last_span()); + if !has_all { + Self::check_idiom(MissingKind::Order, fields, fields_span, &order, order_span)?; + } + orders.push(order) + } + Ok(Some(Orders(orders))) } diff --git a/core/src/syn/v2/parser/thing.rs b/core/src/syn/v2/parser/thing.rs index 4f29f0b5..1d2938f8 100644 --- a/core/src/syn/v2/parser/thing.rs +++ b/core/src/syn/v2/parser/thing.rs @@ -93,8 +93,27 @@ impl Parser<'_> { end, }))) } else { - let Bound::Included(id) = beg else { - unexpected!(self, self.peek_kind(), "the range operator '..'") + let id = match beg { + Bound::Unbounded => { + if self.peek_kind() == t!("$param") { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: t!("$param"), + expected: "a record-id id", + explain: "you can create a record-id from a param with the function 'type::thing'", + }, + self.recent_span(), + )); + } + + // we haven't matched anything so far so we still want any type of id. 
+ unexpected!(self, self.peek_kind(), "a record-id id") + } + Bound::Excluded(_) => { + // we have matched a bounded id but we don't see a range operator. + unexpected!(self, self.peek_kind(), "the range operator `..`") + } + Bound::Included(id) => id, }; Ok(Value::Thing(Thing { tb: ident, diff --git a/core/src/syn/v2/token/mod.rs b/core/src/syn/v2/token/mod.rs index 03eb20aa..dc9cd9b3 100644 --- a/core/src/syn/v2/token/mod.rs +++ b/core/src/syn/v2/token/mod.rs @@ -42,6 +42,14 @@ impl Span { len, } } + + // returns a zero-length span that starts after the current span. + pub fn after(self) -> Span { + Span { + offset: self.offset + self.len, + len: 0, + } + } } #[repr(u8)] @@ -124,42 +132,42 @@ pub enum Operator { impl Operator { fn as_str(&self) -> &'static str { match self { - Operator::Not => "'!'", - Operator::Add => "'+'", - Operator::Subtract => "'-'", - Operator::Divide => "'÷'", - Operator::Or => "'||'", - Operator::And => "'&&'", - Operator::Mult => "'×'", - Operator::LessEqual => "'<='", - Operator::GreaterEqual => "'>='", - Operator::Star => "'*'", - Operator::Power => "'**'", - Operator::Equal => "'='", - Operator::Exact => "'=='", - Operator::NotEqual => "'!='", - Operator::AllEqual => "'*='", - Operator::AnyEqual => "'?='", - Operator::Like => "'~'", - Operator::NotLike => "'!~'", - Operator::AllLike => "'*~'", - Operator::AnyLike => "'?~'", - Operator::Contains => "'∋'", - Operator::NotContains => "'∌'", - Operator::ContainsAll => "'⊇'", - Operator::ContainsAny => "'⊃'", - Operator::ContainsNone => "'⊅'", - Operator::Inside => "'∈'", - Operator::NotInside => "'∉'", - Operator::AllInside => "'⊆'", - Operator::AnyInside => "'⊂'", - Operator::NoneInside => "'⊄'", - Operator::Matches => "'@@'", - Operator::Inc => "'+='", - Operator::Dec => "'-='", - Operator::Ext => "'+?='", - Operator::Tco => "'?:'", - Operator::Nco => "'??'", + Operator::Not => "!", + Operator::Add => "+", + Operator::Subtract => "-", + Operator::Divide => "÷", + Operator::Or => "||", 
+ Operator::And => "&&", + Operator::Mult => "×", + Operator::LessEqual => "<=", + Operator::GreaterEqual => ">=", + Operator::Star => "*", + Operator::Power => "**", + Operator::Equal => "=", + Operator::Exact => "==", + Operator::NotEqual => "!=", + Operator::AllEqual => "*=", + Operator::AnyEqual => "?=", + Operator::Like => "~", + Operator::NotLike => "!~", + Operator::AllLike => "*~", + Operator::AnyLike => "?~", + Operator::Contains => "∋", + Operator::NotContains => "∌", + Operator::ContainsAll => "⊇", + Operator::ContainsAny => "⊃", + Operator::ContainsNone => "⊅", + Operator::Inside => "∈", + Operator::NotInside => "∉", + Operator::AllInside => "⊆", + Operator::AnyInside => "⊂", + Operator::NoneInside => "⊄", + Operator::Matches => "@@", + Operator::Inc => "+=", + Operator::Dec => "-=", + Operator::Ext => "+?=", + Operator::Tco => "?:", + Operator::Nco => "??", } } } @@ -308,11 +316,31 @@ impl TokenKind { ) } + fn algorithm_as_str(algo: Algorithm) -> &'static str { + match algo { + Algorithm::EdDSA => "EDDSA", + Algorithm::Es256 => "ES256", + Algorithm::Es384 => "ES384", + Algorithm::Es512 => "ES512", + Algorithm::Hs256 => "HS256", + Algorithm::Hs384 => "HS384", + Algorithm::Hs512 => "HS512", + Algorithm::Ps256 => "PS256", + Algorithm::Ps384 => "PS384", + Algorithm::Ps512 => "PS512", + Algorithm::Rs256 => "RS256", + Algorithm::Rs384 => "RS384", + Algorithm::Rs512 => "RS512", + #[cfg(feature = "sql2")] + Algorithm::Jwks => "JWKS", + } + } + pub fn as_str(&self) -> &'static str { match *self { TokenKind::Keyword(x) => x.as_str(), TokenKind::Operator(x) => x.as_str(), - TokenKind::Algorithm(_) => todo!(), + TokenKind::Algorithm(x) => Self::algorithm_as_str(x), TokenKind::Language(x) => x.as_str(), TokenKind::Distance(x) => x.as_str(), TokenKind::OpenDelim(Delim::Paren) => "(", @@ -335,24 +363,24 @@ impl TokenKind { TokenKind::Number(_) => "a number", TokenKind::Identifier => "an identifier", TokenKind::Regex => "a regex", - TokenKind::LeftChefron => "'<'", - 
TokenKind::RightChefron => "'>'", - TokenKind::Star => "'*'", - TokenKind::Dollar => "'$'", - TokenKind::Question => "'?'", - TokenKind::ArrowRight => "'->'", - TokenKind::ArrowLeft => "'<-'", - TokenKind::BiArrow => "'<->'", - TokenKind::ForwardSlash => "'/'", - TokenKind::Dot => "'.'", - TokenKind::DotDot => "'..'", - TokenKind::DotDotDot => "'...'", - TokenKind::SemiColon => "';'", - TokenKind::PathSeperator => "'::'", - TokenKind::Colon => "':'", - TokenKind::Comma => "','", - TokenKind::Vert => "'|'", - TokenKind::At => "'@'", + TokenKind::LeftChefron => "<", + TokenKind::RightChefron => ">", + TokenKind::Star => "*", + TokenKind::Dollar => "$", + TokenKind::Question => "?", + TokenKind::ArrowRight => "->", + TokenKind::ArrowLeft => "<-", + TokenKind::BiArrow => "<->", + TokenKind::ForwardSlash => "/", + TokenKind::Dot => ".", + TokenKind::DotDot => "..", + TokenKind::DotDotDot => "...", + TokenKind::SemiColon => ";", + TokenKind::PathSeperator => "::", + TokenKind::Colon => ":", + TokenKind::Comma => ",", + TokenKind::Vert => "|", + TokenKind::At => "@", TokenKind::Invalid => "Invalid", TokenKind::Eof => "Eof", }