Introduce new experimental parser (#2885)
Co-authored-by: Raphael Darley <raphael@raphaeldarley.com>
This commit is contained in:
parent f7e6e028a2
commit 2755f572fc
152 changed files with 14640 additions and 823 deletions
39
.github/workflows/ci.yml
vendored
@ -326,6 +326,45 @@ jobs:
          path: target/llvm-cov/html/
          retention-days: 5

  test-parser:
    name: Test workspace with experimental parser
    runs-on: ubuntu-latest
    steps:

      - name: Install stable toolchain
        uses: dtolnay/rust-toolchain@stable
        with:
          toolchain: 1.71.1

      - name: Checkout sources
        uses: actions/checkout@v4

      - name: Setup cache
        uses: Swatinem/rust-cache@v2
        with:
          save-if: ${{ github.ref == 'refs/heads/main' }}

      - name: Install dependencies
        run: |
          sudo apt-get -y update

      - name: Free up some disk space
        run: |
          (set -x; df -h)
          # Free up some disk space by removing unused files
          (set -x; sudo rm -rf /imagegeneration || true)
          (set -x; sudo rm -rf /opt/az || true)
          (set -x; sudo rm -rf /opt/hostedtoolcache || true)
          (set -x; sudo rm -rf /opt/google || true)
          (set -x; sudo rm -rf /opt/pipx || true)
          (set -x; df -h)

      - name: Install cargo-make
        run: cargo install --debug --locked cargo-make

      - name: Test workspace for experimental_parser
        run: cargo make test-experimental-parser

  ws-engine:
    name: WebSocket engine
    runs-on: ubuntu-latest
767
Cargo.lock
generated
File diff suppressed because it is too large
@ -8,7 +8,7 @@ authors = ["Tobie Morgan Hitchcock <tobie@surrealdb.com>"]

[features]
# Public features
default = ["storage-mem", "storage-rocksdb", "scripting", "http", "jwks"]
default = ["storage-mem", "storage-rocksdb", "scripting", "http"]
storage-mem = ["surrealdb/kv-mem"]
storage-rocksdb = ["surrealdb/kv-rocksdb"]
storage-speedb = ["surrealdb/kv-speedb"]

@ -18,6 +18,7 @@ scripting = ["surrealdb/scripting"]
http = ["surrealdb/http"]
http-compression = []
ml = ["surrealdb/ml", "surrealml-core"]
experimental-parser = ["surrealdb/experimental-parser"]
jwks = ["surrealdb/jwks"]

[workspace]

@ -60,6 +61,7 @@ reqwest = { version = "0.11.22", default-features = false, features = ["blocking
rmpv = "1.0.1"
rustyline = { version = "12.0.0", features = ["derive"] }
serde = { version = "1.0.193", features = ["derive"] }
serde_cbor = "0.11.2"
serde_json = "1.0.108"
serde_pack = { version = "1.1.2", package = "rmp-serde" }
surrealdb = { path = "lib", features = ["protocol-http", "protocol-ws", "rustls"] }
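The hunk above wires the new `experimental-parser` feature through to the `surrealdb` crate, making the second parser opt-in at compile time. As a rough, self-contained sketch (not SurrealDB's actual code), this is the `cfg` pattern such a feature flag typically drives, mirroring the module switch that appears later in this diff in `lib/src/syn/mod.rs`; the module bodies here are placeholders:

```rust
// Minimal sketch of compile-time parser selection behind a Cargo feature.
// The module names `v1`/`v2` and the feature name mirror this diff; the
// bodies are placeholders, not the real SurrealDB parsers.
#[cfg(not(feature = "experimental-parser"))]
mod v1 {
    pub fn parse(input: &str) -> String {
        format!("v1 parsed: {input}")
    }
}

#[cfg(feature = "experimental-parser")]
mod v2 {
    pub fn parse(input: &str) -> String {
        format!("v2 parsed: {input}")
    }
}

// Re-export whichever implementation was compiled in, so callers are
// unaffected by the feature flag.
#[cfg(not(feature = "experimental-parser"))]
use v1::parse;
#[cfg(feature = "experimental-parser")]
use v2::parse;

fn main() {
    println!("{}", parse("SELECT * FROM person"));
}
```

Building with the new parser then comes down to enabling the feature, e.g. `cargo build --features experimental-parser`, or via the `test-experimental-parser` task added further down in this diff.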
126
Cargo.toml.orig
Normal file
|
@ -0,0 +1,126 @@
|
|||
[package]
|
||||
name = "surreal"
|
||||
publish = false
|
||||
edition = "2021"
|
||||
version = "1.1.0"
|
||||
license-file = "LICENSE"
|
||||
authors = ["Tobie Morgan Hitchcock <tobie@surrealdb.com>"]
|
||||
|
||||
[features]
|
||||
# Public features
|
||||
default = ["storage-mem", "storage-rocksdb", "scripting", "http", "jwks"]
|
||||
storage-mem = ["surrealdb/kv-mem"]
|
||||
storage-rocksdb = ["surrealdb/kv-rocksdb"]
|
||||
storage-speedb = ["surrealdb/kv-speedb"]
|
||||
storage-tikv = ["surrealdb/kv-tikv"]
|
||||
storage-fdb = ["surrealdb/kv-fdb-7_1"]
|
||||
scripting = ["surrealdb/scripting"]
|
||||
http = ["surrealdb/http"]
|
||||
http-compression = []
|
||||
ml = ["surrealdb/ml", "surrealml-core"]
|
||||
<<<<<<< HEAD
|
||||
experimental-parser = ["surrealdb/experimental-parser"]
|
||||
=======
|
||||
jwks = ["surrealdb/jwks"]
|
||||
>>>>>>> upstream/main
|
||||
|
||||
[workspace]
|
||||
members = ["lib", "lib/examples/actix", "lib/examples/axum"]
|
||||
|
||||
[profile.release]
|
||||
lto = true
|
||||
strip = true
|
||||
opt-level = 3
|
||||
panic = 'abort'
|
||||
codegen-units = 1
|
||||
|
||||
[profile.bench]
|
||||
strip = false
|
||||
|
||||
[dependencies]
|
||||
argon2 = "0.5.2"
|
||||
axum = { version = "0.6.20", features = ["tracing", "ws", "headers"] }
|
||||
axum-client-ip = "0.5.0"
|
||||
axum-extra = { version = "0.7.7", features = ["query", "typed-routing"] }
|
||||
axum-server = { version = "0.5.1", features = ["tls-rustls"] }
|
||||
base64 = "0.21.5"
|
||||
bytes = "1.5.0"
|
||||
ciborium = "0.2.1"
|
||||
clap = { version = "4.4.11", features = ["env", "derive", "wrap_help", "unicode"] }
|
||||
futures = "0.3.29"
|
||||
futures-util = "0.3.29"
|
||||
glob = "0.3.1"
|
||||
http = "0.2.11"
|
||||
http-body = "0.4.5"
|
||||
hyper = "0.14.27"
|
||||
ipnet = "2.9.0"
|
||||
ndarray = { version = "0.15.6", optional = true }
|
||||
once_cell = "1.18.0"
|
||||
opentelemetry = { version = "0.19", features = ["rt-tokio"] }
|
||||
opentelemetry-otlp = { version = "0.12.0", features = ["metrics"] }
|
||||
pin-project-lite = "0.2.13"
|
||||
rand = "0.8.5"
|
||||
reqwest = { version = "0.11.22", default-features = false, features = ["blocking", "gzip"] }
|
||||
rmpv = "1.0.1"
|
||||
rustyline = { version = "12.0.0", features = ["derive"] }
|
||||
serde = { version = "1.0.193", features = ["derive"] }
|
||||
serde_cbor = "0.11.2"
|
||||
serde_json = "1.0.108"
|
||||
serde_pack = { version = "1.1.2", package = "rmp-serde" }
|
||||
surrealdb = { path = "lib", features = ["protocol-http", "protocol-ws", "rustls"] }
|
||||
surrealml-core = { version = "0.0.3", optional = true}
|
||||
tempfile = "3.8.1"
|
||||
thiserror = "1.0.50"
|
||||
tokio = { version = "1.34.0", features = ["macros", "signal"] }
|
||||
tokio-util = { version = "0.7.10", features = ["io"] }
|
||||
tower = "0.4.13"
|
||||
tower-http = { version = "0.4.4", features = ["trace", "sensitive-headers", "auth", "request-id", "util", "catch-panic", "cors", "set-header", "limit", "add-extension", "compression-full"] }
|
||||
tracing = "0.1"
|
||||
tracing-opentelemetry = "0.19.0"
|
||||
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
|
||||
urlencoding = "2.1.3"
|
||||
uuid = { version = "1.6.1", features = ["serde", "js", "v4", "v7"] }
|
||||
|
||||
[target.'cfg(unix)'.dependencies]
|
||||
nix = { version = "0.27.1", features = ["user"] }
|
||||
|
||||
[target.'cfg(unix)'.dev-dependencies]
|
||||
nix = { version = "0.27.1", features = ["signal", "user"] }
|
||||
|
||||
[target.'cfg(any(target_os = "linux", target_os = "macos", target_os = "ios"))'.dependencies]
|
||||
mimalloc = { version = "0.1.39", default-features = false }
|
||||
|
||||
[target.'cfg(any(target_os = "android", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))'.dependencies]
|
||||
jemallocator = "0.5.4"
|
||||
|
||||
[dev-dependencies]
|
||||
assert_fs = "1.0.13"
|
||||
env_logger = "0.10.1"
|
||||
opentelemetry-proto = { version = "0.2.0", features = ["gen-tonic", "traces", "metrics", "logs"] }
|
||||
rcgen = "0.11.3"
|
||||
serial_test = "2.0.0"
|
||||
temp-env = { version = "0.3.6", features = ["async_closure"] }
|
||||
test-log = { version = "0.2.13", features = ["trace"] }
|
||||
tokio-stream = { version = "0.1", features = ["net"] }
|
||||
tokio-tungstenite = { version = "0.20.1" }
|
||||
tonic = "0.8.3"
|
||||
ulid = "1.1.0"
|
||||
wiremock = "0.5.22"
|
||||
|
||||
[build-dependencies]
|
||||
semver = "1.0.20"
|
||||
|
||||
[package.metadata.deb]
|
||||
maintainer-scripts = "pkg/deb/"
|
||||
maintainer = "Tobie Morgan Hitchcock <tobie@surrealdb.com>"
|
||||
copyright = "SurrealDB Ltd. 2022"
|
||||
systemd-units = { enable = true }
|
||||
depends = "$auto"
|
||||
section = "utility"
|
||||
priority = "optional"
|
||||
assets = [
|
||||
["target/release/surreal", "usr/share/surrealdb/surreal", "755"],
|
||||
["pkg/deb/README", "usr/share/surrealdb/README", "644"],
|
||||
]
|
||||
extended-description = "A scalable, distributed, collaborative, document-graph database, for the realtime web."
|
||||
license-file = ["LICENSE", "4"]
|
|
@ -56,6 +56,18 @@ args = [
  "--skip", "ws_integration"
]

[tasks.test-experimental-parser]
category = "CI - INTEGRATION TESTS"
command = "cargo"
args = [
  "test", "--locked", "--no-default-features", "--features", "storage-mem,scripting,http,experimental-parser", "--workspace", "--",
  "--skip", "api_integration",
  "--skip", "cli_integration",
  "--skip", "http_integration",
  "--skip", "ws_integration"
]

[tasks.test-workspace-coverage-complete]
category = "CI - INTEGRATION TESTS"
command = "cargo"
|
|
|
@ -40,6 +40,7 @@ rustls = ["dep:rustls", "reqwest?/rustls-tls", "tokio-tungstenite?/rustls-tls-we
|
|||
ml = ["surrealml-core", "ndarray"]
|
||||
jwks = ["dep:reqwest"]
|
||||
arbitrary = ["dep:arbitrary", "dep:regex-syntax", "rust_decimal/rust-fuzz", "geo-types/arbitrary", "uuid/arbitrary"]
|
||||
experimental-parser = ["dep:phf", "dep:unicase"]
|
||||
# Private features
|
||||
kv-fdb = ["foundationdb", "tokio/time"]
|
||||
|
||||
|
@ -123,6 +124,8 @@ tracing = "0.1.40"
|
|||
trice = "0.4.0"
|
||||
ulid = { version = "1.1.0", features = ["serde"] }
|
||||
url = "2.5.0"
|
||||
phf = { version = "0.11.2", features = ["macros", "unicase"], optional=true }
|
||||
unicase = { version = "2.7.0", optional = true }
|
||||
arbitrary = { version = "1.3.2", features = ["derive"], optional = true }
|
||||
regex-syntax = { version = "0.8.2", optional = true, features = ["arbitrary"] }
|
||||
geo-types = { version = "0.7.12", features = ["arbitrary"] }
|
||||
|
|
|
@ -59,6 +59,7 @@ fn bench_parser(c: &mut Criterion) {
|
|||
&(1..=100).map(|n| format!("'{n}': {n}")).collect::<Vec<_>>().join(", ")
|
||||
)
|
||||
);
|
||||
parser!(c, full_test, surrealdb::sql::parse, include_str!("../test.surql"));
|
||||
c.finish();
|
||||
}
|
||||
|
||||
|
|
1
lib/fuzz/Cargo.lock
generated
|
@ -2584,7 +2584,6 @@ dependencies = [
|
|||
"futures-concurrency",
|
||||
"fuzzy-matcher",
|
||||
"geo 0.27.0",
|
||||
"geo-types",
|
||||
"hex",
|
||||
"indexmap 2.1.0",
|
||||
"ipnet",
|
||||
|
|
|
@ -255,7 +255,7 @@ mod tests {
|
|||
use crate::dbs::Capabilities;
|
||||
use crate::opt::auth::Root;
|
||||
use crate::sql::Value;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn local_engine_without_auth() {
|
||||
|
|
|
@ -153,6 +153,7 @@ struct IntervalStream {
|
|||
}
|
||||
|
||||
impl IntervalStream {
|
||||
#[allow(unused)]
|
||||
fn new(interval: Interval) -> Self {
|
||||
Self {
|
||||
inner: interval,
|
||||
|
|
|
@ -177,7 +177,8 @@ impl<T: Target + Hash + Eq + PartialEq + std::fmt::Display> std::fmt::Display fo
|
|||
/// # Examples
|
||||
///
|
||||
/// Create a new instance, and allow all capabilities
|
||||
/// ```no_run
|
||||
#[cfg_attr(feature = "kv-rocksdb", doc = "```no_run")]
|
||||
#[cfg_attr(not(feature = "kv-rocksdb"), doc = "```ignore")]
|
||||
/// # use surrealdb::opt::capabilities::Capabilities;
|
||||
/// # use surrealdb::opt::Config;
|
||||
/// # use surrealdb::Surreal;
|
||||
|
@ -192,7 +193,8 @@ impl<T: Target + Hash + Eq + PartialEq + std::fmt::Display> std::fmt::Display fo
|
|||
/// ```
|
||||
///
|
||||
/// Create a new instance, and allow certain functions
|
||||
/// ```no_run
|
||||
#[cfg_attr(feature = "kv-rocksdb", doc = "```no_run")]
|
||||
#[cfg_attr(not(feature = "kv-rocksdb"), doc = "```ignore")]
|
||||
/// # use std::str::FromStr;
|
||||
/// # use surrealdb::engine::local::File;
|
||||
/// # use surrealdb::opt::capabilities::Capabilities;
|
||||
|
|
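The two hunks above replace plain ```no_run fences in the capabilities doc comments with `cfg_attr`-selected `doc` attributes, so the examples compile as doc tests only when the `kv-rocksdb` backend is available and are marked `ignore` otherwise. A hedged, self-contained illustration of the same pattern follows; the `demo-storage` feature name and the function are made up for this sketch:

```rust
/// Gate a single doc example on a feature: it is compiled (but not run) as a
/// doc test when `demo-storage` is enabled, and skipped entirely otherwise.
#[cfg_attr(feature = "demo-storage", doc = "```no_run")]
#[cfg_attr(not(feature = "demo-storage"), doc = "```ignore")]
/// let answer = 6 * 7;
/// assert_eq!(answer, 42);
/// ```
pub fn documented_item() {}

fn main() {
    documented_item();
}
```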
|
@ -423,6 +423,7 @@ pub async fn asynchronous(
|
|||
mod tests {
|
||||
#[cfg(all(feature = "scripting", feature = "kv-mem"))]
|
||||
use crate::dbs::Capabilities;
|
||||
use crate::sql::{statements::OutputStatement, Function, Query, Statement, Value};
|
||||
|
||||
#[tokio::test]
|
||||
async fn implementations_are_present() {
|
||||
|
@ -442,8 +443,28 @@ mod tests {
|
|||
let (quote, _) = line.split_once("=>").unwrap();
|
||||
let name = quote.trim().trim_matches('"');
|
||||
|
||||
let builtin_name = crate::syn::test::builtin_name(name);
|
||||
if builtin_name.is_err() {
|
||||
let res = crate::syn::parse(&format!("RETURN {}()", name));
|
||||
if let Ok(Query(mut x)) = res {
|
||||
match x.0.pop() {
|
||||
Some(Statement::Output(OutputStatement {
|
||||
what: Value::Function(x),
|
||||
..
|
||||
})) => match *x {
|
||||
Function::Normal(parsed_name, _) => {
|
||||
if parsed_name != name {
|
||||
problems
|
||||
.push(format!("function `{name}` parsed as `{parsed_name}`"));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
problems.push(format!("couldn't parse {name} function"));
|
||||
}
|
||||
},
|
||||
_ => {
|
||||
problems.push(format!("couldn't parse {name} function"));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
problems.push(format!("couldn't parse {name} function"));
|
||||
}
|
||||
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
#[cfg(feature = "http")]
|
||||
mod fetch;
|
||||
|
|
|
@ -14,10 +14,10 @@ use std::str::{self, FromStr};
|
|||
use std::sync::Arc;
|
||||
|
||||
async fn config(
|
||||
kvs: &Datastore,
|
||||
_kvs: &Datastore,
|
||||
de_kind: Algorithm,
|
||||
de_code: String,
|
||||
token_header: Header,
|
||||
_token_header: Header,
|
||||
) -> Result<(DecodingKey, Validation), Error> {
|
||||
if de_kind == Algorithm::Jwks {
|
||||
#[cfg(not(feature = "jwks"))]
|
||||
|
@ -27,8 +27,8 @@ async fn config(
|
|||
}
|
||||
#[cfg(feature = "jwks")]
|
||||
// The key identifier header must be present
|
||||
if let Some(kid) = token_header.kid {
|
||||
jwks::config(kvs, &kid, &de_code).await
|
||||
if let Some(kid) = _token_header.kid {
|
||||
jwks::config(_kvs, &kid, &de_code).await
|
||||
} else {
|
||||
Err(Error::MissingTokenHeader("kid".to_string()))
|
||||
}
|
||||
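The renames above (`kvs` to `_kvs`, `token_header` to `_token_header`) exist because the only code that reads those parameters sits behind `#[cfg(feature = "jwks")]`; with the feature disabled they would otherwise trigger unused-variable warnings. A small, self-contained sketch of the same idea, where the `demo` feature and the `lookup` function are hypothetical:

```rust
// `_store` is only read inside the cfg-gated branch; the leading underscore
// keeps the build warning-free when the `demo` feature is disabled.
fn lookup(_store: &str, code: &str) -> Option<String> {
    #[cfg(feature = "demo")]
    if !code.is_empty() {
        return Some(format!("{}:{}", _store, code));
    }
    // Fallback when the feature is disabled (or the key is empty).
    println!("no lookup performed for {code}");
    None
}

fn main() {
    println!("{:?}", lookup("kvs", "abc"));
}
```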
|
@ -1125,7 +1125,7 @@ mod tests {
|
|||
// Test with generic user identifier
|
||||
//
|
||||
{
|
||||
let resource_id = "user:2k9qnabxuxh8k4d5gfto".to_string();
|
||||
let resource_id = "user:`2k9qnabxuxh8k4d5gfto`".to_string();
|
||||
// Prepare the claims object
|
||||
let mut claims = claims.clone();
|
||||
claims.id = Some(resource_id.clone());
|
||||
|
@ -1254,6 +1254,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "jwks")]
|
||||
#[tokio::test]
|
||||
async fn test_token_scope_jwks() {
|
||||
use crate::opt::capabilities::{Capabilities, NetTarget, Targets};
|
||||
|
|
|
@ -8,8 +8,7 @@ use crate::idx::ft::postings::TermFrequency;
|
|||
use crate::idx::ft::terms::{TermId, Terms};
|
||||
use crate::sql::statements::DefineAnalyzerStatement;
|
||||
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
|
||||
use crate::sql::Value;
|
||||
use crate::syn::path_like;
|
||||
use crate::sql::{Function, Strand, Value};
|
||||
use async_recursion::async_recursion;
|
||||
use filter::Filter;
|
||||
use std::collections::hash_map::Entry;
|
||||
|
@ -194,26 +193,16 @@ impl Analyzer {
|
|||
txn: &Transaction,
|
||||
mut input: String,
|
||||
) -> Result<Tokens, Error> {
|
||||
if let Some(function_name) = &self.function {
|
||||
let fns = format!("fn::{function_name}(\"{input}\")");
|
||||
match path_like(&fns) {
|
||||
Ok(func_value) => {
|
||||
let val = func_value.compute(ctx, opt, txn, None).await?;
|
||||
if let Value::Strand(val) = val {
|
||||
input = val.0;
|
||||
} else {
|
||||
return Err(Error::InvalidFunction {
|
||||
name: function_name.to_string(),
|
||||
message: "The function should return a string.".to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Err(Error::InvalidFunction {
|
||||
name: function_name.to_string(),
|
||||
message: e.to_string(),
|
||||
})
|
||||
}
|
||||
if let Some(function_name) = self.function.clone() {
|
||||
let fns = Function::Custom(function_name.clone(), vec![Value::Strand(Strand(input))]);
|
||||
let val = fns.compute(ctx, opt, txn, None).await?;
|
||||
if let Value::Strand(val) = val {
|
||||
input = val.0;
|
||||
} else {
|
||||
return Err(Error::InvalidFunction {
|
||||
name: function_name,
|
||||
message: "The function should return a string.".to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
if let Some(t) = &self.tokenizers {
|
||||
|
|
|
@ -308,7 +308,7 @@ impl RangeQueryBuilder {
|
|||
mod tests {
|
||||
use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
|
||||
use crate::sql::{Array, Idiom, Value};
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
|
||||
|
|
|
@ -193,7 +193,7 @@ mod tests {
|
|||
#[test]
|
||||
fn key() {
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
let fk = Thing::parse("other:test");
|
||||
#[rustfmt::skip]
|
||||
let val = Graph::new(
|
||||
|
|
|
@ -90,8 +90,7 @@ mod tests {
|
|||
let dec = Thing::decode(&enc).unwrap();
|
||||
assert_eq!(val, dec);
|
||||
println!("---");
|
||||
//
|
||||
let id2 = "foo:['f8e238f2-e734-47b8-9a16-476b291bd78a']";
|
||||
let id2 = "foo:[u'f8e238f2-e734-47b8-9a16-476b291bd78a']";
|
||||
let thing = syn::thing(id2).expect("Failed to parse the ID");
|
||||
let id2 = thing.id;
|
||||
let val = Thing::new("testns", "testdb", "testtb", id2);
|
||||
|
|
|
@ -219,6 +219,18 @@ impl Datastore {
|
|||
#[allow(unused_variables)]
|
||||
let default_clock: Arc<RwLock<SizedClock>> =
|
||||
Arc::new(RwLock::new(SizedClock::System(SystemClock::new())));
|
||||
|
||||
// removes warning if no storage is enabled.
|
||||
#[cfg(not(any(
|
||||
feature = "kv-mem",
|
||||
feature = "kv-rocksdb",
|
||||
feature = "kv-speedb",
|
||||
feature = "kv-indxdb",
|
||||
feature = "kv-tikv",
|
||||
feature = "kv-fdb"
|
||||
)))]
|
||||
let _ = (clock_override, default_clock);
|
||||
|
||||
// Initiate the desired datastore
|
||||
let (inner, clock): (Result<Inner, Error>, Arc<RwLock<SizedClock>>) = match path {
|
||||
"memory" => {
|
||||
|
@ -340,7 +352,7 @@ impl Datastore {
|
|||
// The datastore path is not valid
|
||||
_ => {
|
||||
// use clock_override and default_clock to remove warning when no kv is enabled.
|
||||
let _ = (clock_override, default_clock);
|
||||
let _ = default_clock;
|
||||
info!("Unable to load the specified datastore {}", path);
|
||||
Err(Error::Ds("Unable to load the specified datastore".into()))
|
||||
}
|
||||
|
|
|
@ -25,6 +25,14 @@ mod tx;
|
|||
|
||||
mod clock;
|
||||
#[cfg(test)]
|
||||
#[cfg(any(
|
||||
feature = "kv-mem",
|
||||
feature = "kv-rocksdb",
|
||||
feature = "kv-speedb",
|
||||
feature = "kv-indxdb",
|
||||
feature = "kv-tikv",
|
||||
feature = "kv-fdb"
|
||||
))]
|
||||
mod tests;
|
||||
|
||||
pub use self::ds::*;
|
||||
|
|
|
@ -2,7 +2,7 @@ use revision::revisioned;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
|
||||
#[revisioned(revision = 1)]
|
||||
pub enum Algorithm {
|
||||
|
|
|
@ -80,7 +80,13 @@ pub fn duration(i: &str) -> IResult<&str, ()> {
|
|||
|
||||
pub fn field(i: &str) -> IResult<&str, ()> {
|
||||
peek(alt((
|
||||
value((), preceded(shouldbespace, tag_no_case("FROM"))),
|
||||
value(
|
||||
(),
|
||||
preceded(
|
||||
shouldbespace,
|
||||
alt((tag_no_case("FROM"), tag_no_case("TIMEOUT"), tag_no_case("PARALLEL"))),
|
||||
),
|
||||
),
|
||||
value((), char(';')),
|
||||
value((), eof),
|
||||
)))(i)
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
use nom::character::is_digit;
|
||||
use std::borrow::Cow;
|
||||
|
||||
const SINGLE: char = '\'';
|
||||
|
@ -54,9 +53,15 @@ pub fn quote_str(s: &str) -> String {
|
|||
|
||||
#[inline]
|
||||
pub fn quote_plain_str(s: &str) -> String {
|
||||
let mut ret = quote_str(s);
|
||||
#[cfg(not(feature = "experimental_parser"))]
|
||||
#[cfg(not(feature = "experimental-parser"))]
|
||||
{
|
||||
if crate::syn::thing(s).is_ok() {
|
||||
let mut ret = quote_str(s);
|
||||
ret.insert(0, 's');
|
||||
return ret;
|
||||
}
|
||||
|
||||
let mut ret = quote_str(s);
|
||||
// HACK: We need to prefix strands which look like records, uuids, or datetimes with an `s`
|
||||
// otherwise the strands will parsed as a different type when parsed again.
|
||||
// This is not required for the new parser.
|
||||
|
@ -64,13 +69,14 @@ pub fn quote_plain_str(s: &str) -> String {
|
|||
// directly to avoid having to create a common interface between the old and new parser.
|
||||
if crate::syn::v1::literal::uuid(&ret).is_ok()
|
||||
|| crate::syn::v1::literal::datetime(&ret).is_ok()
|
||||
|| crate::syn::thing(&ret).is_ok()
|
||||
{
|
||||
ret.insert(0, 's');
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
ret
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
quote_str(s)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
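For context on the `quote_plain_str` change above: with the old parser, a plain string whose contents happen to look like a record id (or uuid/datetime) must be written back out with an `s` prefix, otherwise re-parsing would turn it into a different value type; the new parser keeps strings as strings, so the prefix is skipped when `experimental-parser` is enabled. A rough, self-contained sketch of that round-trip concern follows, where `looks_like_record_id` is a crude stand-in for the real `crate::syn::thing` check:

```rust
// Hedged sketch of the "s-prefix" idea from the hunk above: strings that the
// old parser would re-interpret as record ids are prefixed with `s` so they
// stay strings when parsed again.
fn looks_like_record_id(s: &str) -> bool {
    // Very rough stand-in: a table:id shape with no spaces.
    s.split_once(':').map_or(false, |(table, id)| {
        !table.is_empty() && !id.is_empty() && !s.contains(' ')
    })
}

fn quote_plain_str(s: &str) -> String {
    let quoted = format!("'{}'", s.replace('\'', "\\'"));
    if looks_like_record_id(s) {
        // Prefix with `s` so the old parser keeps it as a string on re-parse.
        format!("s{quoted}")
    } else {
        quoted
    }
}

fn main() {
    assert_eq!(quote_plain_str("hello"), "'hello'");
    assert_eq!(quote_plain_str("person:tobie"), "s'person:tobie'");
    println!("ok");
}
```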
@ -106,24 +112,16 @@ pub fn escape_normal<'a>(s: &'a str, l: char, r: char, e: &str) -> Cow<'a, str>
|
|||
|
||||
#[inline]
|
||||
pub fn escape_numeric<'a>(s: &'a str, l: char, r: char, e: &str) -> Cow<'a, str> {
|
||||
// Presume this is numeric
|
||||
let mut numeric = true;
|
||||
// Loop over each character
|
||||
for x in s.bytes() {
|
||||
for (idx, x) in s.bytes().enumerate() {
|
||||
// the first character is not allowed to be a digit.
|
||||
if idx == 0 && x.is_ascii_digit() {
|
||||
return Cow::Owned(format!("{l}{}{r}", s.replace(r, e)));
|
||||
}
|
||||
// Check if character is allowed
|
||||
if !(x.is_ascii_alphanumeric() || x == b'_') {
|
||||
return Cow::Owned(format!("{l}{}{r}", s.replace(r, e)));
|
||||
}
|
||||
// Check if character is non-numeric
|
||||
if !is_digit(x) {
|
||||
numeric = false;
|
||||
}
|
||||
}
|
||||
// Output the id value
|
||||
match numeric {
|
||||
// This is numeric so escape it
|
||||
true => Cow::Owned(format!("{l}{}{r}", s.replace(r, e))),
|
||||
// No need to escape the value
|
||||
_ => Cow::Borrowed(s),
|
||||
}
|
||||
Cow::Borrowed(s)
|
||||
}
|
||||
|
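The rewritten `escape_numeric` above changes the rule from "escape if the identifier is entirely numeric" to "escape if the first character is a digit or any character falls outside `[A-Za-z0-9_]`", which is why the record-id test later in this diff now expects `⟨100test⟩` instead of `100test`. A self-contained sketch of the new behaviour (same shape as the hunk, trimmed of SurrealDB specifics):

```rust
use std::borrow::Cow;

// An identifier is wrapped in the delimiters `l`..`r` if its first character
// is a digit or if it contains anything other than ASCII alphanumerics and
// underscores; `e` is the escaped form of the closing delimiter.
fn escape_numeric<'a>(s: &'a str, l: char, r: char, e: &str) -> Cow<'a, str> {
    for (idx, x) in s.bytes().enumerate() {
        // A leading digit forces escaping (e.g. `100test` -> `⟨100test⟩`).
        if idx == 0 && x.is_ascii_digit() {
            return Cow::Owned(format!("{l}{}{r}", s.replace(r, e)));
        }
        // Any disallowed character also forces escaping.
        if !(x.is_ascii_alphanumeric() || x == b'_') {
            return Cow::Owned(format!("{l}{}{r}", s.replace(r, e)));
        }
    }
    Cow::Borrowed(s)
}

fn main() {
    assert_eq!(escape_numeric("100test", '⟨', '⟩', "\\⟩"), "⟨100test⟩");
    assert_eq!(escape_numeric("test100", '⟨', '⟩', "\\⟩"), "test100");
    println!("ok");
}
```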
|
|
@ -48,29 +48,6 @@ impl Expression {
|
|||
r,
|
||||
}
|
||||
}
|
||||
/// Augment an existing expression
|
||||
pub(crate) fn augment(mut self, l: Value, o: Operator) -> Self {
|
||||
match &mut self {
|
||||
Self::Binary {
|
||||
l: left,
|
||||
o: op,
|
||||
..
|
||||
} if o.precedence() >= op.precedence() => match left {
|
||||
Value::Expression(x) => {
|
||||
*x.as_mut() = std::mem::take(x).augment(l, o);
|
||||
self
|
||||
}
|
||||
_ => {
|
||||
*left = Self::new(l, o, std::mem::take(left)).into();
|
||||
self
|
||||
}
|
||||
},
|
||||
e => {
|
||||
let r = Value::from(std::mem::take(e));
|
||||
Self::new(l, o, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Expression {
|
||||
|
@ -132,6 +109,8 @@ impl Expression {
|
|||
let operand = v.compute(ctx, opt, txn, doc).await?;
|
||||
return match o {
|
||||
Operator::Neg => fnc::operate::neg(operand),
|
||||
// TODO: Check if it is a number?
|
||||
Operator::Add => Ok(operand),
|
||||
Operator::Not => fnc::operate::not(operand),
|
||||
op => unreachable!("{op:?} is not a unary op"),
|
||||
};
|
||||
|
|
|
@ -2,13 +2,13 @@ use crate::ctx::Context;
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::doc::CursorDoc;
|
||||
use crate::err::Error;
|
||||
use crate::sql::fmt::{fmt_separated_by, Fmt};
|
||||
use crate::sql::part::Next;
|
||||
use crate::sql::part::Part;
|
||||
use crate::sql::paths::{ID, IN, META, OUT};
|
||||
use crate::sql::value::Value;
|
||||
use md5::Digest;
|
||||
use md5::Md5;
|
||||
use crate::sql::{
|
||||
fmt::{fmt_separated_by, Fmt},
|
||||
part::Next,
|
||||
paths::{ID, IN, META, OUT},
|
||||
Part, Value,
|
||||
};
|
||||
use md5::{Digest, Md5};
|
||||
use revision::revisioned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
|
@ -73,6 +73,11 @@ impl From<&[Part]> for Idiom {
|
|||
Self(v.to_vec())
|
||||
}
|
||||
}
|
||||
impl From<Part> for Idiom {
|
||||
fn from(v: Part) -> Self {
|
||||
Self(vec![v])
|
||||
}
|
||||
}
|
||||
|
||||
impl Idiom {
|
||||
/// Appends a part to the end of this Idiom
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
use crate::sql::fmt::Fmt;
|
||||
use crate::sql::table::Table;
|
||||
use crate::sql::{fmt::Fmt, Table};
|
||||
use revision::revisioned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
|
|
|
@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
|
|||
use std::fmt;
|
||||
use std::fmt::Display;
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
|
||||
#[revisioned(revision = 1)]
|
||||
pub enum Language {
|
||||
|
@ -26,9 +26,9 @@ pub enum Language {
|
|||
Turkish,
|
||||
}
|
||||
|
||||
impl Display for Language {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(match self {
|
||||
impl Language {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Arabic => "ARABIC",
|
||||
Self::Danish => "DANISH",
|
||||
Self::Dutch => "DUTCH",
|
||||
|
@ -46,6 +46,12 @@ impl Display for Language {
|
|||
Self::Swedish => "SWEDISH",
|
||||
Self::Tamil => "TAMIL",
|
||||
Self::Turkish => "TURKISH",
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Language {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str(self.as_str())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -149,4 +149,4 @@ mod parser {
|
|||
pub use crate::syn::*;
|
||||
}
|
||||
|
||||
pub use self::parser::{idiom, json, parse, subquery, thing, v1::ParseError, value};
|
||||
pub use self::parser::{error::ParseError, idiom, json, parse, subquery, thing, value};
|
||||
|
|
|
@ -39,6 +39,7 @@ pub struct Order {
|
|||
pub random: bool,
|
||||
pub collate: bool,
|
||||
pub numeric: bool,
|
||||
/// true if the direction is ascending
|
||||
pub direction: bool,
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use crate::sql::fmt::Pretty;
|
||||
use crate::sql::statement::{Statement, Statements};
|
||||
use crate::sql::statements::{DefineStatement, RemoveStatement};
|
||||
use crate::sql::{Statement, Statements};
|
||||
use derive::Store;
|
||||
use revision::revisioned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
|
@ -23,6 +23,12 @@ pub struct DefineScopeStatement {
|
|||
pub comment: Option<Strand>,
|
||||
}
|
||||
|
||||
impl DefineScopeStatement {
|
||||
pub(crate) fn random_code() -> String {
|
||||
rand::thread_rng().sample_iter(&Alphanumeric).take(128).map(char::from).collect::<String>()
|
||||
}
|
||||
}
|
||||
|
||||
impl DefineScopeStatement {
|
||||
/// Process this type returning a computed simple Value
|
||||
pub(crate) async fn compute(
|
||||
|
@ -46,10 +52,6 @@ impl DefineScopeStatement {
|
|||
// Ok all good
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
||||
pub fn random_code() -> String {
|
||||
rand::thread_rng().sample_iter(&Alphanumeric).take(128).map(char::from).collect::<String>()
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for DefineScopeStatement {
|
||||
|
|
|
@ -47,6 +47,31 @@ impl From<(Base, &str, &str)> for DefineUserStatement {
|
|||
}
|
||||
|
||||
impl DefineUserStatement {
|
||||
pub(crate) fn from_parsed_values(name: Ident, base: Base, roles: Vec<Ident>) -> Self {
|
||||
DefineUserStatement {
|
||||
name,
|
||||
base,
|
||||
roles, // New users get the viewer role by default
|
||||
code: rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(128)
|
||||
.map(char::from)
|
||||
.collect::<String>(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn set_password(&mut self, password: &str) {
|
||||
self.hash = Argon2::default()
|
||||
.hash_password(password.as_bytes(), &SaltString::generate(&mut OsRng))
|
||||
.unwrap()
|
||||
.to_string()
|
||||
}
|
||||
|
||||
pub(crate) fn set_passhash(&mut self, passhash: String) {
|
||||
self.hash = passhash;
|
||||
}
|
||||
|
||||
/// Process this type returning a computed simple Value
|
||||
pub(crate) async fn compute(
|
||||
&self,
|
||||
|
|
|
@ -13,7 +13,9 @@ use std::fmt::{self, Display, Write};
|
|||
#[revisioned(revision = 1)]
|
||||
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
|
||||
pub struct IfelseStatement {
|
||||
/// The first if condition followed by a body, followed by any number of else if's
|
||||
pub exprs: Vec<(Value, Value)>,
|
||||
/// the final else body, if there is one
|
||||
pub close: Option<Value>,
|
||||
}
|
||||
|
||||
|
|
|
@ -3,6 +3,8 @@ use revision::revisioned;
|
|||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
|
||||
use crate::sql::escape::escape_ident;
|
||||
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Store, Hash)]
|
||||
#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
|
||||
#[revisioned(revision = 1)]
|
||||
|
@ -15,9 +17,11 @@ impl fmt::Display for UseStatement {
|
|||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.write_str("USE")?;
|
||||
if let Some(ref ns) = self.ns {
|
||||
let ns = escape_ident(ns);
|
||||
write!(f, " NS {ns}")?;
|
||||
}
|
||||
if let Some(ref db) = self.db {
|
||||
let db = escape_ident(db);
|
||||
write!(f, " DB {db}")?;
|
||||
}
|
||||
Ok(())
|
||||
|
|
|
@ -130,7 +130,7 @@ pub(crate) mod no_nul_bytes {
|
|||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
#[cfg(not(feature = "experimental_parser"))]
|
||||
#[cfg(not(feature = "experimental-parser"))]
|
||||
#[test]
|
||||
fn ensure_strands_are_prefixed() {
|
||||
use super::Strand;
|
||||
|
|
|
@ -75,7 +75,7 @@ impl TryFrom<Strand> for Thing {
|
|||
impl TryFrom<&str> for Thing {
|
||||
type Error = ();
|
||||
fn try_from(v: &str) -> Result<Self, Self::Error> {
|
||||
match syn::thing_raw(v) {
|
||||
match syn::thing(v) {
|
||||
Ok(v) => Ok(v),
|
||||
_ => Err(()),
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ impl Value {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn changed_none() {
|
||||
|
|
|
@ -12,7 +12,7 @@ impl Value {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn clear_value() {
|
||||
|
|
|
@ -92,7 +92,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn compare_none() {
|
||||
|
|
|
@ -97,7 +97,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn cut_none() {
|
||||
|
|
|
@ -30,7 +30,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn decrement_none() {
|
||||
|
|
|
@ -41,7 +41,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::dbs::test::mock;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn decrement_none() {
|
||||
|
|
|
@ -201,7 +201,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::dbs::test::mock;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn del_none() {
|
||||
|
|
|
@ -78,7 +78,7 @@ impl Value {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn diff_none() {
|
||||
|
|
|
@ -59,7 +59,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn each_none() {
|
||||
|
|
|
@ -53,7 +53,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn every_with_empty_objects_arrays() {
|
||||
|
|
|
@ -34,7 +34,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::dbs::test::mock;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn extend_array_value() {
|
||||
|
|
|
@ -250,7 +250,7 @@ mod tests {
|
|||
use crate::sql::id::Id;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::sql::thing::Thing;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn get_none() {
|
||||
|
|
|
@ -30,7 +30,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn increment_none() {
|
||||
|
|
|
@ -42,7 +42,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::dbs::test::mock;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn increment_none() {
|
||||
|
|
|
@ -24,7 +24,7 @@ impl Value {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn merge_none() {
|
||||
|
|
|
@ -86,7 +86,7 @@ impl Value {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn patch_add_simple() {
|
||||
|
|
|
@ -54,7 +54,7 @@ mod tests {
|
|||
use crate::sql::id::Id;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::sql::thing::Thing;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn pick_none() {
|
||||
|
|
|
@ -87,7 +87,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn put_none() {
|
||||
|
|
|
@ -19,7 +19,7 @@ impl Value {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn replace() {
|
||||
|
|
|
@ -13,7 +13,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::sql::id::Id;
|
||||
use crate::sql::thing::Thing;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn rid_none() {
|
||||
|
|
|
@ -692,7 +692,7 @@ mod tests {
|
|||
#[test]
|
||||
fn duration() {
|
||||
let duration = Duration::default();
|
||||
let value = to_value(&duration).unwrap();
|
||||
let value = to_value(duration).unwrap();
|
||||
let expected = Value::Duration(duration);
|
||||
assert_eq!(value, expected);
|
||||
assert_eq!(expected, to_value(&expected).unwrap());
|
||||
|
|
|
@ -159,7 +159,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::dbs::test::mock;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[tokio::test]
|
||||
async fn set_none() {
|
||||
|
|
|
@ -1087,7 +1087,8 @@ impl Value {
|
|||
| Value::Array(_)
|
||||
| Value::Param(_)
|
||||
| Value::Edges(_)
|
||||
| Value::Thing(_) => true,
|
||||
| Value::Thing(_)
|
||||
| Value::Table(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
@ -2774,7 +2775,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::uuid::Uuid;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn check_none() {
|
||||
|
|
|
@ -62,7 +62,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::idiom::Idiom;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn walk_blank() {
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
#[cfg(feature = "experimental-parser")]
|
||||
use super::v2::token::Span;
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
use std::ops::Range;
|
||||
|
||||
/// A human readable location inside a string.
|
||||
///
|
||||
/// Locations are 1 indexed, the first character on the first line being on line 1 column 1.
|
||||
|
@ -19,10 +24,9 @@ impl Location {
|
|||
.expect("tried to find location of substring in unrelated string");
|
||||
// Bytes of input prior to line being iteratated.
|
||||
let mut bytes_prior = 0;
|
||||
for (line_idx, line) in input.split('\n').enumerate() {
|
||||
// +1 for the '\n'
|
||||
let bytes_so_far = bytes_prior + line.len() + 1;
|
||||
if bytes_so_far > offset {
|
||||
for (line_idx, (line, seperator_offset)) in LineIterator::new(input).enumerate() {
|
||||
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
|
||||
if bytes_so_far >= offset {
|
||||
// found line.
|
||||
let line_offset = offset - bytes_prior;
|
||||
let column = line[..line_offset].chars().count();
|
||||
|
@ -37,16 +41,13 @@ impl Location {
|
|||
unreachable!()
|
||||
}
|
||||
|
||||
#[cfg(feature = "experimental_parser")]
|
||||
pub fn of_span_start(source: &str, span: Span) -> Self {
|
||||
// Bytes of input before substr.
|
||||
let offset = span.offset as usize;
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
pub fn of_offset(source: &str, offset: usize) -> Self {
|
||||
// Bytes of input prior to line being iteratated.
|
||||
let mut bytes_prior = 0;
|
||||
for (line_idx, line) in source.split('\n').enumerate() {
|
||||
// +1 for the '\n'
|
||||
let bytes_so_far = bytes_prior + line.len() + 1;
|
||||
if bytes_so_far > offset {
|
||||
for (line_idx, (line, seperator_offset)) in LineIterator::new(source).enumerate() {
|
||||
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
|
||||
if bytes_so_far >= offset {
|
||||
// found line.
|
||||
let line_offset = offset - bytes_prior;
|
||||
let column = line[..line_offset].chars().count();
|
||||
|
@ -61,31 +62,22 @@ impl Location {
|
|||
unreachable!()
|
||||
}
|
||||
|
||||
#[cfg(feature = "experimental_parser")]
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
pub fn of_span_start(source: &str, span: Span) -> Self {
|
||||
// Bytes of input before substr.
|
||||
|
||||
let offset = span.offset as usize;
|
||||
Self::of_offset(source, offset)
|
||||
}
|
||||
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
pub fn of_span_end(source: &str, span: Span) -> Self {
|
||||
// Bytes of input before substr.
|
||||
let offset = span.offset as usize + span.len as usize;
|
||||
// Bytes of input prior to line being iteratated.
|
||||
let mut bytes_prior = 0;
|
||||
for (line_idx, line) in source.split('\n').enumerate() {
|
||||
// +1 for the '\n'
|
||||
let bytes_so_far = bytes_prior + line.len() + 1;
|
||||
if bytes_so_far > offset {
|
||||
// found line.
|
||||
let line_offset = offset - bytes_prior;
|
||||
let column = line[..line_offset].chars().count();
|
||||
// +1 because line and column are 1 index.
|
||||
return Self {
|
||||
line: line_idx + 1,
|
||||
column: column + 1,
|
||||
};
|
||||
}
|
||||
bytes_prior = bytes_so_far;
|
||||
}
|
||||
unreachable!()
|
||||
Self::of_offset(source, offset)
|
||||
}
|
||||
|
||||
#[cfg(feature = "experimental_parser")]
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
pub fn range_of_span(source: &str, span: Span) -> Range<Self> {
|
||||
// Bytes of input before substr.
|
||||
let offset = span.offset as usize;
|
||||
|
@ -93,19 +85,18 @@ impl Location {
|
|||
|
||||
// Bytes of input prior to line being iteratated.
|
||||
let mut bytes_prior = 0;
|
||||
let mut iterator = source.split('\n').enumerate();
|
||||
let mut iterator = LineIterator::new(source).enumerate();
|
||||
let start = loop {
|
||||
let Some((line_idx, line)) = iterator.next() else {
|
||||
let Some((line_idx, (line, seperator_offset))) = iterator.next() else {
|
||||
panic!("tried to find location of span not belonging to string");
|
||||
};
|
||||
// +1 for the '\n'
|
||||
let bytes_so_far = bytes_prior + line.len() + 1;
|
||||
if bytes_so_far > offset {
|
||||
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
|
||||
if bytes_so_far >= offset {
|
||||
// found line.
|
||||
let line_offset = offset - bytes_prior;
|
||||
let column = line[..line_offset].chars().count();
|
||||
// +1 because line and column are 1 index.
|
||||
if bytes_so_far > end {
|
||||
if bytes_so_far >= end {
|
||||
// end is on the same line, finish immediatly.
|
||||
let line_offset = end - bytes_prior;
|
||||
let end_column = line[..line_offset].chars().count();
|
||||
|
@ -127,12 +118,11 @@ impl Location {
|
|||
};
|
||||
|
||||
loop {
|
||||
let Some((line_idx, line)) = iterator.next() else {
|
||||
let Some((line_idx, (line, seperator_offset))) = iterator.next() else {
|
||||
panic!("tried to find location of span not belonging to string");
|
||||
};
|
||||
// +1 for the '\n'
|
||||
let bytes_so_far = bytes_prior + line.len() + 1;
|
||||
if bytes_so_far > end {
|
||||
let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
|
||||
if bytes_so_far >= end {
|
||||
let line_offset = end - bytes_prior;
|
||||
let column = line[..line_offset].chars().count();
|
||||
return start..Self {
|
||||
|
@ -143,3 +133,93 @@ impl Location {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct LineIterator<'a> {
|
||||
current: &'a str,
|
||||
}
|
||||
|
||||
impl<'a> LineIterator<'a> {
|
||||
pub fn new(s: &'a str) -> Self {
|
||||
LineIterator {
|
||||
current: s,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for LineIterator<'a> {
|
||||
type Item = (&'a str, Option<u8>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if self.current.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let bytes = self.current.as_bytes();
|
||||
for i in 0..bytes.len() {
|
||||
match bytes[i] {
|
||||
b'\r' => {
|
||||
if let Some(b'\n') = bytes.get(i + 1) {
|
||||
let res = &self.current[..i];
|
||||
self.current = &self.current[i + 2..];
|
||||
return Some((res, Some(2)));
|
||||
}
|
||||
let res = &self.current[..i];
|
||||
self.current = &self.current[i + 1..];
|
||||
return Some((res, Some(1)));
|
||||
}
|
||||
0xb | 0xC | b'\n' => {
|
||||
// vertical tab VT and form feed FF.
|
||||
let res = &self.current[..i];
|
||||
self.current = &self.current[i + 1..];
|
||||
return Some((res, Some(1)));
|
||||
}
|
||||
0xc2 => {
|
||||
// next line NEL
|
||||
if bytes.get(i + 1).copied() != Some(0x85) {
|
||||
continue;
|
||||
}
|
||||
let res = &self.current[..i];
|
||||
self.current = &self.current[i + 2..];
|
||||
return Some((res, Some(2)));
|
||||
}
|
||||
0xe2 => {
|
||||
// line separator and paragraph seperator.
|
||||
if bytes.get(i + 1).copied() != Some(0x80) {
|
||||
continue;
|
||||
}
|
||||
let next_byte = bytes.get(i + 2).copied();
|
||||
if next_byte != Some(0xA8) && next_byte != Some(0xA9) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// vertical tab VT, next line NEL and form feed FF.
|
||||
let res = &self.current[..i];
|
||||
self.current = &self.current[i + 3..];
|
||||
return Some((res, Some(3)));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Some((std::mem::take(&mut self.current), None))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::LineIterator;
|
||||
|
||||
#[test]
|
||||
fn test_line_iterator() {
|
||||
let lines = "foo\nbar\r\nfoo\rbar\u{000B}foo\u{000C}bar\u{0085}foo\u{2028}bar\u{2029}\n";
|
||||
let mut iterator = LineIterator::new(lines);
|
||||
assert_eq!(iterator.next(), Some(("foo", Some(1))));
|
||||
assert_eq!(iterator.next(), Some(("bar", Some(2))));
|
||||
assert_eq!(iterator.next(), Some(("foo", Some(1))));
|
||||
assert_eq!(iterator.next(), Some(("bar", Some(1))));
|
||||
assert_eq!(iterator.next(), Some(("foo", Some(1))));
|
||||
assert_eq!(iterator.next(), Some(("bar", Some(2))));
|
||||
assert_eq!(iterator.next(), Some(("foo", Some(3))));
|
||||
assert_eq!(iterator.next(), Some(("bar", Some(3))));
|
||||
assert_eq!(iterator.next(), Some(("", Some(1))));
|
||||
assert_eq!(iterator.next(), None);
|
||||
}
|
||||
}
|
||||
|
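The new `LineIterator` above lets the location helpers walk lines while accounting for every separator treated as a line break (CRLF, CR, LF, VT, FF, NEL, LS, PS), yielding each line together with the byte length of its terminator. Below is a hedged, simplified sketch of how `Location::of_offset` uses such an iterator to turn a byte offset into a 1-indexed line/column pair; only '\n' is handled here, via `split_inclusive`, whereas the real code uses `LineIterator`:

```rust
// Simplified stand-in for Location::of_offset: find the line containing a
// byte offset, then count characters up to the offset within that line.
fn of_offset(source: &str, offset: usize) -> (usize, usize) {
    let mut bytes_prior = 0;
    for (line_idx, line) in source.split_inclusive('\n').enumerate() {
        let bytes_so_far = bytes_prior + line.len();
        if bytes_so_far >= offset {
            let line_offset = offset - bytes_prior;
            let column = line[..line_offset].chars().count();
            // +1 because line and column are 1-indexed.
            return (line_idx + 1, column + 1);
        }
        bytes_prior = bytes_so_far;
    }
    unreachable!("offset lies outside the source string");
}

fn main() {
    let src = "SELECT *\nFROM person\n";
    // Offset of the 'p' in "person": line 2, column 6.
    let offset = src.find("person").unwrap();
    assert_eq!(of_offset(src, offset), (2, 6));
    println!("ok");
}
```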
|
|
@ -2,6 +2,9 @@ use std::{fmt, ops::Range};
|
|||
|
||||
use super::common::Location;
|
||||
|
||||
mod nom_error;
|
||||
pub use nom_error::ParseError;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RenderedError {
|
||||
pub text: String,
|
|
@ -5,19 +5,12 @@ use crate::syn::{
|
|||
use nom::error::ErrorKind;
|
||||
use nom::error::FromExternalError;
|
||||
use nom::error::ParseError as NomParseError;
|
||||
use nom::Err;
|
||||
use std::fmt::Write;
|
||||
use std::num::ParseFloatError;
|
||||
use std::num::ParseIntError;
|
||||
use std::ops::Bound;
|
||||
use thiserror::Error;
|
||||
|
||||
mod utils;
|
||||
pub use utils::*;
|
||||
mod render;
|
||||
|
||||
pub type IResult<I, O, E = ParseError<I>> = Result<(I, O), Err<E>>;
|
||||
|
||||
#[derive(Error, Debug, Clone)]
|
||||
pub enum ParseError<I> {
|
||||
Base(I),
|
|
@ -3,11 +3,20 @@
|
|||
pub mod common;
|
||||
pub mod error;
|
||||
|
||||
#[cfg(not(feature = "experimental-parser"))]
|
||||
pub mod v1;
|
||||
pub use v1::{
|
||||
datetime, datetime_raw, duration, idiom, json, parse, path_like, range, subquery, thing,
|
||||
thing_raw, value,
|
||||
#[cfg(not(feature = "experimental-parser"))]
|
||||
pub use v1::{datetime_raw, duration, idiom, json, parse, range, subquery, thing, value};
|
||||
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
pub mod v2;
|
||||
#[cfg(feature = "experimental-parser")]
|
||||
pub use v2::{
|
||||
datetime_raw, duration, idiom, json, json_legacy_strand, parse, range, subquery, thing, value,
|
||||
value_legacy_strand,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test;
|
||||
pub trait Parse<T> {
|
||||
fn parse(val: &str) -> T;
|
||||
}
|
||||
|
|
|
@ -1,50 +0,0 @@
|
|||
pub(crate) use super::v1::builtin::builtin_name;
|
||||
use crate::sql::{Array, Expression, Idiom, Param, Script, Thing, Value};
|
||||
|
||||
use super::v1::test::*;
|
||||
|
||||
pub trait Parse<T> {
|
||||
fn parse(val: &str) -> T;
|
||||
}
|
||||
|
||||
impl Parse<Self> for Value {
|
||||
fn parse(val: &str) -> Self {
|
||||
value(val).unwrap().1
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<Self> for Array {
|
||||
fn parse(val: &str) -> Self {
|
||||
array(val).unwrap().1
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<Self> for Param {
|
||||
fn parse(val: &str) -> Self {
|
||||
param(val).unwrap().1
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<Self> for Idiom {
|
||||
fn parse(val: &str) -> Self {
|
||||
idiom(val).unwrap().1
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<Self> for Script {
|
||||
fn parse(val: &str) -> Self {
|
||||
script(val).unwrap().1
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<Self> for Thing {
|
||||
fn parse(val: &str) -> Self {
|
||||
thing(val).unwrap().1
|
||||
}
|
||||
}
|
||||
|
||||
impl Parse<Self> for Expression {
|
||||
fn parse(val: &str) -> Self {
|
||||
expression(val).unwrap().1
|
||||
}
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
use super::{IResult, ParseError};
|
||||
use nom::bytes::complete::tag_no_case;
|
||||
use nom::Err;
|
||||
use nom::Parser;
|
||||
pub use crate::syn::error::ParseError;
|
||||
use nom::{bytes::complete::tag_no_case, Err, Parser};
|
||||
|
||||
pub type IResult<I, O, E = ParseError<I>> = Result<(I, O), Err<E>>;
|
||||
|
||||
pub fn expected<I, O, P>(expect: &'static str, mut parser: P) -> impl FnMut(I) -> IResult<I, O>
|
||||
where
|
|
@ -1 +0,0 @@
|
|||
|
|
@ -7,7 +7,7 @@ use super::{
|
|||
value::single,
|
||||
IResult,
|
||||
};
|
||||
use crate::sql::{Cast, Expression, Future};
|
||||
use crate::sql::{Cast, Expression, Future, Operator, Value};
|
||||
use nom::{bytes::complete::tag, character::complete::char, combinator::cut, sequence::delimited};
|
||||
|
||||
pub fn cast(i: &str) -> IResult<&str, Cast> {
|
||||
|
@ -30,10 +30,32 @@ pub fn unary(i: &str) -> IResult<&str, Expression> {
|
|||
))
|
||||
}
|
||||
|
||||
/// Augment an existing expression
|
||||
pub(crate) fn augment(mut this: Expression, l: Value, o: Operator) -> Expression {
|
||||
match &mut this {
|
||||
Expression::Binary {
|
||||
l: left,
|
||||
o: op,
|
||||
..
|
||||
} if o.precedence() >= op.precedence() => match left {
|
||||
Value::Expression(x) => {
|
||||
*x.as_mut() = augment(std::mem::take(x), l, o);
|
||||
this
|
||||
}
|
||||
_ => {
|
||||
*left = Expression::new(l, o, std::mem::take(left)).into();
|
||||
this
|
||||
}
|
||||
},
|
||||
e => {
|
||||
let r = Value::from(std::mem::take(e));
|
||||
Expression::new(l, o, r)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn binary(i: &str) -> IResult<&str, Expression> {
|
||||
use crate::sql::Value;
|
||||
|
||||
use super::depth;
|
||||
use super::value;
|
||||
|
||||
|
@ -43,7 +65,7 @@ pub fn binary(i: &str) -> IResult<&str, Expression> {
|
|||
let _diving = depth::dive(i)?;
|
||||
let (i, r) = value::value(i)?;
|
||||
let v = match r {
|
||||
Value::Expression(r) => r.augment(l, o),
|
||||
Value::Expression(r) => augment(*r, l, o),
|
||||
_ => Expression::new(l, o, r),
|
||||
};
|
||||
Ok((i, v))
|
||||
|
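The `augment` helper above (moved out of `Expression` and into the v1 parser) rebalances a freshly parsed binary expression: because `binary` parses the right-hand side first, attaching the left operand naively would group everything to the right regardless of precedence, so `augment` walks down the left spine while the incoming operator binds at least as tightly. A self-contained toy version of the same idea, using a made-up mini AST instead of SurrealQL's `Expression`/`Operator`:

```rust
#[derive(Debug)]
enum Expr {
    Num(i64),
    Binary { l: Box<Expr>, op: char, r: Box<Expr> },
}

// Higher number binds tighter.
fn precedence(op: char) -> u8 {
    match op {
        '*' | '/' => 2,
        _ => 1,
    }
}

// Attach `l op ...` onto an already parsed expression `this`, descending into
// the left spine while the incoming operator binds at least as tightly.
fn augment(this: Expr, l: Expr, op: char) -> Expr {
    match this {
        Expr::Binary { l: left, op: inner, r } if precedence(op) >= precedence(inner) => {
            Expr::Binary { l: Box::new(augment(*left, l, op)), op: inner, r }
        }
        other => Expr::Binary { l: Box::new(l), op, r: Box::new(other) },
    }
}

fn main() {
    // The parser first produces `3 + 4`; augmenting with `2 *` must bind the
    // `2` to the `3`, giving (2 * 3) + 4 rather than 2 * (3 + 4).
    let right = Expr::Binary { l: Box::new(Expr::Num(3)), op: '+', r: Box::new(Expr::Num(4)) };
    println!("{:?}", augment(right, Expr::Num(2), '*'));
}
```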
|
|
@ -189,7 +189,7 @@ mod tests {
|
|||
use super::super::builtin::{builtin_name, BuiltinName};
|
||||
use super::*;
|
||||
use crate::sql::Value;
|
||||
use crate::syn::{self, test::Parse};
|
||||
use crate::syn::{self, Parse};
|
||||
|
||||
fn function(i: &str) -> IResult<&str, Function> {
|
||||
alt((defined_function, |i| {
|
||||
|
|
|
@ -275,9 +275,8 @@ pub fn bracketed_value(i: &str) -> IResult<&str, Part> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::sql::{Dir, Expression, Id, Number, Param, Strand, Table, Thing};
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
|
@ -29,8 +29,8 @@ fn datetime_single(i: &str) -> IResult<&str, Datetime> {
|
|||
|
||||
fn datetime_double(i: &str) -> IResult<&str, Datetime> {
|
||||
alt((
|
||||
delimited(tag("d\""), cut(datetime_raw), cut(char('\"'))),
|
||||
delimited(char('\"'), datetime_raw, char('\"')),
|
||||
delimited(tag("d\""), cut(datetime_raw), cut(char('"'))),
|
||||
delimited(char('"'), datetime_raw, char('"')),
|
||||
))(i)
|
||||
}
|
||||
|
||||
|
@ -194,7 +194,7 @@ mod tests {
|
|||
|
||||
// use chrono::Date;
|
||||
|
||||
use crate::{sql::Value, syn::test::Parse};
|
||||
use crate::{sql::Value, syn::Parse};
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
|
@ -108,7 +108,7 @@ pub fn tables(i: &str) -> IResult<&str, Tables> {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
#[test]
|
||||
fn ident_normal() {
|
||||
|
|
|
@ -163,7 +163,7 @@ fn char_unicode_bracketed(i: &str) -> IResult<&str, char> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::{sql::Value, syn::test::Parse};
|
||||
use crate::{sql::Value, syn::Parse};
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ fn uuid_raw(i: &str) -> IResult<&str, Uuid> {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
||||
use crate::{sql::Value, syn::test::Parse};
|
||||
use crate::{sql::Value, syn::Parse};
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ mod part;
|
|||
mod stmt;
|
||||
|
||||
mod block;
|
||||
pub(crate) mod builtin;
|
||||
mod builtin;
|
||||
mod comment;
|
||||
mod common;
|
||||
mod depth;
|
||||
|
@ -79,10 +79,6 @@ pub fn idiom(input: &str) -> Result<Idiom, Error> {
|
|||
parse_impl(input, idiom::plain)
|
||||
}
|
||||
|
||||
pub fn datetime(input: &str) -> Result<Datetime, Error> {
|
||||
parse_impl(input, literal::datetime)
|
||||
}
|
||||
|
||||
pub fn datetime_raw(input: &str) -> Result<Datetime, Error> {
|
||||
parse_impl(input, literal::datetime_all_raw)
|
||||
}
|
||||
|
@ -91,20 +87,12 @@ pub fn duration(input: &str) -> Result<Duration, Error> {
|
|||
parse_impl(input, literal::duration)
|
||||
}
|
||||
|
||||
pub fn path_like(input: &str) -> Result<Value, Error> {
|
||||
parse_impl(input, value::path_like)
|
||||
}
|
||||
|
||||
pub fn range(input: &str) -> Result<Range, Error> {
|
||||
parse_impl(input, literal::range)
|
||||
}
|
||||
|
||||
/// Parses a SurrealQL [`Thing`]
|
||||
pub fn thing(input: &str) -> Result<Thing, Error> {
|
||||
parse_impl(input, thing::thing)
|
||||
}
|
||||
|
||||
pub fn thing_raw(input: &str) -> Result<Thing, Error> {
|
||||
parse_impl(input, thing::thing_raw)
|
||||
}
|
||||
|
||||
|
|
|
@ -149,6 +149,7 @@ pub fn knn_distance(i: &str) -> IResult<&str, Distance> {
|
|||
}
|
||||
|
||||
pub fn knn(i: &str) -> IResult<&str, Operator> {
|
||||
let (i, _) = opt(tag_no_case("knn"))(i)?;
|
||||
let (i, _) = char('<')(i)?;
|
||||
let (i, k) = u32(i)?;
|
||||
let (i, dist) = opt(knn_distance)(i)?;
|
||||
|
@ -228,4 +229,13 @@ mod tests {
|
|||
assert_eq!("<3,EUCLIDEAN>", format!("{}", out));
|
||||
assert_eq!(out, Operator::Knn(3, Some(Distance::Euclidean)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_knn_with_prefix() {
|
||||
let res = knn("knn<5>");
|
||||
assert!(res.is_ok());
|
||||
let out = res.unwrap().1;
|
||||
assert_eq!("<5>", format!("{}", out));
|
||||
assert_eq!(out, Operator::Knn(5, None));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,7 +72,6 @@ pub fn single(i: &str) -> IResult<&str, Data> {
|
|||
|
||||
pub fn values(i: &str) -> IResult<&str, Data> {
|
||||
let (i, _) = tag_no_case("(")(i)?;
|
||||
// TODO: look at call tree here.
|
||||
let (i, fields) = separated_list1(commas, plain)(i)?;
|
||||
let (i, _) = tag_no_case(")")(i)?;
|
||||
let (i, _) = shouldbespace(i)?;
|
||||
|
|
|
@ -6,7 +6,6 @@ use super::{
|
|||
literal::{datetime, duration, ident, table, tables},
|
||||
operator::dir,
|
||||
thing::thing,
|
||||
// TODO: go through and check every import for alias.
|
||||
value::value,
|
||||
IResult,
|
||||
};
|
||||
|
@ -238,7 +237,7 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::sql::{Datetime, Idiom, Value};
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
use std::time;
|
||||
|
||||
#[test]
|
||||
|
|
|
@ -121,7 +121,7 @@ fn rule(i: &str) -> IResult<&str, Vec<(PermissionKind, Permission)>> {
|
|||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::sql::{Expression, Value};
|
||||
use crate::syn::test::Parse;
|
||||
use crate::syn::Parse;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ fn split_raw(i: &str) -> IResult<&str, Split> {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::{sql::Idiom, syn::test::Parse};
|
||||
use crate::{sql::Idiom, syn::Parse};
|
||||
|
||||
#[test]
|
||||
fn split_statement() {
|
||||
|
|
|
@ -11,7 +11,6 @@ use crate::{
|
|||
iam::Role,
|
||||
sql::{statements::DefineUserStatement, Ident, Strand},
|
||||
};
|
||||
use argon2::{password_hash::SaltString, Argon2, PasswordHasher};
|
||||
use nom::{
|
||||
branch::alt,
|
||||
bytes::complete::tag_no_case,
|
||||
|
@ -19,7 +18,6 @@ use nom::{
|
|||
multi::{many0, separated_list1},
|
||||
Err,
|
||||
};
|
||||
use rand::{distributions::Alphanumeric, rngs::OsRng, Rng};
|
||||
|
||||
pub fn user(i: &str) -> IResult<&str, DefineUserStatement> {
|
||||
let (i, _) = tag_no_case("USER")(i)?;
|
||||
|
@ -35,28 +33,19 @@ pub fn user(i: &str) -> IResult<&str, DefineUserStatement> {
|
|||
Ok((i, (name, base, opts)))
|
||||
})(i)?;
|
||||
// Create the base statement
|
||||
let mut res = DefineUserStatement {
|
||||
let mut res = DefineUserStatement::from_parsed_values(
|
||||
name,
|
||||
base,
|
||||
roles: vec!["Viewer".into()], // New users get the viewer role by default
|
||||
code: rand::thread_rng()
|
||||
.sample_iter(&Alphanumeric)
|
||||
.take(128)
|
||||
.map(char::from)
|
||||
.collect::<String>(),
|
||||
..Default::default()
|
||||
};
|
||||
vec!["Viewer".into()], // New users get the viewer role by default
|
||||
);
|
||||
// Assign any defined options
|
||||
for opt in opts {
|
||||
match opt {
|
||||
DefineUserOption::Password(v) => {
|
||||
res.hash = Argon2::default()
|
||||
.hash_password(v.as_ref(), &SaltString::generate(&mut OsRng))
|
||||
.unwrap()
|
||||
.to_string()
|
||||
res.set_password(&v);
|
||||
}
|
||||
DefineUserOption::Passhash(v) => {
|
||||
res.hash = v;
|
||||
res.set_passhash(v);
|
||||
}
|
||||
DefineUserOption::Roles(v) => {
|
||||
res.roles = v;
|
||||
|
|
|
@ -9,7 +9,7 @@ use nom::{
|
|||
branch::alt,
|
||||
bytes::complete::tag_no_case,
|
||||
character::complete::char,
|
||||
combinator::{cut, opt, value},
|
||||
combinator::{opt, value},
|
||||
sequence::tuple,
|
||||
};
|
||||
|
||||
|
@ -19,10 +19,10 @@ pub fn option(i: &str) -> IResult<&str, OptionStatement> {
let (i, n) = ident(i)?;
let (i, v) = expected(
"'=' followed by a value for the option",
cut(opt(alt((
opt(alt((
value(true, tuple((mightbespace, char('='), mightbespace, tag_no_case("TRUE")))),
value(false, tuple((mightbespace, char('='), mightbespace, tag_no_case("FALSE")))),
)))),
))),
)(i)?;
Ok((
i,
@ -87,7 +87,6 @@ fn disallowed_subquery_statements(i: &str) -> IResult<&str, ()> {

#[cfg(test)]
mod tests {

use super::*;

#[test]
@ -1,4 +1,5 @@
pub use super::{
use super::{
super::Parse,
expression::binary as expression,
function::script_body as script,
idiom::plain as idiom,
@ -6,3 +7,48 @@ pub use super::{
thing::thing,
value::{array, value},
};
use nom::Finish;

use crate::sql::{Array, Expression, Idiom, Param, Script, Thing, Value};

impl Parse<Self> for Value {
fn parse(val: &str) -> Self {
value(val).finish().unwrap().1
}
}

impl Parse<Self> for Array {
fn parse(val: &str) -> Self {
array(val).finish().unwrap().1
}
}

impl Parse<Self> for Param {
fn parse(val: &str) -> Self {
param(val).finish().unwrap().1
}
}

impl Parse<Self> for Idiom {
fn parse(val: &str) -> Self {
idiom(val).finish().unwrap().1
}
}

impl Parse<Self> for Script {
fn parse(val: &str) -> Self {
script(val).finish().unwrap().1
}
}

impl Parse<Self> for Thing {
fn parse(val: &str) -> Self {
thing(val).finish().unwrap().1
}
}

impl Parse<Self> for Expression {
fn parse(val: &str) -> Self {
expression(val).finish().unwrap().1
}
}
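Aside (not part of the commit): the `Parse` impls above give the test suite one uniform entry point per AST type, so a test can turn a SurrealQL snippet into a typed value without threading nom combinators by hand. Below is a minimal stand-alone sketch of the same pattern over a toy parser; `Number` and `Word` are illustrative types, not SurrealDB's.

// Hypothetical sketch of a test-only `Parse` helper trait, mirroring the pattern above.
trait Parse<T> {
    fn parse(val: &str) -> T;
}

struct Number(i64);
struct Word(String);

impl Parse<Self> for Number {
    fn parse(val: &str) -> Self {
        // Tests are allowed to unwrap: a failure here is a test bug, not user input.
        Number(val.trim().parse().unwrap())
    }
}

impl Parse<Self> for Word {
    fn parse(val: &str) -> Self {
        Word(val.trim().to_owned())
    }
}

fn main() {
    assert_eq!(Number::parse(" 42 ").0, 42);
    assert_eq!(Word::parse("hello").0, "hello");
}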
@ -81,7 +81,7 @@ mod tests {
use crate::sql::object::Object;
use crate::sql::value::Value;
use crate::sql::Strand;
use crate::syn::test::Parse;
use crate::syn::Parse;

#[test]
fn thing_normal() {
@ -249,7 +249,7 @@ mod tests {
let res = id(sql);
let out = res.unwrap().1;
assert_eq!(Id::from("100test"), out);
assert_eq!("100test", format!("{}", out));
assert_eq!("⟨100test⟩", format!("{}", out));
}

#[test]
@ -9,7 +9,7 @@ use super::{
depth,
ending::keyword,
error::expected,
expression::{cast, future, unary},
expression::{augment, cast, future, unary},
function::{builtin_function, defined_function, model},
idiom::{self, reparse_idiom_start},
literal::{
@ -62,7 +62,7 @@ pub fn value(i: &str) -> IResult<&str, Value> {
let _diving = depth::dive(i)?;
let (i, r) = cut(value)(i)?;
let expr = match r {
Value::Expression(r) => r.augment(start, o),
Value::Expression(r) => augment(*r, start, o),
_ => Expression::new(start, o, r),
};
let v = Value::from(expr);
@ -179,7 +179,7 @@ pub fn select(i: &str) -> IResult<&str, Value> {
};
let (i, r) = cut(value)(i)?;
let expr = match r {
Value::Expression(r) => r.augment(start, op),
Value::Expression(r) => augment(*r, start, op),
_ => Expression::new(start, op, r),
};
let v = Value::from(expr);
387
lib/src/syn/v2/lexer/byte.rs
Normal file
@ -0,0 +1,387 @@
use crate::syn::v2::{
|
||||
lexer::{
|
||||
unicode::{byte, chars},
|
||||
Error, Lexer,
|
||||
},
|
||||
token::{t, Token, TokenKind},
|
||||
};
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Eats a single line comment.
|
||||
pub fn eat_single_line_comment(&mut self) {
|
||||
loop {
|
||||
let Some(byte) = self.reader.next() else {
|
||||
break;
|
||||
};
|
||||
match byte {
|
||||
byte::CR => {
|
||||
self.eat(byte::LF);
|
||||
break;
|
||||
}
|
||||
byte::LF => {
|
||||
break;
|
||||
}
|
||||
x if !x.is_ascii() => {
|
||||
// -1 because we already ate the byte.
|
||||
let backup = self.reader.offset() - 1;
|
||||
let char = match self.reader.complete_char(x) {
|
||||
Ok(x) => x,
|
||||
Err(_) => {
|
||||
// let the next token handle the error.
|
||||
self.reader.backup(backup);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
match char {
|
||||
chars::LS | chars::PS | chars::NEL => break,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
self.set_whitespace_span(self.current_span());
|
||||
self.skip_offset();
|
||||
}
|
||||
|
||||
/// Eats a multi-line comment and returns an error if the closing `*/` is missing.
|
||||
pub fn eat_multi_line_comment(&mut self) -> Result<(), Error> {
|
||||
loop {
|
||||
let Some(byte) = self.reader.next() else {
|
||||
return Err(Error::UnexpectedEof);
|
||||
};
|
||||
if let b'*' = byte {
|
||||
let Some(byte) = self.reader.next() else {
|
||||
return Err(Error::UnexpectedEof);
|
||||
};
|
||||
if b'/' == byte {
|
||||
self.set_whitespace_span(self.current_span());
|
||||
self.skip_offset();
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Eat whitespace like spaces, tabs and new-lines.
|
||||
pub fn eat_whitespace(&mut self) {
|
||||
loop {
|
||||
let Some(byte) = self.reader.peek() else {
|
||||
return;
|
||||
};
|
||||
match byte {
|
||||
byte::CR | byte::FF | byte::LF | byte::SP | byte::VT | byte::TAB => {
|
||||
self.reader.next();
|
||||
}
|
||||
x if !x.is_ascii() => {
|
||||
let backup = self.reader.offset();
|
||||
self.reader.next();
|
||||
let char = match self.reader.complete_char(x) {
|
||||
Ok(x) => x,
|
||||
Err(_) => {
|
||||
self.reader.backup(backup);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
match char {
|
||||
'\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}'
|
||||
| '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}'
|
||||
| '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
|
||||
| '\u{3000}' => {}
|
||||
_ => {
|
||||
self.reader.backup(backup);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
self.set_whitespace_span(self.current_span());
|
||||
self.skip_offset();
|
||||
}
|
||||
|
||||
// re-lexes a `/` token to a regex token.
|
||||
pub fn relex_regex(&mut self, token: Token) -> Token {
|
||||
debug_assert_eq!(token.kind, t!("/"));
|
||||
debug_assert_eq!(token.span.offset + 1, self.last_offset);
|
||||
debug_assert_eq!(token.span.len, 1);
|
||||
debug_assert_eq!(self.scratch, "");
|
||||
|
||||
self.last_offset = token.span.offset;
|
||||
loop {
|
||||
match self.reader.next() {
|
||||
Some(b'\\') => {
|
||||
if let Some(b'/') = self.reader.peek() {
|
||||
self.reader.next();
|
||||
self.scratch.push('/')
|
||||
} else {
|
||||
self.scratch.push('\\')
|
||||
}
|
||||
}
|
||||
Some(b'/') => break,
|
||||
Some(x) => {
|
||||
if x.is_ascii() {
|
||||
self.scratch.push(x as char);
|
||||
} else {
|
||||
match self.reader.complete_char(x) {
|
||||
Ok(x) => {
|
||||
self.scratch.push(x);
|
||||
}
|
||||
Err(e) => return self.invalid_token(e.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
None => return self.invalid_token(Error::UnexpectedEof),
|
||||
}
|
||||
}
|
||||
|
||||
match self.scratch.parse() {
|
||||
Ok(x) => {
|
||||
self.scratch.clear();
|
||||
self.regex = Some(x);
|
||||
self.finish_token(TokenKind::Regex)
|
||||
}
|
||||
Err(e) => self.invalid_token(Error::Regex(e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex the next token, starting from the given byte.
|
||||
pub fn lex_ascii(&mut self, byte: u8) -> Token {
|
||||
let kind = match byte {
|
||||
b'{' => t!("{"),
|
||||
b'}' => t!("}"),
|
||||
b'[' => t!("["),
|
||||
b']' => t!("]"),
|
||||
b')' => t!(")"),
|
||||
b'(' => t!("("),
|
||||
b';' => t!(";"),
|
||||
b',' => t!(","),
|
||||
b'@' => t!("@"),
|
||||
byte::CR | byte::FF | byte::LF | byte::SP | byte::VT | byte::TAB => {
|
||||
self.eat_whitespace();
|
||||
return self.next_token_inner();
|
||||
}
|
||||
b'|' => match self.reader.peek() {
|
||||
Some(b'|') => {
|
||||
self.reader.next();
|
||||
t!("||")
|
||||
}
|
||||
_ => t!("|"),
|
||||
},
|
||||
b'&' => match self.reader.peek() {
|
||||
Some(b'&') => {
|
||||
self.reader.next();
|
||||
t!("&&")
|
||||
}
|
||||
_ => return self.invalid_token(Error::ExpectedEnd('&')),
|
||||
},
|
||||
b'.' => match self.reader.peek() {
|
||||
Some(b'.') => {
|
||||
self.reader.next();
|
||||
match self.reader.peek() {
|
||||
Some(b'.') => {
|
||||
self.reader.next();
|
||||
t!("...")
|
||||
}
|
||||
_ => t!(".."),
|
||||
}
|
||||
}
|
||||
_ => t!("."),
|
||||
},
|
||||
b'!' => match self.reader.peek() {
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("!=")
|
||||
}
|
||||
Some(b'~') => {
|
||||
self.reader.next();
|
||||
t!("!~")
|
||||
}
|
||||
_ => t!("!"),
|
||||
},
|
||||
b'?' => match self.reader.peek() {
|
||||
Some(b'?') => {
|
||||
self.reader.next();
|
||||
t!("??")
|
||||
}
|
||||
Some(b':') => {
|
||||
self.reader.next();
|
||||
t!("?:")
|
||||
}
|
||||
Some(b'~') => {
|
||||
self.reader.next();
|
||||
t!("?~")
|
||||
}
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("?=")
|
||||
}
|
||||
_ => t!("?"),
|
||||
},
|
||||
b'<' => match self.reader.peek() {
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("<=")
|
||||
}
|
||||
Some(b'-') => {
|
||||
self.reader.next();
|
||||
match self.reader.peek() {
|
||||
Some(b'>') => {
|
||||
self.reader.next();
|
||||
t!("<->")
|
||||
}
|
||||
_ => t!("<-"),
|
||||
}
|
||||
}
|
||||
_ => t!("<"),
|
||||
},
|
||||
b'>' => match self.reader.peek() {
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!(">=")
|
||||
}
|
||||
_ => t!(">"),
|
||||
},
|
||||
b'-' => match self.reader.peek() {
|
||||
Some(b'>') => {
|
||||
self.reader.next();
|
||||
t!("->")
|
||||
}
|
||||
Some(b'-') => {
|
||||
self.reader.next();
|
||||
self.eat_single_line_comment();
|
||||
return self.next_token_inner();
|
||||
}
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("-=")
|
||||
}
|
||||
_ => t!("-"),
|
||||
},
|
||||
b'+' => match self.reader.peek() {
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("+=")
|
||||
}
|
||||
Some(b'?') => {
|
||||
self.reader.next();
|
||||
match self.reader.peek() {
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("+?=")
|
||||
}
|
||||
_ => return self.invalid_token(Error::ExpectedEnd('=')),
|
||||
}
|
||||
}
|
||||
_ => t!("+"),
|
||||
},
|
||||
b'/' => match self.reader.peek() {
|
||||
Some(b'*') => {
|
||||
self.reader.next();
|
||||
// A `*/` could be missing which would be invalid.
|
||||
if let Err(e) = self.eat_multi_line_comment() {
|
||||
return self.invalid_token(e);
|
||||
}
|
||||
return self.next_token_inner();
|
||||
}
|
||||
Some(b'/') => {
|
||||
self.reader.next();
|
||||
self.eat_single_line_comment();
|
||||
return self.next_token_inner();
|
||||
}
|
||||
_ => t!("/"),
|
||||
},
|
||||
b'*' => match self.reader.peek() {
|
||||
Some(b'*') => {
|
||||
self.reader.next();
|
||||
t!("**")
|
||||
}
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("*=")
|
||||
}
|
||||
Some(b'~') => {
|
||||
self.reader.next();
|
||||
t!("*~")
|
||||
}
|
||||
_ => t!("*"),
|
||||
},
|
||||
b'=' => match self.reader.peek() {
|
||||
Some(b'=') => {
|
||||
self.reader.next();
|
||||
t!("==")
|
||||
}
|
||||
_ => t!("="),
|
||||
},
|
||||
b':' => match self.reader.peek() {
|
||||
Some(b':') => {
|
||||
self.reader.next();
|
||||
t!("::")
|
||||
}
|
||||
_ => t!(":"),
|
||||
},
|
||||
b'$' => {
|
||||
if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
|
||||
return self.lex_param();
|
||||
}
|
||||
t!("$")
|
||||
}
|
||||
b'#' => {
|
||||
self.eat_single_line_comment();
|
||||
return self.next_token_inner();
|
||||
}
|
||||
b'`' => return self.lex_surrounded_ident(true),
|
||||
b'"' => return self.lex_strand(true),
|
||||
b'\'' => return self.lex_strand(false),
|
||||
b'd' => {
|
||||
match self.reader.peek() {
|
||||
Some(b'"') => {
|
||||
self.reader.next();
|
||||
return self.lex_datetime(true);
|
||||
}
|
||||
Some(b'\'') => {
|
||||
self.reader.next();
|
||||
return self.lex_datetime(false);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
return self.lex_ident_from_next_byte(b'd');
|
||||
}
|
||||
b'u' => {
|
||||
match self.reader.peek() {
|
||||
Some(b'"') => {
|
||||
self.reader.next();
|
||||
return self.lex_uuid(true);
|
||||
}
|
||||
Some(b'\'') => {
|
||||
self.reader.next();
|
||||
return self.lex_uuid(false);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
return self.lex_ident_from_next_byte(b'u');
|
||||
}
|
||||
b'r' => match self.reader.peek() {
|
||||
Some(b'\"') => {
|
||||
self.reader.next();
|
||||
t!("r\"")
|
||||
}
|
||||
Some(b'\'') => {
|
||||
self.reader.next();
|
||||
t!("r'")
|
||||
}
|
||||
_ => return self.lex_ident_from_next_byte(byte),
|
||||
},
|
||||
b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
|
||||
return self.lex_ident_from_next_byte(byte);
|
||||
}
|
||||
b'0'..=b'9' => return self.lex_number(byte),
|
||||
x => return self.invalid_token(Error::UnexpectedCharacter(x as char)),
|
||||
};
|
||||
|
||||
self.finish_token(kind)
|
||||
}
|
||||
}
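Aside (not part of the commit): `lex_ascii` above commits to a token from a single byte plus at most a couple of peeks, e.g. `!` only becomes `!=` or `!~` when the peeked byte matches. A self-contained sketch of that peek-and-commit dispatch over a plain byte slice; the token names are illustrative, not the crate's API.

// Sketch of one-byte dispatch with lookahead over a flat byte slice.
#[derive(Debug, PartialEq)]
enum Tok { Not, NotEq, NotLike, Lt, LtEq, Arrow }

fn lex_one(src: &[u8], pos: &mut usize) -> Option<Tok> {
    let byte = *src.get(*pos)?;
    *pos += 1;
    let peek = src.get(*pos).copied();
    let tok = match byte {
        b'!' => match peek {
            Some(b'=') => { *pos += 1; Tok::NotEq }
            Some(b'~') => { *pos += 1; Tok::NotLike }
            _ => Tok::Not,
        },
        b'<' => match peek {
            Some(b'=') => { *pos += 1; Tok::LtEq }
            Some(b'-') => { *pos += 1; Tok::Arrow } // stand-in for the `<-` graph token
            _ => Tok::Lt,
        },
        _ => return None,
    };
    Some(tok)
}

fn main() {
    let mut pos = 0;
    assert_eq!(lex_one(b"!=", &mut pos), Some(Tok::NotEq));
    assert_eq!(pos, 2);
}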
37
lib/src/syn/v2/lexer/char.rs
Normal file
@ -0,0 +1,37 @@
use crate::syn::v2::{
|
||||
lexer::{CharError, Lexer},
|
||||
token::{t, Token},
|
||||
};
|
||||
|
||||
use super::Error;
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// lex non-ascii characters.
|
||||
///
|
||||
/// Should only be called after determining that the byte is not a valid ascii character.
|
||||
pub fn lex_char(&mut self, byte: u8) -> Token {
|
||||
let c = match self.reader.complete_char(byte) {
|
||||
Ok(x) => x,
|
||||
Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
|
||||
Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
|
||||
};
|
||||
let kind = match c {
|
||||
'⟨' => return self.lex_surrounded_ident(false),
|
||||
'…' => t!("..."),
|
||||
'∋' => t!("∋"),
|
||||
'∌' => t!("∌"),
|
||||
'∈' => t!("∈"),
|
||||
'∉' => t!("∉"),
|
||||
'⊇' => t!("⊇"),
|
||||
'⊃' => t!("⊃"),
|
||||
'⊅' => t!("⊅"),
|
||||
'⊆' => t!("⊆"),
|
||||
'⊂' => t!("⊂"),
|
||||
'⊄' => t!("⊄"),
|
||||
'×' => t!("×"),
|
||||
'÷' => t!("÷"),
|
||||
x => return self.invalid_token(Error::UnexpectedCharacter(x)),
|
||||
};
|
||||
self.finish_token(kind)
|
||||
}
|
||||
}
267
lib/src/syn/v2/lexer/datetime.rs
Normal file
@ -0,0 +1,267 @@
use std::ops::RangeInclusive;
|
||||
|
||||
use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::{
|
||||
sql::Datetime,
|
||||
syn::v2::token::{Token, TokenKind},
|
||||
};
|
||||
|
||||
use super::{Error as LexError, Lexer};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum PartError {
|
||||
#[error("value outside of allowed range")]
|
||||
OutsideRange,
|
||||
#[error("missing digit(s)")]
|
||||
MissingDigits,
|
||||
#[error("too many digits")]
|
||||
TooManyDigits,
|
||||
}
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("invalid year, {0}")]
|
||||
Year(PartError),
|
||||
#[error("invalid month, {0}")]
|
||||
Month(PartError),
|
||||
#[error("invalid day, {0}")]
|
||||
Day(PartError),
|
||||
#[error("invalid hour, {0}")]
|
||||
Hour(PartError),
|
||||
#[error("invalid time minute, {0}")]
|
||||
Minute(PartError),
|
||||
#[error("invalid second, {0}")]
|
||||
Second(PartError),
|
||||
#[error("invalid nano_seconds, {0}")]
|
||||
NanoSeconds(PartError),
|
||||
#[error("invalid time-zone hour, {0}")]
|
||||
TimeZoneHour(PartError),
|
||||
#[error("invalid time-zone minute, {0}")]
|
||||
TimeZoneMinute(PartError),
|
||||
#[error("missing seperator `{}`",*(.0) as char)]
|
||||
MissingSeparator(u8),
|
||||
#[error("expected date-time strand to end")]
|
||||
ExpectedEnd,
|
||||
#[error("missing time-zone")]
|
||||
MissingTimeZone,
|
||||
#[error("date does not exist")]
|
||||
NonExistantDate,
|
||||
#[error("time does not exist")]
|
||||
NonExistantTime,
|
||||
#[error("time-zone offset too big")]
|
||||
TimeZoneOutOfRange,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Lex a date-time strand.
|
||||
pub fn lex_datetime(&mut self, double: bool) -> Token {
|
||||
match self.lex_datetime_err(double) {
|
||||
Ok(x) => {
|
||||
self.datetime = Some(x);
|
||||
self.finish_token(TokenKind::DateTime)
|
||||
}
|
||||
Err(e) => self.invalid_token(LexError::DateTime(e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a datetime without the enclosing `"` or `'`, returning either the datetime or a parser error.
|
||||
pub fn lex_datetime_raw_err(&mut self) -> Result<Datetime, Error> {
|
||||
let negative = match self.reader.peek() {
|
||||
Some(b'+') => {
|
||||
self.reader.next();
|
||||
false
|
||||
}
|
||||
Some(b'-') => {
|
||||
self.reader.next();
|
||||
true
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
|
||||
let mut year = self.lex_datetime_part(4, 0..=9999).map_err(Error::Year)? as i16;
|
||||
if negative {
|
||||
year = -year;
|
||||
}
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeparator(b'-'));
|
||||
}
|
||||
let month = self.lex_datetime_part(2, 1..=12).map_err(Error::Month)?;
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeparator(b'-'));
|
||||
}
|
||||
let day = self.lex_datetime_part(2, 1..=31).map_err(Error::Day)?;
|
||||
|
||||
if !self.eat(b'T') {
|
||||
let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else {
|
||||
return Err(Error::NonExistantDate);
|
||||
};
|
||||
let time = NaiveTime::default();
|
||||
let date_time = NaiveDateTime::new(date, time);
|
||||
|
||||
let datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(&date_time)
|
||||
.earliest()
|
||||
// this should never panic with a fixed offset.
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
return Ok(Datetime(datetime));
|
||||
}
|
||||
|
||||
let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::Hour)?;
|
||||
if !self.eat(b':') {
|
||||
return Err(Error::MissingSeparator(b':'));
|
||||
}
|
||||
|
||||
let minutes = self.lex_datetime_part(2, 0..=59).map_err(Error::Minute)?;
|
||||
|
||||
if !self.eat(b':') {
|
||||
return Err(Error::MissingSeparator(b':'));
|
||||
}
|
||||
|
||||
let seconds = self.lex_datetime_part(2, 0..=59).map_err(Error::Second)?;
|
||||
|
||||
// nano seconds
|
||||
let nano = if let Some(b'.') = self.reader.peek() {
|
||||
self.reader.next();
|
||||
// check if there is at least one digit.
|
||||
if !matches!(self.reader.peek(), Some(b'0'..=b'9')) {
|
||||
return Err(Error::NanoSeconds(PartError::MissingDigits));
|
||||
}
|
||||
let mut number = 0u32;
|
||||
for i in 0..9 {
|
||||
let Some(c) = self.reader.peek() else {
|
||||
// always invalid token, just let the next section handle the error.
|
||||
break;
|
||||
};
|
||||
if !c.is_ascii_digit() {
|
||||
// If digits are missing they are counted as 0's
|
||||
for _ in i..9 {
|
||||
number *= 10;
|
||||
}
|
||||
break;
|
||||
}
|
||||
self.reader.next();
|
||||
number *= 10;
|
||||
number += (c - b'0') as u32;
|
||||
}
|
||||
// ensure nano_seconds are at most 9 digits.
|
||||
if matches!(self.reader.peek(), Some(b'0'..=b'9')) {
|
||||
return Err(Error::NanoSeconds(PartError::TooManyDigits));
|
||||
}
|
||||
number
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// time zone
|
||||
let time_zone = match self.reader.peek() {
|
||||
Some(b'Z') => {
|
||||
self.reader.next();
|
||||
None
|
||||
}
|
||||
Some(x @ (b'-' | b'+')) => {
|
||||
self.reader.next();
|
||||
let negative = x == b'-';
|
||||
let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::TimeZoneHour)? as i32;
|
||||
let Some(b':') = self.reader.next() else {
|
||||
return Err(Error::MissingSeparator(b':'));
|
||||
};
|
||||
let minute =
|
||||
self.lex_datetime_part(2, 0..=59).map_err(Error::TimeZoneMinute)? as i32;
|
||||
let time = hour * 3600 + minute * 60;
|
||||
if negative {
|
||||
Some(-time)
|
||||
} else {
|
||||
Some(time)
|
||||
}
|
||||
}
|
||||
_ => return Err(Error::MissingTimeZone),
|
||||
};
|
||||
|
||||
// calculate the given datetime from individual parts.
|
||||
let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else {
|
||||
return Err(Error::NonExistantDate);
|
||||
};
|
||||
let Some(time) =
|
||||
NaiveTime::from_hms_nano_opt(hour as u32, minutes as u32, seconds as u32, nano)
|
||||
else {
|
||||
return Err(Error::NonExistantTime);
|
||||
};
|
||||
|
||||
let date_time = NaiveDateTime::new(date, time);
|
||||
|
||||
let zone = match time_zone {
|
||||
None => Utc.fix(),
|
||||
Some(offset) => if offset < 0 {
|
||||
FixedOffset::west_opt(-offset)
|
||||
} else {
|
||||
FixedOffset::east_opt(offset)
|
||||
}
|
||||
.ok_or(Error::TimeZoneOutOfRange)?,
|
||||
};
|
||||
|
||||
let datetime = zone
|
||||
.from_local_datetime(&date_time)
|
||||
.earliest()
|
||||
// this should never panic with a fixed offset.
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
Ok(Datetime(datetime))
|
||||
}
|
||||
|
||||
/// Lex a full datetime but return a result instead of a token.
|
||||
pub fn lex_datetime_err(&mut self, double: bool) -> Result<Datetime, Error> {
|
||||
let datetime = self.lex_datetime_raw_err()?;
|
||||
|
||||
let end_char = if double {
|
||||
b'"'
|
||||
} else {
|
||||
b'\''
|
||||
};
|
||||
|
||||
if !self.eat(end_char) {
|
||||
return Err(Error::ExpectedEnd);
|
||||
}
|
||||
|
||||
Ok(datetime)
|
||||
}
|
||||
|
||||
/// Lexes a digit part of date time.
|
||||
///
|
||||
/// This function eats a fixed number of digits and then checks that the value the digits represent
/// is within the given range.
|
||||
pub fn lex_datetime_part(
|
||||
&mut self,
|
||||
mut amount: u8,
|
||||
range: RangeInclusive<u16>,
|
||||
) -> Result<u16, PartError> {
|
||||
let mut value = 0u16;
|
||||
|
||||
while amount != 0 {
|
||||
value *= 10;
|
||||
let Some(char) = self.reader.peek() else {
|
||||
return Err(PartError::MissingDigits);
|
||||
};
|
||||
if !char.is_ascii_digit() {
|
||||
return Err(PartError::MissingDigits);
|
||||
}
|
||||
self.reader.next();
|
||||
value += (char - b'0') as u16;
|
||||
amount -= 1;
|
||||
}
|
||||
|
||||
if matches!(self.reader.peek(), Some(b'0'..=b'9')) {
|
||||
return Err(PartError::TooManyDigits);
|
||||
}
|
||||
|
||||
if !range.contains(&value) {
|
||||
return Err(PartError::OutsideRange);
|
||||
}
|
||||
Ok(value)
|
||||
}
|
||||
}
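Aside (not part of the commit): `lex_datetime_part` reads an exact number of digits, rejects a longer digit run, and then range-checks the value. Below is a std-only sketch of that fixed-width idea; the function and error messages are illustrative, not the crate's API.

// Sketch of fixed-width digit parsing with a range check.
use std::ops::RangeInclusive;

fn fixed_digits(s: &str, width: usize, range: RangeInclusive<u16>) -> Result<u16, &'static str> {
    let bytes = s.as_bytes();
    if bytes.len() < width || !bytes[..width].iter().all(u8::is_ascii_digit) {
        return Err("missing digit(s)");
    }
    // A digit directly after the fixed width means the field is too long.
    if bytes.get(width).map_or(false, u8::is_ascii_digit) {
        return Err("too many digits");
    }
    let value: u16 = s[..width].parse().map_err(|_| "missing digit(s)")?;
    if !range.contains(&value) {
        return Err("value outside of allowed range");
    }
    Ok(value)
}

fn main() {
    assert_eq!(fixed_digits("2023-", 4, 0..=9999), Ok(2023));
    assert!(fixed_digits("13", 2, 1..=12).is_err()); // month out of range
}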
170
lib/src/syn/v2/lexer/duration.rs
Normal file
@ -0,0 +1,170 @@
use std::time::Duration as StdDuration;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::{
|
||||
sql::duration::{
|
||||
Duration, SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK,
|
||||
SECONDS_PER_YEAR,
|
||||
},
|
||||
syn::v2::token::{Token, TokenKind},
|
||||
};
|
||||
|
||||
use super::{Error as LexError, Lexer};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("invalid duration suffix")]
|
||||
InvalidSuffix,
|
||||
#[error("duration value overflowed")]
|
||||
Overflow,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Lex a duration.
|
||||
///
|
||||
/// Expects the lexer to have already eaten the digits starting the duration.
|
||||
pub fn lex_duration(&mut self) -> Token {
|
||||
match self.lex_duration_err() {
|
||||
Ok(x) => {
|
||||
self.duration = Some(x);
|
||||
self.finish_token(TokenKind::Duration)
|
||||
}
|
||||
Err(e) => self.invalid_token(LexError::Duration(e)),
|
||||
}
|
||||
}
|
||||
|
||||
fn invalid_suffix_duration(&mut self) -> Error {
|
||||
// eat the whole suffix.
|
||||
while let Some(x) = self.reader.peek() {
|
||||
if !x.is_ascii_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
self.reader.next();
|
||||
}
|
||||
Error::InvalidSuffix
|
||||
}
|
||||
|
||||
/// Lex a duration,
|
||||
///
|
||||
/// Should only be called from lexing a number.
|
||||
///
|
||||
/// Expects any number of numeric characters, but at least one, to have been pushed into scratch.
|
||||
pub fn lex_duration_err(&mut self) -> Result<Duration, Error> {
|
||||
let mut duration = StdDuration::ZERO;
|
||||
|
||||
let mut current_value = 0u64;
|
||||
// use the existing eat span to generate the current value.
|
||||
// span already contains
|
||||
let mut span = self.current_span();
|
||||
span.len -= 1;
|
||||
for b in self.scratch.as_bytes() {
|
||||
debug_assert!(b.is_ascii_digit(), "`{}` is not a digit", b);
|
||||
current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
|
||||
current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
|
||||
}
|
||||
self.scratch.clear();
|
||||
|
||||
loop {
|
||||
let Some(next) = self.reader.peek() else {
|
||||
return Err(Error::InvalidSuffix);
|
||||
};
|
||||
|
||||
// Match the suffix.
|
||||
let new_duration = match next {
|
||||
x @ (b'n' | b'u') => {
|
||||
// Nano or micro suffix
|
||||
self.reader.next();
|
||||
if !self.eat(b's') {
|
||||
return Err(Error::InvalidSuffix);
|
||||
};
|
||||
|
||||
if x == b'n' {
|
||||
StdDuration::from_nanos(current_value)
|
||||
} else {
|
||||
StdDuration::from_micros(current_value)
|
||||
}
|
||||
}
|
||||
// Starting byte of 'µ'
|
||||
0xc2 => {
|
||||
self.reader.next();
|
||||
// Second byte of 'µ'.
|
||||
// Always consume as the next byte will always be part of a two byte character.
|
||||
if !self.eat(0xb5) {
|
||||
return Err(self.invalid_suffix_duration());
|
||||
}
|
||||
|
||||
if !self.eat(b's') {
|
||||
return Err(self.invalid_suffix_duration());
|
||||
}
|
||||
|
||||
StdDuration::from_micros(current_value)
|
||||
}
|
||||
b'm' => {
|
||||
self.reader.next();
|
||||
// Either milli or minute
|
||||
let is_milli = self.eat(b's');
|
||||
|
||||
if is_milli {
|
||||
StdDuration::from_millis(current_value)
|
||||
} else {
|
||||
let Some(number) = current_value.checked_mul(SECONDS_PER_MINUTE) else {
|
||||
return Err(Error::Overflow);
|
||||
};
|
||||
StdDuration::from_secs(number)
|
||||
}
|
||||
}
|
||||
x @ (b's' | b'h' | b'd' | b'w' | b'y') => {
|
||||
self.reader.next();
|
||||
// second, hour, day, week or year.
|
||||
|
||||
let new_duration = match x {
|
||||
b's' => Some(StdDuration::from_secs(current_value)),
|
||||
b'h' => {
|
||||
current_value.checked_mul(SECONDS_PER_HOUR).map(StdDuration::from_secs)
|
||||
}
|
||||
b'd' => {
|
||||
current_value.checked_mul(SECONDS_PER_DAY).map(StdDuration::from_secs)
|
||||
}
|
||||
b'w' => {
|
||||
current_value.checked_mul(SECONDS_PER_WEEK).map(StdDuration::from_secs)
|
||||
}
|
||||
b'y' => {
|
||||
current_value.checked_mul(SECONDS_PER_YEAR).map(StdDuration::from_secs)
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let Some(new_duration) = new_duration else {
|
||||
return Err(Error::Overflow);
|
||||
};
|
||||
new_duration
|
||||
}
|
||||
_ => {
|
||||
return Err(self.invalid_suffix_duration());
|
||||
}
|
||||
};
|
||||
|
||||
duration = duration.checked_add(new_duration).ok_or(Error::Overflow)?;
|
||||
|
||||
let next = self.reader.peek();
|
||||
match next {
|
||||
// there were some remaining alphabetic characters after the valid suffix, so the
|
||||
// suffix is invalid.
|
||||
Some(b'a'..=b'z' | b'A'..=b'Z' | b'_') => {
|
||||
return Err(self.invalid_suffix_duration())
|
||||
}
|
||||
Some(b'0'..=b'9') => {} // Duration continues.
|
||||
_ => return Ok(Duration(duration)),
|
||||
}
|
||||
|
||||
current_value = 0;
|
||||
// Eat all the next numbers
|
||||
while let Some(b @ b'0'..=b'9') = self.reader.peek() {
|
||||
self.reader.next();
|
||||
current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
|
||||
current_value =
|
||||
current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
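Aside (not part of the commit): the duration lexer above accumulates one `<digits><suffix>` group at a time and uses checked arithmetic so an overflowing `u64` surfaces as an error instead of wrapping. A reduced std-only sketch of that loop follows; it supports only `s`, `m` and `h` and does not disambiguate `m` vs `ms` the way the real lexer does.

// Sketch: accumulate "<digits><unit>" groups such as "1h30m" with checked arithmetic.
use std::time::Duration;

fn parse_duration(src: &str) -> Result<Duration, &'static str> {
    let bytes = src.as_bytes();
    let mut i = 0;
    let mut total = Duration::ZERO;
    while i < bytes.len() {
        // Read the numeric part, guarding against u64 overflow.
        let mut value: u64 = 0;
        let start = i;
        while i < bytes.len() && bytes[i].is_ascii_digit() {
            value = value
                .checked_mul(10)
                .and_then(|v| v.checked_add((bytes[i] - b'0') as u64))
                .ok_or("duration value overflowed")?;
            i += 1;
        }
        if start == i {
            return Err("expected digits before the unit");
        }
        // Map the unit suffix to a std Duration.
        let part = match bytes.get(i).copied() {
            Some(b's') => Duration::from_secs(value),
            Some(b'm') => Duration::from_secs(value.checked_mul(60).ok_or("overflow")?),
            Some(b'h') => Duration::from_secs(value.checked_mul(3600).ok_or("overflow")?),
            _ => return Err("invalid duration suffix"),
        };
        i += 1;
        total = total.checked_add(part).ok_or("duration value overflowed")?;
    }
    Ok(total)
}

fn main() {
    assert_eq!(parse_duration("1h30m").unwrap(), Duration::from_secs(5400));
}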
164
lib/src/syn/v2/lexer/ident.rs
Normal file
@ -0,0 +1,164 @@
use std::mem;
|
||||
|
||||
use unicase::UniCase;
|
||||
|
||||
use crate::syn::v2::lexer::{keywords::KEYWORDS, Error, Lexer};
|
||||
use crate::syn::v2::token::{NumberKind, Token, TokenKind};
|
||||
|
||||
use super::unicode::{chars, U8Ext};
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Lex a parameter in the form of `$[a-zA-Z0-9_]*`
|
||||
///
|
||||
/// # Lexer State
|
||||
/// Expects the lexer to have already eaten the param's starting `$`
|
||||
pub fn lex_param(&mut self) -> Token {
|
||||
debug_assert_eq!(self.scratch, "");
|
||||
loop {
|
||||
if let Some(x) = self.reader.peek() {
|
||||
if x.is_ascii_alphanumeric() || x == b'_' {
|
||||
self.scratch.push(x as char);
|
||||
self.reader.next();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return self.finish_token(TokenKind::Parameter);
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a non-surrounded identifier in the form of `[a-zA-Z0-9_]*`
|
||||
///
|
||||
/// The start byte should already be a valid byte of the identifier.
|
||||
///
|
||||
/// When calling this, the caller should already know that the token can't be any other token covered
/// by `[a-zA-Z0-9_]*`.
|
||||
pub fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
|
||||
debug_assert!(matches!(start, b'a'..=b'z' | b'A'..=b'Z' | b'_'));
|
||||
debug_assert_eq!(self.scratch, "");
|
||||
self.scratch.push(start as char);
|
||||
self.lex_ident()
|
||||
}
|
||||
|
||||
/// Lex a non-surrounded identifier.
|
||||
///
|
||||
/// The scratch should contain only identifier valid chars.
|
||||
pub fn lex_ident(&mut self) -> Token {
|
||||
loop {
|
||||
if let Some(x) = self.reader.peek() {
|
||||
if x.is_identifier_continue() {
|
||||
self.scratch.push(x as char);
|
||||
self.reader.next();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
// When finished parsing the identifier, try to match it to a keyword.
// If there is one, return it as the keyword. The original identifier can be reconstructed
// from the token.
|
||||
if let Some(x) = KEYWORDS.get(&UniCase::ascii(&self.scratch)).copied() {
|
||||
self.scratch.clear();
|
||||
return self.finish_token(x);
|
||||
}
|
||||
|
||||
if self.scratch == "NaN" {
|
||||
self.scratch.clear();
|
||||
return self.finish_token(TokenKind::Number(NumberKind::NaN));
|
||||
} else {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return self.finish_token(TokenKind::Identifier);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex an ident which is surrounded by delimiters.
|
||||
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
|
||||
match self.lex_surrounded_ident_err(is_backtick) {
|
||||
Ok(x) => x,
|
||||
Err(e) => {
|
||||
self.scratch.clear();
|
||||
self.invalid_token(e)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
|
||||
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<Token, Error> {
|
||||
loop {
|
||||
let Some(x) = self.reader.next() else {
|
||||
let end_char = if is_backtick {
|
||||
'`'
|
||||
} else {
|
||||
'⟩'
|
||||
};
|
||||
return Err(Error::ExpectedEnd(end_char));
|
||||
};
|
||||
if x.is_ascii() {
|
||||
match x {
|
||||
b'`' if is_backtick => {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Identifier));
|
||||
}
|
||||
b'\0' => {
|
||||
// null bytes not allowed
|
||||
return Err(Error::UnexpectedCharacter('\0'));
|
||||
}
|
||||
b'\\' if is_backtick => {
|
||||
// handle escape sequences.
|
||||
// This is compliant with the original parser which didn't permit
// escape sequences in `⟨⟩` surrounded idents.
|
||||
let Some(next) = self.reader.next() else {
|
||||
let end_char = if is_backtick {
|
||||
'`'
|
||||
} else {
|
||||
'⟩'
|
||||
};
|
||||
return Err(Error::ExpectedEnd(end_char));
|
||||
};
|
||||
match next {
|
||||
b'\\' => {
|
||||
self.scratch.push('\\');
|
||||
}
|
||||
b'`' => {
|
||||
self.scratch.push('`');
|
||||
}
|
||||
b'/' => {
|
||||
self.scratch.push('/');
|
||||
}
|
||||
b'b' => {
|
||||
self.scratch.push(chars::BS);
|
||||
}
|
||||
b'f' => {
|
||||
self.scratch.push(chars::FF);
|
||||
}
|
||||
b'n' => {
|
||||
self.scratch.push(chars::LF);
|
||||
}
|
||||
b'r' => {
|
||||
self.scratch.push(chars::CR);
|
||||
}
|
||||
b't' => {
|
||||
self.scratch.push(chars::TAB);
|
||||
}
|
||||
_ => {
|
||||
let char = if x.is_ascii() {
|
||||
x as char
|
||||
} else {
|
||||
self.reader.complete_char(x)?
|
||||
};
|
||||
return Err(Error::InvalidEscapeCharacter(char));
|
||||
}
|
||||
}
|
||||
}
|
||||
x => self.scratch.push(x as char),
|
||||
}
|
||||
} else {
|
||||
let c = self.reader.complete_char(x)?;
|
||||
if !is_backtick && c == '⟩' {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Identifier));
|
||||
}
|
||||
self.scratch.push(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
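Aside (not part of the commit): a reduced sketch of the backtick-delimited identifier lexing above, handling a small set of escape sequences and rejecting NUL. The names and the returned `(ident, bytes_consumed)` shape are illustrative, not the crate's API.

// Sketch: read a backtick-surrounded identifier, honouring a few escapes.
fn lex_backtick_ident(src: &str) -> Result<(String, usize), &'static str> {
    let mut out = String::new();
    let mut chars = src.char_indices();
    while let Some((idx, c)) = chars.next() {
        match c {
            '`' => return Ok((out, idx + 1)), // consumed up to and including the closing backtick
            '\\' => match chars.next().map(|(_, e)| e) {
                Some('\\') => out.push('\\'),
                Some('`') => out.push('`'),
                Some('n') => out.push('\n'),
                Some('t') => out.push('\t'),
                _ => return Err("invalid escape character"),
            },
            '\0' => return Err("null bytes not allowed"),
            c => out.push(c),
        }
    }
    Err("expected next character to be '`'")
}

fn main() {
    let (ident, used) = lex_backtick_ident("weird\\`name` rest").unwrap();
    assert_eq!(ident, "weird`name");
    assert_eq!(used, "weird\\`name`".len());
}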
97
lib/src/syn/v2/lexer/js.rs
Normal file
@ -0,0 +1,97 @@
use crate::syn::v2::token::Span;
|
||||
|
||||
use super::{unicode::chars::JS_LINE_TERIMATORS, Error, Lexer};
|
||||
|
||||
impl Lexer<'_> {
|
||||
/// Lex the body of a js function.
|
||||
///
|
||||
/// This function will never be called while lexing normally.
|
||||
pub fn lex_js_function_body(&mut self) -> Result<String, (Error, Span)> {
|
||||
self.lex_js_function_body_inner().map_err(|e| (e, self.current_span()))
|
||||
}
|
||||
|
||||
/// Lex the body of a js function.
|
||||
fn lex_js_function_body_inner(&mut self) -> Result<String, Error> {
|
||||
let mut block_depth = 1;
|
||||
loop {
|
||||
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
|
||||
match byte {
|
||||
b'`' => self.lex_js_string(b'`')?,
|
||||
b'\'' => self.lex_js_string(b'\'')?,
|
||||
b'\"' => self.lex_js_string(b'\"')?,
|
||||
b'/' => match self.reader.peek() {
|
||||
Some(b'/') => {
|
||||
self.reader.next();
|
||||
self.lex_js_single_comment()?;
|
||||
}
|
||||
Some(b'*') => {
|
||||
self.reader.next();
|
||||
self.lex_js_multi_comment()?
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
b'{' => {
|
||||
block_depth += 1;
|
||||
}
|
||||
b'}' => {
|
||||
block_depth -= 1;
|
||||
if block_depth == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
x if !x.is_ascii() => {
|
||||
// check for invalid characters.
|
||||
self.reader.complete_char(x)?;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let mut span = self.current_span();
|
||||
// remove the `}` from the source text;
|
||||
span.len -= 1;
|
||||
// lexer ensures that it is valid utf8
|
||||
let source = String::from_utf8(self.reader.span(span).to_vec()).unwrap();
|
||||
Ok(source)
|
||||
}
|
||||
|
||||
/// lex a js string with the given delimiter.
|
||||
fn lex_js_string(&mut self, enclosing_byte: u8) -> Result<(), Error> {
|
||||
loop {
|
||||
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
|
||||
if byte == enclosing_byte {
|
||||
return Ok(());
|
||||
}
|
||||
if byte == b'\\' {
|
||||
self.reader.next();
|
||||
}
|
||||
// check for invalid characters.
|
||||
self.reader.convert_to_char(byte)?;
|
||||
}
|
||||
}
|
||||
|
||||
/// lex a single line js comment.
|
||||
fn lex_js_single_comment(&mut self) -> Result<(), Error> {
|
||||
loop {
|
||||
let Some(byte) = self.reader.next() else {
|
||||
return Ok(());
|
||||
};
|
||||
let char = self.reader.convert_to_char(byte)?;
|
||||
if JS_LINE_TERIMATORS.contains(&char) {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// lex a multi line js comment.
|
||||
fn lex_js_multi_comment(&mut self) -> Result<(), Error> {
|
||||
loop {
|
||||
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
|
||||
if byte == b'*' && self.reader.peek() == Some(b'/') {
|
||||
self.reader.next();
|
||||
return Ok(());
|
||||
}
|
||||
// check for invalid characters.
|
||||
self.reader.convert_to_char(byte)?;
|
||||
}
|
||||
}
|
||||
}
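Aside (not part of the commit): `lex_js_function_body` finds the end of the embedded script by tracking `{`/`}` depth while skipping string literals, so braces inside strings don't close the block. A std-only sketch of that depth-tracking idea, with comment handling omitted for brevity; the names are illustrative.

// Sketch: find the index of the `}` that closes an already-opened block,
// skipping braces that appear inside quoted strings.
fn find_block_end(src: &str) -> Option<usize> {
    let bytes = src.as_bytes();
    let mut depth = 1usize;
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            b'"' | b'\'' | b'`' => {
                let quote = bytes[i];
                i += 1;
                // Skip to the matching quote, honouring backslash escapes.
                while i < bytes.len() && bytes[i] != quote {
                    if bytes[i] == b'\\' { i += 1; }
                    i += 1;
                }
            }
            b'{' => depth += 1,
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
        i += 1;
    }
    None
}

fn main() {
    assert_eq!(find_block_end("return \"}\"; }"), Some(12));
}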
285
lib/src/syn/v2/lexer/keywords.rs
Normal file
@ -0,0 +1,285 @@
use crate::{
|
||||
sql::{language::Language, Algorithm},
|
||||
syn::v2::token::{DistanceKind, Keyword, TokenKind},
|
||||
};
|
||||
use phf::phf_map;
|
||||
use unicase::UniCase;
|
||||
|
||||
/// A map from keyword strings to their `TokenKind`.
|
||||
pub(crate) static KEYWORDS: phf::Map<UniCase<&'static str>, TokenKind> = phf_map! {
|
||||
// Keywords
|
||||
UniCase::ascii("AFTER") => TokenKind::Keyword(Keyword::After),
|
||||
UniCase::ascii("ALL") => TokenKind::Keyword(Keyword::All),
|
||||
UniCase::ascii("ANALYZE") => TokenKind::Keyword(Keyword::Analyze),
|
||||
UniCase::ascii("ANALYZER") => TokenKind::Keyword(Keyword::Analyzer),
|
||||
UniCase::ascii("AS") => TokenKind::Keyword(Keyword::As),
|
||||
UniCase::ascii("ASCENDING") => TokenKind::Keyword(Keyword::Ascending),
|
||||
UniCase::ascii("ASC") => TokenKind::Keyword(Keyword::Ascending),
|
||||
UniCase::ascii("ASCII") => TokenKind::Keyword(Keyword::Ascii),
|
||||
UniCase::ascii("ASSERT") => TokenKind::Keyword(Keyword::Assert),
|
||||
UniCase::ascii("AT") => TokenKind::Keyword(Keyword::At),
|
||||
UniCase::ascii("BEFORE") => TokenKind::Keyword(Keyword::Before),
|
||||
UniCase::ascii("BEGIN") => TokenKind::Keyword(Keyword::Begin),
|
||||
UniCase::ascii("BLANK") => TokenKind::Keyword(Keyword::Blank),
|
||||
UniCase::ascii("BM25") => TokenKind::Keyword(Keyword::Bm25),
|
||||
UniCase::ascii("BREAK") => TokenKind::Keyword(Keyword::Break),
|
||||
UniCase::ascii("BY") => TokenKind::Keyword(Keyword::By),
|
||||
UniCase::ascii("CAMEL") => TokenKind::Keyword(Keyword::Camel),
|
||||
UniCase::ascii("CANCEL") => TokenKind::Keyword(Keyword::Cancel),
|
||||
UniCase::ascii("CHANGEFEED") => TokenKind::Keyword(Keyword::ChangeFeed),
|
||||
UniCase::ascii("CHANGES") => TokenKind::Keyword(Keyword::Changes),
|
||||
UniCase::ascii("CAPACITY") => TokenKind::Keyword(Keyword::Capacity),
|
||||
UniCase::ascii("CLASS") => TokenKind::Keyword(Keyword::Class),
|
||||
UniCase::ascii("COMMENT") => TokenKind::Keyword(Keyword::Comment),
|
||||
UniCase::ascii("COMMIT") => TokenKind::Keyword(Keyword::Commit),
|
||||
UniCase::ascii("CONTENT") => TokenKind::Keyword(Keyword::Content),
|
||||
UniCase::ascii("CONTINUE") => TokenKind::Keyword(Keyword::Continue),
|
||||
UniCase::ascii("CREATE") => TokenKind::Keyword(Keyword::Create),
|
||||
UniCase::ascii("DATABASE") => TokenKind::Keyword(Keyword::Database),
|
||||
UniCase::ascii("DB") => TokenKind::Keyword(Keyword::Database),
|
||||
UniCase::ascii("DEFAULT") => TokenKind::Keyword(Keyword::Default),
|
||||
UniCase::ascii("DEFINE") => TokenKind::Keyword(Keyword::Define),
|
||||
UniCase::ascii("DELETE") => TokenKind::Keyword(Keyword::Delete),
|
||||
UniCase::ascii("DESCENDING") => TokenKind::Keyword(Keyword::Descending),
|
||||
UniCase::ascii("DESC") => TokenKind::Keyword(Keyword::Descending),
|
||||
UniCase::ascii("DIFF") => TokenKind::Keyword(Keyword::Diff),
|
||||
UniCase::ascii("DIMENSION") => TokenKind::Keyword(Keyword::Dimension),
|
||||
UniCase::ascii("DISTANCE") => TokenKind::Keyword(Keyword::Distance),
|
||||
UniCase::ascii("DIST") => TokenKind::Keyword(Keyword::Distance),
|
||||
UniCase::ascii("DOC_IDS_CACHE") => TokenKind::Keyword(Keyword::DocIdsCache),
|
||||
UniCase::ascii("DOC_IDS_ORDER") => TokenKind::Keyword(Keyword::DocIdsOrder),
|
||||
UniCase::ascii("DOC_LENGTHS_CACHE") => TokenKind::Keyword(Keyword::DocLengthsCache),
|
||||
UniCase::ascii("DOC_LENGTHS_ORDER") => TokenKind::Keyword(Keyword::DocLengthsOrder),
|
||||
UniCase::ascii("DROP") => TokenKind::Keyword(Keyword::Drop),
|
||||
UniCase::ascii("DUPLICATE") => TokenKind::Keyword(Keyword::Duplicate),
|
||||
UniCase::ascii("EDGENGRAM") => TokenKind::Keyword(Keyword::Edgengram),
|
||||
UniCase::ascii("EVENT") => TokenKind::Keyword(Keyword::Event),
|
||||
UniCase::ascii("ELSE") => TokenKind::Keyword(Keyword::Else),
|
||||
UniCase::ascii("END") => TokenKind::Keyword(Keyword::End),
|
||||
UniCase::ascii("EXPLAIN") => TokenKind::Keyword(Keyword::Explain),
|
||||
UniCase::ascii("false") => TokenKind::Keyword(Keyword::False),
|
||||
UniCase::ascii("FETCH") => TokenKind::Keyword(Keyword::Fetch),
|
||||
UniCase::ascii("FIELD") => TokenKind::Keyword(Keyword::Field),
|
||||
UniCase::ascii("FIELDS") => TokenKind::Keyword(Keyword::Fields),
|
||||
UniCase::ascii("COLUMNS") => TokenKind::Keyword(Keyword::Fields),
|
||||
UniCase::ascii("FILTERS") => TokenKind::Keyword(Keyword::Filters),
|
||||
UniCase::ascii("FLEXIBLE") => TokenKind::Keyword(Keyword::Flexible),
|
||||
UniCase::ascii("FLEXI") => TokenKind::Keyword(Keyword::Flexible),
|
||||
UniCase::ascii("FLEX") => TokenKind::Keyword(Keyword::Flexible),
|
||||
UniCase::ascii("FOR") => TokenKind::Keyword(Keyword::For),
|
||||
UniCase::ascii("FROM") => TokenKind::Keyword(Keyword::From),
|
||||
UniCase::ascii("FULL") => TokenKind::Keyword(Keyword::Full),
|
||||
UniCase::ascii("FUNCTION") => TokenKind::Keyword(Keyword::Function),
|
||||
UniCase::ascii("GROUP") => TokenKind::Keyword(Keyword::Group),
|
||||
UniCase::ascii("HIGHLIGHTS") => TokenKind::Keyword(Keyword::Highlights),
|
||||
UniCase::ascii("IGNORE") => TokenKind::Keyword(Keyword::Ignore),
|
||||
UniCase::ascii("INDEX") => TokenKind::Keyword(Keyword::Index),
|
||||
UniCase::ascii("INFO") => TokenKind::Keyword(Keyword::Info),
|
||||
UniCase::ascii("INSERT") => TokenKind::Keyword(Keyword::Insert),
|
||||
UniCase::ascii("INTO") => TokenKind::Keyword(Keyword::Into),
|
||||
UniCase::ascii("IF") => TokenKind::Keyword(Keyword::If),
|
||||
UniCase::ascii("IS") => TokenKind::Keyword(Keyword::Is),
|
||||
UniCase::ascii("KEY") => TokenKind::Keyword(Keyword::Key),
|
||||
UniCase::ascii("KILL") => TokenKind::Keyword(Keyword::Kill),
|
||||
UniCase::ascii("KNN") => TokenKind::Keyword(Keyword::Knn),
|
||||
UniCase::ascii("LET") => TokenKind::Keyword(Keyword::Let),
|
||||
UniCase::ascii("LIMIT") => TokenKind::Keyword(Keyword::Limit),
|
||||
UniCase::ascii("LIVE") => TokenKind::Keyword(Keyword::Live),
|
||||
UniCase::ascii("LOWERCASE") => TokenKind::Keyword(Keyword::Lowercase),
|
||||
UniCase::ascii("MERGE") => TokenKind::Keyword(Keyword::Merge),
|
||||
UniCase::ascii("MODEL") => TokenKind::Keyword(Keyword::Model),
|
||||
UniCase::ascii("MTREE") => TokenKind::Keyword(Keyword::MTree),
|
||||
UniCase::ascii("MTREE_CACHE") => TokenKind::Keyword(Keyword::MTreeCache),
|
||||
UniCase::ascii("NAMESPACE") => TokenKind::Keyword(Keyword::Namespace),
|
||||
UniCase::ascii("NS") => TokenKind::Keyword(Keyword::Namespace),
|
||||
UniCase::ascii("NGRAM") => TokenKind::Keyword(Keyword::Ngram),
|
||||
UniCase::ascii("NO") => TokenKind::Keyword(Keyword::No),
|
||||
UniCase::ascii("NOINDEX") => TokenKind::Keyword(Keyword::NoIndex),
|
||||
UniCase::ascii("NONE") => TokenKind::Keyword(Keyword::None),
|
||||
UniCase::ascii("NULL") => TokenKind::Keyword(Keyword::Null),
|
||||
UniCase::ascii("NUMERIC") => TokenKind::Keyword(Keyword::Numeric),
|
||||
UniCase::ascii("OMIT") => TokenKind::Keyword(Keyword::Omit),
|
||||
UniCase::ascii("ON") => TokenKind::Keyword(Keyword::On),
|
||||
UniCase::ascii("ONLY") => TokenKind::Keyword(Keyword::Only),
|
||||
UniCase::ascii("OPTION") => TokenKind::Keyword(Keyword::Option),
|
||||
UniCase::ascii("ORDER") => TokenKind::Keyword(Keyword::Order),
|
||||
UniCase::ascii("PARALLEL") => TokenKind::Keyword(Keyword::Parallel),
|
||||
UniCase::ascii("PARAM") => TokenKind::Keyword(Keyword::Param),
|
||||
UniCase::ascii("PASSHASH") => TokenKind::Keyword(Keyword::Passhash),
|
||||
UniCase::ascii("PASSWORD") => TokenKind::Keyword(Keyword::Password),
|
||||
UniCase::ascii("PATCH") => TokenKind::Keyword(Keyword::Patch),
|
||||
UniCase::ascii("PERMISSIONS") => TokenKind::Keyword(Keyword::Permissions),
|
||||
UniCase::ascii("POSTINGS_CACHE") => TokenKind::Keyword(Keyword::PostingsCache),
|
||||
UniCase::ascii("POSTINGS_ORDER") => TokenKind::Keyword(Keyword::PostingsOrder),
|
||||
UniCase::ascii("PUNCT") => TokenKind::Keyword(Keyword::Punct),
|
||||
UniCase::ascii("RELATE") => TokenKind::Keyword(Keyword::Relate),
|
||||
UniCase::ascii("REMOVE") => TokenKind::Keyword(Keyword::Remove),
|
||||
UniCase::ascii("REPLACE") => TokenKind::Keyword(Keyword::Replace),
|
||||
UniCase::ascii("RETURN") => TokenKind::Keyword(Keyword::Return),
|
||||
UniCase::ascii("ROLES") => TokenKind::Keyword(Keyword::Roles),
|
||||
UniCase::ascii("ROOT") => TokenKind::Keyword(Keyword::Root),
|
||||
UniCase::ascii("KV") => TokenKind::Keyword(Keyword::Root),
|
||||
UniCase::ascii("SCHEMAFULL") => TokenKind::Keyword(Keyword::Schemafull),
|
||||
UniCase::ascii("SCHEMAFUL") => TokenKind::Keyword(Keyword::Schemafull),
|
||||
UniCase::ascii("SCHEMALESS") => TokenKind::Keyword(Keyword::Schemaless),
|
||||
UniCase::ascii("SCOPE") => TokenKind::Keyword(Keyword::Scope),
|
||||
UniCase::ascii("SC") => TokenKind::Keyword(Keyword::Scope),
|
||||
UniCase::ascii("SEARCH") => TokenKind::Keyword(Keyword::Search),
|
||||
UniCase::ascii("SELECT") => TokenKind::Keyword(Keyword::Select),
|
||||
UniCase::ascii("SESSION") => TokenKind::Keyword(Keyword::Session),
|
||||
UniCase::ascii("SET") => TokenKind::Keyword(Keyword::Set),
|
||||
UniCase::ascii("SHOW") => TokenKind::Keyword(Keyword::Show),
|
||||
UniCase::ascii("SIGNIN") => TokenKind::Keyword(Keyword::Signin),
|
||||
UniCase::ascii("SIGNUP") => TokenKind::Keyword(Keyword::Signup),
|
||||
UniCase::ascii("SINCE") => TokenKind::Keyword(Keyword::Since),
|
||||
UniCase::ascii("SLEEP") => TokenKind::Keyword(Keyword::Sleep),
|
||||
UniCase::ascii("SNOWBALL") => TokenKind::Keyword(Keyword::Snowball),
|
||||
UniCase::ascii("SPLIT") => TokenKind::Keyword(Keyword::Split),
|
||||
UniCase::ascii("START") => TokenKind::Keyword(Keyword::Start),
|
||||
UniCase::ascii("TABLE") => TokenKind::Keyword(Keyword::Table),
|
||||
UniCase::ascii("TB") => TokenKind::Keyword(Keyword::Table),
|
||||
UniCase::ascii("TERMS_CACHE") => TokenKind::Keyword(Keyword::TermsCache),
|
||||
UniCase::ascii("TERMS_ORDER") => TokenKind::Keyword(Keyword::TermsOrder),
|
||||
UniCase::ascii("THEN") => TokenKind::Keyword(Keyword::Then),
|
||||
UniCase::ascii("THROW") => TokenKind::Keyword(Keyword::Throw),
|
||||
UniCase::ascii("TIMEOUT") => TokenKind::Keyword(Keyword::Timeout),
|
||||
UniCase::ascii("TOKENIZERS") => TokenKind::Keyword(Keyword::Tokenizers),
|
||||
UniCase::ascii("TOKEN") => TokenKind::Keyword(Keyword::Token),
|
||||
UniCase::ascii("TRANSACTION") => TokenKind::Keyword(Keyword::Transaction),
|
||||
UniCase::ascii("true") => TokenKind::Keyword(Keyword::True),
|
||||
UniCase::ascii("TYPE") => TokenKind::Keyword(Keyword::Type),
|
||||
UniCase::ascii("UNIQUE") => TokenKind::Keyword(Keyword::Unique),
|
||||
UniCase::ascii("UNSET") => TokenKind::Keyword(Keyword::Unset),
|
||||
UniCase::ascii("UPDATE") => TokenKind::Keyword(Keyword::Update),
|
||||
UniCase::ascii("UPPERCASE") => TokenKind::Keyword(Keyword::Uppercase),
|
||||
UniCase::ascii("USE") => TokenKind::Keyword(Keyword::Use),
|
||||
UniCase::ascii("USER") => TokenKind::Keyword(Keyword::User),
|
||||
UniCase::ascii("VALUE") => TokenKind::Keyword(Keyword::Value),
|
||||
UniCase::ascii("VALUES") => TokenKind::Keyword(Keyword::Values),
|
||||
UniCase::ascii("VERSION") => TokenKind::Keyword(Keyword::Version),
|
||||
UniCase::ascii("VS") => TokenKind::Keyword(Keyword::Vs),
|
||||
UniCase::ascii("WHEN") => TokenKind::Keyword(Keyword::When),
|
||||
UniCase::ascii("WHERE") => TokenKind::Keyword(Keyword::Where),
|
||||
UniCase::ascii("WITH") => TokenKind::Keyword(Keyword::With),
|
||||
UniCase::ascii("ALLINSIDE") => TokenKind::Keyword(Keyword::AllInside),
|
||||
UniCase::ascii("ANDKW") => TokenKind::Keyword(Keyword::AndKw),
|
||||
UniCase::ascii("ANYINSIDE") => TokenKind::Keyword(Keyword::AnyInside),
|
||||
UniCase::ascii("INSIDE") => TokenKind::Keyword(Keyword::Inside),
|
||||
UniCase::ascii("INTERSECTS") => TokenKind::Keyword(Keyword::Intersects),
|
||||
UniCase::ascii("NONEINSIDE") => TokenKind::Keyword(Keyword::NoneInside),
|
||||
UniCase::ascii("NOTINSIDE") => TokenKind::Keyword(Keyword::NotInside),
|
||||
UniCase::ascii("OR") => TokenKind::Keyword(Keyword::OrKw),
|
||||
UniCase::ascii("OUTSIDE") => TokenKind::Keyword(Keyword::Outside),
|
||||
UniCase::ascii("NOT") => TokenKind::Keyword(Keyword::Not),
|
||||
UniCase::ascii("AND") => TokenKind::Keyword(Keyword::And),
|
||||
UniCase::ascii("COLLATE") => TokenKind::Keyword(Keyword::Collate),
|
||||
UniCase::ascii("CONTAINSALL") => TokenKind::Keyword(Keyword::ContainsAll),
|
||||
UniCase::ascii("CONTAINSANY") => TokenKind::Keyword(Keyword::ContainsAny),
|
||||
UniCase::ascii("CONTAINSNONE") => TokenKind::Keyword(Keyword::ContainsNone),
|
||||
UniCase::ascii("CONTAINSNOT") => TokenKind::Keyword(Keyword::ContainsNot),
|
||||
UniCase::ascii("CONTAINS") => TokenKind::Keyword(Keyword::Contains),
|
||||
UniCase::ascii("IN") => TokenKind::Keyword(Keyword::In),
|
||||
|
||||
UniCase::ascii("ANY") => TokenKind::Keyword(Keyword::Any),
|
||||
UniCase::ascii("ARRAY") => TokenKind::Keyword(Keyword::Array),
|
||||
UniCase::ascii("GEOMETRY") => TokenKind::Keyword(Keyword::Geometry),
|
||||
UniCase::ascii("RECORD") => TokenKind::Keyword(Keyword::Record),
|
||||
UniCase::ascii("FUTURE") => TokenKind::Keyword(Keyword::Future),
|
||||
UniCase::ascii("BOOL") => TokenKind::Keyword(Keyword::Bool),
|
||||
UniCase::ascii("BYTES") => TokenKind::Keyword(Keyword::Bytes),
|
||||
UniCase::ascii("DATETIME") => TokenKind::Keyword(Keyword::Datetime),
|
||||
UniCase::ascii("DECIMAL") => TokenKind::Keyword(Keyword::Decimal),
|
||||
UniCase::ascii("DURATION") => TokenKind::Keyword(Keyword::Duration),
|
||||
UniCase::ascii("FLOAT") => TokenKind::Keyword(Keyword::Float),
|
||||
UniCase::ascii("fn") => TokenKind::Keyword(Keyword::Fn),
|
||||
UniCase::ascii("ml") => TokenKind::Keyword(Keyword::ML),
|
||||
UniCase::ascii("INT") => TokenKind::Keyword(Keyword::Int),
|
||||
UniCase::ascii("NUMBER") => TokenKind::Keyword(Keyword::Number),
|
||||
UniCase::ascii("OBJECT") => TokenKind::Keyword(Keyword::Object),
|
||||
UniCase::ascii("STRING") => TokenKind::Keyword(Keyword::String),
|
||||
UniCase::ascii("UUID") => TokenKind::Keyword(Keyword::Uuid),
|
||||
UniCase::ascii("ULID") => TokenKind::Keyword(Keyword::Ulid),
|
||||
UniCase::ascii("RAND") => TokenKind::Keyword(Keyword::Rand),
|
||||
UniCase::ascii("FEATURE") => TokenKind::Keyword(Keyword::Feature),
|
||||
UniCase::ascii("LINE") => TokenKind::Keyword(Keyword::Line),
|
||||
UniCase::ascii("POINT") => TokenKind::Keyword(Keyword::Point),
|
||||
UniCase::ascii("POLYGON") => TokenKind::Keyword(Keyword::Polygon),
|
||||
UniCase::ascii("MULTIPOINT") => TokenKind::Keyword(Keyword::MultiPoint),
|
||||
UniCase::ascii("MULTILINE") => TokenKind::Keyword(Keyword::MultiLine),
|
||||
UniCase::ascii("MULTIPOLYGON") => TokenKind::Keyword(Keyword::MultiPolygon),
|
||||
UniCase::ascii("COLLECTION") => TokenKind::Keyword(Keyword::Collection),
|
||||
|
||||
// Languages
|
||||
UniCase::ascii("ARABIC") => TokenKind::Language(Language::Arabic),
|
||||
UniCase::ascii("ARA") => TokenKind::Language(Language::Arabic),
|
||||
UniCase::ascii("AR") => TokenKind::Language(Language::Arabic),
|
||||
UniCase::ascii("DANISH") => TokenKind::Language(Language::Danish),
|
||||
UniCase::ascii("DAN") => TokenKind::Language(Language::Danish),
|
||||
UniCase::ascii("DA") => TokenKind::Language(Language::Danish),
|
||||
UniCase::ascii("DUTCH") => TokenKind::Language(Language::Dutch),
|
||||
UniCase::ascii("NLD") => TokenKind::Language(Language::Dutch),
|
||||
UniCase::ascii("NL") => TokenKind::Language(Language::Dutch),
|
||||
UniCase::ascii("ENGLISH") => TokenKind::Language(Language::English),
|
||||
UniCase::ascii("ENG") => TokenKind::Language(Language::English),
|
||||
UniCase::ascii("EN") => TokenKind::Language(Language::English),
|
||||
UniCase::ascii("FRENCH") => TokenKind::Language(Language::French),
|
||||
UniCase::ascii("FRA") => TokenKind::Language(Language::French),
|
||||
UniCase::ascii("FR") => TokenKind::Language(Language::French),
|
||||
UniCase::ascii("GERMAN") => TokenKind::Language(Language::German),
|
||||
UniCase::ascii("DEU") => TokenKind::Language(Language::German),
|
||||
UniCase::ascii("DE") => TokenKind::Language(Language::German),
|
||||
UniCase::ascii("GREEK") => TokenKind::Language(Language::Greek),
|
||||
UniCase::ascii("ELL") => TokenKind::Language(Language::Greek),
|
||||
UniCase::ascii("EL") => TokenKind::Language(Language::Greek),
|
||||
UniCase::ascii("HUNGARIAN") => TokenKind::Language(Language::Hungarian),
|
||||
UniCase::ascii("HUN") => TokenKind::Language(Language::Hungarian),
|
||||
UniCase::ascii("HU") => TokenKind::Language(Language::Hungarian),
|
||||
UniCase::ascii("ITALIAN") => TokenKind::Language(Language::Italian),
|
||||
UniCase::ascii("ITA") => TokenKind::Language(Language::Italian),
|
||||
UniCase::ascii("IT") => TokenKind::Language(Language::Italian),
|
||||
UniCase::ascii("NORWEGIAN") => TokenKind::Language(Language::Norwegian),
|
||||
UniCase::ascii("NOR") => TokenKind::Language(Language::Norwegian),
|
||||
UniCase::ascii("PORTUGUESE") => TokenKind::Language(Language::Portuguese),
|
||||
UniCase::ascii("POR") => TokenKind::Language(Language::Portuguese),
|
||||
UniCase::ascii("PT") => TokenKind::Language(Language::Portuguese),
|
||||
UniCase::ascii("ROMANIAN") => TokenKind::Language(Language::Romanian),
|
||||
UniCase::ascii("RON") => TokenKind::Language(Language::Romanian),
|
||||
UniCase::ascii("RO") => TokenKind::Language(Language::Romanian),
|
||||
UniCase::ascii("RUSSIAN") => TokenKind::Language(Language::Russian),
|
||||
UniCase::ascii("RUS") => TokenKind::Language(Language::Russian),
|
||||
UniCase::ascii("RU") => TokenKind::Language(Language::Russian),
|
||||
UniCase::ascii("SPANISH") => TokenKind::Language(Language::Spanish),
|
||||
UniCase::ascii("SPA") => TokenKind::Language(Language::Spanish),
|
||||
UniCase::ascii("ES") => TokenKind::Language(Language::Spanish),
|
||||
UniCase::ascii("SWEDISH") => TokenKind::Language(Language::Swedish),
|
||||
UniCase::ascii("SWE") => TokenKind::Language(Language::Swedish),
|
||||
UniCase::ascii("SV") => TokenKind::Language(Language::Swedish),
|
||||
UniCase::ascii("TAMIL") => TokenKind::Language(Language::Tamil),
|
||||
UniCase::ascii("TAM") => TokenKind::Language(Language::Tamil),
|
||||
UniCase::ascii("TA") => TokenKind::Language(Language::Tamil),
|
||||
UniCase::ascii("TURKISH") => TokenKind::Language(Language::Turkish),
|
||||
UniCase::ascii("TUR") => TokenKind::Language(Language::Turkish),
|
||||
UniCase::ascii("TR") => TokenKind::Language(Language::Turkish),
|
||||
|
||||
// Algorithms
|
||||
UniCase::ascii("EDDSA") => TokenKind::Algorithm(Algorithm::EdDSA),
|
||||
UniCase::ascii("ES256") => TokenKind::Algorithm(Algorithm::Es256),
|
||||
UniCase::ascii("ES384") => TokenKind::Algorithm(Algorithm::Es384),
|
||||
UniCase::ascii("ES512") => TokenKind::Algorithm(Algorithm::Es512),
|
||||
UniCase::ascii("HS256") => TokenKind::Algorithm(Algorithm::Hs256),
|
||||
UniCase::ascii("HS384") => TokenKind::Algorithm(Algorithm::Hs384),
|
||||
UniCase::ascii("HS512") => TokenKind::Algorithm(Algorithm::Hs512),
|
||||
UniCase::ascii("PS256") => TokenKind::Algorithm(Algorithm::Ps256),
|
||||
UniCase::ascii("PS384") => TokenKind::Algorithm(Algorithm::Ps384),
|
||||
UniCase::ascii("PS512") => TokenKind::Algorithm(Algorithm::Ps512),
|
||||
UniCase::ascii("RS256") => TokenKind::Algorithm(Algorithm::Rs256),
|
||||
UniCase::ascii("RS384") => TokenKind::Algorithm(Algorithm::Rs384),
|
||||
UniCase::ascii("RS512") => TokenKind::Algorithm(Algorithm::Rs512),
|
||||
UniCase::ascii("JWKS") => TokenKind::Algorithm(Algorithm::Jwks),
|
||||
|
||||
// Distance
|
||||
UniCase::ascii("EUCLIDEAN") => TokenKind::Distance(DistanceKind::Euclidean),
|
||||
UniCase::ascii("MANHATTAN") => TokenKind::Distance(DistanceKind::Manhattan),
|
||||
UniCase::ascii("HAMMING") => TokenKind::Distance(DistanceKind::Hamming),
|
||||
UniCase::ascii("MINKOWSKI") => TokenKind::Distance(DistanceKind::Minkowski),
|
||||
};
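Aside (not part of the commit): the table above is a compile-time perfect-hash map keyed by `UniCase`, so keyword lookup is a single ASCII case-insensitive probe. A std-only sketch of the same contract using a linear scan instead of phf; the `Kind` names are illustrative.

// Sketch: ASCII case-insensitive keyword lookup; the real table uses a phf map with UniCase keys.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind { Select, From, Where, Identifier }

const KEYWORDS: &[(&str, Kind)] =
    &[("SELECT", Kind::Select), ("FROM", Kind::From), ("WHERE", Kind::Where)];

fn classify(ident: &str) -> Kind {
    KEYWORDS
        .iter()
        .find(|(kw, _)| kw.eq_ignore_ascii_case(ident))
        .map(|(_, kind)| *kind)
        .unwrap_or(Kind::Identifier)
}

fn main() {
    assert_eq!(classify("select"), Kind::Select);
    assert_eq!(classify("my_table"), Kind::Identifier);
}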
417
lib/src/syn/v2/lexer/mod.rs
Normal file
@ -0,0 +1,417 @@
use crate::{
|
||||
sql::{Datetime, Duration, Regex, Uuid},
|
||||
syn::v2::token::{Span, Token, TokenKind},
|
||||
};
|
||||
use thiserror::Error;
|
||||
|
||||
mod byte;
|
||||
mod char;
|
||||
mod datetime;
|
||||
mod duration;
|
||||
mod ident;
|
||||
mod js;
|
||||
mod keywords;
|
||||
mod number;
|
||||
mod reader;
|
||||
mod strand;
|
||||
mod unicode;
|
||||
mod uuid;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
pub use reader::{BytesReader, CharError};
|
||||
|
||||
/// An error returned by the lexer when an invalid token is encountered.
|
||||
///
|
||||
/// Can be retrieved from the `Lexer::error` field whenever it returned a [`TokenKind::Invalid`]
|
||||
/// token.
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("Lexer encountered unexpected character {0:?}")]
|
||||
UnexpectedCharacter(char),
|
||||
#[error("invalid escape character {0:?}")]
|
||||
InvalidEscapeCharacter(char),
|
||||
#[error("Lexer encountered unexpected end of source characters")]
|
||||
UnexpectedEof,
|
||||
#[error("source was not valid utf-8")]
|
||||
InvalidUtf8,
|
||||
#[error("expected next character to be '{0}'")]
|
||||
ExpectedEnd(char),
|
||||
#[error("failed to lex date-time, {0}")]
|
||||
DateTime(#[from] datetime::Error),
|
||||
#[error("failed to lex uuid, {0}")]
|
||||
Uuid(#[from] uuid::Error),
|
||||
#[error("failed to lex duration, {0}")]
|
||||
Duration(#[from] duration::Error),
|
||||
#[error("failed to lex number, {0}")]
|
||||
Number(#[from] number::Error),
|
||||
#[error("failed to parse regex, {0}")]
|
||||
Regex(regex::Error),
|
||||
}
|
||||
|
||||
impl From<CharError> for Error {
|
||||
fn from(value: CharError) -> Self {
|
||||
match value {
|
||||
CharError::Eof => Self::UnexpectedEof,
|
||||
CharError::Unicode => Self::InvalidUtf8,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The SurrealQL lexer.
///
/// Takes a slice of bytes and turns it into tokens. The lexer is designed with possibly invalid
/// UTF-8 in mind and will report bytes which are not valid UTF-8 with an error.
///
/// The lexer generates tokens lazily. Whenever [`Lexer::next_token`] is called it will try to lex
/// the next bytes in the given source as a token. The lexer always returns a token, even if the
/// source contains invalid tokens or the lexer is at the end of the source; in both cases a
/// specific type of token is returned.
///
/// Note that SurrealQL syntax cannot be lexed fully in advance. For example, record strings and
/// regexes cannot be lexed correctly without knowledge of previous tokens, as they are both
/// ambiguous with other tokens.
|
||||
pub struct Lexer<'a> {
|
||||
/// The reader for reading the source bytes.
|
||||
pub reader: BytesReader<'a>,
|
||||
/// The one past the last character of the previous token.
|
||||
last_offset: u32,
|
||||
/// The span of whitespace if it was read between two tokens.
|
||||
whitespace_span: Option<Span>,
|
||||
/// A buffer used to build the value of tokens which can't be read straight from the source,
/// for example strings with escape characters.
|
||||
scratch: String,
|
||||
|
||||
// below are a collection of storage for values produced by tokens.
|
||||
// For performance reasons we want to keep the tokens as small as possible.
// As only some tokens have an additional value associated with them we don't store that value
// in the token itself but, instead, in the lexer, ensuring a smaller size for each individual
|
||||
// token.
|
||||
//
|
||||
// This does result in some additional state to keep track of as peeking a token while a token
|
||||
// value is still in the variables below will overwrite the previous value.
|
||||
//
|
||||
// Both numbers and actual strings are stored as a string value.
|
||||
// The parser can, depending on position in syntax, decide to parse a number in a variety of
|
||||
// different precisions or formats. The only way to support all is to delay parsing the
|
||||
// actual number value to when the parser can decide on a format.
|
||||
pub string: Option<String>,
|
||||
pub duration: Option<Duration>,
|
||||
pub datetime: Option<Datetime>,
|
||||
pub regex: Option<Regex>,
|
||||
pub uuid: Option<Uuid>,
|
||||
pub error: Option<Error>,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Create a new lexer.
|
||||
/// # Panic
|
||||
/// This function will panic if the source is longer than `u32::MAX` bytes.
|
||||
pub fn new(source: &'a [u8]) -> Lexer<'a> {
|
||||
let reader = BytesReader::new(source);
|
||||
assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size");
|
||||
Lexer {
|
||||
reader,
|
||||
last_offset: 0,
|
||||
whitespace_span: None,
|
||||
scratch: String::new(),
|
||||
string: None,
|
||||
datetime: None,
|
||||
duration: None,
|
||||
regex: None,
|
||||
uuid: None,
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the state of the lexer.
|
||||
///
|
||||
/// Doesn't change the state of the reader.
|
||||
pub fn reset(&mut self) {
|
||||
self.last_offset = 0;
|
||||
self.scratch.clear();
|
||||
self.whitespace_span = None;
|
||||
self.string = None;
|
||||
self.datetime = None;
|
||||
self.duration = None;
|
||||
self.regex = None;
|
||||
self.uuid = None;
|
||||
self.error = None;
|
||||
}
|
||||
|
||||
/// Change the used source from the lexer to a new buffer.
|
||||
///
|
||||
/// Useful for reusing buffers.
|
||||
///
|
||||
/// # Panic
|
||||
/// This function will panic if the source is longer than `u32::MAX` bytes.
|
||||
pub fn change_source<'b>(self, source: &'b [u8]) -> Lexer<'b> {
|
||||
let reader = BytesReader::<'b>::new(source);
|
||||
assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size");
|
||||
Lexer {
|
||||
reader,
|
||||
last_offset: 0,
|
||||
whitespace_span: None,
|
||||
scratch: self.scratch,
|
||||
string: self.string,
|
||||
datetime: self.datetime,
|
||||
duration: self.duration,
|
||||
regex: self.regex,
|
||||
uuid: self.uuid,
|
||||
error: self.error,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the whitespace span of the last token buffered, whether peeked or popped.
|
||||
pub fn whitespace_span(&self) -> Option<Span> {
|
||||
self.whitespace_span
|
||||
}
|
||||
|
||||
/// Used for setting the span of whitespace between tokens. Will extend the current whitespace
|
||||
/// if there already is one.
|
||||
fn set_whitespace_span(&mut self, span: Span) {
|
||||
if let Some(existing) = self.whitespace_span.as_mut() {
|
||||
*existing = existing.covers(span);
|
||||
} else {
|
||||
self.whitespace_span = Some(span);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the next token, driving the lexer forward.
|
||||
///
|
||||
/// If the lexer is at the end of the source it will always return the Eof token.
|
||||
pub fn next_token(&mut self) -> Token {
|
||||
self.whitespace_span = None;
|
||||
self.next_token_inner()
|
||||
}
|
||||
|
||||
fn next_token_inner(&mut self) -> Token {
|
||||
let Some(byte) = self.reader.next() else {
|
||||
return self.eof_token();
|
||||
};
|
||||
if byte.is_ascii() {
|
||||
self.lex_ascii(byte)
|
||||
} else {
|
||||
self.lex_char(byte)
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates the eof token.
|
||||
///
|
||||
/// An eof token has token kind `Eof` and a span which points to the last character of the
|
||||
/// source.
|
||||
fn eof_token(&mut self) -> Token {
|
||||
Token {
|
||||
kind: TokenKind::Eof,
|
||||
span: Span {
|
||||
offset: self.last_offset.saturating_sub(1),
|
||||
len: 1,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Skip the last consumed bytes in the reader.
|
||||
///
|
||||
/// The bytes consumed before this point won't be part of the span.
|
||||
fn skip_offset(&mut self) {
|
||||
self.last_offset = self.reader.offset() as u32;
|
||||
}
|
||||
|
||||
/// Return an invalid token.
|
||||
fn invalid_token(&mut self, error: Error) -> Token {
|
||||
self.error = Some(error);
|
||||
self.finish_token(TokenKind::Invalid)
|
||||
}
|
||||
|
||||
// Returns the span for the current token being lexed.
|
||||
pub fn current_span(&self) -> Span {
|
||||
// We make sure that the source is no longer then u32::MAX so this can't overflow.
|
||||
let new_offset = self.reader.offset() as u32;
|
||||
let len = new_offset - self.last_offset;
|
||||
Span {
|
||||
offset: self.last_offset,
|
||||
len,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a token from a TokenKind.
///
/// Attaches a span to the token, returns it, and updates the last offset.
|
||||
fn finish_token(&mut self, kind: TokenKind) -> Token {
|
||||
let span = self.current_span();
|
||||
// We make sure that the source is no longer then u32::MAX so this can't overflow.
|
||||
self.last_offset = self.reader.offset() as u32;
|
||||
Token {
|
||||
kind,
|
||||
span,
|
||||
}
|
||||
}
|
||||
|
||||
/// Moves the lexer state back to before the given span.
|
||||
///
|
||||
/// # Warning
|
||||
/// Moving the lexer into a state where the next byte is within a multibyte character will
|
||||
/// result in spurious errors.
|
||||
pub fn backup_before(&mut self, span: Span) {
|
||||
self.reader.backup(span.offset as usize);
|
||||
self.last_offset = span.offset;
|
||||
}
|
||||
|
||||
/// Moves the lexer state to after the given span.
|
||||
///
|
||||
/// # Warning
|
||||
/// Moving the lexer into a state where the next byte is within a multibyte character will
|
||||
/// result in spurious errors.
|
||||
pub fn backup_after(&mut self, span: Span) {
|
||||
let offset = span.offset + span.len;
|
||||
self.reader.backup(offset as usize);
|
||||
self.last_offset = offset;
|
||||
}
|
||||
|
||||
/// Checks if the next byte is the given byte, if it is it consumes the byte and returns true.
|
||||
/// Otherwise returns false.
|
||||
///
|
||||
/// Also returns false if there is no next character.
|
||||
pub fn eat(&mut self, byte: u8) -> bool {
|
||||
if self.reader.peek() == Some(byte) {
|
||||
self.reader.next();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks if the closure returns true when given the next byte; if it does, it consumes the byte
|
||||
/// and returns true. Otherwise returns false.
|
||||
///
|
||||
/// Also returns false if there is no next character.
|
||||
pub fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
|
||||
let Some(x) = self.reader.peek() else {
|
||||
return false;
|
||||
};
|
||||
if f(x) {
|
||||
self.reader.next();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a single closing `"` or `'` character with possible leading whitespace and comments.
|
||||
///
|
||||
/// Used for parsing record strings.
|
||||
pub fn lex_record_string_close(&mut self) -> Token {
|
||||
loop {
|
||||
let Some(byte) = self.reader.next() else {
|
||||
return self.invalid_token(Error::UnexpectedEof);
|
||||
};
|
||||
match byte {
|
||||
unicode::byte::CR
|
||||
| unicode::byte::FF
|
||||
| unicode::byte::LF
|
||||
| unicode::byte::SP
|
||||
| unicode::byte::VT
|
||||
| unicode::byte::TAB => {
|
||||
self.eat_whitespace();
|
||||
continue;
|
||||
}
|
||||
b'"' => {
|
||||
return self.finish_token(TokenKind::CloseRecordString {
|
||||
double: true,
|
||||
});
|
||||
}
|
||||
b'\'' => {
|
||||
return self.finish_token(TokenKind::CloseRecordString {
|
||||
double: false,
|
||||
});
|
||||
}
|
||||
b'-' => match self.reader.next() {
|
||||
Some(b'-') => {
|
||||
self.eat_single_line_comment();
|
||||
continue;
|
||||
}
|
||||
Some(x) => match self.reader.convert_to_char(x) {
|
||||
Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
|
||||
Err(e) => return self.invalid_token(e.into()),
|
||||
},
|
||||
None => return self.invalid_token(Error::UnexpectedEof),
|
||||
},
|
||||
b'/' => match self.reader.next() {
|
||||
Some(b'*') => {
|
||||
if let Err(e) = self.eat_multi_line_comment() {
|
||||
return self.invalid_token(e);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
Some(b'/') => {
|
||||
self.eat_single_line_comment();
|
||||
continue;
|
||||
}
|
||||
Some(x) => match self.reader.convert_to_char(x) {
|
||||
Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
|
||||
Err(e) => return self.invalid_token(e.into()),
|
||||
},
|
||||
None => return self.invalid_token(Error::UnexpectedEof),
|
||||
},
|
||||
b'#' => {
|
||||
self.eat_single_line_comment();
|
||||
continue;
|
||||
}
|
||||
x => match self.reader.convert_to_char(x) {
|
||||
Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
|
||||
Err(e) => return self.invalid_token(e.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex only a datetime without enclosing delimiters.
|
||||
///
|
||||
/// Used for reusing the lexer's datetime lexing code when parsing. Should not be called during
|
||||
/// normal parsing.
|
||||
pub fn lex_only_datetime(&mut self) -> Result<Datetime, Error> {
|
||||
self.lex_datetime_raw_err().map_err(Error::DateTime)
|
||||
}
|
||||
|
||||
/// Lex only a duration.
|
||||
///
|
||||
/// Used for reusing the lexer's duration lexing code when parsing. Should not be used during normal
|
||||
/// parsing.
|
||||
pub fn lex_only_duration(&mut self) -> Result<Duration, Error> {
|
||||
match self.reader.next() {
|
||||
Some(x @ b'0'..=b'9') => {
|
||||
self.scratch.push(x as char);
|
||||
while let Some(x @ b'0'..=b'9') = self.reader.peek() {
|
||||
self.reader.next();
|
||||
self.scratch.push(x as char);
|
||||
}
|
||||
self.lex_duration_err().map_err(Error::Duration)
|
||||
}
|
||||
Some(x) => {
|
||||
let char = self.reader.convert_to_char(x)?;
|
||||
Err(Error::UnexpectedCharacter(char))
|
||||
}
|
||||
None => Err(Error::UnexpectedEof),
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex only a UUID.
|
||||
///
|
||||
/// Used for reusing the lexer's UUID lexing code when parsing. Should not be used during normal
|
||||
/// parsing.
|
||||
pub fn lex_only_uuid(&mut self) -> Result<Uuid, Error> {
|
||||
Ok(self.lex_uuid_err_inner()?)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Lexer<'_> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let token = self.next_token();
|
||||
if token.is_eof() {
|
||||
return None;
|
||||
}
|
||||
Some(token)
|
||||
}
|
||||
}
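// Editor's note: the following test module is an illustrative sketch, not part of the
// original commit. It shows how the lazily-evaluated lexer described above is driven:
// each call to `next_token` yields exactly one token, and once the source is exhausted
// the lexer keeps returning Eof tokens. The module name is hypothetical.
#[cfg(test)]
mod usage_sketch {
	use super::Lexer;
	use crate::syn::v2::token::TokenKind;

	#[test]
	fn drives_lexer_until_eof() {
		let mut lexer = Lexer::new("SELECT".as_bytes());
		// "SELECT" lexes as a keyword token, never as Eof or Invalid.
		let token = lexer.next_token();
		assert!(!matches!(token.kind, TokenKind::Eof | TokenKind::Invalid));
		// The source is now exhausted, so every further call returns an Eof token.
		assert_eq!(lexer.next_token().kind, TokenKind::Eof);
		assert_eq!(lexer.next_token().kind, TokenKind::Eof);
	}
}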
|
257
lib/src/syn/v2/lexer/number.rs
Normal file
|
@ -0,0 +1,257 @@
|
|||
use crate::syn::v2::{
|
||||
lexer::{unicode::U8Ext, Error as LexError, Lexer},
|
||||
token::{NumberKind, Token, TokenKind},
|
||||
};
|
||||
use std::mem;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("invalid number suffix")]
|
||||
InvalidSuffix,
|
||||
#[error("expected atleast a single digit in the exponent")]
|
||||
DigitExpectedExponent,
|
||||
}
|
||||
|
||||
impl Lexer<'_> {
|
||||
/// Lex only an integer.
|
||||
/// Use when a number can be followed immediately by a `.` like in a model version.
|
||||
pub fn lex_only_integer(&mut self) -> Token {
|
||||
match self.lex_only_integer_err() {
|
||||
Ok(x) => x,
|
||||
Err(e) => self.invalid_token(LexError::Number(e)),
|
||||
}
|
||||
}
|
||||
|
||||
fn lex_only_integer_err(&mut self) -> Result<Token, Error> {
|
||||
let Some(next) = self.reader.peek() else {
|
||||
return Ok(self.eof_token());
|
||||
};
|
||||
|
||||
// not a number, return a different token kind, for error reporting.
|
||||
if !next.is_ascii_digit() {
|
||||
return Ok(self.next_token());
|
||||
}
|
||||
|
||||
self.scratch.push(next as char);
|
||||
self.reader.next();
|
||||
|
||||
// eat all the ascii digits
|
||||
while let Some(x) = self.reader.peek() {
|
||||
if x == b'_' {
|
||||
self.reader.next();
|
||||
} else if !x.is_ascii_digit() {
|
||||
break;
|
||||
} else {
|
||||
self.scratch.push(x as char);
|
||||
self.reader.next();
|
||||
}
|
||||
}
|
||||
|
||||
// test for a suffix.
|
||||
match self.reader.peek() {
|
||||
Some(b'd' | b'f') => {
|
||||
// not an integer but parse anyway for error reporting.
|
||||
return self.lex_suffix(true);
|
||||
}
|
||||
Some(x) if x.is_ascii_alphabetic() => return Err(self.invalid_suffix()),
|
||||
_ => {}
|
||||
}
|
||||
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)))
|
||||
}
|
||||
|
||||
pub fn lex_number(&mut self, start: u8) -> Token {
|
||||
match self.lex_number_err(start) {
|
||||
Ok(x) => x,
|
||||
Err(e) => self.invalid_token(LexError::Number(e)),
|
||||
}
|
||||
}
|
||||
/// Lex a number.
|
||||
///
|
||||
/// Expects the digit which started the number as the start argument.
|
||||
pub fn lex_number_err(&mut self, start: u8) -> Result<Token, Error> {
|
||||
debug_assert!(start.is_ascii_digit());
|
||||
debug_assert_eq!(self.scratch, "");
|
||||
self.scratch.push(start as char);
|
||||
loop {
|
||||
let Some(x) = self.reader.peek() else {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
||||
};
|
||||
match x {
|
||||
b'0'..=b'9' => {
|
||||
// next digits.
|
||||
self.reader.next();
|
||||
self.scratch.push(x as char);
|
||||
}
|
||||
b'.' => {
|
||||
// mantissa
|
||||
let backup = self.reader.offset();
|
||||
self.reader.next();
|
||||
let next = self.reader.peek();
|
||||
if let Some(b'0'..=b'9') = next {
|
||||
self.scratch.push('.');
|
||||
return self.lex_mantissa();
|
||||
} else {
|
||||
// indexing a number
|
||||
self.reader.backup(backup);
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
||||
}
|
||||
}
|
||||
b'f' | b'd' => return self.lex_suffix(true),
|
||||
// 0xc2 is the first byte of the two-byte UTF-8 encoding of 'µ'
|
||||
0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
|
||||
// duration suffix, switch to lexing duration.
|
||||
return Ok(self.lex_duration());
|
||||
}
|
||||
b'_' => {
|
||||
self.reader.next();
|
||||
}
|
||||
b'a'..=b'z' | b'A'..=b'Z' => {
|
||||
return Err(self.invalid_suffix());
|
||||
// invalid token, unexpected identifier character immediately after number.
|
||||
// Eat all remaining identifier like characters.
|
||||
}
|
||||
_ => {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn invalid_suffix(&mut self) -> Error {
|
||||
// eat the whole suffix.
|
||||
while let Some(x) = self.reader.peek() {
|
||||
if !x.is_ascii_alphanumeric() {
|
||||
break;
|
||||
}
|
||||
self.reader.next();
|
||||
}
|
||||
self.scratch.clear();
|
||||
Error::InvalidSuffix
|
||||
}
|
||||
|
||||
/// Lex a number suffix, either 'f' or 'dec'.
|
||||
fn lex_suffix(&mut self, can_be_duration: bool) -> Result<Token, Error> {
|
||||
match self.reader.peek() {
|
||||
Some(b'f') => {
|
||||
// float suffix
|
||||
self.reader.next();
|
||||
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
||||
Err(self.invalid_suffix())
|
||||
} else {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
Ok(self.finish_token(TokenKind::Number(NumberKind::Float)))
|
||||
}
|
||||
}
|
||||
Some(b'd') => {
|
||||
// decimal suffix
|
||||
self.reader.next();
|
||||
let checkpoint = self.reader.offset();
|
||||
if !self.eat(b'e') {
|
||||
if can_be_duration {
|
||||
self.reader.backup(checkpoint - 1);
|
||||
return Ok(self.lex_duration());
|
||||
} else {
|
||||
return Err(self.invalid_suffix());
|
||||
}
|
||||
}
|
||||
|
||||
if !self.eat(b'c') {
|
||||
return Err(self.invalid_suffix());
|
||||
}
|
||||
|
||||
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
||||
Err(self.invalid_suffix())
|
||||
} else {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal)))
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexes the mantissa of a number, i.e. `.8` in `1.8`
|
||||
pub fn lex_mantissa(&mut self) -> Result<Token, Error> {
|
||||
loop {
|
||||
// lex_number already checks if there exists a digit after the dot.
|
||||
// So this will never fail the first iteration of the loop.
|
||||
let Some(x) = self.reader.peek() else {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
|
||||
};
|
||||
match x {
|
||||
b'0'..=b'9' => {
|
||||
// next digit.
|
||||
self.reader.next();
|
||||
self.scratch.push(x as char);
|
||||
}
|
||||
b'e' | b'E' => {
|
||||
// scientific notation
|
||||
self.reader.next();
|
||||
self.scratch.push('e');
|
||||
return self.lex_exponent(true);
|
||||
}
|
||||
b'_' => {
|
||||
self.reader.next();
|
||||
}
|
||||
b'f' | b'd' => return self.lex_suffix(false),
|
||||
b'a'..=b'z' | b'A'..=b'Z' => {
|
||||
// invalid token, random identifier characters immediately after number.
|
||||
self.scratch.clear();
|
||||
return Err(Error::InvalidSuffix);
|
||||
}
|
||||
_ => {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexes the exponent of a number, i.e. `e10` in `1.1e10`;
|
||||
fn lex_exponent(&mut self, had_mantissa: bool) -> Result<Token, Error> {
|
||||
let mut atleast_one = false;
|
||||
match self.reader.peek() {
|
||||
Some(b'-' | b'+') => {}
|
||||
Some(b'0'..=b'9') => {
|
||||
atleast_one = true;
|
||||
}
|
||||
_ => {
|
||||
// random other character, expected at least one digit.
|
||||
return Err(Error::DigitExpectedExponent);
|
||||
}
|
||||
}
|
||||
self.reader.next();
|
||||
loop {
|
||||
match self.reader.peek() {
|
||||
Some(x @ b'0'..=b'9') => {
|
||||
self.reader.next();
|
||||
self.scratch.push(x as char);
|
||||
}
|
||||
Some(b'_') => {
|
||||
self.reader.next();
|
||||
}
|
||||
Some(b'f' | b'd') => return self.lex_suffix(false),
|
||||
_ => {
|
||||
if atleast_one {
|
||||
let kind = if had_mantissa {
|
||||
NumberKind::MantissaExponent
|
||||
} else {
|
||||
NumberKind::Exponent
|
||||
};
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Number(kind)));
|
||||
} else {
|
||||
return Err(Error::DigitExpectedExponent);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
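// Editor's note: illustrative sketch, not part of the original commit. It demonstrates the
// classification described above: the lexer only records which kind of number it saw while
// the digits stay in `Lexer::string` for the parser to interpret later. The module name and
// helper are hypothetical.
#[cfg(test)]
mod number_kind_sketch {
	use crate::syn::v2::{
		lexer::Lexer,
		token::{NumberKind, TokenKind},
	};

	fn kind_of(source: &str) -> TokenKind {
		let mut lexer = Lexer::new(source.as_bytes());
		lexer.next_token().kind
	}

	#[test]
	fn classifies_number_literals() {
		assert_eq!(kind_of("123"), TokenKind::Number(NumberKind::Integer));
		assert_eq!(kind_of("1.5"), TokenKind::Number(NumberKind::Mantissa));
		assert_eq!(kind_of("1.5e9"), TokenKind::Number(NumberKind::MantissaExponent));
		assert_eq!(kind_of("10f"), TokenKind::Number(NumberKind::Float));
		assert_eq!(kind_of("10dec"), TokenKind::Number(NumberKind::Decimal));
	}

	#[test]
	fn integer_only_stops_before_dot() {
		// `lex_only_integer` exists for positions like model versions (`1.2.3`) where a `.`
		// must not be taken as the start of a mantissa.
		let mut lexer = Lexer::new("1.2".as_bytes());
		assert_eq!(lexer.lex_only_integer().kind, TokenKind::Number(NumberKind::Integer));
	}
}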
|
157
lib/src/syn/v2/lexer/reader.rs
Normal file
|
@ -0,0 +1,157 @@
|
|||
use thiserror::Error;
|
||||
|
||||
use crate::syn::v2::token::Span;
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum CharError {
|
||||
#[error("found eof inside multi byte character")]
|
||||
Eof,
|
||||
#[error("string is not valid utf-8")]
|
||||
Unicode,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct BytesReader<'a> {
|
||||
data: &'a [u8],
|
||||
current: usize,
|
||||
}
|
||||
|
||||
impl fmt::Debug for BytesReader<'_> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.debug_struct("BytesReader")
|
||||
.field("used", &self.used())
|
||||
.field("remaining", &self.remaining())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesReader<'a> {
|
||||
pub fn new(slice: &'a [u8]) -> Self {
|
||||
BytesReader {
|
||||
data: slice,
|
||||
current: 0,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn full(&self) -> &'a [u8] {
|
||||
self.data
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn used(&self) -> &'a [u8] {
|
||||
&self.data[..self.current]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn remaining(&self) -> &'a [u8] {
|
||||
&self.data[self.current..]
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.remaining().len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn offset(&self) -> usize {
|
||||
self.current
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn backup(&mut self, offset: usize) {
|
||||
assert!(offset <= self.offset());
|
||||
self.current = offset;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.remaining().is_empty()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn peek(&self) -> Option<u8> {
|
||||
self.remaining().get(0).copied()
|
||||
}
|
||||
#[inline]
|
||||
pub fn span(&self, span: Span) -> &[u8] {
|
||||
&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
|
||||
}
|
||||
#[inline]
|
||||
pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
|
||||
const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
|
||||
const CONTINUE_BYTE_MASK: u8 = 0b0011_1111;
|
||||
|
||||
let byte = self.next().ok_or(CharError::Eof)?;
|
||||
if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
|
||||
return Err(CharError::Eof);
|
||||
}
|
||||
|
||||
Ok(byte & CONTINUE_BYTE_MASK)
|
||||
}
|
||||
|
||||
pub fn convert_to_char(&mut self, start: u8) -> Result<char, CharError> {
|
||||
if start.is_ascii() {
|
||||
return Ok(start as char);
|
||||
}
|
||||
self.complete_char(start)
|
||||
}
|
||||
|
||||
pub fn complete_char(&mut self, start: u8) -> Result<char, CharError> {
|
||||
match start & 0b1111_1000 {
|
||||
0b1100_0000 | 0b1101_0000 | 0b1100_1000 | 0b1101_1000 => {
|
||||
let mut val = (start & 0b0001_1111) as u32;
|
||||
val <<= 6;
|
||||
let next = self.next_continue_byte()?;
|
||||
val |= next as u32;
|
||||
char::from_u32(val).ok_or(CharError::Unicode)
|
||||
}
|
||||
0b1110_0000 | 0b1110_1000 => {
|
||||
let mut val = (start & 0b0000_1111) as u32;
|
||||
val <<= 6;
|
||||
let next = self.next_continue_byte()?;
|
||||
val |= next as u32;
|
||||
val <<= 6;
|
||||
let next = self.next_continue_byte()?;
|
||||
val |= next as u32;
|
||||
char::from_u32(val).ok_or(CharError::Unicode)
|
||||
}
|
||||
0b1111_0000 => {
|
||||
let mut val = (start & 0b0000_0111) as u32;
|
||||
val <<= 6;
|
||||
let next = self.next_continue_byte()?;
|
||||
val |= next as u32;
|
||||
val <<= 6;
|
||||
let next = self.next_continue_byte()?;
|
||||
val |= next as u32;
|
||||
val <<= 6;
|
||||
let next = self.next_continue_byte()?;
|
||||
val |= next as u32;
|
||||
char::from_u32(val).ok_or(CharError::Unicode)
|
||||
}
|
||||
x => panic!("start byte did not start multi byte character: {:b}", x),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for BytesReader<'a> {
|
||||
type Item = u8;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let res = self.peek()?;
|
||||
self.current += 1;
|
||||
Some(res)
|
||||
}
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let len = self.len();
|
||||
(len, Some(len))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ExactSizeIterator for BytesReader<'a> {
|
||||
fn len(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
}
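// Editor's note: illustrative sketch, not part of the original commit. `BytesReader` is a
// plain byte cursor; multi-byte UTF-8 characters are only decoded on demand via
// `convert_to_char`/`complete_char` once their first byte has been consumed. The module
// name is hypothetical.
#[cfg(test)]
mod reader_sketch {
	use super::BytesReader;

	#[test]
	fn reads_bytes_and_decodes_on_demand() {
		let mut reader = BytesReader::new("aµ".as_bytes());
		assert_eq!(reader.peek(), Some(b'a'));
		assert_eq!(reader.next(), Some(b'a'));
		// 'µ' is encoded as two bytes; consume the first and let the reader finish it.
		let start = reader.next().unwrap();
		assert_eq!(reader.convert_to_char(start).unwrap(), 'µ');
		assert!(reader.is_empty());
		// `backup` rewinds the cursor to an absolute byte offset.
		reader.backup(0);
		assert_eq!(reader.peek(), Some(b'a'));
	}
}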
|
95
lib/src/syn/v2/lexer/strand.rs
Normal file
|
@ -0,0 +1,95 @@
|
|||
//! Lexing of strand like characters.
|
||||
|
||||
use std::mem;
|
||||
|
||||
use crate::syn::v2::token::{Token, TokenKind};
|
||||
|
||||
use super::{unicode::chars, Error, Lexer};
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Lex a plain strand with either single or double quotes.
|
||||
pub fn lex_strand(&mut self, is_double: bool) -> Token {
|
||||
match self.lex_strand_err(is_double) {
|
||||
Ok(x) => x,
|
||||
Err(x) => {
|
||||
self.scratch.clear();
|
||||
self.invalid_token(x)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a strand with either double or single quotes but return a result instead of a token.
|
||||
pub fn lex_strand_err(&mut self, is_double: bool) -> Result<Token, Error> {
|
||||
loop {
|
||||
let Some(x) = self.reader.next() else {
|
||||
self.scratch.clear();
|
||||
return Ok(self.eof_token());
|
||||
};
|
||||
|
||||
if x.is_ascii() {
|
||||
match x {
|
||||
b'\'' if !is_double => {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Strand));
|
||||
}
|
||||
b'"' if is_double => {
|
||||
self.string = Some(mem::take(&mut self.scratch));
|
||||
return Ok(self.finish_token(TokenKind::Strand));
|
||||
}
|
||||
b'\0' => {
|
||||
// null bytes not allowed
|
||||
return Err(Error::UnexpectedCharacter('\0'));
|
||||
}
|
||||
b'\\' => {
|
||||
// Handle escape sequences.
|
||||
let Some(next) = self.reader.next() else {
|
||||
self.scratch.clear();
|
||||
return Ok(self.eof_token());
|
||||
};
|
||||
match next {
|
||||
b'\\' => {
|
||||
self.scratch.push('\\');
|
||||
}
|
||||
b'\'' if !is_double => {
|
||||
self.scratch.push('\'');
|
||||
}
|
||||
b'\"' if is_double => {
|
||||
self.scratch.push('\"');
|
||||
}
|
||||
b'/' => {
|
||||
self.scratch.push('/');
|
||||
}
|
||||
b'b' => {
|
||||
self.scratch.push(chars::BS);
|
||||
}
|
||||
b'f' => {
|
||||
self.scratch.push(chars::FF);
|
||||
}
|
||||
b'n' => {
|
||||
self.scratch.push(chars::LF);
|
||||
}
|
||||
b'r' => {
|
||||
self.scratch.push(chars::CR);
|
||||
}
|
||||
b't' => {
|
||||
self.scratch.push(chars::TAB);
|
||||
}
|
||||
x => {
|
||||
let char = if x.is_ascii() {
|
||||
x as char
|
||||
} else {
|
||||
self.reader.complete_char(x)?
|
||||
};
|
||||
return Err(Error::InvalidEscapeCharacter(char));
|
||||
}
|
||||
}
|
||||
}
|
||||
x => self.scratch.push(x as char),
|
||||
}
|
||||
} else {
|
||||
let c = self.reader.complete_char(x)?;
|
||||
self.scratch.push(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
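// Editor's note: illustrative sketch, not part of the original commit. It shows that escape
// sequences are resolved while lexing, so `Lexer::string` holds the unescaped value; the
// opening quote is expected to have been consumed by the caller before `lex_strand` runs.
// The module name is hypothetical.
#[cfg(test)]
mod strand_sketch {
	use crate::syn::v2::{lexer::Lexer, token::TokenKind};

	#[test]
	fn resolves_escape_sequences() {
		// Source for the strand body `a\nb\"c` followed by the closing double quote.
		let source = r#"a\nb\"c""#;
		let mut lexer = Lexer::new(source.as_bytes());
		let token = lexer.lex_strand(true);
		assert_eq!(token.kind, TokenKind::Strand);
		assert_eq!(lexer.string.take().as_deref(), Some("a\nb\"c"));
	}
}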
|
482
lib/src/syn/v2/lexer/test.rs
Normal file
|
@ -0,0 +1,482 @@
|
|||
use chrono::{FixedOffset, NaiveDate, Offset, TimeZone, Utc};
|
||||
|
||||
use crate::syn::v2::token::{t, NumberKind, TokenKind};
|
||||
|
||||
macro_rules! test_case(
|
||||
($source:expr => [$($token:expr),*$(,)?]) => {
|
||||
let mut lexer = crate::syn::v2::lexer::Lexer::new($source.as_bytes());
|
||||
let mut i = 0;
|
||||
$(
|
||||
let next = lexer.next();
|
||||
if let Some(next) = next {
|
||||
let span = std::str::from_utf8(lexer.reader.span(next.span)).unwrap_or("invalid utf8");
|
||||
if let TokenKind::Invalid = next.kind{
|
||||
let error = lexer.error.take().unwrap();
|
||||
assert_eq!(next.kind, $token, "{} = {}:{} => {}",span, i, stringify!($token), error);
|
||||
}else{
|
||||
assert_eq!(next.kind, $token, "{} = {}:{}", span, i, stringify!($token));
|
||||
}
|
||||
}else{
|
||||
assert_eq!(next,None);
|
||||
}
|
||||
i += 1;
|
||||
)*
|
||||
let _ = i;
|
||||
assert_eq!(lexer.next(),None)
|
||||
};
|
||||
);
|
||||
|
||||
#[test]
|
||||
fn operators() {
|
||||
test_case! {
|
||||
r#"- + / * ! **
|
||||
< > <= >= <- <-> ->
|
||||
= == -= += != +?=
|
||||
? ?? ?: ?~ ?=
|
||||
{ } [ ] ( )
|
||||
; , | || & &&
|
||||
$
|
||||
. .. ...
|
||||
|
||||
^
|
||||
"# => [
|
||||
t!("-"), t!("+"), t!("/"), t!("*"), t!("!"), t!("**"),
|
||||
|
||||
t!("<"), t!(">"), t!("<="), t!(">="), t!("<-"), t!("<->"), t!("->"),
|
||||
|
||||
t!("="), t!("=="), t!("-="), t!("+="), t!("!="), t!("+?="),
|
||||
|
||||
t!("?"), t!("??"), t!("?:"), t!("?~"), t!("?="),
|
||||
|
||||
t!("{"), t!("}"), t!("["), t!("]"), t!("("), t!(")"),
|
||||
|
||||
t!(";"), t!(","), t!("|"), t!("||"), TokenKind::Invalid, t!("&&"),
|
||||
|
||||
t!("$"),
|
||||
|
||||
t!("."), t!(".."), t!("..."),
|
||||
|
||||
TokenKind::Invalid
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn comments() {
|
||||
test_case! {
|
||||
r"
|
||||
+ /* some comment */
|
||||
- // another comment
|
||||
+ -- a third comment
|
||||
-
|
||||
" => [
|
||||
t!("+"),
|
||||
t!("-"),
|
||||
t!("+"),
|
||||
t!("-"),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn whitespace() {
|
||||
test_case! {
|
||||
"+= \t\n\r -=" => [
|
||||
t!("+="),
|
||||
t!("-="),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn identifiers() {
|
||||
test_case! {
|
||||
r#"
|
||||
123123adwad +
|
||||
akdwkj +
|
||||
akdwkj1231312313123 +
|
||||
_a_k_d_wkj1231312313123 +
|
||||
____wdw____ +
|
||||
"#
|
||||
=> [
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
TokenKind::Identifier,
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn numbers() {
|
||||
test_case! {
|
||||
r#"
|
||||
123123+32010230.123012031+33043030dec+33043030f+
|
||||
|
||||
"#
|
||||
=> [
|
||||
TokenKind::Number(NumberKind::Integer),
|
||||
t!("+"),
|
||||
TokenKind::Number(NumberKind::Mantissa),
|
||||
t!("+"),
|
||||
TokenKind::Number(NumberKind::Decimal),
|
||||
t!("+"),
|
||||
TokenKind::Number(NumberKind::Float),
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+123129decs+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+39349fs+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+394393df+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+32932932def+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
|
||||
test_case! {
|
||||
"+329239329z+"
|
||||
=> [
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn duration() {
|
||||
test_case! {
|
||||
r#"
|
||||
1ns+1µs+1us+1ms+1s+1m+1h+1w+1y
|
||||
|
||||
1nsa+1ans+1aus+1usa+1ams+1msa+1am+1ma+1ah+1ha+1aw+1wa+1ay+1ya+1µsa
|
||||
"#
|
||||
=> [
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
t!("+"),
|
||||
TokenKind::Duration,
|
||||
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
t!("+"),
|
||||
TokenKind::Invalid,
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keyword() {
|
||||
test_case! {
|
||||
r#"select SELECT sElEcT"# => [
|
||||
t!("SELECT"),
|
||||
t!("SELECT"),
|
||||
t!("SELECT"),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uuid() {
|
||||
let mut lexer = crate::syn::v2::lexer::Lexer::new(
|
||||
r#" u"e72bee20-f49b-11ec-b939-0242ac120002" "#.as_bytes(),
|
||||
);
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::Uuid);
|
||||
let uuid = lexer.uuid.take().unwrap();
|
||||
assert_eq!(uuid.0.to_string(), "e72bee20-f49b-11ec-b939-0242ac120002");
|
||||
|
||||
let mut lexer = crate::syn::v2::lexer::Lexer::new(
|
||||
r#" u"b19bc00b-aa98-486c-ae37-c8e1c54295b1" "#.as_bytes(),
|
||||
);
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::Uuid);
|
||||
let uuid = lexer.uuid.take().unwrap();
|
||||
assert_eq!(uuid.0.to_string(), "b19bc00b-aa98-486c-ae37-c8e1c54295b1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_just_date() {
|
||||
let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_zone_time() {
|
||||
let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2020-01-01T00:00:00Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_with_time() {
|
||||
let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(18, 25, 43, 0).unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_nanos() {
|
||||
let mut lexer =
|
||||
crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5631Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {} @ ", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 563_100_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_utc() {
|
||||
let mut lexer =
|
||||
crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.0000511Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let expected_datetime = Utc
|
||||
.fix()
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 51_100)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_pacific() {
|
||||
let mut lexer =
|
||||
crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511-08:00" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = FixedOffset::west_opt(8 * 3600).unwrap();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 511_000_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_pacific_partial() {
|
||||
let mut lexer =
|
||||
crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511+08:30" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = FixedOffset::east_opt(8 * 3600 + 30 * 60).unwrap();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 511_000_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_utc_nanoseconds() {
|
||||
let mut lexer =
|
||||
crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5110000Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = Utc.fix();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 511_000_000)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn date_time_timezone_utc_sub_nanoseconds() {
|
||||
let mut lexer =
|
||||
crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.0000511Z" "#.as_bytes());
|
||||
let token = lexer.next_token();
|
||||
if let Some(error) = lexer.error {
|
||||
println!("ERROR: {}", error);
|
||||
}
|
||||
assert_eq!(token.kind, TokenKind::DateTime);
|
||||
let datetime = lexer.datetime.take().unwrap();
|
||||
let offset = Utc.fix();
|
||||
let expected_datetime = offset
|
||||
.from_local_datetime(
|
||||
&NaiveDate::from_ymd_opt(2012, 4, 23)
|
||||
.unwrap()
|
||||
.and_hms_nano_opt(18, 25, 43, 51_100)
|
||||
.unwrap(),
|
||||
)
|
||||
.earliest()
|
||||
.unwrap()
|
||||
.with_timezone(&Utc);
|
||||
assert_eq!(datetime.0, expected_datetime);
|
||||
}
|
68
lib/src/syn/v2/lexer/unicode.rs
Normal file
|
@ -0,0 +1,68 @@
|
|||
//! Unicode related utilities.
|
||||
|
||||
/// Character constants
|
||||
pub mod chars {
|
||||
/// Character tabulation
|
||||
pub const TAB: char = '\u{0009}';
|
||||
/// Form feed
|
||||
pub const FF: char = '\u{000C}';
|
||||
|
||||
/// Line feed
|
||||
pub const LF: char = '\u{000A}';
|
||||
/// Carriage return
|
||||
pub const CR: char = '\u{000D}';
|
||||
/// Line separator
|
||||
pub const LS: char = '\u{2028}';
|
||||
/// Backspace
|
||||
pub const BS: char = '\u{0008}';
|
||||
/// Paragraph separator
|
||||
pub const PS: char = '\u{2029}';
|
||||
/// Next line
|
||||
pub const NEL: char = '\u{0085}';
|
||||
|
||||
/// Line terminators for javascript source code.
|
||||
pub const JS_LINE_TERIMATORS: [char; 4] = [LF, CR, LS, PS];
|
||||
}
|
||||
|
||||
pub mod byte {
|
||||
/// Character tabulation
|
||||
pub const TAB: u8 = b'\t';
|
||||
/// Line tabulation
|
||||
pub const VT: u8 = 0xB;
|
||||
/// Form feed
|
||||
pub const FF: u8 = 0xC;
|
||||
|
||||
/// Line feed
|
||||
pub const LF: u8 = 0xA;
|
||||
/// Carriage return
|
||||
pub const CR: u8 = 0xD;
|
||||
|
||||
/// Space
|
||||
pub const SP: u8 = 0x20;
|
||||
}
|
||||
|
||||
/// A trait extending u8 for adding some extra function.
|
||||
pub trait U8Ext {
|
||||
/// Returns if the u8 is the start of an identifier.
|
||||
fn is_identifier_start(&self) -> bool;
|
||||
|
||||
/// Returns if the u8 can start a number.
|
||||
fn is_number_start(&self) -> bool;
|
||||
|
||||
/// Returns if the u8 can continue an identifier after the first character.
|
||||
fn is_identifier_continue(&self) -> bool;
|
||||
}
|
||||
|
||||
impl U8Ext for u8 {
|
||||
fn is_identifier_start(&self) -> bool {
|
||||
matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'_')
|
||||
}
|
||||
|
||||
fn is_identifier_continue(&self) -> bool {
|
||||
matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
|
||||
}
|
||||
|
||||
fn is_number_start(&self) -> bool {
|
||||
self.is_ascii_digit()
|
||||
}
|
||||
}
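// Editor's note: illustrative sketch, not part of the original commit. It shows the byte
// classification the ident lexer builds on: an identifier starts with a letter or `_` and
// continues with letters, digits or `_`. The module name and helper are hypothetical.
#[cfg(test)]
mod ident_byte_sketch {
	use super::U8Ext;

	fn is_plain_identifier(bytes: &[u8]) -> bool {
		let Some((first, rest)) = bytes.split_first() else {
			return false;
		};
		first.is_identifier_start() && rest.iter().all(|x| x.is_identifier_continue())
	}

	#[test]
	fn classifies_identifier_bytes() {
		assert!(is_plain_identifier(b"_a_k_d_wkj123"));
		assert!(!is_plain_identifier(b"1abc"));
		assert!(b'5'.is_number_start());
	}
}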
|
124
lib/src/syn/v2/lexer/uuid.rs
Normal file
|
@ -0,0 +1,124 @@
|
|||
use crate::{
|
||||
sql::Uuid,
|
||||
syn::v2::token::{Token, TokenKind},
|
||||
};
|
||||
|
||||
use super::{Error as LexError, Lexer};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum Error {
|
||||
#[error("missing digits")]
|
||||
MissingDigits,
|
||||
#[error("digit was not in allowed range")]
|
||||
InvalidRange,
|
||||
#[error("expected uuid-strand to end")]
|
||||
ExpectedStrandEnd,
|
||||
#[error("missing a uuid seperator")]
|
||||
MissingSeperator,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Lex a uuid strand with either double or single quotes.
|
||||
///
|
||||
/// Expects the first delimiter to already have been eaten.
|
||||
pub fn lex_uuid(&mut self, double: bool) -> Token {
|
||||
match self.lex_uuid_err(double) {
|
||||
Ok(x) => {
|
||||
debug_assert!(self.uuid.is_none());
|
||||
self.uuid = Some(x);
|
||||
self.finish_token(TokenKind::Uuid)
|
||||
}
|
||||
Err(_) => self.invalid_token(LexError::Uuid(Error::MissingDigits)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a uuid strand with either double or single quotes but return a result instead of a
|
||||
/// token.
|
||||
///
|
||||
/// Expects the first delimiter to already have been eaten.
|
||||
pub fn lex_uuid_err(&mut self, double: bool) -> Result<Uuid, Error> {
|
||||
let uuid = self.lex_uuid_err_inner()?;
|
||||
|
||||
let end_char = if double {
|
||||
b'"'
|
||||
} else {
|
||||
b'\''
|
||||
};
|
||||
// closing strand character
|
||||
if !self.eat(end_char) {
|
||||
return Err(Error::ExpectedStrandEnd);
|
||||
}
|
||||
|
||||
Ok(uuid)
|
||||
}
|
||||
|
||||
/// Lex a uuid strand without delimiting quotes but return a result instead of a
|
||||
/// token.
|
||||
///
|
||||
/// Expects the first delimiter to already have been eaten.
|
||||
pub fn lex_uuid_err_inner(&mut self) -> Result<Uuid, Error> {
|
||||
let start = self.reader.offset();
|
||||
|
||||
if !self.lex_hex(8) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.lex_hex(4) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.eat_when(|x| (b'1'..=b'8').contains(&x)) {
|
||||
if self.reader.peek().map(|x| x.is_ascii_digit()).unwrap_or(false) {
|
||||
// byte was an ascii digit but not in the valid range.
|
||||
return Err(Error::InvalidRange);
|
||||
}
|
||||
return Err(Error::MissingDigits);
|
||||
};
|
||||
|
||||
if !self.lex_hex(3) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.lex_hex(4) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
if !self.eat(b'-') {
|
||||
return Err(Error::MissingSeperator);
|
||||
}
|
||||
|
||||
if !self.lex_hex(12) {
|
||||
return Err(Error::MissingDigits);
|
||||
}
|
||||
|
||||
let end = self.reader.offset();
|
||||
// The lexer ensures that the section of bytes is valid utf8 so this should never panic.
|
||||
let uuid_str = std::str::from_utf8(&self.reader.full()[start..end]).unwrap();
|
||||
// The lexer ensures that the bytes are a valid uuid so this should never panic.
|
||||
Ok(Uuid(uuid::Uuid::try_from(uuid_str).unwrap()))
|
||||
}
|
||||
|
||||
/// Lexes a given amount of hex characters. Returns true if the lexing was successful, false
|
||||
/// otherwise.
|
||||
pub fn lex_hex(&mut self, amount: u8) -> bool {
|
||||
for _ in 0..amount {
|
||||
if !self.eat_when(|x| matches!(x,b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
}
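// Editor's note: illustrative sketch, not part of the original commit. It exercises the
// 8-4-4-4-12 hex-group validation above; the opening quote of the `u"..."` form is consumed
// by the caller before `lex_uuid` runs, so only the closing quote appears in the source
// here. The module name is hypothetical; the UUID value is taken from the test module.
#[cfg(test)]
mod uuid_sketch {
	use crate::syn::v2::{lexer::Lexer, token::TokenKind};

	#[test]
	fn lexes_delimited_uuid() {
		let mut lexer = Lexer::new(r#"e72bee20-f49b-11ec-b939-0242ac120002""#.as_bytes());
		let token = lexer.lex_uuid(true);
		assert_eq!(token.kind, TokenKind::Uuid);
		assert_eq!(lexer.uuid.take().unwrap().0.to_string(), "e72bee20-f49b-11ec-b939-0242ac120002");
	}
}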
|
Some files were not shown because too many files have changed in this diff.