Introduce new experimental parser (#2885)
Co-authored-by: Raphael Darley <raphael@raphaeldarley.com>
parent f7e6e028a2 · commit 2755f572fc
152 changed files with 14640 additions and 823 deletions
.github/workflows/ci.yml | 43 (vendored)
@@ -180,7 +180,7 @@ jobs:

       - name: Run CLI integration tests
         run: cargo make ci-cli-integration

       - name: Debug info
         if: always()
         run: |
@@ -189,7 +189,7 @@ jobs:
           df -h
           ps auxf
           cat /tmp/surrealdb.log || true


  http-server:
    name: HTTP integration tests
@@ -326,6 +326,45 @@ jobs:
          path: target/llvm-cov/html/
          retention-days: 5

+  test-parser:
+    name: Test workspace with experimental parser
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Install stable toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: 1.71.1
+
+      - name: Checkout sources
+        uses: actions/checkout@v4
+
+      - name: Setup cache
+        uses: Swatinem/rust-cache@v2
+        with:
+          save-if: ${{ github.ref == 'refs/heads/main' }}
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get -y update
+
+      - name: Free up some disk space
+        run: |
+          (set -x; df -h)
+          # Free up some disk space by removing unused files
+          (set -x; sudo rm -rf /imagegeneration || true)
+          (set -x; sudo rm -rf /opt/az || true)
+          (set -x; sudo rm -rf /opt/hostedtoolcache || true)
+          (set -x; sudo rm -rf /opt/google || true)
+          (set -x; sudo rm -rf /opt/pipx || true)
+          (set -x; df -h)
+
+      - name: Install cargo-make
+        run: cargo install --debug --locked cargo-make
+
+      - name: Test workspace for experimental_parser
+        run: cargo make test-experimental-parser
+
  ws-engine:
    name: WebSocket engine
    runs-on: ubuntu-latest
Cargo.lock | 767 (generated; diff suppressed because it is too large)
@@ -8,7 +8,7 @@ authors = ["Tobie Morgan Hitchcock <tobie@surrealdb.com>"]

 [features]
 # Public features
-default = ["storage-mem", "storage-rocksdb", "scripting", "http", "jwks"]
+default = ["storage-mem", "storage-rocksdb", "scripting", "http"]
 storage-mem = ["surrealdb/kv-mem"]
 storage-rocksdb = ["surrealdb/kv-rocksdb"]
 storage-speedb = ["surrealdb/kv-speedb"]
@@ -18,6 +18,7 @@ scripting = ["surrealdb/scripting"]
 http = ["surrealdb/http"]
 http-compression = []
 ml = ["surrealdb/ml", "surrealml-core"]
+experimental-parser = ["surrealdb/experimental-parser"]
 jwks = ["surrealdb/jwks"]

 [workspace]
@@ -60,6 +61,7 @@ reqwest = { version = "0.11.22", default-features = false, features = ["blocking
 rmpv = "1.0.1"
 rustyline = { version = "12.0.0", features = ["derive"] }
 serde = { version = "1.0.193", features = ["derive"] }
+serde_cbor = "0.11.2"
 serde_json = "1.0.108"
 serde_pack = { version = "1.1.2", package = "rmp-serde" }
 surrealdb = { path = "lib", features = ["protocol-http", "protocol-ws", "rustls"] }
Cargo.toml.orig | 126 (new file)

@@ -0,0 +1,126 @@
+[package]
+name = "surreal"
+publish = false
+edition = "2021"
+version = "1.1.0"
+license-file = "LICENSE"
+authors = ["Tobie Morgan Hitchcock <tobie@surrealdb.com>"]
+
+[features]
+# Public features
+default = ["storage-mem", "storage-rocksdb", "scripting", "http", "jwks"]
+storage-mem = ["surrealdb/kv-mem"]
+storage-rocksdb = ["surrealdb/kv-rocksdb"]
+storage-speedb = ["surrealdb/kv-speedb"]
+storage-tikv = ["surrealdb/kv-tikv"]
+storage-fdb = ["surrealdb/kv-fdb-7_1"]
+scripting = ["surrealdb/scripting"]
+http = ["surrealdb/http"]
+http-compression = []
+ml = ["surrealdb/ml", "surrealml-core"]
+<<<<<<< HEAD
+experimental-parser = ["surrealdb/experimental-parser"]
+=======
+jwks = ["surrealdb/jwks"]
+>>>>>>> upstream/main
+
+[workspace]
+members = ["lib", "lib/examples/actix", "lib/examples/axum"]
+
+[profile.release]
+lto = true
+strip = true
+opt-level = 3
+panic = 'abort'
+codegen-units = 1
+
+[profile.bench]
+strip = false
+
+[dependencies]
+argon2 = "0.5.2"
+axum = { version = "0.6.20", features = ["tracing", "ws", "headers"] }
+axum-client-ip = "0.5.0"
+axum-extra = { version = "0.7.7", features = ["query", "typed-routing"] }
+axum-server = { version = "0.5.1", features = ["tls-rustls"] }
+base64 = "0.21.5"
+bytes = "1.5.0"
+ciborium = "0.2.1"
+clap = { version = "4.4.11", features = ["env", "derive", "wrap_help", "unicode"] }
+futures = "0.3.29"
+futures-util = "0.3.29"
+glob = "0.3.1"
+http = "0.2.11"
+http-body = "0.4.5"
+hyper = "0.14.27"
+ipnet = "2.9.0"
+ndarray = { version = "0.15.6", optional = true }
+once_cell = "1.18.0"
+opentelemetry = { version = "0.19", features = ["rt-tokio"] }
+opentelemetry-otlp = { version = "0.12.0", features = ["metrics"] }
+pin-project-lite = "0.2.13"
+rand = "0.8.5"
+reqwest = { version = "0.11.22", default-features = false, features = ["blocking", "gzip"] }
+rmpv = "1.0.1"
+rustyline = { version = "12.0.0", features = ["derive"] }
+serde = { version = "1.0.193", features = ["derive"] }
+serde_cbor = "0.11.2"
+serde_json = "1.0.108"
+serde_pack = { version = "1.1.2", package = "rmp-serde" }
+surrealdb = { path = "lib", features = ["protocol-http", "protocol-ws", "rustls"] }
+surrealml-core = { version = "0.0.3", optional = true}
+tempfile = "3.8.1"
+thiserror = "1.0.50"
+tokio = { version = "1.34.0", features = ["macros", "signal"] }
+tokio-util = { version = "0.7.10", features = ["io"] }
+tower = "0.4.13"
+tower-http = { version = "0.4.4", features = ["trace", "sensitive-headers", "auth", "request-id", "util", "catch-panic", "cors", "set-header", "limit", "add-extension", "compression-full"] }
+tracing = "0.1"
+tracing-opentelemetry = "0.19.0"
+tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
+urlencoding = "2.1.3"
+uuid = { version = "1.6.1", features = ["serde", "js", "v4", "v7"] }
+
+[target.'cfg(unix)'.dependencies]
+nix = { version = "0.27.1", features = ["user"] }
+
+[target.'cfg(unix)'.dev-dependencies]
+nix = { version = "0.27.1", features = ["signal", "user"] }
+
+[target.'cfg(any(target_os = "linux", target_os = "macos", target_os = "ios"))'.dependencies]
+mimalloc = { version = "0.1.39", default-features = false }
+
+[target.'cfg(any(target_os = "android", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))'.dependencies]
+jemallocator = "0.5.4"
+
+[dev-dependencies]
+assert_fs = "1.0.13"
+env_logger = "0.10.1"
+opentelemetry-proto = { version = "0.2.0", features = ["gen-tonic", "traces", "metrics", "logs"] }
+rcgen = "0.11.3"
+serial_test = "2.0.0"
+temp-env = { version = "0.3.6", features = ["async_closure"] }
+test-log = { version = "0.2.13", features = ["trace"] }
+tokio-stream = { version = "0.1", features = ["net"] }
+tokio-tungstenite = { version = "0.20.1" }
+tonic = "0.8.3"
+ulid = "1.1.0"
+wiremock = "0.5.22"
+
+[build-dependencies]
+semver = "1.0.20"
+
+[package.metadata.deb]
+maintainer-scripts = "pkg/deb/"
+maintainer = "Tobie Morgan Hitchcock <tobie@surrealdb.com>"
+copyright = "SurrealDB Ltd. 2022"
+systemd-units = { enable = true }
+depends = "$auto"
+section = "utility"
+priority = "optional"
+assets = [
+    ["target/release/surreal", "usr/share/surrealdb/surreal", "755"],
+    ["pkg/deb/README", "usr/share/surrealdb/README", "644"],
+]
+extended-description = "A scalable, distributed, collaborative, document-graph database, for the realtime web."
+license-file = ["LICENSE", "4"]
@@ -56,6 +56,18 @@ args = [
   "--skip", "ws_integration"
 ]
+
+
+[tasks.test-experimental-parser]
+category = "CI - INTEGRATION TESTS"
+command = "cargo"
+args = [
+  "test", "--locked", "--no-default-features", "--features", "storage-mem,scripting,http,experimental-parser", "--workspace", "--",
+  "--skip", "api_integration",
+  "--skip", "cli_integration",
+  "--skip", "http_integration",
+  "--skip", "ws_integration"
+]

 [tasks.test-workspace-coverage-complete]
 category = "CI - INTEGRATION TESTS"
 command = "cargo"
@@ -40,6 +40,7 @@ rustls = ["dep:rustls", "reqwest?/rustls-tls", "tokio-tungstenite?/rustls-tls-we
 ml = ["surrealml-core", "ndarray"]
 jwks = ["dep:reqwest"]
 arbitrary = ["dep:arbitrary", "dep:regex-syntax", "rust_decimal/rust-fuzz", "geo-types/arbitrary", "uuid/arbitrary"]
+experimental-parser = ["dep:phf", "dep:unicase"]
 # Private features
 kv-fdb = ["foundationdb", "tokio/time"]

@@ -123,6 +124,8 @@ tracing = "0.1.40"
 trice = "0.4.0"
 ulid = { version = "1.1.0", features = ["serde"] }
 url = "2.5.0"
+phf = { version = "0.11.2", features = ["macros", "unicase"], optional=true }
+unicase = { version = "2.7.0", optional = true }
 arbitrary = { version = "1.3.2", features = ["derive"], optional = true }
 regex-syntax = { version = "0.8.2", optional = true, features = ["arbitrary"] }
 geo-types = { version = "0.7.12", features = ["arbitrary"] }
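The `experimental-parser` feature above only pulls in `phf` and `unicase` when it is enabled; which parser implementation gets compiled is decided by `cfg` attributes. The following is a minimal, self-contained sketch of that kind of feature gate — the module names and stub types are purely illustrative and are not SurrealDB's actual layout:

```rust
// Sketch only: a single public entry point that switches between a legacy and
// an experimental parser depending on the `experimental-parser` cargo feature.
pub struct Query;
pub struct ParseError;

#[cfg(feature = "experimental-parser")]
mod experimental {
    use super::{ParseError, Query};
    // Stand-in for the new hand-written parser.
    pub fn parse(_input: &str) -> Result<Query, ParseError> {
        Ok(Query)
    }
}

#[cfg(not(feature = "experimental-parser"))]
mod legacy {
    use super::{ParseError, Query};
    // Stand-in for the existing nom-based parser.
    pub fn parse(_input: &str) -> Result<Query, ParseError> {
        Ok(Query)
    }
}

#[cfg(feature = "experimental-parser")]
pub fn parse(input: &str) -> Result<Query, ParseError> {
    experimental::parse(input)
}

#[cfg(not(feature = "experimental-parser"))]
pub fn parse(input: &str) -> Result<Query, ParseError> {
    legacy::parse(input)
}
```

Callers see one `parse` function either way; only the compiled body differs.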
@@ -59,6 +59,7 @@ fn bench_parser(c: &mut Criterion) {
 			&(1..=100).map(|n| format!("'{n}': {n}")).collect::<Vec<_>>().join(", ")
 		)
 	);
+	parser!(c, full_test, surrealdb::sql::parse, include_str!("../test.surql"));
 	c.finish();
 }

lib/fuzz/Cargo.lock | 1 (generated)

@@ -2584,7 +2584,6 @@ dependencies = [
 "futures-concurrency",
 "fuzzy-matcher",
 "geo 0.27.0",
-"geo-types",
 "hex",
 "indexmap 2.1.0",
 "ipnet",
@@ -255,7 +255,7 @@ mod tests {
 	use crate::dbs::Capabilities;
 	use crate::opt::auth::Root;
 	use crate::sql::Value;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn local_engine_without_auth() {
@@ -153,6 +153,7 @@ struct IntervalStream {
 }

 impl IntervalStream {
+	#[allow(unused)]
 	fn new(interval: Interval) -> Self {
 		Self {
 			inner: interval,
@@ -177,7 +177,8 @@ impl<T: Target + Hash + Eq + PartialEq + std::fmt::Display> std::fmt::Display fo
 	/// # Examples
 	///
 	/// Create a new instance, and allow all capabilities
-	/// ```no_run
+	#[cfg_attr(feature = "kv-rocksdb", doc = "```no_run")]
+	#[cfg_attr(not(feature = "kv-rocksdb"), doc = "```ignore")]
 	/// # use surrealdb::opt::capabilities::Capabilities;
 	/// # use surrealdb::opt::Config;
 	/// # use surrealdb::Surreal;
@@ -192,7 +193,8 @@ impl<T: Target + Hash + Eq + PartialEq + std::fmt::Display> std::fmt::Display fo
 	/// ```
 	///
 	/// Create a new instance, and allow certain functions
-	/// ```no_run
+	#[cfg_attr(feature = "kv-rocksdb", doc = "```no_run")]
+	#[cfg_attr(not(feature = "kv-rocksdb"), doc = "```ignore")]
 	/// # use std::str::FromStr;
 	/// # use surrealdb::engine::local::File;
 	/// # use surrealdb::opt::capabilities::Capabilities;
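The two doc-comment changes above use `cfg_attr` to decide how a doc example is compiled: the example runs as a `no_run` doc test only when the backend feature it needs is enabled, and stays as an `ignore`d (but still rendered) example otherwise. A hedged, standalone illustration of the same pattern, not taken from the crate:

```rust
/// Open an example datastore.
///
#[cfg_attr(feature = "kv-rocksdb", doc = "```no_run")]
#[cfg_attr(not(feature = "kv-rocksdb"), doc = "```ignore")]
/// let path = "rocksdb://example.db";
/// println!("would open {path}");
/// ```
pub fn open_example() {}
```

This keeps `cargo test` green under any feature combination while the rendered documentation looks the same either way.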
@@ -423,6 +423,7 @@ pub async fn asynchronous(
 mod tests {
 	#[cfg(all(feature = "scripting", feature = "kv-mem"))]
 	use crate::dbs::Capabilities;
+	use crate::sql::{statements::OutputStatement, Function, Query, Statement, Value};

 	#[tokio::test]
 	async fn implementations_are_present() {
@@ -442,8 +443,28 @@ mod tests {
 			let (quote, _) = line.split_once("=>").unwrap();
 			let name = quote.trim().trim_matches('"');

-			let builtin_name = crate::syn::test::builtin_name(name);
-			if builtin_name.is_err() {
+			let res = crate::syn::parse(&format!("RETURN {}()", name));
+			if let Ok(Query(mut x)) = res {
+				match x.0.pop() {
+					Some(Statement::Output(OutputStatement {
+						what: Value::Function(x),
+						..
+					})) => match *x {
+						Function::Normal(parsed_name, _) => {
+							if parsed_name != name {
+								problems
+									.push(format!("function `{name}` parsed as `{parsed_name}`"));
+							}
+						}
+						_ => {
+							problems.push(format!("couldn't parse {name} function"));
+						}
+					},
+					_ => {
+						problems.push(format!("couldn't parse {name} function"));
+					}
+				}
+			} else {
 				problems.push(format!("couldn't parse {name} function"));
 			}
@@ -1 +1,2 @@
+#[cfg(feature = "http")]
 mod fetch;
@@ -14,10 +14,10 @@ use std::str::{self, FromStr};
 use std::sync::Arc;

 async fn config(
-	kvs: &Datastore,
+	_kvs: &Datastore,
 	de_kind: Algorithm,
 	de_code: String,
-	token_header: Header,
+	_token_header: Header,
 ) -> Result<(DecodingKey, Validation), Error> {
 	if de_kind == Algorithm::Jwks {
 		#[cfg(not(feature = "jwks"))]
@@ -27,8 +27,8 @@ async fn config(
 		}
 		#[cfg(feature = "jwks")]
 		// The key identifier header must be present
-		if let Some(kid) = token_header.kid {
-			jwks::config(kvs, &kid, &de_code).await
+		if let Some(kid) = _token_header.kid {
+			jwks::config(_kvs, &kid, &de_code).await
 		} else {
 			Err(Error::MissingTokenHeader("kid".to_string()))
 		}
@@ -1125,7 +1125,7 @@ mod tests {
 		// Test with generic user identifier
 		//
 		{
-			let resource_id = "user:2k9qnabxuxh8k4d5gfto".to_string();
+			let resource_id = "user:`2k9qnabxuxh8k4d5gfto`".to_string();
 			// Prepare the claims object
 			let mut claims = claims.clone();
 			claims.id = Some(resource_id.clone());
@@ -1254,6 +1254,7 @@ mod tests {
 		}
 	}

+	#[cfg(feature = "jwks")]
 	#[tokio::test]
 	async fn test_token_scope_jwks() {
 		use crate::opt::capabilities::{Capabilities, NetTarget, Targets};
@@ -8,8 +8,7 @@ use crate::idx::ft::postings::TermFrequency;
 use crate::idx::ft::terms::{TermId, Terms};
 use crate::sql::statements::DefineAnalyzerStatement;
 use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
-use crate::sql::Value;
-use crate::syn::path_like;
+use crate::sql::{Function, Strand, Value};
 use async_recursion::async_recursion;
 use filter::Filter;
 use std::collections::hash_map::Entry;
@@ -194,26 +193,16 @@ impl Analyzer {
 		txn: &Transaction,
 		mut input: String,
 	) -> Result<Tokens, Error> {
-		if let Some(function_name) = &self.function {
-			let fns = format!("fn::{function_name}(\"{input}\")");
-			match path_like(&fns) {
-				Ok(func_value) => {
-					let val = func_value.compute(ctx, opt, txn, None).await?;
-					if let Value::Strand(val) = val {
-						input = val.0;
-					} else {
-						return Err(Error::InvalidFunction {
-							name: function_name.to_string(),
-							message: "The function should return a string.".to_string(),
-						});
-					}
-				}
-				Err(e) => {
-					return Err(Error::InvalidFunction {
-						name: function_name.to_string(),
-						message: e.to_string(),
-					})
-				}
-			}
+		if let Some(function_name) = self.function.clone() {
+			let fns = Function::Custom(function_name.clone(), vec![Value::Strand(Strand(input))]);
+			let val = fns.compute(ctx, opt, txn, None).await?;
+			if let Value::Strand(val) = val {
+				input = val.0;
+			} else {
+				return Err(Error::InvalidFunction {
+					name: function_name,
+					message: "The function should return a string.".to_string(),
+				});
+			}
 		}
 		}
 		if let Some(t) = &self.tokenizers {
@@ -308,7 +308,7 @@ impl RangeQueryBuilder {
 mod tests {
 	use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
 	use crate::sql::{Array, Idiom, Value};
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;
 	use std::collections::HashSet;
 	use std::sync::Arc;

@@ -193,7 +193,7 @@ mod tests {
 	#[test]
 	fn key() {
 		use super::*;
-		use crate::syn::test::Parse;
+		use crate::syn::Parse;
 		let fk = Thing::parse("other:test");
 		#[rustfmt::skip]
 		let val = Graph::new(
@@ -90,8 +90,7 @@ mod tests {
 		let dec = Thing::decode(&enc).unwrap();
 		assert_eq!(val, dec);
 		println!("---");
-		//
-		let id2 = "foo:['f8e238f2-e734-47b8-9a16-476b291bd78a']";
+		let id2 = "foo:[u'f8e238f2-e734-47b8-9a16-476b291bd78a']";
 		let thing = syn::thing(id2).expect("Failed to parse the ID");
 		let id2 = thing.id;
 		let val = Thing::new("testns", "testdb", "testtb", id2);
@@ -219,6 +219,18 @@ impl Datastore {
 		#[allow(unused_variables)]
 		let default_clock: Arc<RwLock<SizedClock>> =
 			Arc::new(RwLock::new(SizedClock::System(SystemClock::new())));

+		// removes warning if no storage is enabled.
+		#[cfg(not(any(
+			feature = "kv-mem",
+			feature = "kv-rocksdb",
+			feature = "kv-speedb",
+			feature = "kv-indxdb",
+			feature = "kv-tikv",
+			feature = "kv-fdb"
+		)))]
+		let _ = (clock_override, default_clock);
+
 		// Initiate the desired datastore
 		let (inner, clock): (Result<Inner, Error>, Arc<RwLock<SizedClock>>) = match path {
 			"memory" => {
@@ -340,7 +352,7 @@ impl Datastore {
 			// The datastore path is not valid
 			_ => {
 				// use clock_override and default_clock to remove warning when no kv is enabled.
-				let _ = (clock_override, default_clock);
+				let _ = default_clock;
 				info!("Unable to load the specified datastore {}", path);
 				Err(Error::Ds("Unable to load the specified datastore".into()))
 			}
@@ -25,6 +25,14 @@ mod tx;

 mod clock;
 #[cfg(test)]
+#[cfg(any(
+	feature = "kv-mem",
+	feature = "kv-rocksdb",
+	feature = "kv-speedb",
+	feature = "kv-indxdb",
+	feature = "kv-tikv",
+	feature = "kv-fdb"
+))]
 mod tests;

 pub use self::ds::*;
@@ -2,7 +2,7 @@ use revision::revisioned;
 use serde::{Deserialize, Serialize};
 use std::fmt;

-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
 #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
 #[revisioned(revision = 1)]
 pub enum Algorithm {
@@ -80,7 +80,13 @@ pub fn duration(i: &str) -> IResult<&str, ()> {

 pub fn field(i: &str) -> IResult<&str, ()> {
 	peek(alt((
-		value((), preceded(shouldbespace, tag_no_case("FROM"))),
+		value(
+			(),
+			preceded(
+				shouldbespace,
+				alt((tag_no_case("FROM"), tag_no_case("TIMEOUT"), tag_no_case("PARALLEL"))),
+			),
+		),
 		value((), char(';')),
 		value((), eof),
 	)))(i)
@@ -1,4 +1,3 @@
-use nom::character::is_digit;
 use std::borrow::Cow;

 const SINGLE: char = '\'';
@@ -54,9 +53,15 @@ pub fn quote_str(s: &str) -> String {

 #[inline]
 pub fn quote_plain_str(s: &str) -> String {
-	let mut ret = quote_str(s);
-	#[cfg(not(feature = "experimental_parser"))]
+	#[cfg(not(feature = "experimental-parser"))]
 	{
+		if crate::syn::thing(s).is_ok() {
+			let mut ret = quote_str(s);
+			ret.insert(0, 's');
+			return ret;
+		}
+
+		let mut ret = quote_str(s);
 		// HACK: We need to prefix strands which look like records, uuids, or datetimes with an `s`
 		// otherwise the strands will parsed as a different type when parsed again.
 		// This is not required for the new parser.
@@ -64,13 +69,14 @@ pub fn quote_plain_str(s: &str) -> String {
 		// directly to avoid having to create a common interface between the old and new parser.
 		if crate::syn::v1::literal::uuid(&ret).is_ok()
 			|| crate::syn::v1::literal::datetime(&ret).is_ok()
-			|| crate::syn::thing(&ret).is_ok()
 		{
 			ret.insert(0, 's');
 		}
+		ret
 	}

-	ret
+	#[cfg(feature = "experimental-parser")]
+	quote_str(s)
 }

 #[inline]
@@ -106,24 +112,16 @@ pub fn escape_normal<'a>(s: &'a str, l: char, r: char, e: &str) -> Cow<'a, str>

 #[inline]
 pub fn escape_numeric<'a>(s: &'a str, l: char, r: char, e: &str) -> Cow<'a, str> {
-	// Presume this is numeric
-	let mut numeric = true;
 	// Loop over each character
-	for x in s.bytes() {
+	for (idx, x) in s.bytes().enumerate() {
+		// the first character is not allowed to be a digit.
+		if idx == 0 && x.is_ascii_digit() {
+			return Cow::Owned(format!("{l}{}{r}", s.replace(r, e)));
+		}
 		// Check if character is allowed
 		if !(x.is_ascii_alphanumeric() || x == b'_') {
 			return Cow::Owned(format!("{l}{}{r}", s.replace(r, e)));
 		}
-		// Check if character is non-numeric
-		if !is_digit(x) {
-			numeric = false;
-		}
 	}
-	// Output the id value
-	match numeric {
-		// This is numeric so escape it
-		true => Cow::Owned(format!("{l}{}{r}", s.replace(r, e))),
-		// No need to escape the value
-		_ => Cow::Borrowed(s),
-	}
+	Cow::Borrowed(s)
 }
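The rewritten `escape_numeric` above drops the old "is the whole string numeric?" scan: a value is now escaped as soon as it starts with a digit or contains any character outside `[A-Za-z0-9_]`, and is borrowed back unchanged otherwise. A small standalone sketch of just that decision rule (not the library function itself, which also wraps the value in the given bracket characters):

```rust
// Decision rule only; the real function returns a Cow and adds the brackets.
fn needs_escaping(s: &str) -> bool {
    s.bytes().enumerate().any(|(idx, b)| {
        (idx == 0 && b.is_ascii_digit()) || !(b.is_ascii_alphanumeric() || b == b'_')
    })
}

fn main() {
    assert!(!needs_escaping("foo_1"));  // identifier-like: left as-is
    assert!(needs_escaping("1foo"));    // leading digit: must be escaped
    assert!(needs_escaping("foo-bar")); // '-' is not an allowed character
}
```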
@@ -48,29 +48,6 @@ impl Expression {
 			r,
 		}
 	}
-	/// Augment an existing expression
-	pub(crate) fn augment(mut self, l: Value, o: Operator) -> Self {
-		match &mut self {
-			Self::Binary {
-				l: left,
-				o: op,
-				..
-			} if o.precedence() >= op.precedence() => match left {
-				Value::Expression(x) => {
-					*x.as_mut() = std::mem::take(x).augment(l, o);
-					self
-				}
-				_ => {
-					*left = Self::new(l, o, std::mem::take(left)).into();
-					self
-				}
-			},
-			e => {
-				let r = Value::from(std::mem::take(e));
-				Self::new(l, o, r)
-			}
-		}
-	}
 }

 impl Expression {
@@ -132,6 +109,8 @@ impl Expression {
 				let operand = v.compute(ctx, opt, txn, doc).await?;
 				return match o {
 					Operator::Neg => fnc::operate::neg(operand),
+					// TODO: Check if it is a number?
+					Operator::Add => Ok(operand),
 					Operator::Not => fnc::operate::not(operand),
 					op => unreachable!("{op:?} is not a unary op"),
 				};
@@ -2,13 +2,13 @@ use crate::ctx::Context;
 use crate::dbs::{Options, Transaction};
 use crate::doc::CursorDoc;
 use crate::err::Error;
-use crate::sql::fmt::{fmt_separated_by, Fmt};
-use crate::sql::part::Next;
-use crate::sql::part::Part;
-use crate::sql::paths::{ID, IN, META, OUT};
-use crate::sql::value::Value;
-use md5::Digest;
-use md5::Md5;
+use crate::sql::{
+	fmt::{fmt_separated_by, Fmt},
+	part::Next,
+	paths::{ID, IN, META, OUT},
+	Part, Value,
+};
+use md5::{Digest, Md5};
 use revision::revisioned;
 use serde::{Deserialize, Serialize};
 use std::fmt::{self, Display, Formatter};
@@ -73,6 +73,11 @@ impl From<&[Part]> for Idiom {
 		Self(v.to_vec())
 	}
 }
+impl From<Part> for Idiom {
+	fn from(v: Part) -> Self {
+		Self(vec![v])
+	}
+}

 impl Idiom {
 	/// Appends a part to the end of this Idiom
@@ -1,5 +1,4 @@
-use crate::sql::fmt::Fmt;
-use crate::sql::table::Table;
+use crate::sql::{fmt::Fmt, Table};
 use revision::revisioned;
 use serde::{Deserialize, Serialize};
 use std::fmt::{self, Display, Formatter};
@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
 use std::fmt;
 use std::fmt::Display;

-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
 #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
 #[revisioned(revision = 1)]
 pub enum Language {
@@ -26,9 +26,9 @@ pub enum Language {
 	Turkish,
 }

-impl Display for Language {
-	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-		f.write_str(match self {
+impl Language {
+	pub fn as_str(&self) -> &'static str {
+		match self {
 			Self::Arabic => "ARABIC",
 			Self::Danish => "DANISH",
 			Self::Dutch => "DUTCH",
@@ -46,6 +46,12 @@ impl Display for Language {
 			Self::Swedish => "SWEDISH",
 			Self::Tamil => "TAMIL",
 			Self::Turkish => "TURKISH",
-		})
+		}
+	}
+}
+
+impl Display for Language {
+	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+		f.write_str(self.as_str())
 	}
 }
@@ -149,4 +149,4 @@ mod parser {
 	pub use crate::syn::*;
 }

-pub use self::parser::{idiom, json, parse, subquery, thing, v1::ParseError, value};
+pub use self::parser::{error::ParseError, idiom, json, parse, subquery, thing, value};
@@ -39,6 +39,7 @@ pub struct Order {
 	pub random: bool,
 	pub collate: bool,
 	pub numeric: bool,
+	/// true if the direction is ascending
 	pub direction: bool,
 }

@@ -1,6 +1,6 @@
 use crate::sql::fmt::Pretty;
-use crate::sql::statement::{Statement, Statements};
 use crate::sql::statements::{DefineStatement, RemoveStatement};
+use crate::sql::{Statement, Statements};
 use derive::Store;
 use revision::revisioned;
 use serde::{Deserialize, Serialize};
@@ -23,6 +23,12 @@ pub struct DefineScopeStatement {
 	pub comment: Option<Strand>,
 }

+impl DefineScopeStatement {
+	pub(crate) fn random_code() -> String {
+		rand::thread_rng().sample_iter(&Alphanumeric).take(128).map(char::from).collect::<String>()
+	}
+}
+
 impl DefineScopeStatement {
 	/// Process this type returning a computed simple Value
 	pub(crate) async fn compute(
@@ -46,10 +52,6 @@ impl DefineScopeStatement {
 		// Ok all good
 		Ok(Value::None)
 	}
-
-	pub fn random_code() -> String {
-		rand::thread_rng().sample_iter(&Alphanumeric).take(128).map(char::from).collect::<String>()
-	}
 }

 impl Display for DefineScopeStatement {
@@ -47,6 +47,31 @@ impl From<(Base, &str, &str)> for DefineUserStatement {
 	}
 }

 impl DefineUserStatement {
+	pub(crate) fn from_parsed_values(name: Ident, base: Base, roles: Vec<Ident>) -> Self {
+		DefineUserStatement {
+			name,
+			base,
+			roles, // New users get the viewer role by default
+			code: rand::thread_rng()
+				.sample_iter(&Alphanumeric)
+				.take(128)
+				.map(char::from)
+				.collect::<String>(),
+			..Default::default()
+		}
+	}
+
+	pub(crate) fn set_password(&mut self, password: &str) {
+		self.hash = Argon2::default()
+			.hash_password(password.as_bytes(), &SaltString::generate(&mut OsRng))
+			.unwrap()
+			.to_string()
+	}
+
+	pub(crate) fn set_passhash(&mut self, passhash: String) {
+		self.hash = passhash;
+	}
+
 	/// Process this type returning a computed simple Value
 	pub(crate) async fn compute(
 		&self,
@@ -13,7 +13,9 @@ use std::fmt::{self, Display, Write};
 #[revisioned(revision = 1)]
 #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
 pub struct IfelseStatement {
+	/// The first if condition followed by a body, followed by any number of else if's
 	pub exprs: Vec<(Value, Value)>,
+	/// the final else body, if there is one
 	pub close: Option<Value>,
 }

@@ -3,6 +3,8 @@ use revision::revisioned;
 use serde::{Deserialize, Serialize};
 use std::fmt;

+use crate::sql::escape::escape_ident;
+
 #[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Store, Hash)]
 #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
 #[revisioned(revision = 1)]
@@ -15,9 +17,11 @@ impl fmt::Display for UseStatement {
 	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 		f.write_str("USE")?;
 		if let Some(ref ns) = self.ns {
+			let ns = escape_ident(ns);
 			write!(f, " NS {ns}")?;
 		}
 		if let Some(ref db) = self.db {
+			let db = escape_ident(db);
 			write!(f, " DB {db}")?;
 		}
 		Ok(())
@@ -130,7 +130,7 @@ pub(crate) mod no_nul_bytes {
 #[cfg(test)]
 mod test {

-	#[cfg(not(feature = "experimental_parser"))]
+	#[cfg(not(feature = "experimental-parser"))]
 	#[test]
 	fn ensure_strands_are_prefixed() {
 		use super::Strand;
@@ -75,7 +75,7 @@ impl TryFrom<Strand> for Thing {
 impl TryFrom<&str> for Thing {
 	type Error = ();
 	fn try_from(v: &str) -> Result<Self, Self::Error> {
-		match syn::thing_raw(v) {
+		match syn::thing(v) {
 			Ok(v) => Ok(v),
 			_ => Err(()),
 		}
@@ -42,7 +42,7 @@ impl Value {
 mod tests {

 	use super::*;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn changed_none() {
@@ -12,7 +12,7 @@ impl Value {
 mod tests {

 	use super::*;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn clear_value() {
@@ -92,7 +92,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn compare_none() {
@@ -97,7 +97,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn cut_none() {
@@ -30,7 +30,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn decrement_none() {
@@ -41,7 +41,7 @@ mod tests {
 	use super::*;
 	use crate::dbs::test::mock;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn decrement_none() {
@@ -201,7 +201,7 @@ mod tests {
 	use super::*;
 	use crate::dbs::test::mock;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn del_none() {
@@ -78,7 +78,7 @@ impl Value {
 mod tests {

 	use super::*;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn diff_none() {
@@ -59,7 +59,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn each_none() {
@@ -53,7 +53,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn every_with_empty_objects_arrays() {
@@ -34,7 +34,7 @@ mod tests {
 	use super::*;
 	use crate::dbs::test::mock;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn extend_array_value() {
@@ -250,7 +250,7 @@ mod tests {
 	use crate::sql::id::Id;
 	use crate::sql::idiom::Idiom;
 	use crate::sql::thing::Thing;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn get_none() {
@@ -30,7 +30,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn increment_none() {
@@ -42,7 +42,7 @@ mod tests {
 	use super::*;
 	use crate::dbs::test::mock;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn increment_none() {
@@ -24,7 +24,7 @@ impl Value {
 mod tests {

 	use super::*;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn merge_none() {
@@ -86,7 +86,7 @@ impl Value {
 mod tests {

 	use super::*;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn patch_add_simple() {
@@ -54,7 +54,7 @@ mod tests {
 	use crate::sql::id::Id;
 	use crate::sql::idiom::Idiom;
 	use crate::sql::thing::Thing;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn pick_none() {
@@ -87,7 +87,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn put_none() {
@@ -19,7 +19,7 @@ impl Value {
 mod tests {

 	use super::*;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn replace() {
@@ -13,7 +13,7 @@ mod tests {
 	use super::*;
 	use crate::sql::id::Id;
 	use crate::sql::thing::Thing;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn rid_none() {
@@ -692,7 +692,7 @@ mod tests {
 	#[test]
 	fn duration() {
 		let duration = Duration::default();
-		let value = to_value(&duration).unwrap();
+		let value = to_value(duration).unwrap();
 		let expected = Value::Duration(duration);
 		assert_eq!(value, expected);
 		assert_eq!(expected, to_value(&expected).unwrap());
@@ -159,7 +159,7 @@ mod tests {
 	use super::*;
 	use crate::dbs::test::mock;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[tokio::test]
 	async fn set_none() {
@@ -1087,7 +1087,8 @@ impl Value {
 			| Value::Array(_)
 			| Value::Param(_)
 			| Value::Edges(_)
-			| Value::Thing(_) => true,
+			| Value::Thing(_)
+			| Value::Table(_) => true,
 			_ => false,
 		}
 	}
@@ -2774,7 +2775,7 @@ mod tests {

 	use super::*;
 	use crate::sql::uuid::Uuid;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn check_none() {
@@ -62,7 +62,7 @@ mod tests {

 	use super::*;
 	use crate::sql::idiom::Idiom;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

 	#[test]
 	fn walk_blank() {
@@ -1,3 +1,8 @@
+#[cfg(feature = "experimental-parser")]
+use super::v2::token::Span;
+#[cfg(feature = "experimental-parser")]
+use std::ops::Range;
+
 /// A human readable location inside a string.
 ///
 /// Locations are 1 indexed, the first character on the first line being on line 1 column 1.
@@ -19,10 +24,9 @@ impl Location {
 			.expect("tried to find location of substring in unrelated string");
 		// Bytes of input prior to line being iteratated.
 		let mut bytes_prior = 0;
-		for (line_idx, line) in input.split('\n').enumerate() {
-			// +1 for the '\n'
-			let bytes_so_far = bytes_prior + line.len() + 1;
-			if bytes_so_far > offset {
+		for (line_idx, (line, seperator_offset)) in LineIterator::new(input).enumerate() {
+			let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
+			if bytes_so_far >= offset {
 				// found line.
 				let line_offset = offset - bytes_prior;
 				let column = line[..line_offset].chars().count();
@@ -37,16 +41,13 @@ impl Location {
 		unreachable!()
 	}

-	#[cfg(feature = "experimental_parser")]
-	pub fn of_span_start(source: &str, span: Span) -> Self {
-		// Bytes of input before substr.
-		let offset = span.offset as usize;
+	#[cfg(feature = "experimental-parser")]
+	pub fn of_offset(source: &str, offset: usize) -> Self {
 		// Bytes of input prior to line being iteratated.
 		let mut bytes_prior = 0;
-		for (line_idx, line) in source.split('\n').enumerate() {
-			// +1 for the '\n'
-			let bytes_so_far = bytes_prior + line.len() + 1;
-			if bytes_so_far > offset {
+		for (line_idx, (line, seperator_offset)) in LineIterator::new(source).enumerate() {
+			let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
+			if bytes_so_far >= offset {
 				// found line.
 				let line_offset = offset - bytes_prior;
 				let column = line[..line_offset].chars().count();
@@ -61,31 +62,22 @@ impl Location {
 		unreachable!()
 	}

-	#[cfg(feature = "experimental_parser")]
+	#[cfg(feature = "experimental-parser")]
+	pub fn of_span_start(source: &str, span: Span) -> Self {
+		// Bytes of input before substr.
+
+		let offset = span.offset as usize;
+		Self::of_offset(source, offset)
+	}
+
+	#[cfg(feature = "experimental-parser")]
 	pub fn of_span_end(source: &str, span: Span) -> Self {
 		// Bytes of input before substr.
 		let offset = span.offset as usize + span.len as usize;
-		// Bytes of input prior to line being iteratated.
-		let mut bytes_prior = 0;
-		for (line_idx, line) in source.split('\n').enumerate() {
-			// +1 for the '\n'
-			let bytes_so_far = bytes_prior + line.len() + 1;
-			if bytes_so_far > offset {
-				// found line.
-				let line_offset = offset - bytes_prior;
-				let column = line[..line_offset].chars().count();
-				// +1 because line and column are 1 index.
-				return Self {
-					line: line_idx + 1,
-					column: column + 1,
-				};
-			}
-			bytes_prior = bytes_so_far;
-		}
-		unreachable!()
+		Self::of_offset(source, offset)
 	}

-	#[cfg(feature = "experimental_parser")]
+	#[cfg(feature = "experimental-parser")]
 	pub fn range_of_span(source: &str, span: Span) -> Range<Self> {
 		// Bytes of input before substr.
 		let offset = span.offset as usize;
@@ -93,19 +85,18 @@ impl Location {

 		// Bytes of input prior to line being iteratated.
 		let mut bytes_prior = 0;
-		let mut iterator = source.split('\n').enumerate();
+		let mut iterator = LineIterator::new(source).enumerate();
 		let start = loop {
-			let Some((line_idx, line)) = iterator.next() else {
+			let Some((line_idx, (line, seperator_offset))) = iterator.next() else {
 				panic!("tried to find location of span not belonging to string");
 			};
-			// +1 for the '\n'
-			let bytes_so_far = bytes_prior + line.len() + 1;
-			if bytes_so_far > offset {
+			let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
+			if bytes_so_far >= offset {
 				// found line.
 				let line_offset = offset - bytes_prior;
 				let column = line[..line_offset].chars().count();
 				// +1 because line and column are 1 index.
-				if bytes_so_far > end {
+				if bytes_so_far >= end {
 					// end is on the same line, finish immediatly.
 					let line_offset = end - bytes_prior;
 					let end_column = line[..line_offset].chars().count();
@@ -127,12 +118,11 @@ impl Location {
 		};

 		loop {
-			let Some((line_idx, line)) = iterator.next() else {
+			let Some((line_idx, (line, seperator_offset))) = iterator.next() else {
 				panic!("tried to find location of span not belonging to string");
 			};
-			// +1 for the '\n'
-			let bytes_so_far = bytes_prior + line.len() + 1;
-			if bytes_so_far > end {
+			let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize;
+			if bytes_so_far >= end {
 				let line_offset = end - bytes_prior;
 				let column = line[..line_offset].chars().count();
 				return start..Self {
@@ -143,3 +133,93 @@ impl Location {
 		}
 	}
 }
+
+struct LineIterator<'a> {
+	current: &'a str,
+}
+
+impl<'a> LineIterator<'a> {
+	pub fn new(s: &'a str) -> Self {
+		LineIterator {
+			current: s,
+		}
+	}
+}
+
+impl<'a> Iterator for LineIterator<'a> {
+	type Item = (&'a str, Option<u8>);
+
+	fn next(&mut self) -> Option<Self::Item> {
+		if self.current.is_empty() {
+			return None;
+		}
+		let bytes = self.current.as_bytes();
+		for i in 0..bytes.len() {
+			match bytes[i] {
+				b'\r' => {
+					if let Some(b'\n') = bytes.get(i + 1) {
+						let res = &self.current[..i];
+						self.current = &self.current[i + 2..];
+						return Some((res, Some(2)));
+					}
+					let res = &self.current[..i];
+					self.current = &self.current[i + 1..];
+					return Some((res, Some(1)));
+				}
+				0xb | 0xC | b'\n' => {
+					// vertical tab VT and form feed FF.
+					let res = &self.current[..i];
+					self.current = &self.current[i + 1..];
+					return Some((res, Some(1)));
+				}
+				0xc2 => {
+					// next line NEL
+					if bytes.get(i + 1).copied() != Some(0x85) {
+						continue;
+					}
+					let res = &self.current[..i];
+					self.current = &self.current[i + 2..];
+					return Some((res, Some(2)));
+				}
+				0xe2 => {
+					// line separator and paragraph seperator.
+					if bytes.get(i + 1).copied() != Some(0x80) {
+						continue;
+					}
+					let next_byte = bytes.get(i + 2).copied();
+					if next_byte != Some(0xA8) && next_byte != Some(0xA9) {
+						continue;
+					}
+
+					// vertical tab VT, next line NEL and form feed FF.
+					let res = &self.current[..i];
+					self.current = &self.current[i + 3..];
+					return Some((res, Some(3)));
+				}
+				_ => {}
+			}
+		}
+		Some((std::mem::take(&mut self.current), None))
+	}
+}
+
+#[cfg(test)]
+mod test {
+	use super::LineIterator;
+
+	#[test]
+	fn test_line_iterator() {
+		let lines = "foo\nbar\r\nfoo\rbar\u{000B}foo\u{000C}bar\u{0085}foo\u{2028}bar\u{2029}\n";
+		let mut iterator = LineIterator::new(lines);
+		assert_eq!(iterator.next(), Some(("foo", Some(1))));
+		assert_eq!(iterator.next(), Some(("bar", Some(2))));
+		assert_eq!(iterator.next(), Some(("foo", Some(1))));
+		assert_eq!(iterator.next(), Some(("bar", Some(1))));
+		assert_eq!(iterator.next(), Some(("foo", Some(1))));
+		assert_eq!(iterator.next(), Some(("bar", Some(2))));
+		assert_eq!(iterator.next(), Some(("foo", Some(3))));
+		assert_eq!(iterator.next(), Some(("bar", Some(3))));
|
||||||
|
assert_eq!(iterator.next(), Some(("", Some(1))));
|
||||||
|
assert_eq!(iterator.next(), None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
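Aside (not part of the diff): the `LineIterator` above reports each line together with the byte width of the separator that ended it, so the caller can turn a byte offset into a 1-indexed line and column. A minimal standalone sketch of that calculation follows; `location_of_offset` and its use of `split_inclusive` are illustrative only and not code from this commit.

// Illustrative sketch: derive a 1-indexed (line, column) from a byte offset.
fn location_of_offset(source: &str, offset: usize) -> (usize, usize) {
	let mut bytes_prior = 0;
	// split_inclusive keeps the '\n' in each chunk, playing the role the
	// separator width plays in the commit's LineIterator.
	for (line_idx, line) in source.split_inclusive('\n').enumerate() {
		let end = bytes_prior + line.len();
		if end >= offset {
			let column = line[..offset - bytes_prior].chars().count();
			// +1 because line and column are 1-indexed.
			return (line_idx + 1, column + 1);
		}
		bytes_prior = end;
	}
	unreachable!("offset does not lie inside source")
}

fn main() {
	// byte offset 5 in "foo\nbar" is the `a` on line 2, column 2.
	println!("{:?}", location_of_offset("foo\nbar", 5));
}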
@@ -2,6 +2,9 @@ use std::{fmt, ops::Range};

use super::common::Location;

+mod nom_error;
+pub use nom_error::ParseError;
+
#[derive(Clone, Debug)]
pub struct RenderedError {
	pub text: String,
@@ -5,19 +5,12 @@ use crate::syn::{
use nom::error::ErrorKind;
use nom::error::FromExternalError;
use nom::error::ParseError as NomParseError;
-use nom::Err;
use std::fmt::Write;
use std::num::ParseFloatError;
use std::num::ParseIntError;
use std::ops::Bound;
use thiserror::Error;

-mod utils;
-pub use utils::*;
-mod render;
-
-pub type IResult<I, O, E = ParseError<I>> = Result<(I, O), Err<E>>;
-
#[derive(Error, Debug, Clone)]
pub enum ParseError<I> {
	Base(I),
@@ -3,11 +3,20 @@
pub mod common;
pub mod error;

+#[cfg(not(feature = "experimental-parser"))]
pub mod v1;
-pub use v1::{
-	datetime, datetime_raw, duration, idiom, json, parse, path_like, range, subquery, thing,
-	thing_raw, value,
+#[cfg(not(feature = "experimental-parser"))]
+pub use v1::{datetime_raw, duration, idiom, json, parse, range, subquery, thing, value};
+
+#[cfg(feature = "experimental-parser")]
+pub mod v2;
+#[cfg(feature = "experimental-parser")]
+pub use v2::{
+	datetime_raw, duration, idiom, json, json_legacy_strand, parse, range, subquery, thing, value,
+	value_legacy_strand,
};

#[cfg(test)]
-pub mod test;
+pub trait Parse<T> {
+	fn parse(val: &str) -> T;
+}
@@ -1,50 +0,0 @@
-pub(crate) use super::v1::builtin::builtin_name;
-use crate::sql::{Array, Expression, Idiom, Param, Script, Thing, Value};
-
-use super::v1::test::*;
-
-pub trait Parse<T> {
-	fn parse(val: &str) -> T;
-}
-
-impl Parse<Self> for Value {
-	fn parse(val: &str) -> Self {
-		value(val).unwrap().1
-	}
-}
-
-impl Parse<Self> for Array {
-	fn parse(val: &str) -> Self {
-		array(val).unwrap().1
-	}
-}
-
-impl Parse<Self> for Param {
-	fn parse(val: &str) -> Self {
-		param(val).unwrap().1
-	}
-}
-
-impl Parse<Self> for Idiom {
-	fn parse(val: &str) -> Self {
-		idiom(val).unwrap().1
-	}
-}
-
-impl Parse<Self> for Script {
-	fn parse(val: &str) -> Self {
-		script(val).unwrap().1
-	}
-}
-
-impl Parse<Self> for Thing {
-	fn parse(val: &str) -> Self {
-		thing(val).unwrap().1
-	}
-}
-
-impl Parse<Self> for Expression {
-	fn parse(val: &str) -> Self {
-		expression(val).unwrap().1
-	}
-}
@@ -1,7 +1,7 @@
-use super::{IResult, ParseError};
-use nom::bytes::complete::tag_no_case;
-use nom::Err;
-use nom::Parser;
+pub use crate::syn::error::ParseError;
+use nom::{bytes::complete::tag_no_case, Err, Parser};
+
+pub type IResult<I, O, E = ParseError<I>> = Result<(I, O), Err<E>>;

pub fn expected<I, O, P>(expect: &'static str, mut parser: P) -> impl FnMut(I) -> IResult<I, O>
where
@@ -1 +0,0 @@
@@ -7,7 +7,7 @@ use super::{
	value::single,
	IResult,
};
-use crate::sql::{Cast, Expression, Future};
+use crate::sql::{Cast, Expression, Future, Operator, Value};
use nom::{bytes::complete::tag, character::complete::char, combinator::cut, sequence::delimited};

pub fn cast(i: &str) -> IResult<&str, Cast> {
@@ -30,10 +30,32 @@ pub fn unary(i: &str) -> IResult<&str, Expression> {
	))
}

+/// Augment an existing expression
+pub(crate) fn augment(mut this: Expression, l: Value, o: Operator) -> Expression {
+	match &mut this {
+		Expression::Binary {
+			l: left,
+			o: op,
+			..
+		} if o.precedence() >= op.precedence() => match left {
+			Value::Expression(x) => {
+				*x.as_mut() = augment(std::mem::take(x), l, o);
+				this
+			}
+			_ => {
+				*left = Expression::new(l, o, std::mem::take(left)).into();
+				this
+			}
+		},
+		e => {
+			let r = Value::from(std::mem::take(e));
+			Expression::new(l, o, r)
+		}
+	}
+}
+
#[cfg(test)]
pub fn binary(i: &str) -> IResult<&str, Expression> {
-	use crate::sql::Value;
	use super::depth;
	use super::value;

@@ -43,7 +65,7 @@ pub fn binary(i: &str) -> IResult<&str, Expression> {
	let _diving = depth::dive(i)?;
	let (i, r) = value::value(i)?;
	let v = match r {
-		Value::Expression(r) => r.augment(l, o),
+		Value::Expression(r) => augment(*r, l, o),
		_ => Expression::new(l, o, r),
	};
	Ok((i, v))
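Aside (not part of the diff): `augment` reattaches a freshly parsed left operand and operator to an already built right-hand expression, rotating the tree when the new operator binds at least as tightly as the one at the root. A minimal standalone sketch of that rotation, using a toy AST whose names (`Expr`, `precedence`, `augment`) are purely illustrative:

// Illustrative sketch of precedence-based re-association.
#[derive(Debug)]
enum Expr {
	Num(i64),
	Bin(Box<Expr>, char, Box<Expr>),
}

fn precedence(op: char) -> u8 {
	match op {
		'*' => 2,
		_ => 1,
	}
}

// Attach `l op _` to an already parsed right-hand expression, descending into the
// left spine whenever the new operator binds at least as tightly as the root one.
fn augment(this: Expr, l: Expr, op: char) -> Expr {
	match this {
		Expr::Bin(left, root_op, right) if precedence(op) >= precedence(root_op) => {
			Expr::Bin(Box::new(augment(*left, l, op)), root_op, right)
		}
		other => Expr::Bin(Box::new(l), op, Box::new(other)),
	}
}

fn main() {
	// "1 + 2 * 3": the right side parses first as `2 * 3`; augmenting with `1 +`
	// keeps the multiplication grouped, i.e. 1 + (2 * 3).
	let rhs = Expr::Bin(Box::new(Expr::Num(2)), '*', Box::new(Expr::Num(3)));
	println!("{:?}", augment(rhs, Expr::Num(1), '+'));
}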
@@ -189,7 +189,7 @@ mod tests {
	use super::super::builtin::{builtin_name, BuiltinName};
	use super::*;
	use crate::sql::Value;
-	use crate::syn::{self, test::Parse};
+	use crate::syn::{self, Parse};

	fn function(i: &str) -> IResult<&str, Function> {
		alt((defined_function, |i| {
@@ -275,9 +275,8 @@ pub fn bracketed_value(i: &str) -> IResult<&str, Part> {

#[cfg(test)]
mod tests {

	use crate::sql::{Dir, Expression, Id, Number, Param, Strand, Table, Thing};
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

	use super::*;

@@ -29,8 +29,8 @@ fn datetime_single(i: &str) -> IResult<&str, Datetime> {

fn datetime_double(i: &str) -> IResult<&str, Datetime> {
	alt((
-		delimited(tag("d\""), cut(datetime_raw), cut(char('\"'))),
-		delimited(char('\"'), datetime_raw, char('\"')),
+		delimited(tag("d\""), cut(datetime_raw), cut(char('"'))),
+		delimited(char('"'), datetime_raw, char('"')),
	))(i)
}

@@ -194,7 +194,7 @@ mod tests {

	// use chrono::Date;

-	use crate::{sql::Value, syn::test::Parse};
+	use crate::{sql::Value, syn::Parse};

	use super::*;

@@ -108,7 +108,7 @@ pub fn tables(i: &str) -> IResult<&str, Tables> {
mod tests {

	use super::*;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

	#[test]
	fn ident_normal() {
@@ -163,7 +163,7 @@ fn char_unicode_bracketed(i: &str) -> IResult<&str, char> {
#[cfg(test)]
mod tests {

-	use crate::{sql::Value, syn::test::Parse};
+	use crate::{sql::Value, syn::Parse};

	use super::*;

@@ -54,7 +54,7 @@ fn uuid_raw(i: &str) -> IResult<&str, Uuid> {
#[cfg(test)]
mod tests {

-	use crate::{sql::Value, syn::test::Parse};
+	use crate::{sql::Value, syn::Parse};

	use super::*;

@@ -7,7 +7,7 @@ mod part;
mod stmt;

mod block;
-pub(crate) mod builtin;
+mod builtin;
mod comment;
mod common;
mod depth;
@@ -79,10 +79,6 @@ pub fn idiom(input: &str) -> Result<Idiom, Error> {
	parse_impl(input, idiom::plain)
}

-pub fn datetime(input: &str) -> Result<Datetime, Error> {
-	parse_impl(input, literal::datetime)
-}
-
pub fn datetime_raw(input: &str) -> Result<Datetime, Error> {
	parse_impl(input, literal::datetime_all_raw)
}
@@ -91,20 +87,12 @@ pub fn duration(input: &str) -> Result<Duration, Error> {
	parse_impl(input, literal::duration)
}

-pub fn path_like(input: &str) -> Result<Value, Error> {
-	parse_impl(input, value::path_like)
-}
-
pub fn range(input: &str) -> Result<Range, Error> {
	parse_impl(input, literal::range)
}

/// Parses a SurrealQL [`Thing`]
pub fn thing(input: &str) -> Result<Thing, Error> {
-	parse_impl(input, thing::thing)
-}
-
-pub fn thing_raw(input: &str) -> Result<Thing, Error> {
	parse_impl(input, thing::thing_raw)
}

@@ -149,6 +149,7 @@ pub fn knn_distance(i: &str) -> IResult<&str, Distance> {
}

pub fn knn(i: &str) -> IResult<&str, Operator> {
+	let (i, _) = opt(tag_no_case("knn"))(i)?;
	let (i, _) = char('<')(i)?;
	let (i, k) = u32(i)?;
	let (i, dist) = opt(knn_distance)(i)?;
@@ -228,4 +229,13 @@ mod tests {
		assert_eq!("<3,EUCLIDEAN>", format!("{}", out));
		assert_eq!(out, Operator::Knn(3, Some(Distance::Euclidean)));
	}
+
+	#[test]
+	fn test_knn_with_prefix() {
+		let res = knn("knn<5>");
+		assert!(res.is_ok());
+		let out = res.unwrap().1;
+		assert_eq!("<5>", format!("{}", out));
+		assert_eq!(out, Operator::Knn(5, None));
+	}
}
@@ -72,7 +72,6 @@ pub fn single(i: &str) -> IResult<&str, Data> {

pub fn values(i: &str) -> IResult<&str, Data> {
	let (i, _) = tag_no_case("(")(i)?;
-	// TODO: look at call tree here.
	let (i, fields) = separated_list1(commas, plain)(i)?;
	let (i, _) = tag_no_case(")")(i)?;
	let (i, _) = shouldbespace(i)?;
@@ -6,7 +6,6 @@ use super::{
	literal::{datetime, duration, ident, table, tables},
	operator::dir,
	thing::thing,
-	// TODO: go through and check every import for alias.
	value::value,
	IResult,
};
@@ -238,7 +237,7 @@ mod tests {

	use super::*;
	use crate::sql::{Datetime, Idiom, Value};
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;
	use std::time;

	#[test]
@@ -121,7 +121,7 @@ fn rule(i: &str) -> IResult<&str, Vec<(PermissionKind, Permission)>> {
#[cfg(test)]
mod test {
	use crate::sql::{Expression, Value};
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

	use super::*;

@@ -24,7 +24,7 @@ fn split_raw(i: &str) -> IResult<&str, Split> {
mod tests {

	use super::*;
-	use crate::{sql::Idiom, syn::test::Parse};
+	use crate::{sql::Idiom, syn::Parse};

	#[test]
	fn split_statement() {
@@ -11,7 +11,6 @@ use crate::{
	iam::Role,
	sql::{statements::DefineUserStatement, Ident, Strand},
};
-use argon2::{password_hash::SaltString, Argon2, PasswordHasher};
use nom::{
	branch::alt,
	bytes::complete::tag_no_case,
@@ -19,7 +18,6 @@ use nom::{
	multi::{many0, separated_list1},
	Err,
};
-use rand::{distributions::Alphanumeric, rngs::OsRng, Rng};

pub fn user(i: &str) -> IResult<&str, DefineUserStatement> {
	let (i, _) = tag_no_case("USER")(i)?;
@@ -35,28 +33,19 @@ pub fn user(i: &str) -> IResult<&str, DefineUserStatement> {
		Ok((i, (name, base, opts)))
	})(i)?;
	// Create the base statement
-	let mut res = DefineUserStatement {
+	let mut res = DefineUserStatement::from_parsed_values(
		name,
		base,
-		roles: vec!["Viewer".into()], // New users get the viewer role by default
-		code: rand::thread_rng()
-			.sample_iter(&Alphanumeric)
-			.take(128)
-			.map(char::from)
-			.collect::<String>(),
-		..Default::default()
-	};
+		vec!["Viewer".into()], // New users get the viewer role by default
+	);
	// Assign any defined options
	for opt in opts {
		match opt {
			DefineUserOption::Password(v) => {
-				res.hash = Argon2::default()
-					.hash_password(v.as_ref(), &SaltString::generate(&mut OsRng))
-					.unwrap()
-					.to_string()
+				res.set_password(&v);
			}
			DefineUserOption::Passhash(v) => {
-				res.hash = v;
+				res.set_passhash(v);
			}
			DefineUserOption::Roles(v) => {
				res.roles = v;
@@ -9,7 +9,7 @@ use nom::{
	branch::alt,
	bytes::complete::tag_no_case,
	character::complete::char,
-	combinator::{cut, opt, value},
+	combinator::{opt, value},
	sequence::tuple,
};

@@ -19,10 +19,10 @@ pub fn option(i: &str) -> IResult<&str, OptionStatement> {
	let (i, n) = ident(i)?;
	let (i, v) = expected(
		"'=' followed by a value for the option",
-		cut(opt(alt((
+		opt(alt((
			value(true, tuple((mightbespace, char('='), mightbespace, tag_no_case("TRUE")))),
			value(false, tuple((mightbespace, char('='), mightbespace, tag_no_case("FALSE")))),
-		)))),
+		))),
	)(i)?;
	Ok((
		i,
@@ -87,7 +87,6 @@ fn disallowed_subquery_statements(i: &str) -> IResult<&str, ()> {

#[cfg(test)]
mod tests {

	use super::*;

	#[test]
@@ -1,4 +1,5 @@
-pub use super::{
+use super::{
+	super::Parse,
	expression::binary as expression,
	function::script_body as script,
	idiom::plain as idiom,
@@ -6,3 +7,48 @@ pub use super::{
	thing::thing,
	value::{array, value},
};
+use nom::Finish;
+
+use crate::sql::{Array, Expression, Idiom, Param, Script, Thing, Value};
+
+impl Parse<Self> for Value {
+	fn parse(val: &str) -> Self {
+		value(val).finish().unwrap().1
+	}
+}
+
+impl Parse<Self> for Array {
+	fn parse(val: &str) -> Self {
+		array(val).finish().unwrap().1
+	}
+}
+
+impl Parse<Self> for Param {
+	fn parse(val: &str) -> Self {
+		param(val).finish().unwrap().1
+	}
+}
+
+impl Parse<Self> for Idiom {
+	fn parse(val: &str) -> Self {
+		idiom(val).finish().unwrap().1
+	}
+}
+
+impl Parse<Self> for Script {
+	fn parse(val: &str) -> Self {
+		script(val).finish().unwrap().1
+	}
+}
+
+impl Parse<Self> for Thing {
+	fn parse(val: &str) -> Self {
+		thing(val).finish().unwrap().1
+	}
+}
+
+impl Parse<Self> for Expression {
+	fn parse(val: &str) -> Self {
+		expression(val).finish().unwrap().1
+	}
+}
@@ -81,7 +81,7 @@ mod tests {
	use crate::sql::object::Object;
	use crate::sql::value::Value;
	use crate::sql::Strand;
-	use crate::syn::test::Parse;
+	use crate::syn::Parse;

	#[test]
	fn thing_normal() {
@@ -249,7 +249,7 @@ mod tests {
		let res = id(sql);
		let out = res.unwrap().1;
		assert_eq!(Id::from("100test"), out);
-		assert_eq!("100test", format!("{}", out));
+		assert_eq!("⟨100test⟩", format!("{}", out));
	}

	#[test]
@@ -9,7 +9,7 @@ use super::{
	depth,
	ending::keyword,
	error::expected,
-	expression::{cast, future, unary},
+	expression::{augment, cast, future, unary},
	function::{builtin_function, defined_function, model},
	idiom::{self, reparse_idiom_start},
	literal::{
@@ -62,7 +62,7 @@ pub fn value(i: &str) -> IResult<&str, Value> {
	let _diving = depth::dive(i)?;
	let (i, r) = cut(value)(i)?;
	let expr = match r {
-		Value::Expression(r) => r.augment(start, o),
+		Value::Expression(r) => augment(*r, start, o),
		_ => Expression::new(start, o, r),
	};
	let v = Value::from(expr);
@@ -179,7 +179,7 @@ pub fn select(i: &str) -> IResult<&str, Value> {
	};
	let (i, r) = cut(value)(i)?;
	let expr = match r {
-		Value::Expression(r) => r.augment(start, op),
+		Value::Expression(r) => augment(*r, start, op),
		_ => Expression::new(start, op, r),
	};
	let v = Value::from(expr);
387 lib/src/syn/v2/lexer/byte.rs (new file)
@@ -0,0 +1,387 @@
use crate::syn::v2::{
	lexer::{
		unicode::{byte, chars},
		Error, Lexer,
	},
	token::{t, Token, TokenKind},
};

impl<'a> Lexer<'a> {
	/// Eats a single line comment.
	pub fn eat_single_line_comment(&mut self) {
		loop {
			let Some(byte) = self.reader.next() else {
				break;
			};
			match byte {
				byte::CR => {
					self.eat(byte::LF);
					break;
				}
				byte::LF => {
					break;
				}
				x if !x.is_ascii() => {
					// -1 because we already ate the byte.
					let backup = self.reader.offset() - 1;
					let char = match self.reader.complete_char(x) {
						Ok(x) => x,
						Err(_) => {
							// let the next token handle the error.
							self.reader.backup(backup);
							break;
						}
					};

					match char {
						chars::LS | chars::PS | chars::NEL => break,
						_ => {}
					}
				}
				_ => {}
			}
		}
		self.set_whitespace_span(self.current_span());
		self.skip_offset();
	}

	/// Eats a multi line comment and returns an error if `*/` would be missing.
	pub fn eat_multi_line_comment(&mut self) -> Result<(), Error> {
		loop {
			let Some(byte) = self.reader.next() else {
				return Err(Error::UnexpectedEof);
			};
			if let b'*' = byte {
				let Some(byte) = self.reader.next() else {
					return Err(Error::UnexpectedEof);
				};
				if b'/' == byte {
					self.set_whitespace_span(self.current_span());
					self.skip_offset();
					return Ok(());
				}
			}
		}
	}

	/// Eat whitespace like spaces tables and new-lines.
	pub fn eat_whitespace(&mut self) {
		loop {
			let Some(byte) = self.reader.peek() else {
				return;
			};
			match byte {
				byte::CR | byte::FF | byte::LF | byte::SP | byte::VT | byte::TAB => {
					self.reader.next();
				}
				x if !x.is_ascii() => {
					let backup = self.reader.offset();
					self.reader.next();
					let char = match self.reader.complete_char(x) {
						Ok(x) => x,
						Err(_) => {
							self.reader.backup(backup);
							break;
						}
					};

					match char {
						'\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}'
						| '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}'
						| '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
						| '\u{3000}' => {}
						_ => {
							self.reader.backup(backup);
							break;
						}
					}
				}
				_ => break,
			}
		}
		self.set_whitespace_span(self.current_span());
		self.skip_offset();
	}

	// re-lexes a `/` token to a regex token.
	pub fn relex_regex(&mut self, token: Token) -> Token {
		debug_assert_eq!(token.kind, t!("/"));
		debug_assert_eq!(token.span.offset + 1, self.last_offset);
		debug_assert_eq!(token.span.len, 1);
		debug_assert_eq!(self.scratch, "");

		self.last_offset = token.span.offset;
		loop {
			match self.reader.next() {
				Some(b'\\') => {
					if let Some(b'/') = self.reader.peek() {
						self.reader.next();
						self.scratch.push('/')
					} else {
						self.scratch.push('\\')
					}
				}
				Some(b'/') => break,
				Some(x) => {
					if x.is_ascii() {
						self.scratch.push(x as char);
					} else {
						match self.reader.complete_char(x) {
							Ok(x) => {
								self.scratch.push(x);
							}
							Err(e) => return self.invalid_token(e.into()),
						}
					}
				}
				None => return self.invalid_token(Error::UnexpectedEof),
			}
		}

		match self.scratch.parse() {
			Ok(x) => {
				self.scratch.clear();
				self.regex = Some(x);
				self.finish_token(TokenKind::Regex)
			}
			Err(e) => self.invalid_token(Error::Regex(e)),
		}
	}

	/// Lex the next token, starting from the given byte.
	pub fn lex_ascii(&mut self, byte: u8) -> Token {
		let kind = match byte {
			b'{' => t!("{"),
			b'}' => t!("}"),
			b'[' => t!("["),
			b']' => t!("]"),
			b')' => t!(")"),
			b'(' => t!("("),
			b';' => t!(";"),
			b',' => t!(","),
			b'@' => t!("@"),
			byte::CR | byte::FF | byte::LF | byte::SP | byte::VT | byte::TAB => {
				self.eat_whitespace();
				return self.next_token_inner();
			}
			b'|' => match self.reader.peek() {
				Some(b'|') => {
					self.reader.next();
					t!("||")
				}
				_ => t!("|"),
			},
			b'&' => match self.reader.peek() {
				Some(b'&') => {
					self.reader.next();
					t!("&&")
				}
				_ => return self.invalid_token(Error::ExpectedEnd('&')),
			},
			b'.' => match self.reader.peek() {
				Some(b'.') => {
					self.reader.next();
					match self.reader.peek() {
						Some(b'.') => {
							self.reader.next();
							t!("...")
						}
						_ => t!(".."),
					}
				}
				_ => t!("."),
			},
			b'!' => match self.reader.peek() {
				Some(b'=') => {
					self.reader.next();
					t!("!=")
				}
				Some(b'~') => {
					self.reader.next();
					t!("!~")
				}
				_ => t!("!"),
			},
			b'?' => match self.reader.peek() {
				Some(b'?') => {
					self.reader.next();
					t!("??")
				}
				Some(b':') => {
					self.reader.next();
					t!("?:")
				}
				Some(b'~') => {
					self.reader.next();
					t!("?~")
				}
				Some(b'=') => {
					self.reader.next();
					t!("?=")
				}
				_ => t!("?"),
			},
			b'<' => match self.reader.peek() {
				Some(b'=') => {
					self.reader.next();
					t!("<=")
				}
				Some(b'-') => {
					self.reader.next();
					match self.reader.peek() {
						Some(b'>') => {
							self.reader.next();
							t!("<->")
						}
						_ => t!("<-"),
					}
				}
				_ => t!("<"),
			},
			b'>' => match self.reader.peek() {
				Some(b'=') => {
					self.reader.next();
					t!(">=")
				}
				_ => t!(">"),
			},
			b'-' => match self.reader.peek() {
				Some(b'>') => {
					self.reader.next();
					t!("->")
				}
				Some(b'-') => {
					self.reader.next();
					self.eat_single_line_comment();
					return self.next_token_inner();
				}
				Some(b'=') => {
					self.reader.next();
					t!("-=")
				}
				_ => t!("-"),
			},
			b'+' => match self.reader.peek() {
				Some(b'=') => {
					self.reader.next();
					t!("+=")
				}
				Some(b'?') => {
					self.reader.next();
					match self.reader.peek() {
						Some(b'=') => {
							self.reader.next();
							t!("+?=")
						}
						_ => return self.invalid_token(Error::ExpectedEnd('=')),
					}
				}
				_ => t!("+"),
			},
			b'/' => match self.reader.peek() {
				Some(b'*') => {
					self.reader.next();
					// A `*/` could be missing which would be invalid.
					if let Err(e) = self.eat_multi_line_comment() {
						return self.invalid_token(e);
					}
					return self.next_token_inner();
				}
				Some(b'/') => {
					self.reader.next();
					self.eat_single_line_comment();
					return self.next_token_inner();
				}
				_ => t!("/"),
			},
			b'*' => match self.reader.peek() {
				Some(b'*') => {
					self.reader.next();
					t!("**")
				}
				Some(b'=') => {
					self.reader.next();
					t!("*=")
				}
				Some(b'~') => {
					self.reader.next();
					t!("*~")
				}
				_ => t!("*"),
			},
			b'=' => match self.reader.peek() {
				Some(b'=') => {
					self.reader.next();
					t!("==")
				}
				_ => t!("="),
			},
			b':' => match self.reader.peek() {
				Some(b':') => {
					self.reader.next();
					t!("::")
				}
				_ => t!(":"),
			},
			b'$' => {
				if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) {
					return self.lex_param();
				}
				t!("$")
			}
			b'#' => {
				self.eat_single_line_comment();
				return self.next_token_inner();
			}
			b'`' => return self.lex_surrounded_ident(true),
			b'"' => return self.lex_strand(true),
			b'\'' => return self.lex_strand(false),
			b'd' => {
				match self.reader.peek() {
					Some(b'"') => {
						self.reader.next();
						return self.lex_datetime(true);
					}
					Some(b'\'') => {
						self.reader.next();
						return self.lex_datetime(false);
					}
					_ => {}
				}
				return self.lex_ident_from_next_byte(b'd');
			}
			b'u' => {
				match self.reader.peek() {
					Some(b'"') => {
						self.reader.next();
						return self.lex_uuid(true);
					}
					Some(b'\'') => {
						self.reader.next();
						return self.lex_uuid(false);
					}
					_ => {}
				}
				return self.lex_ident_from_next_byte(b'u');
			}
			b'r' => match self.reader.peek() {
				Some(b'\"') => {
					self.reader.next();
					t!("r\"")
				}
				Some(b'\'') => {
					self.reader.next();
					t!("r'")
				}
				_ => return self.lex_ident_from_next_byte(byte),
			},
			b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
				return self.lex_ident_from_next_byte(byte);
			}
			b'0'..=b'9' => return self.lex_number(byte),
			x => return self.invalid_token(Error::UnexpectedCharacter(x as char)),
		};

		self.finish_token(kind)
	}
}
37 lib/src/syn/v2/lexer/char.rs (new file)
@@ -0,0 +1,37 @@
use crate::syn::v2::{
	lexer::{CharError, Lexer},
	token::{t, Token},
};

use super::Error;

impl<'a> Lexer<'a> {
	/// lex non-ascii characters.
	///
	/// Should only be called after determining that the byte is not a valid ascii character.
	pub fn lex_char(&mut self, byte: u8) -> Token {
		let c = match self.reader.complete_char(byte) {
			Ok(x) => x,
			Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8),
			Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8),
		};
		let kind = match c {
			'⟨' => return self.lex_surrounded_ident(false),
			'…' => t!("..."),
			'∋' => t!("∋"),
			'∌' => t!("∌"),
			'∈' => t!("∈"),
			'∉' => t!("∉"),
			'⊇' => t!("⊇"),
			'⊃' => t!("⊃"),
			'⊅' => t!("⊅"),
			'⊆' => t!("⊆"),
			'⊂' => t!("⊂"),
			'⊄' => t!("⊄"),
			'×' => t!("×"),
			'÷' => t!("÷"),
			x => return self.invalid_token(Error::UnexpectedCharacter(x)),
		};
		self.finish_token(kind)
	}
}
267 lib/src/syn/v2/lexer/datetime.rs (new file)
@@ -0,0 +1,267 @@
use std::ops::RangeInclusive;

use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc};
use thiserror::Error;

use crate::{
	sql::Datetime,
	syn::v2::token::{Token, TokenKind},
};

use super::{Error as LexError, Lexer};

#[derive(Error, Debug)]
pub enum PartError {
	#[error("value outside of allowed range")]
	OutsideRange,
	#[error("missing digit(s)")]
	MissingDigits,
	#[error("too many digits")]
	TooManyDigits,
}

#[derive(Error, Debug)]
pub enum Error {
	#[error("invalid year, {0}")]
	Year(PartError),
	#[error("invalid month, {0}")]
	Month(PartError),
	#[error("invalid day, {0}")]
	Day(PartError),
	#[error("invalid hour, {0}")]
	Hour(PartError),
	#[error("invalid time minute, {0}")]
	Minute(PartError),
	#[error("invalid second, {0}")]
	Second(PartError),
	#[error("invalid nano_seconds, {0}")]
	NanoSeconds(PartError),
	#[error("invalid time-zone hour, {0}")]
	TimeZoneHour(PartError),
	#[error("invalid time-zone minute, {0}")]
	TimeZoneMinute(PartError),
	#[error("missing seperator `{}`",*(.0) as char)]
	MissingSeparator(u8),
	#[error("expected date-time strand to end")]
	ExpectedEnd,
	#[error("missing time-zone")]
	MissingTimeZone,
	#[error("date does not exist")]
	NonExistantDate,
	#[error("time does not exist")]
	NonExistantTime,
	#[error("time-zone offset too big")]
	TimeZoneOutOfRange,
}

impl<'a> Lexer<'a> {
	/// Lex a date-time strand.
	pub fn lex_datetime(&mut self, double: bool) -> Token {
		match self.lex_datetime_err(double) {
			Ok(x) => {
				self.datetime = Some(x);
				self.finish_token(TokenKind::DateTime)
			}
			Err(e) => self.invalid_token(LexError::DateTime(e)),
		}
	}

	/// Lex datetime without enclosing `"` or `'` but return a result or parser error.
	pub fn lex_datetime_raw_err(&mut self) -> Result<Datetime, Error> {
		let negative = match self.reader.peek() {
			Some(b'+') => {
				self.reader.next();
				false
			}
			Some(b'-') => {
				self.reader.next();
				true
			}
			_ => false,
		};

		let mut year = self.lex_datetime_part(4, 0..=9999).map_err(Error::Year)? as i16;
		if negative {
			year = -year;
		}
		if !self.eat(b'-') {
			return Err(Error::MissingSeparator(b'-'));
		}
		let month = self.lex_datetime_part(2, 1..=12).map_err(Error::Month)?;
		if !self.eat(b'-') {
			return Err(Error::MissingSeparator(b'-'));
		}
		let day = self.lex_datetime_part(2, 1..=31).map_err(Error::Day)?;

		if !self.eat(b'T') {
			let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else {
				return Err(Error::NonExistantDate);
			};
			let time = NaiveTime::default();
			let date_time = NaiveDateTime::new(date, time);

			let datetime = Utc
				.fix()
				.from_local_datetime(&date_time)
				.earliest()
				// this should never panic with a fixed offset.
				.unwrap()
				.with_timezone(&Utc);

			return Ok(Datetime(datetime));
		}

		let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::Hour)?;
		if !self.eat(b':') {
			return Err(Error::MissingSeparator(b':'));
		}

		let minutes = self.lex_datetime_part(2, 0..=59).map_err(Error::Minute)?;

		if !self.eat(b':') {
			return Err(Error::MissingSeparator(b':'));
		}

		let seconds = self.lex_datetime_part(2, 0..=59).map_err(Error::Second)?;

		// nano seconds
		let nano = if let Some(b'.') = self.reader.peek() {
			self.reader.next();
			// check if there is atleast one digit.
			if !matches!(self.reader.peek(), Some(b'0'..=b'9')) {
				return Err(Error::NanoSeconds(PartError::MissingDigits));
			}
			let mut number = 0u32;
			for i in 0..9 {
				let Some(c) = self.reader.peek() else {
					// always invalid token, just let the next section handle the error.
					break;
				};
				if !c.is_ascii_digit() {
					// If digits are missing they are counted as 0's
					for _ in i..9 {
						number *= 10;
					}
					break;
				}
				self.reader.next();
				number *= 10;
				number += (c - b'0') as u32;
			}
			// ensure nano_seconds are at most 9 digits.
			if matches!(self.reader.peek(), Some(b'0'..=b'9')) {
				return Err(Error::NanoSeconds(PartError::TooManyDigits));
			}
			number
		} else {
			0
		};

		// time zone
		let time_zone = match self.reader.peek() {
			Some(b'Z') => {
				self.reader.next();
				None
			}
			Some(x @ (b'-' | b'+')) => {
				self.reader.next();
				let negative = x == b'-';
				let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::TimeZoneHour)? as i32;
				let Some(b':') = self.reader.next() else {
					return Err(Error::MissingSeparator(b':'));
				};
				let minute =
					self.lex_datetime_part(2, 0..=59).map_err(Error::TimeZoneMinute)? as i32;
				let time = hour * 3600 + minute * 60;
				if negative {
					Some(-time)
				} else {
					Some(time)
				}
			}
			_ => return Err(Error::MissingTimeZone),
		};

		// calculate the given datetime from individual parts.
		let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else {
			return Err(Error::NonExistantDate);
		};
		let Some(time) =
			NaiveTime::from_hms_nano_opt(hour as u32, minutes as u32, seconds as u32, nano)
		else {
			return Err(Error::NonExistantTime);
		};

		let date_time = NaiveDateTime::new(date, time);

		let zone = match time_zone {
			None => Utc.fix(),
			Some(offset) => if offset < 0 {
				FixedOffset::west_opt(-offset)
			} else {
				FixedOffset::east_opt(offset)
			}
			.ok_or(Error::TimeZoneOutOfRange)?,
		};

		let datetime = zone
			.from_local_datetime(&date_time)
			.earliest()
			// this should never panic with a fixed offset.
			.unwrap()
			.with_timezone(&Utc);

		Ok(Datetime(datetime))
	}

	/// Lex full datetime but return an result instead of a token.
	pub fn lex_datetime_err(&mut self, double: bool) -> Result<Datetime, Error> {
		let datetime = self.lex_datetime_raw_err()?;

		let end_char = if double {
			b'"'
		} else {
			b'\''
		};

		if !self.eat(end_char) {
			return Err(Error::ExpectedEnd);
		}

		Ok(datetime)
	}

	/// Lexes a digit part of date time.
	///
	/// This function eats an amount of digits and then checks if the value the digits represent
	/// is within the given range.
	pub fn lex_datetime_part(
		&mut self,
		mut amount: u8,
		range: RangeInclusive<u16>,
	) -> Result<u16, PartError> {
		let mut value = 0u16;

		while amount != 0 {
			value *= 10;
			let Some(char) = self.reader.peek() else {
				return Err(PartError::MissingDigits);
			};
			if !char.is_ascii_digit() {
				return Err(PartError::MissingDigits);
			}
			self.reader.next();
			value += (char - b'0') as u16;
			amount -= 1;
		}

		if matches!(self.reader.peek(), Some(b'0'..=b'8')) {
			return Err(PartError::TooManyDigits);
		}

		if !range.contains(&value) {
			return Err(PartError::OutsideRange);
		}
		Ok(value)
	}
}
170 lib/src/syn/v2/lexer/duration.rs (new file)
@@ -0,0 +1,170 @@
use std::time::Duration as StdDuration;
use thiserror::Error;

use crate::{
	sql::duration::{
		Duration, SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK,
		SECONDS_PER_YEAR,
	},
	syn::v2::token::{Token, TokenKind},
};

use super::{Error as LexError, Lexer};

#[derive(Error, Debug)]
pub enum Error {
	#[error("invalid duration suffix")]
	InvalidSuffix,
	#[error("duration value overflowed")]
	Overflow,
}

impl<'a> Lexer<'a> {
	/// Lex a duration.
	///
	/// Expect the lexer to have already eaten the digits starting the duration.
	pub fn lex_duration(&mut self) -> Token {
		match self.lex_duration_err() {
			Ok(x) => {
				self.duration = Some(x);
				self.finish_token(TokenKind::Duration)
			}
			Err(e) => self.invalid_token(LexError::Duration(e)),
		}
	}

	fn invalid_suffix_duration(&mut self) -> Error {
		// eat the whole suffix.
		while let Some(x) = self.reader.peek() {
			if !x.is_ascii_alphanumeric() {
				break;
			}
			self.reader.next();
		}
		Error::InvalidSuffix
	}

	/// Lex a duration,
	///
	/// Should only be called from lexing a number.
	///
	/// Expects any number but at least one numeric characters be pushed into scratch.
	pub fn lex_duration_err(&mut self) -> Result<Duration, Error> {
		let mut duration = StdDuration::ZERO;

		let mut current_value = 0u64;
		// use the existing eat span to generate the current value.
		// span already contains
		let mut span = self.current_span();
		span.len -= 1;
		for b in self.scratch.as_bytes() {
			debug_assert!(b.is_ascii_digit(), "`{}` is not a digit", b);
			current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
			current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
		}
		self.scratch.clear();

		loop {
			let Some(next) = self.reader.peek() else {
				return Err(Error::InvalidSuffix);
			};

			// Match the suffix.
			let new_duration = match next {
				x @ (b'n' | b'u') => {
					// Nano or micro suffix
					self.reader.next();
					if !self.eat(b's') {
						return Err(Error::InvalidSuffix);
					};

					if x == b'n' {
						StdDuration::from_nanos(current_value)
					} else {
						StdDuration::from_micros(current_value)
					}
				}
				// Starting byte of 'µ'
				0xc2 => {
					self.reader.next();
					// Second byte of 'µ'.
					// Always consume as the next byte will always be part of a two byte character.
					if !self.eat(0xb5) {
						return Err(self.invalid_suffix_duration());
					}

					if !self.eat(b's') {
						return Err(self.invalid_suffix_duration());
					}

					StdDuration::from_micros(current_value)
				}
				b'm' => {
					self.reader.next();
					// Either milli or minute
					let is_milli = self.eat(b's');

					if is_milli {
						StdDuration::from_millis(current_value)
					} else {
						let Some(number) = current_value.checked_mul(SECONDS_PER_MINUTE) else {
							return Err(Error::Overflow);
						};
						StdDuration::from_secs(number)
					}
				}
				x @ (b's' | b'h' | b'd' | b'w' | b'y') => {
					self.reader.next();
					// second, hour, day, week or year.

					let new_duration = match x {
						b's' => Some(StdDuration::from_secs(current_value)),
						b'h' => {
							current_value.checked_mul(SECONDS_PER_HOUR).map(StdDuration::from_secs)
						}
						b'd' => {
							current_value.checked_mul(SECONDS_PER_DAY).map(StdDuration::from_secs)
						}
						b'w' => {
							current_value.checked_mul(SECONDS_PER_WEEK).map(StdDuration::from_secs)
						}
						b'y' => {
							current_value.checked_mul(SECONDS_PER_YEAR).map(StdDuration::from_secs)
						}
						_ => unreachable!(),
					};

					let Some(new_duration) = new_duration else {
						return Err(Error::Overflow);
					};
					new_duration
				}
				_ => {
					return Err(self.invalid_suffix_duration());
				}
			};

			duration = duration.checked_add(new_duration).ok_or(Error::Overflow)?;

			let next = self.reader.peek();
			match next {
				// there was some remaining alphabetic characters after the valid suffix, so the
				// suffix is invalid.
				Some(b'a'..=b'z' | b'A'..=b'Z' | b'_') => {
					return Err(self.invalid_suffix_duration())
				}
				Some(b'0'..=b'9') => {} // Duration continues.
				_ => return Ok(Duration(duration)),
			}

			current_value = 0;
			// Eat all the next numbers
			while let Some(b @ b'0'..=b'9') = self.reader.peek() {
				self.reader.next();
				current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?;
				current_value =
					current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?;
			}
		}
	}
}
164
lib/src/syn/v2/lexer/ident.rs
Normal file
164
lib/src/syn/v2/lexer/ident.rs
Normal file
|
@ -0,0 +1,164 @@
|
||||||
|
use std::mem;
|
||||||
|
|
||||||
|
use unicase::UniCase;
|
||||||
|
|
||||||
|
use crate::syn::v2::lexer::{keywords::KEYWORDS, Error, Lexer};
|
||||||
|
use crate::syn::v2::token::{NumberKind, Token, TokenKind};
|
||||||
|
|
||||||
|
use super::unicode::{chars, U8Ext};
|
||||||
|
|
||||||
|
impl<'a> Lexer<'a> {
|
||||||
|
/// Lex a parameter in the form of `$[a-zA-Z0-9_]*`
|
||||||
|
///
|
||||||
|
/// # Lexer State
|
||||||
|
/// Expected the lexer to have already eaten the param starting `$`
|
||||||
|
pub fn lex_param(&mut self) -> Token {
|
||||||
|
debug_assert_eq!(self.scratch, "");
|
||||||
|
loop {
|
||||||
|
if let Some(x) = self.reader.peek() {
|
||||||
|
if x.is_ascii_alphanumeric() || x == b'_' {
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
self.reader.next();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return self.finish_token(TokenKind::Parameter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex an not surrounded identifier in the form of `[a-zA-Z0-9_]*`
|
||||||
|
///
|
||||||
|
/// The start byte should already a valid byte of the identifier.
|
||||||
|
///
|
||||||
|
/// When calling the caller should already know that the token can't be any other token covered
|
||||||
|
/// by `[a-zA-Z0-9_]*`.
|
||||||
|
pub fn lex_ident_from_next_byte(&mut self, start: u8) -> Token {
|
||||||
|
debug_assert!(matches!(start, b'a'..=b'z' | b'A'..=b'Z' | b'_'));
|
||||||
|
debug_assert_eq!(self.scratch, "");
|
||||||
|
self.scratch.push(start as char);
|
||||||
|
self.lex_ident()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex a not surrounded identfier.
|
||||||
|
///
|
||||||
|
/// The scratch should contain only identifier valid chars.
|
||||||
|
pub fn lex_ident(&mut self) -> Token {
|
||||||
|
loop {
|
||||||
|
if let Some(x) = self.reader.peek() {
|
||||||
|
if x.is_identifier_continue() {
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
self.reader.next();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// When finished parsing the identifier, try to match it to an keyword.
|
||||||
|
// If there is one, return it as the keyword. Original identifier can be reconstructed
|
||||||
|
// from the token.
|
||||||
|
if let Some(x) = KEYWORDS.get(&UniCase::ascii(&self.scratch)).copied() {
|
||||||
|
self.scratch.clear();
|
||||||
|
return self.finish_token(x);
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.scratch == "NaN" {
|
||||||
|
self.scratch.clear();
|
||||||
|
return self.finish_token(TokenKind::Number(NumberKind::NaN));
|
||||||
|
} else {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return self.finish_token(TokenKind::Identifier);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex an ident which is surround by delimiters.
|
||||||
|
pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token {
|
||||||
|
match self.lex_surrounded_ident_err(is_backtick) {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(e) => {
|
||||||
|
self.scratch.clear();
|
||||||
|
self.invalid_token(e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex an ident surrounded either by `⟨⟩` or `\`\``
|
||||||
|
pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result<Token, Error> {
|
||||||
|
loop {
|
||||||
|
let Some(x) = self.reader.next() else {
|
||||||
|
let end_char = if is_backtick {
|
||||||
|
'`'
|
||||||
|
} else {
|
||||||
|
'⟩'
|
||||||
|
};
|
||||||
|
return Err(Error::ExpectedEnd(end_char));
|
||||||
|
};
|
||||||
|
if x.is_ascii() {
|
||||||
|
match x {
|
||||||
|
b'`' if is_backtick => {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Identifier));
|
||||||
|
}
|
||||||
|
b'\0' => {
|
||||||
|
// null bytes not allowed
|
||||||
|
return Err(Error::UnexpectedCharacter('\0'));
|
||||||
|
}
|
||||||
|
b'\\' if is_backtick => {
|
||||||
|
// handle escape sequences.
|
||||||
|
// This is compliant with the original parser, which didn't permit
|
||||||
|
// escape sequences in `⟨⟩` surrounded idents.
|
||||||
|
let Some(next) = self.reader.next() else {
|
||||||
|
let end_char = if is_backtick {
|
||||||
|
'`'
|
||||||
|
} else {
|
||||||
|
'⟩'
|
||||||
|
};
|
||||||
|
return Err(Error::ExpectedEnd(end_char));
|
||||||
|
};
|
||||||
|
match next {
|
||||||
|
b'\\' => {
|
||||||
|
self.scratch.push('\\');
|
||||||
|
}
|
||||||
|
b'`' => {
|
||||||
|
self.scratch.push('`');
|
||||||
|
}
|
||||||
|
b'/' => {
|
||||||
|
self.scratch.push('/');
|
||||||
|
}
|
||||||
|
b'b' => {
|
||||||
|
self.scratch.push(chars::BS);
|
||||||
|
}
|
||||||
|
b'f' => {
|
||||||
|
self.scratch.push(chars::FF);
|
||||||
|
}
|
||||||
|
b'n' => {
|
||||||
|
self.scratch.push(chars::LF);
|
||||||
|
}
|
||||||
|
b'r' => {
|
||||||
|
self.scratch.push(chars::CR);
|
||||||
|
}
|
||||||
|
b't' => {
|
||||||
|
self.scratch.push(chars::TAB);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let char = if next.is_ascii() {
|
||||||
|
next as char
|
||||||
|
} else {
|
||||||
|
self.reader.complete_char(next)?
|
||||||
|
};
|
||||||
|
return Err(Error::InvalidEscapeCharacter(char));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x => self.scratch.push(x as char),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let c = self.reader.complete_char(x)?;
|
||||||
|
if !is_backtick && c == '⟩' {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Identifier));
|
||||||
|
}
|
||||||
|
self.scratch.push(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
97
lib/src/syn/v2/lexer/js.rs
Normal file
@ -0,0 +1,97 @@
|
||||||
|
use crate::syn::v2::token::Span;
|
||||||
|
|
||||||
|
use super::{unicode::chars::JS_LINE_TERIMATORS, Error, Lexer};
|
||||||
|
|
||||||
|
impl Lexer<'_> {
|
||||||
|
/// Lex the body of a js function.
|
||||||
|
///
|
||||||
|
/// This function will never be called while lexing normally.
|
||||||
|
pub fn lex_js_function_body(&mut self) -> Result<String, (Error, Span)> {
|
||||||
|
self.lex_js_function_body_inner().map_err(|e| (e, self.current_span()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex the body of a js function.
|
||||||
|
fn lex_js_function_body_inner(&mut self) -> Result<String, Error> {
|
||||||
|
let mut block_depth = 1;
|
||||||
|
loop {
|
||||||
|
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
|
||||||
|
match byte {
|
||||||
|
b'`' => self.lex_js_string(b'`')?,
|
||||||
|
b'\'' => self.lex_js_string(b'\'')?,
|
||||||
|
b'\"' => self.lex_js_string(b'\"')?,
|
||||||
|
b'/' => match self.reader.peek() {
|
||||||
|
Some(b'/') => {
|
||||||
|
self.reader.next();
|
||||||
|
self.lex_js_single_comment()?;
|
||||||
|
}
|
||||||
|
Some(b'*') => {
|
||||||
|
self.reader.next();
|
||||||
|
self.lex_js_multi_comment()?
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
},
|
||||||
|
b'{' => {
|
||||||
|
block_depth += 1;
|
||||||
|
}
|
||||||
|
b'}' => {
|
||||||
|
block_depth -= 1;
|
||||||
|
if block_depth == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x if !x.is_ascii() => {
|
||||||
|
// check for invalid characters.
|
||||||
|
self.reader.complete_char(x)?;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut span = self.current_span();
|
||||||
|
// remove the `}` from the source text;
|
||||||
|
span.len -= 1;
|
||||||
|
// lexer ensures that it is valid utf8
|
||||||
|
let source = String::from_utf8(self.reader.span(span).to_vec()).unwrap();
|
||||||
|
Ok(source)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// lex a js string with the given delimiter.
|
||||||
|
fn lex_js_string(&mut self, enclosing_byte: u8) -> Result<(), Error> {
|
||||||
|
loop {
|
||||||
|
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
|
||||||
|
if byte == enclosing_byte {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
if byte == b'\\' {
|
||||||
|
self.reader.next();
|
||||||
|
}
|
||||||
|
// check for invalid characters.
|
||||||
|
self.reader.convert_to_char(byte)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// lex a single line js comment.
|
||||||
|
fn lex_js_single_comment(&mut self) -> Result<(), Error> {
|
||||||
|
loop {
|
||||||
|
let Some(byte) = self.reader.next() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let char = self.reader.convert_to_char(byte)?;
|
||||||
|
if JS_LINE_TERIMATORS.contains(&char) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// lex a multi line js comment.
|
||||||
|
fn lex_js_multi_comment(&mut self) -> Result<(), Error> {
|
||||||
|
loop {
|
||||||
|
let byte = self.reader.next().ok_or(Error::UnexpectedEof)?;
|
||||||
|
if byte == b'*' && self.reader.peek() == Some(b'/') {
|
||||||
|
self.reader.next();
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
// check for invalid characters.
|
||||||
|
self.reader.convert_to_char(byte)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
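The function-body lexer above is essentially a brace-depth scan that also skips strings and comments so braces inside them are ignored. A minimal sketch of the depth-tracking idea alone (an assumed simplification, not the lexer's actual code):

// Find the index of the `}` that closes an already-opened block, tracking
// nesting depth only (the real lexer additionally skips strings/comments).
fn find_matching_brace(body: &[u8]) -> Option<usize> {
    let mut depth = 1usize;
    for (i, &b) in body.iter().enumerate() {
        match b {
            b'{' => depth += 1,
            b'}' => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None // unexpected end of input
}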
|
285
lib/src/syn/v2/lexer/keywords.rs
Normal file
@ -0,0 +1,285 @@
|
||||||
|
use crate::{
|
||||||
|
sql::{language::Language, Algorithm},
|
||||||
|
syn::v2::token::{DistanceKind, Keyword, TokenKind},
|
||||||
|
};
|
||||||
|
use phf::phf_map;
|
||||||
|
use unicase::UniCase;
|
||||||
|
|
||||||
|
/// A map for mapping keyword strings to a TokenKind.
|
||||||
|
pub(crate) static KEYWORDS: phf::Map<UniCase<&'static str>, TokenKind> = phf_map! {
|
||||||
|
// Keywords
|
||||||
|
UniCase::ascii("AFTER") => TokenKind::Keyword(Keyword::After),
|
||||||
|
UniCase::ascii("ALL") => TokenKind::Keyword(Keyword::All),
|
||||||
|
UniCase::ascii("ANALYZE") => TokenKind::Keyword(Keyword::Analyze),
|
||||||
|
UniCase::ascii("ANALYZER") => TokenKind::Keyword(Keyword::Analyzer),
|
||||||
|
UniCase::ascii("AS") => TokenKind::Keyword(Keyword::As),
|
||||||
|
UniCase::ascii("ASCENDING") => TokenKind::Keyword(Keyword::Ascending),
|
||||||
|
UniCase::ascii("ASC") => TokenKind::Keyword(Keyword::Ascending),
|
||||||
|
UniCase::ascii("ASCII") => TokenKind::Keyword(Keyword::Ascii),
|
||||||
|
UniCase::ascii("ASSERT") => TokenKind::Keyword(Keyword::Assert),
|
||||||
|
UniCase::ascii("AT") => TokenKind::Keyword(Keyword::At),
|
||||||
|
UniCase::ascii("BEFORE") => TokenKind::Keyword(Keyword::Before),
|
||||||
|
UniCase::ascii("BEGIN") => TokenKind::Keyword(Keyword::Begin),
|
||||||
|
UniCase::ascii("BLANK") => TokenKind::Keyword(Keyword::Blank),
|
||||||
|
UniCase::ascii("BM25") => TokenKind::Keyword(Keyword::Bm25),
|
||||||
|
UniCase::ascii("BREAK") => TokenKind::Keyword(Keyword::Break),
|
||||||
|
UniCase::ascii("BY") => TokenKind::Keyword(Keyword::By),
|
||||||
|
UniCase::ascii("CAMEL") => TokenKind::Keyword(Keyword::Camel),
|
||||||
|
UniCase::ascii("CANCEL") => TokenKind::Keyword(Keyword::Cancel),
|
||||||
|
UniCase::ascii("CHANGEFEED") => TokenKind::Keyword(Keyword::ChangeFeed),
|
||||||
|
UniCase::ascii("CHANGES") => TokenKind::Keyword(Keyword::Changes),
|
||||||
|
UniCase::ascii("CAPACITY") => TokenKind::Keyword(Keyword::Capacity),
|
||||||
|
UniCase::ascii("CLASS") => TokenKind::Keyword(Keyword::Class),
|
||||||
|
UniCase::ascii("COMMENT") => TokenKind::Keyword(Keyword::Comment),
|
||||||
|
UniCase::ascii("COMMIT") => TokenKind::Keyword(Keyword::Commit),
|
||||||
|
UniCase::ascii("CONTENT") => TokenKind::Keyword(Keyword::Content),
|
||||||
|
UniCase::ascii("CONTINUE") => TokenKind::Keyword(Keyword::Continue),
|
||||||
|
UniCase::ascii("CREATE") => TokenKind::Keyword(Keyword::Create),
|
||||||
|
UniCase::ascii("DATABASE") => TokenKind::Keyword(Keyword::Database),
|
||||||
|
UniCase::ascii("DB") => TokenKind::Keyword(Keyword::Database),
|
||||||
|
UniCase::ascii("DEFAULT") => TokenKind::Keyword(Keyword::Default),
|
||||||
|
UniCase::ascii("DEFINE") => TokenKind::Keyword(Keyword::Define),
|
||||||
|
UniCase::ascii("DELETE") => TokenKind::Keyword(Keyword::Delete),
|
||||||
|
UniCase::ascii("DESCENDING") => TokenKind::Keyword(Keyword::Descending),
|
||||||
|
UniCase::ascii("DESC") => TokenKind::Keyword(Keyword::Descending),
|
||||||
|
UniCase::ascii("DIFF") => TokenKind::Keyword(Keyword::Diff),
|
||||||
|
UniCase::ascii("DIMENSION") => TokenKind::Keyword(Keyword::Dimension),
|
||||||
|
UniCase::ascii("DISTANCE") => TokenKind::Keyword(Keyword::Distance),
|
||||||
|
UniCase::ascii("DIST") => TokenKind::Keyword(Keyword::Distance),
|
||||||
|
UniCase::ascii("DOC_IDS_CACHE") => TokenKind::Keyword(Keyword::DocIdsCache),
|
||||||
|
UniCase::ascii("DOC_IDS_ORDER") => TokenKind::Keyword(Keyword::DocIdsOrder),
|
||||||
|
UniCase::ascii("DOC_LENGTHS_CACHE") => TokenKind::Keyword(Keyword::DocLengthsCache),
|
||||||
|
UniCase::ascii("DOC_LENGTHS_ORDER") => TokenKind::Keyword(Keyword::DocLengthsOrder),
|
||||||
|
UniCase::ascii("DROP") => TokenKind::Keyword(Keyword::Drop),
|
||||||
|
UniCase::ascii("DUPLICATE") => TokenKind::Keyword(Keyword::Duplicate),
|
||||||
|
UniCase::ascii("EDGENGRAM") => TokenKind::Keyword(Keyword::Edgengram),
|
||||||
|
UniCase::ascii("EVENT") => TokenKind::Keyword(Keyword::Event),
|
||||||
|
UniCase::ascii("ELSE") => TokenKind::Keyword(Keyword::Else),
|
||||||
|
UniCase::ascii("END") => TokenKind::Keyword(Keyword::End),
|
||||||
|
UniCase::ascii("EXPLAIN") => TokenKind::Keyword(Keyword::Explain),
|
||||||
|
UniCase::ascii("false") => TokenKind::Keyword(Keyword::False),
|
||||||
|
UniCase::ascii("FETCH") => TokenKind::Keyword(Keyword::Fetch),
|
||||||
|
UniCase::ascii("FIELD") => TokenKind::Keyword(Keyword::Field),
|
||||||
|
UniCase::ascii("FIELDS") => TokenKind::Keyword(Keyword::Fields),
|
||||||
|
UniCase::ascii("COLUMNS") => TokenKind::Keyword(Keyword::Fields),
|
||||||
|
UniCase::ascii("FILTERS") => TokenKind::Keyword(Keyword::Filters),
|
||||||
|
UniCase::ascii("FLEXIBLE") => TokenKind::Keyword(Keyword::Flexible),
|
||||||
|
UniCase::ascii("FLEXI") => TokenKind::Keyword(Keyword::Flexible),
|
||||||
|
UniCase::ascii("FLEX") => TokenKind::Keyword(Keyword::Flexible),
|
||||||
|
UniCase::ascii("FOR") => TokenKind::Keyword(Keyword::For),
|
||||||
|
UniCase::ascii("FROM") => TokenKind::Keyword(Keyword::From),
|
||||||
|
UniCase::ascii("FULL") => TokenKind::Keyword(Keyword::Full),
|
||||||
|
UniCase::ascii("FUNCTION") => TokenKind::Keyword(Keyword::Function),
|
||||||
|
UniCase::ascii("GROUP") => TokenKind::Keyword(Keyword::Group),
|
||||||
|
UniCase::ascii("HIGHLIGHTS") => TokenKind::Keyword(Keyword::Highlights),
|
||||||
|
UniCase::ascii("IGNORE") => TokenKind::Keyword(Keyword::Ignore),
|
||||||
|
UniCase::ascii("INDEX") => TokenKind::Keyword(Keyword::Index),
|
||||||
|
UniCase::ascii("INFO") => TokenKind::Keyword(Keyword::Info),
|
||||||
|
UniCase::ascii("INSERT") => TokenKind::Keyword(Keyword::Insert),
|
||||||
|
UniCase::ascii("INTO") => TokenKind::Keyword(Keyword::Into),
|
||||||
|
UniCase::ascii("IF") => TokenKind::Keyword(Keyword::If),
|
||||||
|
UniCase::ascii("IS") => TokenKind::Keyword(Keyword::Is),
|
||||||
|
UniCase::ascii("KEY") => TokenKind::Keyword(Keyword::Key),
|
||||||
|
UniCase::ascii("KILL") => TokenKind::Keyword(Keyword::Kill),
|
||||||
|
UniCase::ascii("KNN") => TokenKind::Keyword(Keyword::Knn),
|
||||||
|
UniCase::ascii("LET") => TokenKind::Keyword(Keyword::Let),
|
||||||
|
UniCase::ascii("LIMIT") => TokenKind::Keyword(Keyword::Limit),
|
||||||
|
UniCase::ascii("LIVE") => TokenKind::Keyword(Keyword::Live),
|
||||||
|
UniCase::ascii("LOWERCASE") => TokenKind::Keyword(Keyword::Lowercase),
|
||||||
|
UniCase::ascii("MERGE") => TokenKind::Keyword(Keyword::Merge),
|
||||||
|
UniCase::ascii("MODEL") => TokenKind::Keyword(Keyword::Model),
|
||||||
|
UniCase::ascii("MTREE") => TokenKind::Keyword(Keyword::MTree),
|
||||||
|
UniCase::ascii("MTREE_CACHE") => TokenKind::Keyword(Keyword::MTreeCache),
|
||||||
|
UniCase::ascii("NAMESPACE") => TokenKind::Keyword(Keyword::Namespace),
|
||||||
|
UniCase::ascii("NS") => TokenKind::Keyword(Keyword::Namespace),
|
||||||
|
UniCase::ascii("NGRAM") => TokenKind::Keyword(Keyword::Ngram),
|
||||||
|
UniCase::ascii("NO") => TokenKind::Keyword(Keyword::No),
|
||||||
|
UniCase::ascii("NOINDEX") => TokenKind::Keyword(Keyword::NoIndex),
|
||||||
|
UniCase::ascii("NONE") => TokenKind::Keyword(Keyword::None),
|
||||||
|
UniCase::ascii("NULL") => TokenKind::Keyword(Keyword::Null),
|
||||||
|
UniCase::ascii("NUMERIC") => TokenKind::Keyword(Keyword::Numeric),
|
||||||
|
UniCase::ascii("OMIT") => TokenKind::Keyword(Keyword::Omit),
|
||||||
|
UniCase::ascii("ON") => TokenKind::Keyword(Keyword::On),
|
||||||
|
UniCase::ascii("ONLY") => TokenKind::Keyword(Keyword::Only),
|
||||||
|
UniCase::ascii("OPTION") => TokenKind::Keyword(Keyword::Option),
|
||||||
|
UniCase::ascii("ORDER") => TokenKind::Keyword(Keyword::Order),
|
||||||
|
UniCase::ascii("PARALLEL") => TokenKind::Keyword(Keyword::Parallel),
|
||||||
|
UniCase::ascii("PARAM") => TokenKind::Keyword(Keyword::Param),
|
||||||
|
UniCase::ascii("PASSHASH") => TokenKind::Keyword(Keyword::Passhash),
|
||||||
|
UniCase::ascii("PASSWORD") => TokenKind::Keyword(Keyword::Password),
|
||||||
|
UniCase::ascii("PATCH") => TokenKind::Keyword(Keyword::Patch),
|
||||||
|
UniCase::ascii("PERMISSIONS") => TokenKind::Keyword(Keyword::Permissions),
|
||||||
|
UniCase::ascii("POSTINGS_CACHE") => TokenKind::Keyword(Keyword::PostingsCache),
|
||||||
|
UniCase::ascii("POSTINGS_ORDER") => TokenKind::Keyword(Keyword::PostingsOrder),
|
||||||
|
UniCase::ascii("PUNCT") => TokenKind::Keyword(Keyword::Punct),
|
||||||
|
UniCase::ascii("RELATE") => TokenKind::Keyword(Keyword::Relate),
|
||||||
|
UniCase::ascii("REMOVE") => TokenKind::Keyword(Keyword::Remove),
|
||||||
|
UniCase::ascii("REPLACE") => TokenKind::Keyword(Keyword::Replace),
|
||||||
|
UniCase::ascii("RETURN") => TokenKind::Keyword(Keyword::Return),
|
||||||
|
UniCase::ascii("ROLES") => TokenKind::Keyword(Keyword::Roles),
|
||||||
|
UniCase::ascii("ROOT") => TokenKind::Keyword(Keyword::Root),
|
||||||
|
UniCase::ascii("KV") => TokenKind::Keyword(Keyword::Root),
|
||||||
|
UniCase::ascii("SCHEMAFULL") => TokenKind::Keyword(Keyword::Schemafull),
|
||||||
|
UniCase::ascii("SCHEMAFUL") => TokenKind::Keyword(Keyword::Schemafull),
|
||||||
|
UniCase::ascii("SCHEMALESS") => TokenKind::Keyword(Keyword::Schemaless),
|
||||||
|
UniCase::ascii("SCOPE") => TokenKind::Keyword(Keyword::Scope),
|
||||||
|
UniCase::ascii("SC") => TokenKind::Keyword(Keyword::Scope),
|
||||||
|
UniCase::ascii("SEARCH") => TokenKind::Keyword(Keyword::Search),
|
||||||
|
UniCase::ascii("SELECT") => TokenKind::Keyword(Keyword::Select),
|
||||||
|
UniCase::ascii("SESSION") => TokenKind::Keyword(Keyword::Session),
|
||||||
|
UniCase::ascii("SET") => TokenKind::Keyword(Keyword::Set),
|
||||||
|
UniCase::ascii("SHOW") => TokenKind::Keyword(Keyword::Show),
|
||||||
|
UniCase::ascii("SIGNIN") => TokenKind::Keyword(Keyword::Signin),
|
||||||
|
UniCase::ascii("SIGNUP") => TokenKind::Keyword(Keyword::Signup),
|
||||||
|
UniCase::ascii("SINCE") => TokenKind::Keyword(Keyword::Since),
|
||||||
|
UniCase::ascii("SLEEP") => TokenKind::Keyword(Keyword::Sleep),
|
||||||
|
UniCase::ascii("SNOWBALL") => TokenKind::Keyword(Keyword::Snowball),
|
||||||
|
UniCase::ascii("SPLIT") => TokenKind::Keyword(Keyword::Split),
|
||||||
|
UniCase::ascii("START") => TokenKind::Keyword(Keyword::Start),
|
||||||
|
UniCase::ascii("TABLE") => TokenKind::Keyword(Keyword::Table),
|
||||||
|
UniCase::ascii("TB") => TokenKind::Keyword(Keyword::Table),
|
||||||
|
UniCase::ascii("TERMS_CACHE") => TokenKind::Keyword(Keyword::TermsCache),
|
||||||
|
UniCase::ascii("TERMS_ORDER") => TokenKind::Keyword(Keyword::TermsOrder),
|
||||||
|
UniCase::ascii("THEN") => TokenKind::Keyword(Keyword::Then),
|
||||||
|
UniCase::ascii("THROW") => TokenKind::Keyword(Keyword::Throw),
|
||||||
|
UniCase::ascii("TIMEOUT") => TokenKind::Keyword(Keyword::Timeout),
|
||||||
|
UniCase::ascii("TOKENIZERS") => TokenKind::Keyword(Keyword::Tokenizers),
|
||||||
|
UniCase::ascii("TOKEN") => TokenKind::Keyword(Keyword::Token),
|
||||||
|
UniCase::ascii("TRANSACTION") => TokenKind::Keyword(Keyword::Transaction),
|
||||||
|
UniCase::ascii("true") => TokenKind::Keyword(Keyword::True),
|
||||||
|
UniCase::ascii("TYPE") => TokenKind::Keyword(Keyword::Type),
|
||||||
|
UniCase::ascii("UNIQUE") => TokenKind::Keyword(Keyword::Unique),
|
||||||
|
UniCase::ascii("UNSET") => TokenKind::Keyword(Keyword::Unset),
|
||||||
|
UniCase::ascii("UPDATE") => TokenKind::Keyword(Keyword::Update),
|
||||||
|
UniCase::ascii("UPPERCASE") => TokenKind::Keyword(Keyword::Uppercase),
|
||||||
|
UniCase::ascii("USE") => TokenKind::Keyword(Keyword::Use),
|
||||||
|
UniCase::ascii("USER") => TokenKind::Keyword(Keyword::User),
|
||||||
|
UniCase::ascii("VALUE") => TokenKind::Keyword(Keyword::Value),
|
||||||
|
UniCase::ascii("VALUES") => TokenKind::Keyword(Keyword::Values),
|
||||||
|
UniCase::ascii("VERSION") => TokenKind::Keyword(Keyword::Version),
|
||||||
|
UniCase::ascii("VS") => TokenKind::Keyword(Keyword::Vs),
|
||||||
|
UniCase::ascii("WHEN") => TokenKind::Keyword(Keyword::When),
|
||||||
|
UniCase::ascii("WHERE") => TokenKind::Keyword(Keyword::Where),
|
||||||
|
UniCase::ascii("WITH") => TokenKind::Keyword(Keyword::With),
|
||||||
|
UniCase::ascii("ALLINSIDE") => TokenKind::Keyword(Keyword::AllInside),
|
||||||
|
UniCase::ascii("ANDKW") => TokenKind::Keyword(Keyword::AndKw),
|
||||||
|
UniCase::ascii("ANYINSIDE") => TokenKind::Keyword(Keyword::AnyInside),
|
||||||
|
UniCase::ascii("INSIDE") => TokenKind::Keyword(Keyword::Inside),
|
||||||
|
UniCase::ascii("INTERSECTS") => TokenKind::Keyword(Keyword::Intersects),
|
||||||
|
UniCase::ascii("NONEINSIDE") => TokenKind::Keyword(Keyword::NoneInside),
|
||||||
|
UniCase::ascii("NOTINSIDE") => TokenKind::Keyword(Keyword::NotInside),
|
||||||
|
UniCase::ascii("OR") => TokenKind::Keyword(Keyword::OrKw),
|
||||||
|
UniCase::ascii("OUTSIDE") => TokenKind::Keyword(Keyword::Outside),
|
||||||
|
UniCase::ascii("NOT") => TokenKind::Keyword(Keyword::Not),
|
||||||
|
UniCase::ascii("AND") => TokenKind::Keyword(Keyword::And),
|
||||||
|
UniCase::ascii("COLLATE") => TokenKind::Keyword(Keyword::Collate),
|
||||||
|
UniCase::ascii("CONTAINSALL") => TokenKind::Keyword(Keyword::ContainsAll),
|
||||||
|
UniCase::ascii("CONTAINSANY") => TokenKind::Keyword(Keyword::ContainsAny),
|
||||||
|
UniCase::ascii("CONTAINSNONE") => TokenKind::Keyword(Keyword::ContainsNone),
|
||||||
|
UniCase::ascii("CONTAINSNOT") => TokenKind::Keyword(Keyword::ContainsNot),
|
||||||
|
UniCase::ascii("CONTAINS") => TokenKind::Keyword(Keyword::Contains),
|
||||||
|
UniCase::ascii("IN") => TokenKind::Keyword(Keyword::In),
|
||||||
|
|
||||||
|
UniCase::ascii("ANY") => TokenKind::Keyword(Keyword::Any),
|
||||||
|
UniCase::ascii("ARRAY") => TokenKind::Keyword(Keyword::Array),
|
||||||
|
UniCase::ascii("GEOMETRY") => TokenKind::Keyword(Keyword::Geometry),
|
||||||
|
UniCase::ascii("RECORD") => TokenKind::Keyword(Keyword::Record),
|
||||||
|
UniCase::ascii("FUTURE") => TokenKind::Keyword(Keyword::Future),
|
||||||
|
UniCase::ascii("BOOL") => TokenKind::Keyword(Keyword::Bool),
|
||||||
|
UniCase::ascii("BYTES") => TokenKind::Keyword(Keyword::Bytes),
|
||||||
|
UniCase::ascii("DATETIME") => TokenKind::Keyword(Keyword::Datetime),
|
||||||
|
UniCase::ascii("DECIMAL") => TokenKind::Keyword(Keyword::Decimal),
|
||||||
|
UniCase::ascii("DURATION") => TokenKind::Keyword(Keyword::Duration),
|
||||||
|
UniCase::ascii("FLOAT") => TokenKind::Keyword(Keyword::Float),
|
||||||
|
UniCase::ascii("fn") => TokenKind::Keyword(Keyword::Fn),
|
||||||
|
UniCase::ascii("ml") => TokenKind::Keyword(Keyword::ML),
|
||||||
|
UniCase::ascii("INT") => TokenKind::Keyword(Keyword::Int),
|
||||||
|
UniCase::ascii("NUMBER") => TokenKind::Keyword(Keyword::Number),
|
||||||
|
UniCase::ascii("OBJECT") => TokenKind::Keyword(Keyword::Object),
|
||||||
|
UniCase::ascii("STRING") => TokenKind::Keyword(Keyword::String),
|
||||||
|
UniCase::ascii("UUID") => TokenKind::Keyword(Keyword::Uuid),
|
||||||
|
UniCase::ascii("ULID") => TokenKind::Keyword(Keyword::Ulid),
|
||||||
|
UniCase::ascii("RAND") => TokenKind::Keyword(Keyword::Rand),
|
||||||
|
UniCase::ascii("FEATURE") => TokenKind::Keyword(Keyword::Feature),
|
||||||
|
UniCase::ascii("LINE") => TokenKind::Keyword(Keyword::Line),
|
||||||
|
UniCase::ascii("POINT") => TokenKind::Keyword(Keyword::Point),
|
||||||
|
UniCase::ascii("POLYGON") => TokenKind::Keyword(Keyword::Polygon),
|
||||||
|
UniCase::ascii("MULTIPOINT") => TokenKind::Keyword(Keyword::MultiPoint),
|
||||||
|
UniCase::ascii("MULTILINE") => TokenKind::Keyword(Keyword::MultiLine),
|
||||||
|
UniCase::ascii("MULTIPOLYGON") => TokenKind::Keyword(Keyword::MultiPolygon),
|
||||||
|
UniCase::ascii("COLLECTION") => TokenKind::Keyword(Keyword::Collection),
|
||||||
|
|
||||||
|
// Languages
|
||||||
|
UniCase::ascii("ARABIC") => TokenKind::Language(Language::Arabic),
|
||||||
|
UniCase::ascii("ARA") => TokenKind::Language(Language::Arabic),
|
||||||
|
UniCase::ascii("AR") => TokenKind::Language(Language::Arabic),
|
||||||
|
UniCase::ascii("DANISH") => TokenKind::Language(Language::Danish),
|
||||||
|
UniCase::ascii("DAN") => TokenKind::Language(Language::Danish),
|
||||||
|
UniCase::ascii("DA") => TokenKind::Language(Language::Danish),
|
||||||
|
UniCase::ascii("DUTCH") => TokenKind::Language(Language::Dutch),
|
||||||
|
UniCase::ascii("NLD") => TokenKind::Language(Language::Dutch),
|
||||||
|
UniCase::ascii("NL") => TokenKind::Language(Language::Dutch),
|
||||||
|
UniCase::ascii("ENGLISH") => TokenKind::Language(Language::English),
|
||||||
|
UniCase::ascii("ENG") => TokenKind::Language(Language::English),
|
||||||
|
UniCase::ascii("EN") => TokenKind::Language(Language::English),
|
||||||
|
UniCase::ascii("FRENCH") => TokenKind::Language(Language::French),
|
||||||
|
UniCase::ascii("FRA") => TokenKind::Language(Language::French),
|
||||||
|
UniCase::ascii("FR") => TokenKind::Language(Language::French),
|
||||||
|
UniCase::ascii("GERMAN") => TokenKind::Language(Language::German),
|
||||||
|
UniCase::ascii("DEU") => TokenKind::Language(Language::German),
|
||||||
|
UniCase::ascii("DE") => TokenKind::Language(Language::German),
|
||||||
|
UniCase::ascii("GREEK") => TokenKind::Language(Language::Greek),
|
||||||
|
UniCase::ascii("ELL") => TokenKind::Language(Language::Greek),
|
||||||
|
UniCase::ascii("EL") => TokenKind::Language(Language::Greek),
|
||||||
|
UniCase::ascii("HUNGARIAN") => TokenKind::Language(Language::Hungarian),
|
||||||
|
UniCase::ascii("HUN") => TokenKind::Language(Language::Hungarian),
|
||||||
|
UniCase::ascii("HU") => TokenKind::Language(Language::Hungarian),
|
||||||
|
UniCase::ascii("ITALIAN") => TokenKind::Language(Language::Italian),
|
||||||
|
UniCase::ascii("ITA") => TokenKind::Language(Language::Italian),
|
||||||
|
UniCase::ascii("IT") => TokenKind::Language(Language::Italian),
|
||||||
|
UniCase::ascii("NORWEGIAN") => TokenKind::Language(Language::Norwegian),
|
||||||
|
UniCase::ascii("NOR") => TokenKind::Language(Language::Norwegian),
|
||||||
|
UniCase::ascii("PORTUGUESE") => TokenKind::Language(Language::Portuguese),
|
||||||
|
UniCase::ascii("POR") => TokenKind::Language(Language::Portuguese),
|
||||||
|
UniCase::ascii("PT") => TokenKind::Language(Language::Portuguese),
|
||||||
|
UniCase::ascii("ROMANIAN") => TokenKind::Language(Language::Romanian),
|
||||||
|
UniCase::ascii("RON") => TokenKind::Language(Language::Romanian),
|
||||||
|
UniCase::ascii("RO") => TokenKind::Language(Language::Romanian),
|
||||||
|
UniCase::ascii("RUSSIAN") => TokenKind::Language(Language::Russian),
|
||||||
|
UniCase::ascii("RUS") => TokenKind::Language(Language::Russian),
|
||||||
|
UniCase::ascii("RU") => TokenKind::Language(Language::Russian),
|
||||||
|
UniCase::ascii("SPANISH") => TokenKind::Language(Language::Spanish),
|
||||||
|
UniCase::ascii("SPA") => TokenKind::Language(Language::Spanish),
|
||||||
|
UniCase::ascii("ES") => TokenKind::Language(Language::Spanish),
|
||||||
|
UniCase::ascii("SWEDISH") => TokenKind::Language(Language::Swedish),
|
||||||
|
UniCase::ascii("SWE") => TokenKind::Language(Language::Swedish),
|
||||||
|
UniCase::ascii("SV") => TokenKind::Language(Language::Swedish),
|
||||||
|
UniCase::ascii("TAMIL") => TokenKind::Language(Language::Tamil),
|
||||||
|
UniCase::ascii("TAM") => TokenKind::Language(Language::Tamil),
|
||||||
|
UniCase::ascii("TA") => TokenKind::Language(Language::Tamil),
|
||||||
|
UniCase::ascii("TURKISH") => TokenKind::Language(Language::Turkish),
|
||||||
|
UniCase::ascii("TUR") => TokenKind::Language(Language::Turkish),
|
||||||
|
UniCase::ascii("TR") => TokenKind::Language(Language::Turkish),
|
||||||
|
|
||||||
|
// Algorithms
|
||||||
|
UniCase::ascii("EDDSA") => TokenKind::Algorithm(Algorithm::EdDSA),
|
||||||
|
UniCase::ascii("ES256") => TokenKind::Algorithm(Algorithm::Es256),
|
||||||
|
UniCase::ascii("ES384") => TokenKind::Algorithm(Algorithm::Es384),
|
||||||
|
UniCase::ascii("ES512") => TokenKind::Algorithm(Algorithm::Es512),
|
||||||
|
UniCase::ascii("HS256") => TokenKind::Algorithm(Algorithm::Hs256),
|
||||||
|
UniCase::ascii("HS384") => TokenKind::Algorithm(Algorithm::Hs384),
|
||||||
|
UniCase::ascii("HS512") => TokenKind::Algorithm(Algorithm::Hs512),
|
||||||
|
UniCase::ascii("PS256") => TokenKind::Algorithm(Algorithm::Ps256),
|
||||||
|
UniCase::ascii("PS384") => TokenKind::Algorithm(Algorithm::Ps384),
|
||||||
|
UniCase::ascii("PS512") => TokenKind::Algorithm(Algorithm::Ps512),
|
||||||
|
UniCase::ascii("RS256") => TokenKind::Algorithm(Algorithm::Rs256),
|
||||||
|
UniCase::ascii("RS384") => TokenKind::Algorithm(Algorithm::Rs384),
|
||||||
|
UniCase::ascii("RS512") => TokenKind::Algorithm(Algorithm::Rs512),
|
||||||
|
UniCase::ascii("JWKS") => TokenKind::Algorithm(Algorithm::Jwks),
|
||||||
|
|
||||||
|
// Distance
|
||||||
|
UniCase::ascii("EUCLIDEAN") => TokenKind::Distance(DistanceKind::Euclidean),
|
||||||
|
UniCase::ascii("MANHATTAN") => TokenKind::Distance(DistanceKind::Manhattan),
|
||||||
|
UniCase::ascii("HAMMING") => TokenKind::Distance(DistanceKind::Hamming),
|
||||||
|
UniCase::ascii("MINKOWSKI") => TokenKind::Distance(DistanceKind::Minkowski),
|
||||||
|
};
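Because the map is keyed on UniCase, keyword matching is ASCII case-insensitive. A crate-internal sketch of the lookup, mirroring the call in lex_ident above (it assumes access to KEYWORDS, TokenKind and Keyword from this diff; the helper name is made up):

use unicase::UniCase;

// Sketch: "select", "SELECT" and "Select" all resolve to the same keyword.
fn is_select_keyword(ident: &'static str) -> bool {
    matches!(
        KEYWORDS.get(&UniCase::ascii(ident)).copied(),
        Some(TokenKind::Keyword(Keyword::Select))
    )
}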
|
417
lib/src/syn/v2/lexer/mod.rs
Normal file
@ -0,0 +1,417 @@
|
||||||
|
use crate::{
|
||||||
|
sql::{Datetime, Duration, Regex, Uuid},
|
||||||
|
syn::v2::token::{Span, Token, TokenKind},
|
||||||
|
};
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
mod byte;
|
||||||
|
mod char;
|
||||||
|
mod datetime;
|
||||||
|
mod duration;
|
||||||
|
mod ident;
|
||||||
|
mod js;
|
||||||
|
mod keywords;
|
||||||
|
mod number;
|
||||||
|
mod reader;
|
||||||
|
mod strand;
|
||||||
|
mod unicode;
|
||||||
|
mod uuid;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test;
|
||||||
|
|
||||||
|
pub use reader::{BytesReader, CharError};
|
||||||
|
|
||||||
|
/// An error returned by the lexer when an invalid token is encountered.
|
||||||
|
///
|
||||||
|
/// Can be retrieved from the `Lexer::error` field whenever it returns a [`TokenKind::Invalid`]
|
||||||
|
/// token.
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error("Lexer encountered unexpected character {0:?}")]
|
||||||
|
UnexpectedCharacter(char),
|
||||||
|
#[error("invalid escape character {0:?}")]
|
||||||
|
InvalidEscapeCharacter(char),
|
||||||
|
#[error("Lexer encountered unexpected end of source characters")]
|
||||||
|
UnexpectedEof,
|
||||||
|
#[error("source was not valid utf-8")]
|
||||||
|
InvalidUtf8,
|
||||||
|
#[error("expected next character to be '{0}'")]
|
||||||
|
ExpectedEnd(char),
|
||||||
|
#[error("failed to lex date-time, {0}")]
|
||||||
|
DateTime(#[from] datetime::Error),
|
||||||
|
#[error("failed to lex uuid, {0}")]
|
||||||
|
Uuid(#[from] uuid::Error),
|
||||||
|
#[error("failed to lex duration, {0}")]
|
||||||
|
Duration(#[from] duration::Error),
|
||||||
|
#[error("failed to lex number, {0}")]
|
||||||
|
Number(#[from] number::Error),
|
||||||
|
#[error("failed to parse regex, {0}")]
|
||||||
|
Regex(regex::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<CharError> for Error {
|
||||||
|
fn from(value: CharError) -> Self {
|
||||||
|
match value {
|
||||||
|
CharError::Eof => Self::UnexpectedEof,
|
||||||
|
CharError::Unicode => Self::InvalidUtf8,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The SurrealQL lexer.
|
||||||
|
/// Takes a slice of bytes and turns it into tokens. The lexer is designed with possible invalid utf-8
|
||||||
|
/// in mind and will handle bytes which are invalid utf-8 with an error.
|
||||||
|
///
|
||||||
|
/// The lexer generates tokens lazily. Whenever [`Lexer::next_token`] is called on the lexer it will
|
||||||
|
/// try to lex the next bytes in the given source as a token. The lexer always returns a token, even
|
||||||
|
/// if the source contains invalid tokens or is at the end of the source. In both cases a specific
|
||||||
|
/// type of token is returned.
|
||||||
|
///
|
||||||
|
/// Note that SurrealQL syntax cannot be lexed in advance. For example, record strings and regexes,
|
||||||
|
/// both cannot be parsed correctly without knowledge of previous tokens as they are both ambiguous
|
||||||
|
/// with other tokens.
|
||||||
|
pub struct Lexer<'a> {
|
||||||
|
/// The reader for reading the source bytes.
|
||||||
|
pub reader: BytesReader<'a>,
|
||||||
|
/// The one past the last character of the previous token.
|
||||||
|
last_offset: u32,
|
||||||
|
/// The span of whitespace if it was read between two tokens.
|
||||||
|
whitespace_span: Option<Span>,
|
||||||
|
/// A buffer used to build the value of tokens which can't be read straight from the source.
|
||||||
|
/// like for example strings with escape characters.
|
||||||
|
scratch: String,
|
||||||
|
|
||||||
|
// below are a collection of storage for values produced by tokens.
|
||||||
|
// For performance reasons we want to keep the tokens as small as possible.
|
||||||
|
// As only some tokens have an additional value associated with them we don't store that value
|
||||||
|
// in the token itself but, instead, in the lexer, ensuring a smaller size for each individual
|
||||||
|
// token.
|
||||||
|
//
|
||||||
|
// This does result in some additional state to keep track of as peeking a token while a token
|
||||||
|
// value is still in the variables below will overwrite the previous value.
|
||||||
|
//
|
||||||
|
// Both numbers and actual strings are stored as a string value.
|
||||||
|
// The parser can, depending on position in syntax, decide to parse a number in a variety of
|
||||||
|
// different precisions or formats. The only way to support all is to delay parsing the
|
||||||
|
// actual number value to when the parser can decide on a format.
|
||||||
|
pub string: Option<String>,
|
||||||
|
pub duration: Option<Duration>,
|
||||||
|
pub datetime: Option<Datetime>,
|
||||||
|
pub regex: Option<Regex>,
|
||||||
|
pub uuid: Option<Uuid>,
|
||||||
|
pub error: Option<Error>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Lexer<'a> {
|
||||||
|
/// Create a new lexer.
|
||||||
|
/// # Panic
|
||||||
|
/// This function will panic if the source is longer than u32::MAX bytes.
|
||||||
|
pub fn new(source: &'a [u8]) -> Lexer<'a> {
|
||||||
|
let reader = BytesReader::new(source);
|
||||||
|
assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size");
|
||||||
|
Lexer {
|
||||||
|
reader,
|
||||||
|
last_offset: 0,
|
||||||
|
whitespace_span: None,
|
||||||
|
scratch: String::new(),
|
||||||
|
string: None,
|
||||||
|
datetime: None,
|
||||||
|
duration: None,
|
||||||
|
regex: None,
|
||||||
|
uuid: None,
|
||||||
|
error: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reset the state of the lexer.
|
||||||
|
///
|
||||||
|
/// Doesn't change the state of the reader.
|
||||||
|
pub fn reset(&mut self) {
|
||||||
|
self.last_offset = 0;
|
||||||
|
self.scratch.clear();
|
||||||
|
self.whitespace_span = None;
|
||||||
|
self.string = None;
|
||||||
|
self.datetime = None;
|
||||||
|
self.duration = None;
|
||||||
|
self.regex = None;
|
||||||
|
self.uuid = None;
|
||||||
|
self.error = None;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Change the used source from the lexer to a new buffer.
|
||||||
|
///
|
||||||
|
/// Useful for reusing buffers.
|
||||||
|
///
|
||||||
|
/// # Panic
|
||||||
|
/// This function will panic if the source is longer than u32::MAX bytes.
|
||||||
|
pub fn change_source<'b>(self, source: &'b [u8]) -> Lexer<'b> {
|
||||||
|
let reader = BytesReader::<'b>::new(source);
|
||||||
|
assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size");
|
||||||
|
Lexer {
|
||||||
|
reader,
|
||||||
|
last_offset: 0,
|
||||||
|
whitespace_span: None,
|
||||||
|
scratch: self.scratch,
|
||||||
|
string: self.string,
|
||||||
|
datetime: self.datetime,
|
||||||
|
duration: self.duration,
|
||||||
|
regex: self.regex,
|
||||||
|
uuid: self.uuid,
|
||||||
|
error: self.error,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the whitespace span of the last token buffered, either peeked or popped.
|
||||||
|
pub fn whitespace_span(&self) -> Option<Span> {
|
||||||
|
self.whitespace_span
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Used for setting the span of whitespace between tokens. Will extend the current whitespace
|
||||||
|
/// if there already is one.
|
||||||
|
fn set_whitespace_span(&mut self, span: Span) {
|
||||||
|
if let Some(existing) = self.whitespace_span.as_mut() {
|
||||||
|
*existing = existing.covers(span);
|
||||||
|
} else {
|
||||||
|
self.whitespace_span = Some(span);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the next token, driving the lexer forward.
|
||||||
|
///
|
||||||
|
/// If the lexer is at the end of the source it will always return the Eof token.
|
||||||
|
pub fn next_token(&mut self) -> Token {
|
||||||
|
self.whitespace_span = None;
|
||||||
|
self.next_token_inner()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_token_inner(&mut self) -> Token {
|
||||||
|
let Some(byte) = self.reader.next() else {
|
||||||
|
return self.eof_token();
|
||||||
|
};
|
||||||
|
if byte.is_ascii() {
|
||||||
|
self.lex_ascii(byte)
|
||||||
|
} else {
|
||||||
|
self.lex_char(byte)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates the eof token.
|
||||||
|
///
|
||||||
|
/// An eof token has TokenKind::Eof and a span which points to the last character of the
|
||||||
|
/// source.
|
||||||
|
fn eof_token(&mut self) -> Token {
|
||||||
|
Token {
|
||||||
|
kind: TokenKind::Eof,
|
||||||
|
span: Span {
|
||||||
|
offset: self.last_offset.saturating_sub(1),
|
||||||
|
len: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Skip the last consumed bytes in the reader.
|
||||||
|
///
|
||||||
|
/// The bytes consumed before this point won't be part of the span.
|
||||||
|
fn skip_offset(&mut self) {
|
||||||
|
self.last_offset = self.reader.offset() as u32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return an invalid token.
|
||||||
|
fn invalid_token(&mut self, error: Error) -> Token {
|
||||||
|
self.error = Some(error);
|
||||||
|
self.finish_token(TokenKind::Invalid)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the span for the current token being lexed.
|
||||||
|
pub fn current_span(&self) -> Span {
|
||||||
|
// We make sure that the source is no longer than u32::MAX so this can't overflow.
|
||||||
|
let new_offset = self.reader.offset() as u32;
|
||||||
|
let len = new_offset - self.last_offset;
|
||||||
|
Span {
|
||||||
|
offset: self.last_offset,
|
||||||
|
len,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds a token from a TokenKind.
|
||||||
|
///
|
||||||
|
/// Attaches a span to the token, returns it, and updates the last offset.
|
||||||
|
fn finish_token(&mut self, kind: TokenKind) -> Token {
|
||||||
|
let span = self.current_span();
|
||||||
|
// We make sure that the source is no longer than u32::MAX so this can't overflow.
|
||||||
|
self.last_offset = self.reader.offset() as u32;
|
||||||
|
Token {
|
||||||
|
kind,
|
||||||
|
span,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Moves the lexer state back to before the given span.
|
||||||
|
///
|
||||||
|
/// # Warning
|
||||||
|
/// Moving the lexer into a state where the next byte is within a multibyte character will
|
||||||
|
/// result in spurious errors.
|
||||||
|
pub fn backup_before(&mut self, span: Span) {
|
||||||
|
self.reader.backup(span.offset as usize);
|
||||||
|
self.last_offset = span.offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Moves the lexer state to after the given span.
|
||||||
|
///
|
||||||
|
/// # Warning
|
||||||
|
/// Moving the lexer into a state where the next byte is within a multibyte character will
|
||||||
|
/// result in spurious errors.
|
||||||
|
pub fn backup_after(&mut self, span: Span) {
|
||||||
|
let offset = span.offset + span.len;
|
||||||
|
self.reader.backup(offset as usize);
|
||||||
|
self.last_offset = offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks if the next byte is the given byte; if it is, consumes the byte and returns true.
|
||||||
|
/// Otherwise returns false.
|
||||||
|
///
|
||||||
|
/// Also returns false if there is no next character.
|
||||||
|
pub fn eat(&mut self, byte: u8) -> bool {
|
||||||
|
if self.reader.peek() == Some(byte) {
|
||||||
|
self.reader.next();
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks if the closure returns true when given the next byte; if so, consumes the byte
|
||||||
|
/// and returns true. Otherwise returns false.
|
||||||
|
///
|
||||||
|
/// Also returns false if there is no next character.
|
||||||
|
pub fn eat_when<F: FnOnce(u8) -> bool>(&mut self, f: F) -> bool {
|
||||||
|
let Some(x) = self.reader.peek() else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
if f(x) {
|
||||||
|
self.reader.next();
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex a single `"` character with possible leading whitespace.
|
||||||
|
///
|
||||||
|
/// Used for parsing record strings.
|
||||||
|
pub fn lex_record_string_close(&mut self) -> Token {
|
||||||
|
loop {
|
||||||
|
let Some(byte) = self.reader.next() else {
|
||||||
|
return self.invalid_token(Error::UnexpectedEof);
|
||||||
|
};
|
||||||
|
match byte {
|
||||||
|
unicode::byte::CR
|
||||||
|
| unicode::byte::FF
|
||||||
|
| unicode::byte::LF
|
||||||
|
| unicode::byte::SP
|
||||||
|
| unicode::byte::VT
|
||||||
|
| unicode::byte::TAB => {
|
||||||
|
self.eat_whitespace();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
b'"' => {
|
||||||
|
return self.finish_token(TokenKind::CloseRecordString {
|
||||||
|
double: true,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
b'\'' => {
|
||||||
|
return self.finish_token(TokenKind::CloseRecordString {
|
||||||
|
double: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
b'-' => match self.reader.next() {
|
||||||
|
Some(b'-') => {
|
||||||
|
self.eat_single_line_comment();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Some(x) => match self.reader.convert_to_char(x) {
|
||||||
|
Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
|
||||||
|
Err(e) => return self.invalid_token(e.into()),
|
||||||
|
},
|
||||||
|
None => return self.invalid_token(Error::UnexpectedEof),
|
||||||
|
},
|
||||||
|
b'/' => match self.reader.next() {
|
||||||
|
Some(b'*') => {
|
||||||
|
if let Err(e) = self.eat_multi_line_comment() {
|
||||||
|
return self.invalid_token(e);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Some(b'/') => {
|
||||||
|
self.eat_single_line_comment();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Some(x) => match self.reader.convert_to_char(x) {
|
||||||
|
Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
|
||||||
|
Err(e) => return self.invalid_token(e.into()),
|
||||||
|
},
|
||||||
|
None => return self.invalid_token(Error::UnexpectedEof),
|
||||||
|
},
|
||||||
|
b'#' => {
|
||||||
|
self.eat_single_line_comment();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
x => match self.reader.convert_to_char(x) {
|
||||||
|
Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)),
|
||||||
|
Err(e) => return self.invalid_token(e.into()),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex only a datetime without enclosing delimiters.
|
||||||
|
///
|
||||||
|
/// Used for reusing lexer lexing code for parsing datetimes. Should not be called during
|
||||||
|
/// normal parsing.
|
||||||
|
pub fn lex_only_datetime(&mut self) -> Result<Datetime, Error> {
|
||||||
|
self.lex_datetime_raw_err().map_err(Error::DateTime)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex only a duration.
|
||||||
|
///
|
||||||
|
/// Used for reusing lexer lexing code for parsing durations. Should not be used during normal
|
||||||
|
/// parsing.
|
||||||
|
pub fn lex_only_duration(&mut self) -> Result<Duration, Error> {
|
||||||
|
match self.reader.next() {
|
||||||
|
Some(x @ b'0'..=b'9') => {
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
while let Some(x @ b'0'..=b'9') = self.reader.peek() {
|
||||||
|
self.reader.next();
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
}
|
||||||
|
self.lex_duration_err().map_err(Error::Duration)
|
||||||
|
}
|
||||||
|
Some(x) => {
|
||||||
|
let char = self.reader.convert_to_char(x)?;
|
||||||
|
Err(Error::UnexpectedCharacter(char))
|
||||||
|
}
|
||||||
|
None => Err(Error::UnexpectedEof),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex only a UUID.
|
||||||
|
///
|
||||||
|
/// Used for reusing lexer lexing code for parsing UUIDs. Should not be used during normal
|
||||||
|
/// parsing.
|
||||||
|
pub fn lex_only_uuid(&mut self) -> Result<Uuid, Error> {
|
||||||
|
Ok(self.lex_uuid_err_inner()?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for Lexer<'_> {
|
||||||
|
type Item = Token;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let token = self.next_token();
|
||||||
|
if token.is_eof() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(token)
|
||||||
|
}
|
||||||
|
}
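With the Iterator impl above, driving the lexer over a query is a one-liner. A crate-internal usage sketch (it assumes the module paths introduced in this diff and that Token exposes its kind field as constructed in finish_token):

use crate::syn::v2::lexer::Lexer;
use crate::syn::v2::token::TokenKind;

// Lex a small query and collect the token kinds; iteration stops once the
// Eof token is produced, and TokenKind::Invalid signals that Lexer::error is set.
fn lex_kinds(src: &str) -> Vec<TokenKind> {
    Lexer::new(src.as_bytes()).map(|token| token.kind).collect()
}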
|
257
lib/src/syn/v2/lexer/number.rs
Normal file
@ -0,0 +1,257 @@
|
||||||
|
use crate::syn::v2::{
|
||||||
|
lexer::{unicode::U8Ext, Error as LexError, Lexer},
|
||||||
|
token::{NumberKind, Token, TokenKind},
|
||||||
|
};
|
||||||
|
use std::mem;
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error("invalid number suffix")]
|
||||||
|
InvalidSuffix,
|
||||||
|
#[error("expected atleast a single digit in the exponent")]
|
||||||
|
DigitExpectedExponent,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Lexer<'_> {
|
||||||
|
/// Lex only an integer.
|
||||||
|
/// Use when a number can be followed immediately by a `.` like in a model version.
|
||||||
|
pub fn lex_only_integer(&mut self) -> Token {
|
||||||
|
match self.lex_only_integer_err() {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(e) => self.invalid_token(LexError::Number(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lex_only_integer_err(&mut self) -> Result<Token, Error> {
|
||||||
|
let Some(next) = self.reader.peek() else {
|
||||||
|
return Ok(self.eof_token());
|
||||||
|
};
|
||||||
|
|
||||||
|
// not a number, return a different token kind, for error reporting.
|
||||||
|
if !next.is_ascii_digit() {
|
||||||
|
return Ok(self.next_token());
|
||||||
|
}
|
||||||
|
|
||||||
|
self.scratch.push(next as char);
|
||||||
|
self.reader.next();
|
||||||
|
|
||||||
|
// eat all the ascii digits
|
||||||
|
while let Some(x) = self.reader.peek() {
|
||||||
|
if x == b'_' {
|
||||||
|
self.reader.next();
|
||||||
|
} else if !x.is_ascii_digit() {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
self.reader.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test for a suffix.
|
||||||
|
match self.reader.peek() {
|
||||||
|
Some(b'd' | b'f') => {
|
||||||
|
// not an integer but parse anyway for error reporting.
|
||||||
|
return self.lex_suffix(true);
|
||||||
|
}
|
||||||
|
Some(x) if x.is_ascii_alphabetic() => return Err(self.invalid_suffix()),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn lex_number(&mut self, start: u8) -> Token {
|
||||||
|
match self.lex_number_err(start) {
|
||||||
|
Ok(x) => x,
|
||||||
|
Err(e) => self.invalid_token(LexError::Number(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Lex a number.
|
||||||
|
///
|
||||||
|
/// Expects the digit which started the number as the start argument.
|
||||||
|
pub fn lex_number_err(&mut self, start: u8) -> Result<Token, Error> {
|
||||||
|
debug_assert!(start.is_ascii_digit());
|
||||||
|
debug_assert_eq!(self.scratch, "");
|
||||||
|
self.scratch.push(start as char);
|
||||||
|
loop {
|
||||||
|
let Some(x) = self.reader.peek() else {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
||||||
|
};
|
||||||
|
match x {
|
||||||
|
b'0'..=b'9' => {
|
||||||
|
// next digits.
|
||||||
|
self.reader.next();
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
}
|
||||||
|
b'.' => {
|
||||||
|
// mantissa
|
||||||
|
let backup = self.reader.offset();
|
||||||
|
self.reader.next();
|
||||||
|
let next = self.reader.peek();
|
||||||
|
if let Some(b'0'..=b'9') = next {
|
||||||
|
self.scratch.push('.');
|
||||||
|
return self.lex_mantissa();
|
||||||
|
} else {
|
||||||
|
// indexing a number
|
||||||
|
self.reader.backup(backup);
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
b'f' | b'd' => return self.lex_suffix(true),
|
||||||
|
// 0xc2 is the start byte of 'µ'
|
||||||
|
0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => {
|
||||||
|
// duration suffix, switch to lexing duration.
|
||||||
|
return Ok(self.lex_duration());
|
||||||
|
}
|
||||||
|
b'_' => {
|
||||||
|
self.reader.next();
|
||||||
|
}
|
||||||
|
b'a'..=b'z' | b'A'..=b'Z' => {
|
||||||
|
return Err(self.invalid_suffix());
|
||||||
|
// invalid token, unexpected identifier character immediately after number.
|
||||||
|
// Eat all remaining identifier like characters.
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn invalid_suffix(&mut self) -> Error {
|
||||||
|
// eat the whole suffix.
|
||||||
|
while let Some(x) = self.reader.peek() {
|
||||||
|
if !x.is_ascii_alphanumeric() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
self.reader.next();
|
||||||
|
}
|
||||||
|
self.scratch.clear();
|
||||||
|
Error::InvalidSuffix
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lex a number suffix, either 'f' or 'dec'.
|
||||||
|
fn lex_suffix(&mut self, can_be_duration: bool) -> Result<Token, Error> {
|
||||||
|
match self.reader.peek() {
|
||||||
|
Some(b'f') => {
|
||||||
|
// float suffix
|
||||||
|
self.reader.next();
|
||||||
|
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
||||||
|
Err(self.invalid_suffix())
|
||||||
|
} else {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
Ok(self.finish_token(TokenKind::Number(NumberKind::Float)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some(b'd') => {
|
||||||
|
// decimal suffix
|
||||||
|
self.reader.next();
|
||||||
|
let checkpoint = self.reader.offset();
|
||||||
|
if !self.eat(b'e') {
|
||||||
|
if can_be_duration {
|
||||||
|
self.reader.backup(checkpoint - 1);
|
||||||
|
return Ok(self.lex_duration());
|
||||||
|
} else {
|
||||||
|
return Err(self.invalid_suffix());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.eat(b'c') {
|
||||||
|
return Err(self.invalid_suffix());
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) {
|
||||||
|
Err(self.invalid_suffix())
|
||||||
|
} else {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lexes the mantissa of a number, i.e. `.8` in `1.8`
|
||||||
|
pub fn lex_mantissa(&mut self) -> Result<Token, Error> {
|
||||||
|
loop {
|
||||||
|
// lex_number already checks if there exists a digit after the dot.
|
||||||
|
// So this will never fail the first iteration of the loop.
|
||||||
|
let Some(x) = self.reader.peek() else {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
|
||||||
|
};
|
||||||
|
match x {
|
||||||
|
b'0'..=b'9' => {
|
||||||
|
// next digit.
|
||||||
|
self.reader.next();
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
}
|
||||||
|
b'e' | b'E' => {
|
||||||
|
// scientific notation
|
||||||
|
self.reader.next();
|
||||||
|
self.scratch.push('e');
|
||||||
|
return self.lex_exponent(true);
|
||||||
|
}
|
||||||
|
b'_' => {
|
||||||
|
self.reader.next();
|
||||||
|
}
|
||||||
|
b'f' | b'd' => return self.lex_suffix(false),
|
||||||
|
b'a'..=b'z' | b'A'..=b'Z' => {
|
||||||
|
// invalid token, random identifier characters immediately after number.
|
||||||
|
self.scratch.clear();
|
||||||
|
return Err(Error::InvalidSuffix);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lexes the exponent of a number, i.e. `e10` in `1.1e10`;
|
||||||
|
fn lex_exponent(&mut self, had_mantissa: bool) -> Result<Token, Error> {
|
||||||
|
let mut atleast_one = false;
|
||||||
|
match self.reader.peek() {
|
||||||
|
Some(b'-' | b'+') => {}
|
||||||
|
Some(b'0'..=b'9') => {
|
||||||
|
atleast_one = true;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
// random other character, expected at least one digit.
|
||||||
|
return Err(Error::DigitExpectedExponent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.reader.next();
|
||||||
|
loop {
|
||||||
|
match self.reader.peek() {
|
||||||
|
Some(x @ b'0'..=b'9') => {
|
||||||
|
self.reader.next();
|
||||||
|
self.scratch.push(x as char);
|
||||||
|
}
|
||||||
|
Some(b'_') => {
|
||||||
|
self.reader.next();
|
||||||
|
}
|
||||||
|
Some(b'f' | b'd') => return self.lex_suffix(false),
|
||||||
|
_ => {
|
||||||
|
if atleast_one {
|
||||||
|
let kind = if had_mantissa {
|
||||||
|
NumberKind::MantissaExponent
|
||||||
|
} else {
|
||||||
|
NumberKind::Exponent
|
||||||
|
};
|
||||||
|
self.string = Some(mem::take(&mut self.scratch));
|
||||||
|
return Ok(self.finish_token(TokenKind::Number(kind)));
|
||||||
|
} else {
|
||||||
|
return Err(Error::DigitExpectedExponent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
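Note that `_` is accepted as a readability separator and simply skipped while lexing numbers, so the digit string handed to the parser never contains it. A trivial illustration of that normalisation (not the lexer's code; the helper name is made up):

// "1_000_000" and "1000000" end up as the same digit string for the parser.
fn strip_separators(literal: &str) -> String {
    literal.chars().filter(|&c| c != '_').collect()
}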
|
157
lib/src/syn/v2/lexer/reader.rs
Normal file
@ -0,0 +1,157 @@
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
use crate::syn::v2::token::Span;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum CharError {
|
||||||
|
#[error("found eof inside multi byte character")]
|
||||||
|
Eof,
|
||||||
|
#[error("string is not valid utf-8")]
|
||||||
|
Unicode,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct BytesReader<'a> {
|
||||||
|
data: &'a [u8],
|
||||||
|
current: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for BytesReader<'_> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
f.debug_struct("BytesReader")
|
||||||
|
.field("used", &self.used())
|
||||||
|
.field("remaining", &self.remaining())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BytesReader<'a> {
|
||||||
|
pub fn new(slice: &'a [u8]) -> Self {
|
||||||
|
BytesReader {
|
||||||
|
data: slice,
|
||||||
|
current: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn full(&self) -> &'a [u8] {
|
||||||
|
self.data
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn used(&self) -> &'a [u8] {
|
||||||
|
&self.data[..self.current]
|
	}

	#[inline]
	pub fn remaining(&self) -> &'a [u8] {
		&self.data[self.current..]
	}

	#[inline]
	pub fn len(&self) -> usize {
		self.remaining().len()
	}

	#[inline]
	pub fn offset(&self) -> usize {
		self.current
	}

	#[inline]
	pub fn backup(&mut self, offset: usize) {
		assert!(offset <= self.offset());
		self.current = offset;
	}

	#[inline]
	pub fn is_empty(&self) -> bool {
		self.remaining().is_empty()
	}

	#[inline]
	pub fn peek(&self) -> Option<u8> {
		self.remaining().get(0).copied()
	}

	#[inline]
	pub fn span(&self, span: Span) -> &[u8] {
		&self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)]
	}

	#[inline]
	pub fn next_continue_byte(&mut self) -> Result<u8, CharError> {
		const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000;
		const CONTINUE_BYTE_MASK: u8 = 0b0011_1111;

		let byte = self.next().ok_or(CharError::Eof)?;
		if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 {
			return Err(CharError::Eof);
		}

		Ok(byte & CONTINUE_BYTE_MASK)
	}

	pub fn convert_to_char(&mut self, start: u8) -> Result<char, CharError> {
		if start.is_ascii() {
			return Ok(start as char);
		}
		self.complete_char(start)
	}

	pub fn complete_char(&mut self, start: u8) -> Result<char, CharError> {
		match start & 0b1111_1000 {
			// Two-byte sequence: the start byte looks like 0b110x_xxxx.
			0b1100_0000 | 0b1101_0000 | 0b1100_1000 | 0b1101_1000 => {
				let mut val = (start & 0b0001_1111) as u32;
				val <<= 6;
				let next = self.next_continue_byte()?;
				val |= next as u32;
				char::from_u32(val).ok_or(CharError::Unicode)
			}
			// Three-byte sequence: the start byte looks like 0b1110_xxxx.
			0b1110_0000 | 0b1110_1000 => {
				let mut val = (start & 0b0000_1111) as u32;
				val <<= 6;
				let next = self.next_continue_byte()?;
				val |= next as u32;
				val <<= 6;
				let next = self.next_continue_byte()?;
				val |= next as u32;
				char::from_u32(val).ok_or(CharError::Unicode)
			}
			// Four-byte sequence: the start byte looks like 0b1111_0xxx.
			0b1111_0000 => {
				let mut val = (start & 0b0000_0111) as u32;
				val <<= 6;
				let next = self.next_continue_byte()?;
				val |= next as u32;
				val <<= 6;
				let next = self.next_continue_byte()?;
				val |= next as u32;
				val <<= 6;
				let next = self.next_continue_byte()?;
				val |= next as u32;
				char::from_u32(val).ok_or(CharError::Unicode)
			}
			x => panic!("start byte did not start multi byte character: {:b}", x),
		}
	}
}

impl<'a> Iterator for BytesReader<'a> {
	type Item = u8;

	#[inline]
	fn next(&mut self) -> Option<Self::Item> {
		let res = self.peek()?;
		self.current += 1;
		Some(res)
	}

	fn size_hint(&self) -> (usize, Option<usize>) {
		let len = self.len();
		(len, Some(len))
	}
}

impl<'a> ExactSizeIterator for BytesReader<'a> {
	fn len(&self) -> usize {
		self.len()
	}
}
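The continuation-byte handling above follows the standard UTF-8 layout: the start byte carries the top bits of the code point and each continuation byte contributes six more. As a rough, standalone illustration of the same bit arithmetic (not part of the commit; names are illustrative and only the two-byte case is shown):

// Minimal sketch of the two-byte decoding path used by complete_char above.
fn decode_two_byte(bytes: &[u8]) -> Option<char> {
	let start = *bytes.first()?;
	let next = *bytes.get(1)?;
	// A two-byte start looks like 0b110x_xxxx.
	if start & 0b1110_0000 != 0b1100_0000 {
		return None;
	}
	// A continuation byte looks like 0b10xx_xxxx.
	if next & 0b1100_0000 != 0b1000_0000 {
		return None;
	}
	let val = ((start & 0b0001_1111) as u32) << 6 | (next & 0b0011_1111) as u32;
	char::from_u32(val)
}

fn main() {
	// "é" (U+00E9) is encoded as the two bytes 0xC3 0xA9.
	assert_eq!(decode_two_byte(&[0xC3, 0xA9]), Some('é'));
	println!("ok");
}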
95
lib/src/syn/v2/lexer/strand.rs
Normal file

@@ -0,0 +1,95 @@
//! Lexing of strand like characters.

use std::mem;

use crate::syn::v2::token::{Token, TokenKind};

use super::{unicode::chars, Error, Lexer};

impl<'a> Lexer<'a> {
	/// Lex a plain strand with either single or double quotes.
	pub fn lex_strand(&mut self, is_double: bool) -> Token {
		match self.lex_strand_err(is_double) {
			Ok(x) => x,
			Err(x) => {
				self.scratch.clear();
				self.invalid_token(x)
			}
		}
	}

	/// Lex a strand with either double or single quotes but return a result instead of a token.
	pub fn lex_strand_err(&mut self, is_double: bool) -> Result<Token, Error> {
		loop {
			let Some(x) = self.reader.next() else {
				self.scratch.clear();
				return Ok(self.eof_token());
			};

			if x.is_ascii() {
				match x {
					b'\'' if !is_double => {
						self.string = Some(mem::take(&mut self.scratch));
						return Ok(self.finish_token(TokenKind::Strand));
					}
					b'"' if is_double => {
						self.string = Some(mem::take(&mut self.scratch));
						return Ok(self.finish_token(TokenKind::Strand));
					}
					b'\0' => {
						// null bytes not allowed
						return Err(Error::UnexpectedCharacter('\0'));
					}
					b'\\' => {
						// Handle escape sequences.
						let Some(next) = self.reader.next() else {
							self.scratch.clear();
							return Ok(self.eof_token());
						};
						match next {
							b'\\' => {
								self.scratch.push('\\');
							}
							b'\'' if !is_double => {
								self.scratch.push('\'');
							}
							b'\"' if is_double => {
								self.scratch.push('\"');
							}
							b'/' => {
								self.scratch.push('/');
							}
							b'b' => {
								self.scratch.push(chars::BS);
							}
							b'f' => {
								self.scratch.push(chars::FF);
							}
							b'n' => {
								self.scratch.push(chars::LF);
							}
							b'r' => {
								self.scratch.push(chars::CR);
							}
							b't' => {
								self.scratch.push(chars::TAB);
							}
							x => {
								let char = if x.is_ascii() {
									x as char
								} else {
									self.reader.complete_char(x)?
								};
								return Err(Error::InvalidEscapeCharacter(char));
							}
						}
					}
					x => self.scratch.push(x as char),
				}
			} else {
				let c = self.reader.complete_char(x)?;
				self.scratch.push(c);
			}
		}
	}
}
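The strand lexer above maps the usual backslash escapes onto control characters and treats anything else after a backslash as an error; which quote character may be escaped depends on whether the strand is single- or double-quoted. A minimal, self-contained sketch of the same escape mapping (illustrative only; the real lexer pushes into its scratch buffer instead of returning a value):

// Sketch of the escape mapping performed by lex_strand_err above.
fn unescape(ch: u8) -> Option<char> {
	match ch {
		b'\\' => Some('\\'),
		b'/' => Some('/'),
		b'b' => Some('\u{0008}'), // backspace
		b'f' => Some('\u{000C}'), // form feed
		b'n' => Some('\n'),
		b'r' => Some('\r'),
		b't' => Some('\t'),
		_ => None, // anything else is an invalid escape character
	}
}

fn main() {
	assert_eq!(unescape(b'n'), Some('\n'));
	assert_eq!(unescape(b'x'), None);
	println!("ok");
}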
482
lib/src/syn/v2/lexer/test.rs
Normal file

@@ -0,0 +1,482 @@
use chrono::{FixedOffset, NaiveDate, Offset, TimeZone, Utc};

use crate::syn::v2::token::{t, NumberKind, TokenKind};

macro_rules! test_case(
	($source:expr => [$($token:expr),*$(,)?]) => {
		let mut lexer = crate::syn::v2::lexer::Lexer::new($source.as_bytes());
		let mut i = 0;
		$(
			let next = lexer.next();
			if let Some(next) = next {
				let span = std::str::from_utf8(lexer.reader.span(next.span)).unwrap_or("invalid utf8");
				if let TokenKind::Invalid = next.kind {
					let error = lexer.error.take().unwrap();
					assert_eq!(next.kind, $token, "{} = {}:{} => {}", span, i, stringify!($token), error);
				} else {
					assert_eq!(next.kind, $token, "{} = {}:{}", span, i, stringify!($token));
				}
			} else {
				assert_eq!(next, None);
			}
			i += 1;
		)*
		let _ = i;
		assert_eq!(lexer.next(), None)
	};
);

#[test]
fn operators() {
	test_case! {
		r#"- + / * ! **
		< > <= >= <- <-> ->
		= == -= += != +?=
		? ?? ?: ?~ ?=
		{ } [ ] ( )
		; , | || & &&
		$
		. .. ...

		^
		"# => [
			t!("-"), t!("+"), t!("/"), t!("*"), t!("!"), t!("**"),

			t!("<"), t!(">"), t!("<="), t!(">="), t!("<-"), t!("<->"), t!("->"),

			t!("="), t!("=="), t!("-="), t!("+="), t!("!="), t!("+?="),

			t!("?"), t!("??"), t!("?:"), t!("?~"), t!("?="),

			t!("{"), t!("}"), t!("["), t!("]"), t!("("), t!(")"),

			t!(";"), t!(","), t!("|"), t!("||"), TokenKind::Invalid, t!("&&"),

			t!("$"),

			t!("."), t!(".."), t!("..."),

			TokenKind::Invalid
		]
	}
}

#[test]
fn comments() {
	test_case! {
		r"
		+ /* some comment */
		- // another comment
		+ -- a third comment
		-
		" => [
			t!("+"),
			t!("-"),
			t!("+"),
			t!("-"),
		]
	}
}

#[test]
fn whitespace() {
	test_case! {
		"+= \t\n\r -=" => [
			t!("+="),
			t!("-="),
		]
	}
}

#[test]
fn identifiers() {
	test_case! {
		r#"
		123123adwad +
		akdwkj +
		akdwkj1231312313123 +
		_a_k_d_wkj1231312313123 +
		____wdw____ +
		"#
		=> [
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Identifier,
			t!("+"),
			TokenKind::Identifier,
			t!("+"),
			TokenKind::Identifier,
			t!("+"),
			TokenKind::Identifier,
			t!("+"),
		]
	}
}

#[test]
fn numbers() {
	test_case! {
		r#"
		123123+32010230.123012031+33043030dec+33043030f+

		"#
		=> [
			TokenKind::Number(NumberKind::Integer),
			t!("+"),
			TokenKind::Number(NumberKind::Mantissa),
			t!("+"),
			TokenKind::Number(NumberKind::Decimal),
			t!("+"),
			TokenKind::Number(NumberKind::Float),
			t!("+"),
		]
	}

	test_case! {
		"+123129decs+"
		=> [
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
		]
	}

	test_case! {
		"+39349fs+"
		=> [
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
		]
	}

	test_case! {
		"+394393df+"
		=> [
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
		]
	}

	test_case! {
		"+32932932def+"
		=> [
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
		]
	}

	test_case! {
		"+329239329z+"
		=> [
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
		]
	}
}

#[test]
fn duration() {
	test_case! {
		r#"
		1ns+1µs+1us+1ms+1s+1m+1h+1w+1y

		1nsa+1ans+1aus+1usa+1ams+1msa+1am+1ma+1ah+1ha+1aw+1wa+1ay+1ya+1µsa
		"#
		=> [
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,
			t!("+"),
			TokenKind::Duration,

			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
			t!("+"),
			TokenKind::Invalid,
		]
	}
}

#[test]
fn keyword() {
	test_case! {
		r#"select SELECT sElEcT"# => [
			t!("SELECT"),
			t!("SELECT"),
			t!("SELECT"),
		]
	}
}

#[test]
fn uuid() {
	let mut lexer = crate::syn::v2::lexer::Lexer::new(
		r#" u"e72bee20-f49b-11ec-b939-0242ac120002" "#.as_bytes(),
	);
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {} @ ", error);
	}
	assert_eq!(token.kind, TokenKind::Uuid);
	let uuid = lexer.uuid.take().unwrap();
	assert_eq!(uuid.0.to_string(), "e72bee20-f49b-11ec-b939-0242ac120002");

	let mut lexer = crate::syn::v2::lexer::Lexer::new(
		r#" u"b19bc00b-aa98-486c-ae37-c8e1c54295b1" "#.as_bytes(),
	);
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {} @ ", error);
	}
	assert_eq!(token.kind, TokenKind::Uuid);
	let uuid = lexer.uuid.take().unwrap();
	assert_eq!(uuid.0.to_string(), "b19bc00b-aa98-486c-ae37-c8e1c54295b1");
}

#[test]
fn date_time_just_date() {
	let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {} @ ", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let expected_datetime = Utc
		.fix()
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);

	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_zone_time() {
	let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2020-01-01T00:00:00Z" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {} @ ", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let expected_datetime = Utc
		.fix()
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);

	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_time_with_time() {
	let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43Z" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {} @ ", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let expected_datetime = Utc
		.fix()
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(18, 25, 43, 0).unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);

	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_time_nanos() {
	let mut lexer =
		crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5631Z" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {} @ ", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let expected_datetime = Utc
		.fix()
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23)
				.unwrap()
				.and_hms_nano_opt(18, 25, 43, 563_100_000)
				.unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);
	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_time_timezone_utc() {
	let mut lexer =
		crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.0000511Z" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {}", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let expected_datetime = Utc
		.fix()
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23)
				.unwrap()
				.and_hms_nano_opt(18, 25, 43, 51_100)
				.unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);
	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_time_timezone_pacific() {
	let mut lexer =
		crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511-08:00" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {}", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let offset = FixedOffset::west_opt(8 * 3600).unwrap();
	let expected_datetime = offset
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23)
				.unwrap()
				.and_hms_nano_opt(18, 25, 43, 511_000_000)
				.unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);
	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_time_timezone_pacific_partial() {
	let mut lexer =
		crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511+08:30" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {}", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let offset = FixedOffset::east_opt(8 * 3600 + 30 * 60).unwrap();
	let expected_datetime = offset
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23)
				.unwrap()
				.and_hms_nano_opt(18, 25, 43, 511_000_000)
				.unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);
	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_time_timezone_utc_nanoseconds() {
	let mut lexer =
		crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5110000Z" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {}", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let offset = Utc.fix();
	let expected_datetime = offset
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23)
				.unwrap()
				.and_hms_nano_opt(18, 25, 43, 511_000_000)
				.unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);
	assert_eq!(datetime.0, expected_datetime);
}

#[test]
fn date_time_timezone_utc_sub_nanoseconds() {
	let mut lexer =
		crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.0000511Z" "#.as_bytes());
	let token = lexer.next_token();
	if let Some(error) = lexer.error {
		println!("ERROR: {}", error);
	}
	assert_eq!(token.kind, TokenKind::DateTime);
	let datetime = lexer.datetime.take().unwrap();
	let offset = Utc.fix();
	let expected_datetime = offset
		.from_local_datetime(
			&NaiveDate::from_ymd_opt(2012, 4, 23)
				.unwrap()
				.and_hms_nano_opt(18, 25, 43, 51_100)
				.unwrap(),
		)
		.earliest()
		.unwrap()
		.with_timezone(&Utc);
	assert_eq!(datetime.0, expected_datetime);
}
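The test_case! macro above drives the lexer over a source string and asserts that it yields exactly the expected token kinds in order, then nothing more. A macro-free sketch of the same checking pattern, with the lexer stubbed as a plain iterator (purely illustrative, not part of the commit):

// Sketch of the assertion loop that a test_case! invocation expands to.
fn assert_tokens<I: Iterator<Item = &'static str>>(mut lexer: I, expected: &[&str]) {
	for (i, want) in expected.iter().enumerate() {
		let got = lexer.next();
		assert_eq!(got, Some(*want), "token {} mismatch", i);
	}
	assert_eq!(lexer.next(), None, "lexer produced more tokens than expected");
}

fn main() {
	let tokens = ["+", "-"].into_iter();
	assert_tokens(tokens, &["+", "-"]);
	println!("ok");
}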
68
lib/src/syn/v2/lexer/unicode.rs
Normal file

@@ -0,0 +1,68 @@
//! Unicode related utilities.

/// Character constants
pub mod chars {
	/// Character tabulation
	pub const TAB: char = '\u{0009}';
	/// Form feed
	pub const FF: char = '\u{000C}';

	/// Line feed
	pub const LF: char = '\u{000A}';
	/// Carriage return
	pub const CR: char = '\u{000D}';
	/// Line separator (U+2028)
	pub const LS: char = '\u{2028}';
	/// Backspace
	pub const BS: char = '\u{0008}';
	/// Paragraph separator (U+2029)
	pub const PS: char = '\u{2029}';
	/// Next line
	pub const NEL: char = '\u{0085}';

	/// Line terminators for javascript source code.
	pub const JS_LINE_TERIMATORS: [char; 4] = [LF, CR, LS, PS];
}

pub mod byte {
	/// Character tabulation
	pub const TAB: u8 = b'\t';
	/// Line tabulation
	pub const VT: u8 = 0xB;
	/// Form feed
	pub const FF: u8 = 0xC;

	/// Line feed
	pub const LF: u8 = 0xA;
	/// Carriage return
	pub const CR: u8 = 0xD;

	/// Space
	pub const SP: u8 = 0x20;
}

/// A trait extending u8 for adding some extra functions.
pub trait U8Ext {
	/// Returns if the u8 is the start of an identifier.
	fn is_identifier_start(&self) -> bool;

	/// Returns if the u8 can start a number.
	fn is_number_start(&self) -> bool;

	/// Returns if the u8 can continue an identifier after the first character.
	fn is_identifier_continue(&self) -> bool;
}

impl U8Ext for u8 {
	fn is_identifier_start(&self) -> bool {
		matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'_')
	}

	fn is_identifier_continue(&self) -> bool {
		matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
	}

	fn is_number_start(&self) -> bool {
		self.is_ascii_digit()
	}
}
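U8Ext classifies raw bytes: an identifier starts with an ASCII letter or underscore and continues with letters, digits, or underscores. A small standalone sketch of the same rule applied to a whole string (illustrative only; the trait in the commit works byte by byte inside the lexer):

// Sketch of the identifier classification implemented by U8Ext above.
fn is_identifier(s: &str) -> bool {
	let mut bytes = s.bytes();
	let Some(first) = bytes.next() else {
		return false;
	};
	if !matches!(first, b'a'..=b'z' | b'A'..=b'Z' | b'_') {
		return false;
	}
	bytes.all(|b| matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_'))
}

fn main() {
	assert!(is_identifier("_a_k_d_wkj1231312313123"));
	assert!(!is_identifier("123123adwad"));
	println!("ok");
}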
124
lib/src/syn/v2/lexer/uuid.rs
Normal file

@@ -0,0 +1,124 @@
use crate::{
	sql::Uuid,
	syn::v2::token::{Token, TokenKind},
};

use super::{Error as LexError, Lexer};
use thiserror::Error;

#[derive(Error, Debug)]
pub enum Error {
	#[error("missing digits")]
	MissingDigits,
	#[error("digit was not in allowed range")]
	InvalidRange,
	#[error("expected uuid-strand to end")]
	ExpectedStrandEnd,
	#[error("missing a uuid separator")]
	MissingSeperator,
}

impl<'a> Lexer<'a> {
	/// Lex a uuid strand with either double or single quotes.
	///
	/// Expects the first delimiter to already have been eaten.
	pub fn lex_uuid(&mut self, double: bool) -> Token {
		match self.lex_uuid_err(double) {
			Ok(x) => {
				debug_assert!(self.uuid.is_none());
				self.uuid = Some(x);
				self.finish_token(TokenKind::Uuid)
			}
			// The specific uuid error is discarded here; the token is reported as invalid.
			Err(_) => self.invalid_token(LexError::Uuid(Error::MissingDigits)),
		}
	}

	/// Lex a uuid strand with either double or single quotes but return a result instead of a
	/// token.
	///
	/// Expects the first delimiter to already have been eaten.
	pub fn lex_uuid_err(&mut self, double: bool) -> Result<Uuid, Error> {
		let uuid = self.lex_uuid_err_inner()?;

		let end_char = if double {
			b'"'
		} else {
			b'\''
		};
		// closing strand character
		if !self.eat(end_char) {
			return Err(Error::ExpectedStrandEnd);
		}

		Ok(uuid)
	}

	/// Lex a uuid strand without delimiting quotes but return a result instead of a
	/// token.
	///
	/// Expects the first delimiter to already have been eaten.
	pub fn lex_uuid_err_inner(&mut self) -> Result<Uuid, Error> {
		let start = self.reader.offset();

		if !self.lex_hex(8) {
			return Err(Error::MissingDigits);
		}

		if !self.eat(b'-') {
			return Err(Error::MissingSeperator);
		}

		if !self.lex_hex(4) {
			return Err(Error::MissingDigits);
		}

		if !self.eat(b'-') {
			return Err(Error::MissingSeperator);
		}

		if !self.eat_when(|x| (b'1'..=b'8').contains(&x)) {
			if self.reader.peek().map(|x| x.is_ascii_digit()).unwrap_or(false) {
				// byte was an ascii digit but not in the valid range.
				return Err(Error::InvalidRange);
			}
			return Err(Error::MissingDigits);
		};

		if !self.lex_hex(3) {
			return Err(Error::MissingDigits);
		}

		if !self.eat(b'-') {
			return Err(Error::MissingSeperator);
		}

		if !self.lex_hex(4) {
			return Err(Error::MissingDigits);
		}

		if !self.eat(b'-') {
			return Err(Error::MissingSeperator);
		}

		if !self.lex_hex(12) {
			return Err(Error::MissingDigits);
		}

		let end = self.reader.offset();
		// The lexer ensures that the section of bytes is valid utf8 so this should never panic.
		let uuid_str = std::str::from_utf8(&self.reader.full()[start..end]).unwrap();
		// The lexer ensures that the bytes are a valid uuid so this should never panic.
		Ok(Uuid(uuid::Uuid::try_from(uuid_str).unwrap()))
	}

	/// Lexes a given amount of hex characters. Returns true if the lexing was successful, false
	/// otherwise.
	pub fn lex_hex(&mut self, amount: u8) -> bool {
		for _ in 0..amount {
			if !self.eat_when(|x| matches!(x, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) {
				return false;
			}
		}
		true
	}
}
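lex_uuid_err_inner walks the familiar 8-4-4-4-12 hex-group shape, checking each group and separator in turn before handing the span to the uuid crate. A compact, standalone sketch of that shape check (illustrative only; the real lexer also restricts the version digit to 1 through 8 and works on the raw byte reader):

// Sketch of the 8-4-4-4-12 shape validated group by group above.
fn has_uuid_shape(s: &str) -> bool {
	let groups: Vec<&str> = s.split('-').collect();
	let lens = [8, 4, 4, 4, 12];
	groups.len() == lens.len()
		&& groups
			.iter()
			.zip(lens)
			.all(|(g, len)| g.len() == len && g.bytes().all(|b| b.is_ascii_hexdigit()))
}

fn main() {
	assert!(has_uuid_shape("e72bee20-f49b-11ec-b939-0242ac120002"));
	assert!(!has_uuid_shape("e72bee20-f49b-11ec"));
	println!("ok");
}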
Some files were not shown because too many files have changed in this diff.