From 2755f572fcd5de8f714e70205bf6cfdc4803f7b9 Mon Sep 17 00:00:00 2001 From: Mees Delzenne Date: Wed, 10 Jan 2024 17:43:56 +0100 Subject: [PATCH] Introduce new experimental parser (#2885) Co-authored-by: Raphael Darley --- .github/workflows/ci.yml | 43 +- Cargo.lock | 767 ++++++----- Cargo.toml | 4 +- Cargo.toml.orig | 126 ++ Makefile.ci.toml | 12 + lib/Cargo.toml | 3 + lib/benches/parser.rs | 1 + lib/fuzz/Cargo.lock | 1 - lib/src/api/engine/any/mod.rs | 2 +- lib/src/api/engine/mod.rs | 1 + lib/src/dbs/capabilities.rs | 6 +- lib/src/fnc/mod.rs | 25 +- lib/src/fnc/script/tests/mod.rs | 1 + lib/src/iam/verify.rs | 11 +- lib/src/idx/ft/analyzer/mod.rs | 33 +- lib/src/idx/planner/plan.rs | 2 +- lib/src/key/graph/mod.rs | 2 +- lib/src/key/thing/mod.rs | 3 +- lib/src/kvs/ds.rs | 14 +- lib/src/kvs/mod.rs | 8 + lib/src/sql/algorithm.rs | 2 +- lib/src/sql/ending.rs | 8 +- lib/src/sql/escape.rs | 36 +- lib/src/sql/expression.rs | 25 +- lib/src/sql/idiom.rs | 19 +- lib/src/sql/kind.rs | 3 +- lib/src/sql/language.rs | 16 +- lib/src/sql/mod.rs | 2 +- lib/src/sql/order.rs | 1 + lib/src/sql/query.rs | 2 +- lib/src/sql/statements/define/scope.rs | 10 +- lib/src/sql/statements/define/user.rs | 25 + lib/src/sql/statements/ifelse.rs | 2 + lib/src/sql/statements/use.rs | 4 + lib/src/sql/strand.rs | 2 +- lib/src/sql/thing.rs | 2 +- lib/src/sql/value/changed.rs | 2 +- lib/src/sql/value/clear.rs | 2 +- lib/src/sql/value/compare.rs | 2 +- lib/src/sql/value/cut.rs | 2 +- lib/src/sql/value/dec.rs | 2 +- lib/src/sql/value/decrement.rs | 2 +- lib/src/sql/value/del.rs | 2 +- lib/src/sql/value/diff.rs | 2 +- lib/src/sql/value/each.rs | 2 +- lib/src/sql/value/every.rs | 2 +- lib/src/sql/value/extend.rs | 2 +- lib/src/sql/value/get.rs | 2 +- lib/src/sql/value/inc.rs | 2 +- lib/src/sql/value/increment.rs | 2 +- lib/src/sql/value/merge.rs | 2 +- lib/src/sql/value/patch.rs | 2 +- lib/src/sql/value/pick.rs | 2 +- lib/src/sql/value/put.rs | 2 +- lib/src/sql/value/replace.rs | 2 +- 
lib/src/sql/value/rid.rs | 2 +- lib/src/sql/value/serde/ser/value/mod.rs | 2 +- lib/src/sql/value/set.rs | 2 +- lib/src/sql/value/value.rs | 5 +- lib/src/sql/value/walk.rs | 2 +- lib/src/syn/common.rs | 164 ++- lib/src/syn/{error.rs => error/mod.rs} | 3 + .../{v1/error/mod.rs => error/nom_error.rs} | 7 - lib/src/syn/mod.rs | 17 +- lib/src/syn/test.rs | 50 - lib/src/syn/v1/{error/utils.rs => error.rs} | 8 +- lib/src/syn/v1/error/render.rs | 1 - lib/src/syn/v1/expression.rs | 30 +- lib/src/syn/v1/function.rs | 2 +- lib/src/syn/v1/idiom.rs | 3 +- lib/src/syn/v1/literal/datetime.rs | 6 +- lib/src/syn/v1/literal/mod.rs | 2 +- lib/src/syn/v1/literal/strand.rs | 2 +- lib/src/syn/v1/literal/uuid.rs | 2 +- lib/src/syn/v1/mod.rs | 14 +- lib/src/syn/v1/operator.rs | 10 + lib/src/syn/v1/part/data.rs | 1 - lib/src/syn/v1/part/mod.rs | 3 +- lib/src/syn/v1/part/permission.rs | 2 +- lib/src/syn/v1/part/split.rs | 2 +- lib/src/syn/v1/stmt/define/user.rs | 21 +- lib/src/syn/v1/stmt/option.rs | 6 +- lib/src/syn/v1/subquery.rs | 1 - lib/src/syn/v1/test.rs | 48 +- lib/src/syn/v1/thing.rs | 4 +- lib/src/syn/v1/value/mod.rs | 6 +- lib/src/syn/v2/lexer/byte.rs | 387 ++++++ lib/src/syn/v2/lexer/char.rs | 37 + lib/src/syn/v2/lexer/datetime.rs | 267 ++++ lib/src/syn/v2/lexer/duration.rs | 170 +++ lib/src/syn/v2/lexer/ident.rs | 164 +++ lib/src/syn/v2/lexer/js.rs | 97 ++ lib/src/syn/v2/lexer/keywords.rs | 285 ++++ lib/src/syn/v2/lexer/mod.rs | 417 ++++++ lib/src/syn/v2/lexer/number.rs | 257 ++++ lib/src/syn/v2/lexer/reader.rs | 157 +++ lib/src/syn/v2/lexer/strand.rs | 95 ++ lib/src/syn/v2/lexer/test.rs | 482 +++++++ lib/src/syn/v2/lexer/unicode.rs | 68 + lib/src/syn/v2/lexer/uuid.rs | 124 ++ lib/src/syn/v2/mod.rs | 123 ++ lib/src/syn/v2/parser/basic.rs | 302 +++++ lib/src/syn/v2/parser/builtin.rs | 500 +++++++ lib/src/syn/v2/parser/error.rs | 230 ++++ lib/src/syn/v2/parser/expression.rs | 400 ++++++ lib/src/syn/v2/parser/function.rs | 302 +++++ lib/src/syn/v2/parser/idiom.rs | 824 +++++++++++ 
lib/src/syn/v2/parser/json.rs | 73 + lib/src/syn/v2/parser/kind.rs | 438 ++++++ lib/src/syn/v2/parser/mac.rs | 79 ++ lib/src/syn/v2/parser/mod.rs | 304 +++++ lib/src/syn/v2/parser/object.rs | 715 ++++++++++ lib/src/syn/v2/parser/prime.rs | 584 ++++++++ lib/src/syn/v2/parser/stmt/create.rs | 27 + lib/src/syn/v2/parser/stmt/define.rs | 655 +++++++++ lib/src/syn/v2/parser/stmt/delete.rs | 31 + lib/src/syn/v2/parser/stmt/if.rs | 85 ++ lib/src/syn/v2/parser/stmt/insert.rs | 97 ++ lib/src/syn/v2/parser/stmt/mod.rs | 551 ++++++++ lib/src/syn/v2/parser/stmt/parts.rs | 343 +++++ lib/src/syn/v2/parser/stmt/relate.rs | 92 ++ lib/src/syn/v2/parser/stmt/remove.rs | 121 ++ lib/src/syn/v2/parser/stmt/select.rs | 189 +++ lib/src/syn/v2/parser/stmt/update.rs | 29 + lib/src/syn/v2/parser/test/mod.rs | 3 + lib/src/syn/v2/parser/test/stmt.rs | 1205 +++++++++++++++++ lib/src/syn/v2/parser/test/streaming.rs | 680 ++++++++++ lib/src/syn/v2/parser/test/value.rs | 66 + lib/src/syn/v2/parser/thing.rs | 362 +++++ lib/src/syn/v2/parser/token_buffer.rs | 71 + lib/src/syn/v2/test.rs | 58 + lib/src/syn/v2/token/keyword.rs | 219 +++ lib/src/syn/v2/token/mac.rs | 300 ++++ lib/src/syn/v2/token/mod.rs | 385 ++++++ lib/test.surql | 18 +- lib/tests/api/backup.rs | 10 +- lib/tests/api/mod.rs | 15 +- lib/tests/changefeeds.rs | 8 +- lib/tests/complex.rs | 6 +- lib/tests/create.rs | 6 +- lib/tests/datetimes.rs | 4 +- lib/tests/escape.rs | 4 +- lib/tests/function.rs | 112 +- lib/tests/future.rs | 6 +- lib/tests/geometry.rs | 22 +- lib/tests/group.rs | 54 +- lib/tests/script.rs | 4 +- lib/tests/vector.rs | 12 +- src/net/params.rs | 9 +- tests/cli_integration.rs | 29 +- tests/http_integration.rs | 2 +- tests/ws_integration.rs | 2 - 152 files changed, 14640 insertions(+), 823 deletions(-) create mode 100644 Cargo.toml.orig rename lib/src/syn/{error.rs => error/mod.rs} (99%) rename lib/src/syn/{v1/error/mod.rs => error/nom_error.rs} (98%) delete mode 100644 lib/src/syn/test.rs rename 
lib/src/syn/v1/{error/utils.rs => error.rs} (94%) delete mode 100644 lib/src/syn/v1/error/render.rs create mode 100644 lib/src/syn/v2/lexer/byte.rs create mode 100644 lib/src/syn/v2/lexer/char.rs create mode 100644 lib/src/syn/v2/lexer/datetime.rs create mode 100644 lib/src/syn/v2/lexer/duration.rs create mode 100644 lib/src/syn/v2/lexer/ident.rs create mode 100644 lib/src/syn/v2/lexer/js.rs create mode 100644 lib/src/syn/v2/lexer/keywords.rs create mode 100644 lib/src/syn/v2/lexer/mod.rs create mode 100644 lib/src/syn/v2/lexer/number.rs create mode 100644 lib/src/syn/v2/lexer/reader.rs create mode 100644 lib/src/syn/v2/lexer/strand.rs create mode 100644 lib/src/syn/v2/lexer/test.rs create mode 100644 lib/src/syn/v2/lexer/unicode.rs create mode 100644 lib/src/syn/v2/lexer/uuid.rs create mode 100644 lib/src/syn/v2/mod.rs create mode 100644 lib/src/syn/v2/parser/basic.rs create mode 100644 lib/src/syn/v2/parser/builtin.rs create mode 100644 lib/src/syn/v2/parser/error.rs create mode 100644 lib/src/syn/v2/parser/expression.rs create mode 100644 lib/src/syn/v2/parser/function.rs create mode 100644 lib/src/syn/v2/parser/idiom.rs create mode 100644 lib/src/syn/v2/parser/json.rs create mode 100644 lib/src/syn/v2/parser/kind.rs create mode 100644 lib/src/syn/v2/parser/mac.rs create mode 100644 lib/src/syn/v2/parser/mod.rs create mode 100644 lib/src/syn/v2/parser/object.rs create mode 100644 lib/src/syn/v2/parser/prime.rs create mode 100644 lib/src/syn/v2/parser/stmt/create.rs create mode 100644 lib/src/syn/v2/parser/stmt/define.rs create mode 100644 lib/src/syn/v2/parser/stmt/delete.rs create mode 100644 lib/src/syn/v2/parser/stmt/if.rs create mode 100644 lib/src/syn/v2/parser/stmt/insert.rs create mode 100644 lib/src/syn/v2/parser/stmt/mod.rs create mode 100644 lib/src/syn/v2/parser/stmt/parts.rs create mode 100644 lib/src/syn/v2/parser/stmt/relate.rs create mode 100644 lib/src/syn/v2/parser/stmt/remove.rs create mode 100644 lib/src/syn/v2/parser/stmt/select.rs create 
mode 100644 lib/src/syn/v2/parser/stmt/update.rs create mode 100644 lib/src/syn/v2/parser/test/mod.rs create mode 100644 lib/src/syn/v2/parser/test/stmt.rs create mode 100644 lib/src/syn/v2/parser/test/streaming.rs create mode 100644 lib/src/syn/v2/parser/test/value.rs create mode 100644 lib/src/syn/v2/parser/thing.rs create mode 100644 lib/src/syn/v2/parser/token_buffer.rs create mode 100644 lib/src/syn/v2/test.rs create mode 100644 lib/src/syn/v2/token/keyword.rs create mode 100644 lib/src/syn/v2/token/mac.rs create mode 100644 lib/src/syn/v2/token/mod.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a97b47f2..08594e48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -180,7 +180,7 @@ jobs: - name: Run CLI integration tests run: cargo make ci-cli-integration - + - name: Debug info if: always() run: | @@ -189,7 +189,7 @@ jobs: df -h ps auxf cat /tmp/surrealdb.log || true - + http-server: name: HTTP integration tests @@ -326,6 +326,45 @@ jobs: path: target/llvm-cov/html/ retention-days: 5 + test-parser: + name: Test workspace with experimental parser + runs-on: ubuntu-latest + steps: + + - name: Install stable toolchain + uses: dtolnay/rust-toolchain@stable + with: + toolchain: 1.71.1 + + - name: Checkout sources + uses: actions/checkout@v4 + + - name: Setup cache + uses: Swatinem/rust-cache@v2 + with: + save-if: ${{ github.ref == 'refs/heads/main' }} + + - name: Install dependencies + run: | + sudo apt-get -y update + + - name: Free up some disk space + run: | + (set -x; df -h) + # Free up some disk space by removing unused files + (set -x; sudo rm -rf /imagegeneration || true) + (set -x; sudo rm -rf /opt/az || true) + (set -x; sudo rm -rf /opt/hostedtoolcache || true) + (set -x; sudo rm -rf /opt/google || true) + (set -x; sudo rm -rf /opt/pipx || true) + (set -x; df -h) + + - name: Install cargo-make + run: cargo install --debug --locked cargo-make + + - name: Test workspace for experimental_parser + run: cargo make 
test-experimental-parser + ws-engine: name: WebSocket engine runs-on: ubuntu-latest diff --git a/Cargo.lock b/Cargo.lock index 5e26799a..1e43a2b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -32,15 +32,15 @@ dependencies = [ [[package]] name = "actix-http" -version = "3.4.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92ef85799cba03f76e4f7c10f533e66d87c9a7e7055f3391f09000ad8351bc9" +checksum = "129d4c88e98860e1758c5de288d1632b07970a16d59bdf7b8d66053d582bb71f" dependencies = [ "actix-codec", "actix-rt", "actix-service", "actix-utils", - "ahash 0.8.6", + "ahash 0.8.7", "base64 0.21.5", "bitflags 2.4.1", "brotli", @@ -66,7 +66,7 @@ dependencies = [ "tokio", "tokio-util", "tracing", - "zstd 0.12.4", + "zstd", ] [[package]] @@ -76,14 +76,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] name = "actix-router" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799" +checksum = "d22475596539443685426b6bdadb926ad0ecaefdfc5fb05e5e3441f15463c511" dependencies = [ "bytestring", "http 0.2.11", @@ -114,7 +114,7 @@ dependencies = [ "futures-core", "futures-util", "mio", - "socket2 0.5.5", + "socket2", "tokio", "tracing", ] @@ -142,9 +142,9 @@ dependencies = [ [[package]] name = "actix-web" -version = "4.4.0" +version = "4.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4a5b5e29603ca8c94a77c65cf874718ceb60292c5a5c3e5f4ace041af462b9" +checksum = "e43428f3bf11dee6d166b00ec2df4e3aa8cc1606aaa0b7433c146852e2f4e03b" dependencies = [ "actix-codec", "actix-http", @@ -155,7 +155,7 @@ dependencies = [ "actix-service", "actix-utils", "actix-web-codegen", - "ahash 0.8.6", + "ahash 0.8.7", "bytes", "bytestring", 
"cfg-if", @@ -175,7 +175,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "smallvec", - "socket2 0.5.5", + "socket2", "time", "url", ] @@ -189,7 +189,7 @@ dependencies = [ "actix-router", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -229,9 +229,9 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.6" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91429305e9f0a25f6205c5b8e0d2db09e0708a7a6df0f42212bb56c32c8ac97a" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" dependencies = [ "cfg-if", "getrandom 0.2.11", @@ -293,9 +293,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44" +checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6" dependencies = [ "anstyle", "anstyle-parse", @@ -313,30 +313,30 @@ checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" [[package]] name = "anstyle-parse" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.1" +version = "3.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" dependencies = [ "anstyle", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -347,9 +347,9 @@ checksum = "ea50b14b7a4b9343f8c627a7a53c52076482bd4bdad0a24fd3ec533ed616cc2c" [[package]] name = "anyhow" -version = "1.0.75" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" [[package]] name = "approx" @@ -414,9 +414,9 @@ dependencies = [ [[package]] name = "assert_fs" -version = "1.0.13" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f070617a68e5c2ed5d06ee8dd620ee18fb72b99f6c094bed34cf8ab07c875b48" +checksum = "adc5d78e9048d836d12a0c0040ca5f45b18a94d204b4ba4f677a8a7de162426b" dependencies = [ "anstyle", "doc-comment", @@ -450,8 +450,8 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", - "zstd 0.13.0", - "zstd-safe 7.0.0", + "zstd", + "zstd-safe", ] [[package]] @@ -460,11 +460,11 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17ae5ebefcc48e7452b4987947920dac9450be1110cadf34d1b8c116bdbaf97c" dependencies = [ - "async-lock 3.1.2", + "async-lock 3.2.0", "async-task", "concurrent-queue", "fastrand 2.0.1", - "futures-lite 2.0.1", + "futures-lite 2.1.0", "slab", ] @@ -479,11 +479,11 @@ dependencies = [ [[package]] name = "async-lock" -version = "3.1.2" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dea8b3453dd7cc96711834b75400d671b73e3656975fa68d9f277163b7f7e316" +checksum = "7125e42787d53db9dd54261812ef17e937c95a51e4d291373b670342fa44310c" dependencies = [ - "event-listener 4.0.0", + "event-listener 
4.0.2", "event-listener-strategy", "pin-project-lite", ] @@ -507,7 +507,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -529,24 +529,24 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] name = "async-task" -version = "4.5.0" +version = "4.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4eb2cdb97421e01129ccb49169d8279ed21e829929144f4a22a6e54ac549ca1" +checksum = "fbb36e985947064623dbd357f727af08ffd077f93d696782f3c56365fa2e2799" [[package]] name = "async-trait" -version = "0.1.74" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -568,9 +568,9 @@ checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba" [[package]] name = "atomic-polyfill" -version = "0.1.11" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3ff7eb3f316534d83a8a2c3d1674ace8a5a71198eba31e2e2b597833f699b28" +checksum = "8cf2bce30dfe09ef0bfaef228b9d414faaf7e563035494d7fe092dba54b300f4" dependencies = [ "critical-section", ] @@ -595,8 +595,8 @@ dependencies = [ "futures-util", "headers", "http 0.2.11", - "http-body 0.4.5", - "hyper 0.14.27", + "http-body 0.4.6", + "hyper 0.14.28", "itoa", "matchit", "memchr", @@ -620,18 +620,18 @@ dependencies = [ [[package]] name = "axum" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "202651474fe73c62d9e0a56c6133f7a0ff1dc1c8cf7a5b03381af2a26553ac9d" +checksum = 
"d09dbe0e490df5da9d69b36dca48a76635288a82f92eca90024883a56202026d" dependencies = [ "async-trait", - "axum-core 0.4.1", + "axum-core 0.4.2", "bytes", "futures-util", "http 1.0.0", "http-body 1.0.0", "http-body-util", - "hyper 1.0.1", + "hyper 1.1.0", "hyper-util", "itoa", "matchit", @@ -649,6 +649,7 @@ dependencies = [ "tower", "tower-layer", "tower-service", + "tracing", ] [[package]] @@ -657,7 +658,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f5ffe4637708b326c621d5494ab6c91dcf62ee440fa6ee967d289315a9c6f81" dependencies = [ - "axum 0.7.2", + "axum 0.7.3", "forwarded-header-value", "serde", ] @@ -672,7 +673,7 @@ dependencies = [ "bytes", "futures-util", "http 0.2.11", - "http-body 0.4.5", + "http-body 0.4.6", "mime", "rustversion", "tower-layer", @@ -682,9 +683,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77cb22c689c44d4c07b0ab44ebc25d69d8ae601a2f28fb8d672d344178fa17aa" +checksum = "e87c8503f93e6d144ee5690907ba22db7ba79ab001a932ab99034f0fe836b3df" dependencies = [ "async-trait", "bytes", @@ -698,13 +699,14 @@ dependencies = [ "sync_wrapper", "tower-layer", "tower-service", + "tracing", ] [[package]] name = "axum-example" version = "0.1.0" dependencies = [ - "axum 0.7.2", + "axum 0.7.3", "serde", "surrealdb", "thiserror", @@ -724,7 +726,7 @@ dependencies = [ "form_urlencoded", "futures-util", "http 0.2.11", - "http-body 0.4.5", + "http-body 0.4.6", "mime", "percent-encoding", "pin-project-lite", @@ -745,7 +747,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -758,8 +760,8 @@ dependencies = [ "bytes", "futures-util", "http 0.2.11", - "http-body 0.4.5", - "hyper 0.14.27", + "http-body 0.4.6", + "hyper 0.14.28", "pin-project-lite", "rustls", "rustls-pemfile", @@ -842,7 +844,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 
2.0.39", + "syn 2.0.47", "which", ] @@ -865,7 +867,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.39", + "syn 2.0.47", "which", ] @@ -944,9 +946,9 @@ dependencies = [ [[package]] name = "borsh" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf617fabf5cdbdc92f774bfe5062d870f228b80056d41180797abf48bed4056e" +checksum = "26d4d6dafc1a3bb54687538972158f07b2c948bc57d5890df22c0739098b3028" dependencies = [ "borsh-derive", "cfg_aliases", @@ -954,15 +956,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f404657a7ea7b5249e36808dff544bc88a28f26e0ac40009f674b7a009d14be3" +checksum = "bf4918709cc4dd777ad2b6303ed03cb37f3ca0ccede8c1b0d28ac6db8f4710e0" dependencies = [ "once_cell", - "proc-macro-crate 2.0.0", + "proc-macro-crate 2.0.1", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", "syn_derive", ] @@ -989,9 +991,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "542f33a8835a0884b006a0c3df3dadd99c0c3f296ed26c2fdc8028e01ad6230c" +checksum = "c48f0051a4b4c5e0b6d365cd04af53aeaa209e3cc15ec2cdb69e73cc87fbd0dc" dependencies = [ "memchr", "serde", @@ -1081,9 +1083,9 @@ dependencies = [ [[package]] name = "cedar-policy" -version = "2.4.2" +version = "2.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d91e3b10a0f7f2911774d5e49713c4d25753466f9e11d1cd2ec627f8a2dc857" +checksum = "31ff2003d0aba0a4b2e5212660321d63dc7c36efe636d6ca1882d489cbc0bef8" dependencies = [ "cedar-policy-core", "cedar-policy-validator", @@ -1098,9 +1100,9 @@ dependencies = [ [[package]] name = "cedar-policy-core" -version = "2.4.2" +version = "2.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cd2315591c6b7e18f8038f0a0529f254235fd902b6c217aabc04f2459b0d9995" +checksum = "9c52f9666c7cb1b6f14a6e77d3ffcffa20fd3e1012ac8dcc393498c33ff632c3" dependencies = [ "either", "ipnet", @@ -1121,9 +1123,9 @@ dependencies = [ [[package]] name = "cedar-policy-validator" -version = "2.4.2" +version = "2.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e756e1b2a5da742ed97e65199ad6d0893e9aa4bd6b34be1de9e70bd1e6adc7df" +checksum = "76a63c1a72bcafda800830cbdde316162074b341b7d59bd4b1cea6156f22dfa7" dependencies = [ "cedar-policy-core", "itertools 0.10.5", @@ -1211,20 +1213,20 @@ dependencies = [ [[package]] name = "clang-sys" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" dependencies = [ "glob", "libc", - "libloading", + "libloading 0.8.1", ] [[package]] name = "clap" -version = "4.4.11" +version = "4.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2" +checksum = "dcfab8ba68f3668e89f6ff60f5b205cea56aa7b769451a59f34b8682f51c056d" dependencies = [ "clap_builder", "clap_derive", @@ -1232,9 +1234,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.4.11" +version = "4.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb" +checksum = "fb7fb5e4e979aec3be7791562fcba452f94ad85e954da024396433e0e25a79e9" dependencies = [ "anstream", "anstyle", @@ -1254,7 +1256,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -1301,9 +1303,9 @@ dependencies = [ [[package]] name = "const-oid" -version = "0.9.5" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "28c122c3980598d243d63d9a704629a2d748d101f278052ff068be5a4423ab6f" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" [[package]] name = "convert_case" @@ -1333,9 +1335,9 @@ dependencies = [ [[package]] name = "core-foundation" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ "core-foundation-sys", "libc", @@ -1343,9 +1345,9 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" [[package]] name = "cpp_demangle" @@ -1420,9 +1422,9 @@ checksum = "7059fff8937831a9ae6f0fe4d658ffabf58f2ca96aa9dec1c889f936f705f216" [[package]] name = "crossbeam-channel" -version = "0.5.8" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +checksum = "82a9b73a36529d9c47029b9fb3a6f0ea3cc916a261195352ba19e770fc1748b2" dependencies = [ "cfg-if", "crossbeam-utils", @@ -1430,9 +1432,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -1441,22 +1443,20 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.15" +version = "0.9.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +checksum = "0e3681d554572a651dda4186cd47240627c3d0114d45a95f6ad27f2f22e7548d" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset", - "scopeguard", ] [[package]] name = "crossbeam-utils" -version = "0.8.16" +version = "0.8.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294" +checksum = "c3a430a770ebd84726f584a90ee7f020d28db52c6d02138900f22341f866d39c" dependencies = [ "cfg-if", ] @@ -1498,7 +1498,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -1509,7 +1509,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -1572,9 +1572,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.3.9" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" dependencies = [ "powerfmt", "serde", @@ -1599,7 +1599,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -1617,9 +1617,9 @@ dependencies = [ [[package]] name = "deunicode" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a1abaf4d861455be59f64fd2b55606cb151fce304ede7165f410243ce96bde6" +checksum = "3ae2a35373c5c74340b79ae6780b498b2b183915ec5dacf263aac5a099bf485a" [[package]] name = "diff" @@ -1780,9 +1780,9 @@ checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" [[package]] name = "event-listener" -version = "4.0.0" +version = "4.0.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "770d968249b5d99410d61f5bf89057f3199a077a04d087092f58e7d10692baae" +checksum = "218a870470cce1469024e9fb66b901aa983929d81304a1cdb299f28118e550d5" dependencies = [ "concurrent-queue", "parking", @@ -1795,7 +1795,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "958e4d70b6d5e81971bebec42271ec641e7ff4e170a6fa605f2b8a8b65cb97d3" dependencies = [ - "event-listener 4.0.0", + "event-listener 4.0.2", "pin-project-lite", ] @@ -1838,14 +1838,14 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.22" +version = "0.2.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0" +checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.3.5", - "windows-sys 0.48.0", + "redox_syscall", + "windows-sys 0.52.0", ] [[package]] @@ -1971,7 +1971,7 @@ checksum = "83c8d52fe8b46ab822b4decdcc0d6d85aeedfc98f0d52ba2bd4aec4a97807516" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", "try_map", ] @@ -1998,9 +1998,9 @@ checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" [[package]] name = "futures" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0290714b38af9b4a7b094b8a37086d1b4e61f2df9122c3cad2577669145335" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" dependencies = [ "futures-channel", "futures-core", @@ -2013,9 +2013,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4dd66668b557604244583e3e1e1eada8c5c2e96a6d0d6653ede395b78bbacb" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ 
"futures-core", "futures-sink", @@ -2036,15 +2036,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb1d22c66e66d9d72e1758f0bd7d4fd0bee04cad842ee34587d68c07e45d088c" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" [[package]] name = "futures-executor" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f4fb8693db0cf099eadcca0efe2a5a22e4550f98ed16aba6c48700da29597bc" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" dependencies = [ "futures-core", "futures-task", @@ -2053,9 +2053,9 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf34a163b5c4c52d0478a4d757da8fb65cabef42ba90515efee0f6f9fa45aaa" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" [[package]] name = "futures-lite" @@ -2074,40 +2074,39 @@ dependencies = [ [[package]] name = "futures-lite" -version = "2.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3831c2651acb5177cbd83943f3d9c8912c5ad03c76afcc0e9511ba568ec5ebb" +checksum = "aeee267a1883f7ebef3700f262d2d54de95dfaf38189015a74fdc4e0c7ad8143" dependencies = [ "fastrand 2.0.1", "futures-core", "futures-io", - "memchr", "parking", "pin-project-lite", ] [[package]] name = "futures-macro" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] name = "futures-sink" -version = "0.3.29" +version = "0.3.30" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e36d3378ee38c2a36ad710c5d30c2911d752cb941c00c72dbabfb786a7970817" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" [[package]] name = "futures-task" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efd193069b0ddadc69c46389b740bbccdd97203899b48d09c5f7969591d6bae2" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" [[package]] name = "futures-timer" @@ -2117,9 +2116,9 @@ checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" [[package]] name = "futures-util" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19526d624e703a3179b3d322efec918b6246ea0fa51d41124525f00f1cc8104" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-channel", "futures-core", @@ -2247,15 +2246,15 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "globset" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d" +checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1" dependencies = [ "aho-corasick", "bstr", - "fnv", "log", - "regex", + "regex-automata 0.4.3", + "regex-syntax 0.8.2", ] [[package]] @@ -2337,7 +2336,7 @@ version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" dependencies = [ - "ahash 0.8.6", + "ahash 0.8.7", "allocator-api2", ] @@ -2367,9 +2366,9 @@ dependencies = [ [[package]] name = "heapless" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"db04bc24a18b9ea980628ecf00e6c0264f3c1426dac36c00cb49b6fbad8b0743" +checksum = "cdc6457c0eb62c71aac4bc17216026d8410337c4126773b9c5daba343f17964f" dependencies = [ "atomic-polyfill", "hash32", @@ -2407,11 +2406,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.5" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2438,9 +2437,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", "http 0.2.11", @@ -2517,9 +2516,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.27" +version = "0.14.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" +checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" dependencies = [ "bytes", "futures-channel", @@ -2527,12 +2526,12 @@ dependencies = [ "futures-util", "h2 0.3.22", "http 0.2.11", - "http-body 0.4.5", + "http-body 0.4.6", "httparse", "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.10", + "socket2", "tokio", "tower-service", "tracing", @@ -2541,9 +2540,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "403f9214f3e703236b221f1a9cd88ec8b4adfa5296de01ab96216361f4692f56" +checksum = "fb5aa53871fc917b1a9ed87b683a5d86db645e23acb32c2e0785a353e522fb75" dependencies = 
[ "bytes", "futures-channel", @@ -2566,7 +2565,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.11", - "hyper 0.14.27", + "hyper 0.14.28", "rustls", "tokio", "tokio-rustls", @@ -2578,7 +2577,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper 0.14.27", + "hyper 0.14.28", "pin-project-lite", "tokio", "tokio-io-timeout", @@ -2591,7 +2590,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.27", + "hyper 0.14.28", "native-tls", "tokio", "tokio-native-tls", @@ -2599,29 +2598,27 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ca339002caeb0d159cc6e023dff48e199f081e42fa039895c7c6f38b37f2e9d" +checksum = "bdea9aac0dbe5a9240d68cfd9501e2db94222c6dc06843e06640b9e07f0fdc67" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.0.0", "http-body 1.0.0", - "hyper 1.0.1", + "hyper 1.1.0", "pin-project-lite", - "socket2 0.5.5", + "socket2", "tokio", - "tower", - "tower-service", "tracing", ] [[package]] name = "iana-time-zone" -version = "0.1.58" +version = "0.1.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -2658,17 +2655,16 @@ dependencies = [ [[package]] name = "ignore" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492" 
+checksum = "747ad1b4ae841a78e8aba0d63adbfbeaea26b517b63705d47856b73015d27060" dependencies = [ + "crossbeam-deque", "globset", - "lazy_static", "log", "memchr", - "regex", + "regex-automata 0.4.3", "same-file", - "thread_local", "walkdir", "winapi-util", ] @@ -2742,7 +2738,7 @@ version = "0.11.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" dependencies = [ - "ahash 0.8.6", + "ahash 0.8.7", "indexmap 2.1.0", "is-terminal", "itoa", @@ -2780,13 +2776,13 @@ checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "is-terminal" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" dependencies = [ "hermit-abi", "rustix", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -2809,9 +2805,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jemalloc-sys" @@ -2915,9 +2911,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.150" +version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" [[package]] name = "libloading" @@ -2929,6 +2925,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "libm" version = "0.2.8" @@ -2953,7 +2959,7 @@ checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8" dependencies = [ "bitflags 2.4.1", "libc", - "redox_syscall 0.4.1", + "redox_syscall", ] [[package]] @@ -3085,28 +3091,19 @@ dependencies = [ [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] name = "memmap2" -version = "0.8.0" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a5a03cefb0d953ec0be133036f14e109412fa594edc2f77227249db66cc3ed" +checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92" dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] - [[package]] name = "miette" version = "5.10.0" @@ -3127,7 +3124,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -3172,9 +3169,9 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.9" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" dependencies = [ "libc", "log", @@ -3384,9 +3381,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.1" +version = "0.32.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ "memchr", ] @@ -3414,9 +3411,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" [[package]] name = "oorandom" @@ -3426,9 +3423,9 @@ checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] name = "openssl" -version = "0.10.60" +version = "0.10.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" +checksum = "8cde4d2d9200ad5909f8dac647e29482e07c3a35de8a13fce7c9c7747ad9f671" dependencies = [ "bitflags 2.4.1", "cfg-if", @@ -3447,7 +3444,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -3458,9 +3455,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.96" +version = "0.9.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" +checksum = "c1665caf8ab2dc9aef43d1c0023bd904633a6a05cb30b0ad59bec2ae986e57a7" dependencies = [ "cc", "libc", @@ -3556,7 +3553,7 @@ dependencies = [ "flate2", "lazy_static", "libc", - "libloading", + "libloading 0.7.4", "ndarray", "tar", "thiserror", @@ -3595,7 +3592,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "smallvec", 
"windows-targets 0.48.5", ] @@ -3653,9 +3650,9 @@ dependencies = [ [[package]] name = "pem" -version = "3.0.2" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3163d2912b7c3b52d651a055f2c7eec9ba5cd22d26ef75b8dd3a59980b185923" +checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" dependencies = [ "base64 0.21.5", "serde", @@ -3696,6 +3693,40 @@ dependencies = [ "rustc_version", ] +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand 0.8.5", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator", + "phf_shared 0.11.2", + "proc-macro2", + "quote", + "syn 2.0.47", + "unicase", +] + [[package]] name = "phf_shared" version = "0.10.0" @@ -3705,6 +3736,16 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", + "unicase", +] + [[package]] name = "pico-args" version = "0.5.0" @@ -3728,7 +3769,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -3766,9 +3807,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.27" +version = "0.3.28" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" [[package]] name = "plotters" @@ -3868,12 +3909,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -3888,11 +3929,12 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8366a6159044a37876a2b9817124296703c586a5c92e2c53751fa06d8d43e8" +checksum = "97dc5fea232fc28d2f597b37c4876b348a40e33f3b02cc975c8d006d78d94b1a" dependencies = [ - "toml_edit 0.20.7", + "toml_datetime", + "toml_edit 0.20.2", ] [[package]] @@ -3921,9 +3963,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "907a61bd0f64c2f29cd1cf1dc34d05176426a3f504a78010f08416ddb7b13708" dependencies = [ "unicode-ident", ] @@ -4015,7 +4057,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f69f8d22fa3f34f3083d9a4375c038732c7a7e964de1beb81c544da92dfc40b8" dependencies = [ - "ahash 0.8.6", + "ahash 0.8.7", "equivalent", "hashbrown 0.14.3", "parking_lot", @@ -4023,9 +4065,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -4159,21 +4201,12 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52c4f3084aa3bc7dfbba4eff4fab2a54db4324965d8872ab933565e6fbd83bc6" dependencies = [ - "pem 3.0.2", + "pem 3.0.3", "ring 0.16.20", "time", "yasna", ] -[[package]] -name = "redox_syscall" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -4196,22 +4229,22 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acde58d073e9c79da00f2b5b84eed919c8326832648a5b109b3fce1bb1175280" +checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925" +checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -4269,9 +4302,9 @@ dependencies = [ [[package]] name = "relative-path" -version = "1.9.0" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca" +checksum = "e898588f33fdd5b9420719948f9f2a32c922a246964576f71ba7f24f80610fbc" [[package]] name = "rend" @@ -4284,9 +4317,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.22" +version = "0.11.23" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b" +checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" dependencies = [ "async-compression", "base64 0.21.5", @@ -4296,8 +4329,8 @@ dependencies = [ "futures-util", "h2 0.3.22", "http 0.2.11", - "http-body 0.4.5", - "hyper 0.14.27", + "http-body 0.4.6", + "hyper 0.14.28", "hyper-rustls", "hyper-tls", "ipnet", @@ -4363,7 +4396,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -4407,9 +4440,9 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.5" +version = "0.17.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb0205304757e5d899b9c2e448b867ffd03ae7f988002e47cd24954391394d0b" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", "getrandom 0.2.11", @@ -4421,12 +4454,13 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.42" +version = "0.7.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0200c8230b013893c0b2d6213d6ec64ed2b9be2e0e016682b7224ff82cff5c58" +checksum = "527a97cdfef66f65998b5f3b637c26f5a5ec09cc52a3f9932313ac645f4190f5" dependencies = [ "bitvec", "bytecheck", + "bytes", "hashbrown 0.12.3", "ptr_meta", "rend", @@ -4438,9 +4472,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.42" +version = "0.7.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e06b915b5c230a17d7a736d1e2e63ee753c256a8614ef3f5147b13a4f5541d" +checksum = "b5c462a1328c8e67e4d6dbad1eb0355dd43e8ab432c6e227a43657f16ade5033" dependencies = [ "proc-macro2", "quote", @@ -4543,7 +4577,7 @@ dependencies = [ "proc-macro2", "quote", "rquickjs-core", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -4558,9 +4592,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.4" +version = "0.9.6" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a3211b01eea83d80687da9eef70e39d65144a3894866a5153a2723e425a157f" +checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" dependencies = [ "const-oid", "digest", @@ -4646,9 +4680,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.26" +version = "0.38.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" dependencies = [ "bitflags 2.4.1", "errno", @@ -4684,7 +4718,7 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.5", + "ring 0.17.7", "untrusted 0.9.0", ] @@ -4726,14 +4760,14 @@ checksum = "5a32af5427251d2e4be14fc151eabe18abb4a7aad5efee7044da9f096c906a43" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] name = "ryu" -version = "1.0.15" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" [[package]] name = "salsa20" @@ -4755,11 +4789,11 @@ dependencies = [ [[package]] name = "schannel" -version = "0.1.22" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c3733bf4cf7ea0880754e19cb5a462007c4a8c1914bff372ccc95b464f1df88" +checksum = "fbc91545643bcf3a0bbb6569265615222618bdf33ce4ffbbd13c4bbd4c093534" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -4786,7 +4820,7 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies 
= [ - "ring 0.17.5", + "ring 0.17.7", "untrusted 0.9.0", ] @@ -4845,13 +4879,23 @@ dependencies = [ [[package]] name = "serde_bytes" -version = "0.11.12" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab33ec92f677585af6d88c65593ae2375adde54efdbf16d597f2cbc7a6d368ff" +checksum = "8b8497c313fd43ab992087548117643f6fcd935cbf36f176ffda0aacf9591734" dependencies = [ "serde", ] +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + [[package]] name = "serde_derive" version = "1.0.193" @@ -4860,14 +4904,14 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] name = "serde_html_form" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde65b75f2603066b78d6fa239b2c07b43e06ead09435f60554d3912962b4a3c" +checksum = "224e6a14f315852940f3ec103125aa6482f0e224732ed91ed3330ed633077c34" dependencies = [ "form_urlencoded", "indexmap 2.1.0", @@ -4947,7 +4991,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -4972,7 +5016,7 @@ checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -5103,19 +5147,9 @@ dependencies = [ [[package]] name = "snap" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" - -[[package]] -name = "socket2" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d" -dependencies = [ - "libc", - "winapi", -] +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" @@ -5129,9 +5163,9 @@ dependencies = [ [[package]] name = "spade" -version = "2.4.1" +version = "2.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87a3ef2efbc408c9051c1a27ce7edff430d74531d31a480b7ca4f618072c2670" +checksum = "bd774eb23cff002036706e6ea83c3f4ab4c80dad89da76fe16d49f77ab71682f" dependencies = [ "hashbrown 0.14.3", "num-traits", @@ -5166,9 +5200,9 @@ dependencies = [ [[package]] name = "spki" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d1e996ef02c474957d681f1b05213dfb0abab947b446a62d37770b23500184a" +checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d" dependencies = [ "base64ct", "der", @@ -5232,7 +5266,7 @@ dependencies = [ "new_debug_unreachable", "once_cell", "parking_lot", - "phf_shared", + "phf_shared 0.10.0", "precomputed-hash", ] @@ -5267,8 +5301,8 @@ dependencies = [ "futures-util", "glob", "http 0.2.11", - "http-body 0.4.5", - "hyper 0.14.27", + "http-body 0.4.6", + "hyper 0.14.28", "ipnet", "jemallocator", "mimalloc", @@ -5287,6 +5321,7 @@ dependencies = [ "rustyline", "semver", "serde", + "serde_cbor", "serde_json", "serial_test", "surrealdb", @@ -5357,6 +5392,7 @@ dependencies = [ "path-clean", "pbkdf2", "pharos", + "phf", "pin-project-lite", "pprof", "quick_cache", @@ -5397,6 +5433,7 @@ dependencies = [ "tracing-subscriber", "trice", "ulid", + "unicase", "url", "uuid", "wasm-bindgen-futures", @@ -5478,9 +5515,9 @@ dependencies = [ [[package]] name = "symbolic-common" -version = "12.7.0" +version = "12.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39eac77836da383d35edbd9ff4585b4fc1109929ff641232f2e9a1aefdfc9e91" +checksum = 
"1cccfffbc6bb3bb2d3a26cd2077f4d055f6808d266f9d4d158797a4c60510dfe" dependencies = [ "debugid", "memmap2", @@ -5490,9 +5527,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.7.0" +version = "12.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee1608a1d13061fb0e307a316de29f6c6e737b05459fe6bbf5dd8d7837c4fb7" +checksum = "76a99812da4020a67e76c4eb41f08c87364c14170495ff780f30dd519c221a68" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -5512,9 +5549,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.39" +version = "2.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +checksum = "1726efe18f42ae774cc644f330953a5e7b3c3003d3edcecf18850fe9d4dd9afb" dependencies = [ "proc-macro2", "quote", @@ -5530,7 +5567,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -5579,9 +5616,9 @@ dependencies = [ [[package]] name = "temp-dir" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af547b166dd1ea4b472165569fc456cfb6818116f854690b0ff205e636523dab" +checksum = "dd16aa9ffe15fe021c6ee3766772132c6e98dfa395a167e16864f61a9cfb71d6" [[package]] name = "temp-env" @@ -5595,15 +5632,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.8.1" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef1adac450ad7f4b3c28589471ade84f25f731a7a0fe30d71dfa9f60fd808e5" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand 2.0.1", - "redox_syscall 0.4.1", + "redox_syscall", "rustix", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -5644,33 +5681,44 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "test-log" -version = 
"0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f66edd6b6cd810743c0c71e1d085e92b01ce6a72782032e3f794c8284fe4bcdd" +checksum = "6159ab4116165c99fc88cce31f99fa2c9dbe08d3691cb38da02fc3b45f357d2b" +dependencies = [ + "env_logger", + "test-log-macros", + "tracing-subscriber", +] + +[[package]] +name = "test-log-macros" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ba277e77219e9eea169e8508942db1bf5d8a41ff2db9b20aab5a5aadc9fa25d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] name = "thiserror" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.50" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -5685,9 +5733,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.30" +version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" dependencies = [ "deranged", "itoa", @@ -5705,9 +5753,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f" dependencies = [ "time-core", ] @@ -5748,9 +5796,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.34.0" +version = "1.35.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9" +checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104" dependencies = [ "backtrace", "bytes", @@ -5760,7 +5808,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.5", + "socket2", "tokio-macros", "windows-sys 0.48.0", ] @@ -5783,7 +5831,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -5851,9 +5899,9 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.5" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" [[package]] name = "toml_edit" @@ -5868,9 +5916,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.20.7" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70f427fce4d84c72b5b732388bf4a9f4531b53f74e2887e3ecb2481f68f66d81" +checksum = "396e4d48bbb2b7554c944bde63101b5ae446cff6ec4a24227428f15eb72ef338" dependencies = [ "indexmap 2.1.0", "toml_datetime", @@ -5892,8 +5940,8 @@ dependencies = [ "futures-util", "h2 0.3.22", "http 0.2.11", - "http-body 0.4.5", - "hyper 0.14.27", + "http-body 0.4.6", + "hyper 0.14.28", "hyper-timeout", "percent-encoding", "pin-project", @@ -5924,8 +5972,8 @@ dependencies = [ "futures-util", "h2 0.3.22", 
"http 0.2.11", - "http-body 0.4.5", - "hyper 0.14.27", + "http-body 0.4.6", + "hyper 0.14.28", "hyper-timeout", "percent-encoding", "pin-project", @@ -5973,7 +6021,7 @@ dependencies = [ "futures-core", "futures-util", "http 0.2.11", - "http-body 0.4.5", + "http-body 0.4.6", "http-range-header", "mime", "pin-project-lite", @@ -6018,7 +6066,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -6108,9 +6156,9 @@ dependencies = [ [[package]] name = "try-lock" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "try_map" @@ -6166,9 +6214,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" [[package]] name = "unicode-ident" @@ -6370,7 +6418,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", "wasm-bindgen-shared", ] @@ -6404,7 +6452,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -6502,11 +6550,11 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.51.1" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" 
dependencies = [ - "windows-targets 0.48.5", + "windows-targets 0.52.0", ] [[package]] @@ -6643,9 +6691,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" [[package]] name = "winnow" -version = "0.5.19" +version = "0.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829846f3e3db426d4cee4510841b71a8e58aa2a76b1132579487ae430ccd9c7b" +checksum = "8434aeec7b290e8da5c3f0d628cb0eac6cabcb31d14bb74f779a08109a5914d6" dependencies = [ "memchr", ] @@ -6673,7 +6721,7 @@ dependencies = [ "futures", "futures-timer", "http-types", - "hyper 0.14.27", + "hyper 0.14.28", "log", "once_cell", "regex", @@ -6712,11 +6760,13 @@ dependencies = [ [[package]] name = "xattr" -version = "1.0.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985" +checksum = "914566e6413e7fa959cc394fb30e563ba80f3541fbd40816d4c05a0fc3f2a0f1" dependencies = [ "libc", + "linux-raw-sys", + "rustix", ] [[package]] @@ -6736,22 +6786,22 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.39", + "syn 2.0.47", ] [[package]] @@ -6760,32 +6810,13 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" 
-[[package]] -name = "zstd" -version = "0.12.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" -dependencies = [ - "zstd-safe 6.0.6", -] - [[package]] name = "zstd" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" dependencies = [ - "zstd-safe 7.0.0", -] - -[[package]] -name = "zstd-safe" -version = "6.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" -dependencies = [ - "libc", - "zstd-sys", + "zstd-safe", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 68e6e2cf..74872bd6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ authors = ["Tobie Morgan Hitchcock "] [features] # Public features -default = ["storage-mem", "storage-rocksdb", "scripting", "http", "jwks"] +default = ["storage-mem", "storage-rocksdb", "scripting", "http"] storage-mem = ["surrealdb/kv-mem"] storage-rocksdb = ["surrealdb/kv-rocksdb"] storage-speedb = ["surrealdb/kv-speedb"] @@ -18,6 +18,7 @@ scripting = ["surrealdb/scripting"] http = ["surrealdb/http"] http-compression = [] ml = ["surrealdb/ml", "surrealml-core"] +experimental-parser = ["surrealdb/experimental-parser"] jwks = ["surrealdb/jwks"] [workspace] @@ -60,6 +61,7 @@ reqwest = { version = "0.11.22", default-features = false, features = ["blocking rmpv = "1.0.1" rustyline = { version = "12.0.0", features = ["derive"] } serde = { version = "1.0.193", features = ["derive"] } +serde_cbor = "0.11.2" serde_json = "1.0.108" serde_pack = { version = "1.1.2", package = "rmp-serde" } surrealdb = { path = "lib", features = ["protocol-http", "protocol-ws", "rustls"] } diff --git a/Cargo.toml.orig b/Cargo.toml.orig new file mode 100644 index 00000000..3c56bc03 --- /dev/null +++ b/Cargo.toml.orig @@ -0,0 +1,126 @@ +[package] +name 
= "surreal" +publish = false +edition = "2021" +version = "1.1.0" +license-file = "LICENSE" +authors = ["Tobie Morgan Hitchcock "] + +[features] +# Public features +default = ["storage-mem", "storage-rocksdb", "scripting", "http", "jwks"] +storage-mem = ["surrealdb/kv-mem"] +storage-rocksdb = ["surrealdb/kv-rocksdb"] +storage-speedb = ["surrealdb/kv-speedb"] +storage-tikv = ["surrealdb/kv-tikv"] +storage-fdb = ["surrealdb/kv-fdb-7_1"] +scripting = ["surrealdb/scripting"] +http = ["surrealdb/http"] +http-compression = [] +ml = ["surrealdb/ml", "surrealml-core"] +<<<<<<< HEAD +experimental-parser = ["surrealdb/experimental-parser"] +======= +jwks = ["surrealdb/jwks"] +>>>>>>> upstream/main + +[workspace] +members = ["lib", "lib/examples/actix", "lib/examples/axum"] + +[profile.release] +lto = true +strip = true +opt-level = 3 +panic = 'abort' +codegen-units = 1 + +[profile.bench] +strip = false + +[dependencies] +argon2 = "0.5.2" +axum = { version = "0.6.20", features = ["tracing", "ws", "headers"] } +axum-client-ip = "0.5.0" +axum-extra = { version = "0.7.7", features = ["query", "typed-routing"] } +axum-server = { version = "0.5.1", features = ["tls-rustls"] } +base64 = "0.21.5" +bytes = "1.5.0" +ciborium = "0.2.1" +clap = { version = "4.4.11", features = ["env", "derive", "wrap_help", "unicode"] } +futures = "0.3.29" +futures-util = "0.3.29" +glob = "0.3.1" +http = "0.2.11" +http-body = "0.4.5" +hyper = "0.14.27" +ipnet = "2.9.0" +ndarray = { version = "0.15.6", optional = true } +once_cell = "1.18.0" +opentelemetry = { version = "0.19", features = ["rt-tokio"] } +opentelemetry-otlp = { version = "0.12.0", features = ["metrics"] } +pin-project-lite = "0.2.13" +rand = "0.8.5" +reqwest = { version = "0.11.22", default-features = false, features = ["blocking", "gzip"] } +rmpv = "1.0.1" +rustyline = { version = "12.0.0", features = ["derive"] } +serde = { version = "1.0.193", features = ["derive"] } +serde_cbor = "0.11.2" +serde_json = "1.0.108" +serde_pack = { 
version = "1.1.2", package = "rmp-serde" } +surrealdb = { path = "lib", features = ["protocol-http", "protocol-ws", "rustls"] } +surrealml-core = { version = "0.0.3", optional = true} +tempfile = "3.8.1" +thiserror = "1.0.50" +tokio = { version = "1.34.0", features = ["macros", "signal"] } +tokio-util = { version = "0.7.10", features = ["io"] } +tower = "0.4.13" +tower-http = { version = "0.4.4", features = ["trace", "sensitive-headers", "auth", "request-id", "util", "catch-panic", "cors", "set-header", "limit", "add-extension", "compression-full"] } +tracing = "0.1" +tracing-opentelemetry = "0.19.0" +tracing-subscriber = { version = "0.3.18", features = ["env-filter"] } +urlencoding = "2.1.3" +uuid = { version = "1.6.1", features = ["serde", "js", "v4", "v7"] } + +[target.'cfg(unix)'.dependencies] +nix = { version = "0.27.1", features = ["user"] } + +[target.'cfg(unix)'.dev-dependencies] +nix = { version = "0.27.1", features = ["signal", "user"] } + +[target.'cfg(any(target_os = "linux", target_os = "macos", target_os = "ios"))'.dependencies] +mimalloc = { version = "0.1.39", default-features = false } + +[target.'cfg(any(target_os = "android", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))'.dependencies] +jemallocator = "0.5.4" + +[dev-dependencies] +assert_fs = "1.0.13" +env_logger = "0.10.1" +opentelemetry-proto = { version = "0.2.0", features = ["gen-tonic", "traces", "metrics", "logs"] } +rcgen = "0.11.3" +serial_test = "2.0.0" +temp-env = { version = "0.3.6", features = ["async_closure"] } +test-log = { version = "0.2.13", features = ["trace"] } +tokio-stream = { version = "0.1", features = ["net"] } +tokio-tungstenite = { version = "0.20.1" } +tonic = "0.8.3" +ulid = "1.1.0" +wiremock = "0.5.22" + +[build-dependencies] +semver = "1.0.20" + +[package.metadata.deb] +maintainer-scripts = "pkg/deb/" +maintainer = "Tobie Morgan Hitchcock " +copyright = "SurrealDB Ltd. 
2022" +systemd-units = { enable = true } +depends = "$auto" +section = "utility" +priority = "optional" +assets = [ + ["target/release/surreal", "usr/share/surrealdb/surreal", "755"], + ["pkg/deb/README", "usr/share/surrealdb/README", "644"], +] +extended-description = "A scalable, distributed, collaborative, document-graph database, for the realtime web." +license-file = ["LICENSE", "4"] diff --git a/Makefile.ci.toml b/Makefile.ci.toml index 5d88dcd0..4d1dfacd 100644 --- a/Makefile.ci.toml +++ b/Makefile.ci.toml @@ -56,6 +56,18 @@ args = [ "--skip", "ws_integration" ] + +[tasks.test-experimental-parser] +category = "CI - INTEGRATION TESTS" +command = "cargo" +args = [ + "test", "--locked", "--no-default-features", "--features", "storage-mem,scripting,http,experimental-parser", "--workspace", "--", + "--skip", "api_integration", + "--skip", "cli_integration", + "--skip", "http_integration", + "--skip", "ws_integration" +] + [tasks.test-workspace-coverage-complete] category = "CI - INTEGRATION TESTS" command = "cargo" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 09d66b85..229f63dd 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -40,6 +40,7 @@ rustls = ["dep:rustls", "reqwest?/rustls-tls", "tokio-tungstenite?/rustls-tls-we ml = ["surrealml-core", "ndarray"] jwks = ["dep:reqwest"] arbitrary = ["dep:arbitrary", "dep:regex-syntax", "rust_decimal/rust-fuzz", "geo-types/arbitrary", "uuid/arbitrary"] +experimental-parser = ["dep:phf", "dep:unicase"] # Private features kv-fdb = ["foundationdb", "tokio/time"] @@ -123,6 +124,8 @@ tracing = "0.1.40" trice = "0.4.0" ulid = { version = "1.1.0", features = ["serde"] } url = "2.5.0" +phf = { version = "0.11.2", features = ["macros", "unicase"], optional=true } +unicase = { version = "2.7.0", optional = true } arbitrary = { version = "1.3.2", features = ["derive"], optional = true } regex-syntax = { version = "0.8.2", optional = true, features = ["arbitrary"] } geo-types = { version = "0.7.12", features = ["arbitrary"] } 
diff --git a/lib/benches/parser.rs b/lib/benches/parser.rs index 8b420ae7..2b67928a 100644 --- a/lib/benches/parser.rs +++ b/lib/benches/parser.rs @@ -59,6 +59,7 @@ fn bench_parser(c: &mut Criterion) { &(1..=100).map(|n| format!("'{n}': {n}")).collect::>().join(", ") ) ); + parser!(c, full_test, surrealdb::sql::parse, include_str!("../test.surql")); c.finish(); } diff --git a/lib/fuzz/Cargo.lock b/lib/fuzz/Cargo.lock index b940e8d9..7a7e72ed 100644 --- a/lib/fuzz/Cargo.lock +++ b/lib/fuzz/Cargo.lock @@ -2584,7 +2584,6 @@ dependencies = [ "futures-concurrency", "fuzzy-matcher", "geo 0.27.0", - "geo-types", "hex", "indexmap 2.1.0", "ipnet", diff --git a/lib/src/api/engine/any/mod.rs b/lib/src/api/engine/any/mod.rs index 28b165c6..81364f7e 100644 --- a/lib/src/api/engine/any/mod.rs +++ b/lib/src/api/engine/any/mod.rs @@ -255,7 +255,7 @@ mod tests { use crate::dbs::Capabilities; use crate::opt::auth::Root; use crate::sql::Value; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn local_engine_without_auth() { diff --git a/lib/src/api/engine/mod.rs b/lib/src/api/engine/mod.rs index 122f42a9..441fa752 100644 --- a/lib/src/api/engine/mod.rs +++ b/lib/src/api/engine/mod.rs @@ -153,6 +153,7 @@ struct IntervalStream { } impl IntervalStream { + #[allow(unused)] fn new(interval: Interval) -> Self { Self { inner: interval, diff --git a/lib/src/dbs/capabilities.rs b/lib/src/dbs/capabilities.rs index b6d295a2..3347e985 100644 --- a/lib/src/dbs/capabilities.rs +++ b/lib/src/dbs/capabilities.rs @@ -177,7 +177,8 @@ impl std::fmt::Display fo /// # Examples /// /// Create a new instance, and allow all capabilities -/// ```no_run +#[cfg_attr(feature = "kv-rocksdb", doc = "```no_run")] +#[cfg_attr(not(feature = "kv-rocksdb"), doc = "```ignore")] /// # use surrealdb::opt::capabilities::Capabilities; /// # use surrealdb::opt::Config; /// # use surrealdb::Surreal; @@ -192,7 +193,8 @@ impl std::fmt::Display fo /// ``` /// /// Create a new instance, and allow 
certain functions -/// ```no_run +#[cfg_attr(feature = "kv-rocksdb", doc = "```no_run")] +#[cfg_attr(not(feature = "kv-rocksdb"), doc = "```ignore")] /// # use std::str::FromStr; /// # use surrealdb::engine::local::File; /// # use surrealdb::opt::capabilities::Capabilities; diff --git a/lib/src/fnc/mod.rs b/lib/src/fnc/mod.rs index 8f220155..ebca7c8b 100644 --- a/lib/src/fnc/mod.rs +++ b/lib/src/fnc/mod.rs @@ -423,6 +423,7 @@ pub async fn asynchronous( mod tests { #[cfg(all(feature = "scripting", feature = "kv-mem"))] use crate::dbs::Capabilities; + use crate::sql::{statements::OutputStatement, Function, Query, Statement, Value}; #[tokio::test] async fn implementations_are_present() { @@ -442,8 +443,28 @@ mod tests { let (quote, _) = line.split_once("=>").unwrap(); let name = quote.trim().trim_matches('"'); - let builtin_name = crate::syn::test::builtin_name(name); - if builtin_name.is_err() { + let res = crate::syn::parse(&format!("RETURN {}()", name)); + if let Ok(Query(mut x)) = res { + match x.0.pop() { + Some(Statement::Output(OutputStatement { + what: Value::Function(x), + .. 
+ })) => match *x { + Function::Normal(parsed_name, _) => { + if parsed_name != name { + problems + .push(format!("function `{name}` parsed as `{parsed_name}`")); + } + } + _ => { + problems.push(format!("couldn't parse {name} function")); + } + }, + _ => { + problems.push(format!("couldn't parse {name} function")); + } + } + } else { problems.push(format!("couldn't parse {name} function")); } diff --git a/lib/src/fnc/script/tests/mod.rs b/lib/src/fnc/script/tests/mod.rs index e29e6a33..aa571a60 100644 --- a/lib/src/fnc/script/tests/mod.rs +++ b/lib/src/fnc/script/tests/mod.rs @@ -1 +1,2 @@ +#[cfg(feature = "http")] mod fetch; diff --git a/lib/src/iam/verify.rs b/lib/src/iam/verify.rs index 845af926..4bcb07dd 100644 --- a/lib/src/iam/verify.rs +++ b/lib/src/iam/verify.rs @@ -14,10 +14,10 @@ use std::str::{self, FromStr}; use std::sync::Arc; async fn config( - kvs: &Datastore, + _kvs: &Datastore, de_kind: Algorithm, de_code: String, - token_header: Header, + _token_header: Header, ) -> Result<(DecodingKey, Validation), Error> { if de_kind == Algorithm::Jwks { #[cfg(not(feature = "jwks"))] @@ -27,8 +27,8 @@ async fn config( } #[cfg(feature = "jwks")] // The key identifier header must be present - if let Some(kid) = token_header.kid { - jwks::config(kvs, &kid, &de_code).await + if let Some(kid) = _token_header.kid { + jwks::config(_kvs, &kid, &de_code).await } else { Err(Error::MissingTokenHeader("kid".to_string())) } @@ -1125,7 +1125,7 @@ mod tests { // Test with generic user identifier // { - let resource_id = "user:2k9qnabxuxh8k4d5gfto".to_string(); + let resource_id = "user:`2k9qnabxuxh8k4d5gfto`".to_string(); // Prepare the claims object let mut claims = claims.clone(); claims.id = Some(resource_id.clone()); @@ -1254,6 +1254,7 @@ mod tests { } } + #[cfg(feature = "jwks")] #[tokio::test] async fn test_token_scope_jwks() { use crate::opt::capabilities::{Capabilities, NetTarget, Targets}; diff --git a/lib/src/idx/ft/analyzer/mod.rs b/lib/src/idx/ft/analyzer/mod.rs 
index 59d34167..98217af7 100644 --- a/lib/src/idx/ft/analyzer/mod.rs +++ b/lib/src/idx/ft/analyzer/mod.rs @@ -8,8 +8,7 @@ use crate::idx::ft::postings::TermFrequency; use crate::idx::ft::terms::{TermId, Terms}; use crate::sql::statements::DefineAnalyzerStatement; use crate::sql::tokenizer::Tokenizer as SqlTokenizer; -use crate::sql::Value; -use crate::syn::path_like; +use crate::sql::{Function, Strand, Value}; use async_recursion::async_recursion; use filter::Filter; use std::collections::hash_map::Entry; @@ -194,26 +193,16 @@ impl Analyzer { txn: &Transaction, mut input: String, ) -> Result { - if let Some(function_name) = &self.function { - let fns = format!("fn::{function_name}(\"{input}\")"); - match path_like(&fns) { - Ok(func_value) => { - let val = func_value.compute(ctx, opt, txn, None).await?; - if let Value::Strand(val) = val { - input = val.0; - } else { - return Err(Error::InvalidFunction { - name: function_name.to_string(), - message: "The function should return a string.".to_string(), - }); - } - } - Err(e) => { - return Err(Error::InvalidFunction { - name: function_name.to_string(), - message: e.to_string(), - }) - } + if let Some(function_name) = self.function.clone() { + let fns = Function::Custom(function_name.clone(), vec![Value::Strand(Strand(input))]); + let val = fns.compute(ctx, opt, txn, None).await?; + if let Value::Strand(val) = val { + input = val.0; + } else { + return Err(Error::InvalidFunction { + name: function_name, + message: "The function should return a string.".to_string(), + }); } } if let Some(t) = &self.tokenizers { diff --git a/lib/src/idx/planner/plan.rs b/lib/src/idx/planner/plan.rs index 1e807391..98556076 100644 --- a/lib/src/idx/planner/plan.rs +++ b/lib/src/idx/planner/plan.rs @@ -308,7 +308,7 @@ impl RangeQueryBuilder { mod tests { use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue}; use crate::sql::{Array, Idiom, Value}; - use crate::syn::test::Parse; + use crate::syn::Parse; use 
std::collections::HashSet; use std::sync::Arc; diff --git a/lib/src/key/graph/mod.rs b/lib/src/key/graph/mod.rs index ce281202..88b2aae3 100644 --- a/lib/src/key/graph/mod.rs +++ b/lib/src/key/graph/mod.rs @@ -193,7 +193,7 @@ mod tests { #[test] fn key() { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; let fk = Thing::parse("other:test"); #[rustfmt::skip] let val = Graph::new( diff --git a/lib/src/key/thing/mod.rs b/lib/src/key/thing/mod.rs index 28c6cfd8..d9c40766 100644 --- a/lib/src/key/thing/mod.rs +++ b/lib/src/key/thing/mod.rs @@ -90,8 +90,7 @@ mod tests { let dec = Thing::decode(&enc).unwrap(); assert_eq!(val, dec); println!("---"); - // - let id2 = "foo:['f8e238f2-e734-47b8-9a16-476b291bd78a']"; + let id2 = "foo:[u'f8e238f2-e734-47b8-9a16-476b291bd78a']"; let thing = syn::thing(id2).expect("Failed to parse the ID"); let id2 = thing.id; let val = Thing::new("testns", "testdb", "testtb", id2); diff --git a/lib/src/kvs/ds.rs b/lib/src/kvs/ds.rs index ef1cba48..ae014d98 100644 --- a/lib/src/kvs/ds.rs +++ b/lib/src/kvs/ds.rs @@ -219,6 +219,18 @@ impl Datastore { #[allow(unused_variables)] let default_clock: Arc> = Arc::new(RwLock::new(SizedClock::System(SystemClock::new()))); + + // removes warning if no storage is enabled. + #[cfg(not(any( + feature = "kv-mem", + feature = "kv-rocksdb", + feature = "kv-speedb", + feature = "kv-indxdb", + feature = "kv-tikv", + feature = "kv-fdb" + )))] + let _ = (clock_override, default_clock); + // Initiate the desired datastore let (inner, clock): (Result, Arc>) = match path { "memory" => { @@ -340,7 +352,7 @@ impl Datastore { // The datastore path is not valid _ => { // use clock_override and default_clock to remove warning when no kv is enabled. 
- let _ = (clock_override, default_clock); + let _ = default_clock; info!("Unable to load the specified datastore {}", path); Err(Error::Ds("Unable to load the specified datastore".into())) } diff --git a/lib/src/kvs/mod.rs b/lib/src/kvs/mod.rs index 1ef41ebb..1993dca3 100644 --- a/lib/src/kvs/mod.rs +++ b/lib/src/kvs/mod.rs @@ -25,6 +25,14 @@ mod tx; mod clock; #[cfg(test)] +#[cfg(any( + feature = "kv-mem", + feature = "kv-rocksdb", + feature = "kv-speedb", + feature = "kv-indxdb", + feature = "kv-tikv", + feature = "kv-fdb" +))] mod tests; pub use self::ds::*; diff --git a/lib/src/sql/algorithm.rs b/lib/src/sql/algorithm.rs index 8f60b926..d62817ee 100644 --- a/lib/src/sql/algorithm.rs +++ b/lib/src/sql/algorithm.rs @@ -2,7 +2,7 @@ use revision::revisioned; use serde::{Deserialize, Serialize}; use std::fmt; -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[revisioned(revision = 1)] pub enum Algorithm { diff --git a/lib/src/sql/ending.rs b/lib/src/sql/ending.rs index a9c609c0..378382b0 100644 --- a/lib/src/sql/ending.rs +++ b/lib/src/sql/ending.rs @@ -80,7 +80,13 @@ pub fn duration(i: &str) -> IResult<&str, ()> { pub fn field(i: &str) -> IResult<&str, ()> { peek(alt(( - value((), preceded(shouldbespace, tag_no_case("FROM"))), + value( + (), + preceded( + shouldbespace, + alt((tag_no_case("FROM"), tag_no_case("TIMEOUT"), tag_no_case("PARALLEL"))), + ), + ), value((), char(';')), value((), eof), )))(i) diff --git a/lib/src/sql/escape.rs b/lib/src/sql/escape.rs index a178d892..5734fcf3 100644 --- a/lib/src/sql/escape.rs +++ b/lib/src/sql/escape.rs @@ -1,4 +1,3 @@ -use nom::character::is_digit; use std::borrow::Cow; const SINGLE: char = '\''; @@ -54,9 +53,15 @@ pub fn quote_str(s: &str) -> String { #[inline] pub fn quote_plain_str(s: &str) -> String { - let mut ret = quote_str(s); - 
#[cfg(not(feature = "experimental_parser"))] + #[cfg(not(feature = "experimental-parser"))] { + if crate::syn::thing(s).is_ok() { + let mut ret = quote_str(s); + ret.insert(0, 's'); + return ret; + } + + let mut ret = quote_str(s); // HACK: We need to prefix strands which look like records, uuids, or datetimes with an `s` // otherwise the strands will parsed as a different type when parsed again. // This is not required for the new parser. @@ -64,13 +69,14 @@ pub fn quote_plain_str(s: &str) -> String { // directly to avoid having to create a common interface between the old and new parser. if crate::syn::v1::literal::uuid(&ret).is_ok() || crate::syn::v1::literal::datetime(&ret).is_ok() - || crate::syn::thing(&ret).is_ok() { ret.insert(0, 's'); } + ret } - ret + #[cfg(feature = "experimental-parser")] + quote_str(s) } #[inline] @@ -106,24 +112,16 @@ pub fn escape_normal<'a>(s: &'a str, l: char, r: char, e: &str) -> Cow<'a, str> #[inline] pub fn escape_numeric<'a>(s: &'a str, l: char, r: char, e: &str) -> Cow<'a, str> { - // Presume this is numeric - let mut numeric = true; // Loop over each character - for x in s.bytes() { + for (idx, x) in s.bytes().enumerate() { + // the first character is not allowed to be a digit. 
+ if idx == 0 && x.is_ascii_digit() { + return Cow::Owned(format!("{l}{}{r}", s.replace(r, e))); + } // Check if character is allowed if !(x.is_ascii_alphanumeric() || x == b'_') { return Cow::Owned(format!("{l}{}{r}", s.replace(r, e))); } - // Check if character is non-numeric - if !is_digit(x) { - numeric = false; - } - } - // Output the id value - match numeric { - // This is numeric so escape it - true => Cow::Owned(format!("{l}{}{r}", s.replace(r, e))), - // No need to escape the value - _ => Cow::Borrowed(s), } + Cow::Borrowed(s) } diff --git a/lib/src/sql/expression.rs b/lib/src/sql/expression.rs index 0f58cd87..c5ba5ec8 100644 --- a/lib/src/sql/expression.rs +++ b/lib/src/sql/expression.rs @@ -48,29 +48,6 @@ impl Expression { r, } } - /// Augment an existing expression - pub(crate) fn augment(mut self, l: Value, o: Operator) -> Self { - match &mut self { - Self::Binary { - l: left, - o: op, - .. - } if o.precedence() >= op.precedence() => match left { - Value::Expression(x) => { - *x.as_mut() = std::mem::take(x).augment(l, o); - self - } - _ => { - *left = Self::new(l, o, std::mem::take(left)).into(); - self - } - }, - e => { - let r = Value::from(std::mem::take(e)); - Self::new(l, o, r) - } - } - } } impl Expression { @@ -132,6 +109,8 @@ impl Expression { let operand = v.compute(ctx, opt, txn, doc).await?; return match o { Operator::Neg => fnc::operate::neg(operand), + // TODO: Check if it is a number? 
+ Operator::Add => Ok(operand), Operator::Not => fnc::operate::not(operand), op => unreachable!("{op:?} is not a unary op"), }; diff --git a/lib/src/sql/idiom.rs b/lib/src/sql/idiom.rs index 3272fcbc..40921600 100644 --- a/lib/src/sql/idiom.rs +++ b/lib/src/sql/idiom.rs @@ -2,13 +2,13 @@ use crate::ctx::Context; use crate::dbs::{Options, Transaction}; use crate::doc::CursorDoc; use crate::err::Error; -use crate::sql::fmt::{fmt_separated_by, Fmt}; -use crate::sql::part::Next; -use crate::sql::part::Part; -use crate::sql::paths::{ID, IN, META, OUT}; -use crate::sql::value::Value; -use md5::Digest; -use md5::Md5; +use crate::sql::{ + fmt::{fmt_separated_by, Fmt}, + part::Next, + paths::{ID, IN, META, OUT}, + Part, Value, +}; +use md5::{Digest, Md5}; use revision::revisioned; use serde::{Deserialize, Serialize}; use std::fmt::{self, Display, Formatter}; @@ -73,6 +73,11 @@ impl From<&[Part]> for Idiom { Self(v.to_vec()) } } +impl From for Idiom { + fn from(v: Part) -> Self { + Self(vec![v]) + } +} impl Idiom { /// Appends a part to the end of this Idiom diff --git a/lib/src/sql/kind.rs b/lib/src/sql/kind.rs index 17b2799e..26d05301 100644 --- a/lib/src/sql/kind.rs +++ b/lib/src/sql/kind.rs @@ -1,5 +1,4 @@ -use crate::sql::fmt::Fmt; -use crate::sql::table::Table; +use crate::sql::{fmt::Fmt, Table}; use revision::revisioned; use serde::{Deserialize, Serialize}; use std::fmt::{self, Display, Formatter}; diff --git a/lib/src/sql/language.rs b/lib/src/sql/language.rs index e987622d..552d931f 100644 --- a/lib/src/sql/language.rs +++ b/lib/src/sql/language.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use std::fmt; use std::fmt::Display; -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[revisioned(revision = 1)] pub enum Language { @@ -26,9 +26,9 @@ pub enum Language { Turkish, } 
-impl Display for Language { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str(match self { +impl Language { + pub fn as_str(&self) -> &'static str { + match self { Self::Arabic => "ARABIC", Self::Danish => "DANISH", Self::Dutch => "DUTCH", @@ -46,6 +46,12 @@ impl Display for Language { Self::Swedish => "SWEDISH", Self::Tamil => "TAMIL", Self::Turkish => "TURKISH", - }) + } + } +} + +impl Display for Language { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(self.as_str()) } } diff --git a/lib/src/sql/mod.rs b/lib/src/sql/mod.rs index 6a79bfe1..22c35968 100644 --- a/lib/src/sql/mod.rs +++ b/lib/src/sql/mod.rs @@ -149,4 +149,4 @@ mod parser { pub use crate::syn::*; } -pub use self::parser::{idiom, json, parse, subquery, thing, v1::ParseError, value}; +pub use self::parser::{error::ParseError, idiom, json, parse, subquery, thing, value}; diff --git a/lib/src/sql/order.rs b/lib/src/sql/order.rs index 5f219417..61900398 100644 --- a/lib/src/sql/order.rs +++ b/lib/src/sql/order.rs @@ -39,6 +39,7 @@ pub struct Order { pub random: bool, pub collate: bool, pub numeric: bool, + /// true if the direction is ascending pub direction: bool, } diff --git a/lib/src/sql/query.rs b/lib/src/sql/query.rs index 769cc638..6293fd13 100644 --- a/lib/src/sql/query.rs +++ b/lib/src/sql/query.rs @@ -1,6 +1,6 @@ use crate::sql::fmt::Pretty; -use crate::sql::statement::{Statement, Statements}; use crate::sql::statements::{DefineStatement, RemoveStatement}; +use crate::sql::{Statement, Statements}; use derive::Store; use revision::revisioned; use serde::{Deserialize, Serialize}; diff --git a/lib/src/sql/statements/define/scope.rs b/lib/src/sql/statements/define/scope.rs index 54577f52..b4cf4888 100644 --- a/lib/src/sql/statements/define/scope.rs +++ b/lib/src/sql/statements/define/scope.rs @@ -23,6 +23,12 @@ pub struct DefineScopeStatement { pub comment: Option, } +impl DefineScopeStatement { + pub(crate) fn random_code() -> String { + 
rand::thread_rng().sample_iter(&Alphanumeric).take(128).map(char::from).collect::() + } +} + impl DefineScopeStatement { /// Process this type returning a computed simple Value pub(crate) async fn compute( @@ -46,10 +52,6 @@ impl DefineScopeStatement { // Ok all good Ok(Value::None) } - - pub fn random_code() -> String { - rand::thread_rng().sample_iter(&Alphanumeric).take(128).map(char::from).collect::() - } } impl Display for DefineScopeStatement { diff --git a/lib/src/sql/statements/define/user.rs b/lib/src/sql/statements/define/user.rs index 43347c41..a466e5df 100644 --- a/lib/src/sql/statements/define/user.rs +++ b/lib/src/sql/statements/define/user.rs @@ -47,6 +47,31 @@ impl From<(Base, &str, &str)> for DefineUserStatement { } impl DefineUserStatement { + pub(crate) fn from_parsed_values(name: Ident, base: Base, roles: Vec) -> Self { + DefineUserStatement { + name, + base, + roles, // New users get the viewer role by default + code: rand::thread_rng() + .sample_iter(&Alphanumeric) + .take(128) + .map(char::from) + .collect::(), + ..Default::default() + } + } + + pub(crate) fn set_password(&mut self, password: &str) { + self.hash = Argon2::default() + .hash_password(password.as_bytes(), &SaltString::generate(&mut OsRng)) + .unwrap() + .to_string() + } + + pub(crate) fn set_passhash(&mut self, passhash: String) { + self.hash = passhash; + } + /// Process this type returning a computed simple Value pub(crate) async fn compute( &self, diff --git a/lib/src/sql/statements/ifelse.rs b/lib/src/sql/statements/ifelse.rs index 9accc7c6..971dfc54 100644 --- a/lib/src/sql/statements/ifelse.rs +++ b/lib/src/sql/statements/ifelse.rs @@ -13,7 +13,9 @@ use std::fmt::{self, Display, Write}; #[revisioned(revision = 1)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] pub struct IfelseStatement { + /// The first if condition followed by a body, followed by any number of else if's pub exprs: Vec<(Value, Value)>, + /// the final else body, if there is one pub 
close: Option, } diff --git a/lib/src/sql/statements/use.rs b/lib/src/sql/statements/use.rs index 704efd2b..399661f6 100644 --- a/lib/src/sql/statements/use.rs +++ b/lib/src/sql/statements/use.rs @@ -3,6 +3,8 @@ use revision::revisioned; use serde::{Deserialize, Serialize}; use std::fmt; +use crate::sql::escape::escape_ident; + #[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Store, Hash)] #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))] #[revisioned(revision = 1)] @@ -15,9 +17,11 @@ impl fmt::Display for UseStatement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("USE")?; if let Some(ref ns) = self.ns { + let ns = escape_ident(ns); write!(f, " NS {ns}")?; } if let Some(ref db) = self.db { + let db = escape_ident(db); write!(f, " DB {db}")?; } Ok(()) diff --git a/lib/src/sql/strand.rs b/lib/src/sql/strand.rs index a2b4932e..cee32486 100644 --- a/lib/src/sql/strand.rs +++ b/lib/src/sql/strand.rs @@ -130,7 +130,7 @@ pub(crate) mod no_nul_bytes { #[cfg(test)] mod test { - #[cfg(not(feature = "experimental_parser"))] + #[cfg(not(feature = "experimental-parser"))] #[test] fn ensure_strands_are_prefixed() { use super::Strand; diff --git a/lib/src/sql/thing.rs b/lib/src/sql/thing.rs index 44ee5fe1..8fd2e813 100644 --- a/lib/src/sql/thing.rs +++ b/lib/src/sql/thing.rs @@ -75,7 +75,7 @@ impl TryFrom for Thing { impl TryFrom<&str> for Thing { type Error = (); fn try_from(v: &str) -> Result { - match syn::thing_raw(v) { + match syn::thing(v) { Ok(v) => Ok(v), _ => Err(()), } diff --git a/lib/src/sql/value/changed.rs b/lib/src/sql/value/changed.rs index c0395f36..b7c1ab05 100644 --- a/lib/src/sql/value/changed.rs +++ b/lib/src/sql/value/changed.rs @@ -42,7 +42,7 @@ impl Value { mod tests { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn changed_none() { diff --git a/lib/src/sql/value/clear.rs b/lib/src/sql/value/clear.rs index bc35937b..bc749d2a 100644 --- 
a/lib/src/sql/value/clear.rs +++ b/lib/src/sql/value/clear.rs @@ -12,7 +12,7 @@ impl Value { mod tests { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn clear_value() { diff --git a/lib/src/sql/value/compare.rs b/lib/src/sql/value/compare.rs index f003f6c0..e31c3c29 100644 --- a/lib/src/sql/value/compare.rs +++ b/lib/src/sql/value/compare.rs @@ -92,7 +92,7 @@ mod tests { use super::*; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn compare_none() { diff --git a/lib/src/sql/value/cut.rs b/lib/src/sql/value/cut.rs index 343972ec..c38a2774 100644 --- a/lib/src/sql/value/cut.rs +++ b/lib/src/sql/value/cut.rs @@ -97,7 +97,7 @@ mod tests { use super::*; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn cut_none() { diff --git a/lib/src/sql/value/dec.rs b/lib/src/sql/value/dec.rs index 74c22408..f1609e39 100644 --- a/lib/src/sql/value/dec.rs +++ b/lib/src/sql/value/dec.rs @@ -30,7 +30,7 @@ mod tests { use super::*; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn decrement_none() { diff --git a/lib/src/sql/value/decrement.rs b/lib/src/sql/value/decrement.rs index 22bec8a0..a12d6efa 100644 --- a/lib/src/sql/value/decrement.rs +++ b/lib/src/sql/value/decrement.rs @@ -41,7 +41,7 @@ mod tests { use super::*; use crate::dbs::test::mock; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn decrement_none() { diff --git a/lib/src/sql/value/del.rs b/lib/src/sql/value/del.rs index 9247737b..9de1eabc 100644 --- a/lib/src/sql/value/del.rs +++ b/lib/src/sql/value/del.rs @@ -201,7 +201,7 @@ mod tests { use super::*; use crate::dbs::test::mock; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn del_none() { diff --git a/lib/src/sql/value/diff.rs 
b/lib/src/sql/value/diff.rs index d2807360..ac66f5db 100644 --- a/lib/src/sql/value/diff.rs +++ b/lib/src/sql/value/diff.rs @@ -78,7 +78,7 @@ impl Value { mod tests { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn diff_none() { diff --git a/lib/src/sql/value/each.rs b/lib/src/sql/value/each.rs index 67b4258a..77f8d34f 100644 --- a/lib/src/sql/value/each.rs +++ b/lib/src/sql/value/each.rs @@ -59,7 +59,7 @@ mod tests { use super::*; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn each_none() { diff --git a/lib/src/sql/value/every.rs b/lib/src/sql/value/every.rs index b16f3019..e939ddfb 100644 --- a/lib/src/sql/value/every.rs +++ b/lib/src/sql/value/every.rs @@ -53,7 +53,7 @@ mod tests { use super::*; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn every_with_empty_objects_arrays() { diff --git a/lib/src/sql/value/extend.rs b/lib/src/sql/value/extend.rs index b5e936e0..8e602723 100644 --- a/lib/src/sql/value/extend.rs +++ b/lib/src/sql/value/extend.rs @@ -34,7 +34,7 @@ mod tests { use super::*; use crate::dbs::test::mock; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn extend_array_value() { diff --git a/lib/src/sql/value/get.rs b/lib/src/sql/value/get.rs index 1d2256b4..d7e5067b 100644 --- a/lib/src/sql/value/get.rs +++ b/lib/src/sql/value/get.rs @@ -250,7 +250,7 @@ mod tests { use crate::sql::id::Id; use crate::sql::idiom::Idiom; use crate::sql::thing::Thing; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn get_none() { diff --git a/lib/src/sql/value/inc.rs b/lib/src/sql/value/inc.rs index 7b8a002f..cd690879 100644 --- a/lib/src/sql/value/inc.rs +++ b/lib/src/sql/value/inc.rs @@ -30,7 +30,7 @@ mod tests { use super::*; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn increment_none() { 
diff --git a/lib/src/sql/value/increment.rs b/lib/src/sql/value/increment.rs index d4d840ef..83634fe7 100644 --- a/lib/src/sql/value/increment.rs +++ b/lib/src/sql/value/increment.rs @@ -42,7 +42,7 @@ mod tests { use super::*; use crate::dbs::test::mock; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn increment_none() { diff --git a/lib/src/sql/value/merge.rs b/lib/src/sql/value/merge.rs index ac13994e..c42c1774 100644 --- a/lib/src/sql/value/merge.rs +++ b/lib/src/sql/value/merge.rs @@ -24,7 +24,7 @@ impl Value { mod tests { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn merge_none() { diff --git a/lib/src/sql/value/patch.rs b/lib/src/sql/value/patch.rs index e254c715..507736e4 100644 --- a/lib/src/sql/value/patch.rs +++ b/lib/src/sql/value/patch.rs @@ -86,7 +86,7 @@ impl Value { mod tests { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn patch_add_simple() { diff --git a/lib/src/sql/value/pick.rs b/lib/src/sql/value/pick.rs index 368924c7..79cfb2e1 100644 --- a/lib/src/sql/value/pick.rs +++ b/lib/src/sql/value/pick.rs @@ -54,7 +54,7 @@ mod tests { use crate::sql::id::Id; use crate::sql::idiom::Idiom; use crate::sql::thing::Thing; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn pick_none() { diff --git a/lib/src/sql/value/put.rs b/lib/src/sql/value/put.rs index a629b2dc..a62199f9 100644 --- a/lib/src/sql/value/put.rs +++ b/lib/src/sql/value/put.rs @@ -87,7 +87,7 @@ mod tests { use super::*; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn put_none() { diff --git a/lib/src/sql/value/replace.rs b/lib/src/sql/value/replace.rs index 4d193bf6..7dc39762 100644 --- a/lib/src/sql/value/replace.rs +++ b/lib/src/sql/value/replace.rs @@ -19,7 +19,7 @@ impl Value { mod tests { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; 
#[tokio::test] async fn replace() { diff --git a/lib/src/sql/value/rid.rs b/lib/src/sql/value/rid.rs index 87e00f74..545e7f2c 100644 --- a/lib/src/sql/value/rid.rs +++ b/lib/src/sql/value/rid.rs @@ -13,7 +13,7 @@ mod tests { use super::*; use crate::sql::id::Id; use crate::sql::thing::Thing; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn rid_none() { diff --git a/lib/src/sql/value/serde/ser/value/mod.rs b/lib/src/sql/value/serde/ser/value/mod.rs index e9f31bf0..607b3186 100644 --- a/lib/src/sql/value/serde/ser/value/mod.rs +++ b/lib/src/sql/value/serde/ser/value/mod.rs @@ -692,7 +692,7 @@ mod tests { #[test] fn duration() { let duration = Duration::default(); - let value = to_value(&duration).unwrap(); + let value = to_value(duration).unwrap(); let expected = Value::Duration(duration); assert_eq!(value, expected); assert_eq!(expected, to_value(&expected).unwrap()); diff --git a/lib/src/sql/value/set.rs b/lib/src/sql/value/set.rs index 4fb2c061..4dac6eb8 100644 --- a/lib/src/sql/value/set.rs +++ b/lib/src/sql/value/set.rs @@ -159,7 +159,7 @@ mod tests { use super::*; use crate::dbs::test::mock; use crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[tokio::test] async fn set_none() { diff --git a/lib/src/sql/value/value.rs b/lib/src/sql/value/value.rs index 70026426..340f41b5 100644 --- a/lib/src/sql/value/value.rs +++ b/lib/src/sql/value/value.rs @@ -1087,7 +1087,8 @@ impl Value { | Value::Array(_) | Value::Param(_) | Value::Edges(_) - | Value::Thing(_) => true, + | Value::Thing(_) + | Value::Table(_) => true, _ => false, } } @@ -2774,7 +2775,7 @@ mod tests { use super::*; use crate::sql::uuid::Uuid; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn check_none() { diff --git a/lib/src/sql/value/walk.rs b/lib/src/sql/value/walk.rs index 02445828..0566d7c9 100644 --- a/lib/src/sql/value/walk.rs +++ b/lib/src/sql/value/walk.rs @@ -62,7 +62,7 @@ mod tests { use super::*; use 
crate::sql::idiom::Idiom; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn walk_blank() { diff --git a/lib/src/syn/common.rs b/lib/src/syn/common.rs index 45687a73..917b1931 100644 --- a/lib/src/syn/common.rs +++ b/lib/src/syn/common.rs @@ -1,3 +1,8 @@ +#[cfg(feature = "experimental-parser")] +use super::v2::token::Span; +#[cfg(feature = "experimental-parser")] +use std::ops::Range; + /// A human readable location inside a string. /// /// Locations are 1 indexed, the first character on the first line being on line 1 column 1. @@ -19,10 +24,9 @@ impl Location { .expect("tried to find location of substring in unrelated string"); // Bytes of input prior to line being iteratated. let mut bytes_prior = 0; - for (line_idx, line) in input.split('\n').enumerate() { - // +1 for the '\n' - let bytes_so_far = bytes_prior + line.len() + 1; - if bytes_so_far > offset { + for (line_idx, (line, seperator_offset)) in LineIterator::new(input).enumerate() { + let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; + if bytes_so_far >= offset { // found line. let line_offset = offset - bytes_prior; let column = line[..line_offset].chars().count(); @@ -37,16 +41,13 @@ impl Location { unreachable!() } - #[cfg(feature = "experimental_parser")] - pub fn of_span_start(source: &str, span: Span) -> Self { - // Bytes of input before substr. - let offset = span.offset as usize; + #[cfg(feature = "experimental-parser")] + pub fn of_offset(source: &str, offset: usize) -> Self { // Bytes of input prior to line being iteratated. let mut bytes_prior = 0; - for (line_idx, line) in source.split('\n').enumerate() { - // +1 for the '\n' - let bytes_so_far = bytes_prior + line.len() + 1; - if bytes_so_far > offset { + for (line_idx, (line, seperator_offset)) in LineIterator::new(source).enumerate() { + let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; + if bytes_so_far >= offset { // found line. 
let line_offset = offset - bytes_prior; let column = line[..line_offset].chars().count(); @@ -61,31 +62,22 @@ impl Location { unreachable!() } - #[cfg(feature = "experimental_parser")] + #[cfg(feature = "experimental-parser")] + pub fn of_span_start(source: &str, span: Span) -> Self { + // Bytes of input before substr. + + let offset = span.offset as usize; + Self::of_offset(source, offset) + } + + #[cfg(feature = "experimental-parser")] pub fn of_span_end(source: &str, span: Span) -> Self { // Bytes of input before substr. let offset = span.offset as usize + span.len as usize; - // Bytes of input prior to line being iteratated. - let mut bytes_prior = 0; - for (line_idx, line) in source.split('\n').enumerate() { - // +1 for the '\n' - let bytes_so_far = bytes_prior + line.len() + 1; - if bytes_so_far > offset { - // found line. - let line_offset = offset - bytes_prior; - let column = line[..line_offset].chars().count(); - // +1 because line and column are 1 index. - return Self { - line: line_idx + 1, - column: column + 1, - }; - } - bytes_prior = bytes_so_far; - } - unreachable!() + Self::of_offset(source, offset) } - #[cfg(feature = "experimental_parser")] + #[cfg(feature = "experimental-parser")] pub fn range_of_span(source: &str, span: Span) -> Range { // Bytes of input before substr. let offset = span.offset as usize; @@ -93,19 +85,18 @@ impl Location { // Bytes of input prior to line being iteratated. 
let mut bytes_prior = 0; - let mut iterator = source.split('\n').enumerate(); + let mut iterator = LineIterator::new(source).enumerate(); let start = loop { - let Some((line_idx, line)) = iterator.next() else { + let Some((line_idx, (line, seperator_offset))) = iterator.next() else { panic!("tried to find location of span not belonging to string"); }; - // +1 for the '\n' - let bytes_so_far = bytes_prior + line.len() + 1; - if bytes_so_far > offset { + let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; + if bytes_so_far >= offset { // found line. let line_offset = offset - bytes_prior; let column = line[..line_offset].chars().count(); // +1 because line and column are 1 index. - if bytes_so_far > end { + if bytes_so_far >= end { // end is on the same line, finish immediatly. let line_offset = end - bytes_prior; let end_column = line[..line_offset].chars().count(); @@ -127,12 +118,11 @@ impl Location { }; loop { - let Some((line_idx, line)) = iterator.next() else { + let Some((line_idx, (line, seperator_offset))) = iterator.next() else { panic!("tried to find location of span not belonging to string"); }; - // +1 for the '\n' - let bytes_so_far = bytes_prior + line.len() + 1; - if bytes_so_far > end { + let bytes_so_far = bytes_prior + line.len() + seperator_offset.unwrap_or(0) as usize; + if bytes_so_far >= end { let line_offset = end - bytes_prior; let column = line[..line_offset].chars().count(); return start..Self { @@ -143,3 +133,93 @@ impl Location { } } } + +struct LineIterator<'a> { + current: &'a str, +} + +impl<'a> LineIterator<'a> { + pub fn new(s: &'a str) -> Self { + LineIterator { + current: s, + } + } +} + +impl<'a> Iterator for LineIterator<'a> { + type Item = (&'a str, Option); + + fn next(&mut self) -> Option { + if self.current.is_empty() { + return None; + } + let bytes = self.current.as_bytes(); + for i in 0..bytes.len() { + match bytes[i] { + b'\r' => { + if let Some(b'\n') = bytes.get(i + 1) { + let res = 
&self.current[..i]; + self.current = &self.current[i + 2..]; + return Some((res, Some(2))); + } + let res = &self.current[..i]; + self.current = &self.current[i + 1..]; + return Some((res, Some(1))); + } + 0xb | 0xC | b'\n' => { + // vertical tab VT and form feed FF. + let res = &self.current[..i]; + self.current = &self.current[i + 1..]; + return Some((res, Some(1))); + } + 0xc2 => { + // next line NEL + if bytes.get(i + 1).copied() != Some(0x85) { + continue; + } + let res = &self.current[..i]; + self.current = &self.current[i + 2..]; + return Some((res, Some(2))); + } + 0xe2 => { + // line separator and paragraph seperator. + if bytes.get(i + 1).copied() != Some(0x80) { + continue; + } + let next_byte = bytes.get(i + 2).copied(); + if next_byte != Some(0xA8) && next_byte != Some(0xA9) { + continue; + } + + // vertical tab VT, next line NEL and form feed FF. + let res = &self.current[..i]; + self.current = &self.current[i + 3..]; + return Some((res, Some(3))); + } + _ => {} + } + } + Some((std::mem::take(&mut self.current), None)) + } +} + +#[cfg(test)] +mod test { + use super::LineIterator; + + #[test] + fn test_line_iterator() { + let lines = "foo\nbar\r\nfoo\rbar\u{000B}foo\u{000C}bar\u{0085}foo\u{2028}bar\u{2029}\n"; + let mut iterator = LineIterator::new(lines); + assert_eq!(iterator.next(), Some(("foo", Some(1)))); + assert_eq!(iterator.next(), Some(("bar", Some(2)))); + assert_eq!(iterator.next(), Some(("foo", Some(1)))); + assert_eq!(iterator.next(), Some(("bar", Some(1)))); + assert_eq!(iterator.next(), Some(("foo", Some(1)))); + assert_eq!(iterator.next(), Some(("bar", Some(2)))); + assert_eq!(iterator.next(), Some(("foo", Some(3)))); + assert_eq!(iterator.next(), Some(("bar", Some(3)))); + assert_eq!(iterator.next(), Some(("", Some(1)))); + assert_eq!(iterator.next(), None); + } +} diff --git a/lib/src/syn/error.rs b/lib/src/syn/error/mod.rs similarity index 99% rename from lib/src/syn/error.rs rename to lib/src/syn/error/mod.rs index 
f31e4234..0da9cb01 100644 --- a/lib/src/syn/error.rs +++ b/lib/src/syn/error/mod.rs @@ -2,6 +2,9 @@ use std::{fmt, ops::Range}; use super::common::Location; +mod nom_error; +pub use nom_error::ParseError; + #[derive(Clone, Debug)] pub struct RenderedError { pub text: String, diff --git a/lib/src/syn/v1/error/mod.rs b/lib/src/syn/error/nom_error.rs similarity index 98% rename from lib/src/syn/v1/error/mod.rs rename to lib/src/syn/error/nom_error.rs index ffd6ca35..1d5ca6f8 100644 --- a/lib/src/syn/v1/error/mod.rs +++ b/lib/src/syn/error/nom_error.rs @@ -5,19 +5,12 @@ use crate::syn::{ use nom::error::ErrorKind; use nom::error::FromExternalError; use nom::error::ParseError as NomParseError; -use nom::Err; use std::fmt::Write; use std::num::ParseFloatError; use std::num::ParseIntError; use std::ops::Bound; use thiserror::Error; -mod utils; -pub use utils::*; -mod render; - -pub type IResult> = Result<(I, O), Err>; - #[derive(Error, Debug, Clone)] pub enum ParseError { Base(I), diff --git a/lib/src/syn/mod.rs b/lib/src/syn/mod.rs index f8b0bc4f..e75612ed 100644 --- a/lib/src/syn/mod.rs +++ b/lib/src/syn/mod.rs @@ -3,11 +3,20 @@ pub mod common; pub mod error; +#[cfg(not(feature = "experimental-parser"))] pub mod v1; -pub use v1::{ - datetime, datetime_raw, duration, idiom, json, parse, path_like, range, subquery, thing, - thing_raw, value, +#[cfg(not(feature = "experimental-parser"))] +pub use v1::{datetime_raw, duration, idiom, json, parse, range, subquery, thing, value}; + +#[cfg(feature = "experimental-parser")] +pub mod v2; +#[cfg(feature = "experimental-parser")] +pub use v2::{ + datetime_raw, duration, idiom, json, json_legacy_strand, parse, range, subquery, thing, value, + value_legacy_strand, }; #[cfg(test)] -pub mod test; +pub trait Parse { + fn parse(val: &str) -> T; +} diff --git a/lib/src/syn/test.rs b/lib/src/syn/test.rs deleted file mode 100644 index 6eb287cd..00000000 --- a/lib/src/syn/test.rs +++ /dev/null @@ -1,50 +0,0 @@ -pub(crate) use 
super::v1::builtin::builtin_name; -use crate::sql::{Array, Expression, Idiom, Param, Script, Thing, Value}; - -use super::v1::test::*; - -pub trait Parse { - fn parse(val: &str) -> T; -} - -impl Parse for Value { - fn parse(val: &str) -> Self { - value(val).unwrap().1 - } -} - -impl Parse for Array { - fn parse(val: &str) -> Self { - array(val).unwrap().1 - } -} - -impl Parse for Param { - fn parse(val: &str) -> Self { - param(val).unwrap().1 - } -} - -impl Parse for Idiom { - fn parse(val: &str) -> Self { - idiom(val).unwrap().1 - } -} - -impl Parse for Script { - fn parse(val: &str) -> Self { - script(val).unwrap().1 - } -} - -impl Parse for Thing { - fn parse(val: &str) -> Self { - thing(val).unwrap().1 - } -} - -impl Parse for Expression { - fn parse(val: &str) -> Self { - expression(val).unwrap().1 - } -} diff --git a/lib/src/syn/v1/error/utils.rs b/lib/src/syn/v1/error.rs similarity index 94% rename from lib/src/syn/v1/error/utils.rs rename to lib/src/syn/v1/error.rs index 66d44444..d0fb4789 100644 --- a/lib/src/syn/v1/error/utils.rs +++ b/lib/src/syn/v1/error.rs @@ -1,7 +1,7 @@ -use super::{IResult, ParseError}; -use nom::bytes::complete::tag_no_case; -use nom::Err; -use nom::Parser; +pub use crate::syn::error::ParseError; +use nom::{bytes::complete::tag_no_case, Err, Parser}; + +pub type IResult> = Result<(I, O), Err>; pub fn expected(expect: &'static str, mut parser: P) -> impl FnMut(I) -> IResult where diff --git a/lib/src/syn/v1/error/render.rs b/lib/src/syn/v1/error/render.rs deleted file mode 100644 index 8b137891..00000000 --- a/lib/src/syn/v1/error/render.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/lib/src/syn/v1/expression.rs b/lib/src/syn/v1/expression.rs index a98401f4..137cd706 100644 --- a/lib/src/syn/v1/expression.rs +++ b/lib/src/syn/v1/expression.rs @@ -7,7 +7,7 @@ use super::{ value::single, IResult, }; -use crate::sql::{Cast, Expression, Future}; +use crate::sql::{Cast, Expression, Future, Operator, Value}; use nom::{bytes::complete::tag, 
character::complete::char, combinator::cut, sequence::delimited}; pub fn cast(i: &str) -> IResult<&str, Cast> { @@ -30,10 +30,32 @@ pub fn unary(i: &str) -> IResult<&str, Expression> { )) } +/// Augment an existing expression +pub(crate) fn augment(mut this: Expression, l: Value, o: Operator) -> Expression { + match &mut this { + Expression::Binary { + l: left, + o: op, + .. + } if o.precedence() >= op.precedence() => match left { + Value::Expression(x) => { + *x.as_mut() = augment(std::mem::take(x), l, o); + this + } + _ => { + *left = Expression::new(l, o, std::mem::take(left)).into(); + this + } + }, + e => { + let r = Value::from(std::mem::take(e)); + Expression::new(l, o, r) + } + } +} + #[cfg(test)] pub fn binary(i: &str) -> IResult<&str, Expression> { - use crate::sql::Value; - use super::depth; use super::value; @@ -43,7 +65,7 @@ pub fn binary(i: &str) -> IResult<&str, Expression> { let _diving = depth::dive(i)?; let (i, r) = value::value(i)?; let v = match r { - Value::Expression(r) => r.augment(l, o), + Value::Expression(r) => augment(*r, l, o), _ => Expression::new(l, o, r), }; Ok((i, v)) diff --git a/lib/src/syn/v1/function.rs b/lib/src/syn/v1/function.rs index e390901e..56d4c147 100644 --- a/lib/src/syn/v1/function.rs +++ b/lib/src/syn/v1/function.rs @@ -189,7 +189,7 @@ mod tests { use super::super::builtin::{builtin_name, BuiltinName}; use super::*; use crate::sql::Value; - use crate::syn::{self, test::Parse}; + use crate::syn::{self, Parse}; fn function(i: &str) -> IResult<&str, Function> { alt((defined_function, |i| { diff --git a/lib/src/syn/v1/idiom.rs b/lib/src/syn/v1/idiom.rs index 3bf83d18..05a570f0 100644 --- a/lib/src/syn/v1/idiom.rs +++ b/lib/src/syn/v1/idiom.rs @@ -275,9 +275,8 @@ pub fn bracketed_value(i: &str) -> IResult<&str, Part> { #[cfg(test)] mod tests { - use crate::sql::{Dir, Expression, Id, Number, Param, Strand, Table, Thing}; - use crate::syn::test::Parse; + use crate::syn::Parse; use super::*; diff --git 
a/lib/src/syn/v1/literal/datetime.rs b/lib/src/syn/v1/literal/datetime.rs index fee696b7..fdc75518 100644 --- a/lib/src/syn/v1/literal/datetime.rs +++ b/lib/src/syn/v1/literal/datetime.rs @@ -29,8 +29,8 @@ fn datetime_single(i: &str) -> IResult<&str, Datetime> { fn datetime_double(i: &str) -> IResult<&str, Datetime> { alt(( - delimited(tag("d\""), cut(datetime_raw), cut(char('\"'))), - delimited(char('\"'), datetime_raw, char('\"')), + delimited(tag("d\""), cut(datetime_raw), cut(char('"'))), + delimited(char('"'), datetime_raw, char('"')), ))(i) } @@ -194,7 +194,7 @@ mod tests { // use chrono::Date; - use crate::{sql::Value, syn::test::Parse}; + use crate::{sql::Value, syn::Parse}; use super::*; diff --git a/lib/src/syn/v1/literal/mod.rs b/lib/src/syn/v1/literal/mod.rs index b31edf2e..ed57d61f 100644 --- a/lib/src/syn/v1/literal/mod.rs +++ b/lib/src/syn/v1/literal/mod.rs @@ -108,7 +108,7 @@ pub fn tables(i: &str) -> IResult<&str, Tables> { mod tests { use super::*; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn ident_normal() { diff --git a/lib/src/syn/v1/literal/strand.rs b/lib/src/syn/v1/literal/strand.rs index 1a73f2e8..ba35bbd2 100644 --- a/lib/src/syn/v1/literal/strand.rs +++ b/lib/src/syn/v1/literal/strand.rs @@ -163,7 +163,7 @@ fn char_unicode_bracketed(i: &str) -> IResult<&str, char> { #[cfg(test)] mod tests { - use crate::{sql::Value, syn::test::Parse}; + use crate::{sql::Value, syn::Parse}; use super::*; diff --git a/lib/src/syn/v1/literal/uuid.rs b/lib/src/syn/v1/literal/uuid.rs index 93d87226..74639f0e 100644 --- a/lib/src/syn/v1/literal/uuid.rs +++ b/lib/src/syn/v1/literal/uuid.rs @@ -54,7 +54,7 @@ fn uuid_raw(i: &str) -> IResult<&str, Uuid> { #[cfg(test)] mod tests { - use crate::{sql::Value, syn::test::Parse}; + use crate::{sql::Value, syn::Parse}; use super::*; diff --git a/lib/src/syn/v1/mod.rs b/lib/src/syn/v1/mod.rs index 0942b18a..04d0a3f2 100644 --- a/lib/src/syn/v1/mod.rs +++ b/lib/src/syn/v1/mod.rs @@ -7,7 +7,7 @@ mod 
part; mod stmt; mod block; -pub(crate) mod builtin; +mod builtin; mod comment; mod common; mod depth; @@ -79,10 +79,6 @@ pub fn idiom(input: &str) -> Result { parse_impl(input, idiom::plain) } -pub fn datetime(input: &str) -> Result { - parse_impl(input, literal::datetime) -} - pub fn datetime_raw(input: &str) -> Result { parse_impl(input, literal::datetime_all_raw) } @@ -91,20 +87,12 @@ pub fn duration(input: &str) -> Result { parse_impl(input, literal::duration) } -pub fn path_like(input: &str) -> Result { - parse_impl(input, value::path_like) -} - pub fn range(input: &str) -> Result { parse_impl(input, literal::range) } /// Parses a SurrealQL [`Thing`] pub fn thing(input: &str) -> Result { - parse_impl(input, thing::thing) -} - -pub fn thing_raw(input: &str) -> Result { parse_impl(input, thing::thing_raw) } diff --git a/lib/src/syn/v1/operator.rs b/lib/src/syn/v1/operator.rs index 46554d3a..cb82ee9b 100644 --- a/lib/src/syn/v1/operator.rs +++ b/lib/src/syn/v1/operator.rs @@ -149,6 +149,7 @@ pub fn knn_distance(i: &str) -> IResult<&str, Distance> { } pub fn knn(i: &str) -> IResult<&str, Operator> { + let (i, _) = opt(tag_no_case("knn"))(i)?; let (i, _) = char('<')(i)?; let (i, k) = u32(i)?; let (i, dist) = opt(knn_distance)(i)?; @@ -228,4 +229,13 @@ mod tests { assert_eq!("<3,EUCLIDEAN>", format!("{}", out)); assert_eq!(out, Operator::Knn(3, Some(Distance::Euclidean))); } + + #[test] + fn test_knn_with_prefix() { + let res = knn("knn<5>"); + assert!(res.is_ok()); + let out = res.unwrap().1; + assert_eq!("<5>", format!("{}", out)); + assert_eq!(out, Operator::Knn(5, None)); + } } diff --git a/lib/src/syn/v1/part/data.rs b/lib/src/syn/v1/part/data.rs index dfe642f0..65ebb34b 100644 --- a/lib/src/syn/v1/part/data.rs +++ b/lib/src/syn/v1/part/data.rs @@ -72,7 +72,6 @@ pub fn single(i: &str) -> IResult<&str, Data> { pub fn values(i: &str) -> IResult<&str, Data> { let (i, _) = tag_no_case("(")(i)?; - // TODO: look at call tree here. 
let (i, fields) = separated_list1(commas, plain)(i)?; let (i, _) = tag_no_case(")")(i)?; let (i, _) = shouldbespace(i)?; diff --git a/lib/src/syn/v1/part/mod.rs b/lib/src/syn/v1/part/mod.rs index 2f457de2..ee3c3622 100644 --- a/lib/src/syn/v1/part/mod.rs +++ b/lib/src/syn/v1/part/mod.rs @@ -6,7 +6,6 @@ use super::{ literal::{datetime, duration, ident, table, tables}, operator::dir, thing::thing, - // TODO: go through and check every import for alias. value::value, IResult, }; @@ -238,7 +237,7 @@ mod tests { use super::*; use crate::sql::{Datetime, Idiom, Value}; - use crate::syn::test::Parse; + use crate::syn::Parse; use std::time; #[test] diff --git a/lib/src/syn/v1/part/permission.rs b/lib/src/syn/v1/part/permission.rs index 75c8f22c..34a66cf6 100644 --- a/lib/src/syn/v1/part/permission.rs +++ b/lib/src/syn/v1/part/permission.rs @@ -121,7 +121,7 @@ fn rule(i: &str) -> IResult<&str, Vec<(PermissionKind, Permission)>> { #[cfg(test)] mod test { use crate::sql::{Expression, Value}; - use crate::syn::test::Parse; + use crate::syn::Parse; use super::*; diff --git a/lib/src/syn/v1/part/split.rs b/lib/src/syn/v1/part/split.rs index 7ac5e709..0e199c32 100644 --- a/lib/src/syn/v1/part/split.rs +++ b/lib/src/syn/v1/part/split.rs @@ -24,7 +24,7 @@ fn split_raw(i: &str) -> IResult<&str, Split> { mod tests { use super::*; - use crate::{sql::Idiom, syn::test::Parse}; + use crate::{sql::Idiom, syn::Parse}; #[test] fn split_statement() { diff --git a/lib/src/syn/v1/stmt/define/user.rs b/lib/src/syn/v1/stmt/define/user.rs index 6f1e251c..a536562f 100644 --- a/lib/src/syn/v1/stmt/define/user.rs +++ b/lib/src/syn/v1/stmt/define/user.rs @@ -11,7 +11,6 @@ use crate::{ iam::Role, sql::{statements::DefineUserStatement, Ident, Strand}, }; -use argon2::{password_hash::SaltString, Argon2, PasswordHasher}; use nom::{ branch::alt, bytes::complete::tag_no_case, @@ -19,7 +18,6 @@ use nom::{ multi::{many0, separated_list1}, Err, }; -use rand::{distributions::Alphanumeric, rngs::OsRng, Rng}; pub 
fn user(i: &str) -> IResult<&str, DefineUserStatement> { let (i, _) = tag_no_case("USER")(i)?; @@ -35,28 +33,19 @@ pub fn user(i: &str) -> IResult<&str, DefineUserStatement> { Ok((i, (name, base, opts))) })(i)?; // Create the base statement - let mut res = DefineUserStatement { + let mut res = DefineUserStatement::from_parsed_values( name, base, - roles: vec!["Viewer".into()], // New users get the viewer role by default - code: rand::thread_rng() - .sample_iter(&Alphanumeric) - .take(128) - .map(char::from) - .collect::(), - ..Default::default() - }; + vec!["Viewer".into()], // New users get the viewer role by default + ); // Assign any defined options for opt in opts { match opt { DefineUserOption::Password(v) => { - res.hash = Argon2::default() - .hash_password(v.as_ref(), &SaltString::generate(&mut OsRng)) - .unwrap() - .to_string() + res.set_password(&v); } DefineUserOption::Passhash(v) => { - res.hash = v; + res.set_passhash(v); } DefineUserOption::Roles(v) => { res.roles = v; diff --git a/lib/src/syn/v1/stmt/option.rs b/lib/src/syn/v1/stmt/option.rs index 7891cdcd..18de980e 100644 --- a/lib/src/syn/v1/stmt/option.rs +++ b/lib/src/syn/v1/stmt/option.rs @@ -9,7 +9,7 @@ use nom::{ branch::alt, bytes::complete::tag_no_case, character::complete::char, - combinator::{cut, opt, value}, + combinator::{opt, value}, sequence::tuple, }; @@ -19,10 +19,10 @@ pub fn option(i: &str) -> IResult<&str, OptionStatement> { let (i, n) = ident(i)?; let (i, v) = expected( "'=' followed by a value for the option", - cut(opt(alt(( + opt(alt(( value(true, tuple((mightbespace, char('='), mightbespace, tag_no_case("TRUE")))), value(false, tuple((mightbespace, char('='), mightbespace, tag_no_case("FALSE")))), - )))), + ))), )(i)?; Ok(( i, diff --git a/lib/src/syn/v1/subquery.rs b/lib/src/syn/v1/subquery.rs index 270c3edc..1e54d8ff 100644 --- a/lib/src/syn/v1/subquery.rs +++ b/lib/src/syn/v1/subquery.rs @@ -87,7 +87,6 @@ fn disallowed_subquery_statements(i: &str) -> IResult<&str, ()> { 
#[cfg(test)] mod tests { - use super::*; #[test] diff --git a/lib/src/syn/v1/test.rs b/lib/src/syn/v1/test.rs index fb8a5803..42000d71 100644 --- a/lib/src/syn/v1/test.rs +++ b/lib/src/syn/v1/test.rs @@ -1,4 +1,5 @@ -pub use super::{ +use super::{ + super::Parse, expression::binary as expression, function::script_body as script, idiom::plain as idiom, @@ -6,3 +7,48 @@ pub use super::{ thing::thing, value::{array, value}, }; +use nom::Finish; + +use crate::sql::{Array, Expression, Idiom, Param, Script, Thing, Value}; + +impl Parse for Value { + fn parse(val: &str) -> Self { + value(val).finish().unwrap().1 + } +} + +impl Parse for Array { + fn parse(val: &str) -> Self { + array(val).finish().unwrap().1 + } +} + +impl Parse for Param { + fn parse(val: &str) -> Self { + param(val).finish().unwrap().1 + } +} + +impl Parse for Idiom { + fn parse(val: &str) -> Self { + idiom(val).finish().unwrap().1 + } +} + +impl Parse for Script { + fn parse(val: &str) -> Self { + script(val).finish().unwrap().1 + } +} + +impl Parse for Thing { + fn parse(val: &str) -> Self { + thing(val).finish().unwrap().1 + } +} + +impl Parse for Expression { + fn parse(val: &str) -> Self { + expression(val).finish().unwrap().1 + } +} diff --git a/lib/src/syn/v1/thing.rs b/lib/src/syn/v1/thing.rs index bd2acf6f..d9a6386b 100644 --- a/lib/src/syn/v1/thing.rs +++ b/lib/src/syn/v1/thing.rs @@ -81,7 +81,7 @@ mod tests { use crate::sql::object::Object; use crate::sql::value::Value; use crate::sql::Strand; - use crate::syn::test::Parse; + use crate::syn::Parse; #[test] fn thing_normal() { @@ -249,7 +249,7 @@ mod tests { let res = id(sql); let out = res.unwrap().1; assert_eq!(Id::from("100test"), out); - assert_eq!("100test", format!("{}", out)); + assert_eq!("⟨100test⟩", format!("{}", out)); } #[test] diff --git a/lib/src/syn/v1/value/mod.rs b/lib/src/syn/v1/value/mod.rs index bb23638f..939f1830 100644 --- a/lib/src/syn/v1/value/mod.rs +++ b/lib/src/syn/v1/value/mod.rs @@ -9,7 +9,7 @@ use super::{ depth, 
ending::keyword, error::expected, - expression::{cast, future, unary}, + expression::{augment, cast, future, unary}, function::{builtin_function, defined_function, model}, idiom::{self, reparse_idiom_start}, literal::{ @@ -62,7 +62,7 @@ pub fn value(i: &str) -> IResult<&str, Value> { let _diving = depth::dive(i)?; let (i, r) = cut(value)(i)?; let expr = match r { - Value::Expression(r) => r.augment(start, o), + Value::Expression(r) => augment(*r, start, o), _ => Expression::new(start, o, r), }; let v = Value::from(expr); @@ -179,7 +179,7 @@ pub fn select(i: &str) -> IResult<&str, Value> { }; let (i, r) = cut(value)(i)?; let expr = match r { - Value::Expression(r) => r.augment(start, op), + Value::Expression(r) => augment(*r, start, op), _ => Expression::new(start, op, r), }; let v = Value::from(expr); diff --git a/lib/src/syn/v2/lexer/byte.rs b/lib/src/syn/v2/lexer/byte.rs new file mode 100644 index 00000000..c45da866 --- /dev/null +++ b/lib/src/syn/v2/lexer/byte.rs @@ -0,0 +1,387 @@ +use crate::syn::v2::{ + lexer::{ + unicode::{byte, chars}, + Error, Lexer, + }, + token::{t, Token, TokenKind}, +}; + +impl<'a> Lexer<'a> { + /// Eats a single line comment. + pub fn eat_single_line_comment(&mut self) { + loop { + let Some(byte) = self.reader.next() else { + break; + }; + match byte { + byte::CR => { + self.eat(byte::LF); + break; + } + byte::LF => { + break; + } + x if !x.is_ascii() => { + // -1 because we already ate the byte. + let backup = self.reader.offset() - 1; + let char = match self.reader.complete_char(x) { + Ok(x) => x, + Err(_) => { + // let the next token handle the error. + self.reader.backup(backup); + break; + } + }; + + match char { + chars::LS | chars::PS | chars::NEL => break, + _ => {} + } + } + _ => {} + } + } + self.set_whitespace_span(self.current_span()); + self.skip_offset(); + } + + /// Eats a multi line comment and returns an error if `*/` would be missing. 
+ pub fn eat_multi_line_comment(&mut self) -> Result<(), Error> { + loop { + let Some(byte) = self.reader.next() else { + return Err(Error::UnexpectedEof); + }; + if let b'*' = byte { + let Some(byte) = self.reader.next() else { + return Err(Error::UnexpectedEof); + }; + if b'/' == byte { + self.set_whitespace_span(self.current_span()); + self.skip_offset(); + return Ok(()); + } + } + } + } + + /// Eat whitespace like spaces tables and new-lines. + pub fn eat_whitespace(&mut self) { + loop { + let Some(byte) = self.reader.peek() else { + return; + }; + match byte { + byte::CR | byte::FF | byte::LF | byte::SP | byte::VT | byte::TAB => { + self.reader.next(); + } + x if !x.is_ascii() => { + let backup = self.reader.offset(); + self.reader.next(); + let char = match self.reader.complete_char(x) { + Ok(x) => x, + Err(_) => { + self.reader.backup(backup); + break; + } + }; + + match char { + '\u{00A0}' | '\u{1680}' | '\u{2000}' | '\u{2001}' | '\u{2002}' + | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}' | '\u{2007}' + | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}' + | '\u{3000}' => {} + _ => { + self.reader.backup(backup); + break; + } + } + } + _ => break, + } + } + self.set_whitespace_span(self.current_span()); + self.skip_offset(); + } + + // re-lexes a `/` token to a regex token. 
+ pub fn relex_regex(&mut self, token: Token) -> Token { + debug_assert_eq!(token.kind, t!("/")); + debug_assert_eq!(token.span.offset + 1, self.last_offset); + debug_assert_eq!(token.span.len, 1); + debug_assert_eq!(self.scratch, ""); + + self.last_offset = token.span.offset; + loop { + match self.reader.next() { + Some(b'\\') => { + if let Some(b'/') = self.reader.peek() { + self.reader.next(); + self.scratch.push('/') + } else { + self.scratch.push('\\') + } + } + Some(b'/') => break, + Some(x) => { + if x.is_ascii() { + self.scratch.push(x as char); + } else { + match self.reader.complete_char(x) { + Ok(x) => { + self.scratch.push(x); + } + Err(e) => return self.invalid_token(e.into()), + } + } + } + None => return self.invalid_token(Error::UnexpectedEof), + } + } + + match self.scratch.parse() { + Ok(x) => { + self.scratch.clear(); + self.regex = Some(x); + self.finish_token(TokenKind::Regex) + } + Err(e) => self.invalid_token(Error::Regex(e)), + } + } + + /// Lex the next token, starting from the given byte. + pub fn lex_ascii(&mut self, byte: u8) -> Token { + let kind = match byte { + b'{' => t!("{"), + b'}' => t!("}"), + b'[' => t!("["), + b']' => t!("]"), + b')' => t!(")"), + b'(' => t!("("), + b';' => t!(";"), + b',' => t!(","), + b'@' => t!("@"), + byte::CR | byte::FF | byte::LF | byte::SP | byte::VT | byte::TAB => { + self.eat_whitespace(); + return self.next_token_inner(); + } + b'|' => match self.reader.peek() { + Some(b'|') => { + self.reader.next(); + t!("||") + } + _ => t!("|"), + }, + b'&' => match self.reader.peek() { + Some(b'&') => { + self.reader.next(); + t!("&&") + } + _ => return self.invalid_token(Error::ExpectedEnd('&')), + }, + b'.' => match self.reader.peek() { + Some(b'.') => { + self.reader.next(); + match self.reader.peek() { + Some(b'.') => { + self.reader.next(); + t!("...") + } + _ => t!(".."), + } + } + _ => t!("."), + }, + b'!' 
=> match self.reader.peek() { + Some(b'=') => { + self.reader.next(); + t!("!=") + } + Some(b'~') => { + self.reader.next(); + t!("!~") + } + _ => t!("!"), + }, + b'?' => match self.reader.peek() { + Some(b'?') => { + self.reader.next(); + t!("??") + } + Some(b':') => { + self.reader.next(); + t!("?:") + } + Some(b'~') => { + self.reader.next(); + t!("?~") + } + Some(b'=') => { + self.reader.next(); + t!("?=") + } + _ => t!("?"), + }, + b'<' => match self.reader.peek() { + Some(b'=') => { + self.reader.next(); + t!("<=") + } + Some(b'-') => { + self.reader.next(); + match self.reader.peek() { + Some(b'>') => { + self.reader.next(); + t!("<->") + } + _ => t!("<-"), + } + } + _ => t!("<"), + }, + b'>' => match self.reader.peek() { + Some(b'=') => { + self.reader.next(); + t!(">=") + } + _ => t!(">"), + }, + b'-' => match self.reader.peek() { + Some(b'>') => { + self.reader.next(); + t!("->") + } + Some(b'-') => { + self.reader.next(); + self.eat_single_line_comment(); + return self.next_token_inner(); + } + Some(b'=') => { + self.reader.next(); + t!("-=") + } + _ => t!("-"), + }, + b'+' => match self.reader.peek() { + Some(b'=') => { + self.reader.next(); + t!("+=") + } + Some(b'?') => { + self.reader.next(); + match self.reader.peek() { + Some(b'=') => { + self.reader.next(); + t!("+?=") + } + _ => return self.invalid_token(Error::ExpectedEnd('=')), + } + } + _ => t!("+"), + }, + b'/' => match self.reader.peek() { + Some(b'*') => { + self.reader.next(); + // A `*/` could be missing which would be invalid. 
+ if let Err(e) = self.eat_multi_line_comment() { + return self.invalid_token(e); + } + return self.next_token_inner(); + } + Some(b'/') => { + self.reader.next(); + self.eat_single_line_comment(); + return self.next_token_inner(); + } + _ => t!("/"), + }, + b'*' => match self.reader.peek() { + Some(b'*') => { + self.reader.next(); + t!("**") + } + Some(b'=') => { + self.reader.next(); + t!("*=") + } + Some(b'~') => { + self.reader.next(); + t!("*~") + } + _ => t!("*"), + }, + b'=' => match self.reader.peek() { + Some(b'=') => { + self.reader.next(); + t!("==") + } + _ => t!("="), + }, + b':' => match self.reader.peek() { + Some(b':') => { + self.reader.next(); + t!("::") + } + _ => t!(":"), + }, + b'$' => { + if self.reader.peek().map(|x| x.is_ascii_alphabetic()).unwrap_or(false) { + return self.lex_param(); + } + t!("$") + } + b'#' => { + self.eat_single_line_comment(); + return self.next_token_inner(); + } + b'`' => return self.lex_surrounded_ident(true), + b'"' => return self.lex_strand(true), + b'\'' => return self.lex_strand(false), + b'd' => { + match self.reader.peek() { + Some(b'"') => { + self.reader.next(); + return self.lex_datetime(true); + } + Some(b'\'') => { + self.reader.next(); + return self.lex_datetime(false); + } + _ => {} + } + return self.lex_ident_from_next_byte(b'd'); + } + b'u' => { + match self.reader.peek() { + Some(b'"') => { + self.reader.next(); + return self.lex_uuid(true); + } + Some(b'\'') => { + self.reader.next(); + return self.lex_uuid(false); + } + _ => {} + } + return self.lex_ident_from_next_byte(b'u'); + } + b'r' => match self.reader.peek() { + Some(b'\"') => { + self.reader.next(); + t!("r\"") + } + Some(b'\'') => { + self.reader.next(); + t!("r'") + } + _ => return self.lex_ident_from_next_byte(byte), + }, + b'a'..=b'z' | b'A'..=b'Z' | b'_' => { + return self.lex_ident_from_next_byte(byte); + } + b'0'..=b'9' => return self.lex_number(byte), + x => return self.invalid_token(Error::UnexpectedCharacter(x as char)), + }; + + 
self.finish_token(kind) + } +} diff --git a/lib/src/syn/v2/lexer/char.rs b/lib/src/syn/v2/lexer/char.rs new file mode 100644 index 00000000..e3a20523 --- /dev/null +++ b/lib/src/syn/v2/lexer/char.rs @@ -0,0 +1,37 @@ +use crate::syn::v2::{ + lexer::{CharError, Lexer}, + token::{t, Token}, +}; + +use super::Error; + +impl<'a> Lexer<'a> { + /// lex non-ascii characters. + /// + /// Should only be called after determining that the byte is not a valid ascii character. + pub fn lex_char(&mut self, byte: u8) -> Token { + let c = match self.reader.complete_char(byte) { + Ok(x) => x, + Err(CharError::Eof) => return self.invalid_token(Error::InvalidUtf8), + Err(CharError::Unicode) => return self.invalid_token(Error::InvalidUtf8), + }; + let kind = match c { + '⟨' => return self.lex_surrounded_ident(false), + '…' => t!("..."), + '∋' => t!("∋"), + '∌' => t!("∌"), + '∈' => t!("∈"), + '∉' => t!("∉"), + '⊇' => t!("⊇"), + '⊃' => t!("⊃"), + '⊅' => t!("⊅"), + '⊆' => t!("⊆"), + '⊂' => t!("⊂"), + '⊄' => t!("⊄"), + '×' => t!("×"), + '÷' => t!("÷"), + x => return self.invalid_token(Error::UnexpectedCharacter(x)), + }; + self.finish_token(kind) + } +} diff --git a/lib/src/syn/v2/lexer/datetime.rs b/lib/src/syn/v2/lexer/datetime.rs new file mode 100644 index 00000000..34fd0c6c --- /dev/null +++ b/lib/src/syn/v2/lexer/datetime.rs @@ -0,0 +1,267 @@ +use std::ops::RangeInclusive; + +use chrono::{FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, Offset, TimeZone, Utc}; +use thiserror::Error; + +use crate::{ + sql::Datetime, + syn::v2::token::{Token, TokenKind}, +}; + +use super::{Error as LexError, Lexer}; + +#[derive(Error, Debug)] +pub enum PartError { + #[error("value outside of allowed range")] + OutsideRange, + #[error("missing digit(s)")] + MissingDigits, + #[error("too many digits")] + TooManyDigits, +} + +#[derive(Error, Debug)] +pub enum Error { + #[error("invalid year, {0}")] + Year(PartError), + #[error("invalid month, {0}")] + Month(PartError), + #[error("invalid day, {0}")] + 
Day(PartError), + #[error("invalid hour, {0}")] + Hour(PartError), + #[error("invalid time minute, {0}")] + Minute(PartError), + #[error("invalid second, {0}")] + Second(PartError), + #[error("invalid nano_seconds, {0}")] + NanoSeconds(PartError), + #[error("invalid time-zone hour, {0}")] + TimeZoneHour(PartError), + #[error("invalid time-zone minute, {0}")] + TimeZoneMinute(PartError), + #[error("missing seperator `{}`",*(.0) as char)] + MissingSeparator(u8), + #[error("expected date-time strand to end")] + ExpectedEnd, + #[error("missing time-zone")] + MissingTimeZone, + #[error("date does not exist")] + NonExistantDate, + #[error("time does not exist")] + NonExistantTime, + #[error("time-zone offset too big")] + TimeZoneOutOfRange, +} + +impl<'a> Lexer<'a> { + /// Lex a date-time strand. + pub fn lex_datetime(&mut self, double: bool) -> Token { + match self.lex_datetime_err(double) { + Ok(x) => { + self.datetime = Some(x); + self.finish_token(TokenKind::DateTime) + } + Err(e) => self.invalid_token(LexError::DateTime(e)), + } + } + + /// Lex datetime without enclosing `"` or `'` but return a result or parser error. + pub fn lex_datetime_raw_err(&mut self) -> Result { + let negative = match self.reader.peek() { + Some(b'+') => { + self.reader.next(); + false + } + Some(b'-') => { + self.reader.next(); + true + } + _ => false, + }; + + let mut year = self.lex_datetime_part(4, 0..=9999).map_err(Error::Year)? 
as i16; + if negative { + year = -year; + } + if !self.eat(b'-') { + return Err(Error::MissingSeparator(b'-')); + } + let month = self.lex_datetime_part(2, 1..=12).map_err(Error::Month)?; + if !self.eat(b'-') { + return Err(Error::MissingSeparator(b'-')); + } + let day = self.lex_datetime_part(2, 1..=31).map_err(Error::Day)?; + + if !self.eat(b'T') { + let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else { + return Err(Error::NonExistantDate); + }; + let time = NaiveTime::default(); + let date_time = NaiveDateTime::new(date, time); + + let datetime = Utc + .fix() + .from_local_datetime(&date_time) + .earliest() + // this should never panic with a fixed offset. + .unwrap() + .with_timezone(&Utc); + + return Ok(Datetime(datetime)); + } + + let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::Hour)?; + if !self.eat(b':') { + return Err(Error::MissingSeparator(b':')); + } + + let minutes = self.lex_datetime_part(2, 0..=59).map_err(Error::Minute)?; + + if !self.eat(b':') { + return Err(Error::MissingSeparator(b':')); + } + + let seconds = self.lex_datetime_part(2, 0..=59).map_err(Error::Second)?; + + // nano seconds + let nano = if let Some(b'.') = self.reader.peek() { + self.reader.next(); + // check if there is atleast one digit. + if !matches!(self.reader.peek(), Some(b'0'..=b'9')) { + return Err(Error::NanoSeconds(PartError::MissingDigits)); + } + let mut number = 0u32; + for i in 0..9 { + let Some(c) = self.reader.peek() else { + // always invalid token, just let the next section handle the error. + break; + }; + if !c.is_ascii_digit() { + // If digits are missing they are counted as 0's + for _ in i..9 { + number *= 10; + } + break; + } + self.reader.next(); + number *= 10; + number += (c - b'0') as u32; + } + // ensure nano_seconds are at most 9 digits. 
+ if matches!(self.reader.peek(), Some(b'0'..=b'9')) { + return Err(Error::NanoSeconds(PartError::TooManyDigits)); + } + number + } else { + 0 + }; + + // time zone + let time_zone = match self.reader.peek() { + Some(b'Z') => { + self.reader.next(); + None + } + Some(x @ (b'-' | b'+')) => { + self.reader.next(); + let negative = x == b'-'; + let hour = self.lex_datetime_part(2, 0..=24).map_err(Error::TimeZoneHour)? as i32; + let Some(b':') = self.reader.next() else { + return Err(Error::MissingSeparator(b':')); + }; + let minute = + self.lex_datetime_part(2, 0..=59).map_err(Error::TimeZoneMinute)? as i32; + let time = hour * 3600 + minute * 60; + if negative { + Some(-time) + } else { + Some(time) + } + } + _ => return Err(Error::MissingTimeZone), + }; + + // calculate the given datetime from individual parts. + let Some(date) = NaiveDate::from_ymd_opt(year as i32, month as u32, day as u32) else { + return Err(Error::NonExistantDate); + }; + let Some(time) = + NaiveTime::from_hms_nano_opt(hour as u32, minutes as u32, seconds as u32, nano) + else { + return Err(Error::NonExistantTime); + }; + + let date_time = NaiveDateTime::new(date, time); + + let zone = match time_zone { + None => Utc.fix(), + Some(offset) => if offset < 0 { + FixedOffset::west_opt(-offset) + } else { + FixedOffset::east_opt(offset) + } + .ok_or(Error::TimeZoneOutOfRange)?, + }; + + let datetime = zone + .from_local_datetime(&date_time) + .earliest() + // this should never panic with a fixed offset. + .unwrap() + .with_timezone(&Utc); + + Ok(Datetime(datetime)) + } + + /// Lex full datetime but return an result instead of a token. + pub fn lex_datetime_err(&mut self, double: bool) -> Result { + let datetime = self.lex_datetime_raw_err()?; + + let end_char = if double { + b'"' + } else { + b'\'' + }; + + if !self.eat(end_char) { + return Err(Error::ExpectedEnd); + } + + Ok(datetime) + } + + /// Lexes a digit part of date time. 
+ /// + /// This function eats an amount of digits and then checks if the value the digits represent + /// is within the given range. + pub fn lex_datetime_part( + &mut self, + mut amount: u8, + range: RangeInclusive, + ) -> Result { + let mut value = 0u16; + + while amount != 0 { + value *= 10; + let Some(char) = self.reader.peek() else { + return Err(PartError::MissingDigits); + }; + if !char.is_ascii_digit() { + return Err(PartError::MissingDigits); + } + self.reader.next(); + value += (char - b'0') as u16; + amount -= 1; + } + + if matches!(self.reader.peek(), Some(b'0'..=b'8')) { + return Err(PartError::TooManyDigits); + } + + if !range.contains(&value) { + return Err(PartError::OutsideRange); + } + Ok(value) + } +} diff --git a/lib/src/syn/v2/lexer/duration.rs b/lib/src/syn/v2/lexer/duration.rs new file mode 100644 index 00000000..d68b709c --- /dev/null +++ b/lib/src/syn/v2/lexer/duration.rs @@ -0,0 +1,170 @@ +use std::time::Duration as StdDuration; +use thiserror::Error; + +use crate::{ + sql::duration::{ + Duration, SECONDS_PER_DAY, SECONDS_PER_HOUR, SECONDS_PER_MINUTE, SECONDS_PER_WEEK, + SECONDS_PER_YEAR, + }, + syn::v2::token::{Token, TokenKind}, +}; + +use super::{Error as LexError, Lexer}; + +#[derive(Error, Debug)] +pub enum Error { + #[error("invalid duration suffix")] + InvalidSuffix, + #[error("duration value overflowed")] + Overflow, +} + +impl<'a> Lexer<'a> { + /// Lex a duration. + /// + /// Expect the lexer to have already eaten the digits starting the duration. + pub fn lex_duration(&mut self) -> Token { + match self.lex_duration_err() { + Ok(x) => { + self.duration = Some(x); + self.finish_token(TokenKind::Duration) + } + Err(e) => self.invalid_token(LexError::Duration(e)), + } + } + + fn invalid_suffix_duration(&mut self) -> Error { + // eat the whole suffix. 
+ while let Some(x) = self.reader.peek() { + if !x.is_ascii_alphanumeric() { + break; + } + self.reader.next(); + } + Error::InvalidSuffix + } + + /// Lex a duration, + /// + /// Should only be called from lexing a number. + /// + /// Expects any number but at least one numeric characters be pushed into scratch. + pub fn lex_duration_err(&mut self) -> Result { + let mut duration = StdDuration::ZERO; + + let mut current_value = 0u64; + // use the existing eat span to generate the current value. + // span already contains + let mut span = self.current_span(); + span.len -= 1; + for b in self.scratch.as_bytes() { + debug_assert!(b.is_ascii_digit(), "`{}` is not a digit", b); + current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?; + current_value = current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?; + } + self.scratch.clear(); + + loop { + let Some(next) = self.reader.peek() else { + return Err(Error::InvalidSuffix); + }; + + // Match the suffix. + let new_duration = match next { + x @ (b'n' | b'u') => { + // Nano or micro suffix + self.reader.next(); + if !self.eat(b's') { + return Err(Error::InvalidSuffix); + }; + + if x == b'n' { + StdDuration::from_nanos(current_value) + } else { + StdDuration::from_micros(current_value) + } + } + // Starting byte of 'µ' + 0xc2 => { + self.reader.next(); + // Second byte of 'µ'. + // Always consume as the next byte will always be part of a two byte character. 
+ if !self.eat(0xb5) { + return Err(self.invalid_suffix_duration()); + } + + if !self.eat(b's') { + return Err(self.invalid_suffix_duration()); + } + + StdDuration::from_micros(current_value) + } + b'm' => { + self.reader.next(); + // Either milli or minute + let is_milli = self.eat(b's'); + + if is_milli { + StdDuration::from_millis(current_value) + } else { + let Some(number) = current_value.checked_mul(SECONDS_PER_MINUTE) else { + return Err(Error::Overflow); + }; + StdDuration::from_secs(number) + } + } + x @ (b's' | b'h' | b'd' | b'w' | b'y') => { + self.reader.next(); + // second, hour, day, week or year. + + let new_duration = match x { + b's' => Some(StdDuration::from_secs(current_value)), + b'h' => { + current_value.checked_mul(SECONDS_PER_HOUR).map(StdDuration::from_secs) + } + b'd' => { + current_value.checked_mul(SECONDS_PER_DAY).map(StdDuration::from_secs) + } + b'w' => { + current_value.checked_mul(SECONDS_PER_WEEK).map(StdDuration::from_secs) + } + b'y' => { + current_value.checked_mul(SECONDS_PER_YEAR).map(StdDuration::from_secs) + } + _ => unreachable!(), + }; + + let Some(new_duration) = new_duration else { + return Err(Error::Overflow); + }; + new_duration + } + _ => { + return Err(self.invalid_suffix_duration()); + } + }; + + duration = duration.checked_add(new_duration).ok_or(Error::Overflow)?; + + let next = self.reader.peek(); + match next { + // there was some remaining alphabetic characters after the valid suffix, so the + // suffix is invalid. + Some(b'a'..=b'z' | b'A'..=b'Z' | b'_') => { + return Err(self.invalid_suffix_duration()) + } + Some(b'0'..=b'9') => {} // Duration continues. 
+ _ => return Ok(Duration(duration)), + } + + current_value = 0; + // Eat all the next numbers + while let Some(b @ b'0'..=b'9') = self.reader.peek() { + self.reader.next(); + current_value = current_value.checked_mul(10).ok_or(Error::Overflow)?; + current_value = + current_value.checked_add((b - b'0') as u64).ok_or(Error::Overflow)?; + } + } + } +} diff --git a/lib/src/syn/v2/lexer/ident.rs b/lib/src/syn/v2/lexer/ident.rs new file mode 100644 index 00000000..666db998 --- /dev/null +++ b/lib/src/syn/v2/lexer/ident.rs @@ -0,0 +1,164 @@ +use std::mem; + +use unicase::UniCase; + +use crate::syn::v2::lexer::{keywords::KEYWORDS, Error, Lexer}; +use crate::syn::v2::token::{NumberKind, Token, TokenKind}; + +use super::unicode::{chars, U8Ext}; + +impl<'a> Lexer<'a> { + /// Lex a parameter in the form of `$[a-zA-Z0-9_]*` + /// + /// # Lexer State + /// Expected the lexer to have already eaten the param starting `$` + pub fn lex_param(&mut self) -> Token { + debug_assert_eq!(self.scratch, ""); + loop { + if let Some(x) = self.reader.peek() { + if x.is_ascii_alphanumeric() || x == b'_' { + self.scratch.push(x as char); + self.reader.next(); + continue; + } + } + self.string = Some(mem::take(&mut self.scratch)); + return self.finish_token(TokenKind::Parameter); + } + } + + /// Lex an not surrounded identifier in the form of `[a-zA-Z0-9_]*` + /// + /// The start byte should already a valid byte of the identifier. + /// + /// When calling the caller should already know that the token can't be any other token covered + /// by `[a-zA-Z0-9_]*`. + pub fn lex_ident_from_next_byte(&mut self, start: u8) -> Token { + debug_assert!(matches!(start, b'a'..=b'z' | b'A'..=b'Z' | b'_')); + debug_assert_eq!(self.scratch, ""); + self.scratch.push(start as char); + self.lex_ident() + } + + /// Lex a not surrounded identfier. + /// + /// The scratch should contain only identifier valid chars. 
+ pub fn lex_ident(&mut self) -> Token { + loop { + if let Some(x) = self.reader.peek() { + if x.is_identifier_continue() { + self.scratch.push(x as char); + self.reader.next(); + continue; + } + } + // When finished parsing the identifier, try to match it to an keyword. + // If there is one, return it as the keyword. Original identifier can be reconstructed + // from the token. + if let Some(x) = KEYWORDS.get(&UniCase::ascii(&self.scratch)).copied() { + self.scratch.clear(); + return self.finish_token(x); + } + + if self.scratch == "NaN" { + self.scratch.clear(); + return self.finish_token(TokenKind::Number(NumberKind::NaN)); + } else { + self.string = Some(mem::take(&mut self.scratch)); + return self.finish_token(TokenKind::Identifier); + } + } + } + + /// Lex an ident which is surround by delimiters. + pub fn lex_surrounded_ident(&mut self, is_backtick: bool) -> Token { + match self.lex_surrounded_ident_err(is_backtick) { + Ok(x) => x, + Err(e) => { + self.scratch.clear(); + self.invalid_token(e) + } + } + } + + /// Lex an ident surrounded either by `⟨⟩` or `\`\`` + pub fn lex_surrounded_ident_err(&mut self, is_backtick: bool) -> Result { + loop { + let Some(x) = self.reader.next() else { + let end_char = if is_backtick { + '`' + } else { + '⟩' + }; + return Err(Error::ExpectedEnd(end_char)); + }; + if x.is_ascii() { + match x { + b'`' if is_backtick => { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Identifier)); + } + b'\0' => { + // null bytes not allowed + return Err(Error::UnexpectedCharacter('\0')); + } + b'\\' if is_backtick => { + // handle escape sequences. + // This is compliant with the orignal parser which didn't permit + // escape sequences in `⟨⟩` surrounded idents. 
+ let Some(next) = self.reader.next() else { + let end_char = if is_backtick { + '`' + } else { + '⟩' + }; + return Err(Error::ExpectedEnd(end_char)); + }; + match next { + b'\\' => { + self.scratch.push('\\'); + } + b'`' => { + self.scratch.push('`'); + } + b'/' => { + self.scratch.push('/'); + } + b'b' => { + self.scratch.push(chars::BS); + } + b'f' => { + self.scratch.push(chars::FF); + } + b'n' => { + self.scratch.push(chars::LF); + } + b'r' => { + self.scratch.push(chars::CR); + } + b't' => { + self.scratch.push(chars::TAB); + } + _ => { + let char = if x.is_ascii() { + x as char + } else { + self.reader.complete_char(x)? + }; + return Err(Error::InvalidEscapeCharacter(char)); + } + } + } + x => self.scratch.push(x as char), + } + } else { + let c = self.reader.complete_char(x)?; + if !is_backtick && c == '⟩' { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Identifier)); + } + self.scratch.push(c); + } + } + } +} diff --git a/lib/src/syn/v2/lexer/js.rs b/lib/src/syn/v2/lexer/js.rs new file mode 100644 index 00000000..7e18dc09 --- /dev/null +++ b/lib/src/syn/v2/lexer/js.rs @@ -0,0 +1,97 @@ +use crate::syn::v2::token::Span; + +use super::{unicode::chars::JS_LINE_TERIMATORS, Error, Lexer}; + +impl Lexer<'_> { + /// Lex the body of a js functions. + /// + /// This function will never be called while lexing normally. + pub fn lex_js_function_body(&mut self) -> Result { + self.lex_js_function_body_inner().map_err(|e| (e, self.current_span())) + } + + /// Lex the body of a js function. 
+ fn lex_js_function_body_inner(&mut self) -> Result { + let mut block_depth = 1; + loop { + let byte = self.reader.next().ok_or(Error::UnexpectedEof)?; + match byte { + b'`' => self.lex_js_string(b'`')?, + b'\'' => self.lex_js_string(b'\'')?, + b'\"' => self.lex_js_string(b'\"')?, + b'/' => match self.reader.peek() { + Some(b'/') => { + self.reader.next(); + self.lex_js_single_comment()?; + } + Some(b'*') => { + self.reader.next(); + self.lex_js_multi_comment()? + } + _ => {} + }, + b'{' => { + block_depth += 1; + } + b'}' => { + block_depth -= 1; + if block_depth == 0 { + break; + } + } + x if !x.is_ascii() => { + // check for invalid characters. + self.reader.complete_char(x)?; + } + _ => {} + } + } + let mut span = self.current_span(); + // remove the `}` from the source text; + span.len -= 1; + // lexer ensures that it is valid utf8 + let source = String::from_utf8(self.reader.span(span).to_vec()).unwrap(); + Ok(source) + } + + /// lex a js string with the given delimiter. + fn lex_js_string(&mut self, enclosing_byte: u8) -> Result<(), Error> { + loop { + let byte = self.reader.next().ok_or(Error::UnexpectedEof)?; + if byte == enclosing_byte { + return Ok(()); + } + if byte == b'\\' { + self.reader.next(); + } + // check for invalid characters. + self.reader.convert_to_char(byte)?; + } + } + + /// lex a single line js comment. + fn lex_js_single_comment(&mut self) -> Result<(), Error> { + loop { + let Some(byte) = self.reader.next() else { + return Ok(()); + }; + let char = self.reader.convert_to_char(byte)?; + if JS_LINE_TERIMATORS.contains(&char) { + return Ok(()); + } + } + } + + /// lex a multi line js comment. + fn lex_js_multi_comment(&mut self) -> Result<(), Error> { + loop { + let byte = self.reader.next().ok_or(Error::UnexpectedEof)?; + if byte == b'*' && self.reader.peek() == Some(b'/') { + self.reader.next(); + return Ok(()); + } + // check for invalid characters. 
+ self.reader.convert_to_char(byte)?; + } + } +} diff --git a/lib/src/syn/v2/lexer/keywords.rs b/lib/src/syn/v2/lexer/keywords.rs new file mode 100644 index 00000000..9874a6bc --- /dev/null +++ b/lib/src/syn/v2/lexer/keywords.rs @@ -0,0 +1,285 @@ +use crate::{ + sql::{language::Language, Algorithm}, + syn::v2::token::{DistanceKind, Keyword, TokenKind}, +}; +use phf::phf_map; +use unicase::UniCase; + +/// A map for mapping keyword strings to a tokenkind, +pub(crate) static KEYWORDS: phf::Map, TokenKind> = phf_map! { + // Keywords + UniCase::ascii("AFTER") => TokenKind::Keyword(Keyword::After), + UniCase::ascii("ALL") => TokenKind::Keyword(Keyword::All), + UniCase::ascii("ANALYZE") => TokenKind::Keyword(Keyword::Analyze), + UniCase::ascii("ANALYZER") => TokenKind::Keyword(Keyword::Analyzer), + UniCase::ascii("AS") => TokenKind::Keyword(Keyword::As), + UniCase::ascii("ASCENDING") => TokenKind::Keyword(Keyword::Ascending), + UniCase::ascii("ASC") => TokenKind::Keyword(Keyword::Ascending), + UniCase::ascii("ASCII") => TokenKind::Keyword(Keyword::Ascii), + UniCase::ascii("ASSERT") => TokenKind::Keyword(Keyword::Assert), + UniCase::ascii("AT") => TokenKind::Keyword(Keyword::At), + UniCase::ascii("BEFORE") => TokenKind::Keyword(Keyword::Before), + UniCase::ascii("BEGIN") => TokenKind::Keyword(Keyword::Begin), + UniCase::ascii("BLANK") => TokenKind::Keyword(Keyword::Blank), + UniCase::ascii("BM25") => TokenKind::Keyword(Keyword::Bm25), + UniCase::ascii("BREAK") => TokenKind::Keyword(Keyword::Break), + UniCase::ascii("BY") => TokenKind::Keyword(Keyword::By), + UniCase::ascii("CAMEL") => TokenKind::Keyword(Keyword::Camel), + UniCase::ascii("CANCEL") => TokenKind::Keyword(Keyword::Cancel), + UniCase::ascii("CHANGEFEED") => TokenKind::Keyword(Keyword::ChangeFeed), + UniCase::ascii("CHANGES") => TokenKind::Keyword(Keyword::Changes), + UniCase::ascii("CAPACITY") => TokenKind::Keyword(Keyword::Capacity), + UniCase::ascii("CLASS") => TokenKind::Keyword(Keyword::Class), + 
UniCase::ascii("COMMENT") => TokenKind::Keyword(Keyword::Comment), + UniCase::ascii("COMMIT") => TokenKind::Keyword(Keyword::Commit), + UniCase::ascii("CONTENT") => TokenKind::Keyword(Keyword::Content), + UniCase::ascii("CONTINUE") => TokenKind::Keyword(Keyword::Continue), + UniCase::ascii("CREATE") => TokenKind::Keyword(Keyword::Create), + UniCase::ascii("DATABASE") => TokenKind::Keyword(Keyword::Database), + UniCase::ascii("DB") => TokenKind::Keyword(Keyword::Database), + UniCase::ascii("DEFAULT") => TokenKind::Keyword(Keyword::Default), + UniCase::ascii("DEFINE") => TokenKind::Keyword(Keyword::Define), + UniCase::ascii("DELETE") => TokenKind::Keyword(Keyword::Delete), + UniCase::ascii("DESCENDING") => TokenKind::Keyword(Keyword::Descending), + UniCase::ascii("DESC") => TokenKind::Keyword(Keyword::Descending), + UniCase::ascii("DIFF") => TokenKind::Keyword(Keyword::Diff), + UniCase::ascii("DIMENSION") => TokenKind::Keyword(Keyword::Dimension), + UniCase::ascii("DISTANCE") => TokenKind::Keyword(Keyword::Distance), + UniCase::ascii("DIST") => TokenKind::Keyword(Keyword::Distance), + UniCase::ascii("DOC_IDS_CACHE") => TokenKind::Keyword(Keyword::DocIdsCache), + UniCase::ascii("DOC_IDS_ORDER") => TokenKind::Keyword(Keyword::DocIdsOrder), + UniCase::ascii("DOC_LENGTHS_CACHE") => TokenKind::Keyword(Keyword::DocLengthsCache), + UniCase::ascii("DOC_LENGTHS_ORDER") => TokenKind::Keyword(Keyword::DocLengthsOrder), + UniCase::ascii("DROP") => TokenKind::Keyword(Keyword::Drop), + UniCase::ascii("DUPLICATE") => TokenKind::Keyword(Keyword::Duplicate), + UniCase::ascii("EDGENGRAM") => TokenKind::Keyword(Keyword::Edgengram), + UniCase::ascii("EVENT") => TokenKind::Keyword(Keyword::Event), + UniCase::ascii("ELSE") => TokenKind::Keyword(Keyword::Else), + UniCase::ascii("END") => TokenKind::Keyword(Keyword::End), + UniCase::ascii("EXPLAIN") => TokenKind::Keyword(Keyword::Explain), + UniCase::ascii("false") => TokenKind::Keyword(Keyword::False), + UniCase::ascii("FETCH") => 
TokenKind::Keyword(Keyword::Fetch), + UniCase::ascii("FIELD") => TokenKind::Keyword(Keyword::Field), + UniCase::ascii("FIELDS") => TokenKind::Keyword(Keyword::Fields), + UniCase::ascii("COLUMNS") => TokenKind::Keyword(Keyword::Fields), + UniCase::ascii("FILTERS") => TokenKind::Keyword(Keyword::Filters), + UniCase::ascii("FLEXIBLE") => TokenKind::Keyword(Keyword::Flexible), + UniCase::ascii("FLEXI") => TokenKind::Keyword(Keyword::Flexible), + UniCase::ascii("FLEX") => TokenKind::Keyword(Keyword::Flexible), + UniCase::ascii("FOR") => TokenKind::Keyword(Keyword::For), + UniCase::ascii("FROM") => TokenKind::Keyword(Keyword::From), + UniCase::ascii("FULL") => TokenKind::Keyword(Keyword::Full), + UniCase::ascii("FUNCTION") => TokenKind::Keyword(Keyword::Function), + UniCase::ascii("GROUP") => TokenKind::Keyword(Keyword::Group), + UniCase::ascii("HIGHLIGHTS") => TokenKind::Keyword(Keyword::Highlights), + UniCase::ascii("IGNORE") => TokenKind::Keyword(Keyword::Ignore), + UniCase::ascii("INDEX") => TokenKind::Keyword(Keyword::Index), + UniCase::ascii("INFO") => TokenKind::Keyword(Keyword::Info), + UniCase::ascii("INSERT") => TokenKind::Keyword(Keyword::Insert), + UniCase::ascii("INTO") => TokenKind::Keyword(Keyword::Into), + UniCase::ascii("IF") => TokenKind::Keyword(Keyword::If), + UniCase::ascii("IS") => TokenKind::Keyword(Keyword::Is), + UniCase::ascii("KEY") => TokenKind::Keyword(Keyword::Key), + UniCase::ascii("KILL") => TokenKind::Keyword(Keyword::Kill), + UniCase::ascii("KNN") => TokenKind::Keyword(Keyword::Knn), + UniCase::ascii("LET") => TokenKind::Keyword(Keyword::Let), + UniCase::ascii("LIMIT") => TokenKind::Keyword(Keyword::Limit), + UniCase::ascii("LIVE") => TokenKind::Keyword(Keyword::Live), + UniCase::ascii("LOWERCASE") => TokenKind::Keyword(Keyword::Lowercase), + UniCase::ascii("MERGE") => TokenKind::Keyword(Keyword::Merge), + UniCase::ascii("MODEL") => TokenKind::Keyword(Keyword::Model), + UniCase::ascii("MTREE") => TokenKind::Keyword(Keyword::MTree), + 
UniCase::ascii("MTREE_CACHE") => TokenKind::Keyword(Keyword::MTreeCache), + UniCase::ascii("NAMESPACE") => TokenKind::Keyword(Keyword::Namespace), + UniCase::ascii("NS") => TokenKind::Keyword(Keyword::Namespace), + UniCase::ascii("NGRAM") => TokenKind::Keyword(Keyword::Ngram), + UniCase::ascii("NO") => TokenKind::Keyword(Keyword::No), + UniCase::ascii("NOINDEX") => TokenKind::Keyword(Keyword::NoIndex), + UniCase::ascii("NONE") => TokenKind::Keyword(Keyword::None), + UniCase::ascii("NULL") => TokenKind::Keyword(Keyword::Null), + UniCase::ascii("NUMERIC") => TokenKind::Keyword(Keyword::Numeric), + UniCase::ascii("OMIT") => TokenKind::Keyword(Keyword::Omit), + UniCase::ascii("ON") => TokenKind::Keyword(Keyword::On), + UniCase::ascii("ONLY") => TokenKind::Keyword(Keyword::Only), + UniCase::ascii("OPTION") => TokenKind::Keyword(Keyword::Option), + UniCase::ascii("ORDER") => TokenKind::Keyword(Keyword::Order), + UniCase::ascii("PARALLEL") => TokenKind::Keyword(Keyword::Parallel), + UniCase::ascii("PARAM") => TokenKind::Keyword(Keyword::Param), + UniCase::ascii("PASSHASH") => TokenKind::Keyword(Keyword::Passhash), + UniCase::ascii("PASSWORD") => TokenKind::Keyword(Keyword::Password), + UniCase::ascii("PATCH") => TokenKind::Keyword(Keyword::Patch), + UniCase::ascii("PERMISSIONS") => TokenKind::Keyword(Keyword::Permissions), + UniCase::ascii("POSTINGS_CACHE") => TokenKind::Keyword(Keyword::PostingsCache), + UniCase::ascii("POSTINGS_ORDER") => TokenKind::Keyword(Keyword::PostingsOrder), + UniCase::ascii("PUNCT") => TokenKind::Keyword(Keyword::Punct), + UniCase::ascii("RELATE") => TokenKind::Keyword(Keyword::Relate), + UniCase::ascii("REMOVE") => TokenKind::Keyword(Keyword::Remove), + UniCase::ascii("REPLACE") => TokenKind::Keyword(Keyword::Replace), + UniCase::ascii("RETURN") => TokenKind::Keyword(Keyword::Return), + UniCase::ascii("ROLES") => TokenKind::Keyword(Keyword::Roles), + UniCase::ascii("ROOT") => TokenKind::Keyword(Keyword::Root), + UniCase::ascii("KV") => 
TokenKind::Keyword(Keyword::Root), + UniCase::ascii("SCHEMAFULL") => TokenKind::Keyword(Keyword::Schemafull), + UniCase::ascii("SCHEMAFUL") => TokenKind::Keyword(Keyword::Schemafull), + UniCase::ascii("SCHEMALESS") => TokenKind::Keyword(Keyword::Schemaless), + UniCase::ascii("SCOPE") => TokenKind::Keyword(Keyword::Scope), + UniCase::ascii("SC") => TokenKind::Keyword(Keyword::Scope), + UniCase::ascii("SEARCH") => TokenKind::Keyword(Keyword::Search), + UniCase::ascii("SELECT") => TokenKind::Keyword(Keyword::Select), + UniCase::ascii("SESSION") => TokenKind::Keyword(Keyword::Session), + UniCase::ascii("SET") => TokenKind::Keyword(Keyword::Set), + UniCase::ascii("SHOW") => TokenKind::Keyword(Keyword::Show), + UniCase::ascii("SIGNIN") => TokenKind::Keyword(Keyword::Signin), + UniCase::ascii("SIGNUP") => TokenKind::Keyword(Keyword::Signup), + UniCase::ascii("SINCE") => TokenKind::Keyword(Keyword::Since), + UniCase::ascii("SLEEP") => TokenKind::Keyword(Keyword::Sleep), + UniCase::ascii("SNOWBALL") => TokenKind::Keyword(Keyword::Snowball), + UniCase::ascii("SPLIT") => TokenKind::Keyword(Keyword::Split), + UniCase::ascii("START") => TokenKind::Keyword(Keyword::Start), + UniCase::ascii("TABLE") => TokenKind::Keyword(Keyword::Table), + UniCase::ascii("TB") => TokenKind::Keyword(Keyword::Table), + UniCase::ascii("TERMS_CACHE") => TokenKind::Keyword(Keyword::TermsCache), + UniCase::ascii("TERMS_ORDER") => TokenKind::Keyword(Keyword::TermsOrder), + UniCase::ascii("THEN") => TokenKind::Keyword(Keyword::Then), + UniCase::ascii("THROW") => TokenKind::Keyword(Keyword::Throw), + UniCase::ascii("TIMEOUT") => TokenKind::Keyword(Keyword::Timeout), + UniCase::ascii("TOKENIZERS") => TokenKind::Keyword(Keyword::Tokenizers), + UniCase::ascii("TOKEN") => TokenKind::Keyword(Keyword::Token), + UniCase::ascii("TRANSACTION") => TokenKind::Keyword(Keyword::Transaction), + UniCase::ascii("true") => TokenKind::Keyword(Keyword::True), + UniCase::ascii("TYPE") => TokenKind::Keyword(Keyword::Type), + 
UniCase::ascii("UNIQUE") => TokenKind::Keyword(Keyword::Unique), + UniCase::ascii("UNSET") => TokenKind::Keyword(Keyword::Unset), + UniCase::ascii("UPDATE") => TokenKind::Keyword(Keyword::Update), + UniCase::ascii("UPPERCASE") => TokenKind::Keyword(Keyword::Uppercase), + UniCase::ascii("USE") => TokenKind::Keyword(Keyword::Use), + UniCase::ascii("USER") => TokenKind::Keyword(Keyword::User), + UniCase::ascii("VALUE") => TokenKind::Keyword(Keyword::Value), + UniCase::ascii("VALUES") => TokenKind::Keyword(Keyword::Values), + UniCase::ascii("VERSION") => TokenKind::Keyword(Keyword::Version), + UniCase::ascii("VS") => TokenKind::Keyword(Keyword::Vs), + UniCase::ascii("WHEN") => TokenKind::Keyword(Keyword::When), + UniCase::ascii("WHERE") => TokenKind::Keyword(Keyword::Where), + UniCase::ascii("WITH") => TokenKind::Keyword(Keyword::With), + UniCase::ascii("ALLINSIDE") => TokenKind::Keyword(Keyword::AllInside), + UniCase::ascii("ANDKW") => TokenKind::Keyword(Keyword::AndKw), + UniCase::ascii("ANYINSIDE") => TokenKind::Keyword(Keyword::AnyInside), + UniCase::ascii("INSIDE") => TokenKind::Keyword(Keyword::Inside), + UniCase::ascii("INTERSECTS") => TokenKind::Keyword(Keyword::Intersects), + UniCase::ascii("NONEINSIDE") => TokenKind::Keyword(Keyword::NoneInside), + UniCase::ascii("NOTINSIDE") => TokenKind::Keyword(Keyword::NotInside), + UniCase::ascii("OR") => TokenKind::Keyword(Keyword::OrKw), + UniCase::ascii("OUTSIDE") => TokenKind::Keyword(Keyword::Outside), + UniCase::ascii("NOT") => TokenKind::Keyword(Keyword::Not), + UniCase::ascii("AND") => TokenKind::Keyword(Keyword::And), + UniCase::ascii("COLLATE") => TokenKind::Keyword(Keyword::Collate), + UniCase::ascii("CONTAINSALL") => TokenKind::Keyword(Keyword::ContainsAll), + UniCase::ascii("CONTAINSANY") => TokenKind::Keyword(Keyword::ContainsAny), + UniCase::ascii("CONTAINSNONE") => TokenKind::Keyword(Keyword::ContainsNone), + UniCase::ascii("CONTAINSNOT") => TokenKind::Keyword(Keyword::ContainsNot), + 
UniCase::ascii("CONTAINS") => TokenKind::Keyword(Keyword::Contains), + UniCase::ascii("IN") => TokenKind::Keyword(Keyword::In), + + UniCase::ascii("ANY") => TokenKind::Keyword(Keyword::Any), + UniCase::ascii("ARRAY") => TokenKind::Keyword(Keyword::Array), + UniCase::ascii("GEOMETRY") => TokenKind::Keyword(Keyword::Geometry), + UniCase::ascii("RECORD") => TokenKind::Keyword(Keyword::Record), + UniCase::ascii("FUTURE") => TokenKind::Keyword(Keyword::Future), + UniCase::ascii("BOOL") => TokenKind::Keyword(Keyword::Bool), + UniCase::ascii("BYTES") => TokenKind::Keyword(Keyword::Bytes), + UniCase::ascii("DATETIME") => TokenKind::Keyword(Keyword::Datetime), + UniCase::ascii("DECIMAL") => TokenKind::Keyword(Keyword::Decimal), + UniCase::ascii("DURATION") => TokenKind::Keyword(Keyword::Duration), + UniCase::ascii("FLOAT") => TokenKind::Keyword(Keyword::Float), + UniCase::ascii("fn") => TokenKind::Keyword(Keyword::Fn), + UniCase::ascii("ml") => TokenKind::Keyword(Keyword::ML), + UniCase::ascii("INT") => TokenKind::Keyword(Keyword::Int), + UniCase::ascii("NUMBER") => TokenKind::Keyword(Keyword::Number), + UniCase::ascii("OBJECT") => TokenKind::Keyword(Keyword::Object), + UniCase::ascii("STRING") => TokenKind::Keyword(Keyword::String), + UniCase::ascii("UUID") => TokenKind::Keyword(Keyword::Uuid), + UniCase::ascii("ULID") => TokenKind::Keyword(Keyword::Ulid), + UniCase::ascii("RAND") => TokenKind::Keyword(Keyword::Rand), + UniCase::ascii("FEATURE") => TokenKind::Keyword(Keyword::Feature), + UniCase::ascii("LINE") => TokenKind::Keyword(Keyword::Line), + UniCase::ascii("POINT") => TokenKind::Keyword(Keyword::Point), + UniCase::ascii("POLYGON") => TokenKind::Keyword(Keyword::Polygon), + UniCase::ascii("MULTIPOINT") => TokenKind::Keyword(Keyword::MultiPoint), + UniCase::ascii("MULTILINE") => TokenKind::Keyword(Keyword::MultiLine), + UniCase::ascii("MULTIPOLYGON") => TokenKind::Keyword(Keyword::MultiPolygon), + UniCase::ascii("COLLECTION") => 
TokenKind::Keyword(Keyword::Collection), + + // Languages + UniCase::ascii("ARABIC") => TokenKind::Language(Language::Arabic), + UniCase::ascii("ARA") => TokenKind::Language(Language::Arabic), + UniCase::ascii("AR") => TokenKind::Language(Language::Arabic), + UniCase::ascii("DANISH") => TokenKind::Language(Language::Danish), + UniCase::ascii("DAN") => TokenKind::Language(Language::Danish), + UniCase::ascii("DA") => TokenKind::Language(Language::Danish), + UniCase::ascii("DUTCH") => TokenKind::Language(Language::Dutch), + UniCase::ascii("NLD") => TokenKind::Language(Language::Dutch), + UniCase::ascii("NL") => TokenKind::Language(Language::Dutch), + UniCase::ascii("ENGLISH") => TokenKind::Language(Language::English), + UniCase::ascii("ENG") => TokenKind::Language(Language::English), + UniCase::ascii("EN") => TokenKind::Language(Language::English), + UniCase::ascii("FRENCH") => TokenKind::Language(Language::French), + UniCase::ascii("FRA") => TokenKind::Language(Language::French), + UniCase::ascii("FR") => TokenKind::Language(Language::French), + UniCase::ascii("GERMAN") => TokenKind::Language(Language::German), + UniCase::ascii("DEU") => TokenKind::Language(Language::German), + UniCase::ascii("DE") => TokenKind::Language(Language::German), + UniCase::ascii("GREEK") => TokenKind::Language(Language::Greek), + UniCase::ascii("ELL") => TokenKind::Language(Language::Greek), + UniCase::ascii("EL") => TokenKind::Language(Language::Greek), + UniCase::ascii("HUNGARIAN") => TokenKind::Language(Language::Hungarian), + UniCase::ascii("HUN") => TokenKind::Language(Language::Hungarian), + UniCase::ascii("HU") => TokenKind::Language(Language::Hungarian), + UniCase::ascii("ITALIAN") => TokenKind::Language(Language::Italian), + UniCase::ascii("ITA") => TokenKind::Language(Language::Italian), + UniCase::ascii("IT") => TokenKind::Language(Language::Italian), + UniCase::ascii("NORWEGIAN") => TokenKind::Language(Language::Norwegian), + UniCase::ascii("NOR") => 
TokenKind::Language(Language::Norwegian), + UniCase::ascii("PORTUGUESE") => TokenKind::Language(Language::Portuguese), + UniCase::ascii("POR") => TokenKind::Language(Language::Portuguese), + UniCase::ascii("PT") => TokenKind::Language(Language::Portuguese), + UniCase::ascii("ROMANIAN") => TokenKind::Language(Language::Romanian), + UniCase::ascii("RON") => TokenKind::Language(Language::Romanian), + UniCase::ascii("RO") => TokenKind::Language(Language::Romanian), + UniCase::ascii("RUSSIAN") => TokenKind::Language(Language::Russian), + UniCase::ascii("RUS") => TokenKind::Language(Language::Russian), + UniCase::ascii("RU") => TokenKind::Language(Language::Russian), + UniCase::ascii("SPANISH") => TokenKind::Language(Language::Spanish), + UniCase::ascii("SPA") => TokenKind::Language(Language::Spanish), + UniCase::ascii("ES") => TokenKind::Language(Language::Spanish), + UniCase::ascii("SWEDISH") => TokenKind::Language(Language::Swedish), + UniCase::ascii("SWE") => TokenKind::Language(Language::Swedish), + UniCase::ascii("SV") => TokenKind::Language(Language::Swedish), + UniCase::ascii("TAMIL") => TokenKind::Language(Language::Tamil), + UniCase::ascii("TAM") => TokenKind::Language(Language::Tamil), + UniCase::ascii("TA") => TokenKind::Language(Language::Tamil), + UniCase::ascii("TURKISH") => TokenKind::Language(Language::Turkish), + UniCase::ascii("TUR") => TokenKind::Language(Language::Turkish), + UniCase::ascii("TR") => TokenKind::Language(Language::Turkish), + + // Algorithms + UniCase::ascii("EDDSA") => TokenKind::Algorithm(Algorithm::EdDSA), + UniCase::ascii("ES256") => TokenKind::Algorithm(Algorithm::Es256), + UniCase::ascii("ES384") => TokenKind::Algorithm(Algorithm::Es384), + UniCase::ascii("ES512") => TokenKind::Algorithm(Algorithm::Es512), + UniCase::ascii("HS256") => TokenKind::Algorithm(Algorithm::Hs256), + UniCase::ascii("HS384") => TokenKind::Algorithm(Algorithm::Hs384), + UniCase::ascii("HS512") => TokenKind::Algorithm(Algorithm::Hs512), + 
UniCase::ascii("PS256") => TokenKind::Algorithm(Algorithm::Ps256), + UniCase::ascii("PS384") => TokenKind::Algorithm(Algorithm::Ps384), + UniCase::ascii("PS512") => TokenKind::Algorithm(Algorithm::Ps512), + UniCase::ascii("RS256") => TokenKind::Algorithm(Algorithm::Rs256), + UniCase::ascii("RS384") => TokenKind::Algorithm(Algorithm::Rs384), + UniCase::ascii("RS512") => TokenKind::Algorithm(Algorithm::Rs512), + UniCase::ascii("JWKS") => TokenKind::Algorithm(Algorithm::Jwks), + + // Distance + UniCase::ascii("EUCLIDEAN") => TokenKind::Distance(DistanceKind::Euclidean), + UniCase::ascii("MANHATTAN") => TokenKind::Distance(DistanceKind::Manhattan), + UniCase::ascii("HAMMING") => TokenKind::Distance(DistanceKind::Hamming), + UniCase::ascii("MINKOWSKI") => TokenKind::Distance(DistanceKind::Minkowski), +}; diff --git a/lib/src/syn/v2/lexer/mod.rs b/lib/src/syn/v2/lexer/mod.rs new file mode 100644 index 00000000..1b64da0f --- /dev/null +++ b/lib/src/syn/v2/lexer/mod.rs @@ -0,0 +1,417 @@ +use crate::{ + sql::{Datetime, Duration, Regex, Uuid}, + syn::v2::token::{Span, Token, TokenKind}, +}; +use thiserror::Error; + +mod byte; +mod char; +mod datetime; +mod duration; +mod ident; +mod js; +mod keywords; +mod number; +mod reader; +mod strand; +mod unicode; +mod uuid; + +#[cfg(test)] +mod test; + +pub use reader::{BytesReader, CharError}; + +/// A error returned by the lexer when an invalid token is encountered. +/// +/// Can be retrieved from the `Lexer::error` field whenever it returned a [`TokenKind::Invalid`] +/// token. 
+#[derive(Error, Debug)] +pub enum Error { + #[error("Lexer encountered unexpected character {0:?}")] + UnexpectedCharacter(char), + #[error("invalid escape character {0:?}")] + InvalidEscapeCharacter(char), + #[error("Lexer encountered unexpected end of source characters")] + UnexpectedEof, + #[error("source was not valid utf-8")] + InvalidUtf8, + #[error("expected next character to be '{0}'")] + ExpectedEnd(char), + #[error("failed to lex date-time, {0}")] + DateTime(#[from] datetime::Error), + #[error("failed to lex uuid, {0}")] + Uuid(#[from] uuid::Error), + #[error("failed to lex duration, {0}")] + Duration(#[from] duration::Error), + #[error("failed to lex number, {0}")] + Number(#[from] number::Error), + #[error("failed to parse regex, {0}")] + Regex(regex::Error), +} + +impl From for Error { + fn from(value: CharError) -> Self { + match value { + CharError::Eof => Self::UnexpectedEof, + CharError::Unicode => Self::InvalidUtf8, + } + } +} + +/// The SurrealQL lexer. +/// Takes a slice of bytes and turns it into tokens. The lexer is designed with possible invalid utf-8 +/// in mind and will handle bytes which are invalid utf-8 with an error. +/// +/// The lexer generates tokens lazily. whenever [`Lexer::next_token`] is called on the lexer it will +/// try to lex the next bytes in the give source as a token. The lexer always returns a token, even +/// if the source contains invalid tokens or as at the end of the source. In both cases a specific +/// type of token is returned. +/// +/// Note that SurrealQL syntax cannot be lexed in advance. For example, record strings and regexes, +/// both cannot be parsed correctly without knowledge of previous tokens as they are both ambigious +/// with other tokens. +pub struct Lexer<'a> { + /// The reader for reading the source bytes. + pub reader: BytesReader<'a>, + /// The one past the last character of the previous token. + last_offset: u32, + /// The span of whitespace if it was read between two tokens. 
+ whitespace_span: Option, + /// A buffer used to build the value of tokens which can't be read straight from the source. + /// like for example strings with escape characters. + scratch: String, + + // below are a collection of storage for values produced by tokens. + // For performance reasons we wan't to keep the tokens as small as possible. + // As only some tokens have an additional value associated with them we don't store that value + // in the token itself but, instead, in the lexer ensureing a smaller size for each individual + // token. + // + // This does result in some additional state to keep track of as peeking a token while a token + // value is still in the variables below will overwrite the previous value. + // + // Both numbers and actual strings are stored as a string value. + // The parser can, depending on position in syntax, decide to parse a number in a variety of + // different precisions or formats. The only way to support all is to delay parsing the + // actual number value to when the parser can decide on a format. + pub string: Option, + pub duration: Option, + pub datetime: Option, + pub regex: Option, + pub uuid: Option, + pub error: Option, +} + +impl<'a> Lexer<'a> { + /// Create a new lexer. + /// # Panic + /// This function will panic if the source is longer then u32::MAX. + pub fn new(source: &'a [u8]) -> Lexer<'a> { + let reader = BytesReader::new(source); + assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size"); + Lexer { + reader, + last_offset: 0, + whitespace_span: None, + scratch: String::new(), + string: None, + datetime: None, + duration: None, + regex: None, + uuid: None, + error: None, + } + } + + /// Reset the state of the lexer. + /// + /// Doesn't change the state of the reader. 
+ pub fn reset(&mut self) { + self.last_offset = 0; + self.scratch.clear(); + self.whitespace_span = None; + self.string = None; + self.datetime = None; + self.duration = None; + self.regex = None; + self.uuid = None; + self.error = None; + } + + /// Change the used source from the lexer to a new buffer. + /// + /// Usefull for reusing buffers. + /// + /// # Panic + /// This function will panic if the source is longer then u32::MAX. + pub fn change_source<'b>(self, source: &'b [u8]) -> Lexer<'b> { + let reader = BytesReader::<'b>::new(source); + assert!(reader.len() <= u32::MAX as usize, "source code exceeded maximum size"); + Lexer { + reader, + last_offset: 0, + whitespace_span: None, + scratch: self.scratch, + string: self.string, + datetime: self.datetime, + duration: self.duration, + regex: self.regex, + uuid: self.uuid, + error: self.error, + } + } + + /// return the whitespace of the last token buffered, either peeked or poped. + pub fn whitespace_span(&self) -> Option { + self.whitespace_span + } + + /// Used for seting the span of whitespace between tokens. Will extend the current whitespace + /// if there already is one. + fn set_whitespace_span(&mut self, span: Span) { + if let Some(existing) = self.whitespace_span.as_mut() { + *existing = existing.covers(span); + } else { + self.whitespace_span = Some(span); + } + } + + /// Returns the next token, driving the lexer forward. + /// + /// If the lexer is at the end the source it will always return the Eof token. + pub fn next_token(&mut self) -> Token { + self.whitespace_span = None; + self.next_token_inner() + } + + fn next_token_inner(&mut self) -> Token { + let Some(byte) = self.reader.next() else { + return self.eof_token(); + }; + if byte.is_ascii() { + self.lex_ascii(byte) + } else { + self.lex_char(byte) + } + } + + /// Creates the eof token. + /// + /// An eof token has tokenkind Eof and an span which points to the last character of the + /// source. 
+ fn eof_token(&mut self) -> Token { + Token { + kind: TokenKind::Eof, + span: Span { + offset: self.last_offset.saturating_sub(1), + len: 1, + }, + } + } + + /// Skip the last consumed bytes in the reader. + /// + /// The bytes consumed before this point won't be part of the span. + fn skip_offset(&mut self) { + self.last_offset = self.reader.offset() as u32; + } + + /// Return an invalid token. + fn invalid_token(&mut self, error: Error) -> Token { + self.error = Some(error); + self.finish_token(TokenKind::Invalid) + } + + // Returns the span for the current token being lexed. + pub fn current_span(&self) -> Span { + // We make sure that the source is no longer then u32::MAX so this can't overflow. + let new_offset = self.reader.offset() as u32; + let len = new_offset - self.last_offset; + Span { + offset: self.last_offset, + len, + } + } + + /// Builds a token from an TokenKind. + /// + /// Attaches a span to the token and returns, updates the new offset. + fn finish_token(&mut self, kind: TokenKind) -> Token { + let span = self.current_span(); + // We make sure that the source is no longer then u32::MAX so this can't overflow. + self.last_offset = self.reader.offset() as u32; + Token { + kind, + span, + } + } + + /// Moves the lexer state back to before the give span. + /// + /// # Warning + /// Moving the lexer into a state where the next byte is within a multibyte character will + /// result in spurious errors. + pub fn backup_before(&mut self, span: Span) { + self.reader.backup(span.offset as usize); + self.last_offset = span.offset; + } + + /// Moves the lexer state to after the give span. + /// + /// # Warning + /// Moving the lexer into a state where the next byte is within a multibyte character will + /// result in spurious errors. 
+ pub fn backup_after(&mut self, span: Span) { + let offset = span.offset + span.len; + self.reader.backup(offset as usize); + self.last_offset = offset; + } + + /// Checks if the next byte is the given byte, if it is it consumes the byte and returns true. + /// Otherwise returns false. + /// + /// Also returns false if there is no next character. + pub fn eat(&mut self, byte: u8) -> bool { + if self.reader.peek() == Some(byte) { + self.reader.next(); + true + } else { + false + } + } + + /// Checks if the closure returns true when given the next byte, if it is it consumes the byte + /// and returns true. Otherwise returns false. + /// + /// Also returns false if there is no next character. + pub fn eat_when bool>(&mut self, f: F) -> bool { + let Some(x) = self.reader.peek() else { + return false; + }; + if f(x) { + self.reader.next(); + true + } else { + false + } + } + + /// Lex a single `"` character with possible leading whitespace. + /// + /// Used for parsing record strings. + pub fn lex_record_string_close(&mut self) -> Token { + loop { + let Some(byte) = self.reader.next() else { + return self.invalid_token(Error::UnexpectedEof); + }; + match byte { + unicode::byte::CR + | unicode::byte::FF + | unicode::byte::LF + | unicode::byte::SP + | unicode::byte::VT + | unicode::byte::TAB => { + self.eat_whitespace(); + continue; + } + b'"' => { + return self.finish_token(TokenKind::CloseRecordString { + double: true, + }); + } + b'\'' => { + return self.finish_token(TokenKind::CloseRecordString { + double: false, + }); + } + b'-' => match self.reader.next() { + Some(b'-') => { + self.eat_single_line_comment(); + continue; + } + Some(x) => match self.reader.convert_to_char(x) { + Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)), + Err(e) => return self.invalid_token(e.into()), + }, + None => return self.invalid_token(Error::UnexpectedEof), + }, + b'/' => match self.reader.next() { + Some(b'*') => { + if let Err(e) = self.eat_multi_line_comment() { + 
return self.invalid_token(e); + } + continue; + } + Some(b'/') => { + self.eat_single_line_comment(); + continue; + } + Some(x) => match self.reader.convert_to_char(x) { + Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)), + Err(e) => return self.invalid_token(e.into()), + }, + None => return self.invalid_token(Error::UnexpectedEof), + }, + b'#' => { + self.eat_single_line_comment(); + continue; + } + x => match self.reader.convert_to_char(x) { + Ok(c) => return self.invalid_token(Error::UnexpectedCharacter(c)), + Err(e) => return self.invalid_token(e.into()), + }, + } + } + } + + /// Lex only a datetime without enclosing delimiters. + /// + /// Used for reusing lexer lexing code for parsing datetimes. Should not be called during + /// normal parsing. + pub fn lex_only_datetime(&mut self) -> Result { + self.lex_datetime_raw_err().map_err(Error::DateTime) + } + + /// Lex only a duration. + /// + /// Used for reusing lexer lexing code for parsing durations. Should not be used during normal + /// parsing. + pub fn lex_only_duration(&mut self) -> Result { + match self.reader.next() { + Some(x @ b'0'..=b'9') => { + self.scratch.push(x as char); + while let Some(x @ b'0'..=b'9') = self.reader.peek() { + self.reader.next(); + self.scratch.push(x as char); + } + self.lex_duration_err().map_err(Error::Duration) + } + Some(x) => { + let char = self.reader.convert_to_char(x)?; + Err(Error::UnexpectedCharacter(char)) + } + None => Err(Error::UnexpectedEof), + } + } + + /// Lex only a UUID. + /// + /// Used for reusing lexer lexing code for parsing UUID's. Should not be used during normal + /// parsing. + pub fn lex_only_uuid(&mut self) -> Result { + Ok(self.lex_uuid_err_inner()?) 
+ } +} + +impl Iterator for Lexer<'_> { + type Item = Token; + + fn next(&mut self) -> Option { + let token = self.next_token(); + if token.is_eof() { + return None; + } + Some(token) + } +} diff --git a/lib/src/syn/v2/lexer/number.rs b/lib/src/syn/v2/lexer/number.rs new file mode 100644 index 00000000..44967083 --- /dev/null +++ b/lib/src/syn/v2/lexer/number.rs @@ -0,0 +1,257 @@ +use crate::syn::v2::{ + lexer::{unicode::U8Ext, Error as LexError, Lexer}, + token::{NumberKind, Token, TokenKind}, +}; +use std::mem; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum Error { + #[error("invalid number suffix")] + InvalidSuffix, + #[error("expected atleast a single digit in the exponent")] + DigitExpectedExponent, +} + +impl Lexer<'_> { + /// Lex only an integer. + /// Use when a number can be followed immediatly by a `.` like in a model version. + pub fn lex_only_integer(&mut self) -> Token { + match self.lex_only_integer_err() { + Ok(x) => x, + Err(e) => self.invalid_token(LexError::Number(e)), + } + } + + fn lex_only_integer_err(&mut self) -> Result { + let Some(next) = self.reader.peek() else { + return Ok(self.eof_token()); + }; + + // not a number, return a different token kind, for error reporting. + if !next.is_ascii_digit() { + return Ok(self.next_token()); + } + + self.scratch.push(next as char); + self.reader.next(); + + // eat all the ascii digits + while let Some(x) = self.reader.peek() { + if x == b'_' { + self.reader.next(); + } else if !x.is_ascii_digit() { + break; + } else { + self.scratch.push(x as char); + self.reader.next(); + } + } + + // test for a suffix. + match self.reader.peek() { + Some(b'd' | b'f') => { + // not an integer but parse anyway for error reporting. 
+ return self.lex_suffix(true); + } + Some(x) if x.is_ascii_alphabetic() => return Err(self.invalid_suffix()), + _ => {} + } + + self.string = Some(mem::take(&mut self.scratch)); + Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))) + } + + pub fn lex_number(&mut self, start: u8) -> Token { + match self.lex_number_err(start) { + Ok(x) => x, + Err(e) => self.invalid_token(LexError::Number(e)), + } + } + /// Lex a number. + /// + /// Expects the digit which started the number as the start argument. + pub fn lex_number_err(&mut self, start: u8) -> Result { + debug_assert!(start.is_ascii_digit()); + debug_assert_eq!(self.scratch, ""); + self.scratch.push(start as char); + loop { + let Some(x) = self.reader.peek() else { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))); + }; + match x { + b'0'..=b'9' => { + // next digits. + self.reader.next(); + self.scratch.push(x as char); + } + b'.' => { + // mantissa + let backup = self.reader.offset(); + self.reader.next(); + let next = self.reader.peek(); + if let Some(b'0'..=b'9') = next { + self.scratch.push('.'); + return self.lex_mantissa(); + } else { + // indexing a number + self.reader.backup(backup); + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))); + } + } + b'f' | b'd' => return self.lex_suffix(true), + // Oxc2 is the start byte of 'µ' + 0xc2 | b'n' | b'u' | b'm' | b'h' | b'w' | b'y' | b's' => { + // duration suffix, switch to lexing duration. + return Ok(self.lex_duration()); + } + b'_' => { + self.reader.next(); + } + b'a'..=b'z' | b'A'..=b'Z' => { + return Err(self.invalid_suffix()); + // invalid token, unexpected identifier character immediatly after number. + // Eat all remaining identifier like characters. 
+ } + _ => { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Number(NumberKind::Integer))); + } + } + } + } + + fn invalid_suffix(&mut self) -> Error { + // eat the whole suffix. + while let Some(x) = self.reader.peek() { + if !x.is_ascii_alphanumeric() { + break; + } + self.reader.next(); + } + self.scratch.clear(); + Error::InvalidSuffix + } + + /// Lex a number suffix, either 'f' or 'dec'. + fn lex_suffix(&mut self, can_be_duration: bool) -> Result { + match self.reader.peek() { + Some(b'f') => { + // float suffix + self.reader.next(); + if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) { + Err(self.invalid_suffix()) + } else { + self.string = Some(mem::take(&mut self.scratch)); + Ok(self.finish_token(TokenKind::Number(NumberKind::Float))) + } + } + Some(b'd') => { + // decimal suffix + self.reader.next(); + let checkpoint = self.reader.offset(); + if !self.eat(b'e') { + if can_be_duration { + self.reader.backup(checkpoint - 1); + return Ok(self.lex_duration()); + } else { + return Err(self.invalid_suffix()); + } + } + + if !self.eat(b'c') { + return Err(self.invalid_suffix()); + } + + if let Some(true) = self.reader.peek().map(|x| x.is_identifier_continue()) { + Err(self.invalid_suffix()) + } else { + self.string = Some(mem::take(&mut self.scratch)); + Ok(self.finish_token(TokenKind::Number(NumberKind::Decimal))) + } + } + _ => unreachable!(), + } + } + + /// Lexes the mantissa of a number, i.e. `.8` in `1.8` + pub fn lex_mantissa(&mut self) -> Result { + loop { + // lex_number already checks if there exists a digit after the dot. + // So this will never fail the first iteration of the loop. + let Some(x) = self.reader.peek() else { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa))); + }; + match x { + b'0'..=b'9' => { + // next digit. 
+ self.reader.next(); + self.scratch.push(x as char); + } + b'e' | b'E' => { + // scientific notation + self.reader.next(); + self.scratch.push('e'); + return self.lex_exponent(true); + } + b'_' => { + self.reader.next(); + } + b'f' | b'd' => return self.lex_suffix(false), + b'a'..=b'z' | b'A'..=b'Z' => { + // invalid token, random identifier characters immediately after number. + self.scratch.clear(); + return Err(Error::InvalidSuffix); + } + _ => { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Number(NumberKind::Mantissa))); + } + } + } + } + + /// Lexes the exponent of a number, i.e. `e10` in `1.1e10`; + fn lex_exponent(&mut self, had_mantissa: bool) -> Result { + let mut atleast_one = false; + match self.reader.peek() { + Some(b'-' | b'+') => {} + Some(b'0'..=b'9') => { + atleast_one = true; + } + _ => { + // random other character, expected atleast one digit. + return Err(Error::DigitExpectedExponent); + } + } + self.reader.next(); + loop { + match self.reader.peek() { + Some(x @ b'0'..=b'9') => { + self.reader.next(); + self.scratch.push(x as char); + } + Some(b'_') => { + self.reader.next(); + } + Some(b'f' | b'd') => return self.lex_suffix(false), + _ => { + if atleast_one { + let kind = if had_mantissa { + NumberKind::MantissaExponent + } else { + NumberKind::Exponent + }; + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Number(kind))); + } else { + return Err(Error::DigitExpectedExponent); + } + } + } + } + } +} diff --git a/lib/src/syn/v2/lexer/reader.rs b/lib/src/syn/v2/lexer/reader.rs new file mode 100644 index 00000000..148f4864 --- /dev/null +++ b/lib/src/syn/v2/lexer/reader.rs @@ -0,0 +1,157 @@ +use thiserror::Error; + +use crate::syn::v2::token::Span; +use std::fmt; + +#[derive(Error, Debug)] +pub enum CharError { + #[error("found eof inside multi byte character")] + Eof, + #[error("string is not valid utf-8")] + Unicode, +} + +#[derive(Clone)] +pub struct 
BytesReader<'a> { + data: &'a [u8], + current: usize, +} + +impl fmt::Debug for BytesReader<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("BytesReader") + .field("used", &self.used()) + .field("remaining", &self.remaining()) + .finish() + } +} + +impl<'a> BytesReader<'a> { + pub fn new(slice: &'a [u8]) -> Self { + BytesReader { + data: slice, + current: 0, + } + } + + #[inline] + pub fn full(&self) -> &'a [u8] { + self.data + } + + #[inline] + pub fn used(&self) -> &'a [u8] { + &self.data[..self.current] + } + + #[inline] + pub fn remaining(&self) -> &'a [u8] { + &self.data[self.current..] + } + + #[inline] + pub fn len(&self) -> usize { + self.remaining().len() + } + + #[inline] + pub fn offset(&self) -> usize { + self.current + } + + #[inline] + pub fn backup(&mut self, offset: usize) { + assert!(offset <= self.offset()); + self.current = offset; + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.remaining().is_empty() + } + + #[inline] + pub fn peek(&self) -> Option { + self.remaining().get(0).copied() + } + #[inline] + pub fn span(&self, span: Span) -> &[u8] { + &self.data[(span.offset as usize)..(span.offset as usize + span.len as usize)] + } + #[inline] + pub fn next_continue_byte(&mut self) -> Result { + const CONTINUE_BYTE_PREFIX_MASK: u8 = 0b1100_0000; + const CONTINUE_BYTE_MASK: u8 = 0b0011_1111; + + let byte = self.next().ok_or(CharError::Eof)?; + if byte & CONTINUE_BYTE_PREFIX_MASK != 0b1000_0000 { + return Err(CharError::Eof); + } + + Ok(byte & CONTINUE_BYTE_MASK) + } + + pub fn convert_to_char(&mut self, start: u8) -> Result { + if start.is_ascii() { + return Ok(start as char); + } + self.complete_char(start) + } + + pub fn complete_char(&mut self, start: u8) -> Result { + match start & 0b1111_1000 { + 0b1100_0000 | 0b1101_0000 | 0b1100_1000 | 0b1101_1000 => { + let mut val = (start & 0b0001_1111) as u32; + val <<= 6; + let next = self.next_continue_byte()?; + val |= next as u32; + 
char::from_u32(val).ok_or(CharError::Unicode) + } + 0b1110_0000 | 0b1110_1000 => { + let mut val = (start & 0b0000_1111) as u32; + val <<= 6; + let next = self.next_continue_byte()?; + val |= next as u32; + val <<= 6; + let next = self.next_continue_byte()?; + val |= next as u32; + char::from_u32(val).ok_or(CharError::Unicode) + } + 0b1111_0000 => { + let mut val = (start & 0b0000_0111) as u32; + val <<= 6; + let next = self.next_continue_byte()?; + val |= next as u32; + val <<= 6; + let next = self.next_continue_byte()?; + val |= next as u32; + val <<= 6; + let next = self.next_continue_byte()?; + val |= next as u32; + char::from_u32(val).ok_or(CharError::Unicode) + } + x => panic!("start byte did not start multi byte character: {:b}", x), + } + } +} + +impl<'a> Iterator for BytesReader<'a> { + type Item = u8; + + #[inline] + fn next(&mut self) -> Option { + let res = self.peek()?; + self.current += 1; + Some(res) + } + fn size_hint(&self) -> (usize, Option) { + let len = self.len(); + (len, Some(len)) + } +} + +impl<'a> ExactSizeIterator for BytesReader<'a> { + fn len(&self) -> usize { + self.len() + } +} diff --git a/lib/src/syn/v2/lexer/strand.rs b/lib/src/syn/v2/lexer/strand.rs new file mode 100644 index 00000000..1404d590 --- /dev/null +++ b/lib/src/syn/v2/lexer/strand.rs @@ -0,0 +1,95 @@ +//! Lexing of strand like characters. + +use std::mem; + +use crate::syn::v2::token::{Token, TokenKind}; + +use super::{unicode::chars, Error, Lexer}; + +impl<'a> Lexer<'a> { + /// Lex a plain strand with either single or double quotes. + pub fn lex_strand(&mut self, is_double: bool) -> Token { + match self.lex_strand_err(is_double) { + Ok(x) => x, + Err(x) => { + self.scratch.clear(); + self.invalid_token(x) + } + } + } + + /// Lex a strand with either double or single quotes but return an result instead of a token. 
+ pub fn lex_strand_err(&mut self, is_double: bool) -> Result { + loop { + let Some(x) = self.reader.next() else { + self.scratch.clear(); + return Ok(self.eof_token()); + }; + + if x.is_ascii() { + match x { + b'\'' if !is_double => { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Strand)); + } + b'"' if is_double => { + self.string = Some(mem::take(&mut self.scratch)); + return Ok(self.finish_token(TokenKind::Strand)); + } + b'\0' => { + // null bytes not allowed + return Err(Error::UnexpectedCharacter('\0')); + } + b'\\' => { + // Handle escape sequences. + let Some(next) = self.reader.next() else { + self.scratch.clear(); + return Ok(self.eof_token()); + }; + match next { + b'\\' => { + self.scratch.push('\\'); + } + b'\'' if !is_double => { + self.scratch.push('\''); + } + b'\"' if is_double => { + self.scratch.push('\"'); + } + b'/' => { + self.scratch.push('/'); + } + b'b' => { + self.scratch.push(chars::BS); + } + b'f' => { + self.scratch.push(chars::FF); + } + b'n' => { + self.scratch.push(chars::LF); + } + b'r' => { + self.scratch.push(chars::CR); + } + b't' => { + self.scratch.push(chars::TAB); + } + x => { + let char = if x.is_ascii() { + x as char + } else { + self.reader.complete_char(x)? + }; + return Err(Error::InvalidEscapeCharacter(char)); + } + } + } + x => self.scratch.push(x as char), + } + } else { + let c = self.reader.complete_char(x)?; + self.scratch.push(c); + } + } + } +} diff --git a/lib/src/syn/v2/lexer/test.rs b/lib/src/syn/v2/lexer/test.rs new file mode 100644 index 00000000..a56b71a0 --- /dev/null +++ b/lib/src/syn/v2/lexer/test.rs @@ -0,0 +1,482 @@ +use chrono::{FixedOffset, NaiveDate, Offset, TimeZone, Utc}; + +use crate::syn::v2::token::{t, NumberKind, TokenKind}; + +macro_rules! 
test_case( + ($source:expr => [$($token:expr),*$(,)?]) => { + let mut lexer = crate::syn::v2::lexer::Lexer::new($source.as_bytes()); + let mut i = 0; + $( + let next = lexer.next(); + if let Some(next) = next { + let span = std::str::from_utf8(lexer.reader.span(next.span)).unwrap_or("invalid utf8"); + if let TokenKind::Invalid = next.kind{ + let error = lexer.error.take().unwrap(); + assert_eq!(next.kind, $token, "{} = {}:{} => {}",span, i, stringify!($token), error); + }else{ + assert_eq!(next.kind, $token, "{} = {}:{}", span, i, stringify!($token)); + } + }else{ + assert_eq!(next,None); + } + i += 1; + )* + let _ = i; + assert_eq!(lexer.next(),None) + }; +); + +#[test] +fn operators() { + test_case! { + r#"- + / * ! ** + < > <= >= <- <-> -> + = == -= += != +?= + ? ?? ?: ?~ ?= + { } [ ] ( ) + ; , | || & && + $ + . .. ... + + ^ + "# => [ + t!("-"), t!("+"), t!("/"), t!("*"), t!("!"), t!("**"), + + t!("<"), t!(">"), t!("<="), t!(">="), t!("<-"), t!("<->"), t!("->"), + + t!("="), t!("=="), t!("-="), t!("+="), t!("!="), t!("+?="), + + t!("?"), t!("??"), t!("?:"), t!("?~"), t!("?="), + + t!("{"), t!("}"), t!("["), t!("]"), t!("("), t!(")"), + + t!(";"), t!(","), t!("|"), t!("||"), TokenKind::Invalid, t!("&&"), + + t!("$"), + + t!("."), t!(".."), t!("..."), + + TokenKind::Invalid + ] + } +} + +#[test] +fn comments() { + test_case! { + r" + + /* some comment */ + - // another comment + + -- a third comment + - + " => [ + t!("+"), + t!("-"), + t!("+"), + t!("-"), + ] + } +} + +#[test] +fn whitespace() { + test_case! { + "+= \t\n\r -=" => [ + t!("+="), + t!("-="), + ] + } +} + +#[test] +fn identifiers() { + test_case! { + r#" + 123123adwad + + akdwkj + + akdwkj1231312313123 + + _a_k_d_wkj1231312313123 + + ____wdw____ + + "# + => [ + TokenKind::Invalid, + t!("+"), + TokenKind::Identifier, + t!("+"), + TokenKind::Identifier, + t!("+"), + TokenKind::Identifier, + t!("+"), + TokenKind::Identifier, + t!("+"), + ] + } +} + +#[test] +fn numbers() { + test_case! 
{ + r#" + 123123+32010230.123012031+33043030dec+33043030f+ + + "# + => [ + TokenKind::Number(NumberKind::Integer), + t!("+"), + TokenKind::Number(NumberKind::Mantissa), + t!("+"), + TokenKind::Number(NumberKind::Decimal), + t!("+"), + TokenKind::Number(NumberKind::Float), + t!("+"), + ] + } + + test_case! { + "+123129decs+" + => [ + t!("+"), + TokenKind::Invalid, + t!("+"), + ] + } + + test_case! { + "+39349fs+" + => [ + t!("+"), + TokenKind::Invalid, + t!("+"), + ] + } + + test_case! { + "+394393df+" + => [ + t!("+"), + TokenKind::Invalid, + t!("+"), + ] + } + + test_case! { + "+32932932def+" + => [ + t!("+"), + TokenKind::Invalid, + t!("+"), + ] + } + + test_case! { + "+329239329z+" + => [ + t!("+"), + TokenKind::Invalid, + t!("+"), + ] + } +} + +#[test] +fn duration() { + test_case! { + r#" + 1ns+1µs+1us+1ms+1s+1m+1h+1w+1y + + 1nsa+1ans+1aus+1usa+1ams+1msa+1am+1ma+1ah+1ha+1aw+1wa+1ay+1ya+1µsa + "# + => [ + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + t!("+"), + TokenKind::Duration, + + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + t!("+"), + TokenKind::Invalid, + ] + } +} + +#[test] +fn keyword() { + test_case! 
{ + r#"select SELECT sElEcT"# => [ + t!("SELECT"), + t!("SELECT"), + t!("SELECT"), + ] + } +} + +#[test] +fn uuid() { + let mut lexer = crate::syn::v2::lexer::Lexer::new( + r#" u"e72bee20-f49b-11ec-b939-0242ac120002" "#.as_bytes(), + ); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {} @ ", error); + } + assert_eq!(token.kind, TokenKind::Uuid); + let uuid = lexer.uuid.take().unwrap(); + assert_eq!(uuid.0.to_string(), "e72bee20-f49b-11ec-b939-0242ac120002"); + + let mut lexer = crate::syn::v2::lexer::Lexer::new( + r#" u"b19bc00b-aa98-486c-ae37-c8e1c54295b1" "#.as_bytes(), + ); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {} @ ", error); + } + assert_eq!(token.kind, TokenKind::Uuid); + let uuid = lexer.uuid.take().unwrap(); + assert_eq!(uuid.0.to_string(), "b19bc00b-aa98-486c-ae37-c8e1c54295b1"); +} + +#[test] +fn date_time_just_date() { + let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {} @ ", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let expected_datetime = Utc + .fix() + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn date_zone_time() { + let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2020-01-01T00:00:00Z" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {} @ ", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let expected_datetime = Utc + .fix() + .from_local_datetime( + &NaiveDate::from_ymd_opt(2020, 1, 1).unwrap().and_hms_nano_opt(0, 0, 0, 0).unwrap(), + ) + .earliest() + .unwrap() 
+ .with_timezone(&Utc); + + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn date_time_with_time() { + let mut lexer = crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43Z" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {} @ ", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let expected_datetime = Utc + .fix() + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23).unwrap().and_hms_nano_opt(18, 25, 43, 0).unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn date_time_nanos() { + let mut lexer = + crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5631Z" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {} @ ", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let expected_datetime = Utc + .fix() + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 563_100_000) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn date_time_timezone_utc() { + let mut lexer = + crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.0000511Z" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {}", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let expected_datetime = Utc + .fix() + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 51_100) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn date_time_timezone_pacific() { + let mut lexer = + 
crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511-08:00" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {}", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let offset = FixedOffset::west_opt(8 * 3600).unwrap(); + let expected_datetime = offset + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 511_000_000) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn date_time_timezone_pacific_partial() { + let mut lexer = + crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.511+08:30" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {}", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let offset = FixedOffset::east_opt(8 * 3600 + 30 * 60).unwrap(); + let expected_datetime = offset + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 511_000_000) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn date_time_timezone_utc_nanoseconds() { + let mut lexer = + crate::syn::v2::lexer::Lexer::new(r#" d"2012-04-23T18:25:43.5110000Z" "#.as_bytes()); + let token = lexer.next_token(); + if let Some(error) = lexer.error { + println!("ERROR: {}", error); + } + assert_eq!(token.kind, TokenKind::DateTime); + let datetime = lexer.datetime.take().unwrap(); + let offset = Utc.fix(); + let expected_datetime = offset + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 511_000_000) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + assert_eq!(datetime.0, expected_datetime); +} + +#[test] +fn 
//! Unicode related utilities.

/// Character constants
pub mod chars {
	/// Character tabulation
	pub const TAB: char = '\u{0009}';
	/// Form feed
	pub const FF: char = '\u{000C}';

	/// Line feed
	pub const LF: char = '\u{000A}';
	/// Carriage return
	pub const CR: char = '\u{000D}';
	/// Line separator (U+2028).
	// Fixed: was '\u{2020}', which is the DAGGER character, not the unicode
	// line separator required by the ECMAScript LineTerminator grammar.
	pub const LS: char = '\u{2028}';
	/// Backspace
	pub const BS: char = '\u{0008}';
	/// Paragraph separator
	pub const PS: char = '\u{2029}';
	/// Next line
	pub const NEL: char = '\u{0085}';

	/// Line terminators for javascript source code.
	// NOTE(review): the identifier keeps its original (misspelled) name since
	// other modules reference it by this exact name.
	pub const JS_LINE_TERIMATORS: [char; 4] = [LF, CR, LS, PS];
}

pub mod byte {
	/// Character tabulation
	pub const TAB: u8 = b'\t';
	/// Line tabulation
	pub const VT: u8 = 0xB;
	/// Form feed
	pub const FF: u8 = 0xC;

	/// Line feed
	pub const LF: u8 = 0xA;
	/// Carriage return
	pub const CR: u8 = 0xD;

	/// Space
	pub const SP: u8 = 0x20;
}

/// A trait extending u8 for adding some extra functions.
pub trait U8Ext {
	/// Returns if the u8 can start an identifier.
	fn is_identifier_start(&self) -> bool;

	/// Returns if the u8 can start a number.
	fn is_number_start(&self) -> bool;

	/// Returns if the u8 can continue an identifier after the first character.
	fn is_identifier_continue(&self) -> bool;
}

impl U8Ext for u8 {
	fn is_identifier_start(&self) -> bool {
		matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'_')
	}

	fn is_identifier_continue(&self) -> bool {
		matches!(self, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
	}

	fn is_number_start(&self) -> bool {
		self.is_ascii_digit()
	}
}
+ pub fn lex_uuid_err(&mut self, double: bool) -> Result { + let uuid = self.lex_uuid_err_inner()?; + + let end_char = if double { + b'"' + } else { + b'\'' + }; + // closing strand character + if !self.eat(end_char) { + return Err(Error::ExpectedStrandEnd); + } + + Ok(uuid) + } + + /// Lex a uuid strand without delimiting quotes but return an result instead of a + /// token. + /// + /// Expects the first delimiter to already have been eaten. + pub fn lex_uuid_err_inner(&mut self) -> Result { + let start = self.reader.offset(); + + if !self.lex_hex(8) { + return Err(Error::MissingDigits); + } + + if !self.eat(b'-') { + return Err(Error::MissingSeperator); + } + + if !self.lex_hex(4) { + return Err(Error::MissingDigits); + } + + if !self.eat(b'-') { + return Err(Error::MissingSeperator); + } + + if !self.eat_when(|x| (b'1'..=b'8').contains(&x)) { + if self.reader.peek().map(|x| x.is_ascii_digit()).unwrap_or(false) { + // byte was an ascii digit but not in the valid range. + return Err(Error::InvalidRange); + } + return Err(Error::MissingDigits); + }; + + if !self.lex_hex(3) { + return Err(Error::MissingDigits); + } + + if !self.eat(b'-') { + return Err(Error::MissingSeperator); + } + + if !self.lex_hex(4) { + return Err(Error::MissingDigits); + } + + if !self.eat(b'-') { + return Err(Error::MissingSeperator); + } + + if !self.lex_hex(12) { + return Err(Error::MissingDigits); + } + + let end = self.reader.offset(); + // The lexer ensures that the section of bytes is valid utf8 so this should never panic. + let uuid_str = std::str::from_utf8(&self.reader.full()[start..end]).unwrap(); + // The lexer ensures that the bytes are a valid uuid so this should never panic. + Ok(Uuid(uuid::Uuid::try_from(uuid_str).unwrap())) + } + + /// lexes a given amount of hex characters. returns true if the lexing was successfull, false + /// otherwise. 
+ pub fn lex_hex(&mut self, amount: u8) -> bool { + for _ in 0..amount { + if !self.eat_when(|x| matches!(x,b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')) { + return false; + } + } + true + } +} diff --git a/lib/src/syn/v2/mod.rs b/lib/src/syn/v2/mod.rs new file mode 100644 index 00000000..7b514d6d --- /dev/null +++ b/lib/src/syn/v2/mod.rs @@ -0,0 +1,123 @@ +#![allow(dead_code)] + +use crate::{ + err::Error, + sql::{Datetime, Duration, Idiom, Query, Range, Subquery, Thing, Value}, +}; + +pub mod lexer; +pub mod parser; +pub mod token; + +#[cfg(test)] +mod test; + +use lexer::Lexer; +use parser::{ParseError, ParseErrorKind, Parser}; + +/// Parses a SurrealQL [`Query`] +/// +/// During query parsing, the total depth of calls to parse values (including arrays, expressions, +/// functions, objects, sub-queries), Javascript values, and geometry collections count against +/// a computation depth limit. If the limit is reached, parsing will return +/// [`Error::ComputationDepthExceeded`], as opposed to spending more time and potentially +/// overflowing the call stack. +/// +/// If you encounter this limit and believe that it should be increased, +/// please [open an issue](https://github.com/surrealdb/surrealdb/issues)! +#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))] +pub fn parse(input: &str) -> Result { + debug!("parsing query, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.parse_query().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} + +/// Parses a SurrealQL [`Value`]. +#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))] +pub fn value(input: &str) -> Result { + debug!("parsing value, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.parse_value_field().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} + +/// Parses a SurrealQL [`Value`]. 
+#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))] +pub fn value_legacy_strand(input: &str) -> Result { + debug!("parsing value, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.allow_legacy_strand(true); + parser.parse_value().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} + +/// Parses JSON into an inert SurrealQL [`Value`] +#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))] +pub fn json(input: &str) -> Result { + debug!("parsing json, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.parse_json().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} + +/// Parses JSON into an inert SurrealQL [`Value`] +#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))] +pub fn json_legacy_strand(input: &str) -> Result { + debug!("parsing json, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.allow_legacy_strand(true); + parser.parse_json().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} +/// Parses a SurrealQL Subquery [`Subquery`] +#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))] +pub fn subquery(input: &str) -> Result { + debug!("parsing subquery, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.parse_full_subquery().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} + +/// Parses a SurrealQL [`Idiom`] +#[instrument(level = "debug", name = "parser", skip_all, fields(length = input.len()))] +pub fn idiom(input: &str) -> Result { + debug!("parsing idiom, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.parse_plain_idiom().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} + +/// Parse a datetime without enclosing delimiters from a string. 
+pub fn datetime_raw(input: &str) -> Result { + debug!("parsing datetime, input = {input}"); + let mut lexer = Lexer::new(input.as_bytes()); + lexer + .lex_datetime_raw_err() + .map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidToken(lexer::Error::DateTime(e)), + lexer.current_span(), + ) + }) + .map_err(|e| e.render_on(input)) + .map_err(Error::InvalidQuery) +} + +/// Parse a duration from a string. +pub fn duration(input: &str) -> Result { + debug!("parsing duration, input = {input}"); + let mut lexer = Lexer::new(input.as_bytes()); + lexer + .lex_only_duration() + .map_err(|e| ParseError::new(ParseErrorKind::InvalidToken(e), lexer.current_span())) + .map_err(|e| e.render_on(input)) + .map_err(Error::InvalidQuery) +} + +/// Parse a range +pub fn range(input: &str) -> Result { + debug!("parsing range, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.parse_range().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} + +/// Parse a record id. +pub fn thing(input: &str) -> Result { + debug!("parsing thing, input = {input}"); + let mut parser = Parser::new(input.as_bytes()); + parser.parse_thing().map_err(|e| e.render_on(input)).map_err(Error::InvalidQuery) +} diff --git a/lib/src/syn/v2/parser/basic.rs b/lib/src/syn/v2/parser/basic.rs new file mode 100644 index 00000000..ebe12880 --- /dev/null +++ b/lib/src/syn/v2/parser/basic.rs @@ -0,0 +1,302 @@ +use crate::{ + sql::{ + language::Language, Datetime, Duration, Ident, Number, Param, Regex, Strand, Table, Uuid, + }, + syn::v2::{ + parser::mac::unexpected, + token::{t, NumberKind, Token, TokenKind}, + }, +}; + +use super::{ParseError, ParseErrorKind, ParseResult, Parser}; + +/// A trait for parsing single tokens with a specific value. 
+pub trait TokenValue: Sized { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult; +} + +impl TokenValue for Ident { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Keyword(_) + | TokenKind::Language(_) + | TokenKind::Algorithm(_) + | TokenKind::Distance(_) => { + let str = parser.lexer.reader.span(token.span); + // Lexer should ensure that the token is valid utf-8 + let str = std::str::from_utf8(str).unwrap().to_owned(); + Ok(Ident(str)) + } + TokenKind::Identifier => { + let str = parser.lexer.string.take().unwrap(); + Ok(Ident(str)) + } + x => { + unexpected!(parser, x, "a identifier"); + } + } + } +} + +impl TokenValue for Table { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + parser.token_value::(token).map(|x| Table(x.0)) + } +} + +impl TokenValue for u64 { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Number(NumberKind::Integer) => { + let number = parser.lexer.string.take().unwrap().parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidInteger { + error: e, + }, + token.span, + ) + })?; + Ok(number) + } + x => unexpected!(parser, x, "an integer"), + } + } +} + +impl TokenValue for u32 { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Number(NumberKind::Integer) => { + let number = parser.lexer.string.take().unwrap().parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidInteger { + error: e, + }, + token.span, + ) + })?; + Ok(number) + } + x => unexpected!(parser, x, "an integer"), + } + } +} + +impl TokenValue for u16 { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Number(NumberKind::Integer) => { + let number = parser.lexer.string.take().unwrap().parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidInteger { + error: e, + }, + token.span, + ) 
+ })?; + Ok(number) + } + x => unexpected!(parser, x, "an integer"), + } + } +} + +impl TokenValue for u8 { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Number(NumberKind::Integer) => { + let number = parser.lexer.string.take().unwrap().parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidInteger { + error: e, + }, + token.span, + ) + })?; + Ok(number) + } + x => unexpected!(parser, x, "an integer"), + } + } +} + +impl TokenValue for f32 { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Number(NumberKind::NaN) => Ok(f32::NAN), + TokenKind::Number( + NumberKind::Integer + | NumberKind::Float + | NumberKind::Mantissa + | NumberKind::MantissaExponent, + ) => { + let number = parser.lexer.string.take().unwrap().parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidFloat { + error: e, + }, + token.span, + ) + })?; + Ok(number) + } + x => unexpected!(parser, x, "an floating point"), + } + } +} + +impl TokenValue for Language { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Language(x) => Ok(x), + // `NO` can both be used as a keyword and as a language. + t!("NO") => Ok(Language::Norwegian), + x => unexpected!(parser, x, "a language"), + } + } +} + +impl TokenValue for Number { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Number(NumberKind::NaN) => Ok(Number::Float(f64::NAN)), + TokenKind::Number(NumberKind::Integer) => { + let source = parser.lexer.string.take().unwrap(); + if let Ok(x) = source.parse() { + return Ok(Number::Int(x)); + } + // integer overflowed, fallback to floating point + // As far as I can tell this will never fail for valid integers. 
+ let x = source.parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidFloat { + error: e, + }, + token.span, + ) + })?; + Ok(Number::Float(x)) + } + TokenKind::Number( + NumberKind::Mantissa | NumberKind::MantissaExponent | NumberKind::Float, + ) => { + let source = parser.lexer.string.take().unwrap(); + // As far as I can tell this will never fail for valid integers. + let x = source.parse().map_err(|e| { + ParseError::new( + ParseErrorKind::InvalidFloat { + error: e, + }, + token.span, + ) + })?; + Ok(Number::Float(x)) + } + TokenKind::Number(NumberKind::Decimal) => { + let source = parser.lexer.string.take().unwrap(); + // As far as I can tell this will never fail for valid integers. + let x: rust_decimal::Decimal = source.parse().map_err(|error| { + ParseError::new( + ParseErrorKind::InvalidDecimal { + error, + }, + token.span, + ) + })?; + Ok(Number::Decimal(x)) + } + x => unexpected!(parser, x, "a number"), + } + } +} + +impl TokenValue for Param { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + TokenKind::Parameter => { + let param = parser.lexer.string.take().unwrap(); + Ok(Param(Ident(param))) + } + x => unexpected!(parser, x, "a parameter"), + } + } +} + +impl TokenValue for Duration { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + let TokenKind::Duration = token.kind else { + unexpected!(parser, token.kind, "a duration") + }; + let duration = parser.lexer.duration.take().expect("token data was already consumed"); + Ok(duration) + } +} + +impl TokenValue for Datetime { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + let TokenKind::DateTime = token.kind else { + unexpected!(parser, token.kind, "a duration") + }; + let datetime = parser.lexer.datetime.take().expect("token data was already consumed"); + Ok(datetime) + } +} + +impl TokenValue for Strand { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + match token.kind { + 
TokenKind::Strand => { + let strand = parser.lexer.string.take().unwrap(); + Ok(Strand(strand)) + } + x => unexpected!(parser, x, "a strand"), + } + } +} + +impl TokenValue for Uuid { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + let TokenKind::Uuid = token.kind else { + unexpected!(parser, token.kind, "a duration") + }; + Ok(parser.lexer.uuid.take().expect("token data was already consumed")) + } +} + +impl TokenValue for Regex { + fn from_token(parser: &mut Parser<'_>, token: Token) -> ParseResult { + let TokenKind::Regex = token.kind else { + unexpected!(parser, token.kind, "a regex") + }; + Ok(parser.lexer.regex.take().expect("token data was already consumed")) + } +} + +impl Parser<'_> { + /// Parse a token value from the next token in the parser. + pub fn next_token_value(&mut self) -> ParseResult { + let next = self.peek(); + let res = V::from_token(self, next); + if res.is_ok() { + self.pop_peek(); + } + res + } + + /// Parse a token value from the given token. + pub fn token_value(&mut self, token: Token) -> ParseResult { + V::from_token(self, token) + } + + /// Returns if the peeked token can be a identifier. 
const MAX_LEVENSTHEIN_CUT_OFF: u8 = 4;
const MAX_FUNCTION_NAME_LEN: usize = 33;
const LEVENSTHEIN_ARRAY_SIZE: usize = 1 + MAX_FUNCTION_NAME_LEN + MAX_LEVENSTHEIN_CUT_OFF as usize;

/// Simple function calculating levenshtein distance with a cut-off.
///
/// Levenshtein distance seems fast enough for searching possible functions to suggest as the list
/// isn't that long and the function names aren't that long. Additionally this function also uses a
/// cut-off for quick rejection of strings which won't lower the minimum searched distance.
///
/// The comparison is ascii-case-insensitive. Uses two stack allocated rows of size
/// LEVENSTHEIN_ARRAY_SIZE, which must be at least the largest name in the haystack plus the
/// maximum cut-off plus 1 for the extra cell required during calculation.
fn levenshtein(a: &[u8], b: &[u8], cut_off: u8) -> u8 {
	debug_assert!(LEVENSTHEIN_ARRAY_SIZE < u8::MAX as usize);
	let mut distance_array = [[0u8; LEVENSTHEIN_ARRAY_SIZE]; 2];

	if a.len().abs_diff(b.len()) > cut_off as usize {
		// Transforming a into b requires at least more than cut_off insertions or
		// deletions, so don't even bother.
		return cut_off + 1;
	}

	// At this point a and b can't be larger than LEVENSTHEIN_ARRAY_SIZE
	// because otherwise they would have been rejected by the previous if statement.
	assert!(a.len() < LEVENSTHEIN_ARRAY_SIZE);
	assert!(b.len() < LEVENSTHEIN_ARRAY_SIZE);

	for i in 1..=a.len() {
		distance_array[0][i] = i as u8;
	}

	for i in 1..=b.len() {
		let current = i & 1;
		let prev = current ^ 1;
		distance_array[current][0] = i as u8;

		let mut lowest = i as u8;

		for j in 1..=a.len() {
			// BUG FIX: the substitution cost must compare a[j-1] against b[i-1]
			// (it previously compared against b[j-1]), otherwise the DP does not
			// compute the levenshtein distance (e.g. "abc" vs "cab" gave 3, not 2).
			let cost = (a.get(j - 1).map(|x| x.to_ascii_lowercase())
				!= b.get(i - 1).map(|x| x.to_ascii_lowercase())) as u8;

			let res = (distance_array[prev][j] + 1)
				.min(distance_array[current][j - 1] + 1)
				.min(distance_array[prev][j - 1] + cost);

			distance_array[current][j] = res;
			lowest = res.min(lowest)
		}

		// The lowest value in the next calculated row will always be equal or larger than the
		// lowest value of the current row. So we can cut off search early if the score can't
		// equal the cut_off.
		if lowest > cut_off {
			return cut_off + 1;
		}
	}
	distance_array[b.len() & 1][a.len()]
}
{ + UniCase::ascii("array::add") => PathKind::Function, + UniCase::ascii("array::all") => PathKind::Function, + UniCase::ascii("array::any") => PathKind::Function, + UniCase::ascii("array::append") => PathKind::Function, + UniCase::ascii("array::at") => PathKind::Function, + UniCase::ascii("array::boolean_and") => PathKind::Function, + UniCase::ascii("array::boolean_not") => PathKind::Function, + UniCase::ascii("array::boolean_or") => PathKind::Function, + UniCase::ascii("array::boolean_xor") => PathKind::Function, + UniCase::ascii("array::clump") => PathKind::Function, + UniCase::ascii("array::combine") => PathKind::Function, + UniCase::ascii("array::complement") => PathKind::Function, + UniCase::ascii("array::concat") => PathKind::Function, + UniCase::ascii("array::difference") => PathKind::Function, + UniCase::ascii("array::distinct") => PathKind::Function, + UniCase::ascii("array::filter_index") => PathKind::Function, + UniCase::ascii("array::find_index") => PathKind::Function, + UniCase::ascii("array::first") => PathKind::Function, + UniCase::ascii("array::flatten") => PathKind::Function, + UniCase::ascii("array::group") => PathKind::Function, + UniCase::ascii("array::insert") => PathKind::Function, + UniCase::ascii("array::intersect") => PathKind::Function, + UniCase::ascii("array::join") => PathKind::Function, + UniCase::ascii("array::last") => PathKind::Function, + UniCase::ascii("array::len") => PathKind::Function, + UniCase::ascii("array::logical_and") => PathKind::Function, + UniCase::ascii("array::logical_or") => PathKind::Function, + UniCase::ascii("array::logical_xor") => PathKind::Function, + UniCase::ascii("array::matches") => PathKind::Function, + UniCase::ascii("array::max") => PathKind::Function, + UniCase::ascii("array::min") => PathKind::Function, + UniCase::ascii("array::pop") => PathKind::Function, + UniCase::ascii("array::prepend") => PathKind::Function, + UniCase::ascii("array::push") => PathKind::Function, + UniCase::ascii("array::remove") 
=> PathKind::Function, + UniCase::ascii("array::reverse") => PathKind::Function, + UniCase::ascii("array::slice") => PathKind::Function, + UniCase::ascii("array::sort") => PathKind::Function, + UniCase::ascii("array::transpose") => PathKind::Function, + UniCase::ascii("array::union") => PathKind::Function, + UniCase::ascii("array::sort::asc") => PathKind::Function, + UniCase::ascii("array::sort::desc") => PathKind::Function, + // + UniCase::ascii("object::entries") => PathKind::Function, + UniCase::ascii("object::from_entries") => PathKind::Function, + UniCase::ascii("object::keys") => PathKind::Function, + UniCase::ascii("object::len") => PathKind::Function, + UniCase::ascii("object::values") => PathKind::Function, + UniCase::ascii("object::matches") => PathKind::Function, + // + UniCase::ascii("bytes::len") => PathKind::Function, + // + UniCase::ascii("count") => PathKind::Function, + // + UniCase::ascii("crypto::md5") => PathKind::Function, + UniCase::ascii("crypto::sha1") => PathKind::Function, + UniCase::ascii("crypto::sha256") => PathKind::Function, + UniCase::ascii("crypto::sha512") => PathKind::Function, + // + UniCase::ascii("duration::days") => PathKind::Function, + UniCase::ascii("duration::hours") => PathKind::Function, + UniCase::ascii("duration::micros") => PathKind::Function, + UniCase::ascii("duration::millis") => PathKind::Function, + UniCase::ascii("duration::mins") => PathKind::Function, + UniCase::ascii("duration::nanos") => PathKind::Function, + UniCase::ascii("duration::secs") => PathKind::Function, + UniCase::ascii("duration::weeks") => PathKind::Function, + UniCase::ascii("duration::years") => PathKind::Function, + UniCase::ascii("duration::from::days") => PathKind::Function, + UniCase::ascii("duration::from::hours") => PathKind::Function, + UniCase::ascii("duration::from::micros") => PathKind::Function, + UniCase::ascii("duration::from::millis") => PathKind::Function, + UniCase::ascii("duration::from::mins") => PathKind::Function, + 
UniCase::ascii("duration::from::nanos") => PathKind::Function, + UniCase::ascii("duration::from::secs") => PathKind::Function, + UniCase::ascii("duration::from::weeks") => PathKind::Function, + // + UniCase::ascii("encoding::base64::decode") => PathKind::Function, + UniCase::ascii("encoding::base64::encode") => PathKind::Function, + // + UniCase::ascii("geo::area") => PathKind::Function, + UniCase::ascii("geo::bearing") => PathKind::Function, + UniCase::ascii("geo::centroid") => PathKind::Function, + UniCase::ascii("geo::distance") => PathKind::Function, + UniCase::ascii("geo::hash::decode") => PathKind::Function, + UniCase::ascii("geo::hash::encode") => PathKind::Function, + // + UniCase::ascii("math::abs") => PathKind::Function, + UniCase::ascii("math::bottom") => PathKind::Function, + UniCase::ascii("math::ceil") => PathKind::Function, + UniCase::ascii("math::fixed") => PathKind::Function, + UniCase::ascii("math::floor") => PathKind::Function, + UniCase::ascii("math::interquartile") => PathKind::Function, + UniCase::ascii("math::max") => PathKind::Function, + UniCase::ascii("math::mean") => PathKind::Function, + UniCase::ascii("math::median") => PathKind::Function, + UniCase::ascii("math::midhinge") => PathKind::Function, + UniCase::ascii("math::min") => PathKind::Function, + UniCase::ascii("math::mode") => PathKind::Function, + UniCase::ascii("math::nearestrank") => PathKind::Function, + UniCase::ascii("math::percentile") => PathKind::Function, + UniCase::ascii("math::pow") => PathKind::Function, + UniCase::ascii("math::product") => PathKind::Function, + UniCase::ascii("math::round") => PathKind::Function, + UniCase::ascii("math::spread") => PathKind::Function, + UniCase::ascii("math::sqrt") => PathKind::Function, + UniCase::ascii("math::stddev") => PathKind::Function, + UniCase::ascii("math::sum") => PathKind::Function, + UniCase::ascii("math::top") => PathKind::Function, + UniCase::ascii("math::trimean") => PathKind::Function, + 
UniCase::ascii("math::variance") => PathKind::Function, + // + UniCase::ascii("meta::id") => PathKind::Function, + UniCase::ascii("meta::table") => PathKind::Function, + UniCase::ascii("meta::tb") => PathKind::Function, + // + UniCase::ascii("not") => PathKind::Function, + // + UniCase::ascii("parse::email::host") => PathKind::Function, + UniCase::ascii("parse::email::user") => PathKind::Function, + UniCase::ascii("parse::url::domain") => PathKind::Function, + UniCase::ascii("parse::url::fragment") => PathKind::Function, + UniCase::ascii("parse::url::host") => PathKind::Function, + UniCase::ascii("parse::url::path") => PathKind::Function, + UniCase::ascii("parse::url::port") => PathKind::Function, + UniCase::ascii("parse::url::query") => PathKind::Function, + UniCase::ascii("parse::url::scheme") => PathKind::Function, + // + UniCase::ascii("rand") => PathKind::Function, + UniCase::ascii("rand::bool") => PathKind::Function, + UniCase::ascii("rand::enum") => PathKind::Function, + UniCase::ascii("rand::float") => PathKind::Function, + UniCase::ascii("rand::guid") => PathKind::Function, + UniCase::ascii("rand::int") => PathKind::Function, + UniCase::ascii("rand::string") => PathKind::Function, + UniCase::ascii("rand::time") => PathKind::Function, + UniCase::ascii("rand::ulid") => PathKind::Function, + UniCase::ascii("rand::uuid::v4") => PathKind::Function, + UniCase::ascii("rand::uuid::v7") => PathKind::Function, + UniCase::ascii("rand::uuid") => PathKind::Function, + // + UniCase::ascii("session::db") => PathKind::Function, + UniCase::ascii("session::id") => PathKind::Function, + UniCase::ascii("session::ip") => PathKind::Function, + UniCase::ascii("session::ns") => PathKind::Function, + UniCase::ascii("session::origin") => PathKind::Function, + UniCase::ascii("session::sc") => PathKind::Function, + UniCase::ascii("session::sd") => PathKind::Function, + UniCase::ascii("session::token") => PathKind::Function, + // + UniCase::ascii("string::concat") => 
PathKind::Function, + UniCase::ascii("string::contains") => PathKind::Function, + UniCase::ascii("string::endsWith") => PathKind::Function, + UniCase::ascii("string::join") => PathKind::Function, + UniCase::ascii("string::len") => PathKind::Function, + UniCase::ascii("string::lowercase") => PathKind::Function, + UniCase::ascii("string::repeat") => PathKind::Function, + UniCase::ascii("string::replace") => PathKind::Function, + UniCase::ascii("string::reverse") => PathKind::Function, + UniCase::ascii("string::slice") => PathKind::Function, + UniCase::ascii("string::slug") => PathKind::Function, + UniCase::ascii("string::split") => PathKind::Function, + UniCase::ascii("string::startsWith") => PathKind::Function, + UniCase::ascii("string::trim") => PathKind::Function, + UniCase::ascii("string::uppercase") => PathKind::Function, + UniCase::ascii("string::words") => PathKind::Function, + UniCase::ascii("string::distance::hamming") => PathKind::Function, + UniCase::ascii("string::distance::levenshtein") => PathKind::Function, + UniCase::ascii("string::is::alphanum") => PathKind::Function, + UniCase::ascii("string::is::alpha") => PathKind::Function, + UniCase::ascii("string::is::ascii") => PathKind::Function, + UniCase::ascii("string::is::datetime") => PathKind::Function, + UniCase::ascii("string::is::domain") => PathKind::Function, + UniCase::ascii("string::is::email") => PathKind::Function, + UniCase::ascii("string::is::hexadecimal") => PathKind::Function, + UniCase::ascii("string::is::latitude") => PathKind::Function, + UniCase::ascii("string::is::longitude") => PathKind::Function, + UniCase::ascii("string::is::numeric") => PathKind::Function, + UniCase::ascii("string::is::semver") => PathKind::Function, + UniCase::ascii("string::is::url") => PathKind::Function, + UniCase::ascii("string::is::uuid") => PathKind::Function, + UniCase::ascii("string::semver::compare") => PathKind::Function, + UniCase::ascii("string::semver::major") => PathKind::Function, + 
UniCase::ascii("string::semver::minor") => PathKind::Function, + UniCase::ascii("string::semver::patch") => PathKind::Function, + UniCase::ascii("string::semver::inc::major") => PathKind::Function, + UniCase::ascii("string::semver::inc::minor") => PathKind::Function, + UniCase::ascii("string::semver::inc::patch") => PathKind::Function, + UniCase::ascii("string::semver::set::major") => PathKind::Function, + UniCase::ascii("string::semver::set::minor") => PathKind::Function, + UniCase::ascii("string::semver::set::patch") => PathKind::Function, + UniCase::ascii("string::similarity::fuzzy") => PathKind::Function, + UniCase::ascii("string::similarity::jaro") => PathKind::Function, + UniCase::ascii("string::similarity::smithwaterman") => PathKind::Function, + UniCase::ascii("string::matches") => PathKind::Function, + // + UniCase::ascii("time::ceil") => PathKind::Function, + UniCase::ascii("time::day") => PathKind::Function, + UniCase::ascii("time::floor") => PathKind::Function, + UniCase::ascii("time::format") => PathKind::Function, + UniCase::ascii("time::group") => PathKind::Function, + UniCase::ascii("time::hour") => PathKind::Function, + UniCase::ascii("time::max") => PathKind::Function, + UniCase::ascii("time::min") => PathKind::Function, + UniCase::ascii("time::minute") => PathKind::Function, + UniCase::ascii("time::month") => PathKind::Function, + UniCase::ascii("time::nano") => PathKind::Function, + UniCase::ascii("time::micros") => PathKind::Function, + UniCase::ascii("time::millis") => PathKind::Function, + UniCase::ascii("time::now") => PathKind::Function, + UniCase::ascii("time::round") => PathKind::Function, + UniCase::ascii("time::second") => PathKind::Function, + UniCase::ascii("time::timezone") => PathKind::Function, + UniCase::ascii("time::unix") => PathKind::Function, + UniCase::ascii("time::wday") => PathKind::Function, + UniCase::ascii("time::week") => PathKind::Function, + UniCase::ascii("time::yday") => PathKind::Function, + 
UniCase::ascii("time::year") => PathKind::Function, + UniCase::ascii("time::from::nanos") => PathKind::Function, + UniCase::ascii("time::from::micros") => PathKind::Function, + UniCase::ascii("time::from::millis") => PathKind::Function, + UniCase::ascii("time::from::secs") => PathKind::Function, + UniCase::ascii("time::from::unix") => PathKind::Function, + // + UniCase::ascii("type::bool") => PathKind::Function, + UniCase::ascii("type::datetime") => PathKind::Function, + UniCase::ascii("type::decimal") => PathKind::Function, + UniCase::ascii("type::duration") => PathKind::Function, + UniCase::ascii("type::float") => PathKind::Function, + UniCase::ascii("type::int") => PathKind::Function, + UniCase::ascii("type::number") => PathKind::Function, + UniCase::ascii("type::point") => PathKind::Function, + UniCase::ascii("type::string") => PathKind::Function, + UniCase::ascii("type::table") => PathKind::Function, + UniCase::ascii("type::thing") => PathKind::Function, + UniCase::ascii("type::is::array") => PathKind::Function, + UniCase::ascii("type::is::bool") => PathKind::Function, + UniCase::ascii("type::is::bytes") => PathKind::Function, + UniCase::ascii("type::is::collection") => PathKind::Function, + UniCase::ascii("type::is::datetime") => PathKind::Function, + UniCase::ascii("type::is::decimal") => PathKind::Function, + UniCase::ascii("type::is::duration") => PathKind::Function, + UniCase::ascii("type::is::float") => PathKind::Function, + UniCase::ascii("type::is::geometry") => PathKind::Function, + UniCase::ascii("type::is::int") => PathKind::Function, + UniCase::ascii("type::is::line") => PathKind::Function, + UniCase::ascii("type::is::null") => PathKind::Function, + UniCase::ascii("type::is::none") => PathKind::Function, + UniCase::ascii("type::is::multiline") => PathKind::Function, + UniCase::ascii("type::is::multipoint") => PathKind::Function, + UniCase::ascii("type::is::multipolygon") => PathKind::Function, + UniCase::ascii("type::is::number") => 
PathKind::Function, + UniCase::ascii("type::is::object") => PathKind::Function, + UniCase::ascii("type::is::point") => PathKind::Function, + UniCase::ascii("type::is::polygon") => PathKind::Function, + UniCase::ascii("type::is::record") => PathKind::Function, + UniCase::ascii("type::is::string") => PathKind::Function, + UniCase::ascii("type::is::uuid") => PathKind::Function, + // + UniCase::ascii("vector::add") => PathKind::Function, + UniCase::ascii("vector::angle") => PathKind::Function, + UniCase::ascii("vector::cross") => PathKind::Function, + UniCase::ascii("vector::dot") => PathKind::Function, + UniCase::ascii("vector::divide") => PathKind::Function, + UniCase::ascii("vector::magnitude") => PathKind::Function, + UniCase::ascii("vector::multiply") => PathKind::Function, + UniCase::ascii("vector::normalize") => PathKind::Function, + UniCase::ascii("vector::project") => PathKind::Function, + UniCase::ascii("vector::subtract") => PathKind::Function, + UniCase::ascii("vector::distance::chebyshev") => PathKind::Function, + UniCase::ascii("vector::distance::euclidean") => PathKind::Function, + UniCase::ascii("vector::distance::hamming") => PathKind::Function, + UniCase::ascii("vector::distance::mahalanobis") => PathKind::Function, + UniCase::ascii("vector::distance::manhattan") => PathKind::Function, + UniCase::ascii("vector::distance::minkowski") => PathKind::Function, + UniCase::ascii("vector::similarity::cosine") => PathKind::Function, + UniCase::ascii("vector::similarity::jaccard") => PathKind::Function, + UniCase::ascii("vector::similarity::pearson") => PathKind::Function, + UniCase::ascii("vector::similarity::spearman") => PathKind::Function, + // + UniCase::ascii("crypto::argon2::compare") => PathKind::Function, + UniCase::ascii("crypto::argon2::generate") => PathKind::Function, + UniCase::ascii("crypto::bcrypt::compare") => PathKind::Function, + UniCase::ascii("crypto::bcrypt::generate") => PathKind::Function, + UniCase::ascii("crypto::pbkdf2::compare") => 
PathKind::Function, + UniCase::ascii("crypto::pbkdf2::generate") => PathKind::Function, + UniCase::ascii("crypto::scrypt::compare") => PathKind::Function, + UniCase::ascii("crypto::scrypt::generate") => PathKind::Function, + // + UniCase::ascii("http::head") => PathKind::Function, + UniCase::ascii("http::get") => PathKind::Function, + UniCase::ascii("http::put") => PathKind::Function, + UniCase::ascii("http::post") => PathKind::Function, + UniCase::ascii("http::patch") => PathKind::Function, + UniCase::ascii("http::delete") => PathKind::Function, + // + UniCase::ascii("search::analyze") => PathKind::Function, + UniCase::ascii("search::score") => PathKind::Function, + UniCase::ascii("search::highlight") => PathKind::Function, + UniCase::ascii("search::offsets") => PathKind::Function, + // + UniCase::ascii("sleep") => PathKind::Function, + // + UniCase::ascii("type::field") => PathKind::Function, + UniCase::ascii("type::fields") => PathKind::Function, + + // constants + UniCase::ascii("math::E") => PathKind::Constant(Constant::MathE), + UniCase::ascii("math::FRAC_1_PI") => PathKind::Constant(Constant::MathFrac1Pi), + UniCase::ascii("math::FRAC_1_SQRT_2") => PathKind::Constant(Constant::MathFrac1Sqrt2), + UniCase::ascii("math::FRAC_2_PI") => PathKind::Constant(Constant::MathFrac2Pi), + UniCase::ascii("math::FRAC_2_SQRT_PI") => PathKind::Constant(Constant::MathFrac2SqrtPi), + UniCase::ascii("math::FRAC_PI_2") => PathKind::Constant(Constant::MathFracPi2), + UniCase::ascii("math::FRAC_PI_3") => PathKind::Constant(Constant::MathFracPi3), + UniCase::ascii("math::FRAC_PI_4") => PathKind::Constant(Constant::MathFracPi4), + UniCase::ascii("math::FRAC_PI_6") => PathKind::Constant(Constant::MathFracPi6), + UniCase::ascii("math::FRAC_PI_8") => PathKind::Constant(Constant::MathFracPi8), + UniCase::ascii("math::INF") => PathKind::Constant(Constant::MathInf), + UniCase::ascii("math::LN_10") => PathKind::Constant(Constant::MathLn10), + UniCase::ascii("math::LN_2") => 
PathKind::Constant(Constant::MathLn2),
	UniCase::ascii("math::LOG10_2") => PathKind::Constant(Constant::MathLog102),
	UniCase::ascii("math::LOG10_E") => PathKind::Constant(Constant::MathLog10E),
	UniCase::ascii("math::LOG2_10") => PathKind::Constant(Constant::MathLog210),
	UniCase::ascii("math::LOG2_E") => PathKind::Constant(Constant::MathLog2E),
	UniCase::ascii("math::PI") => PathKind::Constant(Constant::MathPi),
	UniCase::ascii("math::SQRT_2") => PathKind::Constant(Constant::MathSqrt2),
	UniCase::ascii("math::TAU") => PathKind::Constant(Constant::MathTau),
};

impl Parser<'_> {
	/// Parse a builtin path such as `math::floor` or `math::PI`.
	///
	/// `start` is the span of the first, already consumed, path segment. This
	/// method eats any further `::`-separated segments, then resolves the full
	/// path text against the static `PATHS` table: constants become a
	/// [`Value::Constant`], function names are followed by an argument list.
	/// Unknown paths produce an `InvalidPath` error, with a
	/// levenshtein-distance based "did you mean" suggestion when a close
	/// enough candidate exists.
	pub fn parse_builtin(&mut self, start: Span) -> ParseResult<Value> {
		let mut last_span = start;
		while self.eat(t!("::")) {
			// NOTE(review): the turbofish type was stripped when this chunk was
			// mangled; restored as `Ident` — confirm against upstream source.
			self.next_token_value::<Ident>()?;
			last_span = self.last_span();
		}

		let span = start.covers(last_span);
		let slice = self.lexer.reader.span(span);

		// Parser implementations guarantee that the slice is a valid UTF-8 string.
		let str = std::str::from_utf8(slice).unwrap();

		match PATHS.get_entry(&UniCase::ascii(str)) {
			Some((_, PathKind::Constant(x))) => Ok(Value::Constant(x.clone())),
			Some((k, PathKind::Function)) => self
				.parse_builtin_function(k.into_inner().to_owned())
				.map(|x| Value::Function(Box::new(x))),
			None => {
				// Generate a suggestion.
				// Don't search further once a candidate within the cut-off is found.
				let mut cut_off = MAX_LEVENSTHEIN_CUT_OFF;

				let possibly = PATHS
					.keys()
					.copied()
					.min_by_key(|x| {
						let res = levenshtein(str.as_bytes(), x.as_bytes(), cut_off);
						cut_off = res.min(cut_off);
						res
					})
					.map(|x| x.into_inner());

				// If no candidate lowered the cut-off, any suggestion would
				// probably be nonsensical, so don't give one. (This replaces
				// the previous duplicated error construction.)
				let possibly = if cut_off == MAX_LEVENSTHEIN_CUT_OFF {
					None
				} else {
					possibly
				};

				Err(ParseError::new(
					ParseErrorKind::InvalidPath {
						possibly,
					},
					span,
				))
			}
		}
	}

	/// Parse a call to a builtin function.
	///
	/// Expects the opening `(` as the next token and parses comma-separated
	/// value expressions until the matching `)`. A trailing comma is allowed.
	pub fn parse_builtin_function(&mut self, name: String) -> ParseResult<Function> {
		let start = expected!(self, t!("(")).span;
		let mut args = Vec::new();
		loop {
			if self.eat(t!(")")) {
				break;
			}

			args.push(self.parse_value_field()?);

			if !self.eat(t!(",")) {
				self.expect_closing_delimiter(t!(")"), start)?;
				break;
			}
		}
		Ok(Function::Normal(name, args))
	}
}

#[cfg(test)]
mod test {
	use super::{MAX_FUNCTION_NAME_LEN, PATHS};

	#[test]
	fn function_name_constant_up_to_date() {
		let max = PATHS.keys().map(|x| x.len()).max().unwrap();
		// These two need to be the same, but the constant needs to be manually
		// updated if PATHS ever changes so that these two values diverge.
		assert_eq!(
			MAX_FUNCTION_NAME_LEN, max,
			"the constant MAX_FUNCTION_NAME_LEN should be {} but is {}, please update the constant",
			max, MAX_FUNCTION_NAME_LEN
		);
	}
}
+ UnexpectedEof { + expected: &'static str, + }, + /// An error for an unclosed delimiter with a span of the token which should be closed. + UnclosedDelimiter { + expected: TokenKind, + should_close: Span, + }, + /// An error for parsing an integer + InvalidInteger { + error: ParseIntError, + }, + /// An error for parsing an float + InvalidFloat { + error: ParseFloatError, + }, + /// An error for parsing an decimal. + InvalidDecimal { + error: rust_decimal::Error, + }, + DisallowedStatement, + /// The parser encountered an token which could not be lexed correctly. + InvalidToken(LexError), + /// Matched a path which was invalid. + InvalidPath { + possibly: Option<&'static str>, + }, + NoWhitespace, + /// A path in the parser which was not yet finished. + /// Should eventually be removed. + Todo, +} + +/// A parsing error. +#[derive(Debug)] +pub struct ParseError { + pub kind: ParseErrorKind, + pub at: Span, +} + +impl ParseError { + /// Create a new parse error. + pub fn new(kind: ParseErrorKind, at: Span) -> Self { + ParseError { + kind, + at, + } + } + + /// Create a rendered error from the string this error was generated from. 
+ pub fn render_on(&self, source: &str) -> RenderedError { + match &self.kind { + ParseErrorKind::Unexpected { + found, + expected, + } => { + let text = format!("Unexpected token '{}' expected {}", found.as_str(), expected); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text, + snippets: vec![snippet], + } + } + ParseErrorKind::UnexpectedExplain { + found, + expected, + explain, + } => { + let text = format!("Unexpected token '{}' expected {}", found.as_str(), expected); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, Some(explain)); + RenderedError { + text, + snippets: vec![snippet], + } + } + ParseErrorKind::UnexpectedEof { + expected, + } => { + let text = format!("Query ended early, expected {}", expected); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text, + snippets: vec![snippet], + } + } + ParseErrorKind::UnclosedDelimiter { + expected, + should_close, + } => { + let text = format!("Expected closing delimiter {}", expected.as_str()); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + let locations = Location::range_of_span(source, *should_close); + let close_snippet = Snippet::from_source_location_range( + source, + locations, + Some("Expected this delimiter to close"), + ); + RenderedError { + text, + snippets: vec![snippet, close_snippet], + } + } + ParseErrorKind::DisallowedStatement => { + let text = "This statement is not allowed in this location".to_owned(); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text, + snippets: vec![snippet], + } + } + 
ParseErrorKind::InvalidToken(e) => { + let text = e.to_string(); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text, + snippets: vec![snippet], + } + } + ParseErrorKind::Todo => { + let text = "Parser hit not yet implemented path".to_string(); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text, + snippets: vec![snippet], + } + } + ParseErrorKind::InvalidPath { + possibly, + } => { + let mut text = "Invalid path".to_owned(); + if let Some(p) = possibly { + // writing into a string never causes an error. + write!(text, ", did you maybe mean `{}`", p).unwrap(); + } + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range( + source, + locations, + Some("This path does not exist."), + ); + RenderedError { + text, + snippets: vec![snippet], + } + } + ParseErrorKind::InvalidInteger { + ref error, + } => { + let text = format!("failed to parse integer, {error}"); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text: text.to_string(), + snippets: vec![snippet], + } + } + ParseErrorKind::InvalidFloat { + ref error, + } => { + let text = format!("failed to parse floating point, {error}"); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text: text.to_string(), + snippets: vec![snippet], + } + } + ParseErrorKind::InvalidDecimal { + ref error, + } => { + let text = format!("failed to parse decimal number, {error}"); + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text: 
text.to_string(), + snippets: vec![snippet], + } + } + ParseErrorKind::NoWhitespace => { + let text = "Whitespace is dissallowed in this position"; + let locations = Location::range_of_span(source, self.at); + let snippet = Snippet::from_source_location_range(source, locations, None); + RenderedError { + text: text.to_string(), + snippets: vec![snippet], + } + } + } + } +} diff --git a/lib/src/syn/v2/parser/expression.rs b/lib/src/syn/v2/parser/expression.rs new file mode 100644 index 00000000..bffbdec9 --- /dev/null +++ b/lib/src/syn/v2/parser/expression.rs @@ -0,0 +1,400 @@ +//! This module defines the pratt parser for operators. +use super::mac::unexpected; +use crate::sql::{value::TryNeg, Cast, Expression, Number, Operator, Value}; +use crate::syn::v2::{ + parser::{mac::expected, ParseResult, Parser}, + token::{t, NumberKind, TokenKind}, +}; +use std::cmp::Ordering; + +impl Parser<'_> { + /// Parsers a generic value. + /// + /// A generic loose ident like `foo` in for example `foo.bar` can be two different values + /// depending on context: a table or a field the current document. This function parses loose + /// idents as a table, see [`parse_value_field`] for parsing loose idents as fields + pub fn parse_value(&mut self) -> ParseResult { + let old = self.table_as_field; + self.table_as_field = false; + let res = self.pratt_parse_expr(0); + self.table_as_field = old; + res + } + + /// Parsers a generic value. + /// + /// A generic loose ident like `foo` in for example `foo.bar` can be two different values + /// depending on context: a table or a field the current document. This function parses loose + /// idents as a field, see [`parse_value`] for parsing loose idents as table + pub fn parse_value_field(&mut self) -> ParseResult { + let old = self.table_as_field; + self.table_as_field = true; + let res = self.pratt_parse_expr(0); + self.table_as_field = old; + res + } + + /// Parse a assigner operator. 
+ pub fn parse_assigner(&mut self) -> ParseResult { + match self.next().kind { + t!("=") => Ok(Operator::Equal), + t!("+=") => Ok(Operator::Inc), + t!("-=") => Ok(Operator::Dec), + t!("+?=") => Ok(Operator::Ext), + x => unexpected!(self, x, "an assign operator"), + } + } + + /// Returns the binding power of an infix operator. + /// + /// Binding power is the opposite of precendence: a higher binding power means that a token is + /// more like to operate directly on it's neighbours. Example `*` has a higher binding power + /// than `-` resulting in 1 - 2 * 3 being parsed as 1 - (2 * 3). + /// + /// This returns two numbers: the binding power of the left neighbour and the right neighbour. + /// If the left number is lower then the right it is left associative: i.e. '1 op 2 op 3' will + /// be parsed as '(1 op 2) op 3'. If the right number is lower the operator is right + /// associative: i.e. '1 op 2 op 3' will be parsed as '1 op (2 op 3)'. For example: `+=` is + /// right associative so `a += b += 3` will be parsed as `a += (b += 3)` while `+` is left + /// associative and will be parsed as `(a + b) + c`. + fn infix_binding_power(token: TokenKind) -> Option<(u8, u8)> { + // TODO: Look at ordering of operators. + match token { + // assigment operators have the lowes binding power. + t!("+=") | t!("-=") | t!("+?=") => Some((2, 1)), + + t!("||") | t!("OR") => Some((3, 4)), + t!("&&") | t!("AND") => Some((5, 6)), + + // Equality operators have same binding power. 
+ t!("=") + | t!("IS") + | t!("==") + | t!("!=") + | t!("*=") + | t!("?=") + | t!("~") + | t!("!~") + | t!("*~") + | t!("?~") + | t!("@") => Some((7, 8)), + + t!("<") + | t!("<=") + | t!(">") + | t!(">=") + | t!("∋") + | t!("CONTAINS") + | t!("∌") + | t!("CONTAINSNOT") + | t!("∈") + | t!("INSIDE") + | t!("∉") + | t!("NOTINSIDE") + | t!("⊇") + | t!("CONTAINSALL") + | t!("⊃") + | t!("CONTAINSANY") + | t!("⊅") + | t!("CONTAINSNONE") + | t!("⊆") + | t!("ALLINSIDE") + | t!("⊂") + | t!("ANYINSIDE") + | t!("⊄") + | t!("NONEINSIDE") + | t!("OUTSIDE") + | t!("INTERSECTS") + | t!("NOT") + | t!("IN") + | t!("KNN") => Some((9, 10)), + + t!("+") | t!("-") => Some((11, 12)), + t!("*") | t!("×") | t!("/") | t!("÷") => Some((13, 14)), + t!("**") => Some((15, 16)), + t!("?:") | t!("??") => Some((17, 18)), + _ => None, + } + } + + fn prefix_binding_power(&mut self, token: TokenKind) -> Option<((), u8)> { + match token { + t!("!") | t!("+") | t!("-") => Some(((), 19)), + t!("<") if self.peek_token_at(1).kind != t!("FUTURE") => Some(((), 20)), + _ => None, + } + } + + fn parse_prefix_op(&mut self, min_bp: u8) -> ParseResult { + const I64_ABS_MAX: u64 = 9223372036854775808; + + let token = self.next(); + let operator = match token.kind { + t!("+") => Operator::Add, + t!("-") => Operator::Neg, + t!("!") => Operator::Not, + t!("<") => { + let kind = self.parse_kind(token.span)?; + let value = self.pratt_parse_expr(min_bp)?; + let cast = Cast(kind, value); + return Ok(Value::Cast(Box::new(cast))); + } + // should be unreachable as we previously check if the token was a prefix op. + _ => unreachable!(), + }; + + // HACK: The way we handle numbers in the parser has one downside: We can't parse i64::MIN + // directly. + // The tokens [`-`, `1232`] are parsed independently where - is parsed as a unary operator then 1232 + // as a positive i64 integer. This results in a problem when 9223372036854775808 is the + // positive integer. 
This is larger then i64::MAX so the parser fallsback to parsing a + // floating point number. However -9223372036854775808 does fit in a i64 but the parser is, + // when parsing the number, unaware that the number will be negative. + // To handle this correctly we parse negation operator followed by an integer here so we can + // make sure this specific case is handled correctly. + if let Operator::Neg = operator { + // parse -12301230 immediately as a negative number, + if let TokenKind::Number(NumberKind::Integer) = self.peek_kind() { + let token = self.next(); + let number = self.token_value::(token)?; + let number = match number.cmp(&I64_ABS_MAX) { + Ordering::Less => Number::Int(-(number as i64)), + Ordering::Equal => Number::Int(i64::MIN), + Ordering::Greater => self.token_value::(token)?.try_neg().unwrap(), + }; + return Ok(Value::Number(number)); + } + } + + let v = self.pratt_parse_expr(min_bp)?; + + // HACK: For compatiblity with the old parser apply + and - operator immediately if the + // left value is a number. + if let Value::Number(number) = v { + if let Operator::Neg = operator { + // this can only panic if `number` is i64::MIN which currently can't be parsed. + return Ok(Value::Number(number.try_neg().unwrap())); + } + + if let Operator::Add = operator { + // doesn't do anything. + return Ok(Value::Number(number)); + } + Ok(Value::Expression(Box::new(Expression::Unary { + o: operator, + v: Value::Number(number), + }))) + } else { + Ok(Value::Expression(Box::new(Expression::Unary { + o: operator, + v, + }))) + } + } + + fn parse_infix_op(&mut self, min_bp: u8, lhs: Value) -> ParseResult { + let token = self.next(); + let operator = match token.kind { + // TODO: change operator name? 
+ t!("||") | t!("OR") => Operator::Or, + t!("&&") | t!("AND") => Operator::And, + t!("?:") => Operator::Tco, + t!("??") => Operator::Nco, + t!("==") => Operator::Exact, + t!("!=") => Operator::NotEqual, + t!("*=") => Operator::AllEqual, + t!("?=") => Operator::AnyEqual, + t!("=") => Operator::Equal, + t!("!~") => Operator::NotLike, + t!("*~") => Operator::AllLike, + t!("?~") => Operator::AnyLike, + t!("~") => Operator::Like, + t!("@") => { + let reference = (!self.eat(t!("@"))) + .then(|| { + let number = self.next_token_value()?; + expected!(self, t!("@")); + Ok(number) + }) + .transpose()?; + Operator::Matches(reference) + } + t!("<=") => Operator::LessThanOrEqual, + t!("<") => Operator::LessThan, + t!(">=") => Operator::MoreThanOrEqual, + t!(">") => Operator::MoreThan, + t!("**") => Operator::Pow, + t!("+") => Operator::Add, + t!("-") => Operator::Sub, + t!("*") | t!("×") => Operator::Mul, + t!("/") | t!("÷") => Operator::Div, + t!("∋") | t!("CONTAINS") => Operator::Contain, + t!("∌") | t!("CONTAINSNOT") => Operator::NotContain, + t!("∈") | t!("INSIDE") => Operator::Inside, + t!("∉") | t!("NOTINSIDE") => Operator::NotInside, + t!("⊇") | t!("CONTAINSALL") => Operator::ContainAll, + t!("⊃") | t!("CONTAINSANY") => Operator::ContainAny, + t!("⊅") | t!("CONTAINSNONE") => Operator::ContainNone, + t!("⊆") | t!("ALLINSIDE") => Operator::AllInside, + t!("⊂") | t!("ANYINSIDE") => Operator::AnyInside, + t!("⊄") | t!("NONEINSIDE") => Operator::NoneInside, + t!("IS") => { + if self.eat(t!("NOT")) { + Operator::NotEqual + } else { + Operator::Equal + } + } + t!("OUTSIDE") => Operator::Outside, + t!("INTERSECTS") => Operator::Intersects, + t!("NOT") => { + expected!(self, t!("IN")); + Operator::NotInside + } + t!("IN") => Operator::Inside, + t!("KNN") => { + let start = expected!(self, t!("<")).span; + let amount = self.next_token_value()?; + let dist = self.eat(t!(",")).then(|| self.parse_distance()).transpose()?; + self.expect_closing_delimiter(t!(">"), start)?; + 
Operator::Knn(amount, dist) + } + + // should be unreachable as we previously check if the token was a prefix op. + _ => unreachable!(), + }; + let rhs = self.pratt_parse_expr(min_bp)?; + Ok(Value::Expression(Box::new(Expression::Binary { + l: lhs, + o: operator, + r: rhs, + }))) + } + + /// The pratt parsing loop. + /// Parses expression according to binding power. + fn pratt_parse_expr(&mut self, min_bp: u8) -> ParseResult { + let peek = self.peek(); + let mut lhs = if let Some(((), r_bp)) = self.prefix_binding_power(peek.kind) { + self.parse_prefix_op(r_bp)? + } else { + self.parse_idiom_expression()? + }; + + loop { + let token = self.peek(); + let Some((l_bp, r_bp)) = Self::infix_binding_power(token.kind) else { + break; + }; + + if l_bp < min_bp { + break; + } + + lhs = self.parse_infix_op(r_bp, lhs)?; + } + + Ok(lhs) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::sql::{Block, Future, Kind}; + use crate::syn::Parse; + + #[test] + fn cast_int() { + let sql = "1.2345"; + let out = Value::parse(sql); + assert_eq!(" 1.2345f", format!("{}", out)); + assert_eq!(out, Value::from(Cast(Kind::Int, 1.2345.into()))); + } + + #[test] + fn cast_string() { + let sql = "1.2345"; + let out = Value::parse(sql); + assert_eq!(" 1.2345f", format!("{}", out)); + assert_eq!(out, Value::from(Cast(Kind::String, 1.2345.into()))); + } + + #[test] + fn expression_statement() { + let sql = "true AND false"; + let out = Value::parse(sql); + assert_eq!("true AND false", format!("{}", out)); + } + + #[test] + fn expression_left_opened() { + let sql = "3 * 3 * 3 = 27"; + let out = Value::parse(sql); + assert_eq!("3 * 3 * 3 = 27", format!("{}", out)); + } + + #[test] + fn expression_left_closed() { + let sql = "(3 * 3 * 3) = 27"; + let out = Value::parse(sql); + assert_eq!("(3 * 3 * 3) = 27", format!("{}", out)); + } + + #[test] + fn expression_right_opened() { + let sql = "27 = 3 * 3 * 3"; + let out = Value::parse(sql); + assert_eq!("27 = 3 * 3 * 3", format!("{}", out)); + } 
+ + #[test] + fn expression_right_closed() { + let sql = "27 = (3 * 3 * 3)"; + let out = Value::parse(sql); + assert_eq!("27 = (3 * 3 * 3)", format!("{}", out)); + } + + #[test] + fn expression_both_opened() { + let sql = "3 * 3 * 3 = 3 * 3 * 3"; + let out = Value::parse(sql); + assert_eq!("3 * 3 * 3 = 3 * 3 * 3", format!("{}", out)); + } + + #[test] + fn expression_both_closed() { + let sql = "(3 * 3 * 3) = (3 * 3 * 3)"; + let out = Value::parse(sql); + assert_eq!("(3 * 3 * 3) = (3 * 3 * 3)", format!("{}", out)); + } + + #[test] + fn expression_unary() { + let sql = "-a"; + let out = Value::parse(sql); + assert_eq!(sql, format!("{}", out)); + } + + #[test] + fn expression_with_unary() { + let sql = "-(5) + 5"; + let out = Value::parse(sql); + assert_eq!(sql, format!("{}", out)); + } + + #[test] + fn parse_expression() { + let sql = " { 5 + 10 }"; + let out = Value::parse(sql); + assert_eq!(" { 5 + 10 }", format!("{}", out)); + assert_eq!( + out, + Value::from(Future(Block::from(Value::from(Expression::Binary { + l: Value::Number(Number::Int(5)), + o: Operator::Add, + r: Value::Number(Number::Int(10)) + })))) + ); + } +} diff --git a/lib/src/syn/v2/parser/function.rs b/lib/src/syn/v2/parser/function.rs new file mode 100644 index 00000000..98277108 --- /dev/null +++ b/lib/src/syn/v2/parser/function.rs @@ -0,0 +1,302 @@ +use crate::{ + sql::{Function, Ident, Model}, + syn::v2::{ + parser::mac::{expected, unexpected}, + token::{t, NumberKind, TokenKind}, + }, +}; + +use super::{ParseResult, Parser}; + +impl Parser<'_> { + /// Parse a custom function function call + /// + /// Expects `fn` to already be called. 
+ pub fn parse_custom_function(&mut self) -> ParseResult { + expected!(self, t!("::")); + let mut name = self.next_token_value::()?.0; + while self.eat(t!("::")) { + name.push_str("::"); + name.push_str(&self.next_token_value::()?.0) + } + let start = expected!(self, t!("(")).span; + let mut args = Vec::new(); + loop { + if self.eat(t!(")")) { + break; + } + + args.push(self.parse_value_field()?); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!(")"), start)?; + break; + } + } + + Ok(Function::Custom(name, args)) + } + + /// Parse a model invocation + /// + /// Expects `ml` to already be called. + pub fn parse_model(&mut self) -> ParseResult { + expected!(self, t!("::")); + let mut name = self.next_token_value::()?.0; + while self.eat(t!("::")) { + name.push_str("::"); + name.push_str(&self.next_token_value::()?.0) + } + let start = expected!(self, t!("<")).span; + + let token = self.lexer.lex_only_integer(); + let major = match token.kind { + TokenKind::Number(NumberKind::Integer) => self.token_value::(token)?, + x => unexpected!(self, x, "a integer"), + }; + + expected!(self, t!(".")); + + let token = self.lexer.lex_only_integer(); + let minor = match token.kind { + TokenKind::Number(NumberKind::Integer) => self.token_value::(token)?, + x => unexpected!(self, x, "a integer"), + }; + + expected!(self, t!(".")); + + let token = self.lexer.lex_only_integer(); + let patch = match token.kind { + TokenKind::Number(NumberKind::Integer) => self.token_value::(token)?, + x => unexpected!(self, x, "a integer"), + }; + + self.expect_closing_delimiter(t!(">"), start)?; + + let start = expected!(self, t!("(")).span; + let mut args = Vec::new(); + loop { + if self.eat(t!(")")) { + break; + } + + args.push(self.parse_value_field()?); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!(")"), start)?; + break; + } + } + Ok(Model { + name, + version: format!("{}.{}.{}", major, minor, patch), + args, + }) + } +} + +#[cfg(test)] +mod test { + use crate::{ + 
sql::{Script, Value}, + syn::{self, Parse}, + }; + + use super::*; + + #[test] + fn function_single() { + let sql = "count()"; + let out = Value::parse(sql); + assert_eq!("count()", format!("{}", out)); + assert_eq!(out, Value::from(Function::Normal(String::from("count"), vec![]))); + } + + #[test] + fn function_single_not() { + let sql = "not(10)"; + let out = Value::parse(sql); + assert_eq!("not(10)", format!("{}", out)); + assert_eq!(out, Value::from(Function::Normal("not".to_owned(), vec![10.into()]))); + } + + #[test] + fn function_module() { + let sql = "rand::uuid()"; + let out = Value::parse(sql); + assert_eq!("rand::uuid()", format!("{}", out)); + assert_eq!(out, Value::from(Function::Normal(String::from("rand::uuid"), vec![]))); + } + + #[test] + fn function_arguments() { + let sql = "string::is::numeric(null)"; + let out = Value::parse(sql); + assert_eq!("string::is::numeric(NULL)", format!("{}", out)); + assert_eq!( + out, + Value::from(Function::Normal(String::from("string::is::numeric"), vec![Value::Null])) + ); + } + + #[test] + fn function_simple_together() { + let sql = "function() { return 'test'; }"; + let out = Value::parse(sql); + assert_eq!("function() { return 'test'; }", format!("{}", out)); + assert_eq!(out, Value::from(Function::Script(Script::from(" return 'test'; "), vec![]))); + } + + #[test] + fn function_simple_whitespace() { + let sql = "function () { return 'test'; }"; + let out = Value::parse(sql); + assert_eq!("function() { return 'test'; }", format!("{}", out)); + assert_eq!(out, Value::from(Function::Script(Script::from(" return 'test'; "), vec![]))); + } + + #[test] + fn function_script_expression() { + let sql = "function() { return this.tags.filter(t => { return t.length > 3; }); }"; + let out = Value::parse(sql); + assert_eq!( + "function() { return this.tags.filter(t => { return t.length > 3; }); }", + format!("{}", out) + ); + assert_eq!( + out, + Value::from(Function::Script( + Script::from(" return this.tags.filter(t => 
{ return t.length > 3; }); "), + vec![] + )) + ); + } + + #[test] + fn ml_model_example() { + let sql = r#"ml::insurance::prediction<1.0.0>({ + age: 18, + disposable_income: "yes", + purchased_before: true + }) + "#; + let out = Value::parse(sql); + assert_eq!("ml::insurance::prediction<1.0.0>({ age: 18, disposable_income: 'yes', purchased_before: true })",out.to_string()); + } + + #[test] + fn ml_model_example_in_select() { + let sql = r" + SELECT + name, + age, + ml::insurance::prediction<1.0.0>({ + age: age, + disposable_income: math::round(income), + purchased_before: array::len(->purchased->property) > 0, + }) AS likely_to_buy FROM person:tobie; + "; + let out = syn::parse(sql).unwrap(); + assert_eq!( + "SELECT name, age, ml::insurance::prediction<1.0.0>({ age: age, disposable_income: math::round(income), purchased_before: array::len(->purchased->property) > 0 }) AS likely_to_buy FROM person:tobie;", + out.to_string() + ); + } + + #[test] + fn ml_model_with_mutiple_arguments() { + let sql = "ml::insurance::prediction<1.0.0>(1,2,3,4,)"; + let out = Value::parse(sql); + assert_eq!("ml::insurance::prediction<1.0.0>(1,2,3,4)", out.to_string()); + } + + #[test] + fn script_basic() { + let sql = "function(){return true;}"; + let out = Value::parse(sql); + assert_eq!("function() {return true;}", format!("{}", out)); + assert_eq!(out, Value::from(Function::Script(Script::from("return true;"), Vec::new()))); + } + + #[test] + fn script_object() { + let sql = "function(){return { test: true, something: { other: true } };}"; + let out = Value::parse(sql); + assert_eq!( + "function() {return { test: true, something: { other: true } };}", + format!("{}", out) + ); + assert_eq!( + out, + Value::from(Function::Script( + Script::from("return { test: true, something: { other: true } };"), + Vec::new() + )) + ); + } + + #[test] + fn script_closure() { + let sql = "function(){return this.values.map(v => `This value is ${Number(v * 3)}`);}"; + let out = Value::parse(sql); + 
assert_eq!( + "function() {return this.values.map(v => `This value is ${Number(v * 3)}`);}", + format!("{}", out) + ); + assert_eq!( + out, + Value::from(Function::Script( + Script::from("return this.values.map(v => `This value is ${Number(v * 3)}`);"), + Vec::new() + )) + ); + } + + #[test] + fn script_complex() { + let sql = r#"function(){return { test: true, some: { object: "some text with uneven {{{ {} \" brackets", else: false } };}"#; + let out = Value::parse(sql); + assert_eq!( + r#"function() {return { test: true, some: { object: "some text with uneven {{{ {} \" brackets", else: false } };}"#, + format!("{}", out) + ); + assert_eq!( + out, + Value::from(Function::Script( + Script::from( + r#"return { test: true, some: { object: "some text with uneven {{{ {} \" brackets", else: false } };"# + ), + Vec::new() + )) + ); + } + + #[test] + fn script_advanced() { + let body = r#" + // { + // } + // {} + // { } + /* { */ + /* } */ + /* {} */ + /* { } */ + /* {{{ $ }} */ + /* /* /* /* */ + let x = {}; + let x = { }; + let x = '{'; + let x = "{"; + let x = '}'; + let x = "}"; + let x = '} } { {'; + let x = 3 / 4 * 2; + let x = /* something */ 45 + 2; + "#; + let sql = "function() {".to_owned() + body + "}"; + let out = Value::parse(&sql); + assert_eq!(sql, format!("{}", out)); + assert_eq!(out, Value::from(Function::Script(Script::from(body), Vec::new()))); + } +} diff --git a/lib/src/syn/v2/parser/idiom.rs b/lib/src/syn/v2/parser/idiom.rs new file mode 100644 index 00000000..f9abadfb --- /dev/null +++ b/lib/src/syn/v2/parser/idiom.rs @@ -0,0 +1,824 @@ +use crate::{ + sql::{Dir, Edges, Field, Fields, Graph, Ident, Idiom, Part, Table, Tables, Value}, + syn::v2::token::{t, Span, TokenKind}, +}; + +use super::{mac::unexpected, ParseError, ParseErrorKind, ParseResult, Parser}; + +impl Parser<'_> { + /// Parse fields of a selecting query: `foo, bar` in `SELECT foo, bar FROM baz`. + /// + /// # Parser State + /// Expects the next tokens to be of a field set. 
+ pub fn parse_fields(&mut self) -> ParseResult { + if self.eat(t!("VALUE")) { + let expr = self.parse_value_field()?; + let alias = self.eat(t!("AS")).then(|| self.parse_plain_idiom()).transpose()?; + Ok(Fields( + vec![Field::Single { + expr, + alias, + }], + true, + )) + } else { + let mut fields = Vec::new(); + loop { + let field = if self.eat(t!("*")) { + Field::All + } else { + let expr = self.parse_value_field()?; + let alias = self.eat(t!("AS")).then(|| self.parse_plain_idiom()).transpose()?; + Field::Single { + expr, + alias, + } + }; + fields.push(field); + if !self.eat(t!(",")) { + break; + } + } + Ok(Fields(fields, false)) + } + } + + /// Parses a list of idioms seperated by a `,` + pub fn parse_idiom_list(&mut self) -> ParseResult> { + let mut res = vec![self.parse_plain_idiom()?]; + while self.eat(t!(",")) { + res.push(self.parse_plain_idiom()?); + } + Ok(res) + } + + /// Parses the remaining idiom parts after the start: Any part like `...`, `.foo` and `->foo` + /// + /// This function differes from [`Parser::parse_remaining_value_idiom`] in how it handles graph + /// parsing. Graphs inside a plain idioms will remain a normal graph production. + pub(crate) fn parse_remaining_idiom(&mut self, start: Vec) -> ParseResult { + let mut res = start; + loop { + match self.peek_kind() { + t!("...") => { + self.pop_peek(); + res.push(Part::Flatten); + } + t!(".") => { + self.pop_peek(); + res.push(self.parse_dot_part()?) + } + t!("[") => { + let span = self.pop_peek().span; + res.push(self.parse_bracket_part(span)?) 
+ } + t!("->") => { + self.pop_peek(); + res.push(Part::Graph(self.parse_graph(Dir::Out)?)) + } + t!("<->") => { + self.pop_peek(); + res.push(Part::Graph(self.parse_graph(Dir::Both)?)) + } + t!("<-") => { + self.pop_peek(); + res.push(Part::Graph(self.parse_graph(Dir::In)?)) + } + t!("..") => { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: t!(".."), + expected: "an idiom", + explain: "Did you maybe mean the flatten operator `...`", + }, + self.last_span(), + )) + } + _ => break, + } + } + Ok(Idiom(res)) + } + + /// Parses the remaining idiom parts after the start: Any part like `...`, `.foo` and `->foo` + /// + /// + /// This function differes from [`Parser::parse_remaining_value_idiom`] in how it handles graph + /// parsing. When parsing a idiom like production which can be a value, the initial start value + /// might need to be changed to a Edge depending on what is parsed next. + pub(crate) fn parse_remaining_value_idiom(&mut self, start: Vec) -> ParseResult { + let mut res = start; + loop { + match self.peek_kind() { + t!("...") => { + self.pop_peek(); + res.push(Part::Flatten); + } + t!(".") => { + self.pop_peek(); + res.push(self.parse_dot_part()?) + } + t!("[") => { + let span = self.pop_peek().span; + res.push(self.parse_bracket_part(span)?) + } + t!("->") => { + self.pop_peek(); + if let Some(x) = self.parse_graph_idiom(&mut res, Dir::Out)? { + return Ok(x); + } + } + t!("<->") => { + self.pop_peek(); + if let Some(x) = self.parse_graph_idiom(&mut res, Dir::Out)? { + return Ok(x); + } + } + t!("<-") => { + self.pop_peek(); + if let Some(x) = self.parse_graph_idiom(&mut res, Dir::Out)? 
{ + return Ok(x); + } + } + t!("..") => { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: t!(".."), + expected: "an idiom", + explain: "Did you maybe mean the flatten operator `...`", + }, + self.last_span(), + )) + } + _ => break, + } + } + Ok(Value::Idiom(Idiom(res))) + } + + /// Parse a graph idiom and possibly rewrite the starting value to be an edge whenever the + /// parsed production matches `Thing -> Ident`. + fn parse_graph_idiom(&mut self, res: &mut Vec, dir: Dir) -> ParseResult> { + let graph = self.parse_graph(dir)?; + // the production `Thing Graph` is reparsed as an edge if the graph does not contain an + // alias or a condition. + if res.len() == 1 && graph.alias.is_none() && graph.cond.is_none() { + match std::mem::replace(&mut res[0], Part::All) { + Part::Value(Value::Thing(t)) | Part::Start(Value::Thing(t)) => { + let edge = Edges { + dir: graph.dir, + from: t, + what: graph.what, + }; + let value = Value::Edges(Box::new(edge)); + + if !Self::continues_idiom(self.peek_kind()) { + return Ok(Some(value)); + } + res[0] = Part::Start(value); + return Ok(None); + } + x => { + res[0] = x; + } + } + } + res.push(Part::Graph(graph)); + Ok(None) + } + + /// Returns if the token kind could continua an idiom + pub fn continues_idiom(kind: TokenKind) -> bool { + matches!(kind, t!("->") | t!("<->") | t!("<-") | t!("[") | t!(".") | t!("...")) + } + + /// Parse a idiom which can only start with a graph or an identifier. + /// Other expressions are not allowed as start of this idiom + pub fn parse_plain_idiom(&mut self) -> ParseResult { + let start = match self.peek_kind() { + t!("->") => { + self.pop_peek(); + Part::Graph(self.parse_graph(Dir::Out)?) + } + t!("<->") => { + self.pop_peek(); + Part::Graph(self.parse_graph(Dir::Both)?) + } + t!("<-") => { + self.pop_peek(); + Part::Graph(self.parse_graph(Dir::In)?) 
+ } + _ => Part::Field(self.next_token_value()?), + }; + let start = vec![start]; + self.parse_remaining_idiom(start) + } + + /// Parse the part after the `.` in a idiom + pub fn parse_dot_part(&mut self) -> ParseResult { + let res = match self.peek_kind() { + t!("*") => { + self.pop_peek(); + Part::All + } + _ => Part::Field(self.next_token_value()?), + }; + Ok(res) + } + /// Parse the part after the `[` in a idiom + pub fn parse_bracket_part(&mut self, start: Span) -> ParseResult { + let res = match self.peek_kind() { + t!("*") => { + self.pop_peek(); + Part::All + } + t!("$") => { + self.pop_peek(); + Part::Last + } + t!("123") => Part::Index(self.next_token_value()?), + t!("?") | t!("WHERE") => { + self.pop_peek(); + Part::Where(self.parse_value_field()?) + } + t!("$param") => Part::Value(Value::Param(self.next_token_value()?)), + TokenKind::Strand => Part::Value(Value::Strand(self.next_token_value()?)), + _ => { + let idiom = self.parse_basic_idiom()?; + Part::Value(Value::Idiom(idiom)) + } + }; + self.expect_closing_delimiter(t!("]"), start)?; + Ok(res) + } + + /// Parse a list of basic idioms seperated by a ',' + pub fn parse_basic_idiom_list(&mut self) -> ParseResult> { + let mut res = vec![self.parse_basic_idiom()?]; + while self.eat(t!(",")) { + res.push(self.parse_basic_idiom()?); + } + Ok(res) + } + + /// Parse a basic idiom. + /// + /// Basic idioms differ from normal idioms in that they are more restrictive. + /// Flatten, graphs, conditions and indexing by param is not allowed. + pub fn parse_basic_idiom(&mut self) -> ParseResult { + let start = self.next_token_value::()?; + let mut parts = vec![Part::Field(start)]; + loop { + let token = self.peek(); + let part = match token.kind { + t!(".") => { + self.pop_peek(); + self.parse_dot_part()? 
+ } + t!("[") => { + self.pop_peek(); + let res = match self.peek_kind() { + t!("*") => { + self.pop_peek(); + Part::All + } + t!("$") => { + self.pop_peek(); + Part::Last + } + t!("123") => { + let number = self.token_value(token)?; + Part::Index(number) + } + x => unexpected!(self, x, "$, * or a number"), + }; + self.expect_closing_delimiter(t!("]"), token.span)?; + res + } + _ => break, + }; + parts.push(part); + } + Ok(Idiom(parts)) + } + + /// Parse a local idiom. + /// + /// Basic idioms differ from local idioms in that they are more restrictive. + /// Only field, all and number indexing is allowed. Flatten is also allowed but only at the + /// end. + pub fn parse_local_idiom(&mut self) -> ParseResult { + let start = self.next_token_value()?; + let mut parts = vec![Part::Field(start)]; + loop { + let token = self.peek(); + let part = match token.kind { + t!(".") => { + self.pop_peek(); + self.parse_dot_part()? + } + t!("[") => { + self.pop_peek(); + let res = match self.peek_kind() { + t!("*") => { + self.pop_peek(); + Part::All + } + t!("123") => { + let number = self.next_token_value()?; + Part::Index(number) + } + x => unexpected!(self, x, "$, * or a number"), + }; + self.expect_closing_delimiter(t!("]"), token.span)?; + res + } + _ => break, + }; + + parts.push(part); + } + + if self.eat(t!("...")) { + let span = self.last_span(); + parts.push(Part::Flatten); + if let t!(".") | t!("[") = self.peek_kind() { + return Err(ParseError::new( + ParseErrorKind::UnexpectedExplain { + found: t!("..."), + expected: "local idiom to end.", + explain: "Flattening can only be done at the end of a local idiom.", + }, + span, + )); + } + } + + Ok(Idiom(parts)) + } + + /// Parses a list of what values seperated by comma's + /// + /// # Parser state + /// Expects to be at the start of a what list. + pub fn parse_what_list(&mut self) -> ParseResult> { + let mut res = vec![self.parse_what_value()?]; + while self.eat(t!(",")) { + res.push(self.parse_what_value()?) 
+ } + Ok(res) + } + + /// Parses a single what value, + /// + /// # Parser state + /// Expects to be at the start of a what value + pub fn parse_what_value(&mut self) -> ParseResult { + let start = self.parse_what_primary()?; + if start.can_start_idiom() && Self::continues_idiom(self.peek_kind()) { + let start = match start { + Value::Table(Table(x)) => vec![Part::Field(Ident(x))], + Value::Idiom(Idiom(x)) => x, + x => vec![Part::Start(x)], + }; + + let idiom = self.parse_remaining_value_idiom(start)?; + Ok(idiom) + } else { + Ok(start) + } + } + + /// Parses a graph value + /// + /// # Parser state + /// Expects to just have eaten a direction (e.g. <-, <->, or ->) and be at the field like part + /// of the graph + pub fn parse_graph(&mut self, dir: Dir) -> ParseResult { + match self.peek_kind() { + t!("?") => { + self.pop_peek(); + Ok(Graph { + dir, + ..Default::default() + }) + } + t!("(") => { + let span = self.pop_peek().span; + let what = match self.peek_kind() { + t!("?") => { + self.pop_peek(); + Tables::default() + } + x if x.can_be_identifier() => { + // The following function should always succeed here, + // returning an error here would be a bug, so unwrap. + let table = self.next_token_value().unwrap(); + let mut tables = Tables(vec![table]); + while self.eat(t!(",")) { + tables.0.push(self.next_token_value()?); + } + tables + } + x => unexpected!(self, x, "`?` or an identifier"), + }; + + let cond = self.try_parse_condition()?; + let alias = self.eat(t!("AS")).then(|| self.parse_plain_idiom()).transpose()?; + + self.expect_closing_delimiter(t!(")"), span)?; + + Ok(Graph { + dir, + what, + cond, + alias, + expr: Fields::all(), + ..Default::default() + }) + } + x if x.can_be_identifier() => { + // The following function should always succeed here, + // returning an error here would be a bug, so unwrap. 
+ let table = self.next_token_value().unwrap(); + Ok(Graph { + dir, + expr: Fields::all(), + what: Tables(vec![table]), + ..Default::default() + }) + } + x => unexpected!(self, x, "`?`, `(` or an identifier"), + } + } +} + +#[cfg(test)] +mod tests { + use crate::sql::{Dir, Expression, Id, Number, Object, Param, Strand, Table, Thing}; + use crate::syn::Parse; + + use super::*; + + #[test] + fn graph_in() { + let sql = "<-likes"; + let out = Value::parse(sql); + assert_eq!("<-likes", format!("{}", out)); + } + + #[test] + fn graph_out() { + let sql = "->likes"; + let out = Value::parse(sql); + assert_eq!("->likes", format!("{}", out)); + } + + #[test] + fn graph_both() { + let sql = "<->likes"; + let out = Value::parse(sql); + assert_eq!("<->likes", format!("{}", out)); + } + + #[test] + fn graph_multiple() { + let sql = "->(likes, follows)"; + let out = Value::parse(sql); + assert_eq!("->(likes, follows)", format!("{}", out)); + } + + #[test] + fn graph_aliases() { + let sql = "->(likes, follows AS connections)"; + let out = Value::parse(sql); + assert_eq!("->(likes, follows AS connections)", format!("{}", out)); + } + + #[test] + fn graph_conditions() { + let sql = "->(likes, follows WHERE influencer = true)"; + let out = Value::parse(sql); + assert_eq!("->(likes, follows WHERE influencer = true)", format!("{}", out)); + } + + #[test] + fn graph_conditions_aliases() { + let sql = "->(likes, follows WHERE influencer = true AS connections)"; + let out = Value::parse(sql); + assert_eq!("->(likes, follows WHERE influencer = true AS connections)", format!("{}", out)); + } + + #[test] + fn idiom_normal() { + let sql = "test"; + let out = Value::parse(sql); + assert_eq!("test", format!("{}", out)); + assert_eq!(out, Value::from(Idiom(vec![Part::from("test")]))); + } + + #[test] + fn idiom_quoted_backtick() { + let sql = "`test`"; + let out = Value::parse(sql); + assert_eq!("test", format!("{}", out)); + assert_eq!(out, Value::from(Idiom(vec![Part::from("test")]))); + } + 
+ #[test] + fn idiom_quoted_brackets() { + let sql = "⟨test⟩"; + let out = Value::parse(sql); + assert_eq!("test", format!("{}", out)); + assert_eq!(out, Value::from(Idiom(vec![Part::from("test")]))); + } + + #[test] + fn idiom_nested() { + let sql = "test.temp"; + let out = Value::parse(sql); + assert_eq!("test.temp", format!("{}", out)); + assert_eq!(out, Value::from(Idiom(vec![Part::from("test"), Part::from("temp")]))); + } + + #[test] + fn idiom_nested_quoted() { + let sql = "test.`some key`"; + let out = Value::parse(sql); + assert_eq!("test.`some key`", format!("{}", out)); + assert_eq!(out, Value::from(Idiom(vec![Part::from("test"), Part::from("some key")]))); + } + + #[test] + fn idiom_nested_array_all() { + let sql = "test.temp[*]"; + let out = Value::parse(sql); + assert_eq!("test.temp[*]", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![Part::from("test"), Part::from("temp"), Part::All])) + ); + } + + #[test] + fn idiom_nested_array_last() { + let sql = "test.temp[$]"; + let out = Value::parse(sql); + assert_eq!("test.temp[$]", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![Part::from("test"), Part::from("temp"), Part::Last])) + ); + } + + #[test] + fn idiom_nested_array_value() { + let sql = "test.temp[*].text"; + let out = Value::parse(sql); + assert_eq!("test.temp[*].text", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::from("test"), + Part::from("temp"), + Part::All, + Part::from("text") + ])) + ); + } + + #[test] + fn idiom_nested_array_question() { + let sql = "test.temp[? 
test = true].text"; + let out = Value::parse(sql); + assert_eq!("test.temp[WHERE test = true].text", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::from("test"), + Part::from("temp"), + Part::Where(Value::from(Expression::parse("test = true"))), + Part::from("text") + ])) + ); + } + + #[test] + fn idiom_nested_array_condition() { + let sql = "test.temp[WHERE test = true].text"; + let out = Value::parse(sql); + assert_eq!("test.temp[WHERE test = true].text", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::from("test"), + Part::from("temp"), + Part::Where(Value::from(Expression::parse("test = true"))), + Part::from("text") + ])) + ); + } + + #[test] + fn idiom_start_param_local_field() { + let sql = "$test.temporary[0].embedded…"; + let out = Value::parse(sql); + assert_eq!("$test.temporary[0].embedded…", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Param::from("test").into()), + Part::from("temporary"), + Part::Index(Number::Int(0)), + Part::from("embedded"), + Part::Flatten, + ])) + ); + } + + #[test] + fn idiom_start_thing_remote_traversal() { + let sql = "person:test.friend->like->person"; + let out = Value::parse(sql); + assert_eq!("person:test.friend->like->person", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Thing::from(("person", "test")).into()), + Part::from("friend"), + Part::Graph(Graph { + dir: Dir::Out, + expr: Fields::all(), + what: Table::from("like").into(), + cond: None, + alias: None, + split: None, + group: None, + order: None, + limit: None, + start: None, + }), + Part::Graph(Graph { + dir: Dir::Out, + expr: Fields::all(), + what: Table::from("person").into(), + cond: None, + alias: None, + split: None, + group: None, + order: None, + limit: None, + start: None, + }), + ])) + ); + } + + #[test] + fn part_all() { + let sql = "{}[*]"; + let out = Value::parse(sql); + assert_eq!("{ }[*]", format!("{}", out)); + 
assert_eq!( + out, + Value::from(Idiom(vec![Part::Start(Value::from(Object::default())), Part::All])) + ); + } + + #[test] + fn part_last() { + let sql = "{}[$]"; + let out = Value::parse(sql); + assert_eq!("{ }[$]", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![Part::Start(Value::from(Object::default())), Part::Last])) + ); + } + + #[test] + fn part_param() { + let sql = "{}[$param]"; + let out = Value::parse(sql); + assert_eq!("{ }[$param]", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Value::from(Object::default())), + Part::Value(Value::Param(Param::from("param"))) + ])) + ); + } + + #[test] + fn part_flatten() { + let sql = "{}..."; + let out = Value::parse(sql); + assert_eq!("{ }…", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![Part::Start(Value::from(Object::default())), Part::Flatten])) + ); + } + + #[test] + fn part_flatten_ellipsis() { + let sql = "{}…"; + let out = Value::parse(sql); + assert_eq!("{ }…", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![Part::Start(Value::from(Object::default())), Part::Flatten])) + ); + } + + #[test] + fn part_number() { + let sql = "{}[0]"; + let out = Value::parse(sql); + assert_eq!("{ }[0]", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Value::from(Object::default())), + Part::Index(Number::from(0)) + ])) + ); + } + + #[test] + fn part_expression_question() { + let sql = "{}[?test = true]"; + let out = Value::parse(sql); + assert_eq!("{ }[WHERE test = true]", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Value::from(Object::default())), + Part::Where(Value::from(Expression::parse("test = true"))) + ])) + ); + } + + #[test] + fn part_expression_condition() { + let sql = "{}[WHERE test = true]"; + let out = Value::parse(sql); + assert_eq!("{ }[WHERE test = true]", format!("{}", out)); + assert_eq!( + out, + Value::from(Idiom(vec![ + 
Part::Start(Value::from(Object::default())), + Part::Where(Value::from(Expression::parse("test = true"))) + ])) + ); + } + + #[test] + fn idiom_thing_number() { + let sql = "test:1.foo"; + let out = Value::parse(sql); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Value::Thing(Thing { + tb: "test".to_owned(), + id: Id::Number(1), + })), + Part::from("foo"), + ])) + ); + } + + #[test] + fn idiom_thing_index() { + let sql = "test:1['foo']"; + let out = Value::parse(sql); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Value::Thing(Thing { + tb: "test".to_owned(), + id: Id::Number(1), + })), + Part::Value(Value::Strand(Strand("foo".to_owned()))), + ])) + ); + } + + #[test] + fn idiom_thing_all() { + let sql = "test:1.*"; + let out = Value::parse(sql); + assert_eq!( + out, + Value::from(Idiom(vec![ + Part::Start(Value::Thing(Thing { + tb: "test".to_owned(), + id: Id::Number(1), + })), + Part::All + ])) + ); + } +} diff --git a/lib/src/syn/v2/parser/json.rs b/lib/src/syn/v2/parser/json.rs new file mode 100644 index 00000000..c076c523 --- /dev/null +++ b/lib/src/syn/v2/parser/json.rs @@ -0,0 +1,73 @@ +use std::collections::BTreeMap; + +use crate::{ + sql::{Array, Ident, Object, Strand, Value}, + syn::v2::{ + parser::mac::expected, + token::{t, Span, TokenKind}, + }, +}; + +use super::{ParseResult, Parser}; + +impl Parser<'_> { + pub fn parse_json(&mut self) -> ParseResult { + let token = self.next(); + match token.kind { + t!("NULL") => Ok(Value::Null), + t!("true") => Ok(Value::Bool(true)), + t!("false") => Ok(Value::Bool(false)), + t!("{") => self.parse_json_object(token.span).map(Value::Object), + t!("[") => self.parse_json_array(token.span).map(Value::Array), + TokenKind::Duration => self.token_value(token).map(Value::Duration), + TokenKind::DateTime => self.token_value(token).map(Value::Datetime), + TokenKind::Strand => { + if self.legacy_strands { + self.parse_legacy_strand() + } else { + 
Ok(Value::Strand(Strand(self.lexer.string.take().unwrap()))) + } + } + TokenKind::Number(_) => self.token_value(token).map(Value::Number), + TokenKind::Uuid => self.token_value(token).map(Value::Uuid), + _ => { + let ident = self.token_value::(token)?.0; + self.parse_thing_from_ident(ident).map(Value::Thing) + } + } + } + + fn parse_json_object(&mut self, start: Span) -> ParseResult { + let mut obj = BTreeMap::new(); + loop { + if self.eat(t!("}")) { + return Ok(Object(obj)); + } + let key = self.parse_object_key()?; + expected!(self, t!(":")); + let value = self.parse_json()?; + obj.insert(key, value); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Object(obj)); + } + } + } + + fn parse_json_array(&mut self, start: Span) -> ParseResult { + let mut array = Vec::new(); + loop { + if self.eat(t!("]")) { + return Ok(Array(array)); + } + let value = self.parse_json()?; + array.push(value); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!("]"), start)?; + return Ok(Array(array)); + } + } + } +} diff --git a/lib/src/syn/v2/parser/kind.rs b/lib/src/syn/v2/parser/kind.rs new file mode 100644 index 00000000..08d5e293 --- /dev/null +++ b/lib/src/syn/v2/parser/kind.rs @@ -0,0 +1,438 @@ +use crate::{ + sql::Kind, + syn::v2::{ + parser::mac::expected, + token::{t, Keyword, Span, TokenKind}, + }, +}; + +use super::{mac::unexpected, ParseResult, Parser}; + +impl Parser<'_> { + /// Parse a kind production. + /// + /// # Parser State + /// expects the first `<` to already be eaten + pub fn parse_kind(&mut self, delim: Span) -> ParseResult { + let kind = self.parse_inner_kind()?; + self.expect_closing_delimiter(t!(">"), delim)?; + Ok(kind) + } + + /// Parse an inner kind, a kind without enclosing `<` `>`. 
+ pub fn parse_inner_kind(&mut self) -> ParseResult { + match self.peek_kind() { + t!("ANY") => { + self.pop_peek(); + Ok(Kind::Any) + } + t!("OPTION") => { + self.pop_peek(); + + let delim = expected!(self, t!("<")).span; + let mut first = self.parse_concrete_kind()?; + if self.peek_kind() == t!("|") { + let mut kind = vec![first]; + while self.eat(t!("|")) { + kind.push(self.parse_concrete_kind()?); + } + first = Kind::Either(kind); + } + self.expect_closing_delimiter(t!(">"), delim)?; + Ok(Kind::Option(Box::new(first))) + } + _ => { + let first = self.parse_concrete_kind()?; + if self.peek_kind() == t!("|") { + let mut kind = vec![first]; + while self.eat(t!("|")) { + kind.push(self.parse_concrete_kind()?); + } + Ok(Kind::Either(kind)) + } else { + Ok(first) + } + } + } + } + + /// Parse a single kind which is not any, option, or either. + fn parse_concrete_kind(&mut self) -> ParseResult { + match self.next().kind { + t!("BOOL") => Ok(Kind::Bool), + t!("NULL") => Ok(Kind::Null), + t!("BYTES") => Ok(Kind::Bytes), + t!("DATETIME") => Ok(Kind::Datetime), + t!("DECIMAL") => Ok(Kind::Decimal), + t!("DURATION") => Ok(Kind::Duration), + t!("FLOAT") => Ok(Kind::Float), + t!("INT") => Ok(Kind::Int), + t!("NUMBER") => Ok(Kind::Number), + t!("OBJECT") => Ok(Kind::Object), + t!("POINT") => Ok(Kind::Point), + t!("STRING") => Ok(Kind::String), + t!("UUID") => Ok(Kind::Uuid), + t!("RECORD") => { + let tables = match self.peek_kind() { + t!("<") => { + let next = self.next(); + let mut tables = vec![self.next_token_value()?]; + while self.eat(t!("|")) { + tables.push(self.next_token_value()?); + } + self.expect_closing_delimiter(t!(">"), next.span)?; + tables + } + t!("(") => { + let next = self.next(); + let mut tables = vec![self.next_token_value()?]; + while self.eat(t!(",")) { + tables.push(self.next_token_value()?); + } + self.expect_closing_delimiter(t!(")"), next.span)?; + tables + } + _ => Vec::new(), + }; + Ok(Kind::Record(tables)) + } + t!("GEOMETRY") => { + let kind 
= match self.peek_kind() { + t!("<") => { + let delim = self.pop_peek().span; + let mut kind = vec![self.parse_geometry_kind()?]; + while self.eat(t!("|")) { + kind.push(self.parse_geometry_kind()?); + } + self.expect_closing_delimiter(t!(">"), delim)?; + kind + } + // Legacy gemoetry kind syntax with parens instead of `<` `>`. + t!("(") => { + let delim = self.pop_peek().span; + let mut kind = vec![self.parse_geometry_kind()?]; + loop { + if self.eat(t!(")")) { + break; + } + + kind.push(self.parse_geometry_kind()?); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!(")"), delim)?; + break; + } + } + kind + } + _ => Vec::new(), + }; + Ok(Kind::Geometry(kind)) + } + t!("ARRAY") => { + let span = self.peek().span; + if self.eat(t!("<")) { + let kind = self.parse_inner_kind()?; + let size = self.eat(t!(",")).then(|| self.next_token_value()).transpose()?; + self.expect_closing_delimiter(t!(">"), span)?; + Ok(Kind::Array(Box::new(kind), size)) + } else { + Ok(Kind::Array(Box::new(Kind::Any), None)) + } + } + t!("SET") => { + let span = self.peek().span; + if self.eat(t!("<")) { + let kind = self.parse_inner_kind()?; + let size = self.eat(t!(",")).then(|| self.next_token_value()).transpose()?; + self.expect_closing_delimiter(t!(">"), span)?; + Ok(Kind::Set(Box::new(kind), size)) + } else { + Ok(Kind::Set(Box::new(Kind::Any), None)) + } + } + x => unexpected!(self, x, "a kind name"), + } + } + + /// Parse the kind of gemoetry + fn parse_geometry_kind(&mut self) -> ParseResult { + match self.next().kind { + TokenKind::Keyword( + x @ (Keyword::Feature + | Keyword::Point + | Keyword::Line + | Keyword::Polygon + | Keyword::MultiPoint + | Keyword::MultiLine + | Keyword::MultiPolygon + | Keyword::Collection), + ) => Ok(x.as_str().to_ascii_lowercase()), + x => unexpected!(self, x, "a geometry kind name"), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sql::table::Table; + + fn kind(i: &str) -> ParseResult { + let mut parser = 
Parser::new(i.as_bytes()); + parser.parse_inner_kind() + } + + #[test] + fn kind_any() { + let sql = "any"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("any", format!("{}", out)); + assert_eq!(out, Kind::Any); + } + + #[test] + fn kind_null() { + let sql = "null"; + let res = kind(sql); + assert!(res.is_ok()); + let out = res.unwrap(); + assert_eq!("null", format!("{}", out)); + assert_eq!(out, Kind::Null); + } + + #[test] + fn kind_bool() { + let sql = "bool"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("bool", format!("{}", out)); + assert_eq!(out, Kind::Bool); + } + + #[test] + fn kind_bytes() { + let sql = "bytes"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("bytes", format!("{}", out)); + assert_eq!(out, Kind::Bytes); + } + + #[test] + fn kind_datetime() { + let sql = "datetime"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("datetime", format!("{}", out)); + assert_eq!(out, Kind::Datetime); + } + + #[test] + fn kind_decimal() { + let sql = "decimal"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("decimal", format!("{}", out)); + assert_eq!(out, Kind::Decimal); + } + + #[test] + fn kind_duration() { + let sql = "duration"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("duration", format!("{}", out)); + assert_eq!(out, Kind::Duration); + } + + #[test] + fn kind_float() { + let sql = "float"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("float", format!("{}", out)); + assert_eq!(out, Kind::Float); + } + + #[test] + fn kind_number() { + let sql = "number"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("number", format!("{}", out)); + assert_eq!(out, Kind::Number); + } + + #[test] + fn kind_object() { + let sql = "object"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("object", format!("{}", out)); + assert_eq!(out, Kind::Object); + } + + #[test] + fn kind_point() { + let sql = "point"; + let res = kind(sql); + let 
out = res.unwrap(); + assert_eq!("point", format!("{}", out)); + assert_eq!(out, Kind::Point); + } + + #[test] + fn kind_string() { + let sql = "string"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("string", format!("{}", out)); + assert_eq!(out, Kind::String); + } + + #[test] + fn kind_uuid() { + let sql = "uuid"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("uuid", format!("{}", out)); + assert_eq!(out, Kind::Uuid); + } + + #[test] + fn kind_either() { + let sql = "int | float"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("int | float", format!("{}", out)); + assert_eq!(out, Kind::Either(vec![Kind::Int, Kind::Float])); + } + + #[test] + fn kind_record_any() { + let sql = "record"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("record", format!("{}", out)); + assert_eq!(out, Kind::Record(vec![])); + } + + #[test] + fn kind_record_one() { + let sql = "record"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("record", format!("{}", out)); + assert_eq!(out, Kind::Record(vec![Table::from("person")])); + } + + #[test] + fn kind_record_many() { + let sql = "record"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("record", format!("{}", out)); + assert_eq!(out, Kind::Record(vec![Table::from("person"), Table::from("animal")])); + } + + #[test] + fn kind_geometry_any() { + let sql = "geometry"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("geometry", format!("{}", out)); + assert_eq!(out, Kind::Geometry(vec![])); + } + + #[test] + fn kind_geometry_one() { + let sql = "geometry"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("geometry", format!("{}", out)); + assert_eq!(out, Kind::Geometry(vec![String::from("point")])); + } + + #[test] + fn kind_geometry_many() { + let sql = "geometry"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("geometry", format!("{}", out)); + assert_eq!(out, Kind::Geometry(vec![String::from("point"), 
String::from("multipoint")])); + } + + #[test] + fn kind_option_one() { + let sql = "option"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("option", format!("{}", out)); + assert_eq!(out, Kind::Option(Box::new(Kind::Int))); + } + + #[test] + fn kind_option_many() { + let sql = "option"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("option", format!("{}", out)); + assert_eq!(out, Kind::Option(Box::new(Kind::Either(vec![Kind::Int, Kind::Float])))); + } + + #[test] + fn kind_array_any() { + let sql = "array"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("array", format!("{}", out)); + assert_eq!(out, Kind::Array(Box::new(Kind::Any), None)); + } + + #[test] + fn kind_array_some() { + let sql = "array"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("array", format!("{}", out)); + assert_eq!(out, Kind::Array(Box::new(Kind::Float), None)); + } + + #[test] + fn kind_array_some_size() { + let sql = "array"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("array", format!("{}", out)); + assert_eq!(out, Kind::Array(Box::new(Kind::Float), Some(10))); + } + + #[test] + fn kind_set_any() { + let sql = "set"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("set", format!("{}", out)); + assert_eq!(out, Kind::Set(Box::new(Kind::Any), None)); + } + + #[test] + fn kind_set_some() { + let sql = "set"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("set", format!("{}", out)); + assert_eq!(out, Kind::Set(Box::new(Kind::Float), None)); + } + + #[test] + fn kind_set_some_size() { + let sql = "set"; + let res = kind(sql); + let out = res.unwrap(); + assert_eq!("set", format!("{}", out)); + assert_eq!(out, Kind::Set(Box::new(Kind::Float), Some(10))); + } +} diff --git a/lib/src/syn/v2/parser/mac.rs b/lib/src/syn/v2/parser/mac.rs new file mode 100644 index 00000000..4f2c9df1 --- /dev/null +++ b/lib/src/syn/v2/parser/mac.rs @@ -0,0 +1,79 @@ +/// A macro for indicating that the parser encountered a token
which it didn't expect. +macro_rules! unexpected { + ($parser:expr, $found:expr, $expected:expr) => { + match $found { + $crate::syn::v2::token::TokenKind::Invalid => { + let error = $parser.lexer.error.take().unwrap(); + return Err($crate::syn::v2::parser::ParseError::new( + $crate::syn::v2::parser::ParseErrorKind::InvalidToken(error), + $parser.last_span(), + )); + } + $crate::syn::v2::token::TokenKind::Eof => { + let expected = $expected; + return Err($crate::syn::v2::parser::ParseError::new( + $crate::syn::v2::parser::ParseErrorKind::UnexpectedEof { + expected, + }, + $parser.last_span(), + )); + } + x => { + let expected = $expected; + return Err($crate::syn::v2::parser::ParseError::new( + $crate::syn::v2::parser::ParseErrorKind::Unexpected { + found: x, + expected, + }, + $parser.last_span(), + )); + } + } + }; +} + +/// A macro for requiring a certain token to be next, returning a parse error otherwise. +macro_rules! expected { + ($parser:expr, $($kind:tt)*) => {{ + let token = $parser.next(); + match token.kind { + $($kind)* => token, + $crate::syn::v2::parser::TokenKind::Invalid => { + let error = $parser.lexer.error.take().unwrap(); + return Err($crate::syn::v2::parser::ParseError::new( + $crate::syn::v2::parser::ParseErrorKind::InvalidToken(error), + $parser.last_span(), + )); + } + x => { + let expected = $($kind)*.as_str(); + let kind = if let $crate::syn::v2::token::TokenKind::Eof = x { + $crate::syn::v2::parser::ParseErrorKind::UnexpectedEof { + expected, + } + } else { + $crate::syn::v2::parser::ParseErrorKind::Unexpected { + found: x, + expected, + } + }; + + return Err($crate::syn::v2::parser::ParseError::new(kind, $parser.last_span())); + } + } + }}; +} + +#[cfg(test)] +macro_rules! test_parse { + ($func:ident$( ( $($e:expr),* $(,)? ))?
, $t:literal) => {{ + let mut parser = $crate::syn::v2::parser::Parser::new($t.as_bytes()); + parser.$func($($($e),*)*) + }}; +} + +pub(super) use expected; +pub(super) use unexpected; + +#[cfg(test)] +pub(super) use test_parse; diff --git a/lib/src/syn/v2/parser/mod.rs b/lib/src/syn/v2/parser/mod.rs new file mode 100644 index 00000000..d73df9da --- /dev/null +++ b/lib/src/syn/v2/parser/mod.rs @@ -0,0 +1,304 @@ +//! Module implementing the SurrealQL parser. +//! +//! The SurrealQL parser is a relatively simple recursive descent parser. +//! Most of the functions of the SurrealQL parser peek a token from the lexer and then decide to +//! take a path depending on which token is next. +//! +//! There are a bunch of common patterns for which this module has some convenience functions. +//! - Whenever only one token can be next you should use the [`expected!`] macro. This macro +//! ensures that the given token type is next and if not returns a parser error. +//! - Whenever a limited set of tokens can be next it is common to match the token kind and then +//! have a catch-all arm which calls the macro [`unexpected!`]. This macro will raise a parse +//! error with information about the type of token it receives and what it expected. +//! - If a single token can be optionally next use [`Parser::eat`]; this function returns a bool +//! depending on if the given token kind was eaten. +//! - If a closing delimiting token is expected use [`Parser::expect_closing_delimiter`]. This +//! function will raise an error if the expected delimiter isn't the next token. This error will +//! also point to which delimiter the parser expected to be closed.
+ +use self::token_buffer::TokenBuffer; +use crate::{ + sql, + syn::v2::{ + lexer::{Error as LexError, Lexer}, + parser::mac::expected, + token::{t, Span, Token, TokenKind}, + }, +}; + +mod basic; +mod builtin; +mod error; +mod expression; +mod function; +mod idiom; +mod json; +mod kind; +mod mac; +mod object; +mod prime; +mod stmt; +mod thing; +mod token_buffer; + +#[cfg(test)] +pub mod test; + +pub use error::{IntErrorKind, ParseError, ParseErrorKind}; + +/// The result returned by most parser function. +pub type ParseResult = Result; + +/// A result of trying to parse a possibly partial query. +#[derive(Debug)] +pub enum PartialResult { + /// The parser couldn't be sure that it has finished a full value. + Pending { + /// The value that was parsed. + /// This will not always be an error, if optional keywords after the end of a statement + /// where missing this will still parse that statement in full. + possible_value: Result, + /// number of bytes used for parsing the above statement. + used: usize, + }, + /// The parser is sure that it doesn't need more data to return either an error or a value. + Ready { + /// The value the parser is sure the query should return. + value: Result, + /// number of bytes used + used: usize, + }, +} + +/// The SurrealQL parser. +pub struct Parser<'a> { + lexer: Lexer<'a>, + last_span: Span, + token_buffer: TokenBuffer<4>, + table_as_field: bool, + legacy_strands: bool, +} + +impl<'a> Parser<'a> { + /// Create a new parser from a give source. + pub fn new(source: &'a [u8]) -> Self { + Parser { + lexer: Lexer::new(source), + last_span: Span::empty(), + token_buffer: TokenBuffer::new(), + table_as_field: false, + legacy_strands: false, + } + } + + /// Set whether to parse strands as legacy strands. + pub fn allow_legacy_strand(&mut self, value: bool) { + self.legacy_strands = value; + } + + /// Reset the parser state. Doesnt change the position of the parser in buffer. 
+ pub fn reset(&mut self) { + self.last_span = Span::empty(); + self.token_buffer.clear(); + self.table_as_field = false; + self.lexer.reset(); + } + + /// Change the source of the parser reusing the existing buffers. + pub fn change_source(self, source: &[u8]) -> Parser { + Parser { + lexer: self.lexer.change_source(source), + last_span: Span::empty(), + token_buffer: TokenBuffer::new(), + legacy_strands: self.legacy_strands, + table_as_field: false, + } + } + + /// Returns the next token and advance the parser one token forward. + #[allow(clippy::should_implement_trait)] + pub fn next(&mut self) -> Token { + let res = self.token_buffer.pop().unwrap_or_else(|| self.lexer.next_token()); + self.last_span = res.span; + res + } + + /// Consume the current peeked value and advance the parser one token forward. + /// + /// Should only be called after peeking a value. + pub fn pop_peek(&mut self) -> Token { + let res = self.token_buffer.pop().unwrap(); + self.last_span = res.span; + res + } + + /// Returns the next token without consuming it. + pub fn peek(&mut self) -> Token { + let Some(x) = self.token_buffer.first() else { + let res = self.lexer.next_token(); + self.token_buffer.push(res); + return res; + }; + x + } + + /// Return the token kind of the next token without consuming it. + pub fn peek_kind(&mut self) -> TokenKind { + let Some(x) = self.token_buffer.first().map(|x| x.kind) else { + let res = self.lexer.next_token(); + self.token_buffer.push(res); + return res.kind; + }; + x + } + + /// Returns the next n'th token without consuming it. + /// `peek_token_at(0)` is equivalent to `peek`. + pub fn peek_token_at(&mut self, at: u8) -> Token { + for _ in self.token_buffer.len()..=at { + self.token_buffer.push(self.lexer.next_token()); + } + self.token_buffer.at(at).unwrap() + } + + /// Returns the span of the next token if it was already peeked, otherwise returns the token of + /// the last consumed token. 
+ pub fn last_span(&mut self) -> Span { + self.token_buffer.first().map(|x| x.span).unwrap_or(self.last_span) + } + + /// Eat the next token if it is of the given kind. + /// Returns whether a token was eaten. + pub fn eat(&mut self, token: TokenKind) -> bool { + if token == self.peek().kind { + self.token_buffer.pop(); + true + } else { + false + } + } + + /// Checks if the next token is of the given kind. If it isn't it returns a UnclosedDelimiter + /// error. + fn expect_closing_delimiter(&mut self, kind: TokenKind, should_close: Span) -> ParseResult<()> { + if !self.eat(kind) { + return Err(ParseError::new( + ParseErrorKind::UnclosedDelimiter { + expected: kind, + should_close, + }, + self.last_span(), + )); + } + Ok(()) + } + + /// Ensure that there was no whitespace parser between the last token and the current one. + /// + /// This is used in places where whitespace is prohibited like inside a record id. + fn no_whitespace(&mut self) -> ParseResult<()> { + if let Some(span) = self.lexer.whitespace_span() { + Err(ParseError::new(ParseErrorKind::NoWhitespace, span)) + } else { + Ok(()) + } + } + + /// Recover the parser state to after a given span. + pub fn backup_after(&mut self, span: Span) { + self.token_buffer.clear(); + self.lexer.backup_after(span); + } + + /// Parse a full query. + /// + /// This is the primary entry point of the parser. + pub fn parse_query(&mut self) -> ParseResult { + // eat possible empty statements. + while self.eat(t!(";")) {} + + let mut statements = vec![self.parse_stmt()?]; + + while self.eat(t!(";")) { + // eat possible empty statements. + while self.eat(t!(";")) {} + + if let TokenKind::Eof = self.peek().kind { + break; + }; + + statements.push(self.parse_stmt()?); + } + + expected!(self, TokenKind::Eof); + Ok(sql::Query(sql::Statements(statements))) + } + + /// Parse a single statement. + pub fn parse_statement(&mut self) -> ParseResult { + self.parse_stmt() + } + + /// Parse a possibly partial statement. 
+ /// + /// This will try to parse a statement if a full statement can be parsed from the buffer parser + /// is operating on. + pub fn parse_partial_statement(&mut self) -> PartialResult { + while self.eat(t!(";")) {} + + let res = self.parse_stmt(); + match res { + Err(ParseError { + kind: ParseErrorKind::UnexpectedEof { + .. + }, + .. + }) + | Err(ParseError { + kind: ParseErrorKind::InvalidToken(LexError::UnexpectedEof), + .. + }) => { + return PartialResult::Pending { + possible_value: res, + used: self.lexer.reader.offset(), + }; + } + Err(ParseError { + kind: ParseErrorKind::Unexpected { + .. + }, + at, + .. + }) => { + // Ensure the we are sure that the last token was fully parsed. + self.backup_after(at); + if self.peek().kind != TokenKind::Eof || self.lexer.whitespace_span().is_some() { + // if there is a next token or we ate whitespace after the eof we can be sure + // that the error is not the result of a token only being partially present. + return PartialResult::Ready { + value: res, + used: self.lexer.reader.offset(), + }; + } + } + _ => {} + }; + + let colon = self.next(); + if colon.kind != t!(";") { + return PartialResult::Pending { + possible_value: res, + used: self.lexer.reader.offset(), + }; + } + + // Might have peeked more tokens past the final ";" so backup to after the semi-colon. 
+ self.backup_after(colon.span); + let used = self.lexer.reader.offset(); + + PartialResult::Ready { + value: res, + used, + } + } +} diff --git a/lib/src/syn/v2/parser/object.rs b/lib/src/syn/v2/parser/object.rs new file mode 100644 index 00000000..3e65ab4f --- /dev/null +++ b/lib/src/syn/v2/parser/object.rs @@ -0,0 +1,715 @@ +use std::collections::BTreeMap; + +use geo_types::{LineString, MultiLineString, MultiPoint, MultiPolygon, Point, Polygon}; + +use crate::{ + sql::{Block, Geometry, Object, Strand, Value}, + syn::v2::{ + parser::{mac::expected, ParseError, ParseErrorKind, ParseResult, Parser}, + token::{t, Span, TokenKind}, + }, +}; + +use super::mac::unexpected; + +impl Parser<'_> { + /// Parse an production which starts with an `{` + /// + /// Either a block statemnt, a object or geometry. + pub(super) fn parse_object_like(&mut self, start: Span) -> ParseResult { + if self.eat(t!("}")) { + // empty object, just return + return Ok(Value::Object(Object::default())); + } + + // Check first if it can be an object. + if self.peek_token_at(1).kind == t!(":") { + return self.parse_object_or_geometry(start); + } + + // not an object so instead parse as a block. + self.parse_block(start).map(Box::new).map(Value::Block) + } + + /// Parse a production starting with an `{` as either an object or a geometry. + /// + /// This function tries to match an object to an geometry like object and if it is unable + /// fallsback to parsing normal objects. + fn parse_object_or_geometry(&mut self, start: Span) -> ParseResult { + // empty object was already matched previously so next must be a key. + let key = self.parse_object_key()?; + expected!(self, t!(":")); + // the order of fields of a geometry does not matter so check if it is any of geometry like keys + // "type" : could be the type of the object. + // "collections": could be a geometry collection. + // "geometry": could be the values of geometry. 
+ match key.as_str() { + "type" => { + // for it to be geometry the next value must be a strand like. + let token = self.peek(); + let strand = self.token_value::(token); + match strand.as_ref().map(|x| x.as_str()) { + Ok("Point") => { + // we matched a type correctly but the field containing the geometry value + // can still be wrong. + // + // we can unwrap strand since we just matched it to not be an err. + self.parse_geometry_after_type( + start, + key, + strand.unwrap(), + Self::to_point, + |x| Value::Geometry(Geometry::Point(x)), + ) + } + Ok("LineString") => self.parse_geometry_after_type( + start, + key, + strand.unwrap(), + Self::to_line, + |x| Value::Geometry(Geometry::Line(x)), + ), + Ok("Polygon") => self.parse_geometry_after_type( + start, + key, + strand.unwrap(), + Self::to_polygon, + |x| Value::Geometry(Geometry::Polygon(x)), + ), + Ok("MultiPoint") => self.parse_geometry_after_type( + start, + key, + strand.unwrap(), + Self::to_multipoint, + |x| Value::Geometry(Geometry::MultiPoint(x)), + ), + Ok("MultiLineString") => self.parse_geometry_after_type( + start, + key, + strand.unwrap(), + Self::to_multiline, + |x| Value::Geometry(Geometry::MultiLine(x)), + ), + Ok("MultiPolygon") => self.parse_geometry_after_type( + start, + key, + strand.unwrap(), + Self::to_multipolygon, + |x| Value::Geometry(Geometry::MultiPolygon(x)), + ), + Ok("GeometryCollection") => { + self.next(); + let strand = strand.unwrap(); + if !self.eat(t!(",")) { + // missing next field, not a geometry. 
+ return self + .parse_object_from_map( + BTreeMap::from([(key, Value::Strand(strand))]), + start, + ) + .map(Value::Object); + } + let coord_key = self.parse_object_key()?; + expected!(self, t!(":")); + if coord_key != "geometries" { + // invalid field key, not a Geometry + return self + .parse_object_from_key( + coord_key, + BTreeMap::from([(key, Value::Strand(strand))]), + start, + ) + .map(Value::Object); + } + let value = self.parse_value_field()?; + let comma = self.eat(t!(",")); + if !self.eat(t!("}")) { + if !comma { + // No brace after no comma, missing brace. + return Err(ParseError::new( + ParseErrorKind::UnclosedDelimiter { + expected: t!("}"), + should_close: start, + }, + self.last_span(), + )); + } + + // A comma and then no brace. more then two fields, not a geometry. + return self + .parse_object_from_map( + BTreeMap::from([ + (key, Value::Strand(strand)), + (coord_key, value), + ]), + start, + ) + .map(Value::Object); + } + + if let Value::Array(x) = value { + // test first to avoid a cloning. 
+ if x.iter().all(|x| matches!(x, Value::Geometry(_))) { + let geometries = + x.0.into_iter() + .map(|x| { + if let Value::Geometry(x) = x { + x + } else { + unreachable!() + } + }) + .collect(); + + return Ok(Value::Geometry(Geometry::Collection(geometries))); + } + + return Ok(Value::Object(Object(BTreeMap::from([ + (key, Value::Strand(strand)), + (coord_key, Value::Array(x)), + ])))); + } + + Ok(Value::Object(Object(BTreeMap::from([ + (key, Value::Strand(strand)), + (coord_key, value), + ])))) + } + Ok(_) => { + self.pop_peek(); + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!("}"), start)?; + Ok(Value::Object(Object(BTreeMap::from([( + key, + Value::Strand(strand.unwrap()), + )])))) + } else { + self.parse_object_from_map( + BTreeMap::from([(key, Value::Strand(strand.unwrap()))]), + start, + ) + .map(Value::Object) + } + } + _ => self.parse_object_from_key(key, BTreeMap::new(), start).map(Value::Object), + } + } + "coordinates" => { + // found coordinates field, next must be a coordinates value but we don't know + // which until we match type. + let value = self.parse_value_field()?; + if !self.eat(t!(",")) { + // no comma object must end early. + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Value::Object(Object(BTreeMap::from([(key, value)])))); + } + + if self.eat(t!("}")) { + // object ends early. + return Ok(Value::Object(Object(BTreeMap::from([(key, value)])))); + } + + let type_key = self.parse_object_key()?; + expected!(self, t!(":")); + if type_key != "type" { + // not the right field, return object. + return self + .parse_object_from_key(type_key, BTreeMap::from([(key, value)]), start) + .map(Value::Object); + } + let peek = self.peek(); + let strand = self.token_value::(peek); + // match the type and then match the coordinates field to a value of that type. 
+ let (ate_comma, type_value) = match strand.as_ref().map(|x| x.as_str()) { + Ok("Point") => { + self.next(); + let ate_comma = self.eat(t!(",")); + if self.eat(t!("}")) { + if let Some(point) = Self::to_point(&value) { + return Ok(Value::Geometry(Geometry::Point(point))); + } + } + // At this point the value does not match, or there are more fields. + // since we matched `Ok("Point")` strand cannot be an error so this unwrap + // will never panic. + (ate_comma, Value::Strand(strand.unwrap())) + } + Ok("LineString") => { + self.next(); + let ate_comma = self.eat(t!(",")); + if self.eat(t!("}")) { + if let Some(point) = Self::to_line(&value) { + return Ok(Value::Geometry(Geometry::Line(point))); + } + } + (ate_comma, Value::Strand(strand.unwrap())) + } + Ok("Polygon") => { + self.next(); + let ate_comma = self.eat(t!(",")); + if self.eat(t!("}")) { + if let Some(point) = Self::to_polygon(&value) { + return Ok(Value::Geometry(Geometry::Polygon(point))); + } + } + (ate_comma, Value::Strand(strand.unwrap())) + } + Ok("MultiPoint") => { + self.next(); + let ate_comma = self.eat(t!(",")); + if self.eat(t!("}")) { + if let Some(point) = Self::to_multipolygon(&value) { + return Ok(Value::Geometry(Geometry::MultiPolygon(point))); + } + } + (ate_comma, Value::Strand(strand.unwrap())) + } + Ok("MultiLineString") => { + self.next(); + let ate_comma = self.eat(t!(",")); + if self.eat(t!("}")) { + if let Some(point) = Self::to_multiline(&value) { + return Ok(Value::Geometry(Geometry::MultiLine(point))); + } + } + (ate_comma, Value::Strand(strand.unwrap())) + } + Ok("MultiPolygon") => { + self.next(); + let ate_comma = self.eat(t!(",")); + if self.eat(t!("}")) { + if let Some(point) = Self::to_multipolygon(&value) { + return Ok(Value::Geometry(Geometry::MultiPolygon(point))); + } + } + (ate_comma, Value::Strand(strand.unwrap())) + } + _ => { + let value = self.parse_value_field()?; + (self.eat(t!(",")), value) + } + }; + // type field or coordinates value didn't match or the 
object continues after to + // fields. + + if !ate_comma { + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Value::Object(Object(BTreeMap::from([ + (key, value), + (type_key, type_value), + ])))); + } + self.parse_object_from_map( + BTreeMap::from([(key, value), (type_key, type_value)]), + start, + ) + .map(Value::Object) + } + "geometries" => { + let value = self.parse_value_field()?; + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Value::Object(Object(BTreeMap::from([(key, value)])))); + } + let type_key = self.parse_object_key()?; + expected!(self, t!(":")); + if type_key != "type" { + return self + .parse_object_from_key(type_key, BTreeMap::from([(key, value)]), start) + .map(Value::Object); + } + let peek = self.peek(); + let strand = self.token_value::(peek); + let (ate_comma, type_value) = + if let Ok("GeometryCollection") = strand.as_ref().map(|x| x.as_str()) { + self.next(); + let ate_comma = self.eat(t!(",")); + if self.eat(t!("}")) { + if let Value::Array(ref x) = value { + if x.iter().all(|x| matches!(x, Value::Geometry(_))) { + let Value::Array(x) = value else { + unreachable!() + }; + let geometries = x + .into_iter() + .map(|x| { + if let Value::Geometry(x) = x { + x + } else { + unreachable!() + } + }) + .collect(); + return Ok(Value::Geometry(Geometry::Collection(geometries))); + } + } + } + (ate_comma, Value::Strand(strand.unwrap())) + } else { + let value = self.parse_value_field()?; + (self.eat(t!(",")), value) + }; + + if !ate_comma { + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Value::Object(Object(BTreeMap::from([ + (key, value), + (type_key, type_value), + ])))); + } + self.parse_object_from_map( + BTreeMap::from([(key, value), (type_key, type_value)]), + start, + ) + .map(Value::Object) + } + _ => self.parse_object_from_key(key, BTreeMap::new(), start).map(Value::Object), + } + } + + fn parse_geometry_after_type( + &mut self, + start: Span, + key: String, + strand: 
Strand, + capture: F, + map: Fm, + ) -> ParseResult + where + F: FnOnce(&Value) -> Option, + Fm: FnOnce(R) -> Value, + { + // eat the strand with the type name. + self.next(); + if !self.eat(t!(",")) { + // there is not second field. not a geometry + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Value::Object(Object(BTreeMap::from([(key, Value::Strand(strand))])))); + } + let coord_key = self.parse_object_key()?; + expected!(self, t!(":")); + if coord_key != "coordinates" { + // next field was not correct, fallback to parsing plain object. + return self + .parse_object_from_key( + coord_key, + BTreeMap::from([(key, Value::Strand(strand))]), + start, + ) + .map(Value::Object); + } + let value = self.parse_value_field()?; + let comma = self.eat(t!(",")); + if !self.eat(t!("}")) { + // the object didn't end, either an error or not a geometry. + if !comma { + return Err(ParseError::new( + ParseErrorKind::UnclosedDelimiter { + expected: t!("}"), + should_close: start, + }, + self.last_span(), + )); + } + + return self + .parse_object_from_map( + BTreeMap::from([(key, Value::Strand(strand)), (coord_key, value)]), + start, + ) + .map(Value::Object); + } + + let Some(v) = capture(&value) else { + // failed to match the geometry value, just a plain object. + return Ok(Value::Object(Object(BTreeMap::from([ + (key, Value::Strand(strand)), + (coord_key, value), + ])))); + }; + // successfully matched the value, it is a geometry. 
+ Ok(map(v)) + } + + fn to_multipolygon(v: &Value) -> Option> { + let mut res = Vec::new(); + let Value::Array(v) = v else { + return None; + }; + for x in v.iter() { + res.push(Self::to_polygon(x)?); + } + Some(MultiPolygon::new(res)) + } + + fn to_multiline(v: &Value) -> Option> { + let mut res = Vec::new(); + let Value::Array(v) = v else { + return None; + }; + for x in v.iter() { + res.push(Self::to_line(x)?); + } + Some(MultiLineString::new(res)) + } + + fn to_multipoint(v: &Value) -> Option> { + let mut res = Vec::new(); + let Value::Array(v) = v else { + return None; + }; + for x in v.iter() { + res.push(Self::to_point(x)?); + } + Some(MultiPoint::new(res)) + } + + fn to_polygon(v: &Value) -> Option> { + let mut res = Vec::new(); + let Value::Array(v) = v else { + return None; + }; + if v.is_empty() { + return None; + } + let first = Self::to_line(&v[0])?; + for x in &v[1..] { + res.push(Self::to_line(x)?); + } + Some(Polygon::new(first, res)) + } + + fn to_line(v: &Value) -> Option> { + let mut res = Vec::new(); + let Value::Array(v) = v else { + return None; + }; + for x in v.iter() { + res.push(Self::to_point(x)?); + } + Some(LineString::from(res)) + } + + fn to_point(v: &Value) -> Option> { + let Value::Array(v) = v else { + return None; + }; + if v.len() != 2 { + return None; + } + // FIXME: This truncates decimals and large integers into a f64. + let Value::Number(ref a) = v.0[0] else { + return None; + }; + let Value::Number(ref b) = v.0[1] else { + return None; + }; + Some(Point::from((a.clone().try_into().ok()?, b.clone().try_into().ok()?))) + } + + fn parse_object_from_key( + &mut self, + key: String, + mut map: BTreeMap, + start: Span, + ) -> ParseResult { + let v = self.parse_value_field()?; + map.insert(key, v); + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Object(map)); + } + self.parse_object_from_map(map, start) + } + + /// Parses an object. 
+ /// + /// Expects the span of the starting `{` as an argument. + /// + /// # Parser state + /// Expects the first `{` to already have been eaten. + pub(super) fn parse_object(&mut self, start: Span) -> ParseResult { + self.parse_object_from_map(BTreeMap::new(), start) + } + + fn parse_object_from_map( + &mut self, + mut map: BTreeMap, + start: Span, + ) -> ParseResult { + loop { + if self.eat(t!("}")) { + return Ok(Object(map)); + } + + let (key, value) = self.parse_object_entry()?; + // TODO: Error on duplicate key? + map.insert(key, value); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!("}"), start)?; + return Ok(Object(map)); + } + } + } + + /// Parses a block of statements + /// + /// # Parser State + /// Expects the starting `{` to have already been eaten and its span to be handed to this + /// functions as the `start` parameter. + pub(super) fn parse_block(&mut self, start: Span) -> ParseResult { + let mut statements = Vec::new(); + loop { + while self.eat(t!(";")) {} + if self.eat(t!("}")) { + break; + } + + let stmt = self.parse_entry()?; + statements.push(stmt); + if !self.eat(t!(";")) { + self.expect_closing_delimiter(t!("}"), start)?; + break; + } + } + Ok(Block(statements)) + } + + /// Parse a single entry in the object, i.e. `field: value + 1` in the object `{ field: value + + /// 1 }` + fn parse_object_entry(&mut self) -> ParseResult<(String, Value)> { + let text = self.parse_object_key()?; + expected!(self, t!(":")); + let value = self.parse_value_field()?; + Ok((text, value)) + } + + /// Parses the key of an object, i.e. `field` in the object `{ field: 1 }`. 
+ pub fn parse_object_key(&mut self) -> ParseResult { + let token = self.peek(); + match token.kind { + TokenKind::Keyword(_) + | TokenKind::Language(_) + | TokenKind::Algorithm(_) + | TokenKind::Distance(_) => { + self.pop_peek(); + let str = self.lexer.reader.span(token.span); + // Lexer should ensure that the token is valid utf-8 + let str = std::str::from_utf8(str).unwrap().to_owned(); + Ok(str) + } + TokenKind::Identifier | TokenKind::Strand => { + self.pop_peek(); + let str = self.lexer.string.take().unwrap(); + Ok(str) + } + TokenKind::Number(_) => { + self.pop_peek(); + Ok(self.lexer.string.take().unwrap()) + } + x => unexpected!(self, x, "an object key"), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::syn::Parse; + + #[test] + fn block_value() { + let sql = "{ 80 }"; + let out = Value::parse(sql); + assert_eq!(sql, out.to_string()) + } + + #[test] + fn block_ifelse() { + let sql = "{ RETURN IF true THEN 50 ELSE 40 END; }"; + let out = Value::parse(sql); + assert_eq!(sql, out.to_string()) + } + + #[test] + fn block_multiple() { + let sql = r#"{ + + LET $person = (SELECT * FROM person WHERE first = $first AND last = $last AND birthday = $birthday); + + RETURN IF $person[0].id THEN + $person[0] + ELSE + (CREATE person SET first = $first, last = $last, birthday = $birthday) + END; + +}"#; + let out = Value::parse(sql); + assert_eq!(sql, format!("{:#}", out)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::syn::Parse; + + #[test] + fn simple() { + let sql = "(-0.118092, 51.509865)"; + let out = Value::parse(sql); + assert!(matches!(out, Value::Geometry(_))); + assert_eq!("(-0.118092, 51.509865)", format!("{}", out)); + } + + #[test] + fn point() { + let sql = r#"{ + type: 'Point', + coordinates: [-0.118092, 51.509865] + }"#; + let out = Value::parse(sql); + assert!(matches!(out, Value::Geometry(_))); + assert_eq!("(-0.118092, 51.509865)", format!("{}", out)); + } + + #[test] + fn polygon_exterior() { + let sql = r#"{ + 
type: 'Polygon', + coordinates: [ + [ + [-0.38314819, 51.37692386], [0.1785278, 51.37692386], + [0.1785278, 51.61460570], [-0.38314819, 51.61460570], + [-0.38314819, 51.37692386] + ] + ] + }"#; + let out = Value::parse(sql); + assert!(matches!(out, Value::Geometry(_))); + assert_eq!("{ type: 'Polygon', coordinates: [[[-0.38314819, 51.37692386], [0.1785278, 51.37692386], [0.1785278, 51.6146057], [-0.38314819, 51.6146057], [-0.38314819, 51.37692386]]] }", format!("{}", out)); + } + + #[test] + fn polygon_interior() { + let sql = r#"{ + type: 'Polygon', + coordinates: [ + [ + [-0.38314819, 51.37692386], [0.1785278, 51.37692386], + [0.1785278, 51.61460570], [-0.38314819, 51.61460570], + [-0.38314819, 51.37692386] + ], + [ + [-0.38314819, 51.37692386], [0.1785278, 51.37692386], + [0.1785278, 51.61460570], [-0.38314819, 51.61460570], + [-0.38314819, 51.37692386] + ] + ] + }"#; + let out = Value::parse(sql); + assert!(matches!(out, Value::Geometry(_))); + assert_eq!("{ type: 'Polygon', coordinates: [[[-0.38314819, 51.37692386], [0.1785278, 51.37692386], [0.1785278, 51.6146057], [-0.38314819, 51.6146057], [-0.38314819, 51.37692386]], [[[-0.38314819, 51.37692386], [0.1785278, 51.37692386], [0.1785278, 51.6146057], [-0.38314819, 51.6146057], [-0.38314819, 51.37692386]]]] }", format!("{}", out)); + } +} diff --git a/lib/src/syn/v2/parser/prime.rs b/lib/src/syn/v2/parser/prime.rs new file mode 100644 index 00000000..457689b8 --- /dev/null +++ b/lib/src/syn/v2/parser/prime.rs @@ -0,0 +1,584 @@ +use geo::Point; + +use super::{ParseResult, Parser}; +use crate::{ + sql::{ + Array, Dir, Function, Geometry, Ident, Idiom, Mock, Number, Part, Script, Strand, Subquery, + Table, Value, + }, + syn::v2::{ + lexer::Lexer, + parser::{ + mac::{expected, unexpected}, + ParseError, ParseErrorKind, + }, + token::{t, Span, TokenKind}, + }, +}; + +impl Parser<'_> { + /// Parse a what primary. + /// + /// What's are values which are more restricted in what expressions they can contain. 
+ pub fn parse_what_primary(&mut self) -> ParseResult { + match self.peek_kind() { + TokenKind::Duration => { + let duration = self.next_token_value()?; + Ok(Value::Duration(duration)) + } + TokenKind::DateTime => { + let datetime = self.next_token_value()?; + Ok(Value::Datetime(datetime)) + } + t!("r\"") => { + self.pop_peek(); + Ok(Value::Thing(self.parse_record_string(true)?)) + } + t!("r'") => { + self.pop_peek(); + Ok(Value::Thing(self.parse_record_string(false)?)) + } + t!("$param") => { + let param = self.next_token_value()?; + Ok(Value::Param(param)) + } + t!("FUNCTION") => { + self.pop_peek(); + Ok(Value::Function(Box::new(self.parse_script()?))) + } + t!("IF") => { + let stmt = self.parse_if_stmt()?; + Ok(Value::Subquery(Box::new(Subquery::Ifelse(stmt)))) + } + t!("(") => { + let token = self.pop_peek(); + self.parse_inner_subquery(Some(token.span)).map(|x| Value::Subquery(Box::new(x))) + } + t!("<") => { + self.pop_peek(); + expected!(self, t!("FUTURE")); + expected!(self, t!(">")); + let start = expected!(self, t!("{")).span; + let block = self.parse_block(start)?; + Ok(Value::Future(Box::new(crate::sql::Future(block)))) + } + t!("|") => { + let start = self.pop_peek().span; + self.parse_mock(start).map(Value::Mock) + } + t!("/") => { + let token = self.pop_peek(); + let regex = self.lexer.relex_regex(token); + self.token_value(regex).map(Value::Regex) + } + t!("RETURN") + | t!("SELECT") + | t!("CREATE") + | t!("UPDATE") + | t!("DELETE") + | t!("RELATE") + | t!("DEFINE") + | t!("REMOVE") => self.parse_inner_subquery(None).map(|x| Value::Subquery(Box::new(x))), + t!("fn") => self.parse_custom_function().map(|x| Value::Function(Box::new(x))), + t!("ml") => self.parse_model().map(|x| Value::Model(Box::new(x))), + x => { + if !self.peek_can_be_ident() { + unexpected!(self, x, "a value") + } + + let token = self.next(); + match self.peek_kind() { + t!("::") | t!("(") => self.parse_builtin(token.span), + t!(":") => { + let str = self.token_value::(token)?.0; 
+ self.parse_thing_or_range(str) + } + x => { + if x.has_data() { + // x had data and possibly overwrote the data from token, This is + // always an invalid production so just return error. + unexpected!(self, x, "a value"); + } else { + Ok(Value::Table(self.token_value(token)?)) + } + } + } + } + } + } + + /// Parse an expressions + pub fn parse_idiom_expression(&mut self) -> ParseResult { + let token = self.peek(); + let value = match token.kind { + t!("NONE") => { + self.pop_peek(); + return Ok(Value::None); + } + t!("NULL") => { + self.pop_peek(); + return Ok(Value::Null); + } + t!("true") => { + self.pop_peek(); + return Ok(Value::Bool(true)); + } + t!("false") => { + self.pop_peek(); + return Ok(Value::Bool(false)); + } + t!("<") => { + self.pop_peek(); + // Casting should already have been parsed. + expected!(self, t!("FUTURE")); + self.expect_closing_delimiter(t!(">"), token.span)?; + let next = expected!(self, t!("{")).span; + let block = self.parse_block(next)?; + return Ok(Value::Future(Box::new(crate::sql::Future(block)))); + } + TokenKind::Strand => { + self.pop_peek(); + if self.legacy_strands { + return self.parse_legacy_strand(); + } else { + let strand = self.token_value(token)?; + return Ok(Value::Strand(strand)); + } + } + TokenKind::Duration => { + self.pop_peek(); + let duration = self.token_value(token)?; + Value::Duration(duration) + } + TokenKind::Number(_) => { + self.pop_peek(); + let number = self.token_value(token)?; + Value::Number(number) + } + TokenKind::Uuid => { + self.pop_peek(); + let uuid = self.token_value(token)?; + Value::Uuid(uuid) + } + TokenKind::DateTime => { + self.pop_peek(); + let datetime = self.token_value(token)?; + Value::Datetime(datetime) + } + t!("r\"") => { + self.pop_peek(); + Value::Thing(self.parse_record_string(true)?) + } + t!("r'") => { + self.pop_peek(); + Value::Thing(self.parse_record_string(false)?) 
+ } + t!("$param") => { + self.pop_peek(); + let param = self.token_value(token)?; + Value::Param(param) + } + t!("FUNCTION") => { + self.pop_peek(); + Value::Function(Box::new(self.parse_script()?)) + } + t!("->") => { + self.pop_peek(); + let graph = self.parse_graph(Dir::Out)?; + Value::Idiom(Idiom(vec![Part::Graph(graph)])) + } + t!("<->") => { + self.pop_peek(); + let graph = self.parse_graph(Dir::Both)?; + Value::Idiom(Idiom(vec![Part::Graph(graph)])) + } + t!("<-") => { + self.pop_peek(); + let graph = self.parse_graph(Dir::In)?; + Value::Idiom(Idiom(vec![Part::Graph(graph)])) + } + t!("[") => { + self.pop_peek(); + self.parse_array(token.span).map(Value::Array)? + } + t!("{") => { + self.pop_peek(); + self.parse_object_like(token.span)? + } + t!("|") => { + self.pop_peek(); + self.parse_mock(token.span).map(Value::Mock)? + } + t!("IF") => { + self.pop_peek(); + let stmt = self.parse_if_stmt()?; + Value::Subquery(Box::new(Subquery::Ifelse(stmt))) + } + t!("(") => { + self.pop_peek(); + self.parse_inner_subquery_or_coordinate(token.span)? + } + t!("/") => { + self.pop_peek(); + let regex = self.lexer.relex_regex(token); + self.token_value(regex).map(Value::Regex)? + } + t!("RETURN") + | t!("SELECT") + | t!("CREATE") + | t!("UPDATE") + | t!("DELETE") + | t!("RELATE") + | t!("DEFINE") + | t!("REMOVE") => self.parse_inner_subquery(None).map(|x| Value::Subquery(Box::new(x)))?, + t!("fn") => { + self.pop_peek(); + self.parse_custom_function().map(|x| Value::Function(Box::new(x)))? + } + t!("ml") => { + self.pop_peek(); + self.parse_model().map(|x| Value::Model(Box::new(x)))? + } + _ => { + self.pop_peek(); + match self.peek_kind() { + t!("::") | t!("(") => self.parse_builtin(token.span)?, + t!(":") => { + let str = self.token_value::(token)?.0; + self.parse_thing_or_range(str)? 
+ } + x => { + if x.has_data() { + unexpected!(self, x, "a value"); + } else if self.table_as_field { + Value::Idiom(Idiom(vec![Part::Field(self.token_value(token)?)])) + } else { + Value::Table(self.token_value(token)?) + } + } + } + } + }; + + // Parse the rest of the idiom if it is being continued. + if Self::continues_idiom(self.peek_kind()) { + match value { + Value::None + | Value::Null + | Value::Bool(_) + | Value::Future(_) + | Value::Strand(_) => unreachable!(), + Value::Idiom(Idiom(x)) => self.parse_remaining_value_idiom(x), + Value::Table(Table(x)) => { + self.parse_remaining_value_idiom(vec![Part::Field(Ident(x))]) + } + x => self.parse_remaining_value_idiom(vec![Part::Start(x)]), + } + } else { + Ok(value) + } + } + + /// Parses an array production + /// + /// # Parser state + /// Expects the starting `[` to already be eaten and its span passed as an argument. + pub fn parse_array(&mut self, start: Span) -> ParseResult { + let mut values = Vec::new(); + loop { + if self.eat(t!("]")) { + break; + } + values.push(self.parse_value_field()?); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!("]"), start)?; + break; + } + } + + Ok(Array(values)) + } + + /// Parse a mock `|foo:1..3|` + /// + /// # Parser State + /// Expects the starting `|` already be eaten and its span passed as an argument. 
+ pub fn parse_mock(&mut self, start: Span) -> ParseResult { + let name = self.next_token_value::()?.0; + expected!(self, t!(":")); + let from = self.next_token_value()?; + let to = self.eat(t!("..")).then(|| self.next_token_value()).transpose()?; + self.expect_closing_delimiter(t!("|"), start)?; + if let Some(to) = to { + Ok(Mock::Range(name, from, to)) + } else { + Ok(Mock::Count(name, from)) + } + } + + pub fn parse_full_subquery(&mut self) -> ParseResult { + let peek = self.peek(); + match peek.kind { + t!("(") => { + self.pop_peek(); + self.parse_inner_subquery(Some(peek.span)) + } + t!("IF") => { + self.pop_peek(); + let if_stmt = self.parse_if_stmt()?; + Ok(Subquery::Ifelse(if_stmt)) + } + _ => self.parse_inner_subquery(None), + } + } + + pub fn parse_inner_subquery_or_coordinate(&mut self, start: Span) -> ParseResult { + let next = self.peek(); + let res = match next.kind { + t!("RETURN") => { + self.pop_peek(); + let stmt = self.parse_return_stmt()?; + Subquery::Output(stmt) + } + t!("SELECT") => { + self.pop_peek(); + let stmt = self.parse_select_stmt()?; + Subquery::Select(stmt) + } + t!("CREATE") => { + self.pop_peek(); + let stmt = self.parse_create_stmt()?; + Subquery::Create(stmt) + } + t!("UPDATE") => { + self.pop_peek(); + let stmt = self.parse_update_stmt()?; + Subquery::Update(stmt) + } + t!("DELETE") => { + self.pop_peek(); + let stmt = self.parse_delete_stmt()?; + Subquery::Delete(stmt) + } + t!("RELATE") => { + self.pop_peek(); + let stmt = self.parse_relate_stmt()?; + Subquery::Relate(stmt) + } + t!("DEFINE") => { + self.pop_peek(); + let stmt = self.parse_define_stmt()?; + Subquery::Define(stmt) + } + t!("REMOVE") => { + self.pop_peek(); + let stmt = self.parse_remove_stmt()?; + Subquery::Remove(stmt) + } + _ => { + let value = self.parse_value_field()?; + Subquery::Value(value) + } + }; + match res { + Subquery::Value(Value::Number(x)) => { + if self.eat(t!(",")) { + // TODO: Fix number parsing. 
+ let b = self.next_token_value::()?; + + let a: f64 = x + .try_into() + .map_err(|_| ParseError::new(ParseErrorKind::Todo, next.span))?; + let b: f64 = b + .try_into() + .map_err(|_| ParseError::new(ParseErrorKind::Todo, next.span))?; + + self.expect_closing_delimiter(t!(")"), start)?; + Ok(Value::Geometry(Geometry::Point(Point::from((a, b))))) + } else { + self.expect_closing_delimiter(t!(")"), start)?; + Ok(Value::Subquery(Box::new(Subquery::Value(Value::Number(x))))) + } + } + x => { + self.expect_closing_delimiter(t!(")"), start)?; + Ok(Value::Subquery(Box::new(x))) + } + } + } + + pub fn parse_inner_subquery(&mut self, start: Option) -> ParseResult { + let res = match self.peek().kind { + t!("RETURN") => { + self.pop_peek(); + let stmt = self.parse_return_stmt()?; + Subquery::Output(stmt) + } + t!("SELECT") => { + self.pop_peek(); + let stmt = self.parse_select_stmt()?; + Subquery::Select(stmt) + } + t!("CREATE") => { + self.pop_peek(); + let stmt = self.parse_create_stmt()?; + Subquery::Create(stmt) + } + t!("UPDATE") => { + self.pop_peek(); + let stmt = self.parse_update_stmt()?; + Subquery::Update(stmt) + } + t!("DELETE") => { + self.pop_peek(); + let stmt = self.parse_delete_stmt()?; + Subquery::Delete(stmt) + } + t!("RELATE") => { + self.pop_peek(); + let stmt = self.parse_relate_stmt()?; + Subquery::Relate(stmt) + } + t!("DEFINE") => { + self.pop_peek(); + let stmt = self.parse_define_stmt()?; + Subquery::Define(stmt) + } + t!("REMOVE") => { + self.pop_peek(); + let stmt = self.parse_remove_stmt()?; + Subquery::Remove(stmt) + } + _ => { + let value = self.parse_value_field()?; + Subquery::Value(value) + } + }; + if let Some(start) = start { + self.expect_closing_delimiter(t!(")"), start)?; + } + Ok(res) + } + + /// Parses a strand with legacy rules, parsing to a record id, datetime or uuid if the string + /// matches. 
+ pub fn parse_legacy_strand(&mut self) -> ParseResult { + let text = self.lexer.string.take().unwrap(); + if let Ok(x) = Parser::new(text.as_bytes()).parse_thing() { + return Ok(Value::Thing(x)); + } + if let Ok(x) = Lexer::new(text.as_bytes()).lex_only_datetime() { + return Ok(Value::Datetime(x)); + } + if let Ok(x) = Lexer::new(text.as_bytes()).lex_only_uuid() { + return Ok(Value::Uuid(x)); + } + Ok(Value::Strand(Strand(text))) + } + + fn parse_script(&mut self) -> ParseResult { + let start = expected!(self, t!("(")).span; + let mut args = Vec::new(); + loop { + if self.eat(t!(")")) { + break; + } + + args.push(self.parse_value_field()?); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!(")"), start)?; + break; + } + } + expected!(self, t!("{")); + let body = self + .lexer + .lex_js_function_body() + .map_err(|(e, span)| ParseError::new(ParseErrorKind::InvalidToken(e), span))?; + Ok(Function::Script(Script(body), args)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::syn::Parse; + + #[test] + fn subquery_expression_statement() { + let sql = "(1 + 2 + 3)"; + let out = Value::parse(sql); + assert_eq!("(1 + 2 + 3)", format!("{}", out)) + } + + #[test] + fn subquery_ifelse_statement() { + let sql = "IF true THEN false END"; + let out = Value::parse(sql); + assert_eq!("IF true THEN false END", format!("{}", out)) + } + + #[test] + fn subquery_select_statement() { + let sql = "(SELECT * FROM test)"; + let out = Value::parse(sql); + assert_eq!("(SELECT * FROM test)", format!("{}", out)) + } + + #[test] + fn subquery_define_statement() { + let sql = "(DEFINE EVENT foo ON bar WHEN $event = 'CREATE' THEN (CREATE x SET y = 1))"; + let out = Value::parse(sql); + assert_eq!( + "(DEFINE EVENT foo ON bar WHEN $event = 'CREATE' THEN (CREATE x SET y = 1))", + format!("{}", out) + ) + } + + #[test] + fn subquery_remove_statement() { + let sql = "(REMOVE EVENT foo_event ON foo)"; + let out = Value::parse(sql); + assert_eq!("(REMOVE EVENT foo_event ON 
foo)", format!("{}", out)) + } + + #[test] + fn mock_count() { + let sql = "|test:1000|"; + let out = Value::parse(sql); + assert_eq!("|test:1000|", format!("{}", out)); + assert_eq!(out, Value::from(Mock::Count(String::from("test"), 1000))); + } + + #[test] + fn mock_range() { + let sql = "|test:1..1000|"; + let out = Value::parse(sql); + assert_eq!("|test:1..1000|", format!("{}", out)); + assert_eq!(out, Value::from(Mock::Range(String::from("test"), 1, 1000))); + } + + #[test] + fn regex_simple() { + let sql = "/test/"; + let out = Value::parse(sql); + assert_eq!("/test/", format!("{}", out)); + let Value::Regex(regex) = out else { + panic!() + }; + assert_eq!(regex, "test".parse().unwrap()); + } + + #[test] + fn regex_complex() { + let sql = r"/(?i)test\/[a-z]+\/\s\d\w{1}.*/"; + let out = Value::parse(sql); + assert_eq!(r"/(?i)test/[a-z]+/\s\d\w{1}.*/", format!("{}", out)); + let Value::Regex(regex) = out else { + panic!() + }; + assert_eq!(regex, r"(?i)test/[a-z]+/\s\d\w{1}.*".parse().unwrap()); + } +} diff --git a/lib/src/syn/v2/parser/stmt/create.rs b/lib/src/syn/v2/parser/stmt/create.rs new file mode 100644 index 00000000..08a48dbe --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/create.rs @@ -0,0 +1,27 @@ +use crate::{ + sql::{statements::CreateStatement, Values}, + syn::v2::{ + parser::{ParseResult, Parser}, + token::t, + }, +}; + +impl Parser<'_> { + pub fn parse_create_stmt(&mut self) -> ParseResult { + let only = self.eat(t!("ONLY")); + let what = Values(self.parse_what_list()?); + let data = self.try_parse_data()?; + let output = self.try_parse_output()?; + let timeout = self.try_parse_timeout()?; + let parallel = self.eat(t!("PARALLEL")); + + Ok(CreateStatement { + only, + what, + data, + output, + timeout, + parallel, + }) + } +} diff --git a/lib/src/syn/v2/parser/stmt/define.rs b/lib/src/syn/v2/parser/stmt/define.rs new file mode 100644 index 00000000..4d05c281 --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/define.rs @@ -0,0 +1,655 @@ +use crate::{ + 
sql::{ + filter::Filter, + index::{Distance, VectorType}, + statements::{ + DefineAnalyzerStatement, DefineDatabaseStatement, DefineEventStatement, + DefineFieldStatement, DefineFunctionStatement, DefineIndexStatement, + DefineNamespaceStatement, DefineParamStatement, DefineScopeStatement, DefineStatement, + DefineTableStatement, DefineTokenStatement, DefineUserStatement, + }, + tokenizer::Tokenizer, + Ident, Idioms, Index, Param, Permissions, Scoring, Strand, Values, + }, + syn::v2::{ + parser::{ + mac::{expected, unexpected}, + ParseResult, Parser, + }, + token::{t, TokenKind}, + }, +}; + +impl Parser<'_> { + pub fn parse_define_stmt(&mut self) -> ParseResult { + match self.next().kind { + t!("NAMESPACE") => self.parse_define_namespace().map(DefineStatement::Namespace), + t!("DATABASE") => self.parse_define_database().map(DefineStatement::Database), + t!("FUNCTION") => self.parse_define_function().map(DefineStatement::Function), + t!("USER") => self.parse_define_user().map(DefineStatement::User), + t!("TOKEN") => self.parse_define_token().map(DefineStatement::Token), + t!("SCOPE") => self.parse_define_scope().map(DefineStatement::Scope), + t!("PARAM") => self.parse_define_param().map(DefineStatement::Param), + t!("TABLE") => self.parse_define_table().map(DefineStatement::Table), + t!("EVENT") => self.parse_define_event().map(DefineStatement::Event), + t!("FIELD") => self.parse_define_field().map(DefineStatement::Field), + t!("INDEX") => self.parse_define_index().map(DefineStatement::Index), + t!("ANALYZER") => self.parse_define_analyzer().map(DefineStatement::Analyzer), + x => unexpected!(self, x, "a define statement keyword"), + } + } + + pub fn parse_define_namespace(&mut self) -> ParseResult { + let name = self.next_token_value()?; + let comment = self.eat(t!("COMMENT")).then(|| self.next_token_value()).transpose()?; + Ok(DefineNamespaceStatement { + id: None, + name, + comment, + }) + } + + pub fn parse_define_database(&mut self) -> ParseResult { + let name = 
self.next_token_value()?; + let mut res = DefineDatabaseStatement { + id: None, + name, + comment: None, + changefeed: None, + }; + loop { + match self.peek_kind() { + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + t!("CHANGEFEED") => { + self.pop_peek(); + res.changefeed = Some(self.parse_changefeed()?); + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_function(&mut self) -> ParseResult { + let name = self.parse_custom_function_name()?; + let token = expected!(self, t!("(")).span; + let mut args = Vec::new(); + loop { + if self.eat(t!(")")) { + break; + } + + let param = self.next_token_value::()?.0; + expected!(self, t!(":")); + let kind = self.parse_inner_kind()?; + + args.push((param, kind)); + + if !self.eat(t!(",")) { + self.expect_closing_delimiter(t!(")"), token)?; + break; + } + } + + let next = expected!(self, t!("{")).span; + let block = self.parse_block(next)?; + + let mut res = DefineFunctionStatement { + name, + args, + block, + ..Default::default() + }; + + loop { + match self.peek_kind() { + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + t!("PERMISSIONS") => { + self.pop_peek(); + res.permissions = self.parse_permission_value()?; + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_user(&mut self) -> ParseResult { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + let base = self.parse_base(false)?; + + let mut res = DefineUserStatement::from_parsed_values( + name, + base, + vec!["Viewer".into()], // New users get the viewer role by default + ); + + loop { + match self.peek_kind() { + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + t!("PASSWORD") => { + self.pop_peek(); + res.set_password(&self.next_token_value::()?.0); + } + t!("PASSHASH") => { + self.pop_peek(); + res.set_passhash(self.next_token_value::()?.0); + } + t!("ROLES") => { + self.pop_peek(); + res.roles = 
vec![self.next_token_value()?]; + while self.eat(t!(",")) { + res.roles.push(self.next_token_value()?); + } + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_token(&mut self) -> ParseResult { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + let base = self.parse_base(true)?; + + let mut res = DefineTokenStatement { + name, + base, + ..Default::default() + }; + + loop { + match self.peek_kind() { + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + t!("VALUE") => { + self.pop_peek(); + res.code = self.next_token_value::()?.0; + } + t!("TYPE") => { + self.pop_peek(); + match self.next().kind { + TokenKind::Algorithm(x) => { + res.kind = x; + } + x => unexpected!(self, x, "a token algorithm"), + } + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_scope(&mut self) -> ParseResult { + let name = self.next_token_value()?; + let mut res = DefineScopeStatement { + name, + code: DefineScopeStatement::random_code(), + ..Default::default() + }; + + loop { + match self.peek_kind() { + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + t!("SESSION") => { + self.pop_peek(); + res.session = Some(self.next_token_value()?); + } + t!("SIGNUP") => { + self.pop_peek(); + res.signup = Some(self.parse_value()?); + } + t!("SIGNIN") => { + self.pop_peek(); + res.signin = Some(self.parse_value()?); + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_param(&mut self) -> ParseResult { + let name = self.next_token_value::()?.0; + + let mut res = DefineParamStatement { + name, + ..Default::default() + }; + + loop { + match self.peek_kind() { + t!("VALUE") => { + self.pop_peek(); + res.value = self.parse_value()?; + } + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + t!("PERMISSIONS") => { + self.pop_peek(); + res.permissions = self.parse_permission_value()?; + } + _ => break, + } + } + Ok(res) + } + + pub 
fn parse_define_table(&mut self) -> ParseResult { + let name = self.next_token_value()?; + let mut res = DefineTableStatement { + name, + permissions: Permissions::none(), + ..Default::default() + }; + + loop { + match self.peek_kind() { + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + t!("DROP") => { + self.pop_peek(); + res.drop = true; + } + t!("SCHEMALESS") => { + self.pop_peek(); + res.full = false; + } + t!("SCHEMAFULL") => { + self.pop_peek(); + res.full = true; + } + t!("PERMISSIONS") => { + self.pop_peek(); + res.permissions = self.parse_permission(false)?; + } + t!("CHANGEFEED") => { + self.pop_peek(); + res.changefeed = Some(self.parse_changefeed()?); + } + t!("AS") => { + self.pop_peek(); + match self.peek_kind() { + t!("(") => { + let open = self.pop_peek().span; + res.view = Some(self.parse_view()?); + self.expect_closing_delimiter(t!(")"), open)?; + } + t!("SELECT") => { + res.view = Some(self.parse_view()?); + } + x => unexpected!(self, x, "`SELECT`"), + } + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_event(&mut self) -> ParseResult { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + self.eat(t!("TABLE")); + let what = self.next_token_value()?; + + let mut res = DefineEventStatement { + name, + what, + ..Default::default() + }; + + loop { + match self.peek_kind() { + t!("WHEN") => { + self.pop_peek(); + res.when = self.parse_value()?; + } + t!("THEN") => { + self.pop_peek(); + res.then = Values(vec![self.parse_value()?]); + while self.eat(t!(",")) { + res.then.0.push(self.parse_value()?) 
+ } + } + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + _ => break, + } + } + Ok(res) + } + + pub fn parse_define_field(&mut self) -> ParseResult { + let name = self.parse_local_idiom()?; + expected!(self, t!("ON")); + self.eat(t!("TABLE")); + let what = self.next_token_value()?; + + let mut res = DefineFieldStatement { + name, + what, + ..Default::default() + }; + + loop { + match self.peek_kind() { + // FLEX, FLEXI and FLEXIBLE are all the same token type. + t!("FLEXIBLE") => { + self.pop_peek(); + res.flex = true; + } + t!("TYPE") => { + self.pop_peek(); + res.kind = Some(self.parse_inner_kind()?); + } + t!("VALUE") => { + self.pop_peek(); + res.value = Some(self.parse_value()?); + } + t!("ASSERT") => { + self.pop_peek(); + res.assert = Some(self.parse_value()?); + } + t!("DEFAULT") => { + self.pop_peek(); + res.default = Some(self.parse_value()?); + } + t!("PERMISSIONS") => { + self.pop_peek(); + res.permissions = self.parse_permission(true)?; + } + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_index(&mut self) -> ParseResult { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + self.eat(t!("TABLE")); + let what = self.next_token_value()?; + + let mut res = DefineIndexStatement { + name, + what, + ..Default::default() + }; + + loop { + match self.peek_kind() { + // COLUMS and FIELDS are the same tokenkind + t!("FIELDS") => { + self.pop_peek(); + res.cols = Idioms(vec![self.parse_local_idiom()?]); + while self.eat(t!(",")) { + res.cols.0.push(self.parse_local_idiom()?); + } + } + t!("UNIQUE") => { + self.pop_peek(); + res.index = Index::Uniq; + } + t!("SEARCH") => { + self.pop_peek(); + let analyzer = + self.eat(t!("ANALYZER")).then(|| self.next_token_value()).transpose()?; + let scoring = match self.next().kind { + t!("VS") => Scoring::Vs, + t!("BM25") => { + if self.eat(t!("(")) { + let open = 
self.last_span(); + let k1 = self.next_token_value()?; + expected!(self, t!(",")); + let b = self.next_token_value()?; + self.expect_closing_delimiter(t!(")"), open)?; + Scoring::Bm { + k1, + b, + } + } else { + Scoring::bm25() + } + } + x => unexpected!(self, x, "`VS` or `BM25`"), + }; + + // TODO: Propose change in how order syntax works. + let doc_ids_order = self + .eat(t!("DOC_IDS_ORDER")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + let doc_lengths_order = self + .eat(t!("DOC_LENGTHS_ORDER")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + let postings_order = self + .eat(t!("POSTINGS_ORDER")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + let terms_order = self + .eat(t!("TERMS_ORDER")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + let doc_ids_cache = self + .eat(t!("DOC_IDS_CACHE")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + let doc_lengths_cache = self + .eat(t!("DOC_LENGTHS_CACHE")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + let postings_cache = self + .eat(t!("POSTINGS_CACHE")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + let terms_cache = self + .eat(t!("TERMS_CACHE")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + + let hl = self.eat(t!("HIGHLIGHTS")); + + res.index = Index::Search(crate::sql::index::SearchParams { + az: analyzer.unwrap_or_else(|| Ident::from("like")), + sc: scoring, + hl, + doc_ids_order, + doc_lengths_order, + postings_order, + terms_order, + doc_ids_cache, + doc_lengths_cache, + postings_cache, + terms_cache, + }); + } + t!("MTREE") => { + self.pop_peek(); + expected!(self, t!("DIMENSION")); + let dimension = self.next_token_value()?; + let distance = self.try_parse_distance()?.unwrap_or(Distance::Euclidean); + let capacity = self + .eat(t!("CAPACITY")) + .then(|| self.next_token_value()) + .transpose()? 
+ .unwrap_or(40); + + let doc_ids_order = self + .eat(t!("DOC_IDS_ORDER")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + + let doc_ids_cache = self + .eat(t!("DOC_IDS_CACHE")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + + let mtree_cache = self + .eat(t!("MTREE_CACHE")) + .then(|| self.next_token_value()) + .transpose()? + .unwrap_or(100); + + res.index = Index::MTree(crate::sql::index::MTreeParams { + dimension, + distance, + capacity, + doc_ids_order, + doc_ids_cache, + mtree_cache, + vector_type: VectorType::F64, + }) + } + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + _ => break, + } + } + + Ok(res) + } + + pub fn parse_define_analyzer(&mut self) -> ParseResult { + let name = self.next_token_value()?; + let mut res = DefineAnalyzerStatement { + name, + function: None, + tokenizers: None, + filters: None, + comment: None, + }; + loop { + match self.peek_kind() { + t!("FILTERS") => { + self.pop_peek(); + let mut filters = Vec::new(); + loop { + match self.next().kind { + t!("ASCII") => { + filters.push(Filter::Ascii); + } + t!("LOWERCASE") => { + filters.push(Filter::Lowercase); + } + t!("UPPERCASE") => { + filters.push(Filter::Uppercase); + } + t!("EDGENGRAM") => { + let open_span = expected!(self, t!("(")).span; + let a = self.next_token_value()?; + expected!(self, t!(",")); + let b = self.next_token_value()?; + self.expect_closing_delimiter(t!(")"), open_span)?; + filters.push(Filter::EdgeNgram(a, b)); + } + t!("NGRAM") => { + let open_span = expected!(self, t!("(")).span; + let a = self.next_token_value()?; + expected!(self, t!(",")); + let b = self.next_token_value()?; + self.expect_closing_delimiter(t!(")"), open_span)?; + filters.push(Filter::Ngram(a, b)); + } + t!("SNOWBALL") => { + let open_span = expected!(self, t!("(")).span; + let language = self.next_token_value()?; + self.expect_closing_delimiter(t!(")"), open_span)?; + 
filters.push(Filter::Snowball(language)) + } + x => unexpected!(self, x, "a filter"), + } + if !self.eat(t!(",")) { + break; + } + } + res.filters = Some(filters); + } + t!("TOKENIZERS") => { + self.pop_peek(); + let mut tokenizers = Vec::new(); + + loop { + let tokenizer = match self.next().kind { + t!("BLANK") => Tokenizer::Blank, + t!("CAMEL") => Tokenizer::Camel, + t!("CLASS") => Tokenizer::Class, + t!("PUNCT") => Tokenizer::Punct, + x => unexpected!(self, x, "a tokenizer"), + }; + tokenizers.push(tokenizer); + if !self.eat(t!(",")) { + break; + } + } + res.tokenizers = Some(tokenizers); + } + t!("FUNCTION") => { + self.pop_peek(); + expected!(self, t!("fn")); + expected!(self, t!("::")); + let mut ident = self.next_token_value::()?; + while self.eat(t!("::")) { + let value = self.next_token_value::()?; + ident.0.push_str("::"); + ident.0.push_str(&value); + } + res.function = Some(ident); + } + t!("COMMENT") => { + self.pop_peek(); + res.comment = Some(self.next_token_value()?); + } + _ => break, + } + } + Ok(res) + } +} diff --git a/lib/src/syn/v2/parser/stmt/delete.rs b/lib/src/syn/v2/parser/stmt/delete.rs new file mode 100644 index 00000000..4cbd5185 --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/delete.rs @@ -0,0 +1,31 @@ +use crate::{ + sql::{statements::DeleteStatement, Values}, + syn::v2::{ + parser::{ParseResult, Parser}, + token::t, + }, +}; + +impl Parser<'_> { + pub fn parse_delete_stmt(&mut self) -> ParseResult { + self.eat(t!("FROM")); + let only = self.eat(t!("ONLY")); + let what = Values(self.parse_what_list()?); + let cond = self.try_parse_condition()?; + let output = self.try_parse_output()?; + let timeout = self.try_parse_timeout()?; + let parallel = self.eat(t!("PARALLEL")); + + Ok(DeleteStatement { + only, + what, + cond, + output, + timeout, + parallel, + }) + } +} + +#[cfg(test)] +mod test {} diff --git a/lib/src/syn/v2/parser/stmt/if.rs b/lib/src/syn/v2/parser/stmt/if.rs new file mode 100644 index 00000000..aa623628 --- /dev/null +++ 
b/lib/src/syn/v2/parser/stmt/if.rs @@ -0,0 +1,85 @@ +use crate::{ + sql::statements::IfelseStatement, + syn::v2::{ + parser::{ + mac::{expected, unexpected}, + ParseResult, Parser, + }, + token::t, + }, +}; + +impl Parser<'_> { + pub fn parse_if_stmt(&mut self) -> ParseResult { + let condition = self.parse_value_field()?; + + let mut res = IfelseStatement { + exprs: Vec::new(), + close: None, + }; + + let next = self.next(); + match next.kind { + t!("THEN") => { + let body = self.parse_value_field()?; + self.eat(t!(";")); + res.exprs.push((condition, body)); + self.parse_worded_tail(&mut res)?; + } + t!("{") => { + let body = self.parse_block(next.span)?; + res.exprs.push((condition, body.into())); + self.parse_bracketed_tail(&mut res)?; + } + x => unexpected!(self, x, "THEN or '{'"), + } + + Ok(res) + } + + fn parse_worded_tail(&mut self, res: &mut IfelseStatement) -> ParseResult<()> { + loop { + match self.next().kind { + t!("END") => return Ok(()), + t!("ELSE") => { + if self.eat(t!("IF")) { + let condition = self.parse_value_field()?; + expected!(self, t!("THEN")); + let body = self.parse_value_field()?; + self.eat(t!(";")); + res.exprs.push((condition, body)); + } else { + let value = self.parse_value_field()?; + self.eat(t!(";")); + expected!(self, t!("END")); + res.close = Some(value); + return Ok(()); + } + } + x => unexpected!(self, x, "if to end"), + } + } + } + + fn parse_bracketed_tail(&mut self, res: &mut IfelseStatement) -> ParseResult<()> { + loop { + match self.peek_kind() { + t!("ELSE") => { + self.pop_peek(); + if self.eat(t!("IF")) { + let condition = self.parse_value_field()?; + let span = expected!(self, t!("{")).span; + let body = self.parse_block(span)?; + res.exprs.push((condition, body.into())); + } else { + let span = expected!(self, t!("{")).span; + let value = self.parse_block(span)?; + res.close = Some(value.into()); + return Ok(()); + } + } + _ => return Ok(()), + } + } + } +} diff --git a/lib/src/syn/v2/parser/stmt/insert.rs 
b/lib/src/syn/v2/parser/stmt/insert.rs new file mode 100644 index 00000000..223f6c1a --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/insert.rs @@ -0,0 +1,97 @@ +use crate::{ + sql::{statements::InsertStatement, Data, Value}, + syn::v2::{ + parser::{mac::expected, ParseResult, Parser}, + token::t, + }, +}; + +impl Parser<'_> { + pub(crate) fn parse_insert_stmt(&mut self) -> ParseResult { + let ignore = self.eat(t!("IGNORE")); + expected!(self, t!("INTO")); + let next = self.next(); + // TODO: Explain that more complicated expressions are not allowed here. + let into = match next.kind { + t!("$param") => { + let param = self.token_value(next)?; + Value::Param(param) + } + _ => { + let table = self.token_value(next)?; + Value::Table(table) + } + }; + + let data = match self.peek_kind() { + t!("(") => { + let start = self.pop_peek().span; + let fields = self.parse_idiom_list()?; + self.expect_closing_delimiter(t!(")"), start)?; + expected!(self, t!("VALUES")); + + let start = expected!(self, t!("(")).span; + let mut values = vec![self.parse_value()?]; + while self.eat(t!(",")) { + values.push(self.parse_value()?); + } + self.expect_closing_delimiter(t!(")"), start)?; + + let mut values = vec![values]; + while self.eat(t!(",")) { + let start = expected!(self, t!("(")).span; + let mut inner_values = vec![self.parse_value()?]; + while self.eat(t!(",")) { + inner_values.push(self.parse_value()?); + } + values.push(inner_values); + self.expect_closing_delimiter(t!(")"), start)?; + } + + Data::ValuesExpression( + values + .into_iter() + .map(|row| fields.iter().cloned().zip(row).collect()) + .collect(), + ) + } + _ => { + let value = self.parse_value()?; + Data::SingleExpression(value) + } + }; + + let update = self.eat(t!("ON")).then(|| self.parse_insert_update()).transpose()?; + let output = self.try_parse_output()?; + let timeout = self.try_parse_timeout()?; + let parallel = self.eat(t!("PARALLEL")); + Ok(InsertStatement { + into, + data, + ignore, + update, + output, + 
timeout, + parallel, + }) + } + + fn parse_insert_update(&mut self) -> ParseResult { + expected!(self, t!("DUPLICATE")); + expected!(self, t!("KEY")); + expected!(self, t!("UPDATE")); + let l = self.parse_plain_idiom()?; + let o = self.parse_assigner()?; + let r = self.parse_value()?; + let mut data = vec![(l, o, r)]; + + while self.eat(t!(",")) { + let l = self.parse_plain_idiom()?; + let o = self.parse_assigner()?; + let r = self.parse_value()?; + data.push((l, o, r)) + } + + Ok(Data::UpdateExpression(data)) + } +} diff --git a/lib/src/syn/v2/parser/stmt/mod.rs b/lib/src/syn/v2/parser/stmt/mod.rs new file mode 100644 index 00000000..3c759c51 --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/mod.rs @@ -0,0 +1,551 @@ +use crate::sql::block::Entry; +use crate::sql::statements::show::{ShowSince, ShowStatement}; +use crate::sql::statements::sleep::SleepStatement; +use crate::sql::statements::{ + KillStatement, LiveStatement, OptionStatement, SetStatement, ThrowStatement, +}; +use crate::sql::{Fields, Ident, Param}; +use crate::syn::v2::token::{t, TokenKind}; +use crate::{ + sql::{ + statements::{ + analyze::AnalyzeStatement, BeginStatement, BreakStatement, CancelStatement, + CommitStatement, ContinueStatement, ForeachStatement, InfoStatement, OutputStatement, + UseStatement, + }, + Expression, Operator, Statement, Statements, Value, + }, + syn::v2::parser::mac::unexpected, +}; + +use super::{mac::expected, ParseResult, Parser}; + +mod create; +mod define; +mod delete; +mod r#if; +mod insert; +mod parts; +mod relate; +mod remove; +mod select; +mod update; + +impl Parser<'_> { + pub fn parse_stmt_list(&mut self) -> ParseResult { + let mut res = Vec::new(); + loop { + match self.peek_kind() { + // consume any possible empty statements. 
+ t!(";") => continue, + t!("eof") => break, + _ => { + let stmt = self.parse_stmt()?; + res.push(stmt); + if !self.eat(t!(";")) { + expected!(self, t!("eof")); + break; + } + } + } + } + Ok(Statements(res)) + } + + pub(super) fn parse_stmt(&mut self) -> ParseResult { + let token = self.peek(); + match token.kind { + t!("ANALYZE") => { + self.pop_peek(); + self.parse_analyze().map(Statement::Analyze) + } + t!("BEGIN") => { + self.pop_peek(); + self.parse_begin().map(Statement::Begin) + } + t!("BREAK") => { + self.pop_peek(); + Ok(Statement::Break(BreakStatement)) + } + t!("CANCEL") => { + self.pop_peek(); + self.parse_cancel().map(Statement::Cancel) + } + t!("COMMIT") => { + self.pop_peek(); + self.parse_commit().map(Statement::Commit) + } + t!("CONTINUE") => { + self.pop_peek(); + Ok(Statement::Continue(ContinueStatement)) + } + t!("CREATE") => { + self.pop_peek(); + self.parse_create_stmt().map(Statement::Create) + } + t!("DEFINE") => { + self.pop_peek(); + self.parse_define_stmt().map(Statement::Define) + } + t!("DELETE") => { + self.pop_peek(); + self.parse_delete_stmt().map(Statement::Delete) + } + t!("FOR") => { + self.pop_peek(); + self.parse_for_stmt().map(Statement::Foreach) + } + t!("IF") => { + self.pop_peek(); + self.parse_if_stmt().map(Statement::Ifelse) + } + t!("INFO") => { + self.pop_peek(); + self.parse_info_stmt().map(Statement::Info) + } + t!("INSERT") => { + self.pop_peek(); + self.parse_insert_stmt().map(Statement::Insert) + } + t!("KILL") => { + self.pop_peek(); + self.parse_kill_stmt().map(Statement::Kill) + } + t!("LIVE") => { + self.pop_peek(); + self.parse_live_stmt().map(Statement::Live) + } + t!("OPTION") => { + self.pop_peek(); + self.parse_option_stmt().map(Statement::Option) + } + t!("RETURN") => { + self.pop_peek(); + self.parse_return_stmt().map(Statement::Output) + } + t!("RELATE") => { + self.pop_peek(); + self.parse_relate_stmt().map(Statement::Relate) + } + t!("REMOVE") => { + self.pop_peek(); + 
self.parse_remove_stmt().map(Statement::Remove) + } + t!("SELECT") => { + self.pop_peek(); + self.parse_select_stmt().map(Statement::Select) + } + t!("LET") => { + self.pop_peek(); + self.parse_let_stmt().map(Statement::Set) + } + t!("SHOW") => { + self.pop_peek(); + self.parse_show_stmt().map(Statement::Show) + } + t!("SLEEP") => { + self.pop_peek(); + self.parse_sleep_stmt().map(Statement::Sleep) + } + t!("THROW") => { + self.pop_peek(); + self.parse_throw_stmt().map(Statement::Throw) + } + t!("UPDATE") => { + self.pop_peek(); + self.parse_update_stmt().map(Statement::Update) + } + t!("USE") => { + self.pop_peek(); + self.parse_use_stmt().map(Statement::Use) + } + _ => { + // TODO: Provide information about keywords. + let value = self.parse_value_field()?; + Ok(Self::refine_stmt_value(value)) + } + } + } + + pub(super) fn parse_entry(&mut self) -> ParseResult { + let token = self.peek(); + match token.kind { + t!("BREAK") => { + self.pop_peek(); + Ok(Entry::Break(BreakStatement)) + } + t!("CONTINUE") => { + self.pop_peek(); + Ok(Entry::Continue(ContinueStatement)) + } + t!("CREATE") => { + self.pop_peek(); + self.parse_create_stmt().map(Entry::Create) + } + t!("DEFINE") => { + self.pop_peek(); + self.parse_define_stmt().map(Entry::Define) + } + t!("DELETE") => { + self.pop_peek(); + self.parse_delete_stmt().map(Entry::Delete) + } + t!("FOR") => { + self.pop_peek(); + self.parse_for_stmt().map(Entry::Foreach) + } + t!("IF") => { + self.pop_peek(); + self.parse_if_stmt().map(Entry::Ifelse) + } + t!("INSERT") => { + self.pop_peek(); + self.parse_insert_stmt().map(Entry::Insert) + } + t!("RETURN") => { + self.pop_peek(); + self.parse_return_stmt().map(Entry::Output) + } + t!("RELATE") => { + self.pop_peek(); + self.parse_relate_stmt().map(Entry::Relate) + } + t!("REMOVE") => { + self.pop_peek(); + self.parse_remove_stmt().map(Entry::Remove) + } + t!("SELECT") => { + self.pop_peek(); + self.parse_select_stmt().map(Entry::Select) + } + t!("LET") => { + 
self.pop_peek(); + self.parse_let_stmt().map(Entry::Set) + } + t!("THROW") => { + self.pop_peek(); + self.parse_throw_stmt().map(Entry::Throw) + } + t!("UPDATE") => { + self.pop_peek(); + self.parse_update_stmt().map(Entry::Update) + } + _ => { + // TODO: Provide information about keywords. + let v = self.parse_value_field()?; + Ok(Self::refine_entry_value(v)) + } + } + } + + /// Turns [Param] `=` [Value] into a set statment. + fn refine_stmt_value(value: Value) -> Statement { + match value { + Value::Expression(x) => { + if let Expression::Binary { + l: Value::Param(x), + o: Operator::Equal, + r, + } = *x + { + return Statement::Set(crate::sql::statements::SetStatement { + name: x.0 .0, + what: r, + }); + } + Statement::Value(Value::Expression(x)) + } + _ => Statement::Value(value), + } + } + + fn refine_entry_value(value: Value) -> Entry { + match value { + Value::Expression(x) => { + if let Expression::Binary { + l: Value::Param(x), + o: Operator::Equal, + r, + } = *x + { + return Entry::Set(crate::sql::statements::SetStatement { + name: x.0 .0, + what: r, + }); + } + Entry::Value(Value::Expression(x)) + } + _ => Entry::Value(value), + } + } + + /// Parsers a analyze statement. + fn parse_analyze(&mut self) -> ParseResult { + expected!(self, t!("INDEX")); + + let index = self.next_token_value()?; + expected!(self, t!("ON")); + let table = self.next_token_value()?; + + Ok(AnalyzeStatement::Idx(table, index)) + } + + /// Parsers a begin statement. + /// + /// # Parser State + /// Expects `BEGIN` to already be consumed. + fn parse_begin(&mut self) -> ParseResult { + if let t!("TRANSACTION") = self.peek().kind { + self.next(); + } + Ok(BeginStatement) + } + + /// Parsers a cancel statement. + /// + /// # Parser State + /// Expects `CANCEL` to already be consumed. + fn parse_cancel(&mut self) -> ParseResult { + if let t!("TRANSACTION") = self.peek().kind { + self.next(); + } + Ok(CancelStatement) + } + + /// Parsers a commit statement. 
+ /// + /// # Parser State + /// Expects `COMMIT` to already be consumed. + fn parse_commit(&mut self) -> ParseResult { + if let t!("TRANSACTION") = self.peek().kind { + self.next(); + } + Ok(CommitStatement) + } + + /// Parsers a USE statement. + /// + /// # Parser State + /// Expects `USE` to already be consumed. + fn parse_use_stmt(&mut self) -> ParseResult { + let (ns, db) = if self.eat(t!("NAMESPACE")) { + let ns = self.next_token_value::()?.0; + let db = self + .eat(t!("DATABASE")) + .then(|| self.next_token_value::()) + .transpose()? + .map(|x| x.0); + (Some(ns), db) + } else { + expected!(self, t!("DATABASE")); + + let db = self.next_token_value::()?.0; + (None, Some(db)) + }; + + Ok(UseStatement { + ns, + db, + }) + } + + /// Parsers a FOR statement. + /// + /// # Parser State + /// Expects `FOR` to already be consumed. + pub fn parse_for_stmt(&mut self) -> ParseResult { + let param = self.next_token_value()?; + expected!(self, t!("IN")); + let range = self.parse_value()?; + + let span = expected!(self, t!("{")).span; + let block = self.parse_block(span)?; + Ok(ForeachStatement { + param, + range, + block, + }) + } + + /// Parsers a INFO statement. + /// + /// # Parser State + /// Expects `INFO` to already be consumed. + pub(crate) fn parse_info_stmt(&mut self) -> ParseResult { + expected!(self, t!("FOR")); + let stmt = match self.next().kind { + t!("ROOT") => InfoStatement::Root, + t!("NAMESPACE") => InfoStatement::Ns, + t!("DATABASE") => InfoStatement::Db, + t!("SCOPE") => { + let ident = self.next_token_value()?; + InfoStatement::Sc(ident) + } + t!("TABLE") => { + let ident = self.next_token_value()?; + InfoStatement::Tb(ident) + } + t!("USER") => { + let ident = self.next_token_value()?; + let base = self.eat(t!("ON")).then(|| self.parse_base(false)).transpose()?; + InfoStatement::User(ident, base) + } + x => unexpected!(self, x, "an info target"), + }; + Ok(stmt) + } + + /// Parsers a KILL statement. 
+ /// + /// # Parser State + /// Expects `KILL` to already be consumed. + pub(crate) fn parse_kill_stmt(&mut self) -> ParseResult { + let id = match self.peek_kind() { + TokenKind::Uuid => self.next_token_value().map(Value::Uuid)?, + t!("$param") => { + let token = self.pop_peek(); + let param = self.token_value(token)?; + Value::Param(param) + } + x => unexpected!(self, x, "a UUID or a parameter"), + }; + Ok(KillStatement { + id, + }) + } + + /// Parsers a LIVE statement. + /// + /// # Parser State + /// Expects `LIVE` to already be consumed. + pub(crate) fn parse_live_stmt(&mut self) -> ParseResult { + expected!(self, t!("SELECT")); + + let expr = match self.peek_kind() { + t!("DIFF") => { + self.pop_peek(); + Fields::default() + } + _ => self.parse_fields()?, + }; + expected!(self, t!("FROM")); + let what = match self.peek().kind { + t!("$param") => Value::Param(self.next_token_value()?), + _ => Value::Table(self.next_token_value()?), + }; + let cond = self.try_parse_condition()?; + let fetch = self.try_parse_fetch()?; + + Ok(LiveStatement::from_source_parts(expr, what, cond, fetch)) + } + + /// Parsers a OPTION statement. + /// + /// # Parser State + /// Expects `OPTION` to already be consumed. + pub(crate) fn parse_option_stmt(&mut self) -> ParseResult { + let name = self.next_token_value()?; + let what = if self.eat(t!("=")) { + match self.next().kind { + t!("true") => true, + t!("false") => false, + x => unexpected!(self, x, "either 'true' or 'false'"), + } + } else { + true + }; + Ok(OptionStatement { + name, + what, + }) + } + + /// Parsers a RETURN statement. + /// + /// # Parser State + /// Expects `RETURN` to already be consumed. + pub(crate) fn parse_return_stmt(&mut self) -> ParseResult { + let what = self.parse_value_field()?; + let fetch = self.try_parse_fetch()?; + Ok(OutputStatement { + what, + fetch, + }) + } + + /// Parsers a LET statement. + /// + /// SurrealQL has support for `LET` less let statements. 
+ /// These are not parsed here but after a statement is fully parsed. + /// A expression statement which matches a let-less let statement is then refined into a let + /// statement. + /// + /// # Parser State + /// Expects `LET` to already be consumed. + pub(crate) fn parse_let_stmt(&mut self) -> ParseResult { + let name = self.next_token_value::()?.0 .0; + expected!(self, t!("=")); + let what = self.parse_value()?; + Ok(SetStatement { + name, + what, + }) + } + + /// Parsers a SHOW statement + /// + /// # Parser State + /// Expects `SHOW` to already be consumed. + pub(crate) fn parse_show_stmt(&mut self) -> ParseResult { + expected!(self, t!("CHANGES")); + expected!(self, t!("FOR")); + + let table = match self.next().kind { + t!("TABLE") => { + let table = self.next_token_value()?; + Some(table) + } + t!("DATABASE") => None, + x => unexpected!(self, x, "`TABLE` or `DATABASE`"), + }; + + expected!(self, t!("SINCE")); + + let next = self.next(); + let since = match next.kind { + TokenKind::Number(_) => ShowSince::Versionstamp(self.token_value(next)?), + TokenKind::DateTime => ShowSince::Timestamp(self.token_value(next)?), + x => unexpected!(self, x, "a version stamp or a date-time"), + }; + + let limit = self.eat(t!("LIMIT")).then(|| self.next_token_value()).transpose()?; + + Ok(ShowStatement { + table, + since, + limit, + }) + } + + /// Parsers a SLEEP statement + /// + /// # Parser State + /// Expects `SLEEP` to already be consumed. + pub(crate) fn parse_sleep_stmt(&mut self) -> ParseResult { + let duration = self.next_token_value()?; + Ok(SleepStatement { + duration, + }) + } + + /// Parsers a THROW statement + /// + /// # Parser State + /// Expects `THROW` to already be consumed. 
+ pub(crate) fn parse_throw_stmt(&mut self) -> ParseResult { + let error = self.parse_value_field()?; + Ok(ThrowStatement { + error, + }) + } +} diff --git a/lib/src/syn/v2/parser/stmt/parts.rs b/lib/src/syn/v2/parser/stmt/parts.rs new file mode 100644 index 00000000..e05eadd7 --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/parts.rs @@ -0,0 +1,343 @@ +//! Contains parsing code for smaller common parts of statements. + +use crate::{ + sql::{ + changefeed::ChangeFeed, index::Distance, Base, Cond, Data, Duration, Fetch, Fetchs, Group, + Groups, Ident, Operator, Output, Permission, Permissions, Tables, Timeout, View, + }, + syn::v2::{ + parser::{ + mac::{expected, unexpected}, + ParseResult, Parser, + }, + token::{t, DistanceKind, TokenKind}, + }, +}; + +impl Parser<'_> { + /// Parses a data production if the next token is a data keyword. + /// Otherwise returns None + pub fn try_parse_data(&mut self) -> ParseResult> { + let res = match self.peek().kind { + t!("SET") => { + self.pop_peek(); + let mut set_list = Vec::new(); + loop { + let idiom = self.parse_plain_idiom()?; + let operator = match self.next().kind { + t!("=") => Operator::Equal, + t!("+=") => Operator::Inc, + t!("-=") => Operator::Dec, + t!("+?=") => Operator::Ext, + x => unexpected!(self, x, "a assign operator"), + }; + let value = self.parse_value()?; + set_list.push((idiom, operator, value)); + if !self.eat(t!(",")) { + break; + } + } + Data::SetExpression(set_list) + } + t!("UNSET") => { + self.pop_peek(); + let idiom_list = self.parse_idiom_list()?; + Data::UnsetExpression(idiom_list) + } + t!("PATCH") => { + self.pop_peek(); + Data::PatchExpression(self.parse_value()?) + } + t!("MERGE") => { + self.pop_peek(); + Data::MergeExpression(self.parse_value()?) + } + t!("REPLACE") => { + self.pop_peek(); + Data::ReplaceExpression(self.parse_value()?) + } + t!("CONTENT") => { + self.pop_peek(); + Data::ContentExpression(self.parse_value()?) 
+ } + _ => return Ok(None), + }; + Ok(Some(res)) + } + + /// Parses a statement output if the next token is `return`. + pub fn try_parse_output(&mut self) -> ParseResult> { + if !self.eat(t!("RETURN")) { + return Ok(None); + } + let res = match self.peek_kind() { + t!("NONE") => { + self.pop_peek(); + Output::None + } + t!("NULL") => { + self.pop_peek(); + Output::Null + } + t!("DIFF") => { + self.pop_peek(); + Output::Diff + } + t!("AFTER") => { + self.pop_peek(); + Output::After + } + t!("BEFORE") => { + self.pop_peek(); + Output::Before + } + _ => Output::Fields(self.parse_fields()?), + }; + Ok(Some(res)) + } + + /// Parses a statement timeout if the next token is `TIMEOUT`. + pub fn try_parse_timeout(&mut self) -> ParseResult> { + if !self.eat(t!("TIMEOUT")) { + return Ok(None); + } + let duration = self.next_token_value()?; + Ok(Some(Timeout(duration))) + } + + pub fn try_parse_fetch(&mut self) -> ParseResult> { + if !self.eat(t!("FETCH")) { + return Ok(None); + } + let v = self.parse_idiom_list()?.into_iter().map(Fetch).collect(); + Ok(Some(Fetchs(v))) + } + + pub fn try_parse_condition(&mut self) -> ParseResult> { + if !self.eat(t!("WHERE")) { + return Ok(None); + } + let v = self.parse_value_field()?; + Ok(Some(Cond(v))) + } + + pub fn try_parse_group(&mut self) -> ParseResult> { + if !self.eat(t!("GROUP")) { + return Ok(None); + } + + let res = match self.peek_kind() { + t!("ALL") => { + self.pop_peek(); + Groups(Vec::new()) + } + t!("BY") => { + self.pop_peek(); + let mut groups = Groups(vec![Group(self.parse_basic_idiom()?)]); + while self.eat(t!(",")) { + groups.0.push(Group(self.parse_basic_idiom()?)); + } + groups + } + _ => { + let mut groups = Groups(vec![Group(self.parse_basic_idiom()?)]); + while self.eat(t!(",")) { + groups.0.push(Group(self.parse_basic_idiom()?)); + } + groups + } + }; + + Ok(Some(res)) + } + + /// Parse a permissions production + /// + /// # Parser State + /// Expects the parser to have just eaten the `PERMISSIONS` keyword. 
+ pub fn parse_permission(&mut self, permissive: bool) -> ParseResult { + match self.next().kind { + t!("NONE") => Ok(Permissions::none()), + t!("FULL") => Ok(Permissions::full()), + t!("FOR") => { + let mut permission = if permissive { + Permissions::full() + } else { + Permissions::none() + }; + self.parse_specific_permission(&mut permission)?; + self.eat(t!(",")); + while self.eat(t!("FOR")) { + self.parse_specific_permission(&mut permission)?; + self.eat(t!(",")); + } + Ok(permission) + } + x => unexpected!(self, x, "'NONE', 'FULL' or 'FOR'"), + } + } + + /// Parse a specific permission for a type of query + /// + /// Sets the permission for a specific query on the given permission keyword. + /// + /// # Parser State + /// Expects the parser to just have eaten the `FOR` keyword. + pub fn parse_specific_permission(&mut self, permissions: &mut Permissions) -> ParseResult<()> { + let mut select = false; + let mut create = false; + let mut update = false; + let mut delete = false; + + loop { + match self.next().kind { + t!("SELECT") => { + select = true; + } + t!("CREATE") => { + create = true; + } + t!("UPDATE") => { + update = true; + } + t!("DELETE") => { + delete = true; + } + x => unexpected!(self, x, "'SELECT', 'CREATE', 'UPDATE' or 'DELETE'"), + } + if !self.eat(t!(",")) { + break; + } + } + + let permission_value = self.parse_permission_value()?; + if select { + permissions.select = permission_value.clone(); + } + if create { + permissions.create = permission_value.clone(); + } + if update { + permissions.update = permission_value.clone(); + } + if delete { + permissions.delete = permission_value + } + + Ok(()) + } + + /// Parses a the value for a permission for a type of query + /// + /// # Parser State + /// + /// Expects the parser to just have eaten either `SELECT`, `CREATE`, `UPDATE` or `DELETE`. 
+ pub fn parse_permission_value(&mut self) -> ParseResult { + match self.next().kind { + t!("NONE") => Ok(Permission::None), + t!("FULL") => Ok(Permission::Full), + t!("WHERE") => Ok(Permission::Specific(self.parse_value_field()?)), + x => unexpected!(self, x, "'NONE', 'FULL', or 'WHERE'"), + } + } + + /// Parses a base + /// + /// So either `NAMESPACE`, `DATABASE`, `ROOT`, or `SCOPE` if `scope_allowed` is true. + /// + /// # Parser state + /// Expects the next keyword to be a base. + pub fn parse_base(&mut self, scope_allowed: bool) -> ParseResult { + match self.next().kind { + t!("NAMESPACE") => Ok(Base::Ns), + t!("DATABASE") => Ok(Base::Db), + t!("ROOT") => Ok(Base::Root), + t!("SCOPE") => { + if !scope_allowed { + unexpected!(self, t!("SCOPE"), "a scope is not allowed here"); + } + let name = self.next_token_value()?; + Ok(Base::Sc(name)) + } + x => { + if scope_allowed { + unexpected!(self, x, "'NAMESPACE', 'DATABASE', 'ROOT', 'SCOPE' or 'KV'") + } else { + unexpected!(self, x, "'NAMESPACE', 'DATABASE', 'ROOT', or 'KV'") + } + } + } + } + + /// Parses a changefeed production + /// + /// # Parser State + /// Expects the parser to have already eaten the `CHANGEFEED` keyword + pub fn parse_changefeed(&mut self) -> ParseResult { + let expiry = self.next_token_value::()?.0; + Ok(ChangeFeed { + expiry, + }) + } + + /// Parses a view production + /// + /// # Parser State + /// Expects the parser to have already eaten the possible `(` if the view was wrapped in + /// parens. Expects the next keyword to be `SELECT`. 
+ pub fn parse_view(&mut self) -> ParseResult { + expected!(self, t!("SELECT")); + let fields = self.parse_fields()?; + expected!(self, t!("FROM")); + let mut from = vec![self.next_token_value()?]; + while self.eat(t!(",")) { + from.push(self.next_token_value()?); + } + + let cond = self.try_parse_condition()?; + let group = self.try_parse_group()?; + + Ok(View { + expr: fields, + what: Tables(from), + cond, + group, + }) + } + + pub fn parse_distance(&mut self) -> ParseResult { + let dist = match self.next().kind { + TokenKind::Distance(x) => match x { + DistanceKind::Euclidean => Distance::Euclidean, + DistanceKind::Manhattan => Distance::Manhattan, + DistanceKind::Hamming => Distance::Hamming, + DistanceKind::Minkowski => { + let distance = self.next_token_value()?; + Distance::Minkowski(distance) + } + }, + x => unexpected!(self, x, "a distance measure"), + }; + Ok(dist) + } + + pub fn try_parse_distance(&mut self) -> ParseResult> { + if !self.eat(t!("DISTANCE")) { + return Ok(None); + } + + self.parse_distance().map(Some) + } + + pub fn parse_custom_function_name(&mut self) -> ParseResult { + expected!(self, t!("fn")); + expected!(self, t!("::")); + let mut name = self.next_token_value::()?; + while self.eat(t!("::")) { + let part = self.next_token_value::()?; + name.0.push_str("::"); + name.0.push_str(part.0.as_str()); + } + Ok(name) + } +} diff --git a/lib/src/syn/v2/parser/stmt/relate.rs b/lib/src/syn/v2/parser/stmt/relate.rs new file mode 100644 index 00000000..3b873cda --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/relate.rs @@ -0,0 +1,92 @@ +use crate::{ + sql::{statements::RelateStatement, Subquery, Value}, + syn::v2::{ + parser::{ + mac::{expected, unexpected}, + ParseResult, Parser, + }, + token::t, + }, +}; + +impl Parser<'_> { + pub fn parse_relate_stmt(&mut self) -> ParseResult { + let only = self.eat(t!("ONLY")); + let (kind, from, with) = self.parse_relation()?; + let uniq = self.eat(t!("UNIQUE")); + + let data = self.try_parse_data()?; + let 
output = self.try_parse_output()?; + let timeout = self.try_parse_timeout()?; + let parallel = self.eat(t!("PARALLEL")); + Ok(RelateStatement { + only, + kind, + from, + with, + uniq, + data, + output, + timeout, + parallel, + }) + } + + pub fn parse_relation(&mut self) -> ParseResult<(Value, Value, Value)> { + let first = self.parse_relate_value()?; + let is_o = match self.next().kind { + t!("->") => true, + t!("<-") => false, + x => unexpected!(self, x, "a relation arrow"), + }; + let kind = self.parse_thing_or_table()?; + if is_o { + expected!(self, t!("->")) + } else { + expected!(self, t!("<-")) + }; + let second = self.parse_relate_value()?; + if is_o { + Ok((kind, first, second)) + } else { + Ok((kind, second, first)) + } + } + + pub fn parse_relate_value(&mut self) -> ParseResult { + match self.peek_kind() { + t!("[") => { + let start = self.pop_peek().span; + self.parse_array(start).map(Value::Array) + } + t!("$param") => self.next_token_value().map(Value::Param), + t!("RETURN") + | t!("SELECT") + | t!("CREATE") + | t!("UPDATE") + | t!("DELETE") + | t!("RELATE") + | t!("DEFINE") + | t!("REMOVE") => self.parse_inner_subquery(None).map(|x| Value::Subquery(Box::new(x))), + t!("IF") => { + self.pop_peek(); + self.parse_if_stmt().map(|x| Value::Subquery(Box::new(Subquery::Ifelse(x)))) + } + t!("(") => { + let span = self.pop_peek().span; + let res = + self.parse_inner_subquery(Some(span)).map(|x| Value::Subquery(Box::new(x)))?; + Ok(res) + } + _ => self.parse_thing().map(Value::Thing), + } + } + + pub fn parse_thing_or_table(&mut self) -> ParseResult { + if self.peek_token_at(1).kind == t!(":") { + self.parse_thing().map(Value::Thing) + } else { + self.next_token_value().map(Value::Table) + } + } +} diff --git a/lib/src/syn/v2/parser/stmt/remove.rs b/lib/src/syn/v2/parser/stmt/remove.rs new file mode 100644 index 00000000..249dd698 --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/remove.rs @@ -0,0 +1,121 @@ +use crate::{ + sql::{ + statements::{ + 
remove::RemoveAnalyzerStatement, RemoveDatabaseStatement, RemoveEventStatement, + RemoveFieldStatement, RemoveFunctionStatement, RemoveIndexStatement, + RemoveNamespaceStatement, RemoveParamStatement, RemoveScopeStatement, RemoveStatement, + RemoveUserStatement, + }, + Param, + }, + syn::v2::{ + parser::{ + mac::{expected, unexpected}, + ParseResult, Parser, + }, + token::t, + }, +}; + +impl Parser<'_> { + pub fn parse_remove_stmt(&mut self) -> ParseResult { + let res = match self.next().kind { + t!("NAMESPACE") => { + let name = self.next_token_value()?; + RemoveStatement::Namespace(RemoveNamespaceStatement { + name, + }) + } + t!("DATABASE") => { + let name = self.next_token_value()?; + RemoveStatement::Database(RemoveDatabaseStatement { + name, + }) + } + t!("FUNCTION") => { + let name = self.parse_custom_function_name()?; + let next = self.peek(); + if self.eat(t!("(")) { + self.expect_closing_delimiter(t!(")"), next.span)?; + } + RemoveStatement::Function(RemoveFunctionStatement { + name, + }) + } + t!("TOKEN") => { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + let base = self.parse_base(true)?; + RemoveStatement::Token(crate::sql::statements::RemoveTokenStatement { + name, + base, + }) + } + t!("SCOPE") => { + let name = self.next_token_value()?; + RemoveStatement::Scope(RemoveScopeStatement { + name, + }) + } + t!("PARAM") => { + let name = self.next_token_value::()?; + RemoveStatement::Param(RemoveParamStatement { + name: name.0, + }) + } + t!("TABLE") => { + let name = self.next_token_value()?; + RemoveStatement::Table(crate::sql::statements::RemoveTableStatement { + name, + }) + } + t!("EVENT") => { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + self.eat(t!("TABLE")); + let table = self.next_token_value()?; + RemoveStatement::Event(RemoveEventStatement { + name, + what: table, + }) + } + t!("FIELD") => { + let idiom = self.parse_local_idiom()?; + expected!(self, t!("ON")); + self.eat(t!("TABLE")); + let table 
= self.next_token_value()?; + RemoveStatement::Field(RemoveFieldStatement { + name: idiom, + what: table, + }) + } + t!("INDEX") => { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + self.eat(t!("TABLE")); + let what = self.next_token_value()?; + RemoveStatement::Index(RemoveIndexStatement { + name, + what, + }) + } + t!("ANALYZER") => { + let name = self.next_token_value()?; + RemoveStatement::Analyzer(RemoveAnalyzerStatement { + name, + }) + } + t!("USER") => { + let name = self.next_token_value()?; + expected!(self, t!("ON")); + let base = self.parse_base(false)?; + RemoveStatement::User(RemoveUserStatement { + name, + base, + }) + } + x => unexpected!(self, x, "a remove statement keyword"), + }; + Ok(res) + } +} diff --git a/lib/src/syn/v2/parser/stmt/select.rs b/lib/src/syn/v2/parser/stmt/select.rs new file mode 100644 index 00000000..48ed7a07 --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/select.rs @@ -0,0 +1,189 @@ +use crate::{ + sql::{ + statements::SelectStatement, Explain, Ident, Idioms, Limit, Order, Orders, Split, Splits, + Start, Values, Version, With, + }, + syn::v2::{ + parser::{ + mac::{expected, unexpected}, + ParseResult, Parser, + }, + token::t, + }, +}; + +impl Parser<'_> { + pub(crate) fn parse_select_stmt(&mut self) -> ParseResult { + // + let expr = self.parse_fields()?; + + let omit = self.eat(t!("OMIT")).then(|| self.parse_idiom_list()).transpose()?.map(Idioms); + + expected!(self, t!("FROM")); + + let only = self.eat(t!("ONLY")); + + let mut what = vec![self.parse_value()?]; + while self.eat(t!(",")) { + what.push(self.parse_value()?); + } + let what = Values(what); + + let with = self.try_parse_with()?; + let cond = self.try_parse_condition()?; + let split = self.try_parse_split()?; + let group = self.try_parse_group()?; + let order = self.try_parse_orders()?; + let (limit, start) = if let t!("START") = self.peek_kind() { + let start = self.try_parse_start()?; + let limit = self.try_parse_limit()?; + (limit, start) + 
} else { + let limit = self.try_parse_limit()?; + let start = self.try_parse_start()?; + (limit, start) + }; + let fetch = self.try_parse_fetch()?; + let version = self.try_parse_version()?; + let timeout = self.try_parse_timeout()?; + let parallel = self.eat(t!("PARALLEL")); + let explain = self.eat(t!("EXPLAIN")).then(|| Explain(self.eat(t!("FULL")))); + + Ok(SelectStatement { + expr, + omit, + only, + what, + with, + cond, + split, + group, + order, + limit, + start, + fetch, + version, + timeout, + parallel, + explain, + }) + } + + fn try_parse_with(&mut self) -> ParseResult> { + if !self.eat(t!("WITH")) { + return Ok(None); + } + let with = match self.next().kind { + t!("NOINDEX") => With::NoIndex, + t!("NO") => { + expected!(self, t!("INDEX")); + With::NoIndex + } + t!("INDEX") => { + let mut index = vec![self.next_token_value::()?.0]; + while self.eat(t!(",")) { + index.push(self.next_token_value::()?.0); + } + With::Index(index) + } + x => unexpected!(self, x, "`NO`, `NOINDEX` or `INDEX`"), + }; + Ok(Some(with)) + } + + fn try_parse_split(&mut self) -> ParseResult> { + if !self.eat(t!("SPLIT")) { + return Ok(None); + } + + self.eat(t!("ON")); + + let mut res = vec![Split(self.parse_basic_idiom()?)]; + while self.eat(t!(",")) { + res.push(Split(self.parse_basic_idiom()?)); + } + Ok(Some(Splits(res))) + } + + fn try_parse_orders(&mut self) -> ParseResult> { + if !self.eat(t!("ORDER")) { + return Ok(None); + } + + self.eat(t!("BY")); + + let orders = match self.peek_kind() { + t!("RAND") => { + self.pop_peek(); + let start = expected!(self, t!("(")).span; + self.expect_closing_delimiter(t!(")"), start)?; + vec![Order { + order: Default::default(), + random: true, + collate: false, + numeric: false, + direction: true, + }] + } + _ => { + let mut orders = vec![self.parse_order()?]; + while self.eat(t!(",")) { + orders.push(self.parse_order()?); + } + orders + } + }; + + Ok(Some(Orders(orders))) + } + + fn parse_order(&mut self) -> ParseResult { + let start = 
self.parse_basic_idiom()?; + let collate = self.eat(t!("COLLATE")); + let numeric = self.eat(t!("NUMERIC")); + let direction = match self.peek_kind() { + t!("ASCENDING") => { + self.pop_peek(); + true + } + t!("DESCENDING") => { + self.pop_peek(); + false + } + _ => true, + }; + Ok(Order { + order: start, + random: false, + collate, + numeric, + direction, + }) + } + + fn try_parse_limit(&mut self) -> ParseResult> { + if !self.eat(t!("LIMIT")) { + return Ok(None); + } + self.eat(t!("BY")); + let value = self.parse_value()?; + Ok(Some(Limit(value))) + } + + fn try_parse_start(&mut self) -> ParseResult> { + if !self.eat(t!("START")) { + return Ok(None); + } + self.eat(t!("AT")); + let value = self.parse_value()?; + Ok(Some(Start(value))) + } + + fn try_parse_version(&mut self) -> ParseResult> { + if !self.eat(t!("VERSION")) { + return Ok(None); + } + let time = self.next_token_value()?; + Ok(Some(Version(time))) + } +} diff --git a/lib/src/syn/v2/parser/stmt/update.rs b/lib/src/syn/v2/parser/stmt/update.rs new file mode 100644 index 00000000..616f1e3e --- /dev/null +++ b/lib/src/syn/v2/parser/stmt/update.rs @@ -0,0 +1,29 @@ +use crate::{ + sql::{statements::UpdateStatement, Values}, + syn::v2::{ + parser::{ParseResult, Parser}, + token::t, + }, +}; + +impl Parser<'_> { + pub fn parse_update_stmt(&mut self) -> ParseResult { + let only = self.eat(t!("ONLY")); + let what = Values(self.parse_what_list()?); + let data = self.try_parse_data()?; + let cond = self.try_parse_condition()?; + let output = self.try_parse_output()?; + let timeout = self.try_parse_timeout()?; + let parallel = self.eat(t!("PARALLEL")); + + Ok(UpdateStatement { + only, + what, + data, + cond, + output, + timeout, + parallel, + }) + } +} diff --git a/lib/src/syn/v2/parser/test/mod.rs b/lib/src/syn/v2/parser/test/mod.rs new file mode 100644 index 00000000..30be2080 --- /dev/null +++ b/lib/src/syn/v2/parser/test/mod.rs @@ -0,0 +1,3 @@ +mod stmt; +mod streaming; +mod value; diff --git 
a/lib/src/syn/v2/parser/test/stmt.rs b/lib/src/syn/v2/parser/test/stmt.rs new file mode 100644 index 00000000..4e254374 --- /dev/null +++ b/lib/src/syn/v2/parser/test/stmt.rs @@ -0,0 +1,1205 @@ +use crate::{ + sql::{ + block::Entry, + changefeed::ChangeFeed, + filter::Filter, + index::{Distance, MTreeParams, SearchParams, VectorType}, + language::Language, + statements::{ + analyze::AnalyzeStatement, show::ShowSince, show::ShowStatement, sleep::SleepStatement, + BeginStatement, BreakStatement, CancelStatement, CommitStatement, ContinueStatement, + CreateStatement, DefineAnalyzerStatement, DefineDatabaseStatement, + DefineEventStatement, DefineFieldStatement, DefineFunctionStatement, + DefineIndexStatement, DefineNamespaceStatement, DefineParamStatement, DefineStatement, + DefineTableStatement, DefineTokenStatement, DeleteStatement, ForeachStatement, + IfelseStatement, InfoStatement, InsertStatement, KillStatement, OptionStatement, + OutputStatement, RelateStatement, RemoveAnalyzerStatement, RemoveDatabaseStatement, + RemoveEventStatement, RemoveFieldStatement, RemoveFunctionStatement, + RemoveIndexStatement, RemoveNamespaceStatement, RemoveParamStatement, + RemoveScopeStatement, RemoveStatement, RemoveTableStatement, RemoveTokenStatement, + RemoveUserStatement, SelectStatement, SetStatement, ThrowStatement, UpdateStatement, + UseStatement, + }, + tokenizer::Tokenizer, + Algorithm, Array, Base, Block, Cond, Data, Datetime, Dir, Duration, Edges, Explain, + Expression, Fetch, Fetchs, Field, Fields, Future, Graph, Group, Groups, Id, Ident, Idiom, + Idioms, Index, Kind, Limit, Number, Object, Operator, Order, Orders, Output, Param, Part, + Permission, Permissions, Scoring, Split, Splits, Start, Statement, Strand, Subquery, Table, + Tables, Thing, Timeout, Uuid, Value, Values, Version, With, + }, + syn::v2::parser::mac::test_parse, +}; +use chrono::{offset::TimeZone, NaiveDate, Offset, Utc}; + +#[test] +pub fn parse_analyze() { + let res = test_parse!(parse_stmt, 
r#"ANALYZE INDEX b on a"#).unwrap(); + assert_eq!( + res, + Statement::Analyze(AnalyzeStatement::Idx(Ident("a".to_string()), Ident("b".to_string()))) + ) +} + +#[test] +pub fn parse_begin() { + let res = test_parse!(parse_stmt, r#"BEGIN"#).unwrap(); + assert_eq!(res, Statement::Begin(BeginStatement)); + let res = test_parse!(parse_stmt, r#"BEGIN TRANSACTION"#).unwrap(); + assert_eq!(res, Statement::Begin(BeginStatement)); +} + +#[test] +pub fn parse_break() { + let res = test_parse!(parse_stmt, r#"BREAK"#).unwrap(); + assert_eq!(res, Statement::Break(BreakStatement)); +} + +#[test] +pub fn parse_cancel() { + let res = test_parse!(parse_stmt, r#"CANCEL"#).unwrap(); + assert_eq!(res, Statement::Cancel(CancelStatement)); + let res = test_parse!(parse_stmt, r#"CANCEL TRANSACTION"#).unwrap(); + assert_eq!(res, Statement::Cancel(CancelStatement)); +} + +#[test] +pub fn parse_commit() { + let res = test_parse!(parse_stmt, r#"COMMIT"#).unwrap(); + assert_eq!(res, Statement::Commit(CommitStatement)); + let res = test_parse!(parse_stmt, r#"COMMIT TRANSACTION"#).unwrap(); + assert_eq!(res, Statement::Commit(CommitStatement)); +} + +#[test] +pub fn parse_continue() { + let res = test_parse!(parse_stmt, r#"CONTINUE"#).unwrap(); + assert_eq!(res, Statement::Continue(ContinueStatement)); +} + +#[test] +fn parse_create() { + let res = test_parse!( + parse_stmt, + "CREATE ONLY foo SET bar = 3, foo +?= 4 RETURN VALUE foo AS bar TIMEOUT 1s PARALLEL" + ) + .unwrap(); + assert_eq!( + res, + Statement::Create(CreateStatement { + only: true, + what: Values(vec![Value::Table(Table("foo".to_owned()))]), + data: Some(Data::SetExpression(vec![ + ( + Idiom(vec![Part::Field(Ident("bar".to_owned()))]), + Operator::Equal, + Value::Number(Number::Int(3)) + ), + ( + Idiom(vec![Part::Field(Ident("foo".to_owned()))]), + Operator::Ext, + Value::Number(Number::Int(4)) + ), + ])), + output: Some(Output::Fields(Fields( + vec![Field::Single { + expr: 
Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + alias: Some(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + }], + true, + ))), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(1)))), + parallel: true, + }), + ); +} + +#[test] +fn parse_define_namespace() { + let res = test_parse!(parse_stmt, "DEFINE NAMESPACE a COMMENT 'test'").unwrap(); + assert_eq!( + res, + Statement::Define(DefineStatement::Namespace(DefineNamespaceStatement { + id: None, + name: Ident("a".to_string()), + comment: Some(Strand("test".to_string())) + })) + ); + + let res = test_parse!(parse_stmt, "DEFINE NS a").unwrap(); + assert_eq!( + res, + Statement::Define(DefineStatement::Namespace(DefineNamespaceStatement { + id: None, + name: Ident("a".to_string()), + comment: None + })) + ) +} + +#[test] +fn parse_define_database() { + let res = test_parse!(parse_stmt, "DEFINE DATABASE a COMMENT 'test' CHANGEFEED 10m").unwrap(); + assert_eq!( + res, + Statement::Define(DefineStatement::Database(DefineDatabaseStatement { + id: None, + name: Ident("a".to_string()), + comment: Some(Strand("test".to_string())), + changefeed: Some(ChangeFeed { + expiry: std::time::Duration::from_secs(60) * 10 + }) + })) + ); + + let res = test_parse!(parse_stmt, "DEFINE DB a").unwrap(); + assert_eq!( + res, + Statement::Define(DefineStatement::Database(DefineDatabaseStatement { + id: None, + name: Ident("a".to_string()), + comment: None, + changefeed: None + })) + ) +} + +#[test] +fn parse_define_function() { + let res = test_parse!( + parse_stmt, + r#"DEFINE FUNCTION fn::foo::bar($a: number, $b: array) { + RETURN a + } COMMENT 'test' PERMISSIONS FULL + "# + ) + .unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Function(DefineFunctionStatement { + name: Ident("foo::bar".to_string()), + args: vec![ + (Ident("a".to_string()), Kind::Number), + (Ident("b".to_string()), Kind::Array(Box::new(Kind::Bool), Some(3))) + ], + block: Block(vec![Entry::Output(OutputStatement { + 
what: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_string()))])), + fetch: None, + })]), + comment: Some(Strand("test".to_string())), + permissions: Permission::Full, + })) + ) +} + +#[test] +fn parse_define_user() { + let res = test_parse!( + parse_stmt, + r#"DEFINE USER user ON ROOT COMMENT 'test' PASSHASH 'hunter2' ROLES foo, bar COMMENT "*******""# + ) + .unwrap(); + + let Statement::Define(DefineStatement::User(stmt)) = res else { + panic!() + }; + + assert_eq!(stmt.name, Ident("user".to_string())); + assert_eq!(stmt.base, Base::Root); + assert_eq!(stmt.hash, "hunter2".to_owned()); + assert_eq!(stmt.roles, vec![Ident("foo".to_string()), Ident("bar".to_string())]); + assert_eq!(stmt.comment, Some(Strand("*******".to_string()))) +} + +#[test] +fn parse_define_token() { + let res = test_parse!( + parse_stmt, + r#"DEFINE TOKEN a ON SCOPE b TYPE EDDSA VALUE "foo" COMMENT "bar""# + ) + .unwrap(); + assert_eq!( + res, + Statement::Define(DefineStatement::Token(DefineTokenStatement { + name: Ident("a".to_string()), + base: Base::Sc(Ident("b".to_string())), + kind: Algorithm::EdDSA, + code: "foo".to_string(), + comment: Some(Strand("bar".to_string())) + })) + ) +} + +#[test] +fn parse_define_scope() { + let res = test_parse!( + parse_stmt, + r#"DEFINE SCOPE a SESSION 1s SIGNUP true SIGNIN false COMMENT "bar""# + ) + .unwrap(); + + // manually compare since DefineScopeStatement creates a random code in its parser. 
+ let Statement::Define(DefineStatement::Scope(stmt)) = res else { + panic!() + }; + + assert_eq!(stmt.name, Ident("a".to_string())); + assert_eq!(stmt.comment, Some(Strand("bar".to_string()))); + assert_eq!(stmt.session, Some(Duration(std::time::Duration::from_secs(1)))); + assert_eq!(stmt.signup, Some(Value::Bool(true))); + assert_eq!(stmt.signin, Some(Value::Bool(false))); +} + +#[test] +fn parse_define_param() { + let res = + test_parse!(parse_stmt, r#"DEFINE PARAM $a VALUE { a: 1, "b": 3 } PERMISSIONS WHERE null"#) + .unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Param(DefineParamStatement { + name: Ident("a".to_string()), + value: Value::Object(Object( + [ + ("a".to_string(), Value::Number(Number::Int(1))), + ("b".to_string(), Value::Number(Number::Int(3))), + ] + .into_iter() + .collect() + )), + comment: None, + permissions: Permission::Specific(Value::Null) + })) + ); +} + +#[test] +fn parse_define_table() { + let res = + test_parse!(parse_stmt, r#"DEFINE TABLE name DROP SCHEMAFUL CHANGEFEED 1s PERMISSIONS FOR SELECT WHERE a = 1 AS SELECT foo FROM bar GROUP BY foo"#) + .unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Table(DefineTableStatement { + id: None, + name: Ident("name".to_string()), + drop: true, + full: true, + view: Some(crate::sql::View { + expr: Fields( + vec![Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + alias: None, + }], + false + ), + what: Tables(vec![Table("bar".to_owned())]), + cond: None, + group: Some(Groups(vec![Group(Idiom(vec![Part::Field(Ident("foo".to_owned()))]))])), + }), + permissions: Permissions { + select: Permission::Specific(Value::Expression(Box::new( + crate::sql::Expression::Binary { + l: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_owned()))])), + o: Operator::Equal, + r: Value::Number(Number::Int(1)) + } + ))), + create: Permission::None, + update: Permission::None, + delete: Permission::None, + }, + changefeed: 
Some(ChangeFeed { + expiry: std::time::Duration::from_secs(1) + }), + comment: None, + })) + ); +} + +#[test] +fn parse_define_event() { + let res = + test_parse!(parse_stmt, r#"DEFINE EVENT event ON TABLE table WHEN null THEN null,none"#) + .unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Event(DefineEventStatement { + name: Ident("event".to_owned()), + what: Ident("table".to_owned()), + when: Value::Null, + then: Values(vec![Value::Null, Value::None]), + comment: None, + })) + ) +} + +#[test] +fn parse_define_field() { + let res = test_parse!( + parse_stmt, + r#"DEFINE FIELD foo.*[*]... ON TABLE bar FLEX TYPE option,10>> VALUE null ASSERT true DEFAULT false PERMISSIONS FOR DELETE, UPDATE NONE, FOR create WHERE true"# + ).unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Field(DefineFieldStatement { + name: Idiom(vec![ + Part::Field(Ident("foo".to_owned())), + Part::All, + Part::All, + Part::Flatten, + ]), + what: Ident("bar".to_owned()), + flex: true, + kind: Some(Kind::Option(Box::new(Kind::Either(vec![ + Kind::Number, + Kind::Array(Box::new(Kind::Record(vec![Table("foo".to_owned())])), Some(10)) + ])))), + value: Some(Value::Null), + assert: Some(Value::Bool(true)), + default: Some(Value::Bool(false)), + permissions: Permissions { + delete: Permission::None, + update: Permission::None, + create: Permission::Specific(Value::Bool(true)), + select: Permission::Full, + }, + comment: None + })) + ) +} + +#[test] +fn parse_define_index() { + let res = test_parse!( + parse_stmt, + r#"DEFINE INDEX index ON TABLE table FIELDS a,b[*] SEARCH ANALYZER ana BM25 (0.1,0.2) + DOC_IDS_ORDER 1 + DOC_LENGTHS_ORDER 2 + POSTINGS_ORDER 3 + TERMS_ORDER 4 + DOC_IDS_CACHE 5 + DOC_LENGTHS_CACHE 6 + POSTINGS_CACHE 7 + TERMS_CACHE 8 + HIGHLIGHTS"# + ) + .unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Index(DefineIndexStatement { + name: Ident("index".to_owned()), + what: Ident("table".to_owned()), + cols: Idioms(vec![ + 
Idiom(vec![Part::Field(Ident("a".to_owned()))]), + Idiom(vec![Part::Field(Ident("b".to_owned())), Part::All]) + ]), + index: Index::Search(SearchParams { + az: Ident("ana".to_owned()), + hl: true, + sc: Scoring::Bm { + k1: 0.1, + b: 0.2 + }, + doc_ids_order: 1, + doc_lengths_order: 2, + postings_order: 3, + terms_order: 4, + doc_ids_cache: 5, + doc_lengths_cache: 6, + postings_cache: 7, + terms_cache: 8, + }), + comment: None + })) + ); + + let res = + test_parse!(parse_stmt, r#"DEFINE INDEX index ON TABLE table FIELDS a UNIQUE"#).unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Index(DefineIndexStatement { + name: Ident("index".to_owned()), + what: Ident("table".to_owned()), + cols: Idioms(vec![Idiom(vec![Part::Field(Ident("a".to_owned()))]),]), + index: Index::Uniq, + comment: None + })) + ); + + let res = + test_parse!(parse_stmt, r#"DEFINE INDEX index ON TABLE table FIELDS a MTREE DIMENSION 4 DISTANCE MINKOWSKI 5 CAPACITY 6 DOC_IDS_ORDER 7 DOC_IDS_CACHE 8 MTREE_CACHE 9"#).unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Index(DefineIndexStatement { + name: Ident("index".to_owned()), + what: Ident("table".to_owned()), + cols: Idioms(vec![Idiom(vec![Part::Field(Ident("a".to_owned()))]),]), + index: Index::MTree(MTreeParams { + dimension: 4, + distance: Distance::Minkowski(Number::Int(5)), + capacity: 6, + doc_ids_order: 7, + doc_ids_cache: 8, + mtree_cache: 9, + vector_type: VectorType::F64, + }), + comment: None + })) + ); +} + +#[test] +fn parse_define_analyzer() { + let res = test_parse!( + parse_stmt, + r#"DEFINE ANALYZER ana FILTERS ASCII, EDGENGRAM(1,2), NGRAM(3,4), LOWERCASE, SNOWBALL(NLD), UPPERCASE TOKENIZERS BLANK, CAMEL, CLASS, PUNCT FUNCTION fn::foo::bar"# + ).unwrap(); + + assert_eq!( + res, + Statement::Define(DefineStatement::Analyzer(DefineAnalyzerStatement { + name: Ident("ana".to_owned()), + tokenizers: Some(vec![ + Tokenizer::Blank, + Tokenizer::Camel, + Tokenizer::Class, + Tokenizer::Punct, + ]), + 
filters: Some(vec![ + Filter::Ascii, + Filter::EdgeNgram(1, 2), + Filter::Ngram(3, 4), + Filter::Lowercase, + Filter::Snowball(Language::Dutch), + Filter::Uppercase, + ]), + comment: None, + function: Some(Ident("foo::bar".to_string())), + })), + ) +} + +#[test] +fn parse_delete() { + let res = test_parse!( + parse_statement, + "DELETE FROM ONLY |foo:32..64| Where 2 RETURN AFTER TIMEOUT 1s PARALLEL" + ) + .unwrap(); + assert_eq!( + res, + Statement::Delete(DeleteStatement { + only: true, + what: Values(vec![Value::Mock(crate::sql::Mock::Range("foo".to_string(), 32, 64))]), + cond: Some(Cond(Value::Number(Number::Int(2)))), + output: Some(Output::After), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(1)))), + parallel: true, + }) + ); +} + +#[test] +fn parse_delete_2() { + let res = test_parse!( + parse_stmt, + r#"DELETE FROM ONLY a:b->?[$][?true] WHERE null RETURN NULL TIMEOUT 1h PARALLEL"# + ) + .unwrap(); + + assert_eq!( + res, + Statement::Delete(DeleteStatement { + only: true, + what: Values(vec![Value::Idiom(Idiom(vec![ + Part::Start(Value::Edges(Box::new(Edges { + dir: Dir::Out, + from: Thing { + tb: "a".to_owned(), + id: Id::String("b".to_owned()), + }, + what: Tables::default(), + }))), + Part::Last, + Part::Where(Value::Bool(true)), + ]))]), + cond: Some(Cond(Value::Null)), + output: Some(Output::Null), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(60 * 60)))), + parallel: true + }) + ) +} + +#[test] +pub fn parse_for() { + let res = test_parse!( + parse_stmt, + r#"FOR $foo IN (SELECT foo FROM bar) * 2 { + BREAK + }"# + ) + .unwrap(); + + assert_eq!( + res, + Statement::Foreach(ForeachStatement { + param: Param(Ident("foo".to_owned())), + range: Value::Expression(Box::new(Expression::Binary { + l: Value::Subquery(Box::new(Subquery::Select(SelectStatement { + expr: Fields( + vec![Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + alias: None + }], + false + ), + what: 
Values(vec![Value::Table(Table("bar".to_owned()))]), + ..Default::default() + }))), + o: Operator::Mul, + r: Value::Number(Number::Int(2)) + })), + block: Block(vec![Entry::Break(BreakStatement)]) + }) + ) +} + +#[test] +fn parse_if() { + let res = + test_parse!(parse_stmt, r#"IF foo THEN bar ELSE IF faz THEN baz ELSE baq END"#).unwrap(); + assert_eq!( + res, + Statement::Ifelse(IfelseStatement { + exprs: vec![ + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])) + ), + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])), + Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))])) + ) + ], + close: Some(Value::Idiom(Idiom(vec![Part::Field(Ident("baq".to_owned()))]))) + }) + ) +} + +#[test] +fn parse_if_block() { + let res = + test_parse!(parse_stmt, r#"IF foo { bar } ELSE IF faz { baz } ELSE { baq }"#).unwrap(); + assert_eq!( + res, + Statement::Ifelse(IfelseStatement { + exprs: vec![ + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![ + Part::Field(Ident("bar".to_owned())) + ])))]))), + ), + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])), + Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![ + Part::Field(Ident("baz".to_owned())) + ])))]))), + ) + ], + close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom( + vec![Part::Field(Ident("baq".to_owned()))] + )))])))), + }) + ) +} + +#[test] +fn parse_info() { + let res = test_parse!(parse_stmt, "INFO FOR ROOT").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::Root)); + + let res = test_parse!(parse_stmt, "INFO FOR KV").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::Root)); + + let res = test_parse!(parse_stmt, "INFO FOR NAMESPACE").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::Ns)); + + let res = test_parse!(parse_stmt, "INFO FOR 
NS").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::Ns)); + + let res = test_parse!(parse_stmt, "INFO FOR SCOPE scope").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::Sc(Ident("scope".to_owned())))); + + let res = test_parse!(parse_stmt, "INFO FOR TABLE table").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::Tb(Ident("table".to_owned())))); + + let res = test_parse!(parse_stmt, "INFO FOR USER user").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::User(Ident("user".to_owned()), None))); + + let res = test_parse!(parse_stmt, "INFO FOR USER user ON namespace").unwrap(); + assert_eq!(res, Statement::Info(InfoStatement::User(Ident("user".to_owned()), Some(Base::Ns)))); +} + +#[test] +fn parse_select() { + let res = test_parse!( + parse_stmt, + r#" +SELECT bar as foo,[1,2],bar OMIT bar FROM ONLY a,1 + WITH INDEX index,index_2 + WHERE true + SPLIT ON foo,bar + GROUP foo,bar + ORDER BY foo COLLATE NUMERIC ASC + START AT { a: true } + LIMIT BY a:b + FETCH foo + VERSION d"2012-04-23T18:25:43.0000511Z" + EXPLAIN FULL + "# + ) + .unwrap(); + + let offset = Utc.fix(); + let expected_datetime = offset + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 51_100) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + + assert_eq!( + res, + Statement::Select(SelectStatement { + expr: Fields( + vec![ + Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + alias: Some(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + }, + Field::Single { + expr: Value::Array(Array(vec![ + Value::Number(Number::Int(1)), + Value::Number(Number::Int(2)) + ])), + alias: None, + }, + Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + alias: None, + }, + ], + false, + ), + omit: Some(Idioms(vec![Idiom(vec![Part::Field(Ident("bar".to_owned()))])])), + only: true, + what: 
Values(vec![Value::Table(Table("a".to_owned())), Value::Number(Number::Int(1))]), + with: Some(With::Index(vec!["index".to_owned(), "index_2".to_owned()])), + cond: Some(Cond(Value::Bool(true))), + split: Some(Splits(vec![ + Split(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Split(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + ])), + group: Some(Groups(vec![ + Group(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Group(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + ])), + order: Some(Orders(vec![Order { + order: Idiom(vec![Part::Field(Ident("foo".to_owned()))]), + random: false, + collate: true, + numeric: true, + direction: true, + }])), + limit: Some(Limit(Value::Thing(Thing { + tb: "a".to_owned(), + id: Id::String("b".to_owned()), + }))), + start: Some(Start(Value::Object(Object( + [("a".to_owned(), Value::Bool(true))].into_iter().collect() + )))), + fetch: Some(Fetchs(vec![Fetch(Idiom(vec![Part::Field(Ident("foo".to_owned()))]))])), + version: Some(Version(Datetime(expected_datetime))), + timeout: None, + parallel: false, + explain: Some(Explain(true)), + }), + ); +} + +#[test] +fn parse_let() { + let res = test_parse!(parse_stmt, r#"LET $param = 1"#).unwrap(); + assert_eq!( + res, + Statement::Set(SetStatement { + name: "param".to_owned(), + what: Value::Number(Number::Int(1)) + }) + ); + + let res = test_parse!(parse_stmt, r#"$param = 1"#).unwrap(); + assert_eq!( + res, + Statement::Set(SetStatement { + name: "param".to_owned(), + what: Value::Number(Number::Int(1)) + }) + ); +} + +#[test] +fn parse_show() { + let res = test_parse!(parse_stmt, r#"SHOW CHANGES FOR TABLE foo SINCE 1 LIMIT 10"#).unwrap(); + + assert_eq!( + res, + Statement::Show(ShowStatement { + table: Some(Table("foo".to_owned())), + since: ShowSince::Versionstamp(1), + limit: Some(10) + }) + ); + + let offset = Utc.fix(); + let expected_datetime = offset + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 
51_100) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + + let res = test_parse!( + parse_stmt, + r#"SHOW CHANGES FOR DATABASE SINCE d"2012-04-23T18:25:43.0000511Z""# + ) + .unwrap(); + assert_eq!( + res, + Statement::Show(ShowStatement { + table: None, + since: ShowSince::Timestamp(Datetime(expected_datetime)), + limit: None + }) + ) +} + +#[test] +fn parse_sleep() { + let res = test_parse!(parse_stmt, r"SLEEP 1s").unwrap(); + + let expect = Statement::Sleep(SleepStatement { + duration: Duration(std::time::Duration::from_secs(1)), + }); + assert_eq!(res, expect) +} + +#[test] +fn parse_use() { + let res = test_parse!(parse_stmt, r"USE NS foo").unwrap(); + let expect = Statement::Use(UseStatement { + ns: Some("foo".to_owned()), + db: None, + }); + assert_eq!(res, expect); + + let res = test_parse!(parse_stmt, r"USE DB foo").unwrap(); + let expect = Statement::Use(UseStatement { + ns: None, + db: Some("foo".to_owned()), + }); + assert_eq!(res, expect); + + let res = test_parse!(parse_stmt, r"USE NS bar DB foo").unwrap(); + let expect = Statement::Use(UseStatement { + ns: Some("bar".to_owned()), + db: Some("foo".to_owned()), + }); + assert_eq!(res, expect); +} + +#[test] +fn parse_value_stmt() { + let res = test_parse!(parse_stmt, r"1s").unwrap(); + let expect = Statement::Value(Value::Duration(Duration(std::time::Duration::from_secs(1)))); + assert_eq!(res, expect); +} + +#[test] +fn parse_throw() { + let res = test_parse!(parse_stmt, r"THROW 1s").unwrap(); + + let expect = Statement::Throw(ThrowStatement { + error: Value::Duration(Duration(std::time::Duration::from_secs(1))), + }); + assert_eq!(res, expect) +} + +#[test] +fn parse_insert() { + let res = test_parse!( + parse_stmt, + r#"INSERT IGNORE INTO $foo (a,b,c) VALUES (1,2,3),(4,5,6) ON DUPLICATE KEY UPDATE a.b +?= null, c.d += none RETURN AFTER"# + ).unwrap(); + assert_eq!( + res, + Statement::Insert(InsertStatement { + into: Value::Param(Param(Ident("foo".to_owned()))), + data: 
Data::ValuesExpression(vec![ + vec![ + ( + Idiom(vec![Part::Field(Ident("a".to_owned()))]), + Value::Number(Number::Int(1)), + ), + ( + Idiom(vec![Part::Field(Ident("b".to_owned()))]), + Value::Number(Number::Int(2)), + ), + ( + Idiom(vec![Part::Field(Ident("c".to_owned()))]), + Value::Number(Number::Int(3)), + ), + ], + vec![ + ( + Idiom(vec![Part::Field(Ident("a".to_owned()))]), + Value::Number(Number::Int(4)), + ), + ( + Idiom(vec![Part::Field(Ident("b".to_owned()))]), + Value::Number(Number::Int(5)), + ), + ( + Idiom(vec![Part::Field(Ident("c".to_owned()))]), + Value::Number(Number::Int(6)), + ), + ], + ]), + ignore: true, + update: Some(Data::UpdateExpression(vec![ + ( + Idiom(vec![ + Part::Field(Ident("a".to_owned())), + Part::Field(Ident("b".to_owned())), + ]), + Operator::Ext, + Value::Null, + ), + ( + Idiom(vec![ + Part::Field(Ident("c".to_owned())), + Part::Field(Ident("d".to_owned())), + ]), + Operator::Inc, + Value::None, + ), + ])), + output: Some(Output::After), + timeout: None, + parallel: false, + }), + ) +} + +#[test] +fn parse_kill() { + let res = test_parse!(parse_stmt, r#"KILL $param"#).unwrap(); + assert_eq!( + res, + Statement::Kill(KillStatement { + id: Value::Param(Param(Ident("param".to_owned()))) + }) + ); + + let res = test_parse!(parse_stmt, r#"KILL u"e72bee20-f49b-11ec-b939-0242ac120002" "#).unwrap(); + assert_eq!( + res, + Statement::Kill(KillStatement { + id: Value::Uuid(Uuid(uuid::uuid!("e72bee20-f49b-11ec-b939-0242ac120002"))) + }) + ); +} + +#[test] +fn parse_live() { + let res = test_parse!(parse_stmt, r#"LIVE SELECT DIFF FROM $foo"#).unwrap(); + let Statement::Live(stmt) = res else { + panic!() + }; + assert_eq!(stmt.expr, Fields::default()); + assert_eq!(stmt.what, Value::Param(Param(Ident("foo".to_owned())))); + + let res = + test_parse!(parse_stmt, r#"LIVE SELECT foo FROM table WHERE true FETCH a[where foo],b"#) + .unwrap(); + let Statement::Live(stmt) = res else { + panic!() + }; + assert_eq!( + stmt.expr, + Fields( + 
vec![Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + alias: None, + }], + false, + ) + ); + assert_eq!(stmt.what, Value::Table(Table("table".to_owned()))); + assert_eq!(stmt.cond, Some(Cond(Value::Bool(true)))); + assert_eq!( + stmt.fetch, + Some(Fetchs(vec![ + Fetch(Idiom(vec![ + Part::Field(Ident("a".to_owned())), + Part::Where(Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))]))), + ])), + Fetch(Idiom(vec![Part::Field(Ident("b".to_owned()))])), + ])), + ) +} + +#[test] +fn parse_option() { + let res = test_parse!(parse_stmt, r#"OPTION value = true"#).unwrap(); + assert_eq!( + res, + Statement::Option(OptionStatement { + name: Ident("value".to_owned()), + what: true + }) + ) +} + +#[test] +fn parse_return() { + let res = test_parse!(parse_stmt, r#"RETURN RETRUN FETCH RETURN"#).unwrap(); + assert_eq!( + res, + Statement::Output(OutputStatement { + what: Value::Idiom(Idiom(vec![Part::Field(Ident("RETRUN".to_owned()))])), + fetch: Some(Fetchs(vec![Fetch(Idiom(vec![Part::Field( + Ident("RETURN".to_owned()).to_owned() + )]))])), + }), + ) +} + +#[test] +fn parse_relate() { + let res = test_parse!( + parse_stmt, + r#"RELATE ONLY [1,2]->a:b->(CREATE foo) UNIQUE SET a += 1 RETURN NONE PARALLEL"# + ) + .unwrap(); + assert_eq!( + res, + Statement::Relate(RelateStatement { + only: true, + kind: Value::Thing(Thing { + tb: "a".to_owned(), + id: Id::String("b".to_owned()), + }), + from: Value::Array(Array(vec![ + Value::Number(Number::Int(1)), + Value::Number(Number::Int(2)), + ])), + with: Value::Subquery(Box::new(Subquery::Create(CreateStatement { + only: false, + what: Values(vec![Value::Table(Table("foo".to_owned()))]), + data: None, + output: None, + timeout: None, + parallel: false, + }))), + uniq: true, + data: Some(Data::SetExpression(vec![( + Idiom(vec![Part::Field(Ident("a".to_owned()))]), + Operator::Inc, + Value::Number(Number::Int(1)) + )])), + output: Some(Output::None), + timeout: None, + parallel: true, + 
}), + ) +} + +#[test] +fn parse_remove() { + let res = test_parse!(parse_stmt, r#"REMOVE NAMESPACE ns"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Namespace(RemoveNamespaceStatement { + name: Ident("ns".to_owned()) + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE DB database"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Database(RemoveDatabaseStatement { + name: Ident("database".to_owned()) + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE FUNCTION fn::foo::bar"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Function(RemoveFunctionStatement { + name: Ident("foo::bar".to_owned()) + })) + ); + let res = test_parse!(parse_stmt, r#"REMOVE FUNCTION fn::foo::bar();"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Function(RemoveFunctionStatement { + name: Ident("foo::bar".to_owned()) + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE TOKEN foo ON SCOPE bar"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Token(RemoveTokenStatement { + name: Ident("foo".to_owned()), + base: Base::Sc(Ident("bar".to_owned())) + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE SCOPE foo"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Scope(RemoveScopeStatement { + name: Ident("foo".to_owned()), + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE PARAM $foo"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Param(RemoveParamStatement { + name: Ident("foo".to_owned()), + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE TABLE foo"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Table(RemoveTableStatement { + name: Ident("foo".to_owned()), + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE EVENT foo ON TABLE bar"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Event(RemoveEventStatement { + name: Ident("foo".to_owned()), + what: 
Ident("bar".to_owned()), + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE FIELD foo.bar[10] ON bar"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Field(RemoveFieldStatement { + name: Idiom(vec![ + Part::Field(Ident("foo".to_owned())), + Part::Field(Ident("bar".to_owned())), + Part::Index(Number::Int(10)) + ]), + what: Ident("bar".to_owned()), + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE INDEX foo ON bar"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Index(RemoveIndexStatement { + name: Ident("foo".to_owned()), + what: Ident("bar".to_owned()), + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE ANALYZER foo"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::Analyzer(RemoveAnalyzerStatement { + name: Ident("foo".to_owned()), + })) + ); + + let res = test_parse!(parse_stmt, r#"REMOVE user foo on database"#).unwrap(); + assert_eq!( + res, + Statement::Remove(RemoveStatement::User(RemoveUserStatement { + name: Ident("foo".to_owned()), + base: Base::Db, + })) + ); +} + +#[test] +fn parse_update() { + let res = test_parse!( + parse_stmt, + r#"UPDATE ONLY { "text" }, a->b UNSET foo... 
, a->b, c[*] WHERE true RETURN DIFF TIMEOUT 1s PARALLEL"# + ) + .unwrap(); + assert_eq!( + res, + Statement::Update(UpdateStatement { + only: true, + what: Values(vec![ + Value::Future(Box::new(Future(Block(vec![Entry::Value(Value::Strand(Strand( + "text".to_string() + )))])))), + Value::Idiom(Idiom(vec![ + Part::Field(Ident("a".to_string())), + Part::Graph(Graph { + dir: Dir::Out, + what: Tables(vec![Table("b".to_string())]), + expr: Fields::all(), + ..Default::default() + }) + ])) + ]), + cond: Some(Cond(Value::Bool(true))), + data: Some(Data::UnsetExpression(vec![ + Idiom(vec![Part::Field(Ident("foo".to_string())), Part::Flatten]), + Idiom(vec![ + Part::Field(Ident("a".to_string())), + Part::Graph(Graph { + dir: Dir::Out, + what: Tables(vec![Table("b".to_string())]), + expr: Fields::all(), + ..Default::default() + }) + ]), + Idiom(vec![Part::Field(Ident("c".to_string())), Part::All]) + ])), + output: Some(Output::Diff), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(1)))), + parallel: true, + }) + ); +} diff --git a/lib/src/syn/v2/parser/test/streaming.rs b/lib/src/syn/v2/parser/test/streaming.rs new file mode 100644 index 00000000..b7dd77c1 --- /dev/null +++ b/lib/src/syn/v2/parser/test/streaming.rs @@ -0,0 +1,680 @@ +use crate::{ + sql::{ + block::Entry, + changefeed::ChangeFeed, + filter::Filter, + index::{Distance, MTreeParams, SearchParams, VectorType}, + language::Language, + statements::{ + analyze::AnalyzeStatement, show::ShowSince, show::ShowStatement, sleep::SleepStatement, + BeginStatement, BreakStatement, CancelStatement, CommitStatement, ContinueStatement, + CreateStatement, DefineAnalyzerStatement, DefineDatabaseStatement, + DefineEventStatement, DefineFieldStatement, DefineFunctionStatement, + DefineIndexStatement, DefineNamespaceStatement, DefineParamStatement, DefineStatement, + DefineTableStatement, DefineTokenStatement, DeleteStatement, ForeachStatement, + IfelseStatement, InfoStatement, InsertStatement, KillStatement, 
OutputStatement, + RelateStatement, RemoveFieldStatement, RemoveFunctionStatement, RemoveStatement, + SelectStatement, SetStatement, ThrowStatement, UpdateStatement, + }, + tokenizer::Tokenizer, + Algorithm, Array, Base, Block, Cond, Data, Datetime, Dir, Duration, Edges, Explain, + Expression, Fetch, Fetchs, Field, Fields, Future, Graph, Group, Groups, Id, Ident, Idiom, + Idioms, Index, Kind, Limit, Number, Object, Operator, Order, Orders, Output, Param, Part, + Permission, Permissions, Scoring, Split, Splits, Start, Statement, Strand, Subquery, Table, + Tables, Thing, Timeout, Uuid, Value, Values, Version, With, + }, + syn::v2::parser::{Parser, PartialResult}, +}; +use chrono::{offset::TimeZone, NaiveDate, Offset, Utc}; + +static SOURCE: &str = r#" + ANALYZE INDEX b on a; + BEGIN; + BEGIN TRANSACTION; + BREAK; + CANCEL; + CANCEL TRANSACTION; + COMMIT; + COMMIT TRANSACTION; + CONTINUE; + CREATE ONLY foo SET bar = 3, foo +?= 4 RETURN VALUE foo AS bar TIMEOUT 1s PARALLEL; + DEFINE NAMESPACE a COMMENT 'test'; + DEFINE NS a; + DEFINE DATABASE a COMMENT 'test' CHANGEFEED 10m; + DEFINE DB a; + DEFINE FUNCTION fn::foo::bar($a: number, $b: array) { + RETURN a + } COMMENT 'test' PERMISSIONS FULL; + DEFINE TOKEN a ON SCOPE b TYPE EDDSA VALUE "foo" COMMENT "bar"; + DEFINE PARAM $a VALUE { a: 1, "b": 3 } PERMISSIONS WHERE null; + DEFINE TABLE name DROP SCHEMAFUL CHANGEFEED 1s PERMISSIONS FOR SELECT WHERE a = 1 AS SELECT foo FROM bar GROUP BY foo; + DEFINE EVENT event ON TABLE table WHEN null THEN null,none; + DEFINE FIELD foo.*[*]... 
ON TABLE bar FLEX TYPE option,10>> VALUE null ASSERT true DEFAULT false PERMISSIONS FOR DELETE, UPDATE NONE, FOR create WHERE true; + DEFINE INDEX index ON TABLE table FIELDS a,b[*] SEARCH ANALYZER ana BM25 (0.1,0.2) + DOC_IDS_ORDER 1 + DOC_LENGTHS_ORDER 2 + POSTINGS_ORDER 3 + TERMS_ORDER 4 + DOC_IDS_CACHE 5 + DOC_LENGTHS_CACHE 6 + POSTINGS_CACHE 7 + TERMS_CACHE 8 + HIGHLIGHTS; + DEFINE INDEX index ON TABLE table FIELDS a UNIQUE; + DEFINE INDEX index ON TABLE table FIELDS a MTREE DIMENSION 4 DISTANCE MINKOWSKI 5 CAPACITY 6 DOC_IDS_ORDER 7 DOC_IDS_CACHE 8 MTREE_CACHE 9; + DEFINE ANALYZER ana FILTERS ASCII, EDGENGRAM(1,2), NGRAM(3,4), LOWERCASE, SNOWBALL(NLD), UPPERCASE TOKENIZERS BLANK, CAMEL, CLASS, PUNCT FUNCTION fn::foo::bar; + DELETE FROM ONLY |foo:32..64| Where 2 RETURN AFTER TIMEOUT 1s PARALLEL; + DELETE FROM ONLY a:b->?[$][?true] WHERE null RETURN NULL TIMEOUT 1h PARALLEL; + FOR $foo IN (SELECT foo FROM bar) * 2 { + BREAK + }; + IF foo THEN bar ELSE IF faz THEN baz ELSE baq END; + IF foo { bar } ELSE IF faz { baz } ELSE { baq }; + INFO FOR ROOT; + INFO FOR NAMESPACE; + INFO FOR SCOPE scope; + INFO FOR USER user ON namespace; + SELECT bar as foo,[1,2],bar OMIT bar FROM ONLY a,1 + WITH INDEX index,index_2 + WHERE true + SPLIT ON foo,bar + GROUP foo,bar + ORDER BY foo COLLATE NUMERIC ASC + START AT { a: true } + LIMIT BY a:b + FETCH foo + VERSION d"2012-04-23T18:25:43.0000511Z" + EXPLAIN FULL; + LET $param = 1; + SHOW CHANGES FOR TABLE foo SINCE 1 LIMIT 10; + SHOW CHANGES FOR DATABASE SINCE d"2012-04-23T18:25:43.0000511Z"; + SLEEP 1s; + THROW 1s; + INSERT IGNORE INTO $foo (a,b,c) VALUES (1,2,3),(4,5,6) ON DUPLICATE KEY UPDATE a.b +?= null, c.d += none RETURN AFTER; + KILL u"e72bee20-f49b-11ec-b939-0242ac120002"; + RETURN RETRUN FETCH RETURN; + RELATE ONLY [1,2]->a:b->(CREATE foo) UNIQUE SET a += 1 RETURN NONE PARALLEL; + REMOVE FUNCTION fn::foo::bar(); + REMOVE FIELD foo.bar[10] ON bar; + UPDATE ONLY { "text" }, a->b UNSET foo... 
, a->b, c[*] WHERE true RETURN DIFF TIMEOUT 1s PARALLEL; +"#; + +fn statements() -> Vec { + let offset = Utc.fix(); + let expected_datetime = offset + .from_local_datetime( + &NaiveDate::from_ymd_opt(2012, 4, 23) + .unwrap() + .and_hms_nano_opt(18, 25, 43, 51_100) + .unwrap(), + ) + .earliest() + .unwrap() + .with_timezone(&Utc); + + vec![ + Statement::Analyze(AnalyzeStatement::Idx(Ident("a".to_string()), Ident("b".to_string()))), + Statement::Begin(BeginStatement), + Statement::Begin(BeginStatement), + Statement::Break(BreakStatement), + Statement::Cancel(CancelStatement), + Statement::Cancel(CancelStatement), + Statement::Commit(CommitStatement), + Statement::Commit(CommitStatement), + Statement::Continue(ContinueStatement), + Statement::Create(CreateStatement { + only: true, + what: Values(vec![Value::Table(Table("foo".to_owned()))]), + data: Some(Data::SetExpression(vec![ + ( + Idiom(vec![Part::Field(Ident("bar".to_owned()))]), + Operator::Equal, + Value::Number(Number::Int(3)), + ), + ( + Idiom(vec![Part::Field(Ident("foo".to_owned()))]), + Operator::Ext, + Value::Number(Number::Int(4)), + ), + ])), + output: Some(Output::Fields(Fields( + vec![Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + alias: Some(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + }], + true, + ))), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(1)))), + parallel: true, + }), + Statement::Define(DefineStatement::Namespace(DefineNamespaceStatement { + id: None, + name: Ident("a".to_string()), + comment: Some(Strand("test".to_string())), + })), + Statement::Define(DefineStatement::Namespace(DefineNamespaceStatement { + id: None, + name: Ident("a".to_string()), + comment: None, + })), + Statement::Define(DefineStatement::Database(DefineDatabaseStatement { + id: None, + name: Ident("a".to_string()), + comment: Some(Strand("test".to_string())), + changefeed: Some(ChangeFeed { + expiry: std::time::Duration::from_secs(60) * 10, + }), 
+ })), + Statement::Define(DefineStatement::Database(DefineDatabaseStatement { + id: None, + name: Ident("a".to_string()), + comment: None, + changefeed: None, + })), + Statement::Define(DefineStatement::Function(DefineFunctionStatement { + name: Ident("foo::bar".to_string()), + args: vec![ + (Ident("a".to_string()), Kind::Number), + (Ident("b".to_string()), Kind::Array(Box::new(Kind::Bool), Some(3))), + ], + block: Block(vec![Entry::Output(OutputStatement { + what: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_string()))])), + fetch: None, + })]), + comment: Some(Strand("test".to_string())), + permissions: Permission::Full, + })), + Statement::Define(DefineStatement::Token(DefineTokenStatement { + name: Ident("a".to_string()), + base: Base::Sc(Ident("b".to_string())), + kind: Algorithm::EdDSA, + code: "foo".to_string(), + comment: Some(Strand("bar".to_string())), + })), + Statement::Define(DefineStatement::Param(DefineParamStatement { + name: Ident("a".to_string()), + value: Value::Object(Object( + [ + ("a".to_string(), Value::Number(Number::Int(1))), + ("b".to_string(), Value::Number(Number::Int(3))), + ] + .into_iter() + .collect(), + )), + comment: None, + permissions: Permission::Specific(Value::Null), + })), + Statement::Define(DefineStatement::Table(DefineTableStatement { + id: None, + name: Ident("name".to_string()), + drop: true, + full: true, + view: Some(crate::sql::View { + expr: Fields( + vec![Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + alias: None, + }], + false, + ), + what: Tables(vec![Table("bar".to_owned())]), + cond: None, + group: Some(Groups(vec![Group(Idiom(vec![Part::Field(Ident("foo".to_owned()))]))])), + }), + permissions: Permissions { + select: Permission::Specific(Value::Expression(Box::new( + crate::sql::Expression::Binary { + l: Value::Idiom(Idiom(vec![Part::Field(Ident("a".to_owned()))])), + o: Operator::Equal, + r: Value::Number(Number::Int(1)), + }, + ))), + create: Permission::None, 
+ update: Permission::None, + delete: Permission::None, + }, + changefeed: Some(ChangeFeed { + expiry: std::time::Duration::from_secs(1), + }), + comment: None, + })), + Statement::Define(DefineStatement::Event(DefineEventStatement { + name: Ident("event".to_owned()), + what: Ident("table".to_owned()), + when: Value::Null, + then: Values(vec![Value::Null, Value::None]), + comment: None, + })), + Statement::Define(DefineStatement::Field(DefineFieldStatement { + name: Idiom(vec![ + Part::Field(Ident("foo".to_owned())), + Part::All, + Part::All, + Part::Flatten, + ]), + what: Ident("bar".to_owned()), + flex: true, + kind: Some(Kind::Option(Box::new(Kind::Either(vec![ + Kind::Number, + Kind::Array(Box::new(Kind::Record(vec![Table("foo".to_owned())])), Some(10)), + ])))), + value: Some(Value::Null), + assert: Some(Value::Bool(true)), + default: Some(Value::Bool(false)), + permissions: Permissions { + delete: Permission::None, + update: Permission::None, + create: Permission::Specific(Value::Bool(true)), + select: Permission::Full, + }, + comment: None, + })), + Statement::Define(DefineStatement::Index(DefineIndexStatement { + name: Ident("index".to_owned()), + what: Ident("table".to_owned()), + cols: Idioms(vec![ + Idiom(vec![Part::Field(Ident("a".to_owned()))]), + Idiom(vec![Part::Field(Ident("b".to_owned())), Part::All]), + ]), + index: Index::Search(SearchParams { + az: Ident("ana".to_owned()), + hl: true, + sc: Scoring::Bm { + k1: 0.1, + b: 0.2, + }, + doc_ids_order: 1, + doc_lengths_order: 2, + postings_order: 3, + terms_order: 4, + doc_ids_cache: 5, + doc_lengths_cache: 6, + postings_cache: 7, + terms_cache: 8, + }), + comment: None, + })), + Statement::Define(DefineStatement::Index(DefineIndexStatement { + name: Ident("index".to_owned()), + what: Ident("table".to_owned()), + cols: Idioms(vec![Idiom(vec![Part::Field(Ident("a".to_owned()))])]), + index: Index::Uniq, + comment: None, + })), + Statement::Define(DefineStatement::Index(DefineIndexStatement { + name: 
Ident("index".to_owned()), + what: Ident("table".to_owned()), + cols: Idioms(vec![Idiom(vec![Part::Field(Ident("a".to_owned()))])]), + index: Index::MTree(MTreeParams { + dimension: 4, + distance: Distance::Minkowski(Number::Int(5)), + capacity: 6, + doc_ids_order: 7, + doc_ids_cache: 8, + mtree_cache: 9, + vector_type: VectorType::F64, + }), + comment: None, + })), + Statement::Define(DefineStatement::Analyzer(DefineAnalyzerStatement { + name: Ident("ana".to_owned()), + tokenizers: Some(vec![ + Tokenizer::Blank, + Tokenizer::Camel, + Tokenizer::Class, + Tokenizer::Punct, + ]), + filters: Some(vec![ + Filter::Ascii, + Filter::EdgeNgram(1, 2), + Filter::Ngram(3, 4), + Filter::Lowercase, + Filter::Snowball(Language::Dutch), + Filter::Uppercase, + ]), + function: Some(Ident("foo::bar".to_string())), + comment: None, + })), + Statement::Delete(DeleteStatement { + only: true, + what: Values(vec![Value::Mock(crate::sql::Mock::Range("foo".to_string(), 32, 64))]), + cond: Some(Cond(Value::Number(Number::Int(2)))), + output: Some(Output::After), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(1)))), + parallel: true, + }), + Statement::Delete(DeleteStatement { + only: true, + what: Values(vec![Value::Idiom(Idiom(vec![ + Part::Start(Value::Edges(Box::new(Edges { + dir: Dir::Out, + from: Thing { + tb: "a".to_owned(), + id: Id::String("b".to_owned()), + }, + what: Tables::default(), + }))), + Part::Last, + Part::Where(Value::Bool(true)), + ]))]), + cond: Some(Cond(Value::Null)), + output: Some(Output::Null), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(60 * 60)))), + parallel: true, + }), + Statement::Foreach(ForeachStatement { + param: Param(Ident("foo".to_owned())), + range: Value::Expression(Box::new(Expression::Binary { + l: Value::Subquery(Box::new(Subquery::Select(SelectStatement { + expr: Fields( + vec![Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + alias: None, + }], + false, + ), + what: 
Values(vec![Value::Table(Table("bar".to_owned()))]), + ..Default::default() + }))), + o: Operator::Mul, + r: Value::Number(Number::Int(2)), + })), + block: Block(vec![Entry::Break(BreakStatement)]), + }), + Statement::Ifelse(IfelseStatement { + exprs: vec![ + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + ), + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])), + Value::Idiom(Idiom(vec![Part::Field(Ident("baz".to_owned()))])), + ), + ], + close: Some(Value::Idiom(Idiom(vec![Part::Field(Ident("baq".to_owned()))]))), + }), + Statement::Ifelse(IfelseStatement { + exprs: vec![ + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![ + Part::Field(Ident("bar".to_owned())), + ])))]))), + ), + ( + Value::Idiom(Idiom(vec![Part::Field(Ident("faz".to_owned()))])), + Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom(vec![ + Part::Field(Ident("baz".to_owned())), + ])))]))), + ), + ], + close: Some(Value::Block(Box::new(Block(vec![Entry::Value(Value::Idiom(Idiom( + vec![Part::Field(Ident("baq".to_owned()))], + )))])))), + }), + Statement::Info(InfoStatement::Root), + Statement::Info(InfoStatement::Ns), + Statement::Info(InfoStatement::Sc(Ident("scope".to_owned()))), + Statement::Info(InfoStatement::User(Ident("user".to_owned()), Some(Base::Ns))), + Statement::Select(SelectStatement { + expr: Fields( + vec![ + Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + alias: Some(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + }, + Field::Single { + expr: Value::Array(Array(vec![ + Value::Number(Number::Int(1)), + Value::Number(Number::Int(2)), + ])), + alias: None, + }, + Field::Single { + expr: Value::Idiom(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + alias: None, + }, + ], + false, + ), + omit: 
Some(Idioms(vec![Idiom(vec![Part::Field(Ident("bar".to_owned()))])])), + only: true, + what: Values(vec![Value::Table(Table("a".to_owned())), Value::Number(Number::Int(1))]), + with: Some(With::Index(vec!["index".to_owned(), "index_2".to_owned()])), + cond: Some(Cond(Value::Bool(true))), + split: Some(Splits(vec![ + Split(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Split(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + ])), + group: Some(Groups(vec![ + Group(Idiom(vec![Part::Field(Ident("foo".to_owned()))])), + Group(Idiom(vec![Part::Field(Ident("bar".to_owned()))])), + ])), + order: Some(Orders(vec![Order { + order: Idiom(vec![Part::Field(Ident("foo".to_owned()))]), + random: false, + collate: true, + numeric: true, + direction: true, + }])), + limit: Some(Limit(Value::Thing(Thing { + tb: "a".to_owned(), + id: Id::String("b".to_owned()), + }))), + start: Some(Start(Value::Object(Object( + [("a".to_owned(), Value::Bool(true))].into_iter().collect(), + )))), + fetch: Some(Fetchs(vec![Fetch(Idiom(vec![Part::Field(Ident("foo".to_owned()))]))])), + version: Some(Version(Datetime(expected_datetime))), + timeout: None, + parallel: false, + explain: Some(Explain(true)), + }), + Statement::Set(SetStatement { + name: "param".to_owned(), + what: Value::Number(Number::Int(1)), + }), + Statement::Show(ShowStatement { + table: Some(Table("foo".to_owned())), + since: ShowSince::Versionstamp(1), + limit: Some(10), + }), + Statement::Show(ShowStatement { + table: None, + since: ShowSince::Timestamp(Datetime(expected_datetime)), + limit: None, + }), + Statement::Sleep(SleepStatement { + duration: Duration(std::time::Duration::from_secs(1)), + }), + Statement::Throw(ThrowStatement { + error: Value::Duration(Duration(std::time::Duration::from_secs(1))), + }), + Statement::Insert(InsertStatement { + into: Value::Param(Param(Ident("foo".to_owned()))), + data: Data::ValuesExpression(vec![ + vec![ + ( + Idiom(vec![Part::Field(Ident("a".to_owned()))]), + 
Value::Number(Number::Int(1)), + ), + ( + Idiom(vec![Part::Field(Ident("b".to_owned()))]), + Value::Number(Number::Int(2)), + ), + ( + Idiom(vec![Part::Field(Ident("c".to_owned()))]), + Value::Number(Number::Int(3)), + ), + ], + vec![ + ( + Idiom(vec![Part::Field(Ident("a".to_owned()))]), + Value::Number(Number::Int(4)), + ), + ( + Idiom(vec![Part::Field(Ident("b".to_owned()))]), + Value::Number(Number::Int(5)), + ), + ( + Idiom(vec![Part::Field(Ident("c".to_owned()))]), + Value::Number(Number::Int(6)), + ), + ], + ]), + ignore: true, + update: Some(Data::UpdateExpression(vec![ + ( + Idiom(vec![ + Part::Field(Ident("a".to_owned())), + Part::Field(Ident("b".to_owned())), + ]), + Operator::Ext, + Value::Null, + ), + ( + Idiom(vec![ + Part::Field(Ident("c".to_owned())), + Part::Field(Ident("d".to_owned())), + ]), + Operator::Inc, + Value::None, + ), + ])), + output: Some(Output::After), + timeout: None, + parallel: false, + }), + Statement::Kill(KillStatement { + id: Value::Uuid(Uuid(uuid::uuid!("e72bee20-f49b-11ec-b939-0242ac120002"))), + }), + Statement::Output(OutputStatement { + what: Value::Idiom(Idiom(vec![Part::Field(Ident("RETRUN".to_owned()))])), + fetch: Some(Fetchs(vec![Fetch(Idiom(vec![Part::Field( + Ident("RETURN".to_owned()).to_owned(), + )]))])), + }), + Statement::Relate(RelateStatement { + only: true, + kind: Value::Thing(Thing { + tb: "a".to_owned(), + id: Id::String("b".to_owned()), + }), + from: Value::Array(Array(vec![ + Value::Number(Number::Int(1)), + Value::Number(Number::Int(2)), + ])), + with: Value::Subquery(Box::new(Subquery::Create(CreateStatement { + only: false, + what: Values(vec![Value::Table(Table("foo".to_owned()))]), + data: None, + output: None, + timeout: None, + parallel: false, + }))), + uniq: true, + data: Some(Data::SetExpression(vec![( + Idiom(vec![Part::Field(Ident("a".to_owned()))]), + Operator::Inc, + Value::Number(Number::Int(1)), + )])), + output: Some(Output::None), + timeout: None, + parallel: true, + }), + 
Statement::Remove(RemoveStatement::Function(RemoveFunctionStatement { + name: Ident("foo::bar".to_owned()), + })), + Statement::Remove(RemoveStatement::Field(RemoveFieldStatement { + name: Idiom(vec![ + Part::Field(Ident("foo".to_owned())), + Part::Field(Ident("bar".to_owned())), + Part::Index(Number::Int(10)), + ]), + what: Ident("bar".to_owned()), + })), + Statement::Update(UpdateStatement { + only: true, + what: Values(vec![ + Value::Future(Box::new(Future(Block(vec![Entry::Value(Value::Strand(Strand( + "text".to_string(), + )))])))), + Value::Idiom(Idiom(vec![ + Part::Field(Ident("a".to_string())), + Part::Graph(Graph { + dir: Dir::Out, + what: Tables(vec![Table("b".to_string())]), + expr: Fields::all(), + ..Default::default() + }), + ])), + ]), + cond: Some(Cond(Value::Bool(true))), + data: Some(Data::UnsetExpression(vec![ + Idiom(vec![Part::Field(Ident("foo".to_string())), Part::Flatten]), + Idiom(vec![ + Part::Field(Ident("a".to_string())), + Part::Graph(Graph { + dir: Dir::Out, + what: Tables(vec![Table("b".to_string())]), + expr: Fields::all(), + ..Default::default() + }), + ]), + Idiom(vec![Part::Field(Ident("c".to_string())), Part::All]), + ])), + output: Some(Output::Diff), + timeout: Some(Timeout(Duration(std::time::Duration::from_secs(1)))), + parallel: true, + }), + ] +} + +#[test] +fn test_streaming() { + let expected = statements(); + let mut current_stmt = 0; + let source_bytes = SOURCE.as_bytes(); + let mut source_start = 0; + let mut parser = Parser::new(&[]); + + for i in 0..source_bytes.len() { + let partial_source = &source_bytes[source_start..i]; + //let src = String::from_utf8_lossy(partial_source); + //println!("{}:{}", i, src); + parser = parser.change_source(partial_source); + parser.reset(); + match parser.parse_partial_statement() { + PartialResult::Pending { + .. 
+ } => { + continue; + } + PartialResult::Ready { + value, + used, + } => { + //println!("USED: {}", used); + let value = value.unwrap(); + assert_eq!(value, expected[current_stmt]); + current_stmt += 1; + source_start += used; + } + } + } + + let src = String::from_utf8_lossy(&source_bytes[source_start..]); + let range = src.char_indices().nth(100).map(|x| x.0).unwrap_or(src.len()); + let src = &src[..range]; + parser.reset(); + parser = parser.change_source(&source_bytes[source_start..]); + assert_eq!( + current_stmt, + expected.len(), + "failed to parse at {}\nAt statement {}\n\n{:?}", + src, + expected[current_stmt], + parser.parse_partial_statement() + ); +} diff --git a/lib/src/syn/v2/parser/test/value.rs b/lib/src/syn/v2/parser/test/value.rs new file mode 100644 index 00000000..c7f30dcd --- /dev/null +++ b/lib/src/syn/v2/parser/test/value.rs @@ -0,0 +1,66 @@ +use std::collections::BTreeMap; + +use crate::{ + sql::{Array, Constant, Id, Number, Object, Strand, Thing, Value}, + syn::v2::parser::mac::test_parse, +}; + +#[test] +fn parse_recursive_record_string() { + let res = test_parse!(parse_value, r#" r"a:[r"b:{c: r"d:1"}"]" "#).unwrap(); + assert_eq!( + res, + Value::Thing(Thing { + tb: "a".to_owned(), + id: Id::Array(Array(vec![Value::Thing(Thing { + tb: "b".to_owned(), + id: Id::Object(Object(BTreeMap::from([( + "c".to_owned(), + Value::Thing(Thing { + tb: "d".to_owned(), + id: Id::Number(1) + }) + )]))) + })])) + }) + ) +} + +#[test] +fn parse_record_string_2() { + let res = test_parse!(parse_value, r#" r'a:["foo"]' "#).unwrap(); + assert_eq!( + res, + Value::Thing(Thing { + tb: "a".to_owned(), + id: Id::Array(Array(vec![Value::Strand(Strand("foo".to_owned()))])) + }) + ) +} + +#[test] +fn parse_i64() { + let res = test_parse!(parse_value, r#" -9223372036854775808 "#).unwrap(); + assert_eq!(res, Value::Number(Number::Int(i64::MIN))); + + let res = test_parse!(parse_value, r#" 9223372036854775807 "#).unwrap(); + assert_eq!(res, 
Value::Number(Number::Int(i64::MAX))); +} + +#[test] +fn constant_lowercase() { + let out = test_parse!(parse_value, r#" math::pi "#).unwrap(); + assert_eq!(out, Value::Constant(Constant::MathPi)); +} + +#[test] +fn constant_uppercase() { + let out = test_parse!(parse_value, r#" MATH::PI "#).unwrap(); + assert_eq!(out, Value::Constant(Constant::MathPi)); +} + +#[test] +fn constant_mixedcase() { + let out = test_parse!(parse_value, r#" MaTh::Pi "#).unwrap(); + assert_eq!(out, Value::Constant(Constant::MathPi)); +} diff --git a/lib/src/syn/v2/parser/thing.rs b/lib/src/syn/v2/parser/thing.rs new file mode 100644 index 00000000..4f29f0b5 --- /dev/null +++ b/lib/src/syn/v2/parser/thing.rs @@ -0,0 +1,362 @@ +use super::{ParseResult, Parser}; +use crate::{ + sql::{id::Gen, Id, Ident, Range, Thing, Value}, + syn::v2::{ + parser::{ + mac::{expected, unexpected}, + ParseError, ParseErrorKind, + }, + token::{t, NumberKind, TokenKind}, + }, +}; +use std::ops::Bound; + +impl Parser<'_> { + pub fn parse_record_string(&mut self, double: bool) -> ParseResult { + let thing = self.parse_thing()?; + // can't have any tokens in the buffer, since the next token must be produced by a specific + // call. + debug_assert_eq!(self.token_buffer.len(), 0); + // manually handle the trailing `"`. + let token = self.lexer.lex_record_string_close(); + if token.kind == TokenKind::Invalid { + return Err(ParseError::new( + ParseErrorKind::InvalidToken(self.lexer.error.take().unwrap()), + token.span, + )); + } + if token.kind == t!("'r") && double { + unexpected!(self, token.kind, "a single quote") + } + if token.kind == t!("\"r") && !double { + unexpected!(self, token.kind, "a double quote") + } + debug_assert!(matches!(token.kind, TokenKind::CloseRecordString { .. 
})); + Ok(thing) + } + + pub fn parse_thing_or_range(&mut self, ident: String) -> ParseResult { + expected!(self, t!(":")); + + self.peek(); + self.no_whitespace()?; + + if self.eat(t!("..")) { + let end = if self.eat(t!("=")) { + self.no_whitespace()?; + Bound::Included(self.parse_id()?) + } else if self.peek_can_be_ident() + || matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) + { + self.no_whitespace()?; + Bound::Excluded(self.parse_id()?) + } else { + Bound::Unbounded + }; + return Ok(Value::Range(Box::new(Range { + tb: ident, + beg: Bound::Unbounded, + end, + }))); + } + + let beg = if self.peek_can_be_ident() + || matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) + { + let id = self.parse_id()?; + + if self.eat(t!(">")) { + self.no_whitespace()?; + Bound::Excluded(id) + } else { + Bound::Included(id) + } + } else { + Bound::Unbounded + }; + + if self.eat(t!("..")) { + let end = if self.eat(t!("=")) { + self.no_whitespace()?; + Bound::Included(self.parse_id()?) + } else if self.peek_can_be_ident() + || matches!(self.peek_kind(), TokenKind::Number(_) | t!("{") | t!("[")) + { + self.no_whitespace()?; + Bound::Excluded(self.parse_id()?) 
+ } else { + Bound::Unbounded + }; + Ok(Value::Range(Box::new(Range { + tb: ident, + beg, + end, + }))) + } else { + let Bound::Included(id) = beg else { + unexpected!(self, self.peek_kind(), "the range operator '..'") + }; + Ok(Value::Thing(Thing { + tb: ident, + id, + })) + } + } + + pub fn parse_range(&mut self) -> ParseResult { + let tb = self.next_token_value::()?.0; + + expected!(self, t!(":")); + + self.peek(); + self.no_whitespace()?; + + let beg = if self.peek_can_be_ident() { + self.peek(); + self.no_whitespace()?; + + let id = self.parse_id()?; + + self.peek(); + self.no_whitespace()?; + + if self.eat(t!(">")) { + Bound::Excluded(id) + } else { + Bound::Included(id) + } + } else { + Bound::Unbounded + }; + + self.peek(); + self.no_whitespace()?; + + expected!(self, t!("..")); + + self.peek(); + self.no_whitespace()?; + + let inclusive = self.eat(t!("=")); + + self.peek(); + self.no_whitespace()?; + + let end = if self.peek_can_be_ident() { + let id = self.parse_id()?; + if inclusive { + Bound::Included(id) + } else { + Bound::Excluded(id) + } + } else { + Bound::Unbounded + }; + + Ok(Range { + tb, + beg, + end, + }) + } + + pub fn parse_thing(&mut self) -> ParseResult { + let ident = self.next_token_value::()?.0; + self.parse_thing_from_ident(ident) + } + + pub fn parse_thing_from_ident(&mut self, ident: String) -> ParseResult { + expected!(self, t!(":")); + + self.peek(); + self.no_whitespace()?; + + let id = self.parse_id()?; + Ok(Thing { + tb: ident, + id, + }) + } + + pub fn parse_id(&mut self) -> ParseResult { + let token = self.next(); + match token.kind { + t!("{") => { + let object = self.parse_object(token.span)?; + Ok(Id::Object(object)) + } + t!("[") => { + let array = self.parse_array(token.span)?; + Ok(Id::Array(array)) + } + TokenKind::Number(NumberKind::Integer) => { + // Id handle numbers more loose then other parts of the code. + // If number can't fit in a i64 it will instead be parsed as a string. 
+ let text = self.lexer.string.take().unwrap(); + if let Ok(number) = text.parse() { + Ok(Id::Number(number)) + } else { + Ok(Id::String(text)) + } + } + t!("ULID") => { + // TODO: error message about how to use `ulid` as an identifier. + expected!(self, t!("(")); + expected!(self, t!(")")); + Ok(Id::Generate(Gen::Ulid)) + } + t!("UUID") => { + expected!(self, t!("(")); + expected!(self, t!(")")); + Ok(Id::Generate(Gen::Uuid)) + } + t!("RAND") => { + expected!(self, t!("(")); + expected!(self, t!(")")); + Ok(Id::Generate(Gen::Rand)) + } + _ => { + let ident = self.token_value::(token)?.0; + Ok(Id::String(ident)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::sql::array::Array; + use crate::sql::object::Object; + use crate::sql::value::Value; + use crate::syn::Parse; + + fn thing(i: &str) -> ParseResult { + let mut parser = Parser::new(i.as_bytes()); + parser.parse_thing() + } + + #[test] + fn thing_normal() { + let sql = "test:id"; + let res = thing(sql); + let out = res.unwrap(); + assert_eq!("test:id", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::from("id"), + } + ); + } + + #[test] + fn thing_integer() { + let sql = "test:001"; + let res = thing(sql); + let out = res.unwrap(); + assert_eq!("test:1", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::from(1), + } + ); + } + + #[test] + fn thing_string() { + let sql = "r'test:001'"; + let res = Value::parse(sql); + let Value::Thing(out) = res else { + panic!() + }; + assert_eq!("test:1", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::from(1), + } + ); + + let sql = "r'test:001'"; + let res = Value::parse(sql); + let Value::Thing(out) = res else { + panic!() + }; + assert_eq!("test:1", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::from(1), + } + ); + } + + #[test] + fn thing_quoted_backtick() { + let sql = "`test`:`id`"; 
+ let res = thing(sql); + let out = res.unwrap(); + assert_eq!("test:id", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::from("id"), + } + ); + } + + #[test] + fn thing_quoted_brackets() { + let sql = "⟨test⟩:⟨id⟩"; + let res = thing(sql); + let out = res.unwrap(); + assert_eq!("test:id", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::from("id"), + } + ); + } + + #[test] + fn thing_object() { + let sql = "test:{ location: 'GBR', year: 2022 }"; + let res = thing(sql); + let out = res.unwrap(); + assert_eq!("test:{ location: 'GBR', year: 2022 }", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::Object(Object::from(map! { + "location".to_string() => Value::from("GBR"), + "year".to_string() => Value::from(2022), + })), + } + ); + } + + #[test] + fn thing_array() { + let sql = "test:['GBR', 2022]"; + let res = thing(sql); + let out = res.unwrap(); + assert_eq!("test:['GBR', 2022]", format!("{}", out)); + assert_eq!( + out, + Thing { + tb: String::from("test"), + id: Id::Array(Array::from(vec![Value::from("GBR"), Value::from(2022)])), + } + ); + } +} diff --git a/lib/src/syn/v2/parser/token_buffer.rs b/lib/src/syn/v2/parser/token_buffer.rs new file mode 100644 index 00000000..9c589db3 --- /dev/null +++ b/lib/src/syn/v2/parser/token_buffer.rs @@ -0,0 +1,71 @@ +use crate::syn::v2::token::Token; + +pub struct TokenBuffer { + buffer: [Token; S], + write: u8, + read: u8, +} + +impl TokenBuffer { + pub fn new() -> Self { + assert!(S < 256); + Self { + buffer: [Token::invalid(); S], + write: 0, + read: 0, + } + } + + #[inline] + pub fn push(&mut self, token: Token) { + let next_write = self.write.wrapping_add(1) % S as u8; + if next_write == self.read { + panic!("token buffer full"); + } + self.buffer[self.write as usize] = token; + self.write = next_write; + } + + #[inline] + pub fn pop(&mut self) -> Option { + if self.write == self.read { + return 
None; + } + let res = self.buffer[self.read as usize]; + self.read = self.read.wrapping_add(1) % S as u8; + Some(res) + } + + #[inline] + pub fn first(&mut self) -> Option { + if self.write == self.read { + return None; + } + Some(self.buffer[self.read as usize]) + } + + pub fn len(&self) -> u8 { + // 0 0 0 0 0 0 0 0 + // | ^ + // len: 6 read: 3 write: 1 + // 8 - read + write + if self.read > self.write { + S as u8 - self.read + self.write + } else { + self.write - self.read + } + } + + pub fn at(&mut self, at: u8) -> Option { + if at >= self.len() { + return None; + } + let offset = (self.read as u16 + at as u16) % S as u16; + Some(self.buffer[offset as usize]) + } + + pub fn clear(&mut self) { + self.read = 0; + self.write = 0; + } +} diff --git a/lib/src/syn/v2/test.rs b/lib/src/syn/v2/test.rs new file mode 100644 index 00000000..71260670 --- /dev/null +++ b/lib/src/syn/v2/test.rs @@ -0,0 +1,58 @@ +use super::super::Parse; +use super::lexer::Lexer; +use super::parser::Parser; +use crate::sql::{Array, Expression, Ident, Idiom, Param, Script, Thing, Value}; +use crate::syn::v2::token::{t, TokenKind}; + +impl Parse for Value { + fn parse(val: &str) -> Self { + super::value(val).unwrap() + } +} + +impl Parse for Array { + fn parse(val: &str) -> Self { + let mut parser = Parser::new(val.as_bytes()); + let start = parser.peek().span; + assert!(parser.eat(t!("["))); + parser.parse_array(start).unwrap() + } +} + +impl Parse for Param { + fn parse(val: &str) -> Self { + let mut lexer = Lexer::new(val.as_bytes()); + let token = lexer.next_token(); + assert_eq!(token.kind, TokenKind::Parameter); + Param(Ident(lexer.string.take().unwrap())) + } +} + +impl Parse for Idiom { + fn parse(val: &str) -> Self { + super::idiom(val).unwrap() + } +} + +impl Parse for Script { + fn parse(_val: &str) -> Self { + todo!() + } +} + +impl Parse for Thing { + fn parse(val: &str) -> Self { + super::thing(val).unwrap() + } +} + +impl Parse for Expression { + fn parse(val: &str) -> Self { + 
let mut parser = Parser::new(val.as_bytes()); + let value = parser.parse_value_field().unwrap(); + if let Value::Expression(x) = value { + return *x; + } + panic!("not an expression"); + } +} diff --git a/lib/src/syn/v2/token/keyword.rs b/lib/src/syn/v2/token/keyword.rs new file mode 100644 index 00000000..c169f2f0 --- /dev/null +++ b/lib/src/syn/v2/token/keyword.rs @@ -0,0 +1,219 @@ +macro_rules! keyword { + ($($name:ident => $value:tt),* $(,)?) => { + + #[repr(u8)] + #[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] + pub enum Keyword{ + $($name,)* + } + + impl Keyword{ + pub fn as_str(&self) -> &'static str{ + match self{ + $(Keyword::$name => $value,)* + } + } + } + + macro_rules! keyword_t { + $(($value) => { + $crate::syn::v2::token::Keyword::$name + };)* + } + }; +} + +keyword! { + After => "AFTER", + All => "ALL", + Analyze => "ANALYZE", + Analyzer => "ANALYZER", + As => "AS", + Ascending => "ASCENDING", + Ascii => "ASCII", + Assert => "ASSERT", + At => "AT", + Before => "BEFORE", + Begin => "BEGIN", + Blank => "BLANK", + Bm25 => "BM25", + Break => "BREAK", + By => "BY", + Camel => "CAMEL", + Cancel => "CANCEL", + ChangeFeed => "CHANGEFEED", + Changes => "CHANGES", + Capacity => "CAPACITY", + Class => "CLASS", + Comment => "COMMENT", + Commit => "COMMIT", + Content => "CONTENT", + Continue => "CONTINUE", + Create => "CREATE", + Database => "DATABASE", + Default => "DEFAULT", + Define => "DEFINE", + Delete => "DELETE", + Descending => "DESCENDING", + Diff => "DIFF", + Dimension => "DIMENSION", + Distance => "DISTANCE", + DocIdsCache => "DOC_IDS_CACHE", + DocIdsOrder => "DOC_IDS_ORDER", + DocLengthsCache => "DOC_LENGTHS_CACHE", + DocLengthsOrder => "DOC_LENGTHS_ORDER", + Drop => "DROP", + Duplicate => "DUPLICATE", + Edgengram => "EDGENGRAM", + Event => "EVENT", + Else => "ELSE", + End => "END", + Explain => "EXPLAIN", + False => "false", + Fetch => "FETCH", + Field => "FIELD", + Fields => "FIELDS", + Filters => "FILTERS", + Flexible => "FLEXIBLE", + For => 
"FOR", + From => "FROM", + Full => "FULL", + Function => "FUNCTION", + Group => "GROUP", + Highlights => "HIGHLIGHTS", + Ignore => "IGNORE", + Index => "INDEX", + Info => "INFO", + Insert => "INSERT", + Into => "INTO", + If => "IF", + Is => "IS", + Key => "KEY", + Kill => "KILL", + Knn => "KNN", + Let => "LET", + Limit => "LIMIT", + Live => "LIVE", + Lowercase => "LOWERCASE", + Merge => "MERGE", + Model => "MODEL", + MTree => "MTREE", + MTreeCache => "MTREE_CACHE", + Namespace => "NAMESPACE", + Ngram => "NGRAM", + No => "NO", + NoIndex => "NOINDEX", + None => "NONE", + Null => "NULL", + Numeric => "NUMERIC", + Omit => "OMIT", + On => "ON", + Only => "ONLY", + Option => "OPTION", + Order => "ORDER", + Parallel => "PARALLEL", + Param => "PARAM", + Passhash => "PASSHASH", + Password => "PASSWORD", + Patch => "PATCH", + Permissions => "PERMISSIONS", + PostingsCache => "POSTINGS_CACHE", + PostingsOrder => "POSTINGS_ORDER", + Punct => "PUNCT", + Relate => "RELATE", + Remove => "REMOVE", + Replace => "REPLACE", + Return => "RETURN", + Roles => "ROLES", + Root => "ROOT", + Schemafull => "SCHEMAFULL", + Schemaless => "SCHEMALESS", + Scope => "SCOPE", + Search => "SEARCH", + Select => "SELECT", + Session => "SESSION", + Set => "SET", + Show => "SHOW", + Signin => "SIGNIN", + Signup => "SIGNUP", + Since => "SINCE", + Sleep => "SLEEP", + Snowball => "SNOWBALL", + Split => "SPLIT", + Start => "START", + Table => "TABLE", + TermsCache => "TERMS_CACHE", + TermsOrder => "TERMS_ORDER", + Then => "THEN", + Throw => "THROW", + Timeout => "TIMEOUT", + Tokenizers => "TOKENIZERS", + Token => "TOKEN", + Transaction => "TRANSACTION", + True => "true", + Type => "TYPE", + Unique => "UNIQUE", + Unset => "UNSET", + Update => "UPDATE", + Uppercase => "UPPERCASE", + Use => "USE", + User => "USER", + Value => "VALUE", + Values => "VALUES", + Version => "VERSION", + Vs => "VS", + When => "WHEN", + Where => "WHERE", + With => "WITH", + AllInside => "ALLINSIDE", + AndKw => "ANDKW", + AnyInside => 
"ANYINSIDE", + Inside => "INSIDE", + Intersects => "INTERSECTS", + NoneInside => "NONEINSIDE", + NotInside => "NOTINSIDE", + OrKw => "OR", + Outside => "OUTSIDE", + Not => "NOT", + And => "AND", + Collate => "COLLATE", + ContainsAll => "CONTAINSALL", + ContainsAny => "CONTAINSANY", + ContainsNone => "CONTAINSNONE", + ContainsNot => "CONTAINSNOT", + Contains => "CONTAINS", + In => "IN", + + Any => "ANY", + Array => "ARRAY", + Geometry => "GEOMETRY", + Record => "RECORD", + Future => "FUTURE", + Bool => "BOOL", + Bytes => "BYTES", + Datetime => "DATETIME", + Decimal => "DECIMAL", + Duration => "DURATION", + Float => "FLOAT", + Fn => "fn", + Int => "INT", + Number => "NUMBER", + Object => "OBJECT", + String => "STRING", + Uuid => "UUID", + Ulid => "ULID", + Rand => "RAND", + Feature => "FEATURE", + Line => "LINE", + Point => "POINT", + Polygon => "POLYGON", + MultiPoint => "MULTIPOINT", + MultiLine => "MULTILINE", + MultiPolygon => "MULTIPOLYGON", + Collection => "COLLECTION", + + FN => "fn", + ML => "ml", +} + +pub(crate) use keyword_t; diff --git a/lib/src/syn/v2/token/mac.rs b/lib/src/syn/v2/token/mac.rs new file mode 100644 index 00000000..f40069ff --- /dev/null +++ b/lib/src/syn/v2/token/mac.rs @@ -0,0 +1,300 @@ +/// A shorthand for token kinds. +macro_rules! 
t { + ("invalid") => { + $crate::syn::v2::token::TokenKind::Invalid + }; + ("eof") => { + $crate::syn::v2::token::TokenKind::Eof + }; + ("[") => { + $crate::syn::v2::token::TokenKind::OpenDelim($crate::syn::v2::token::Delim::Bracket) + }; + ("{") => { + $crate::syn::v2::token::TokenKind::OpenDelim($crate::syn::v2::token::Delim::Brace) + }; + ("(") => { + $crate::syn::v2::token::TokenKind::OpenDelim($crate::syn::v2::token::Delim::Paren) + }; + ("]") => { + $crate::syn::v2::token::TokenKind::CloseDelim($crate::syn::v2::token::Delim::Bracket) + }; + ("}") => { + $crate::syn::v2::token::TokenKind::CloseDelim($crate::syn::v2::token::Delim::Brace) + }; + (")") => { + $crate::syn::v2::token::TokenKind::CloseDelim($crate::syn::v2::token::Delim::Paren) + }; + + ("r\"") => { + $crate::syn::v2::token::TokenKind::OpenRecordString { + double: true, + } + }; + ("r'") => { + $crate::syn::v2::token::TokenKind::OpenRecordString { + double: false, + } + }; + + ("\"r") => { + $crate::syn::v2::token::TokenKind::CloseRecordString { + double: true, + } + }; + ("'r") => { + $crate::syn::v2::token::TokenKind::CloseRecordString { + double: false, + } + }; + + ("<") => { + $crate::syn::v2::token::TokenKind::LeftChefron + }; + (">") => { + $crate::syn::v2::token::TokenKind::RightChefron + }; + + (";") => { + $crate::syn::v2::token::TokenKind::SemiColon + }; + (",") => { + $crate::syn::v2::token::TokenKind::Comma + }; + ("|") => { + $crate::syn::v2::token::TokenKind::Vert + }; + ("...") => { + $crate::syn::v2::token::TokenKind::DotDotDot + }; + ("..") => { + $crate::syn::v2::token::TokenKind::DotDot + }; + (".") => { + $crate::syn::v2::token::TokenKind::Dot + }; + ("::") => { + $crate::syn::v2::token::TokenKind::PathSeperator + }; + (":") => { + $crate::syn::v2::token::TokenKind::Colon + }; + ("<-") => { + $crate::syn::v2::token::TokenKind::ArrowLeft + }; + ("<->") => { + $crate::syn::v2::token::TokenKind::BiArrow + }; + ("->") => { + $crate::syn::v2::token::TokenKind::ArrowRight + }; + + 
("*") => { + $crate::syn::v2::token::TokenKind::Star + }; + ("$") => { + $crate::syn::v2::token::TokenKind::Dollar + }; + + ("+") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Add) + }; + ("-") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Subtract) + }; + ("**") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Power) + }; + ("*=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AllEqual) + }; + ("*~") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AllLike) + }; + ("/") => { + $crate::syn::v2::token::TokenKind::ForwardSlash + }; + ("<=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::LessEqual) + }; + (">=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::GreaterEqual) + }; + ("@") => { + $crate::syn::v2::token::TokenKind::At + }; + ("||") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Or) + }; + ("&&") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::And) + }; + ("×") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Mult) + }; + ("÷") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Divide) + }; + + ("$param") => { + $crate::syn::v2::token::TokenKind::Parameter + }; + ("123") => { + $crate::syn::v2::token::TokenKind::Number(_) + }; + + ("!") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Not) + }; + ("!~") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::NotLike) + }; + ("!=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::NotEqual) + }; + + ("?") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Like) + }; + ("?:") 
=> { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Tco) + }; + ("??") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Nco) + }; + ("==") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Exact) + }; + ("!=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::NotEqual) + }; + ("*=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AllEqual) + }; + ("?=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AnyEqual) + }; + ("=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Equal) + }; + ("!~") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::NotLike) + }; + ("*~") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AllLike) + }; + ("?~") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AnyLike) + }; + ("~") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Like) + }; + ("+?=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Ext) + }; + ("+=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Inc) + }; + ("-=") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Dec) + }; + + ("∋") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Contains) + }; + ("∌") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::NotContains) + }; + ("∈") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::Inside) + }; + ("∉") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::NotInside) + }; + ("⊇") => { + 
$crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::ContainsAll) + }; + ("⊃") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::ContainsAny) + }; + ("⊅") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::ContainsNone) + }; + ("⊆") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AllInside) + }; + ("⊂") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::AnyInside) + }; + ("⊄") => { + $crate::syn::v2::token::TokenKind::Operator($crate::syn::v2::token::Operator::NoneInside) + }; + + // algorithms + ("EDDSA") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::EdDSA) + }; + ("ES256") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Es256) + }; + ("ES384") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Es384) + }; + ("ES512") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Es512) + }; + ("HS256") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Hs256) + }; + ("HS384") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Hs384) + }; + ("HS512") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Hs512) + }; + ("PS256") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Ps256) + }; + ("PS384") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Ps384) + }; + ("PS512") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Ps512) + }; + ("RS256") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Rs256) + }; + ("RS384") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Rs384) + }; + ("RS512") => { + $crate::syn::v2::token::TokenKind::Algorithm($crate::sql::Algorithm::Rs512) + }; + + // Distance + ("EUCLIDEAN") => 
{ + $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Euclidean) + }; + ("MANHATTAN") => { + $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Manhattan) + }; + ("COSINE") => { + $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Cosine) + }; + ("HAMMING") => { + $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Hamming) + }; + ("MAHALANOBIS") => { + $crate::syn::v2::token::TokenKind::Distance( + $crate::syn::v2::token::DistanceKind::Mahalanobis, + ) + }; + ("MINKOWSKI") => { + $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Minkowski) + }; + + ($t:tt) => { + $crate::syn::v2::token::TokenKind::Keyword($crate::syn::v2::token::keyword_t!($t)) + }; +} + +pub(crate) use t; diff --git a/lib/src/syn/v2/token/mod.rs b/lib/src/syn/v2/token/mod.rs new file mode 100644 index 00000000..03eb20aa --- /dev/null +++ b/lib/src/syn/v2/token/mod.rs @@ -0,0 +1,385 @@ +//! Module specifying the token representation of the parser. + +use std::hash::Hash; + +mod keyword; +pub(crate) use keyword::keyword_t; +pub use keyword::Keyword; +mod mac; +pub(crate) use mac::t; + +use crate::sql::{language::Language, Algorithm}; + +/// A location in the source passed to the lexer. +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub struct Span { + /// Offset in bytes. + pub offset: u32, + /// The amount of bytes this location encompasses. + pub len: u32, +} + +impl Span { + /// Create a new empty span. + pub const fn empty() -> Self { + Span { + offset: 0, + len: 0, + } + } + + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Create a span that covers the range of both spans as well as possible space inbetween. 
+ pub fn covers(self, other: Span) -> Span { + let start = self.offset.min(other.offset); + let end = (self.offset + self.len).max(other.offset + other.len); + let len = end - start; + Span { + offset: start, + len, + } + } +} + +#[repr(u8)] +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub enum Operator { + /// `!` + Not, + /// `+` + Add, + /// `-` + Subtract, + /// `÷` + Divide, + /// `×` or `∙` + Mult, + /// `||` + Or, + /// `&&` + And, + /// `<=` + LessEqual, + /// `>=` + GreaterEqual, + /// `*` + Star, + /// `**` + Power, + /// `=` + Equal, + /// `==` + Exact, + /// `!=` + NotEqual, + /// `*=` + AllEqual, + /// `?=` + AnyEqual, + /// `~` + Like, + /// `!~` + NotLike, + /// `*~` + AllLike, + /// `?~` + AnyLike, + /// `∋` + Contains, + /// `∌` + NotContains, + /// `⊇` + ContainsAll, + /// `⊃` + ContainsAny, + /// `⊅` + ContainsNone, + /// `∈` + Inside, + /// `∉` + NotInside, + /// `⊆` + AllInside, + /// `⊂` + AnyInside, + /// `⊄` + NoneInside, + /// `@123@` + Matches, + /// `+=` + Inc, + /// `-=` + Dec, + /// `+?=` + Ext, + /// `?:` + Tco, + /// `??` + Nco, +} + +impl Operator { + fn as_str(&self) -> &'static str { + match self { + Operator::Not => "'!'", + Operator::Add => "'+'", + Operator::Subtract => "'-'", + Operator::Divide => "'÷'", + Operator::Or => "'||'", + Operator::And => "'&&'", + Operator::Mult => "'×'", + Operator::LessEqual => "'<='", + Operator::GreaterEqual => "'>='", + Operator::Star => "'*'", + Operator::Power => "'**'", + Operator::Equal => "'='", + Operator::Exact => "'=='", + Operator::NotEqual => "'!='", + Operator::AllEqual => "'*='", + Operator::AnyEqual => "'?='", + Operator::Like => "'~'", + Operator::NotLike => "'!~'", + Operator::AllLike => "'*~'", + Operator::AnyLike => "'?~'", + Operator::Contains => "'∋'", + Operator::NotContains => "'∌'", + Operator::ContainsAll => "'⊇'", + Operator::ContainsAny => "'⊃'", + Operator::ContainsNone => "'⊅'", + Operator::Inside => "'∈'", + Operator::NotInside => "'∉'", + Operator::AllInside 
=> "'⊆'", + Operator::AnyInside => "'⊂'", + Operator::NoneInside => "'⊄'", + Operator::Matches => "'@@'", + Operator::Inc => "'+='", + Operator::Dec => "'-='", + Operator::Ext => "'+?='", + Operator::Tco => "'?:'", + Operator::Nco => "'??'", + } + } +} + +/// A delimiting token, denoting the start or end of a certain production. +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub enum Delim { + /// `()` + Paren, + /// `[]` + Bracket, + /// `{}` + Brace, +} + +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub enum DistanceKind { + Euclidean, + Manhattan, + Hamming, + Minkowski, +} + +impl DistanceKind { + pub fn as_str(&self) -> &'static str { + match self { + DistanceKind::Euclidean => "EUCLIDEAN", + DistanceKind::Manhattan => "MANHATTAN", + DistanceKind::Hamming => "HAMMING", + DistanceKind::Minkowski => "MINKOWSKI", + } + } +} + +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub enum NumberKind { + // A plain integer number. + Integer, + // A number with a decimal postfix. + Decimal, + // A number with a float postfix. + Float, + // A number with a `.3` part. + Mantissa, + // A number with a `.3e10` part. + MantissaExponent, + // A number with a `.3e10` part. + Exponent, + NaN, +} + +/// The type of token +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub enum TokenKind { + Keyword(Keyword), + Algorithm(Algorithm), + Language(Language), + Distance(DistanceKind), + Operator(Operator), + OpenDelim(Delim), + CloseDelim(Delim), + // a token denoting the opening of a record string, i.e. `r"` + OpenRecordString { + double: bool, + }, + /// a token denoting the clsoing of a record string, i.e. `"` + /// Never produced normally by the lexer. + CloseRecordString { + double: bool, + }, + Regex, + Uuid, + DateTime, + Strand, + /// A parameter like `$name`. + Parameter, + /// A duration. 
+ Duration, + Number(NumberKind), + Identifier, + /// `<` + LeftChefron, + /// `>` + RightChefron, + /// `*` + Star, + /// `?` + Question, + /// `$` + Dollar, + /// `->` + ArrowRight, + /// `<-` + ArrowLeft, + /// `<->` + BiArrow, + /// '/' + ForwardSlash, + /// `.` + Dot, + /// `..` + DotDot, + /// `...` or `…` + DotDotDot, + /// `;` + SemiColon, + /// `::` + PathSeperator, + /// `:` + Colon, + /// `,` + Comma, + /// `|` + Vert, + /// `@` + At, + /// A token which could not be properly lexed. + Invalid, + /// A token which indicates the end of the file. + Eof, +} + +/// An assertion statically checking that the size of Tokenkind remains two bytes +const _TOKEN_KIND_SIZE_ASSERT: [(); 2] = [(); std::mem::size_of::()]; + +impl TokenKind { + pub fn has_data(&self) -> bool { + matches!( + self, + TokenKind::Identifier + | TokenKind::Uuid + | TokenKind::DateTime + | TokenKind::Strand + | TokenKind::Parameter + | TokenKind::Regex + ) + } + + pub fn can_be_identifier(&self) -> bool { + matches!( + self, + TokenKind::Identifier + | TokenKind::Keyword(_) + | TokenKind::Language(_) + | TokenKind::Algorithm(_) + ) + } + + pub fn as_str(&self) -> &'static str { + match *self { + TokenKind::Keyword(x) => x.as_str(), + TokenKind::Operator(x) => x.as_str(), + TokenKind::Algorithm(_) => todo!(), + TokenKind::Language(x) => x.as_str(), + TokenKind::Distance(x) => x.as_str(), + TokenKind::OpenDelim(Delim::Paren) => "(", + TokenKind::OpenDelim(Delim::Brace) => "{", + TokenKind::OpenDelim(Delim::Bracket) => "[", + TokenKind::CloseDelim(Delim::Paren) => ")", + TokenKind::CloseDelim(Delim::Brace) => "}", + TokenKind::CloseDelim(Delim::Bracket) => "]", + TokenKind::OpenRecordString { + .. + } => "a record string", + TokenKind::CloseRecordString { + .. 
+ } => "a closing record string", + TokenKind::Uuid => "a uuid", + TokenKind::DateTime => "a date-time", + TokenKind::Strand => "a strand", + TokenKind::Parameter => "a parameter", + TokenKind::Duration => "a duration", + TokenKind::Number(_) => "a number", + TokenKind::Identifier => "an identifier", + TokenKind::Regex => "a regex", + TokenKind::LeftChefron => "'<'", + TokenKind::RightChefron => "'>'", + TokenKind::Star => "'*'", + TokenKind::Dollar => "'$'", + TokenKind::Question => "'?'", + TokenKind::ArrowRight => "'->'", + TokenKind::ArrowLeft => "'<-'", + TokenKind::BiArrow => "'<->'", + TokenKind::ForwardSlash => "'/'", + TokenKind::Dot => "'.'", + TokenKind::DotDot => "'..'", + TokenKind::DotDotDot => "'...'", + TokenKind::SemiColon => "';'", + TokenKind::PathSeperator => "'::'", + TokenKind::Colon => "':'", + TokenKind::Comma => "','", + TokenKind::Vert => "'|'", + TokenKind::At => "'@'", + TokenKind::Invalid => "Invalid", + TokenKind::Eof => "Eof", + } + } +} + +#[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)] +pub struct Token { + pub kind: TokenKind, + pub span: Span, +} + +impl Token { + pub const fn invalid() -> Token { + Token { + kind: TokenKind::Invalid, + span: Span::empty(), + } + } + + /// Returns if the token is invalid. + pub fn is_invalid(&self) -> bool { + matches!(self.kind, TokenKind::Invalid) + } + + /// Returns if the token is `end of file`. 
+ pub fn is_eof(&self) -> bool { + matches!(self.kind, TokenKind::Eof) + } +} diff --git a/lib/test.surql b/lib/test.surql index 17979a56..e61ee8c5 100644 --- a/lib/test.surql +++ b/lib/test.surql @@ -88,8 +88,8 @@ SELECT ], math::PI > 3.14 AS check_constant, test AS `some thing`, - '2012-04-23T18:25:43.511Z' AS utctime, - '2012-04-23T18:25:43.511-08:00' AS pacifictime, + d'2012-04-23T18:25:43.511Z' AS utctime, + d'2012-04-23T18:25:43.511-08:00' AS pacifictime, { key: (3 + 1 + 2), other: 9 * 7, @@ -98,7 +98,7 @@ SELECT } } AS object, rand::uuid::v4() AS a_uuid_field, - "ff36afd6-6689-4c02-8c8c-7df478924645" AS ⟨another id field⟩, + u"ff36afd6-6689-4c02-8c8c-7df478924645" AS ⟨another id field⟩, if true { 'Yay' } else { @@ -125,16 +125,16 @@ WHERE IF true THEN 'YAY' ELSE 'OOPS' END AND (3 + 3 * 4) = 6 AND 3 + 3 * 4 = 6 AND ages CONTAINS 18 - AND if IS true + AND `if` IS true AND 346 <= 789 AND 678 >= 345 AND ( then = true - OR if = true - OR create is NONE - OR delete = NULL - OR delete INSIDE ['one', 'two', 'three'] + OR `if` = true + OR `create` is NONE + OR `delete` = NULL + OR `delete` INSIDE ['one', 'two', 'three'] ) SPLIT test.things -VERSION '2019-01-01T08:00:00Z' +VERSION d'2019-01-01T08:00:00Z' TIMEOUT 2w; diff --git a/lib/tests/api/backup.rs b/lib/tests/api/backup.rs index 355f633e..73a5a43c 100644 --- a/lib/tests/api/backup.rs +++ b/lib/tests/api/backup.rs @@ -19,9 +19,15 @@ async fn export_import() { } drop(permit); let file = format!("{db_name}.sql"); - db.export(&file).await.unwrap(); - db.import(&file).await.unwrap(); + + let res = async { + db.export(&file).await?; + db.import(&file).await?; + Result::<(), Error>::Ok(()) + } + .await; remove_file(file).await.unwrap(); + res.unwrap(); } #[test_log::test(tokio::test)] diff --git a/lib/tests/api/mod.rs b/lib/tests/api/mod.rs index 5cbb24cc..31589041 100644 --- a/lib/tests/api/mod.rs +++ b/lib/tests/api/mod.rs @@ -33,6 +33,7 @@ async fn yuse() { drop(permit); } +#[ignore] #[test_log::test(tokio::test)] 
async fn invalidate() { let (permit, db) = new_db().await; @@ -55,7 +56,7 @@ async fn signup_scope() { let scope = Ulid::new().to_string(); let sql = format!( " - DEFINE SCOPE {scope} SESSION 1s + DEFINE SCOPE `{scope}` SESSION 1s SIGNUP ( CREATE user SET email = $email, pass = crypto::argon2::generate($pass) ) SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(pass, $pass) ) " @@ -82,7 +83,7 @@ async fn signin_ns() { db.use_ns(NS).use_db(Ulid::new().to_string()).await.unwrap(); let user = Ulid::new().to_string(); let pass = "password123"; - let sql = format!("DEFINE USER {user} ON NAMESPACE PASSWORD '{pass}'"); + let sql = format!("DEFINE USER `{user}` ON NAMESPACE PASSWORD '{pass}'"); let response = db.query(sql).await.unwrap(); drop(permit); response.check().unwrap(); @@ -102,7 +103,7 @@ async fn signin_db() { db.use_ns(NS).use_db(&database).await.unwrap(); let user = Ulid::new().to_string(); let pass = "password123"; - let sql = format!("DEFINE USER {user} ON DATABASE PASSWORD '{pass}'"); + let sql = format!("DEFINE USER `{user}` ON DATABASE PASSWORD '{pass}'"); let response = db.query(sql).await.unwrap(); drop(permit); response.check().unwrap(); @@ -126,7 +127,7 @@ async fn signin_scope() { let pass = "password123"; let sql = format!( " - DEFINE SCOPE {scope} SESSION 1s + DEFINE SCOPE `{scope}` SESSION 1s SIGNUP ( CREATE user SET email = $email, pass = crypto::argon2::generate($pass) ) SIGNIN ( SELECT * FROM user WHERE email = $email AND crypto::argon2::compare(pass, $pass) ) " @@ -168,7 +169,7 @@ async fn scope_throws_error() { let pass = "password123"; let sql = format!( " - DEFINE SCOPE {scope} SESSION 1s + DEFINE SCOPE `{scope}` SESSION 1s SIGNUP {{ THROW 'signup_thrown_error' }} SIGNIN {{ THROW 'signin_thrown_error' }} " @@ -230,7 +231,7 @@ async fn scope_invalid_query() { let pass = "password123"; let sql = format!( " - DEFINE SCOPE {scope} SESSION 1s + DEFINE SCOPE `{scope}` SESSION 1s SIGNUP {{ SELECT * FROM ONLY [1, 2] }} 
SIGNIN {{ SELECT * FROM ONLY [1, 2] }} " @@ -292,7 +293,7 @@ async fn authenticate() { db.use_ns(NS).use_db(Ulid::new().to_string()).await.unwrap(); let user = Ulid::new().to_string(); let pass = "password123"; - let sql = format!("DEFINE USER {user} ON NAMESPACE PASSWORD '{pass}'"); + let sql = format!("DEFINE USER `{user}` ON NAMESPACE PASSWORD '{pass}'"); let response = db.query(sql).await.unwrap(); drop(permit); response.check().unwrap(); diff --git a/lib/tests/changefeeds.rs b/lib/tests/changefeeds.rs index b976b9f3..7a2cebec 100644 --- a/lib/tests/changefeeds.rs +++ b/lib/tests/changefeeds.rs @@ -469,11 +469,7 @@ async fn changefeed_with_ts() -> Result<(), Error> { // // Show changes using timestamp 1 // - let sql = format!( - " - SHOW CHANGES FOR TABLE user SINCE '{ts1_dt}' LIMIT 10; - " - ); + let sql = format!("SHOW CHANGES FOR TABLE user SINCE d'{ts1_dt}' LIMIT 10; "); let value: Value = db.execute(&sql, &ses, None).await?.remove(0).result?; let Value::Array(array) = value.clone() else { unreachable!() @@ -510,7 +506,7 @@ async fn changefeed_with_ts() -> Result<(), Error> { // // Show changes using timestamp 3 // - let sql = format!("SHOW CHANGES FOR TABLE user SINCE '{ts3_dt}' LIMIT 10;"); + let sql = format!("SHOW CHANGES FOR TABLE user SINCE d'{ts3_dt}' LIMIT 10; "); let value: Value = db.execute(&sql, &ses, None).await?.remove(0).result?; let Value::Array(array) = value.clone() else { unreachable!() diff --git a/lib/tests/complex.rs b/lib/tests/complex.rs index ab76a632..cc96a808 100644 --- a/lib/tests/complex.rs +++ b/lib/tests/complex.rs @@ -23,7 +23,7 @@ fn self_referential_field() -> Result<(), Error> { assert_eq!(res.len(), 1); // let tmp = res.next().unwrap(); - assert!(matches!(tmp, Err(Error::ComputationDepthExceeded))); + assert!(matches!(tmp, Err(Error::ComputationDepthExceeded)), "found {:?}", tmp); // Ok(()) }) @@ -43,7 +43,7 @@ fn cyclic_fields() -> Result<(), Error> { assert_eq!(res.len(), 1); // let tmp = res.next().unwrap(); - 
assert!(matches!(tmp, Err(Error::ComputationDepthExceeded))); + assert!(matches!(tmp, Err(Error::ComputationDepthExceeded)), "found {:?}", tmp); // Ok(()) }) @@ -67,7 +67,7 @@ fn cyclic_records() -> Result<(), Error> { assert!(tmp.is_ok()); // let tmp = res.next().unwrap(); - assert!(matches!(tmp, Err(Error::ComputationDepthExceeded))); + assert!(matches!(tmp, Err(Error::ComputationDepthExceeded)), "found {:?}", tmp); // Ok(()) }) diff --git a/lib/tests/create.rs b/lib/tests/create.rs index d80e53bf..c9ece08d 100644 --- a/lib/tests/create.rs +++ b/lib/tests/create.rs @@ -18,8 +18,8 @@ async fn create_with_id() -> Result<(), Error> { CREATE person CONTENT { id: person:jaime, name: 'Jaime' }; CREATE user CONTENT { id: 1, name: 'Robert' }; CREATE city CONTENT { id: 'london', name: 'London' }; - CREATE city CONTENT { id: '8e60244d-95f6-4f95-9e30-09a98977efb0', name: 'London' }; - CREATE temperature CONTENT { id: ['London', '2022-09-30T20:25:01.406828Z'], name: 'London' }; + CREATE city CONTENT { id: u'8e60244d-95f6-4f95-9e30-09a98977efb0', name: 'London' }; + CREATE temperature CONTENT { id: ['London', d'2022-09-30T20:25:01.406828Z'], name: 'London' }; CREATE test CONTENT { id: other:715917898417176677 }; CREATE test CONTENT { id: other:⟨715917898.417176677⟩ }; CREATE test CONTENT { id: other:9223372036854775808 }; @@ -105,7 +105,7 @@ async fn create_with_id() -> Result<(), Error> { let val = Value::parse( "[ { - id: temperature:['London', '2022-09-30T20:25:01.406828Z'], + id: temperature:['London', d'2022-09-30T20:25:01.406828Z'], name: 'London' } ]", diff --git a/lib/tests/datetimes.rs b/lib/tests/datetimes.rs index 102fc837..39241c80 100644 --- a/lib/tests/datetimes.rs +++ b/lib/tests/datetimes.rs @@ -11,7 +11,7 @@ async fn datetimes_conversion() -> Result<(), Error> { let sql = r#" SELECT * FROM "2012-01-01"; SELECT * FROM "2012-01-01"; - SELECT * FROM "2012-01-01T08:00:00Z" + "-test"; + SELECT * FROM d"2012-01-01T08:00:00Z" + "-test"; "#; let dbs = 
new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -29,7 +29,7 @@ async fn datetimes_conversion() -> Result<(), Error> { let tmp = res.remove(0).result?; let val = Value::parse( "[ - '2012-01-01T00:00:00Z' + d'2012-01-01T00:00:00Z' ]", ); assert_eq!(tmp, val); diff --git a/lib/tests/escape.rs b/lib/tests/escape.rs index ff9e7de0..d5873fbf 100644 --- a/lib/tests/escape.rs +++ b/lib/tests/escape.rs @@ -11,8 +11,8 @@ async fn complex_ids() -> Result<(), Error> { let sql = r#" CREATE person:100 SET test = 'One'; CREATE person:00100; - CREATE 'person:100'; - CREATE "person:100"; + CREATE r'person:100'; + CREATE r"person:100"; CREATE person:⟨100⟩ SET test = 'Two'; CREATE person:`100`; SELECT * FROM person; diff --git a/lib/tests/function.rs b/lib/tests/function.rs index 85960288..a6a371c7 100644 --- a/lib/tests/function.rs +++ b/lib/tests/function.rs @@ -2682,7 +2682,7 @@ async fn function_math_variance() -> Result<(), Error> { #[tokio::test] async fn function_parse_meta_id() -> Result<(), Error> { let sql = r#" - RETURN meta::id("person:tobie"); + RETURN meta::id(r"person:tobie"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -2699,7 +2699,7 @@ async fn function_parse_meta_id() -> Result<(), Error> { #[tokio::test] async fn function_parse_meta_table() -> Result<(), Error> { let sql = r#" - RETURN meta::table("person:tobie"); + RETURN meta::table(r"person:tobie"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -3760,7 +3760,7 @@ async fn function_parse_is_url() -> Result<(), Error> { #[tokio::test] async fn function_parse_is_uuid() -> Result<(), Error> { let sql = r#" - RETURN string::is::uuid("e72bee20-f49b-11ec-b939-0242ac120002"); + RETURN string::is::uuid(u"e72bee20-f49b-11ec-b939-0242ac120002"); RETURN string::is::uuid("this is a test!"); "#; let dbs = new_ds().await?; @@ -4264,9 +4264,9 @@ async fn function_string_words() -> Result<(), Error> 
{ #[tokio::test] async fn function_time_ceil() -> Result<(), Error> { let sql = r#" - RETURN time::ceil("1987-06-22T08:30:45Z", 1w); - RETURN time::ceil("1987-06-22T08:30:45Z", 1y); - RETURN time::ceil("2023-05-11T03:09:00Z", 1s); + RETURN time::ceil(d"1987-06-22T08:30:45Z", 1w); + RETURN time::ceil(d"1987-06-22T08:30:45Z", 1y); + RETURN time::ceil(d"2023-05-11T03:09:00Z", 1s); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4274,15 +4274,15 @@ async fn function_time_ceil() -> Result<(), Error> { assert_eq!(res.len(), 3); // let tmp = res.remove(0).result?; - let val = Value::parse("'1987-06-25T00:00:00Z'"); + let val = Value::parse("d'1987-06-25T00:00:00Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'1987-12-28T00:00:00Z'"); + let val = Value::parse("d'1987-12-28T00:00:00Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2023-05-11T03:09:00Z'"); + let val = Value::parse("d'2023-05-11T03:09:00Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4292,7 +4292,7 @@ async fn function_time_ceil() -> Result<(), Error> { async fn function_time_day() -> Result<(), Error> { let sql = r#" RETURN time::day(); - RETURN time::day("1987-06-22T08:30:45Z"); + RETURN time::day(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4312,9 +4312,9 @@ async fn function_time_day() -> Result<(), Error> { #[tokio::test] async fn function_time_floor() -> Result<(), Error> { let sql = r#" - RETURN time::floor("1987-06-22T08:30:45Z", 1w); - RETURN time::floor("1987-06-22T08:30:45Z", 1y); - RETURN time::floor("2023-05-11T03:09:00Z", 1s); + RETURN time::floor(d"1987-06-22T08:30:45Z", 1w); + RETURN time::floor(d"1987-06-22T08:30:45Z", 1y); + RETURN time::floor(d"2023-05-11T03:09:00Z", 1s); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4322,15 +4322,15 @@ async fn 
function_time_floor() -> Result<(), Error> { assert_eq!(res.len(), 3); // let tmp = res.remove(0).result?; - let val = Value::parse("'1987-06-18T00:00:00Z'"); + let val = Value::parse("d'1987-06-18T00:00:00Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'1986-12-28T00:00:00Z'"); + let val = Value::parse("d'1986-12-28T00:00:00Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2023-05-11T03:09:00Z'"); + let val = Value::parse("d'2023-05-11T03:09:00Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4339,8 +4339,8 @@ async fn function_time_floor() -> Result<(), Error> { #[tokio::test] async fn function_time_format() -> Result<(), Error> { let sql = r#" - RETURN time::format("1987-06-22T08:30:45Z", "%Y-%m-%d"); - RETURN time::format("1987-06-22T08:30:45Z", "%T"); + RETURN time::format(d"1987-06-22T08:30:45Z", "%Y-%m-%d"); + RETURN time::format(d"1987-06-22T08:30:45Z", "%T"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4361,8 +4361,8 @@ async fn function_time_format() -> Result<(), Error> { #[tokio::test] async fn function_time_group() -> Result<(), Error> { let sql = r#" - RETURN time::group("1987-06-22T08:30:45Z", 'hour'); - RETURN time::group("1987-06-22T08:30:45Z", 'month'); + RETURN time::group(d"1987-06-22T08:30:45Z", 'hour'); + RETURN time::group(d"1987-06-22T08:30:45Z", 'month'); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4370,11 +4370,11 @@ async fn function_time_group() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let val = Value::parse("'1987-06-22T08:00:00Z'"); + let val = Value::parse("d'1987-06-22T08:00:00Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'1987-06-01T00:00:00Z'"); + let val = Value::parse("d'1987-06-01T00:00:00Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4384,7 +4384,7 @@ async fn 
function_time_group() -> Result<(), Error> { async fn function_time_hour() -> Result<(), Error> { let sql = r#" RETURN time::hour(); - RETURN time::hour("1987-06-22T08:30:45Z"); + RETURN time::hour(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4404,7 +4404,7 @@ async fn function_time_hour() -> Result<(), Error> { #[tokio::test] async fn function_time_min() -> Result<(), Error> { let sql = r#" - RETURN time::min(["1987-06-22T08:30:45Z", "1988-06-22T08:30:45Z"]); + RETURN time::min([d"1987-06-22T08:30:45Z", d"1988-06-22T08:30:45Z"]); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4412,7 +4412,7 @@ async fn function_time_min() -> Result<(), Error> { assert_eq!(res.len(), 1); // let tmp = res.remove(0).result?; - let val = Value::parse("'1987-06-22T08:30:45Z'"); + let val = Value::parse("d'1987-06-22T08:30:45Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4421,7 +4421,7 @@ async fn function_time_min() -> Result<(), Error> { #[tokio::test] async fn function_time_max() -> Result<(), Error> { let sql = r#" - RETURN time::max(["1987-06-22T08:30:45Z", "1988-06-22T08:30:45Z"]); + RETURN time::max([d"1987-06-22T08:30:45Z", d"1988-06-22T08:30:45Z"]); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4429,7 +4429,7 @@ async fn function_time_max() -> Result<(), Error> { assert_eq!(res.len(), 1); // let tmp = res.remove(0).result?; - let val = Value::parse("'1988-06-22T08:30:45Z'"); + let val = Value::parse("d'1988-06-22T08:30:45Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4439,7 +4439,7 @@ async fn function_time_max() -> Result<(), Error> { async fn function_time_minute() -> Result<(), Error> { let sql = r#" RETURN time::minute(); - RETURN time::minute("1987-06-22T08:30:45Z"); + RETURN time::minute(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ 
-4460,7 +4460,7 @@ async fn function_time_minute() -> Result<(), Error> { async fn function_time_month() -> Result<(), Error> { let sql = r#" RETURN time::month(); - RETURN time::month("1987-06-22T08:30:45Z"); + RETURN time::month(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4481,7 +4481,7 @@ async fn function_time_month() -> Result<(), Error> { async fn function_time_nano() -> Result<(), Error> { let sql = r#" RETURN time::nano(); - RETURN time::nano("1987-06-22T08:30:45Z"); + RETURN time::nano(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4502,7 +4502,7 @@ async fn function_time_nano() -> Result<(), Error> { async fn function_time_micros() -> Result<(), Error> { let sql = r#" RETURN time::micros(); - RETURN time::micros("1987-06-22T08:30:45Z"); + RETURN time::micros(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4523,7 +4523,7 @@ async fn function_time_micros() -> Result<(), Error> { async fn function_time_millis() -> Result<(), Error> { let sql = r#" RETURN time::millis(); - RETURN time::millis("1987-06-22T08:30:45Z"); + RETURN time::millis(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4559,8 +4559,8 @@ async fn function_time_now() -> Result<(), Error> { #[tokio::test] async fn function_time_round() -> Result<(), Error> { let sql = r#" - RETURN time::round("1987-06-22T08:30:45Z", 1w); - RETURN time::round("1987-06-22T08:30:45Z", 1y); + RETURN time::round(d"1987-06-22T08:30:45Z", 1w); + RETURN time::round(d"1987-06-22T08:30:45Z", 1y); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4568,11 +4568,11 @@ async fn function_time_round() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let 
val = Value::parse("'1987-06-25T00:00:00Z'"); + let val = Value::parse("d'1987-06-25T00:00:00Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'1986-12-28T00:00:00Z'"); + let val = Value::parse("d'1986-12-28T00:00:00Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4582,7 +4582,7 @@ async fn function_time_round() -> Result<(), Error> { async fn function_time_second() -> Result<(), Error> { let sql = r#" RETURN time::second(); - RETURN time::second("1987-06-22T08:30:45Z"); + RETURN time::second(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4603,7 +4603,7 @@ async fn function_time_second() -> Result<(), Error> { async fn function_time_unix() -> Result<(), Error> { let sql = r#" RETURN time::unix(); - RETURN time::unix("1987-06-22T08:30:45Z"); + RETURN time::unix(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4624,7 +4624,7 @@ async fn function_time_unix() -> Result<(), Error> { async fn function_time_wday() -> Result<(), Error> { let sql = r#" RETURN time::wday(); - RETURN time::wday("1987-06-22T08:30:45Z"); + RETURN time::wday(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4645,7 +4645,7 @@ async fn function_time_wday() -> Result<(), Error> { async fn function_time_week() -> Result<(), Error> { let sql = r#" RETURN time::week(); - RETURN time::week("1987-06-22T08:30:45Z"); + RETURN time::week(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4666,7 +4666,7 @@ async fn function_time_week() -> Result<(), Error> { async fn function_time_yday() -> Result<(), Error> { let sql = r#" RETURN time::yday(); - RETURN time::yday("1987-06-22T08:30:45Z"); + RETURN time::yday(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = 
Session::owner().with_ns("test").with_db("test"); @@ -4687,7 +4687,7 @@ async fn function_time_yday() -> Result<(), Error> { async fn function_time_year() -> Result<(), Error> { let sql = r#" RETURN time::year(); - RETURN time::year("1987-06-22T08:30:45Z"); + RETURN time::year(d"1987-06-22T08:30:45Z"); "#; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -4716,11 +4716,11 @@ async fn function_time_from_nanos() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let val = Value::parse("'1982-03-03T17:49:30.384840Z'"); + let val = Value::parse("d'1982-03-03T17:49:30.384840Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2060-01-02T08:28:24.384440Z'"); + let val = Value::parse("d'2060-01-02T08:28:24.384440Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4738,11 +4738,11 @@ async fn function_time_from_micros() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let val = Value::parse("'1982-03-03T17:49:30.384840Z'"); + let val = Value::parse("d'1982-03-03T17:49:30.384840Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2060-01-02T08:28:24.384440Z'"); + let val = Value::parse("d'2060-01-02T08:28:24.384440Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4760,11 +4760,11 @@ async fn function_time_from_millis() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let val = Value::parse("'1982-03-03T17:49:33.840Z'"); + let val = Value::parse("d'1982-03-03T17:49:33.840Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2060-01-02T08:28:24.440Z'"); + let val = Value::parse("d'2060-01-02T08:28:24.440Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4782,11 +4782,11 @@ async fn function_time_from_secs() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let val = Value::parse("'1982-03-04T01:37:20Z'"); 
+ let val = Value::parse("d'1982-03-04T01:37:20Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2060-03-05T09:27:20Z'"); + let val = Value::parse("d'2060-03-05T09:27:20Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4804,11 +4804,11 @@ async fn function_time_from_unix() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let val = Value::parse("'1982-03-04T01:37:20Z'"); + let val = Value::parse("d'1982-03-04T01:37:20Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2060-03-05T09:27:20Z'"); + let val = Value::parse("d'2060-03-05T09:27:20Z'"); assert_eq!(tmp, val); // Ok(()) @@ -4852,11 +4852,11 @@ async fn function_type_datetime() -> Result<(), Error> { assert_eq!(res.len(), 2); // let tmp = res.remove(0).result?; - let val = Value::parse("'1987-06-22T00:00:00Z'"); + let val = Value::parse("d'1987-06-22T00:00:00Z'"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("'2022-08-01T00:00:00Z'"); + let val = Value::parse("d'2022-08-01T00:00:00Z'"); assert_eq!(tmp, val); // Ok(()) @@ -5048,7 +5048,7 @@ async fn function_type_is_collection() -> Result<(), Error> { #[tokio::test] async fn function_type_is_datetime() -> Result<(), Error> { let sql = r#" - RETURN type::is::datetime( "2023-09-04T11:22:38.247Z"); + RETURN type::is::datetime( d"2023-09-04T11:22:38.247Z"); RETURN type::is::datetime("123"); "#; let dbs = new_ds().await?; @@ -5494,7 +5494,7 @@ async fn function_type_is_string() -> Result<(), Error> { #[tokio::test] async fn function_type_is_uuid() -> Result<(), Error> { let sql = r#" - RETURN type::is::uuid( "018a6065-a80a-765e-b640-9fcb330a2f4f"); + RETURN type::is::uuid( u"018a6065-a80a-765e-b640-9fcb330a2f4f"); RETURN type::is::uuid("123"); "#; let dbs = new_ds().await?; diff --git a/lib/tests/future.rs b/lib/tests/future.rs index 4f74d2fd..1a7ae61c 100644 --- a/lib/tests/future.rs +++ b/lib/tests/future.rs @@ -24,12 
+24,12 @@ async fn future_function_simple() -> Result<(), Error> { // let tmp = res.remove(0).result?; let val = - Value::parse("[{ id: person:test, birthday: '2007-06-22T00:00:00Z', can_drive: false }]"); + Value::parse("[{ id: person:test, birthday: d'2007-06-22T00:00:00Z', can_drive: false }]"); assert_eq!(tmp, val); // let tmp = res.remove(0).result?; let val = - Value::parse("[{ id: person:test, birthday: '2001-06-22T00:00:00Z', can_drive: true }]"); + Value::parse("[{ id: person:test, birthday: d'2001-06-22T00:00:00Z', can_drive: true }]"); assert_eq!(tmp, val); // Ok(()) @@ -56,7 +56,7 @@ async fn future_function_arguments() -> Result<(), Error> { { a: 'test@surrealdb.com', b: 'test@surrealdb.com', - id: 'future:test', + id: future:test, x: 'a-test', y: 'b-test', } diff --git a/lib/tests/geometry.rs b/lib/tests/geometry.rs index d03df581..2cfc8c59 100644 --- a/lib/tests/geometry.rs +++ b/lib/tests/geometry.rs @@ -25,7 +25,7 @@ async fn geometry_point() -> Result<(), Error> { "type": "Point", "coordinates": [-0.118092, 51.509865] }, - "id": "city:london" + "id": r"city:london" } ]"#, ); @@ -39,7 +39,7 @@ async fn geometry_point() -> Result<(), Error> { "type": "Point", "coordinates": [-0.118092, 51.509865] }, - "id": "city:london" + "id": r"city:london" } ]"#, ); @@ -90,7 +90,7 @@ async fn geometry_polygon() -> Result<(), Error> { ] ] }, - "id": "city:london" + "id": r"city:london" } ]"#, ); @@ -112,7 +112,7 @@ async fn geometry_polygon() -> Result<(), Error> { ] ] }, - "id": "city:london" + "id": r"city:london" } ]"#, ); @@ -134,7 +134,7 @@ async fn geometry_polygon() -> Result<(), Error> { ] ] }, - "id": "city:london" + "id": r"city:london" } ]"#, ); @@ -178,7 +178,7 @@ async fn geometry_multipoint() -> Result<(), Error> { [-0.118092, 51.509865] ] }, - "id": "city:london" + "id": r"city:london" } ]"#, ); @@ -195,7 +195,7 @@ async fn geometry_multipoint() -> Result<(), Error> { [-0.118092, 51.509865] ] }, - "id": "city:london" + "id": r"city:london" } ]"#, 
); @@ -212,7 +212,7 @@ async fn geometry_multipoint() -> Result<(), Error> { [-0.118092, 51.509865] ] }, - "id": "city:london" + "id": r"city:london" } ]"#, ); @@ -260,7 +260,7 @@ async fn geometry_multipolygon() -> Result<(), Error> { ] ] }, - "id": "university:oxford" + "id": r"university:oxford" } ]"#, ); @@ -281,7 +281,7 @@ async fn geometry_multipolygon() -> Result<(), Error> { ] ] }, - "id": "university:oxford" + "id": r"university:oxford" } ]"#, ); @@ -302,7 +302,7 @@ async fn geometry_multipolygon() -> Result<(), Error> { ] ] }, - "id": "university:oxford" + "id": r"university:oxford" } ]"#, ); diff --git a/lib/tests/group.rs b/lib/tests/group.rs index 71f60acb..df6ee8c4 100644 --- a/lib/tests/group.rs +++ b/lib/tests/group.rs @@ -9,15 +9,15 @@ use surrealdb::sql::Value; #[tokio::test] async fn select_limit_fetch() -> Result<(), Error> { let sql = " - CREATE temperature:1 SET country = 'GBP', time = '2020-01-01T08:00:00Z'; - CREATE temperature:2 SET country = 'GBP', time = '2020-02-01T08:00:00Z'; - CREATE temperature:3 SET country = 'GBP', time = '2020-03-01T08:00:00Z'; - CREATE temperature:4 SET country = 'GBP', time = '2021-01-01T08:00:00Z'; - CREATE temperature:5 SET country = 'GBP', time = '2021-01-01T08:00:00Z'; - CREATE temperature:6 SET country = 'EUR', time = '2021-01-01T08:00:00Z'; - CREATE temperature:7 SET country = 'USD', time = '2021-01-01T08:00:00Z'; - CREATE temperature:8 SET country = 'AUD', time = '2021-01-01T08:00:00Z'; - CREATE temperature:9 SET country = 'CHF', time = '2023-01-01T08:00:00Z'; + CREATE temperature:1 SET country = 'GBP', time = d'2020-01-01T08:00:00Z'; + CREATE temperature:2 SET country = 'GBP', time = d'2020-02-01T08:00:00Z'; + CREATE temperature:3 SET country = 'GBP', time = d'2020-03-01T08:00:00Z'; + CREATE temperature:4 SET country = 'GBP', time = d'2021-01-01T08:00:00Z'; + CREATE temperature:5 SET country = 'GBP', time = d'2021-01-01T08:00:00Z'; + CREATE temperature:6 SET country = 'EUR', time = 
d'2021-01-01T08:00:00Z'; + CREATE temperature:7 SET country = 'USD', time = d'2021-01-01T08:00:00Z'; + CREATE temperature:8 SET country = 'AUD', time = d'2021-01-01T08:00:00Z'; + CREATE temperature:9 SET country = 'CHF', time = d'2023-01-01T08:00:00Z'; SELECT *, time::year(time) AS year FROM temperature; SELECT count(), time::year(time) AS year, country FROM temperature GROUP BY country, year; "; @@ -32,7 +32,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'GBP', id: temperature:1, - time: '2020-01-01T08:00:00Z' + time: d'2020-01-01T08:00:00Z' } ]", ); @@ -44,7 +44,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'GBP', id: temperature:2, - time: '2020-02-01T08:00:00Z' + time: d'2020-02-01T08:00:00Z' } ]", ); @@ -56,7 +56,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'GBP', id: temperature:3, - time: '2020-03-01T08:00:00Z' + time: d'2020-03-01T08:00:00Z' } ]", ); @@ -68,7 +68,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'GBP', id: temperature:4, - time: '2021-01-01T08:00:00Z' + time: d'2021-01-01T08:00:00Z' } ]", ); @@ -80,7 +80,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'GBP', id: temperature:5, - time: '2021-01-01T08:00:00Z' + time: d'2021-01-01T08:00:00Z' } ]", ); @@ -92,7 +92,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'EUR', id: temperature:6, - time: '2021-01-01T08:00:00Z' + time: d'2021-01-01T08:00:00Z' } ]", ); @@ -104,7 +104,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'USD', id: temperature:7, - time: '2021-01-01T08:00:00Z' + time: d'2021-01-01T08:00:00Z' } ]", ); @@ -116,7 +116,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'AUD', id: temperature:8, - time: '2021-01-01T08:00:00Z' + time: d'2021-01-01T08:00:00Z' } ]", ); @@ -128,7 +128,7 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'CHF', id: temperature:9, - time: '2023-01-01T08:00:00Z' + time: 
d'2023-01-01T08:00:00Z' } ]", ); @@ -140,55 +140,55 @@ async fn select_limit_fetch() -> Result<(), Error> { { country: 'GBP', id: temperature:1, - time: '2020-01-01T08:00:00Z', + time: d'2020-01-01T08:00:00Z', year: 2020 }, { country: 'GBP', id: temperature:2, - time: '2020-02-01T08:00:00Z', + time: d'2020-02-01T08:00:00Z', year: 2020 }, { country: 'GBP', id: temperature:3, - time: '2020-03-01T08:00:00Z', + time: d'2020-03-01T08:00:00Z', year: 2020 }, { country: 'GBP', id: temperature:4, - time: '2021-01-01T08:00:00Z', + time: d'2021-01-01T08:00:00Z', year: 2021 }, { country: 'GBP', id: temperature:5, - time: '2021-01-01T08:00:00Z', + time: d'2021-01-01T08:00:00Z', year: 2021 }, { country: 'EUR', id: temperature:6, - time: '2021-01-01T08:00:00Z', + time: d'2021-01-01T08:00:00Z', year: 2021 }, { country: 'USD', id: temperature:7, - time: '2021-01-01T08:00:00Z', + time: d'2021-01-01T08:00:00Z', year: 2021 }, { country: 'AUD', id: temperature:8, - time: '2021-01-01T08:00:00Z', + time: d'2021-01-01T08:00:00Z', year: 2021 }, { country: 'CHF', id: temperature:9, - time: '2023-01-01T08:00:00Z', + time: d'2023-01-01T08:00:00Z', year: 2023 } ]", diff --git a/lib/tests/script.rs b/lib/tests/script.rs index 063af2ac..4be79203 100644 --- a/lib/tests/script.rs +++ b/lib/tests/script.rs @@ -163,10 +163,10 @@ async fn script_function_types() -> Result<(), Error> { "[ { id: article:test, - created_at: '1995-12-17T03:24:00Z', + created_at: d'1995-12-17T03:24:00Z', next_signin: 1w2d6h, manager: user:joanna, - identifier: '03412258-988f-47cd-82db-549902cdaffe', + identifier: u'03412258-988f-47cd-82db-549902cdaffe', } ]", ); diff --git a/lib/tests/vector.rs b/lib/tests/vector.rs index dfaa7cd4..f32bd033 100644 --- a/lib/tests/vector.rs +++ b/lib/tests/vector.rs @@ -14,8 +14,8 @@ async fn select_where_mtree_knn() -> Result<(), Error> { CREATE pts:3 SET point = [8,9,10,11]; DEFINE INDEX mt_pts ON pts FIELDS point MTREE DIMENSION 4; LET $pt = [2,3,4,5]; - SELECT id, 
vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <2,EUCLIDEAN> $pt; - SELECT id FROM pts WHERE point <2> $pt EXPLAIN; + SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point knn<2,EUCLIDEAN> $pt; + SELECT id FROM pts WHERE point knn<2> $pt EXPLAIN; "; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -70,7 +70,7 @@ async fn delete_update_mtree_index() -> Result<(), Error> { DELETE pts:2; UPDATE pts:3 SET point = [12,13,14,15]; LET $pt = [2,3,4,5]; - SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <5> $pt ORDER BY dist; + SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point knn<5> $pt ORDER BY dist; "; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); @@ -153,9 +153,9 @@ async fn select_where_brut_force_knn() -> Result<(), Error> { CREATE pts:2 SET point = [4,5,6,7]; CREATE pts:3 SET point = [8,9,10,11]; LET $pt = [2,3,4,5]; - SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <2,EUCLIDEAN> $pt; - SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <2,EUCLIDEAN> $pt PARALLEL; - SELECT id FROM pts WHERE point <2> $pt EXPLAIN; + SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point knn<2,EUCLIDEAN> $pt; + SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point knn<2,EUCLIDEAN> $pt PARALLEL; + SELECT id FROM pts WHERE point knn<2> $pt EXPLAIN; "; let dbs = new_ds().await?; let ses = Session::owner().with_ns("test").with_db("test"); diff --git a/src/net/params.rs b/src/net/params.rs index 02d9a8e2..ce060375 100644 --- a/src/net/params.rs +++ b/src/net/params.rs @@ -48,7 +48,14 @@ impl From for BTreeMap { fn from(v: Params) -> BTreeMap { v.inner .into_iter() - .map(|(k, v)| (k, surrealdb::sql::json(&v).unwrap_or_else(|_| Value::from(v)))) + .map(|(k, v)| { + #[cfg(feature = 
"experimental-parser")] + let value = surrealdb::syn::json_legacy_strand(&v); + #[cfg(not(feature = "experimental-parser"))] + let value = surrealdb::syn::json(&v); + + (k, value.unwrap_or_else(|_| Value::from(v))) + }) .collect::>() } } diff --git a/tests/cli_integration.rs b/tests/cli_integration.rs index 9a4d2668..730afb10 100644 --- a/tests/cli_integration.rs +++ b/tests/cli_integration.rs @@ -170,7 +170,7 @@ mod cli_integration { let args = format!("sql --conn http://{addr} {creds}"); let output = common::run(&args) .input(&format!( - "USE NS {throwaway} DB {throwaway}; CREATE thing:one;\n", + "USE NS `{throwaway}` DB `{throwaway}`; CREATE thing:one;\n", throwaway = Ulid::new() )) .output() @@ -180,10 +180,9 @@ mod cli_integration { info!("* Pass only ns"); { - let throwaway = Ulid::new(); - let args = format!("sql --conn http://{addr} {creds} --ns {throwaway}"); + let args = format!("sql --conn http://{addr} {creds} --ns {ns}"); let output = common::run(&args) - .input("USE DB {throwaway}; SELECT * FROM thing:one;\n") + .input(&format!("USE DB `{db}`; SELECT * FROM thing:one;\n")) .output() .expect("only ns"); assert!(output.contains("thing:one"), "missing thing:one in {output}"); @@ -295,7 +294,7 @@ mod cli_integration { let args = format!("sql --conn http://{addr} --db {db} --ns {ns} --auth-level root {creds}"); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR ROOT;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR ROOT;\n").as_str()) .output() .expect("success"); assert!( @@ -309,7 +308,7 @@ mod cli_integration { let args = format!("sql --conn http://{addr} --db {db} --ns {ns} --auth-level root {creds}"); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR NS;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR NS;\n").as_str()) .output() .expect("success"); assert!( @@ -323,7 +322,7 @@ mod cli_integration { let args = format!("sql --conn http://{addr} --db {db} --ns 
{ns} --auth-level root {creds}"); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR DB;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR DB;\n").as_str()) .output() .expect("success"); assert!( @@ -338,7 +337,7 @@ mod cli_integration { "sql --conn http://{addr} --db {db} --ns {ns} --auth-level namespace {creds}" ); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR ROOT;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR ROOT;\n").as_str()) .output() .expect("success"); assert!( @@ -353,7 +352,7 @@ mod cli_integration { "sql --conn http://{addr} --db {db} --ns {ns} --auth-level namespace {creds}" ); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR NS;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR NS;\n").as_str()) .output() .expect("success"); assert!( @@ -368,7 +367,7 @@ mod cli_integration { "sql --conn http://{addr} --db {db} --ns {ns} --auth-level namespace {creds}" ); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR DB;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR DB;\n").as_str()) .output() .expect("success"); assert!( @@ -383,7 +382,7 @@ mod cli_integration { "sql --conn http://{addr} --db {db} --ns {ns} --auth-level database {creds}" ); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR ROOT;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR ROOT;\n").as_str()) .output() .expect("success"); assert!( @@ -398,7 +397,7 @@ mod cli_integration { "sql --conn http://{addr} --db {db} --ns {ns} --auth-level database {creds}" ); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR NS;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR NS;\n").as_str()) .output() .expect("success"); assert!( @@ -413,7 +412,7 @@ mod cli_integration { "sql --conn http://{addr} --db {db} --ns {ns} --auth-level 
database {creds}" ); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR DB;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR DB;\n").as_str()) .output() .expect("success"); assert!( @@ -426,7 +425,7 @@ mod cli_integration { { let args = format!("sql --conn http://{addr} --auth-level database {creds}"); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR NS;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR NS;\n").as_str()) .output(); assert!( output @@ -442,7 +441,7 @@ mod cli_integration { { let args = format!("sql --conn http://{addr} --ns {ns} --auth-level database {creds}"); let output = common::run(&args) - .input(format!("USE NS {ns} DB {db}; INFO FOR DB;\n").as_str()) + .input(format!("USE NS `{ns}` DB `{db}`; INFO FOR DB;\n").as_str()) .output(); assert!( output diff --git a/tests/http_integration.rs b/tests/http_integration.rs index 6d88ba21..df933636 100644 --- a/tests/http_integration.rs +++ b/tests/http_integration.rs @@ -1413,7 +1413,7 @@ mod http_integration { let res = client .post(format!("http://{addr}/sql")) .basic_auth(USER, Some(PASS)) - .body(format!("CREATE |{table}:1..{num_records}| SET default = 'content'")) + .body(format!("CREATE |`{table}`:1..{num_records}| SET default = 'content'")) .send() .await?; let body: serde_json::Value = serde_json::from_str(&res.text().await?).unwrap(); diff --git a/tests/ws_integration.rs b/tests/ws_integration.rs index 9bda8510..a6a98163 100644 --- a/tests/ws_integration.rs +++ b/tests/ws_integration.rs @@ -1,9 +1,7 @@ // RUST_LOG=warn cargo make ci-ws-integration - mod common; mod ws_integration { - use super::common; /// Tests for the empty protocol format