Tooling: Test checking file-based database migration (#3558)
parent 9d2fe88717
commit a182908fde
24 changed files with 877 additions and 109 deletions
25 .github/workflows/ci.yml vendored
@@ -664,3 +664,28 @@ jobs:
           df -h
           ps auxf
           cat /tmp/surrealdb.log || true
+
+  db-upgrade:
+    name: Database Upgrade from previous versions
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Install stable toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          toolchain: 1.75.0
+
+      - name: Checkout sources
+        uses: actions/checkout@v4
+
+      - name: Setup cache
+        uses: Swatinem/rust-cache@v2
+        with:
+          save-if: ${{ github.ref == 'refs/heads/main' }}
+
+      - name: Install cargo-make
+        run: cargo install --debug --locked cargo-make
+
+      - name: Test upgrade
+        run: cargo make ci-database-upgrade
@@ -17,7 +17,7 @@ args = ["check", "--locked", "--package", "surrealdb", "--features", "protocol-w
 category = "CI - CHECK"
 command = "cargo"
 env = { RUSTFLAGS = "--cfg surrealdb_unstable" }
-args = ["clippy", "--all-targets", "--features", "storage-mem,storage-rocksdb,storage-speedb,storage-tikv,storage-fdb,scripting,http,jwks", "--tests", "--benches", "--examples","--bins", "--", "-D", "warnings"]
+args = ["clippy", "--all-targets", "--features", "storage-mem,storage-rocksdb,storage-speedb,storage-tikv,storage-fdb,scripting,http,jwks", "--tests", "--benches", "--examples", "--bins", "--", "-D", "warnings"]
 
 #
 # Integration Tests
@@ -26,25 +26,25 @@ args = ["clippy", "--all-targets", "--features", "storage-mem,storage-rocksdb,st
 [tasks.ci-cli-integration]
 category = "CI - INTEGRATION TESTS"
 command = "cargo"
-env = { RUST_BACKTRACE=1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG={ value = "cli_integration=debug", condition = { env_not_set = ["RUST_LOG"] } } }
+env = { RUST_BACKTRACE = 1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG = { value = "cli_integration=debug", condition = { env_not_set = ["RUST_LOG"] } } }
 args = ["test", "--locked", "--no-default-features", "--features", "storage-mem,http,scripting,jwks", "--workspace", "--test", "cli_integration", "--", "cli_integration"]
 
 [tasks.ci-http-integration]
 category = "CI - INTEGRATION TESTS"
 command = "cargo"
-env = { RUST_BACKTRACE=1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG={ value = "http_integration=debug", condition = { env_not_set = ["RUST_LOG"] } } }
+env = { RUST_BACKTRACE = 1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG = { value = "http_integration=debug", condition = { env_not_set = ["RUST_LOG"] } } }
 args = ["test", "--locked", "--no-default-features", "--features", "storage-mem,http-compression,jwks", "--workspace", "--test", "http_integration", "--", "http_integration"]
 
 [tasks.ci-ws-integration]
 category = "WS - INTEGRATION TESTS"
 command = "cargo"
-env = { RUST_BACKTRACE=1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG={ value = "ws_integration=debug", condition = { env_not_set = ["RUST_LOG"] } } }
+env = { RUST_BACKTRACE = 1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG = { value = "ws_integration=debug", condition = { env_not_set = ["RUST_LOG"] } } }
 args = ["test", "--locked", "--no-default-features", "--features", "storage-mem,sql2", "--workspace", "--test", "ws_integration", "--", "ws_integration"]
 
 [tasks.ci-ml-integration]
 category = "ML - INTEGRATION TESTS"
 command = "cargo"
-env = { RUST_BACKTRACE=1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG={ value = "cli_integration::common=debug", condition = { env_not_set = ["RUST_LOG"] } } }
+env = { RUST_BACKTRACE = 1, RUSTFLAGS = "--cfg surrealdb_unstable", RUST_LOG = { value = "cli_integration::common=debug", condition = { env_not_set = ["RUST_LOG"] } } }
 args = ["test", "--locked", "--features", "storage-mem,ml,sql2", "--workspace", "--test", "ml_integration", "--", "ml_integration", "--nocapture"]
 
 [tasks.ci-workspace-coverage]
@@ -52,24 +52,26 @@ category = "CI - INTEGRATION TESTS"
 command = "cargo"
 env = { RUSTFLAGS = "--cfg surrealdb_unstable" }
 args = [
-    "llvm-cov", "--html", "--locked", "--no-default-features", "--features", "storage-mem,scripting,http,jwks", "--workspace", "--",
-    "--skip", "api_integration",
-    "--skip", "cli_integration",
-    "--skip", "http_integration",
-    "--skip", "ws_integration"
+    "llvm-cov", "--html", "--locked", "--no-default-features", "--features", "storage-mem,scripting,http,jwks", "--workspace", "--",
+    "--skip", "api_integration",
+    "--skip", "cli_integration",
+    "--skip", "http_integration",
+    "--skip", "ws_integration",
+    "--skip", "database_upgrade"
 ]
 
 [tasks.test-experimental-parser]
 category = "CI - INTEGRATION TESTS"
 command = "cargo"
-env = { RUST_BACKTRACE=1, RUSTFLAGS = "--cfg surrealdb_unstable", RUSTDOCFLAGS="--cfg surrealdb_unstable" }
+env = { RUST_BACKTRACE = 1, RUSTFLAGS = "--cfg surrealdb_unstable", RUSTDOCFLAGS = "--cfg surrealdb_unstable" }
 args = [
-    "test", "--locked", "--no-default-features", "--features", "storage-mem,scripting,http,parser2", "--workspace", "--",
-    "--skip", "api_integration",
-    "--skip", "cli_integration",
-    "--skip", "http_integration",
-    "--skip", "ws_integration"
+    "test", "--locked", "--no-default-features", "--features", "storage-mem,scripting,http,parser2", "--workspace", "--",
+    "--skip", "api_integration",
+    "--skip", "cli_integration",
+    "--skip", "http_integration",
+    "--skip", "ws_integration",
+    "--skip", "database_upgrade"
 ]
 
 [tasks.test-workspace-coverage-complete]
@@ -83,6 +85,11 @@ env = { _START_SURREALDB_PATH = "memory" }
 category = "CI - INTEGRATION TESTS"
 run_task = { name = ["start-surrealdb", "test-workspace-coverage-complete", "stop-surrealdb"], fork = true }
 
+[tasks.ci-database-upgrade]
+env = { _TEST_FEATURES = "storage-rocksdb,sql2" }
+category = "CI - DATABASE UPGRADE TESTS"
+run_task = { name = ["test-database-upgrade"], fork = true }
+
 #
 # Tests private tasks
 #
@@ -90,21 +97,28 @@ run_task = { name = ["start-surrealdb", "test-workspace-coverage-complete", "sto
 [tasks.test-kvs]
 private = true
 command = "cargo"
-env = {RUST_BACKTRACE=1, RUSTFLAGS = "--cfg surrealdb_unstable" }
+env = { RUST_BACKTRACE = 1, RUSTFLAGS = "--cfg surrealdb_unstable" }
 args = ["test", "--locked", "--package", "surrealdb", "--no-default-features", "--features", "${_TEST_FEATURES}", "--lib", "kvs"]
 
 
 [tasks.test-api-integration]
 private = true
 command = "cargo"
-env = {RUST_BACKTRACE=1, RUSTFLAGS = "--cfg surrealdb_unstable" }
+env = { RUST_BACKTRACE = 1, RUSTFLAGS = "--cfg surrealdb_unstable" }
 args = ["test", "--locked", "--package", "surrealdb", "--no-default-features", "--features", "${_TEST_FEATURES}", "--test", "api", "api_integration::${_TEST_API_ENGINE}"]
 
 [tasks.ci-api-integration]
-env = {RUST_BACKTRACE=1, _START_SURREALDB_PATH = "memory", RUSTFLAGS = "--cfg surrealdb_unstable" }
+env = { RUST_BACKTRACE = 1, _START_SURREALDB_PATH = "memory", RUSTFLAGS = "--cfg surrealdb_unstable" }
 private = true
 run_task = { name = ["start-surrealdb", "test-api-integration", "stop-surrealdb"], fork = true }
 
+[tasks.test-database-upgrade]
+private = true
+command = "cargo"
+env = { RUST_BACKTRACE = 1, RUST_LOG = "info", RUSTFLAGS = "--cfg surrealdb_unstable" }
+args = ["test", "--locked", "--no-default-features", "--features", "${_TEST_FEATURES}", "--workspace", "--test", "database_upgrade", "--", "database_upgrade", "--show-output"]
+
+
 #
 # Integration tests with background services
 #
@@ -203,7 +217,7 @@ script = """
 
 [tasks.start-tikv]
 category = "CI - SERVICES"
-env = { SURREAL_LINK="https://github.com/surrealdb/surrealdb/releases/download/v1.2.0/surreal-v1.2.0.linux-amd64.tgz" }
+env = { SURREAL_LINK = "https://github.com/surrealdb/surrealdb/releases/download/v1.2.0/surreal-v1.2.0.linux-amd64.tgz" }
 script = """
 #!/bin/bash -ex
 
@@ -1,10 +1,10 @@
 use crate::err::Error;
 use crate::idx::trees::bkeys::TrieKeys;
-use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore};
+use crate::idx::trees::btree::{BState, BState1, BState1skip, BStatistics, BTree, BTreeStore};
 use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
 use crate::idx::{IndexKeyBase, VersionedSerdeState};
-use crate::kvs::{Key, Transaction, TransactionType};
-use revision::revisioned;
+use crate::kvs::{Key, Transaction, TransactionType, Val};
+use revision::{revisioned, Revisioned};
 use roaring::RoaringTreemap;
 use serde::{Deserialize, Serialize};
 
@@ -157,7 +157,63 @@ struct State {
     next_doc_id: DocId,
 }
 
-impl VersionedSerdeState for State {}
+impl VersionedSerdeState for State {
+    fn try_from_val(val: Val) -> Result<Self, Error> {
+        match Self::deserialize_revisioned(&mut val.as_slice()) {
+            Ok(r) => Ok(r),
+            // If it fails here, there is the chance it was an old version of BState
+            // that included the #[serde(skip)] updated parameter
+            Err(e) => match State1skip::deserialize_revisioned(&mut val.as_slice()) {
+                Ok(b_old) => Ok(b_old.into()),
+                Err(_) => match State1::deserialize_revisioned(&mut val.as_slice()) {
+                    Ok(b_old) => Ok(b_old.into()),
+                    // Otherwise we return the initial error
+                    Err(_) => Err(Error::Revision(e)),
+                },
+            },
+        }
+    }
+}
+
+#[derive(Serialize, Deserialize)]
+#[revisioned(revision = 1)]
+struct State1 {
+    btree: BState1,
+    available_ids: Option<RoaringTreemap>,
+    next_doc_id: DocId,
+}
+
+impl From<State1> for State {
+    fn from(s: State1) -> Self {
+        Self {
+            btree: s.btree.into(),
+            available_ids: s.available_ids,
+            next_doc_id: s.next_doc_id,
+        }
+    }
+}
+
+impl VersionedSerdeState for State1 {}
+
+#[derive(Serialize, Deserialize)]
+#[revisioned(revision = 1)]
+struct State1skip {
+    btree: BState1skip,
+    available_ids: Option<RoaringTreemap>,
+    next_doc_id: DocId,
+}
+
+impl From<State1skip> for State {
+    fn from(s: State1skip) -> Self {
+        Self {
+            btree: s.btree.into(),
+            available_ids: s.available_ids,
+            next_doc_id: s.next_doc_id,
+        }
+    }
+}
+
+impl VersionedSerdeState for State1skip {}
 
 impl State {
     fn new(default_btree_order: u32) -> Self {
@@ -1,10 +1,10 @@
 use crate::err::Error;
 use crate::idx::trees::bkeys::FstKeys;
-use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore};
+use crate::idx::trees::btree::{BState, BState1, BState1skip, BStatistics, BTree, BTreeStore};
 use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
 use crate::idx::{IndexKeyBase, VersionedSerdeState};
-use crate::kvs::{Key, Transaction, TransactionType};
-use revision::revisioned;
+use crate::kvs::{Key, Transaction, TransactionType, Val};
+use revision::{revisioned, Revisioned};
 use roaring::RoaringTreemap;
 use serde::{Deserialize, Serialize};
 
@@ -140,7 +140,45 @@ struct State {
     next_term_id: TermId,
 }
 
-impl VersionedSerdeState for State {}
+#[derive(Serialize, Deserialize)]
+#[revisioned(revision = 1)]
+struct State1 {
+    btree: BState1,
+    available_ids: Option<RoaringTreemap>,
+    next_term_id: TermId,
+}
+
+impl VersionedSerdeState for State1 {}
+
+#[derive(Serialize, Deserialize)]
+#[revisioned(revision = 1)]
+struct State1skip {
+    btree: BState1skip,
+    available_ids: Option<RoaringTreemap>,
+    next_term_id: TermId,
+}
+
+impl VersionedSerdeState for State1skip {}
+
+impl From<State1> for State {
+    fn from(state: State1) -> Self {
+        Self {
+            btree: state.btree.into(),
+            available_ids: state.available_ids,
+            next_term_id: state.next_term_id,
+        }
+    }
+}
+
+impl From<State1skip> for State {
+    fn from(state: State1skip) -> Self {
+        Self {
+            btree: state.btree.into(),
+            available_ids: state.available_ids,
+            next_term_id: state.next_term_id,
+        }
+    }
+}
 
 impl State {
     fn new(default_btree_order: u32) -> Self {
@@ -152,6 +190,24 @@ impl State {
     }
 }
 
+impl VersionedSerdeState for State {
+    fn try_from_val(val: Val) -> Result<Self, Error> {
+        match Self::deserialize_revisioned(&mut val.as_slice()) {
+            Ok(r) => Ok(r),
+            // If it fails here, there is the chance it was an old version of BState
+            // that included the #[serde(skip)] updated parameter
+            Err(e) => match State1skip::deserialize_revisioned(&mut val.as_slice()) {
+                Ok(b_old) => Ok(b_old.into()),
+                Err(_) => match State1::deserialize_revisioned(&mut val.as_slice()) {
+                    Ok(b_old) => Ok(b_old.into()),
+                    // Otherwise we return the initial error
+                    Err(_) => Err(Error::Revision(e)),
+                },
+            },
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use crate::idx::ft::postings::TermFrequency;
@@ -4,7 +4,7 @@ use crate::idx::trees::store::{NodeId, StoredNode, TreeNode, TreeStore};
 use crate::idx::VersionedSerdeState;
 use crate::kvs::{Key, Transaction, Val};
 use crate::sql::{Object, Value};
-use revision::revisioned;
+use revision::{revisioned, Revisioned};
 use serde::{Deserialize, Serialize};
 #[cfg(debug_assertions)]
 use std::collections::HashSet;
@@ -37,7 +37,63 @@ pub struct BState {
     generation: u64,
 }
 
-impl VersionedSerdeState for BState {}
+impl VersionedSerdeState for BState {
+    fn try_from_val(val: Val) -> Result<Self, Error> {
+        match Self::deserialize_revisioned(&mut val.as_slice()) {
+            Ok(r) => Ok(r),
+            // If it fails here, there is the chance it was an old version of BState
+            // that included the #[serde(skip)] updated parameter
+            Err(e) => match BState1skip::deserialize_revisioned(&mut val.as_slice()) {
+                Ok(b_old) => Ok(b_old.into()),
+                Err(_) => match BState1::deserialize_revisioned(&mut val.as_slice()) {
+                    Ok(b_old) => Ok(b_old.into()),
+                    // Otherwise we return the initial error
+                    Err(_) => Err(Error::Revision(e)),
+                },
+            },
+        }
+    }
+}
+
+#[derive(Clone, Serialize, Deserialize)]
+#[revisioned(revision = 1)]
+pub(in crate::idx) struct BState1 {
+    minimum_degree: u32,
+    root: Option<NodeId>,
+    next_node_id: NodeId,
+}
+
+#[derive(Clone, Serialize, Deserialize)]
+#[revisioned(revision = 1)]
+pub(in crate::idx) struct BState1skip {
+    minimum_degree: u32,
+    root: Option<NodeId>,
+    next_node_id: NodeId,
+    #[serde(skip)]
+    updated: bool,
+}
+
+impl From<BState1> for BState {
+    fn from(o: BState1) -> Self {
+        Self {
+            minimum_degree: o.minimum_degree,
+            root: o.root,
+            next_node_id: o.next_node_id,
+            generation: 0,
+        }
+    }
+}
+
+impl From<BState1skip> for BState {
+    fn from(o: BState1skip) -> Self {
+        Self {
+            minimum_degree: o.minimum_degree,
+            root: o.root,
+            next_node_id: o.next_node_id,
+            generation: 0,
+        }
+    }
+}
 
 impl BState {
     pub fn new(minimum_degree: u32) -> Self {
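The `try_from_val` override above (mirrored for `State` in the doc-ids and terms hunks earlier) is a try-new-then-fall-back decode: attempt the current revisioned layout first, and only on failure retry the legacy layouts and upgrade them through `From`. A minimal standalone sketch of the same pattern, using hypothetical `Legacy`/`Current` types rather than the commit's own:

    use revision::{revisioned, Revisioned};

    #[derive(Debug, PartialEq)]
    #[revisioned(revision = 1)]
    struct Legacy {
        counter: u64,
    }

    #[derive(Debug, PartialEq)]
    #[revisioned(revision = 1)]
    struct Current {
        counter: u64,
        generation: u64,
    }

    impl From<Legacy> for Current {
        // Upgrade an old on-disk value; new fields get a neutral default.
        fn from(l: Legacy) -> Self {
            Self {
                counter: l.counter,
                generation: 0,
            }
        }
    }

    // Try the current layout first; if the bytes predate it, decode the
    // legacy layout and upgrade it in memory, as `try_from_val` does above.
    fn decode(val: Vec<u8>) -> Result<Current, revision::Error> {
        match Current::deserialize_revisioned(&mut val.as_slice()) {
            Ok(c) => Ok(c),
            Err(e) => match Legacy::deserialize_revisioned(&mut val.as_slice()) {
                Ok(old) => Ok(old.into()),
                // Otherwise surface the original error.
                Err(_) => Err(e),
            },
        }
    }

Note that only decoding is backward compatible here: once the value is written back, it is stored in the current layout.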
@@ -52,6 +52,9 @@ pub struct SearchParams {
 #[revisioned(revision = 2)]
 pub struct MTreeParams {
     pub dimension: u16,
+    #[revision(start = 1, end = 2, convert_fn = "convert_old_distance")]
+    pub _distance: Distance1, // TODO remove once 1.0 && 1.1 are EOL
+    #[revision(start = 2)]
     pub distance: Distance,
     pub vector_type: VectorType,
     pub capacity: u16,
@@ -62,6 +65,35 @@ pub struct MTreeParams {
     pub mtree_cache: u32,
 }
 
+impl MTreeParams {
+    fn convert_old_distance(
+        &mut self,
+        _revision: u16,
+        d1: Distance1,
+    ) -> Result<(), revision::Error> {
+        self.distance = match d1 {
+            Distance1::Euclidean => Distance::Euclidean,
+            Distance1::Manhattan => Distance::Manhattan,
+            Distance1::Cosine => Distance::Cosine,
+            Distance1::Hamming => Distance::Hamming,
+            Distance1::Minkowski(n) => Distance::Minkowski(n),
+        };
+        Ok(())
+    }
+}
+
+#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
+#[revisioned(revision = 1)]
+pub enum Distance1 {
+    #[default]
+    Euclidean,
+    Manhattan,
+    Cosine,
+    Hamming,
+    Minkowski(Number),
+}
+
 #[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
 #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
 #[revisioned(revision = 1)]
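`convert_fn` is the `revision` crate's hook for exactly this kind of schema change: when bytes at an older revision are decoded, the old field's value is handed to the named method so it can be folded into the new field. A sketch of the mechanism with a hypothetical struct (same attribute shapes as `MTreeParams` above, not code from this commit):

    use revision::revisioned;

    #[derive(Debug, Default)]
    #[revisioned(revision = 2)]
    struct Span {
        // Revision 1 stored metres; revision 2 stores millimetres.
        #[revision(start = 1, end = 2, convert_fn = "convert_meters")]
        meters: u32,
        #[revision(start = 2)]
        millimeters: u64,
    }

    impl Span {
        // Called only when deserializing revision-1 bytes; `old` is the
        // decoded value of the retired `meters` field.
        fn convert_meters(&mut self, _revision: u16, old: u32) -> Result<(), revision::Error> {
            self.millimeters = (old as u64) * 1000;
            Ok(())
        }
    }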
@@ -80,14 +112,14 @@ pub enum Distance {
 impl Distance {
     pub(crate) fn compute(&self, v1: &Vec<Number>, v2: &Vec<Number>) -> Result<Number, Error> {
         match self {
-            Distance::Cosine => v1.cosine_similarity(v2),
-            Distance::Chebyshev => v1.chebyshev_distance(v2),
-            Distance::Euclidean => v1.euclidean_distance(v2),
-            Distance::Hamming => v1.hamming_distance(v2),
-            Distance::Jaccard => v1.jaccard_similarity(v2),
-            Distance::Manhattan => v1.manhattan_distance(v2),
-            Distance::Minkowski(r) => v1.minkowski_distance(v2, r),
-            Distance::Pearson => v1.pearson_similarity(v2),
+            Self::Cosine => v1.cosine_similarity(v2),
+            Self::Chebyshev => v1.chebyshev_distance(v2),
+            Self::Euclidean => v1.euclidean_distance(v2),
+            Self::Hamming => v1.hamming_distance(v2),
+            Self::Jaccard => v1.jaccard_similarity(v2),
+            Self::Manhattan => v1.manhattan_distance(v2),
+            Self::Minkowski(r) => v1.minkowski_distance(v2, r),
+            Self::Pearson => v1.pearson_similarity(v2),
         }
     }
 }
 
@@ -29,9 +29,13 @@ impl ser::Serializer for Serializer {
         variant: &'static str,
     ) -> Result<Self::Ok, Error> {
         match variant {
+            "Chebyshev" => Ok(Distance::Chebyshev),
+            "Cosine" => Ok(Distance::Cosine),
             "Euclidean" => Ok(Distance::Euclidean),
-            "Manhattan" => Ok(Distance::Manhattan),
             "Hamming" => Ok(Distance::Hamming),
+            "Jaccard" => Ok(Distance::Jaccard),
+            "Manhattan" => Ok(Distance::Manhattan),
+            "Pearson" => Ok(Distance::Pearson),
             variant => Err(Error::custom(format!("unexpected unit variant `{name}::{variant}`"))),
         }
     }
@@ -66,29 +70,18 @@ mod tests {
 
     #[test]
-    fn distance_euclidean() {
-        let dist = Distance::Euclidean;
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
-    }
-
-    #[test]
-    fn distance_manhattan() {
-        let dist = Distance::Manhattan;
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
-    }
-
-    #[test]
-    fn distance_hamming() {
-        let dist = Distance::Hamming;
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
-    }
-
-    #[test]
     fn distance_minkowski() {
-        let dist = Distance::Minkowski(7.into());
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
+        for dist in [
+            Distance::Chebyshev,
+            Distance::Cosine,
+            Distance::Euclidean,
+            Distance::Jaccard,
+            Distance::Hamming,
+            Distance::Manhattan,
+            Distance::Minkowski(7.into()),
+            Distance::Pearson,
+        ] {
+            let serialized = dist.serialize(Serializer.wrap()).unwrap();
+            assert_eq!(dist, serialized, "{}", dist);
+        }
     }
 }
@@ -1,5 +1,5 @@
 use crate::err::Error;
-use crate::sql::index::{Distance, MTreeParams, VectorType};
+use crate::sql::index::{Distance, Distance1, MTreeParams, VectorType};
 use crate::sql::value::serde::ser;
 use ser::Serializer as _;
 use serde::ser::Error as _;
@@ -66,6 +66,9 @@ impl serde::ser::SerializeStruct for SerializeMTree {
             "dimension" => {
                 self.dimension = value.serialize(ser::primitive::u16::Serializer.wrap())?;
             }
+            "_distance" => {
+                self.distance = value.serialize(ser::distance::Serializer.wrap())?;
+            }
             "distance" => {
                 self.distance = value.serialize(ser::distance::Serializer.wrap())?;
             }
@@ -94,6 +97,7 @@ impl serde::ser::SerializeStruct for SerializeMTree {
     fn end(self) -> Result<Self::Ok, Error> {
         Ok(MTreeParams {
             dimension: self.dimension,
+            _distance: Distance1::Euclidean,
             distance: self.distance,
             vector_type: self.vector_type,
             capacity: self.capacity,
@@ -108,6 +112,7 @@ impl serde::ser::SerializeStruct for SerializeMTree {
     fn mtree_params() {
         let params = MTreeParams {
             dimension: 1,
+            _distance: Default::default(),
             distance: Default::default(),
             vector_type: Default::default(),
             capacity: 2,
@@ -52,6 +52,9 @@ pub struct SearchParams {
 #[revisioned(revision = 2)]
 pub struct MTreeParams {
     pub dimension: u16,
+    #[revision(start = 1, end = 2, convert_fn = "convert_old_distance")]
+    pub _distance: Distance1, // TODO remove once 1.0 && 1.1 are EOL
+    #[revision(start = 2)]
     pub distance: Distance,
     pub vector_type: VectorType,
     pub capacity: u16,
@@ -62,6 +65,35 @@ pub struct MTreeParams {
     pub mtree_cache: u32,
 }
 
+impl MTreeParams {
+    fn convert_old_distance(
+        &mut self,
+        _revision: u16,
+        d1: Distance1,
+    ) -> Result<(), revision::Error> {
+        self.distance = match d1 {
+            Distance1::Euclidean => Distance::Euclidean,
+            Distance1::Manhattan => Distance::Manhattan,
+            Distance1::Cosine => Distance::Cosine,
+            Distance1::Hamming => Distance::Hamming,
+            Distance1::Minkowski(n) => Distance::Minkowski(n),
+        };
+        Ok(())
+    }
+}
+
+#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
+#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
+#[revisioned(revision = 1)]
+pub enum Distance1 {
+    #[default]
+    Euclidean,
+    Manhattan,
+    Cosine,
+    Hamming,
+    Minkowski(Number),
+}
+
 #[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
 #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
 #[revisioned(revision = 1)]
@@ -80,14 +112,14 @@ pub enum Distance {
 impl Distance {
     pub(crate) fn compute(&self, v1: &Vec<Number>, v2: &Vec<Number>) -> Result<Number, Error> {
         match self {
-            Distance::Cosine => v1.cosine_similarity(v2),
-            Distance::Chebyshev => v1.chebyshev_distance(v2),
-            Distance::Euclidean => v1.euclidean_distance(v2),
-            Distance::Hamming => v1.hamming_distance(v2),
-            Distance::Jaccard => v1.jaccard_similarity(v2),
-            Distance::Manhattan => v1.manhattan_distance(v2),
-            Distance::Minkowski(r) => v1.minkowski_distance(v2, r),
-            Distance::Pearson => v1.pearson_similarity(v2),
+            Self::Cosine => v1.cosine_similarity(v2),
+            Self::Chebyshev => v1.chebyshev_distance(v2),
+            Self::Euclidean => v1.euclidean_distance(v2),
+            Self::Hamming => v1.hamming_distance(v2),
+            Self::Jaccard => v1.jaccard_similarity(v2),
+            Self::Manhattan => v1.manhattan_distance(v2),
+            Self::Minkowski(r) => v1.minkowski_distance(v2, r),
+            Self::Pearson => v1.pearson_similarity(v2),
         }
     }
 }
 
@@ -29,9 +29,13 @@ impl ser::Serializer for Serializer {
         variant: &'static str,
     ) -> Result<Self::Ok, Error> {
         match variant {
+            "Chebyshev" => Ok(Distance::Chebyshev),
+            "Cosine" => Ok(Distance::Cosine),
             "Euclidean" => Ok(Distance::Euclidean),
-            "Manhattan" => Ok(Distance::Manhattan),
             "Hamming" => Ok(Distance::Hamming),
+            "Jaccard" => Ok(Distance::Jaccard),
+            "Manhattan" => Ok(Distance::Manhattan),
+            "Pearson" => Ok(Distance::Pearson),
             variant => Err(Error::custom(format!("unexpected unit variant `{name}::{variant}`"))),
         }
     }
@@ -66,29 +70,18 @@ mod tests {
 
     #[test]
-    fn distance_euclidean() {
-        let dist = Distance::Euclidean;
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
-    }
-
-    #[test]
-    fn distance_manhattan() {
-        let dist = Distance::Manhattan;
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
-    }
-
-    #[test]
-    fn distance_hamming() {
-        let dist = Distance::Hamming;
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
-    }
-
-    #[test]
     fn distance_minkowski() {
-        let dist = Distance::Minkowski(7.into());
-        let serialized = dist.serialize(Serializer.wrap()).unwrap();
-        assert_eq!(dist, serialized);
+        for dist in [
+            Distance::Chebyshev,
+            Distance::Cosine,
+            Distance::Euclidean,
+            Distance::Jaccard,
+            Distance::Hamming,
+            Distance::Manhattan,
+            Distance::Minkowski(7.into()),
+            Distance::Pearson,
+        ] {
+            let serialized = dist.serialize(Serializer.wrap()).unwrap();
+            assert_eq!(dist, serialized, "{}", dist);
+        }
     }
 }
@@ -1,5 +1,5 @@
 use crate::err::Error;
-use crate::sql::index::{Distance, MTreeParams, VectorType};
+use crate::sql::index::{Distance, Distance1, MTreeParams, VectorType};
 use crate::sql::value::serde::ser;
 use ser::Serializer as _;
 use serde::ser::Error as _;
@@ -66,6 +66,9 @@ impl serde::ser::SerializeStruct for SerializeMTree {
             "dimension" => {
                 self.dimension = value.serialize(ser::primitive::u16::Serializer.wrap())?;
             }
+            "_distance" => {
+                self.distance = value.serialize(ser::distance::Serializer.wrap())?;
+            }
             "distance" => {
                 self.distance = value.serialize(ser::distance::Serializer.wrap())?;
             }
@@ -94,6 +97,7 @@ impl serde::ser::SerializeStruct for SerializeMTree {
     fn end(self) -> Result<Self::Ok, Error> {
         Ok(MTreeParams {
             dimension: self.dimension,
+            _distance: Distance1::Euclidean,
             distance: self.distance,
             vector_type: self.vector_type,
             capacity: self.capacity,
@@ -108,6 +112,7 @@ impl serde::ser::SerializeStruct for SerializeMTree {
     fn mtree_params() {
         let params = MTreeParams {
             dimension: 1,
+            _distance: Default::default(),
             distance: Default::default(),
             vector_type: Default::default(),
             capacity: 2,
@@ -4,7 +4,7 @@ use super::super::{
     IResult,
 };
 use crate::sql::{
-    index::{Distance, MTreeParams, SearchParams, VectorType},
+    index::{Distance, Distance1, MTreeParams, SearchParams, VectorType},
     Ident, Index,
 };
 
@@ -118,6 +118,7 @@ pub fn mtree_distance(i: &str) -> IResult<&str, Distance> {
     let (i, _) = shouldbespace(i)?;
     alt((
         map(tag_no_case("EUCLIDEAN"), |_| Distance::Euclidean),
+        map(tag_no_case("COSINE"), |_| Distance::Cosine),
         map(tag_no_case("MANHATTAN"), |_| Distance::Manhattan),
         minkowski,
     ))(i)
@@ -178,6 +179,7 @@ pub fn mtree(i: &str) -> IResult<&str, Index> {
         i,
         Index::MTree(MTreeParams {
             dimension,
+            _distance: Distance1::Euclidean, // TODO remove once 1.0 && 1.1 are EOL
             distance: distance.unwrap_or(Distance::Euclidean),
             vector_type: vector_type.unwrap_or(VectorType::F64),
             capacity: capacity.unwrap_or(40),
@@ -102,7 +102,7 @@ fn index_comment(i: &str) -> IResult<&str, DefineIndexOption> {
 mod tests {
 
     use super::*;
-    use crate::sql::index::{Distance, MTreeParams, SearchParams, VectorType};
+    use crate::sql::index::{Distance, Distance1, MTreeParams, SearchParams, VectorType};
     use crate::sql::Ident;
     use crate::sql::Idiom;
     use crate::sql::Idioms;
@@ -225,6 +225,7 @@ mod tests {
             index: Index::MTree(MTreeParams {
                 dimension: 4,
                 vector_type: VectorType::F64,
+                _distance: Distance1::Euclidean,
                 distance: Distance::Euclidean,
                 capacity: 40,
                 doc_ids_order: 100,
@@ -282,10 +282,14 @@ pub(crate) static KEYWORDS: phf::Map<UniCase<&'static str>, TokenKind> = phf_map
     UniCase::ascii("JWKS") => jwks_token_kind(), // Necessary because `phf_map!` doesn't support `cfg` attributes
 
     // Distance
+    UniCase::ascii("CHEBYSHEV") => TokenKind::Distance(DistanceKind::Chebyshev),
+    UniCase::ascii("COSINE") => TokenKind::Distance(DistanceKind::Cosine),
     UniCase::ascii("EUCLIDEAN") => TokenKind::Distance(DistanceKind::Euclidean),
-    UniCase::ascii("MANHATTAN") => TokenKind::Distance(DistanceKind::Manhattan),
+    UniCase::ascii("JACCARD") => TokenKind::Distance(DistanceKind::Jaccard),
    UniCase::ascii("HAMMING") => TokenKind::Distance(DistanceKind::Hamming),
+    UniCase::ascii("MANHATTAN") => TokenKind::Distance(DistanceKind::Manhattan),
     UniCase::ascii("MINKOWSKI") => TokenKind::Distance(DistanceKind::Minkowski),
+    UniCase::ascii("PEARSON") => TokenKind::Distance(DistanceKind::Pearson),
 
     // Change Feed keywords
     UniCase::ascii("ORIGINAL") => TokenKind::ChangeFeedInclude(ChangeFeedInclude::Original),
@@ -543,6 +543,7 @@ impl Parser<'_> {
 
                 res.index = Index::MTree(crate::sql::index::MTreeParams {
                     dimension,
+                    _distance: Default::default(),
                     distance,
                     capacity,
                     doc_ids_order,
@@ -375,13 +375,17 @@ impl Parser<'_> {
     pub fn parse_distance(&mut self) -> ParseResult<Distance> {
         let dist = match self.next().kind {
             TokenKind::Distance(x) => match x {
+                DistanceKind::Chebyshev => Distance::Chebyshev,
+                DistanceKind::Cosine => Distance::Cosine,
                 DistanceKind::Euclidean => Distance::Euclidean,
-                DistanceKind::Manhattan => Distance::Manhattan,
                 DistanceKind::Hamming => Distance::Hamming,
+                DistanceKind::Jaccard => Distance::Jaccard,
+                DistanceKind::Manhattan => Distance::Manhattan,
                 DistanceKind::Minkowski => {
                     let distance = self.next_token_value()?;
                     Distance::Minkowski(distance)
                 }
+                DistanceKind::Pearson => Distance::Pearson,
             },
             x => unexpected!(self, x, "a distance measure"),
         };
@@ -450,6 +450,7 @@ fn parse_define_index() {
             cols: Idioms(vec![Idiom(vec![Part::Field(Ident("a".to_owned()))]),]),
             index: Index::MTree(MTreeParams {
                 dimension: 4,
+                _distance: Default::default(),
                 distance: Distance::Minkowski(Number::Int(5)),
                 capacity: 6,
                 doc_ids_order: 7,
@@ -271,18 +271,24 @@ macro_rules! t {
     };
 
     // Distance
-    ("EUCLIDEAN") => {
-        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Euclidean)
-    };
-    ("MANHATTAN") => {
-        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Manhattan)
+    ("CHEBYSHEV") => {
+        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Chebyshev)
     };
+    ("COSINE") => {
+        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Cosine)
+    };
+    ("EUCLIDEAN") => {
+        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Euclidean)
+    };
+    ("HAMMING") => {
+        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Hamming)
+    };
+    ("JACCARD") => {
+        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Jaccard)
+    };
+    ("MANHATTAN") => {
+        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Manhattan)
+    };
     ("MAHALANOBIS") => {
         $crate::syn::v2::token::TokenKind::Distance(
             $crate::syn::v2::token::DistanceKind::Mahalanobis,
@@ -291,6 +297,9 @@ macro_rules! t {
     ("MINKOWSKI") => {
         $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Minkowski)
     };
+    ("PEARSON") => {
+        $crate::syn::v2::token::TokenKind::Distance($crate::syn::v2::token::DistanceKind::Pearson)
+    };
 
     ($t:tt) => {
         $crate::syn::v2::token::TokenKind::Keyword($crate::syn::v2::token::keyword_t!($t))
@@ -186,19 +186,27 @@ pub enum Delim {
 
 #[derive(Clone, Copy, Eq, PartialEq, Hash, Debug)]
 pub enum DistanceKind {
+    Chebyshev,
+    Cosine,
     Euclidean,
-    Manhattan,
     Hamming,
+    Jaccard,
+    Manhattan,
     Minkowski,
+    Pearson,
 }
 
 impl DistanceKind {
     pub fn as_str(&self) -> &'static str {
         match self {
+            DistanceKind::Chebyshev => "CHEBYSHEV",
+            DistanceKind::Cosine => "COSINE",
             DistanceKind::Euclidean => "EUCLIDEAN",
-            DistanceKind::Manhattan => "MANHATTAN",
             DistanceKind::Hamming => "HAMMING",
+            DistanceKind::Jaccard => "JACCARD",
+            DistanceKind::Manhattan => "MANHATTAN",
             DistanceKind::Minkowski => "MINKOWSKI",
+            DistanceKind::Pearson => "PEARSON",
         }
     }
 }
94 tests/common/docker.rs Normal file
@@ -0,0 +1,94 @@
+use http::{header, HeaderMap, StatusCode};
+use reqwest::Client;
+use std::process::Command;
+use std::time::{Duration, SystemTime};
+use tokio::time::sleep;
+use tracing::{debug, error, info, warn};
+
+pub struct DockerContainer {
+    id: String,
+    running: bool,
+}
+
+pub const DOCKER_EXPOSED_PORT: usize = 8000;
+
+impl DockerContainer {
+    pub fn start(version: &str, file_path: &str, user: &str, pass: &str) -> Self {
+        let docker_image = format!("surrealdb/surrealdb:{version}");
+        info!("Start Docker image {docker_image} with file {file_path}");
+        let mut args =
+            Arguments::new(["run", "-p", &format!("127.0.0.1:8000:{DOCKER_EXPOSED_PORT}"), "-d"]);
+        args.add([docker_image]);
+        args.add(["start", "--auth", "--user", user, "--pass", pass]);
+        args.add([format!("file:{file_path}")]);
+        let id = Self::docker(args);
+        Self {
+            id,
+            running: true,
+        }
+    }
+
+    pub fn logs(&self) {
+        info!("Logging Docker container {}", self.id);
+        Self::docker(Arguments::new(["logs", &self.id]));
+    }
+
+    pub fn stop(&mut self) {
+        if self.running {
+            info!("Stopping Docker container {}", self.id);
+            Self::docker(Arguments::new(["stop", &self.id]));
+            self.running = false;
+        }
+    }
+
+    pub fn extract_data_dir(&self, file_path: &str) {
+        let container_src_path = format!("{}:{file_path}", self.id);
+        info!("Extract directory from Docker container {}", container_src_path);
+        Self::docker(Arguments::new(["cp", &container_src_path, file_path]));
+    }
+
+    fn docker(args: Arguments) -> String {
+        let mut command = Command::new("docker");
+
+        let output = command.args(args.0).output().unwrap();
+        let std_out = String::from_utf8(output.stdout).unwrap().trim().to_string();
+        if !output.stderr.is_empty() {
+            error!("{}", String::from_utf8(output.stderr).unwrap());
+        }
+        assert_eq!(output.status.code(), Some(0), "Docker command failure: {:?}", command);
+        std_out
+    }
+}
+
+impl Drop for DockerContainer {
+    fn drop(&mut self) {
+        // Be sure the container is stopped
+        self.stop();
+        // Delete the container
+        info!("Delete Docker container {}", self.id);
+        Self::docker(Arguments::new(["rm", &self.id]));
+    }
+}
+
+struct Arguments(Vec<String>);
+
+impl Arguments {
+    fn new<I, S>(args: I) -> Self
+    where
+        I: IntoIterator<Item = S>,
+        S: Into<String>,
+    {
+        let mut a = Self(vec![]);
+        a.add(args);
+        a
+    }
+
+    fn add<I, S>(&mut self, args: I)
+    where
+        I: IntoIterator<Item = S>,
+        S: Into<String>,
+    {
+        for arg in args {
+            self.0.push(arg.into());
+        }
+    }
+}
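Taken together with the test file below, the container lifecycle is: start an older release, load and verify data over HTTP, stop it, then copy the data directory back to the host so the current build can open the same files in place. A condensed sketch of that flow (the version string and path are illustrative, not from the commit):

    use crate::common::docker::DockerContainer;

    // Illustrative round trip; runs inside the tests' tokio runtime.
    fn data_dir_round_trip() {
        let file_path = "/tmp/upgrade-test.db".to_string();
        let mut docker = DockerContainer::start("v1.2.0", &file_path, "root", "root");
        // ... create and verify data through the REST client ...
        docker.stop();
        // Copy the RocksDB files out of the stopped container.
        docker.extract_data_dir(&file_path);
        // Dropping `docker` stops the container if needed and removes it.
    }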
28 tests/common/expected.rs Normal file
@@ -0,0 +1,28 @@
+use serde_json::Value as JsonValue;
+
+pub enum Expected {
+    Any,
+    One(&'static str),
+    Two(&'static str, &'static str),
+}
+
+impl Expected {
+    pub fn check_results(&self, q: &str, results: &[JsonValue]) {
+        match self {
+            Expected::Any => {}
+            Expected::One(expected) => {
+                assert_eq!(results.len(), 1, "Wrong number of results for {}", q);
+                Self::check_json(q, &results[0], expected);
+            }
+            Expected::Two(expected1, expected2) => {
+                assert_eq!(results.len(), 2, "Wrong number of results for {}", q);
+                Self::check_json(q, &results[0], expected1);
+                Self::check_json(q, &results[1], expected2);
+            }
+        }
+    }
+
+    pub fn check_json(q: &str, result: &JsonValue, expected: &str) {
+        let expected: JsonValue = serde_json::from_str(expected).expect(expected);
+        assert_eq!(result, &expected, "Unexpected result on query {}", q);
+    }
+}
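For instance, a query expected to return exactly one row can be checked like this (a hypothetical call, not from the commit):

    use crate::common::expected::Expected;
    use serde_json::json;

    fn example_check() {
        // One row whose JSON must match the literal exactly.
        let expected = Expected::One("{\"name\":\"Tobie\"}");
        let results = vec![json!({"name": "Tobie"})];
        expected.check_results("SELECT name FROM account", &results);
    }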
@@ -1,8 +1,11 @@
 #![allow(unused_imports)]
 #![allow(dead_code)]
 
+pub mod docker;
 pub mod error;
+pub mod expected;
 pub mod format;
+pub mod rest_client;
 pub mod server;
 pub mod socket;
 
100 tests/common/rest_client.rs Normal file
@@ -0,0 +1,100 @@
+use crate::common::docker::DOCKER_EXPOSED_PORT;
+use crate::common::expected::Expected;
+use http::{header, HeaderMap, StatusCode};
+use reqwest::Client;
+use serde_json::Value as JsonValue;
+use std::time::{Duration, SystemTime};
+use tokio::time::sleep;
+use tracing::{debug, error, warn};
+
+// A very basic REST client.
+// The goal is to have a client that can connect to any version of SurrealDB,
+// using the REST API / sql endpoint.
+pub struct RestClient {
+    client: Client,
+    url: String,
+    user: String,
+    pass: String,
+}
+
+impl RestClient {
+    pub fn new(ns: &str, db: &str, user: &str, pass: &str) -> Self {
+        let mut headers = HeaderMap::new();
+        headers.insert("NS", ns.parse().unwrap());
+        headers.insert("DB", db.parse().unwrap());
+        headers.insert(header::ACCEPT, "application/json".parse().unwrap());
+        let client = Client::builder()
+            .connect_timeout(Duration::from_millis(10))
+            .default_headers(headers)
+            .build()
+            .expect("Client::builder()...build()");
+        Self {
+            client,
+            url: format!("http://127.0.0.1:{DOCKER_EXPOSED_PORT}/sql"),
+            user: user.to_string(),
+            pass: pass.to_string(),
+        }
+    }
+
+    pub async fn wait_for_connection(self, time_out: &Duration) -> Option<Self> {
+        sleep(Duration::from_secs(2)).await;
+        let start = SystemTime::now();
+        while start.elapsed().unwrap().le(time_out) {
+            sleep(Duration::from_secs(2)).await;
+            if let Some(r) = self.query("INFO FOR ROOT").await {
+                if r.status() == StatusCode::OK {
+                    return Some(self);
+                }
+            }
+            warn!("DB not yet responding");
+            sleep(Duration::from_secs(2)).await;
+        }
+        None
+    }
+
+    pub async fn query(&self, q: &str) -> Option<reqwest::Response> {
+        match self
+            .client
+            .post(&self.url)
+            .basic_auth(&self.user, Some(&self.pass))
+            .body(q.to_string())
+            .send()
+            .await
+        {
+            Ok(r) => Some(r),
+            Err(e) => {
+                error!("{e}");
+                None
+            }
+        }
+    }
+
+    pub async fn checked_query(&self, q: &str, expected: &Expected) {
+        let r = self.query(q).await.unwrap_or_else(|| panic!("No response for {q}"));
+        assert_eq!(
+            r.status(),
+            StatusCode::OK,
+            "Wrong response for {q} -> {}",
+            r.text().await.expect(q)
+        );
+        // Convert the result to JSON
+        let j: JsonValue = r.json().await.expect(q);
+        debug!("{q} => {j:#}");
+        // The result should be an array
+        let results_with_status = j.as_array().expect(q);
+        assert_eq!(results_with_status.len(), 1, "Wrong number of results on query {q}");
+        let result_with_status = &results_with_status[0];
+        // Check the status
+        let status =
+            result_with_status.get("status").unwrap_or_else(|| panic!("No status on query: {q}"));
+        assert_eq!(status.as_str(), Some("OK"), "Wrong status for {q} => {status:#}");
+        // Extract the results
+        let results =
+            result_with_status.get("result").unwrap_or_else(|| panic!("No result for query: {q}"));
+        if !matches!(expected, Expected::Any) {
+            // Check the results
+            let results = results.as_array().expect(q);
+            expected.check_results(q, results);
+        }
+    }
+}
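The handshake used by the upgrade tests is therefore: build the client, poll `INFO FOR ROOT` until the container answers with 200 OK, then issue checked queries. A minimal sketch (credentials and namespace are the test defaults defined in the test file below):

    use crate::common::expected::Expected;
    use crate::common::rest_client::RestClient;
    use std::time::Duration;

    // Runs inside the tests' tokio runtime.
    async fn connect_and_query() {
        let client = RestClient::new("test", "test", "root", "root")
            .wait_for_connection(&Duration::from_secs(180))
            .await
            .expect("server never came up");
        client.checked_query("INFO FOR DB", &Expected::Any).await;
    }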
246 tests/database_upgrade.rs Normal file
@@ -0,0 +1,246 @@
+#![allow(unused_imports)]
+#![allow(dead_code)]
+
+mod common;
+
+mod database_upgrade {
+    use super::common::docker::DockerContainer;
+    use super::common::expected::Expected;
+    use super::common::rest_client::RestClient;
+    use serde_json::Value as JsonValue;
+    use serial_test::serial;
+    use std::time::Duration;
+    use surrealdb::engine::any::{connect, Any};
+    use surrealdb::{Connection, Surreal};
+    use test_log::test;
+    use tracing::info;
+    use ulid::Ulid;
+
+    const CNX_TIMEOUT: Duration = Duration::from_secs(180);
+    const NS: &str = "test";
+    const DB: &str = "test";
+    const USER: &str = "root";
+    const PASS: &str = "root";
+
+    async fn upgrade_test_1_0(version: &str) {
+        // Start the Docker instance
+        let (path, mut docker, client) = start_docker(version).await;
+
+        // Create the data set
+        create_data_on_docker(&client, "FTS", &DATA_FTS).await;
+
+        // Check the data set
+        check_data_on_docker(&client, "FTS", &CHECK_FTS).await;
+        check_data_on_docker(&client, "DB", &CHECK_DB).await;
+
+        // Stop the Docker instance
+        docker.stop();
+
+        // Extract the database directory
+        docker.extract_data_dir(&path);
+
+        // Connect to a local instance
+        let db = new_local_instance(&path).await;
+
+        // Check that the data has properly migrated
+        check_migrated_data(&db, "DB", &CHECK_DB).await;
+        check_migrated_data(&db, "FTS", &CHECK_FTS).await;
+    }
+
+    #[test(tokio::test(flavor = "multi_thread"))]
+    #[cfg(feature = "storage-rocksdb")]
+    #[serial]
+    async fn upgrade_test_1_0_0() {
+        upgrade_test_1_0("1.0.0").await;
+    }
+
+    #[test(tokio::test(flavor = "multi_thread"))]
+    #[cfg(feature = "storage-rocksdb")]
+    #[serial]
+    async fn upgrade_test_1_0_1() {
+        upgrade_test_1_0("1.0.1").await;
+    }
+
+    async fn upgrade_test_1_1(version: &str) {
+        // Start the Docker instance
+        let (path, mut docker, client) = start_docker(version).await;
+
+        // Create the data set
+        create_data_on_docker(&client, "FTS", &DATA_FTS).await;
+        create_data_on_docker(&client, "MTREE", &DATA_MTREE).await;
+
+        // Check the data set
+        check_data_on_docker(&client, "DB", &CHECK_DB).await;
+        check_data_on_docker(&client, "FTS", &CHECK_FTS).await;
+        check_data_on_docker(&client, "MTREE", &CHECK_MTREE_RPC).await;
+
+        // Stop the Docker instance
+        docker.stop();
+
+        // Extract the database directory
+        docker.extract_data_dir(&path);
+
+        // Connect to a local instance
+        let db = new_local_instance(&path).await;
+
+        // Check that the data has properly migrated
+        check_migrated_data(&db, "DB", &CHECK_DB).await;
+        check_migrated_data(&db, "FTS", &CHECK_FTS).await;
+        check_migrated_data(&db, "MTREE", &CHECK_MTREE_DB).await;
+    }
+
+    #[test(tokio::test(flavor = "multi_thread"))]
+    #[cfg(feature = "storage-rocksdb")]
+    #[serial]
+    async fn upgrade_test_1_1_0() {
+        upgrade_test_1_1("v1.1.0").await;
+    }
+
+    #[test(tokio::test(flavor = "multi_thread"))]
+    #[cfg(feature = "storage-rocksdb")]
+    #[serial]
+    async fn upgrade_test_1_1_1() {
+        upgrade_test_1_1("v1.1.1").await;
+    }
+
+    async fn upgrade_test_1_2(version: &str) {
+        // Start the Docker instance
+        let (path, mut docker, client) = start_docker(version).await;
+
+        // Create the data set
+        create_data_on_docker(&client, "FTS", &DATA_FTS).await;
+        create_data_on_docker(&client, "MTREE", &DATA_MTREE).await;
+
+        // Check the data set
+        check_data_on_docker(&client, "DB", &CHECK_DB).await;
+        check_data_on_docker(&client, "FTS", &CHECK_FTS).await;
+        check_data_on_docker(&client, "MTREE", &CHECK_MTREE_RPC).await;
+
+        // Stop the Docker instance
+        docker.stop();
+
+        // Extract the database directory
+        docker.extract_data_dir(&path);
+
+        // Connect to a local instance
+        let db = new_local_instance(&path).await;
+
+        // Check that the data has properly migrated
+        check_migrated_data(&db, "DB", &CHECK_DB).await;
+        check_migrated_data(&db, "FTS", &CHECK_FTS).await;
+        check_migrated_data(&db, "MTREE", &CHECK_MTREE_DB).await;
+        check_migrated_data(&db, "KNN_BRUTEFORCE", &CHECK_KNN_BRUTEFORCE).await;
+    }
+
+    #[test(tokio::test(flavor = "multi_thread"))]
+    #[cfg(feature = "storage-rocksdb")]
+    #[serial]
+    async fn upgrade_test_1_2_0() {
+        upgrade_test_1_2("v1.2.0").await;
+    }
+
+    #[test(tokio::test(flavor = "multi_thread"))]
+    #[cfg(feature = "storage-rocksdb")]
+    #[serial]
+    async fn upgrade_test_1_2_1() {
+        upgrade_test_1_2("v1.2.1").await;
+    }
+
+    // *******
+    // DATASET
+    // *******
+
+    // Set of DATA for Full Text Search
+    const DATA_FTS: [&str; 5] = [
+        "DEFINE ANALYZER name TOKENIZERS class FILTERS lowercase,ngram(1,128)",
+        "DEFINE ANALYZER userdefinedid TOKENIZERS blank FILTERS lowercase,ngram(1,32)",
+        "DEFINE INDEX account_name_search_idx ON TABLE account COLUMNS name SEARCH ANALYZER name BM25(1.2,0.75) HIGHLIGHTS",
+        "DEFINE INDEX account_user_defined_id_search_idx ON TABLE account COLUMNS user_defined_id SEARCH ANALYZER userdefinedid BM25 HIGHLIGHTS",
+        "CREATE account SET name='Tobie', user_defined_id='Tobie'",
+    ];
+
+    // Set of QUERY and RESULT to check for Full Text Search
+    const CHECK_FTS: [Check; 1] =
+        [("SELECT name FROM account WHERE name @@ 'Tobie'", Expected::One("{\"name\":\"Tobie\"}"))];
+
+    // Set of DATA for VectorSearch and Knn Operator checking
+    const DATA_MTREE: [&str; 4] = [
+        "CREATE pts:1 SET point = [1,2,3,4]",
+        "CREATE pts:2 SET point = [4,5,6,7]",
+        "CREATE pts:3 SET point = [8,9,10,11]",
+        "DEFINE INDEX mt_pts ON pts FIELDS point MTREE DIMENSION 4",
+    ];
+
+    const CHECK_MTREE_RPC: [Check; 1] = [
+        ("SELECT id, vector::distance::euclidean(point, [2,3,4,5]) AS dist FROM pts WHERE point <2> [2,3,4,5]",
+        Expected::Two("{\"dist\": 2.0, \"id\": \"pts:1\"}", "{ \"dist\": 4.0, \"id\": \"pts:2\"}"))];
+
+    const CHECK_MTREE_DB: [Check; 1] = [
+        ("SELECT id, vector::distance::euclidean(point, [2,3,4,5]) AS dist FROM pts WHERE point <2> [2,3,4,5]",
+        Expected::Two("{\"dist\": 2.0, \"id\": {\"tb\": \"pts\", \"id\": {\"Number\": 1}}}", "{ \"dist\": 4.0, \"id\": {\"tb\": \"pts\", \"id\": {\"Number\": 2}}}"))];
+
+    const CHECK_KNN_BRUTEFORCE: [Check; 1] = [
+        ("SELECT id, vector::distance::euclidean(point, [2,3,4,5]) AS dist FROM pts WHERE point <2,EUCLIDEAN> [2,3,4,5]",
+        Expected::Two("{\"dist\": 2.0, \"id\": {\"tb\": \"pts\", \"id\": {\"Number\": 1}}}", "{ \"dist\": 4.0, \"id\": {\"tb\": \"pts\", \"id\": {\"Number\": 2}}}"))];
+
+    type Check = (&'static str, Expected);
+
+    const CHECK_DB: [Check; 1] = [("INFO FOR DB", Expected::Any)];
+
+    // *******
+    // HELPERS
+    // *******
+
+    async fn start_docker(docker_version: &str) -> (String, DockerContainer, RestClient) {
+        // Location of the database files (RocksDB) on the host
+        let file_path = format!("/tmp/{}.db", Ulid::new());
+        let docker = DockerContainer::start(docker_version, &file_path, USER, PASS);
+        let client = RestClient::new(NS, DB, USER, PASS)
+            .wait_for_connection(&CNX_TIMEOUT)
+            .await
+            .unwrap_or_else(|| {
+                docker.logs();
+                panic!("No connected client")
+            });
+        (file_path, docker, client)
+    }
+
+    async fn create_data_on_docker(client: &RestClient, info: &str, data: &[&str]) {
+        info!("Create {info} data on Docker's instance");
+        for l in data {
+            client.checked_query(l, &Expected::Any).await;
+        }
+    }
+
+    async fn check_data_on_docker(client: &RestClient, info: &str, queries: &[Check]) {
+        info!("Check {info} data on Docker's instance");
+        for (query, expected) in queries {
+            client.checked_query(query, expected).await;
+        }
+    }
+
+    async fn check_migrated_data(db: &Surreal<Any>, info: &str, queries: &[Check]) {
+        info!("Check migrated {info} data");
+        for (query, expected_results) in queries {
+            checked_query(db, query, expected_results).await;
+        }
+    }
+
+    // Executes the query, and prints the query out if the check does not pass
+    async fn checked_query<C>(db: &Surreal<C>, q: &str, expected: &Expected)
+    where
+        C: Connection,
+    {
+        let mut res = db.query(q).await.expect(q).check().expect(q);
+        assert_eq!(res.num_statements(), 1, "Wrong number of results on query {q}");
+        let results: Vec<JsonValue> = res.take(0).unwrap();
+        expected.check_results(q, &results);
+    }
+
+    async fn new_local_instance(file_path: &String) -> Surreal<Any> {
+        let db = connect(format!("file:{}", file_path)).await.unwrap();
+        db.use_ns(NS).await.unwrap();
+        db.use_db(DB).await.unwrap();
+        db
+    }
+}
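The harness makes adding a future release to the matrix mechanical: one async scenario plus a `#[test]` stub per version. A hypothetical extension (not part of this commit) would look like:

    // Inside `mod database_upgrade`, next to the scenarios above.
    async fn upgrade_test_1_3(version: &str) {
        let (path, mut docker, client) = start_docker(version).await;
        create_data_on_docker(&client, "FTS", &DATA_FTS).await;
        check_data_on_docker(&client, "FTS", &CHECK_FTS).await;
        docker.stop();
        docker.extract_data_dir(&path);
        let db = new_local_instance(&path).await;
        check_migrated_data(&db, "FTS", &CHECK_FTS).await;
    }

    #[test(tokio::test(flavor = "multi_thread"))]
    #[cfg(feature = "storage-rocksdb")]
    #[serial]
    async fn upgrade_test_1_3_0() {
        upgrade_test_1_3("v1.3.0").await;
    }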