Feat: In Memory index store (#3020)
parent 378df76cb0
commit a6c50cb5f5
39 changed files with 3009 additions and 1623 deletions

--- Cargo.lock (generated, 23 changes) ---
@@ -3014,15 +3014,6 @@ version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"

[[package]]
name = "lru"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7"
dependencies = [
"hashbrown 0.14.3",
]

[[package]]
name = "lz4-sys"
version = "1.9.4"

@@ -3994,6 +3985,18 @@ dependencies = [
"memchr",
]

[[package]]
name = "quick_cache"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f69f8d22fa3f34f3083d9a4375c038732c7a7e964de1beb81c544da92dfc40b8"
dependencies = [
"ahash 0.8.6",
"equivalent",
"hashbrown 0.14.3",
"parking_lot",
]

[[package]]
name = "quote"
version = "1.0.33"

@@ -5311,7 +5314,6 @@ dependencies = [
"indxdb",
"ipnet",
"lexicmp",
"lru",
"md-5",
"nanoid",
"native-tls",

@@ -5325,6 +5327,7 @@ dependencies = [
"pharos",
"pin-project-lite",
"pprof",
"quick_cache",
"radix_trie",
"rand 0.8.5",
"regex",
--- Cargo.toml ---
@@ -82,7 +82,6 @@ ipnet = "2.9.0"
js = { version = "=0.4.0-beta.4", package = "rquickjs", features = ["array-buffer", "bindgen", "classes", "futures", "loader", "macro", "parallel", "properties","rust-alloc"], optional = true }
jsonwebtoken = { version = "8.3.0-surreal.1", package = "surrealdb-jsonwebtoken" }
lexicmp = "0.1.0"
lru = "0.12.1"
md-5 = "0.10.6"
nanoid = "0.4.0"
native-tls = { version = "0.2.11", optional = true }

@@ -94,6 +93,7 @@ once_cell = "1.18.0"
path-clean = "1.0.1"
pbkdf2 = { version = "0.12.2", features = ["simple"] }
pin-project-lite = "0.2.13"
quick_cache = "0.4.0"
radix_trie = { version = "0.2.1", features = ["serde"] }
rand = "0.8.5"
regex = "1.10.2"
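The dependency swap above removes the `lru` crate and adds `quick_cache`, presumably as the backing store for the new in-memory index cache. A minimal sketch of the `quick_cache` 0.4 API (the key and value types here are illustrative, not the ones the index store actually uses):

```rust
use quick_cache::sync::Cache;

fn main() {
    // A thread-safe cache bounded to 20 items; entries are evicted
    // automatically once the capacity is exceeded.
    let cache: Cache<u64, String> = Cache::new(20);
    cache.insert(1, "node-1".to_string());
    // `get` returns a clone of the value, so callers never hold a lock guard.
    assert_eq!(cache.get(&1), Some("node-1".to_string()));
}
```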

--- benches: B-tree index ---
@@ -5,7 +5,8 @@ use std::fmt::Debug;
use std::time::Duration;
use surrealdb::idx::trees::bkeys::{BKeys, FstKeys, TrieKeys};
use surrealdb::idx::trees::btree::{BState, BTree, Payload};
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
use surrealdb::idx::trees::store::cache::TreeCache;
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeStore};
use surrealdb::kvs::{Datastore, Key, LockType::*, TransactionType::*};
use tokio::runtime::Runtime;

macro_rules! get_key_value {

@@ -24,12 +25,22 @@ fn bench_index_btree(c: &mut Criterion) {

group.bench_function("trees-insertion-fst", |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, FstKeys>(samples_len, |i| get_key_value!(samples[i])))
.iter(|| bench::<_, FstKeys>(samples_len, 100, |i| get_key_value!(samples[i])))
});

group.bench_function("trees-insertion-trie", |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, TrieKeys>(samples_len, |i| get_key_value!(samples[i])))
.iter(|| bench::<_, TrieKeys>(samples_len, 100, |i| get_key_value!(samples[i])))
});

group.bench_function("trees-insertion-fst-fullcache", |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, FstKeys>(samples_len, 0, |i| get_key_value!(samples[i])))
});

group.bench_function("trees-insertion-trie-fullcache", |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, TrieKeys>(samples_len, 0, |i| get_key_value!(samples[i])))
});

group.finish();

@@ -47,23 +58,24 @@ fn setup() -> (usize, Vec<usize>) {
(samples_len, samples)
}

async fn bench<F, BK>(samples_size: usize, sample_provider: F)
async fn bench<F, BK>(samples_size: usize, cache_size: usize, sample_provider: F)
where
F: Fn(usize) -> (Key, Payload),
BK: BKeys + Default + Debug,
BK: BKeys + Clone + Default + Debug,
{
let ds = Datastore::new("memory").await.unwrap();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t = BTree::<BK>::new(BState::new(100));
let s = TreeNodeStore::new(TreeNodeProvider::Debug, TreeStoreType::Write, 20);
let mut s = s.lock().await;
let c = TreeCache::new(0, TreeNodeProvider::Debug, cache_size);
let mut s = TreeStore::new(TreeNodeProvider::Debug, c, Write).await;
for i in 0..samples_size {
let (key, payload) = sample_provider(i);
// Insert the sample
t.insert(&mut tx, &mut s, key.clone(), payload).await.unwrap();
// Search for it
black_box(t.search(&mut tx, &mut s, &key).await.unwrap());
black_box(t.search_mut(&mut tx, &mut s, &key).await.unwrap());
}
s.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}
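A note on the new `cache_size` parameter threaded through `bench` above: judging by the `-fullcache` bench variants, a size of 0 appears to select a full (unbounded) node cache, while any other value bounds how many tree nodes stay in memory. A hypothetical sketch of that convention (not the actual TreeCache implementation):

```rust
// Hypothetical illustration of the cache-size convention used by the benches:
// 0 means "keep every node", N means "keep at most N nodes".
enum NodeCache {
    Full,       // cache_size == 0
    Lru(usize), // cache_size > 0
}

fn cache_kind(cache_size: usize) -> NodeCache {
    match cache_size {
        0 => NodeCache::Full,
        n => NodeCache::Lru(n),
    }
}
```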

--- benches: M-tree index ---
@@ -7,7 +7,8 @@ use std::sync::Arc;
use std::time::Duration;
use surrealdb::idx::docids::DocId;
use surrealdb::idx::trees::mtree::{MState, MTree};
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
use surrealdb::idx::trees::store::cache::TreeCache;
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeStore};
use surrealdb::idx::trees::vector::Vector;
use surrealdb::kvs::Datastore;
use surrealdb::kvs::LockType::Optimistic;

@@ -16,19 +17,35 @@ use surrealdb::sql::index::Distance;
use tokio::runtime::Runtime;

fn bench_index_mtree_dim_3(c: &mut Criterion) {
bench_index_mtree(c, 1_000, 100_000, 3, 120);
bench_index_mtree(c, 1_000, 100_000, 3, 120, 100);
}

fn bench_index_mtree_dim_3_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 1_000, 100_000, 3, 120, 0);
}

fn bench_index_mtree_dim_50(c: &mut Criterion) {
bench_index_mtree(c, 100, 10_000, 50, 20);
bench_index_mtree(c, 100, 10_000, 50, 20, 100);
}

fn bench_index_mtree_dim_50_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 100, 10_000, 50, 20, 0);
}

fn bench_index_mtree_dim_300(c: &mut Criterion) {
bench_index_mtree(c, 50, 5_000, 300, 40);
bench_index_mtree(c, 50, 5_000, 300, 40, 100);
}

fn bench_index_mtree_dim_300_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 50, 5_000, 300, 40, 0);
}

fn bench_index_mtree_dim_2048(c: &mut Criterion) {
bench_index_mtree(c, 10, 1_000, 2048, 60);
bench_index_mtree(c, 10, 1_000, 2048, 60, 100);
}

fn bench_index_mtree_dim_2048_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 10, 1_000, 2048, 60, 0);
}

fn bench_index_mtree(

@@ -37,6 +54,7 @@ fn bench_index_mtree(
release_samples_len: usize,
vector_dimension: usize,
measurement_secs: u64,
cache_size: usize,
) {
let samples_len = if cfg!(debug_assertions) {
debug_samples_len // Debug is slow

@@ -50,22 +68,26 @@ fn bench_index_mtree(
// Indexing benchmark group
{
let mut group = get_group(c, "index_mtree_insert", samples_len, measurement_secs);
let id = format!("len_{}_dim_{}", samples_len, vector_dimension);
let id = format!("len_{}_dim_{}_cache_{}", samples_len, vector_dimension, cache_size);
group.bench_function(id, |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| insert_objects(&ds, samples_len, vector_dimension));
.iter(|| insert_objects(&ds, samples_len, vector_dimension, cache_size));
});
group.finish();
}

// Knn lookup benchmark group
{
let mut group = get_group(c, "index_mtree_lookup", 100_000, 10);
let mut group = get_group(c, "index_mtree_lookup", samples_len, 10);
for knn in [1, 10] {
let id = format!("knn_{}_len_{}_dim_{}", knn, samples_len, vector_dimension);
let id = format!(
"knn_{}_len_{}_dim_{}_cache_{}",
knn, samples_len, vector_dimension, cache_size
);
group.bench_function(id, |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| knn_lookup_objects(&ds, 100_000, vector_dimension, knn));
b.to_async(Runtime::new().unwrap()).iter(|| {
knn_lookup_objects(&ds, samples_len, vector_dimension, knn, cache_size)
});
});
}
group.finish();

@@ -96,26 +118,38 @@ fn mtree() -> MTree {
MTree::new(MState::new(40), Distance::Euclidean)
}

async fn insert_objects(ds: &Datastore, samples_size: usize, vector_size: usize) {
async fn insert_objects(
ds: &Datastore,
samples_size: usize,
vector_size: usize,
cache_size: usize,
) {
let mut rng = thread_rng();
let mut t = mtree();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let s = TreeNodeStore::new(TreeNodeProvider::Debug, TreeStoreType::Write, 20);
let mut s = s.lock().await;
let c = TreeCache::new(0, TreeNodeProvider::Debug, cache_size);
let mut s = TreeStore::new(TreeNodeProvider::Debug, c.clone(), Write).await;
for i in 0..samples_size {
let object = random_object(&mut rng, vector_size);
// Insert the sample
t.insert(&mut tx, &mut s, object, i as DocId).await.unwrap();
}
s.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}

async fn knn_lookup_objects(ds: &Datastore, samples_size: usize, vector_size: usize, knn: usize) {
async fn knn_lookup_objects(
ds: &Datastore,
samples_size: usize,
vector_size: usize,
knn: usize,
cache_size: usize,
) {
let mut rng = thread_rng();
let t = mtree();
let mut tx = ds.transaction(Read, Optimistic).await.unwrap();
let s = TreeNodeStore::new(TreeNodeProvider::Debug, TreeStoreType::Read, 20);
let mut s = s.lock().await;
let c = TreeCache::new(0, TreeNodeProvider::Debug, cache_size);
let mut s = TreeStore::new(TreeNodeProvider::Debug, c, Read).await;
for _ in 0..samples_size {
let object = Arc::new(random_object(&mut rng, vector_size));
// Insert the sample

@@ -127,8 +161,12 @@ async fn knn_lookup_objects(ds: &Datastore, samples_size: usize, vector_size: usize
criterion_group!(
benches,
bench_index_mtree_dim_3,
bench_index_mtree_dim_3_full_cache,
bench_index_mtree_dim_50,
bench_index_mtree_dim_50_full_cache,
bench_index_mtree_dim_300,
bench_index_mtree_dim_300_full_cache,
bench_index_mtree_dim_2048,
bench_index_mtree_dim_2048_full_cache
);
criterion_main!(benches);
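For readers unfamiliar with the harness: the benches above are standard Criterion with an async Tokio executor (the `async_tokio` feature), one benchmark id per cache size so results stay comparable across runs. The same shape in a minimal, self-contained form, with a stub standing in for `insert_objects`/`knn_lookup_objects`:

```rust
use criterion::{criterion_group, criterion_main, Criterion};
use tokio::runtime::Runtime;

// Stand-in for the real workload (insert_objects / knn_lookup_objects).
async fn do_work(cache_size: usize) {
    let _ = cache_size;
}

fn bench_demo(c: &mut Criterion) {
    // One id per cache size, as in the mtree benches above.
    for cache_size in [100, 0] {
        let id = format!("demo_cache_{cache_size}");
        c.bench_function(&id, |b| {
            b.to_async(Runtime::new().unwrap()).iter(|| do_work(cache_size));
        });
    }
}

criterion_group!(benches, bench_demo);
criterion_main!(benches);
```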

--- ctx: Context ---
@@ -6,6 +6,7 @@ use crate::dbs::capabilities::NetTarget;
use crate::dbs::{Capabilities, Notification};
use crate::err::Error;
use crate::idx::planner::QueryPlanner;
use crate::idx::trees::store::IndexStores;
use crate::sql::value::Value;
use channel::Sender;
use std::borrow::Cow;

@@ -43,6 +44,8 @@ pub struct Context<'a> {
notifications: Option<Sender<Notification>>,
// An optional query planner
query_planner: Option<&'a QueryPlanner<'a>>,
// The index store
index_stores: IndexStores,
// Capabilities
capabilities: Arc<Capabilities>,
}

@@ -65,9 +68,29 @@ impl<'a> Debug for Context<'a> {
}

impl<'a> Context<'a> {
pub(crate) fn from_ds(
time_out: Option<Duration>,
capabilities: Capabilities,
index_stores: IndexStores,
) -> Context<'a> {
let mut ctx = Self {
values: HashMap::default(),
parent: None,
deadline: None,
cancelled: Arc::new(AtomicBool::new(false)),
notifications: None,
query_planner: None,
capabilities: Arc::new(capabilities),
index_stores,
};
if let Some(timeout) = time_out {
ctx.add_timeout(timeout);
}
ctx
}
/// Create an empty background context.
pub fn background() -> Self {
Context {
Self {
values: HashMap::default(),
parent: None,
deadline: None,

@@ -75,6 +98,7 @@ impl<'a> Context<'a> {
notifications: None,
query_planner: None,
capabilities: Arc::new(Capabilities::default()),
index_stores: IndexStores::default(),
}
}

@@ -88,6 +112,7 @@ impl<'a> Context<'a> {
notifications: parent.notifications.clone(),
query_planner: parent.query_planner,
capabilities: parent.capabilities.clone(),
index_stores: parent.index_stores.clone(),
}
}

@@ -148,6 +173,11 @@ impl<'a> Context<'a> {
self.query_planner
}

/// Get the index_store for this context/ds
pub(crate) fn get_index_stores(&self) -> &IndexStores {
&self.index_stores
}

/// Check if the context is done. If it returns `None` the operation may
/// proceed, otherwise the operation should be stopped.
pub fn done(&self) -> Option<Reason> {
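The net effect of these Context changes: every context created from the datastore carries a handle to one shared `IndexStores`, and child contexts inherit it by cloning the handle. A self-contained illustration of that pattern, assuming (as the cheap per-context clone suggests) that `IndexStores` is an Arc-backed handle; the real type is defined elsewhere in this change:

```rust
use std::sync::Arc;

// Stand-in for IndexStores: a cheaply clonable, reference-counted handle.
#[derive(Clone, Default)]
struct Stores(Arc<Vec<String>>);

struct Ctx {
    stores: Stores,
}

impl Ctx {
    fn background() -> Self {
        Ctx { stores: Stores::default() }
    }
    // Equivalent to `index_stores: parent.index_stores.clone()` above:
    // the clone shares the same underlying stores.
    fn child(&self) -> Self {
        Ctx { stores: self.stores.clone() }
    }
}

fn main() {
    let root = Ctx::background();
    let child = root.child();
    assert!(Arc::ptr_eq(&root.stores.0, &child.stores.0));
}
```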

--- dbs: Options ---
@@ -409,7 +409,7 @@ impl Options {

/// Get current Node ID
pub fn id(&self) -> Result<Uuid, Error> {
self.id.ok_or(Error::Unreachable)
self.id.ok_or(Error::Unreachable("Options::id"))
}

/// Get currently selected NS

--- doc: index operations ---
@@ -5,9 +5,9 @@ use crate::doc::{CursorDoc, Document};
use crate::err::Error;
use crate::idx::ft::FtIndex;
use crate::idx::trees::mtree::MTreeIndex;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase;
use crate::key;
use crate::kvs::TransactionType;
use crate::sql::array::Array;
use crate::sql::index::{Index, MTreeParams, SearchParams};
use crate::sql::statements::DefineIndexStatement;

@@ -53,7 +53,7 @@ impl<'a> Document<'a> {
Index::Uniq => ic.index_unique(txn).await?,
Index::Idx => ic.index_non_unique(txn).await?,
Index::Search(p) => ic.index_full_text(ctx, txn, p).await?,
Index::MTree(p) => ic.index_mtree(txn, p).await?,
Index::MTree(p) => ic.index_mtree(ctx, txn, p).await?,
};
}
}

@@ -335,7 +335,16 @@ impl<'a> IndexOperation<'a> {
) -> Result<(), Error> {
let ikb = IndexKeyBase::new(self.opt, self.ix);

let mut ft = FtIndex::new(self.opt, txn, &p.az, ikb, p, TreeStoreType::Write).await?;
let mut ft = FtIndex::new(
ctx.get_index_stores(),
self.opt,
txn,
&p.az,
ikb,
p,
TransactionType::Write,
)
.await?;

if let Some(n) = self.n.take() {
ft.index_document(ctx, self.opt, txn, self.rid, n).await?;

@@ -345,10 +354,17 @@ impl<'a> IndexOperation<'a> {
ft.finish(txn).await
}

async fn index_mtree(&mut self, txn: &Transaction, p: &MTreeParams) -> Result<(), Error> {
async fn index_mtree(
&mut self,
ctx: &Context<'_>,
txn: &Transaction,
p: &MTreeParams,
) -> Result<(), Error> {
let mut tx = txn.lock().await;
let ikb = IndexKeyBase::new(self.opt, self.ix);
let mut mt = MTreeIndex::new(&mut tx, ikb, p, TreeStoreType::Write).await?;
let mut mt =
MTreeIndex::new(ctx.get_index_stores(), &mut tx, ikb, p, TransactionType::Write)
.await?;
// Delete the old index data
if let Some(o) = self.o.take() {
mt.remove_document(&mut tx, self.rid, o).await?;

--- err: Error ---
@@ -46,8 +46,8 @@ pub enum Error {
RetryWithId(Thing),

/// The database encountered unreachable logic
#[error("The database encountered unreachable logic")]
Unreachable,
#[error("The database encountered unreachable logic: {0}")]
Unreachable(&'static str),

/// Statement has been deprecated
#[error("{0}")]

@@ -619,8 +619,8 @@ pub enum Error {
Revision(#[from] RevisionError),

/// The index has been found to be inconsistent
#[error("Index is corrupted")]
CorruptedIndex,
#[error("Index is corrupted: {0}")]
CorruptedIndex(&'static str),

/// The query planner did not find an index able to support the match @@ or knn <> operator for a given expression
#[error("There was no suitable index supporting the expression '{value}'")]
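Both variants now carry a `&'static str` naming the failing call site, which `thiserror` interpolates via `{0}`. A minimal reproduction of the mechanism, using a demo enum rather than the crate's real `Error`:

```rust
use thiserror::Error;

#[derive(Error, Debug)]
enum DemoError {
    #[error("The database encountered unreachable logic: {0}")]
    Unreachable(&'static str),
    #[error("Index is corrupted: {0}")]
    CorruptedIndex(&'static str),
}

fn main() {
    let e = DemoError::Unreachable("Options::id");
    assert_eq!(
        e.to_string(),
        "The database encountered unreachable logic: Options::id"
    );
}
```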

--- idx: DocIds ---
@@ -1,14 +1,12 @@
use crate::err::Error;
use crate::idx::trees::bkeys::TrieKeys;
use crate::idx::trees::btree::{BStatistics, BTree, BTreeNodeStore};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
use crate::idx::trees::btree::{BStatistics, BTree, BTreeStore};
use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{trees, IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction};
use crate::kvs::{Key, Transaction, TransactionType};
use revision::revisioned;
use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::Mutex;

pub type DocId = u64;

@@ -18,35 +16,41 @@ pub(crate) struct DocIds {
state_key: Key,
index_key_base: IndexKeyBase,
btree: BTree<TrieKeys>,
store: Arc<Mutex<BTreeNodeStore<TrieKeys>>>,
store: BTreeStore<TrieKeys>,
available_ids: Option<RoaringTreemap>,
next_doc_id: DocId,
updated: bool,
}

impl DocIds {
pub(in crate::idx) async fn new(
ixs: &IndexStores,
tx: &mut Transaction,
index_key_base: IndexKeyBase,
tt: TransactionType,
ikb: IndexKeyBase,
default_btree_order: u32,
store_type: TreeStoreType,
cache_size: u32,
) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bd_key(None);
let state_key: Key = ikb.new_bd_key(None);
let state: State = if let Some(val) = tx.get(state_key.clone()).await? {
State::try_from_val(val)?
} else {
State::new(default_btree_order)
};
let store =
TreeNodeStore::new(TreeNodeProvider::DocIds(index_key_base.clone()), store_type, 20);
let store = ixs
.get_store_btree_trie(
TreeNodeProvider::DocIds(ikb.clone()),
state.btree.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self {
state_key,
index_key_base,
index_key_base: ikb,
btree: BTree::new(state.btree),
store,
available_ids: state.available_ids,
next_doc_id: state.next_doc_id,
updated: false,
})
}

@@ -72,8 +76,7 @@ impl DocIds {
tx: &mut Transaction,
doc_key: Key,
) -> Result<Option<DocId>, Error> {
let mut store = self.store.lock().await;
self.btree.search(tx, &mut store, &doc_key).await
self.btree.search(tx, &self.store, &doc_key).await
}

/// Returns the doc_id for the given doc_key.

@@ -84,16 +87,13 @@ impl DocIds {
doc_key: Key,
) -> Result<Resolved, Error> {
{
let mut store = self.store.lock().await;
if let Some(doc_id) = self.btree.search(tx, &mut store, &doc_key).await? {
if let Some(doc_id) = self.btree.search_mut(tx, &mut self.store, &doc_key).await? {
return Ok(Resolved::Existing(doc_id));
}
}
let doc_id = self.get_next_doc_id();
tx.set(self.index_key_base.new_bi_key(doc_id), doc_key.clone()).await?;
let mut store = self.store.lock().await;
self.btree.insert(tx, &mut store, doc_key, doc_id).await?;
self.updated = true;
self.btree.insert(tx, &mut self.store, doc_key, doc_id).await?;
Ok(Resolved::New(doc_id))
}

@@ -102,8 +102,7 @@ impl DocIds {
tx: &mut Transaction,
doc_key: Key,
) -> Result<Option<DocId>, Error> {
let mut store = self.store.lock().await;
if let Some(doc_id) = self.btree.delete(tx, &mut store, doc_key).await? {
if let Some(doc_id) = self.btree.delete(tx, &mut self.store, doc_key).await? {
tx.del(self.index_key_base.new_bi_key(doc_id)).await?;
if let Some(available_ids) = &mut self.available_ids {
available_ids.insert(doc_id);

@@ -112,7 +111,6 @@ impl DocIds {
available_ids.insert(doc_id);
self.available_ids = Some(available_ids);
}
self.updated = true;
Ok(Some(doc_id))
} else {
Ok(None)

@@ -136,15 +134,14 @@ impl DocIds {
&self,
tx: &mut Transaction,
) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await;
self.btree.statistics(tx, &mut store).await
self.btree.statistics(tx, &self.store).await
}

pub(in crate::idx) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
let updated = self.store.lock().await.finish(tx).await?;
if self.updated || updated {
if self.store.finish(tx).await? {
let btree = self.btree.inc_generation().clone();
let state = State {
btree: self.btree.get_state().clone(),
btree,
available_ids: self.available_ids.take(),
next_doc_id: self.next_doc_id,
};
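The rewritten `finish` above captures the new persistence protocol: the store reports whether it actually wrote anything, and only then is the B-tree state re-saved under a bumped generation (the generation presumably being what lets the shared cache distinguish fresh nodes from stale ones). A self-contained sketch of that save-only-if-dirty shape, with simplified stand-in types:

```rust
struct State {
    generation: u64,
}

struct Store {
    dirty: bool,
}

impl Store {
    // Stands in for BTreeStore::finish(tx): flushes pending updates and
    // reports whether anything changed.
    fn finish(&mut self) -> bool {
        std::mem::take(&mut self.dirty)
    }
}

fn finish(store: &mut Store, state: &mut State) {
    if store.finish() {
        state.generation += 1; // stands in for BTree::inc_generation()
        // ...the updated state would be serialized under its state key here...
    }
}

fn main() {
    let (mut store, mut state) = (Store { dirty: true }, State { generation: 0 });
    finish(&mut store, &mut state);
    assert_eq!(state.generation, 1);
    finish(&mut store, &mut state); // nothing dirty: generation unchanged
    assert_eq!(state.generation, 1);
}
```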

@@ -199,16 +196,18 @@ impl Resolved {
#[cfg(test)]
mod tests {
use crate::idx::docids::{DocIds, Resolved};
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType::*};
use crate::kvs::TransactionType::*;
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType};

const BTREE_ORDER: u32 = 7;

async fn get_doc_ids(ds: &Datastore, store_type: TreeStoreType) -> (Transaction, DocIds) {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
async fn new_operation(ds: &Datastore, tt: TransactionType) -> (Transaction, DocIds) {
let mut tx = ds.transaction(tt, Optimistic).await.unwrap();
let d =
DocIds::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, store_type).await.unwrap();
DocIds::new(ds.index_store(), &mut tx, tt, IndexKeyBase::default(), BTREE_ORDER, 100)
.await
.unwrap();
(tx, d)
}

@@ -223,37 +222,43 @@ mod tests {

// Resolve a first doc key
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
let doc_id = d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap();
finish(tx, d).await;

let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 1);
assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into()));
finish(tx, d).await;
assert_eq!(doc_id, Resolved::New(0));
}

// Resolve the same doc key
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
let doc_id = d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap();
finish(tx, d).await;

let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 1);
assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into()));
finish(tx, d).await;
assert_eq!(doc_id, Resolved::Existing(0));
}

// Resolve another single doc key
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
let doc_id = d.resolve_doc_id(&mut tx, "Bar".into()).await.unwrap();
finish(tx, d).await;

let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 2);
assert_eq!(d.get_doc_key(&mut tx, 1).await.unwrap(), Some("Bar".into()));
finish(tx, d).await;
assert_eq!(doc_id, Resolved::New(1));
}

// Resolve another two existing doc keys and two new doc keys (interlaced)
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(
d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(),
Resolved::Existing(0)

@@ -264,12 +269,13 @@ mod tests {
Resolved::Existing(1)
);
assert_eq!(d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(), Resolved::New(3));
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 4);
finish(tx, d).await;
let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 4);
}

{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(
d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(),
Resolved::Existing(0)

@@ -286,12 +292,13 @@ mod tests {
d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(),
Resolved::Existing(3)
);
finish(tx, d).await;
let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into()));
assert_eq!(d.get_doc_key(&mut tx, 1).await.unwrap(), Some("Bar".into()));
assert_eq!(d.get_doc_key(&mut tx, 2).await.unwrap(), Some("Hello".into()));
assert_eq!(d.get_doc_key(&mut tx, 3).await.unwrap(), Some("World".into()));
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 4);
finish(tx, d).await;
}
}

@@ -301,7 +308,7 @@ mod tests {

// Create two docs
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(), Resolved::New(0));
assert_eq!(d.resolve_doc_id(&mut tx, "Bar".into()).await.unwrap(), Resolved::New(1));
finish(tx, d).await;

@@ -309,7 +316,7 @@ mod tests {

// Remove doc 1
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Dummy".into()).await.unwrap(), None);
assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), Some(0));
finish(tx, d).await;

@@ -317,21 +324,21 @@ mod tests {

// Check 'Foo' has been removed
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), None);
finish(tx, d).await;
}

// Insert a new doc - should take the available id 1
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.resolve_doc_id(&mut tx, "Hello".into()).await.unwrap(), Resolved::New(0));
finish(tx, d).await;
}

// Remove doc 2
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Dummy".into()).await.unwrap(), None);
assert_eq!(d.remove_doc(&mut tx, "Bar".into()).await.unwrap(), Some(1));
finish(tx, d).await;

@@ -339,14 +346,14 @@ mod tests {

// Check 'Bar' has been removed
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), None);
finish(tx, d).await;
}

// Insert a new doc - should take the available id 2
{
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await;
let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(), Resolved::New(1));
finish(tx, d).await;
}

--- idx: DocLengths ---
@@ -1,36 +1,42 @@
use crate::err::Error;
use crate::idx::docids::DocId;
use crate::idx::trees::bkeys::TrieKeys;
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore, Payload};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore, Payload};
use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction};
use std::sync::Arc;
use tokio::sync::Mutex;
use crate::kvs::{Key, Transaction, TransactionType};

pub(super) type DocLength = u64;

pub(super) struct DocLengths {
state_key: Key,
btree: BTree<TrieKeys>,
store: Arc<Mutex<BTreeNodeStore<TrieKeys>>>,
store: BTreeStore<TrieKeys>,
}

impl DocLengths {
pub(super) async fn new(
ixs: &IndexStores,
tx: &mut Transaction,
index_key_base: IndexKeyBase,
ikb: IndexKeyBase,
default_btree_order: u32,
store_type: TreeStoreType,
tt: TransactionType,
cache_size: u32,
) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bl_key(None);
let state_key: Key = ikb.new_bl_key(None);
let state: BState = if let Some(val) = tx.get(state_key.clone()).await? {
BState::try_from_val(val)?
} else {
BState::new(default_btree_order)
};
let store =
TreeNodeStore::new(TreeNodeProvider::DocLengths(index_key_base), store_type, 20);
let store = ixs
.get_store_btree_trie(
TreeNodeProvider::DocLengths(ikb),
state.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self {
state_key,
btree: BTree::new(state),

@@ -43,8 +49,15 @@ impl DocLengths {
tx: &mut Transaction,
doc_id: DocId,
) -> Result<Option<DocLength>, Error> {
let mut store = self.store.lock().await;
self.btree.search(tx, &mut store, &doc_id.to_be_bytes().to_vec()).await
self.btree.search(tx, &self.store, &doc_id.to_be_bytes().to_vec()).await
}

pub(super) async fn get_doc_length_mut(
&mut self,
tx: &mut Transaction,
doc_id: DocId,
) -> Result<Option<DocLength>, Error> {
self.btree.search_mut(tx, &mut self.store, &doc_id.to_be_bytes().to_vec()).await
}

pub(super) async fn set_doc_length(

@@ -53,8 +66,8 @@ impl DocLengths {
doc_id: DocId,
doc_length: DocLength,
) -> Result<(), Error> {
let mut store = self.store.lock().await;
self.btree.insert(tx, &mut store, doc_id.to_be_bytes().to_vec(), doc_length).await
self.btree.insert(tx, &mut self.store, doc_id.to_be_bytes().to_vec(), doc_length).await?;
Ok(())
}

pub(super) async fn remove_doc_length(

@@ -62,18 +75,18 @@ impl DocLengths {
tx: &mut Transaction,
doc_id: DocId,
) -> Result<Option<Payload>, Error> {
let mut store = self.store.lock().await;
self.btree.delete(tx, &mut store, doc_id.to_be_bytes().to_vec()).await
self.btree.delete(tx, &mut self.store, doc_id.to_be_bytes().to_vec()).await
}

pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await;
self.btree.statistics(tx, &mut store).await
self.btree.statistics(tx, &self.store).await
}

pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
self.store.lock().await.finish(tx).await?;
self.btree.get_state().finish(tx, &self.state_key).await?;
pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
if self.store.finish(tx).await? {
let state = self.btree.inc_generation();
tx.set(self.state_key.clone(), state.try_to_val()?).await?;
}
Ok(())
}
}
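Worth noting in the hunks above: the API now distinguishes `search` (takes `&self.store`) from `search_mut` (takes `&mut self.store`). A hedged illustration of why such a split makes sense for a cached store: reads can share the store immutably, while the mutable variant may also warm an in-memory cache on a miss (a plain map stands in for the cached tree nodes here):

```rust
use std::collections::HashMap;

struct Store {
    cache: HashMap<Vec<u8>, u64>,
}

impl Store {
    // Read-only lookup: usable behind a shared reference.
    fn search(&self, key: &[u8]) -> Option<u64> {
        self.cache.get(key).copied()
    }
    // Mutable lookup: may populate the cache on a miss.
    fn search_mut(&mut self, key: &[u8], load: impl FnOnce() -> u64) -> u64 {
        *self.cache.entry(key.to_vec()).or_insert_with(load)
    }
}

fn main() {
    let mut s = Store { cache: HashMap::new() };
    assert_eq!(s.search(b"doc"), None);
    assert_eq!(s.search_mut(b"doc", || 42), 42); // warms the cache
    assert_eq!(s.search(b"doc"), Some(42));
}
```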

@@ -81,9 +94,26 @@ impl DocLengths {
#[cfg(test)]
mod tests {
use crate::idx::ft::doclength::DocLengths;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, TransactionType::*};
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType};

async fn doc_length(
ds: &Datastore,
order: u32,
tt: TransactionType,
) -> (Transaction, DocLengths) {
let mut tx = ds.transaction(TransactionType::Write, Optimistic).await.unwrap();
let dl =
DocLengths::new(ds.index_store(), &mut tx, IndexKeyBase::default(), order, tt, 100)
.await
.unwrap();
(tx, dl)
}

async fn finish(mut l: DocLengths, mut tx: Transaction) {
l.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap()
}

#[tokio::test]
async fn test_doc_lengths() {

@@ -91,49 +121,58 @@ mod tests {

let ds = Datastore::new("memory").await.unwrap();

// Check empty state
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let l = DocLengths::new(
&mut tx,
IndexKeyBase::default(),
BTREE_ORDER,
TreeStoreType::Traversal,
)
.await
.unwrap();
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 0);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
assert_eq!(dl, None);
{
// Check empty state
let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 0);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
assert_eq!(dl, None);
tx.cancel().await.unwrap();
}

// Set a doc length
let mut l =
DocLengths::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, TreeStoreType::Write)
.await
.unwrap();
l.set_doc_length(&mut tx, 99, 199).await.unwrap();
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
l.finish(&mut tx).await.unwrap();
assert_eq!(dl, Some(199));
{
// Set a doc length
let (mut tx, mut l) = doc_length(&ds, BTREE_ORDER, TransactionType::Write).await;
l.set_doc_length(&mut tx, 99, 199).await.unwrap();
finish(l, tx).await;
}

// Update doc length
let mut l =
DocLengths::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, TreeStoreType::Write)
.await
.unwrap();
l.set_doc_length(&mut tx, 99, 299).await.unwrap();
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
l.finish(&mut tx).await.unwrap();
assert_eq!(dl, Some(299));
{
let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
assert_eq!(dl, Some(199));
tx.cancel().await.unwrap();
}

// Remove doc lengths
let mut l =
DocLengths::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, TreeStoreType::Write)
.await
.unwrap();
assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), Some(299));
assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), None);
tx.commit().await.unwrap()
{
// Update doc length
let (mut tx, mut l) = doc_length(&ds, BTREE_ORDER, TransactionType::Write).await;
l.set_doc_length(&mut tx, 99, 299).await.unwrap();
finish(l, tx).await;
}

{
let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
assert_eq!(dl, Some(299));
tx.cancel().await.unwrap();
}

{
// Remove doc lengths
let (mut tx, mut l) = doc_length(&ds, BTREE_ORDER, TransactionType::Write).await;
assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), Some(299));
assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), None);
finish(l, tx).await;
}

{
let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
assert_eq!(dl, None);
tx.cancel().await.unwrap();
}
}
}

--- idx: FtIndex ---
@@ -20,10 +20,10 @@ use crate::idx::ft::scorer::BM25Scorer;
use crate::idx::ft::termdocs::{TermDocs, TermsDocs};
use crate::idx::ft::terms::{TermId, Terms};
use crate::idx::trees::btree::BStatistics;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::trees::store::IndexStores;
use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs;
use crate::kvs::Key;
use crate::kvs::{Key, TransactionType};
use crate::sql::index::SearchParams;
use crate::sql::scoring::Scoring;
use crate::sql::statements::DefineAnalyzerStatement;

@@ -97,24 +97,25 @@ impl VersionedSerdeState for State {}

impl FtIndex {
pub(crate) async fn new(
ixs: &IndexStores,
opt: &Options,
txn: &Transaction,
az: &str,
index_key_base: IndexKeyBase,
p: &SearchParams,
store_type: TreeStoreType,
tt: TransactionType,
) -> Result<Self, Error> {
let mut tx = txn.lock().await;
let az = tx.get_db_analyzer(opt.ns(), opt.db(), az).await?;
Self::with_analyzer(&mut tx, az, index_key_base, p, store_type).await
Self::with_analyzer(ixs, &mut tx, az, index_key_base, p, tt).await
}

async fn with_analyzer(
ixs: &IndexStores,
run: &mut kvs::Transaction,
az: DefineAnalyzerStatement,
index_key_base: IndexKeyBase,
p: &SearchParams,
store_type: TreeStoreType,
tt: TransactionType,
) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bs_key();
let state: State = if let Some(val) = run.get(state_key.clone()).await? {

@@ -123,16 +124,26 @@ impl FtIndex {
State::default()
};
let doc_ids = Arc::new(RwLock::new(
DocIds::new(run, index_key_base.clone(), p.doc_ids_order, store_type).await?,
DocIds::new(ixs, run, tt, index_key_base.clone(), p.doc_ids_order, p.doc_ids_cache)
.await?,
));
let doc_lengths = Arc::new(RwLock::new(
DocLengths::new(run, index_key_base.clone(), p.doc_lengths_order, store_type).await?,
DocLengths::new(
ixs,
run,
index_key_base.clone(),
p.doc_lengths_order,
tt,
p.doc_lengths_cache,
)
.await?,
));
let postings = Arc::new(RwLock::new(
Postings::new(run, index_key_base.clone(), p.postings_order, store_type).await?,
Postings::new(ixs, run, index_key_base.clone(), p.postings_order, tt, p.postings_cache)
.await?,
));
let terms = Arc::new(RwLock::new(
Terms::new(run, index_key_base.clone(), p.terms_order, store_type).await?,
Terms::new(ixs, run, index_key_base.clone(), p.terms_order, tt, p.terms_cache).await?,
));
let termdocs = TermDocs::new(index_key_base.clone());
let offsets = Offsets::new(index_key_base.clone());

@@ -244,7 +255,7 @@ impl FtIndex {
let mut tx = txn.lock().await;
let mut dl = self.doc_lengths.write().await;
if resolved.was_existing() {
if let Some(old_doc_length) = dl.get_doc_length(&mut tx, doc_id).await? {
if let Some(old_doc_length) = dl.get_doc_length_mut(&mut tx, doc_id).await? {
self.state.total_docs_lengths -= old_doc_length as u128;
}
}

@@ -442,7 +453,7 @@ impl FtIndex {
})
}

pub(crate) async fn finish(self, tx: &Transaction) -> Result<(), Error> {
pub(crate) async fn finish(&self, tx: &Transaction) -> Result<(), Error> {
let mut run = tx.lock().await;
self.doc_ids.write().await.finish(&mut run).await?;
self.doc_lengths.write().await.finish(&mut run).await?;

@@ -484,13 +495,12 @@ mod tests {
use crate::dbs::{Options, Transaction};
use crate::idx::ft::scorer::{BM25Scorer, Score};
use crate::idx::ft::{FtIndex, HitsIterator};
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*};
use crate::kvs::{Datastore, LockType::*, TransactionType};
use crate::sql::index::SearchParams;
use crate::sql::scoring::Scoring;
use crate::sql::statements::{DefineAnalyzerStatement, DefineStatement};
use crate::sql::{Statement, Thing, Value};
use crate::sql::{Array, Statement, Thing, Value};
use crate::syn;
use futures::lock::Mutex;
use std::collections::HashMap;

@@ -537,16 +547,17 @@ mod tests {

pub(super) async fn tx_fti<'a>(
ds: &Datastore,
store_type: TreeStoreType,
tt: TransactionType,
az: &DefineAnalyzerStatement,
order: u32,
hl: bool,
) -> (Context<'a>, Options, Transaction, FtIndex) {
let write = matches!(store_type, TreeStoreType::Write);
let tx = ds.transaction(write.into(), Optimistic).await.unwrap();
let ctx = Context::default();
let tx = ds.transaction(tt, Optimistic).await.unwrap();
let txn = Arc::new(Mutex::new(tx));
let mut tx = txn.lock().await;
let fti = FtIndex::with_analyzer(
ctx.get_index_stores(),
&mut tx,
az.clone(),
IndexKeyBase::default(),

@@ -558,13 +569,17 @@ mod tests {
terms_order: order,
sc: Scoring::bm25(),
hl,
doc_ids_cache: 100,
doc_lengths_cache: 100,
postings_cache: 100,
terms_cache: 100,
},
TreeStoreType::Write,
tt,
)
.await
.unwrap();
drop(tx);
(Context::default(), Options::default(), txn, fti)
(ctx, Options::default(), txn, fti)
}

pub(super) async fn finish(txn: &Transaction, fti: FtIndex) {

@@ -589,7 +604,7 @@ mod tests {
{
// Add one document
let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.index_document(&ctx, &opt, &txn, &doc1, vec![Value::from("hello the world")])
.await
.unwrap();

@@ -599,7 +614,7 @@ mod tests {
{
// Add two documents
let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.index_document(&ctx, &opt, &txn, &doc2, vec![Value::from("a yellow hello")])
.await
.unwrap();

@@ -611,7 +626,7 @@ mod tests {

{
let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await;
tx_fti(&ds, TransactionType::Read, &az, btree_order, false).await;
// Check the statistics
let statistics = fti.statistics(&txn).await.unwrap();
assert_eq!(statistics.terms.keys_count, 7);

@@ -643,14 +658,14 @@ mod tests {
{
// Reindex one document
let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.index_document(&ctx, &opt, &txn, &doc3, vec![Value::from("nobar foo")])
.await
.unwrap();
finish(&txn, fti).await;

let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await;
tx_fti(&ds, TransactionType::Read, &az, btree_order, false).await;

// We can still find 'foo'
let (hits, scr) = search(&ctx, &opt, &txn, &fti, "foo").await;

@@ -668,7 +683,7 @@ mod tests {
{
// Remove documents
let (_, _, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.remove_document(&txn, &doc1).await.unwrap();
fti.remove_document(&txn, &doc2).await.unwrap();
fti.remove_document(&txn, &doc3).await.unwrap();

@@ -677,7 +692,7 @@ mod tests {

{
let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await;
tx_fti(&ds, TransactionType::Read, &az, btree_order, false).await;
let (hits, _) = search(&ctx, &opt, &txn, &fti, "hello").await;
assert!(hits.is_none());
let (hits, _) = search(&ctx, &opt, &txn, &fti, "foo").await;

@@ -705,7 +720,7 @@ mod tests {
let btree_order = 5;
{
let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, hl).await;
tx_fti(&ds, TransactionType::Write, &az, btree_order, hl).await;
fti.index_document(
&ctx,
&opt,

@@ -747,7 +762,7 @@ mod tests {

{
let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, hl).await;
tx_fti(&ds, TransactionType::Read, &az, btree_order, hl).await;

let statistics = fti.statistics(&txn).await.unwrap();
assert_eq!(statistics.terms.keys_count, 17);

@@ -815,4 +830,80 @@ mod tests {
async fn test_ft_index_bm_25_with_highlighting() {
test_ft_index_bm_25(true).await;
}

async fn concurrent_task(ds: Arc<Datastore>, az: DefineAnalyzerStatement) {
let btree_order = 5;
let doc1: Thing = ("t", "doc1").into();
let content1 = Value::from(Array::from(vec!["Enter a search term", "Welcome", "Docusaurus blogging features are powered by the blog plugin.", "Simply add Markdown files (or folders) to the blog directory.", "blog", "Regular blog authors can be added to authors.yml.", "authors.yml", "The blog post date can be extracted from filenames, such as:", "2019-05-30-welcome.md", "2019-05-30-welcome/index.md", "A blog post folder can be convenient to co-locate blog post images:", "The blog supports tags as well!", "And if you don't want a blog: just delete this directory, and use blog: false in your Docusaurus config.", "blog: false", "MDX Blog Post", "Blog posts support Docusaurus Markdown features, such as MDX.", "Use the power of React to create interactive blog posts.", "Long Blog Post", "This is the summary of a very long blog post,", "Use a <!-- truncate --> comment to limit blog post size in the list view.", "<!--", "truncate", "-->", "First Blog Post", "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet"]));

let start = std::time::Instant::now();
while start.elapsed().as_secs() < 3 {
remove_insert_task(ds.as_ref(), &az, btree_order, &doc1, &content1).await;
}
}
#[test(tokio::test)]
async fn concurrent_test() {
let ds = Arc::new(Datastore::new("memory").await.unwrap());
let mut q = syn::parse("DEFINE ANALYZER test TOKENIZERS blank;").unwrap();
let Statement::Define(DefineStatement::Analyzer(az)) = q.0 .0.pop().unwrap() else {
panic!()
};
concurrent_task(ds.clone(), az.clone()).await;
let task1 = tokio::spawn(concurrent_task(ds.clone(), az.clone()));
let task2 = tokio::spawn(concurrent_task(ds.clone(), az.clone()));
let _ = tokio::try_join!(task1, task2).expect("Tasks failed");
}

async fn remove_insert_task(
ds: &Datastore,
az: &DefineAnalyzerStatement,
btree_order: u32,
rid: &Thing,
content: &Value,
) {
let (ctx, opt, txn, mut fti) =
tx_fti(ds, TransactionType::Write, &az, btree_order, false).await;
fti.remove_document(&txn, &rid).await.unwrap();
fti.index_document(&ctx, &opt, &txn, &rid, vec![content.clone()]).await.unwrap();
finish(&txn, fti).await;
}

#[test(tokio::test)]
async fn remove_insert_sequence() {
let ds = Datastore::new("memory").await.unwrap();
let mut q = syn::parse("DEFINE ANALYZER test TOKENIZERS blank;").unwrap();
let Statement::Define(DefineStatement::Analyzer(az)) = q.0 .0.pop().unwrap() else {
panic!()
};
let doc: Thing = ("t", "doc1").into();
let content = Value::from(Array::from(vec!["Enter a search term","Welcome","Docusaurus blogging features are powered by the blog plugin.","Simply add Markdown files (or folders) to the blog directory.","blog","Regular blog authors can be added to authors.yml.","authors.yml","The blog post date can be extracted from filenames, such as:","2019-05-30-welcome.md","2019-05-30-welcome/index.md","A blog post folder can be convenient to co-locate blog post images:","The blog supports tags as well!","And if you don't want a blog: just delete this directory, and use blog: false in your Docusaurus config.","blog: false","MDX Blog Post","Blog posts support Docusaurus Markdown features, such as MDX.","Use the power of React to create interactive blog posts.","Long Blog Post","This is the summary of a very long blog post,","Use a <!-- truncate --> comment to limit blog post size in the list view.","<!--","truncate","-->","First Blog Post","Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet"]));

for i in 0..5 {
debug!("Attempt {i}");
{
let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TransactionType::Write, &az, 5, false).await;
fti.index_document(&ctx, &opt, &txn, &doc, vec![content.clone()]).await.unwrap();
finish(&txn, fti).await;
}

{
let (_, _, txn, fti) = tx_fti(&ds, TransactionType::Read, &az, 5, false).await;
let s = fti.statistics(&txn).await.unwrap();
assert_eq!(s.terms.keys_count, 113);
}

{
let (_, _, txn, mut fti) = tx_fti(&ds, TransactionType::Write, &az, 5, false).await;
fti.remove_document(&txn, &doc).await.unwrap();
finish(&txn, fti).await;
}

{
let (_, _, txn, fti) = tx_fti(&ds, TransactionType::Read, &az, 5, false).await;
let s = fti.statistics(&txn).await.unwrap();
assert_eq!(s.terms.keys_count, 0);
}
}
}
}
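The new `concurrent_test` above exercises the shared index store from two tasks at once against one datastore. The same shape in miniature, with a counter standing in for the datastore:

```rust
use std::sync::Arc;
use tokio::sync::Mutex;

async fn task(counter: Arc<Mutex<u64>>) {
    for _ in 0..100 {
        *counter.lock().await += 1;
    }
}

#[tokio::main]
async fn main() {
    let counter = Arc::new(Mutex::new(0));
    // Two concurrent writers sharing one handle, joined at the end.
    let t1 = tokio::spawn(task(counter.clone()));
    let t2 = tokio::spawn(task(counter.clone()));
    tokio::try_join!(t1, t2).expect("Tasks failed");
    assert_eq!(*counter.lock().await, 200);
}
```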

--- idx: Offsets ---
@@ -109,16 +109,16 @@ impl TryFrom<Val> for OffsetRecords {
}
let decompressed: Vec<u32> = bincode::deserialize(&val)?;
let mut iter = decompressed.iter();
let s = *iter.next().ok_or(Error::CorruptedIndex)?;
let s = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(1)"))?;
let mut indexes = Vec::with_capacity(s as usize);
for _ in 0..s {
let index = *iter.next().ok_or(Error::CorruptedIndex)?;
let index = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(2)"))?;
indexes.push(index);
}
let mut res = Vec::with_capacity(s as usize);
for index in indexes {
let start = *iter.next().ok_or(Error::CorruptedIndex)?;
let end = *iter.next().ok_or(Error::CorruptedIndex)?;
let start = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(3)"))?;
let end = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(4)"))?;
res.push(Offset::new(index, start, end));
}
Ok(OffsetRecords(res))
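For context on the error sites above: the offsets are persisted as one flat `Vec<u32>` laid out as `[count, indexes..., (start, end) pairs...]`, and each `CorruptedIndex` tag marks one of the four places the decoder can run out of input. A self-contained decoder of the same shape, using `Option` in place of the database error type:

```rust
struct Offset {
    index: u32,
    start: u32,
    end: u32,
}

fn decode(flat: &[u32]) -> Option<Vec<Offset>> {
    let mut it = flat.iter().copied();
    let s = it.next()?; // number of offsets (CorruptedIndex site 1)
    let mut indexes = Vec::with_capacity(s as usize);
    for _ in 0..s {
        indexes.push(it.next()?); // site 2
    }
    let mut res = Vec::with_capacity(s as usize);
    for index in indexes {
        let start = it.next()?; // site 3
        let end = it.next()?; // site 4
        res.push(Offset { index, start, end });
    }
    Some(res)
}

fn main() {
    let recs = decode(&[1, 0, 3, 7]).unwrap(); // one offset: index 0, span 3..7
    assert_eq!((recs[0].index, recs[0].start, recs[0].end), (0, 3, 7));
}
```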
|
||||
|
|
|
@ -2,12 +2,10 @@ use crate::err::Error;
use crate::idx::docids::DocId;
use crate::idx::ft::terms::TermId;
use crate::idx::trees::bkeys::TrieKeys;
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore};
use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction};
use std::sync::Arc;
use tokio::sync::Mutex;
use crate::kvs::{Key, Transaction, TransactionType};

pub(super) type TermFrequency = u64;

@ -15,15 +13,17 @@ pub(super) struct Postings {
state_key: Key,
index_key_base: IndexKeyBase,
btree: BTree<TrieKeys>,
store: Arc<Mutex<BTreeNodeStore<TrieKeys>>>,
store: BTreeStore<TrieKeys>,
}

impl Postings {
pub(super) async fn new(
ixs: &IndexStores,
tx: &mut Transaction,
index_key_base: IndexKeyBase,
order: u32,
store_type: TreeStoreType,
tt: TransactionType,
cache_size: u32,
) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bp_key(None);
let state: BState = if let Some(val) = tx.get(state_key.clone()).await? {

@ -31,8 +31,14 @@ impl Postings {
} else {
BState::new(order)
};
let store =
TreeNodeStore::new(TreeNodeProvider::Postings(index_key_base.clone()), store_type, 20);
let store = ixs
.get_store_btree_trie(
TreeNodeProvider::Postings(index_key_base.clone()),
state.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self {
state_key,
index_key_base,

@ -49,8 +55,7 @@ impl Postings {
term_freq: TermFrequency,
) -> Result<(), Error> {
let key = self.index_key_base.new_bf_key(term_id, doc_id);
let mut store = self.store.lock().await;
self.btree.insert(tx, &mut store, key, term_freq).await
self.btree.insert(tx, &mut self.store, key, term_freq).await
}

pub(super) async fn get_term_frequency(

@ -60,8 +65,7 @@ impl Postings {
doc_id: DocId,
) -> Result<Option<TermFrequency>, Error> {
let key = self.index_key_base.new_bf_key(term_id, doc_id);
let mut store = self.store.lock().await;
self.btree.search(tx, &mut store, &key).await
self.btree.search(tx, &self.store, &key).await
}

pub(super) async fn remove_posting(

@ -71,18 +75,18 @@ impl Postings {
doc_id: DocId,
) -> Result<Option<TermFrequency>, Error> {
let key = self.index_key_base.new_bf_key(term_id, doc_id);
let mut store = self.store.lock().await;
self.btree.delete(tx, &mut store, key).await
self.btree.delete(tx, &mut self.store, key).await
}

pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await;
self.btree.statistics(tx, &mut store).await
self.btree.statistics(tx, &self.store).await
}

pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
self.store.lock().await.finish(tx).await?;
self.btree.get_state().finish(tx, &self.state_key).await?;
pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
if self.store.finish(tx).await? {
let state = self.btree.inc_generation();
tx.set(self.state_key.clone(), state.try_to_val()?).await?;
}
Ok(())
}
}

@ -90,58 +94,65 @@ impl Postings {
#[cfg(test)]
mod tests {
use crate::idx::ft::postings::Postings;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, TransactionType::*};
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType, TransactionType::*};
use test_log::test;

async fn new_operation(
ds: &Datastore,
order: u32,
tt: TransactionType,
) -> (Transaction, Postings) {
let mut tx = ds.transaction(tt, Optimistic).await.unwrap();
let p = Postings::new(ds.index_store(), &mut tx, IndexKeyBase::default(), order, tt, 100)
.await
.unwrap();
(tx, p)
}

async fn finish(mut tx: Transaction, mut p: Postings) {
p.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}

#[test(tokio::test)]
async fn test_postings() {
const DEFAULT_BTREE_ORDER: u32 = 5;

let ds = Datastore::new("memory").await.unwrap();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
// Check empty state
let mut p = Postings::new(
&mut tx,
IndexKeyBase::default(),
DEFAULT_BTREE_ORDER,
TreeStoreType::Write,
)
.await
.unwrap();

assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0);
{
// Check empty state
let (tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Write).await;
finish(tx, p).await;

p.update_posting(&mut tx, 1, 2, 3).await.unwrap();
p.update_posting(&mut tx, 1, 4, 5).await.unwrap();
let (mut tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Read).await;
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0);

p.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
// Add postings
let (mut tx, mut p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Write).await;
p.update_posting(&mut tx, 1, 2, 3).await.unwrap();
p.update_posting(&mut tx, 1, 4, 5).await.unwrap();
finish(tx, p).await;

let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut p = Postings::new(
&mut tx,
IndexKeyBase::default(),
DEFAULT_BTREE_ORDER,
TreeStoreType::Write,
)
.await
.unwrap();
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 2);
let (mut tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Read).await;
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 2);

assert_eq!(p.get_term_frequency(&mut tx, 1, 2).await.unwrap(), Some(3));
assert_eq!(p.get_term_frequency(&mut tx, 1, 4).await.unwrap(), Some(5));
assert_eq!(p.get_term_frequency(&mut tx, 1, 2).await.unwrap(), Some(3));
assert_eq!(p.get_term_frequency(&mut tx, 1, 4).await.unwrap(), Some(5));

// Check removal of doc 2
assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), Some(3));
// Again the same
assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), None);
// Remove doc 4
assert_eq!(p.remove_posting(&mut tx, 1, 4).await.unwrap(), Some(5));
let (mut tx, mut p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Write).await;
// Check removal of doc 2
assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), Some(3));
// Again the same
assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), None);
// Remove doc 4
assert_eq!(p.remove_posting(&mut tx, 1, 4).await.unwrap(), Some(5));
finish(tx, p).await;

// The underlying b-tree should be empty now
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0);
tx.commit().await.unwrap();
// The underlying b-tree should be empty now
let (mut tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Read).await;
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0);
}
}
}
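Note (not part of the commit): the reworked `finish` above only persists the B-tree state when `BTreeStore::finish` reports that dirty nodes were actually flushed, and it bumps the state generation at that point so held caches can be invalidated. A minimal std-only sketch of that pattern, with all names illustrative:

use std::collections::HashMap;

#[derive(Default)]
struct Store {
    dirty: HashMap<u64, Vec<u8>>, // node id -> serialized node
}

impl Store {
    /// Returns true if any node was flushed, mirroring BTreeStore::finish.
    fn finish(&mut self, kv: &mut HashMap<u64, Vec<u8>>) -> bool {
        let updated = !self.dirty.is_empty();
        for (id, val) in self.dirty.drain() {
            kv.insert(id, val);
        }
        updated
    }
}

#[derive(Default)]
struct State {
    generation: u64,
}

fn main() {
    let mut kv = HashMap::new();
    let mut store = Store::default();
    store.dirty.insert(7, vec![1, 2, 3]);
    let mut state = State::default();
    if store.finish(&mut kv) {
        // Only bump and persist the state when the tree changed,
        // so read-only transactions never invalidate held caches.
        state.generation += 1;
        kv.insert(0, state.generation.to_be_bytes().to_vec());
    }
    assert_eq!(state.generation, 1);
}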
@ -1,32 +1,32 @@
use crate::err::Error;
use crate::idx::trees::bkeys::FstKeys;
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore};
use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction};
use crate::kvs::{Key, Transaction, TransactionType};
use revision::revisioned;
use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::Mutex;

pub(crate) type TermId = u64;

pub(super) struct Terms {
state_key: Key,
index_key_base: IndexKeyBase,
btree: BTree<FstKeys>,
store: Arc<Mutex<BTreeNodeStore<FstKeys>>>,
store: BTreeStore<FstKeys>,
available_ids: Option<RoaringTreemap>,
next_term_id: TermId,
updated: bool,
}

impl Terms {
pub(super) async fn new(
ixs: &IndexStores,
tx: &mut Transaction,
index_key_base: IndexKeyBase,
default_btree_order: u32,
store_type: TreeStoreType,
tt: TransactionType,
cache_size: u32,
) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bt_key(None);
let state: State = if let Some(val) = tx.get(state_key.clone()).await? {

@ -34,8 +34,14 @@ impl Terms {
} else {
State::new(default_btree_order)
};
let store =
TreeNodeStore::new(TreeNodeProvider::Terms(index_key_base.clone()), store_type, 20);
let store = ixs
.get_store_btree_fst(
TreeNodeProvider::Terms(index_key_base.clone()),
state.btree.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self {
state_key,
index_key_base,

@ -43,7 +49,6 @@ impl Terms {
store,
available_ids: state.available_ids,
next_term_id: state.next_term_id,
updated: false,
})
}

@ -71,16 +76,13 @@ impl Terms {
) -> Result<TermId, Error> {
let term_key = term.into();
{
let mut store = self.store.lock().await;
if let Some(term_id) = self.btree.search(tx, &mut store, &term_key).await? {
if let Some(term_id) = self.btree.search_mut(tx, &mut self.store, &term_key).await? {
return Ok(term_id);
}
}
let term_id = self.get_next_term_id();
tx.set(self.index_key_base.new_bu_key(term_id), term_key.clone()).await?;
let mut store = self.store.lock().await;
self.btree.insert(tx, &mut store, term_key, term_id).await?;
self.updated = true;
self.btree.insert(tx, &mut self.store, term_key, term_id).await?;
Ok(term_id)
}

@ -89,8 +91,7 @@ impl Terms {
tx: &mut Transaction,
term: &str,
) -> Result<Option<TermId>, Error> {
let mut store = self.store.lock().await;
self.btree.search(tx, &mut store, &term.into()).await
self.btree.search(tx, &self.store, &term.into()).await
}

pub(super) async fn remove_term_id(

@ -100,8 +101,7 @@ impl Terms {
) -> Result<(), Error> {
let term_id_key = self.index_key_base.new_bu_key(term_id);
if let Some(term_key) = tx.get(term_id_key.clone()).await? {
let mut store = self.store.lock().await;
self.btree.delete(tx, &mut store, term_key.clone()).await?;
self.btree.delete(tx, &mut self.store, term_key.clone()).await?;
tx.del(term_id_key).await?;
if let Some(available_ids) = &mut self.available_ids {
available_ids.insert(term_id);

@ -110,21 +110,19 @@ impl Terms {
available_ids.insert(term_id);
self.available_ids = Some(available_ids);
}
self.updated = true;
}
Ok(())
}

pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await;
self.btree.statistics(tx, &mut store).await
self.btree.statistics(tx, &self.store).await
}

pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
let updated = self.store.lock().await.finish(tx).await?;
if self.updated || updated {
if self.store.finish(tx).await? {
let btree = self.btree.inc_generation().clone();
let state = State {
btree: self.btree.get_state().clone(),
btree,
available_ids: self.available_ids.take(),
next_term_id: self.next_term_id,
};

@ -158,11 +156,12 @@ impl State {
mod tests {
use crate::idx::ft::postings::TermFrequency;
use crate::idx::ft::terms::Terms;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, TransactionType::*};
use crate::kvs::TransactionType::{Read, Write};
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType};
use rand::{thread_rng, Rng};
use std::collections::HashSet;
use test_log::test;

fn random_term(key_length: usize) -> String {
thread_rng()

@ -180,97 +179,106 @@ mod tests {
set
}

#[tokio::test]
async fn new_operation(
ds: &Datastore,
order: u32,
tt: TransactionType,
) -> (Transaction, Terms) {
let mut tx = ds.transaction(tt, Optimistic).await.unwrap();
let t = Terms::new(ds.index_store(), &mut tx, IndexKeyBase::default(), order, tt, 100)
.await
.unwrap();
(tx, t)
}

async fn finish(mut tx: Transaction, mut t: Terms) {
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}

#[test(tokio::test)]
async fn test_resolve_terms() {
const BTREE_ORDER: u32 = 7;

let idx = IndexKeyBase::default();

let ds = Datastore::new("memory").await.unwrap();

{
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
// Empty operation
let (tx, t) = new_operation(&ds, BTREE_ORDER, Write).await;
finish(tx, t).await;
}

// Resolve a first term
{
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0);
finish(tx, t).await;
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 1);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}

// Resolve a second term
{
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1);
finish(tx, t).await;
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 2);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}

// Resolve two existing terms with new frequencies
{
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0);
assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1);
finish(tx, t).await;

let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 2);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}

// Resolve one existing term and two new terms
{
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();

let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
assert_eq!(t.resolve_term_id(&mut tx, "A").await.unwrap(), 2);
assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0);
assert_eq!(t.resolve_term_id(&mut tx, "E").await.unwrap(), 3);
finish(tx, t).await;

let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 4);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}
}

#[tokio::test]
#[test(tokio::test)]
async fn test_deletion() {
const BTREE_ORDER: u32 = 7;

let idx = IndexKeyBase::default();

let ds = Datastore::new("memory").await.unwrap();

let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
{
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;

// Check removing a non-existing term id returns None
assert!(t.remove_term_id(&mut tx, 0).await.is_ok());
// Check removing a non-existing term id returns None
assert!(t.remove_term_id(&mut tx, 0).await.is_ok());

// Create a few terms
t.resolve_term_id(&mut tx, "A").await.unwrap();
t.resolve_term_id(&mut tx, "C").await.unwrap();
t.resolve_term_id(&mut tx, "E").await.unwrap();
// Create a few terms
t.resolve_term_id(&mut tx, "A").await.unwrap();
t.resolve_term_id(&mut tx, "C").await.unwrap();
t.resolve_term_id(&mut tx, "E").await.unwrap();
finish(tx, t).await;
}

for term in ["A", "C", "E"] {
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
let term_id = t.get_term_id(&mut tx, term).await.unwrap();

if let Some(term_id) = term_id {
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
t.remove_term_id(&mut tx, term_id).await.unwrap();
finish(tx, t).await;

let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.get_term_id(&mut tx, term).await.unwrap(), None);
} else {
panic!("Term ID not found: {}", term);

@ -278,11 +286,10 @@ mod tests {
}

// Check id recycling
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
assert_eq!(t.resolve_term_id(&mut tx, "B").await.unwrap(), 0);
assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1);

t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
finish(tx, t).await;
}

fn random_term_freq_vec(term_count: usize) -> Vec<(String, TermFrequency)> {

@ -295,39 +302,31 @@ mod tests {
vec
}

#[tokio::test]
#[test(tokio::test)]
async fn test_resolve_100_docs_with_50_words_one_by_one() {
let ds = Datastore::new("memory").await.unwrap();
for _ in 0..100 {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t = Terms::new(&mut tx, IndexKeyBase::default(), 100, TreeStoreType::Write)
.await
.unwrap();
let (mut tx, mut t) = new_operation(&ds, 100, Write).await;
let terms_string = random_term_freq_vec(50);
for (term, _) in terms_string {
t.resolve_term_id(&mut tx, &term).await.unwrap();
}
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
finish(tx, t).await;
}
}

#[tokio::test]
#[test(tokio::test)]
async fn test_resolve_100_docs_with_50_words_batch_of_10() {
let ds = Datastore::new("memory").await.unwrap();
for _ in 0..10 {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t = Terms::new(&mut tx, IndexKeyBase::default(), 100, TreeStoreType::Write)
.await
.unwrap();
let (mut tx, mut t) = new_operation(&ds, 100, Write).await;
for _ in 0..10 {
let terms_string = random_term_freq_vec(50);
for (term, _) in terms_string {
t.resolve_term_id(&mut tx, &term).await.unwrap();
}
}
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
finish(tx, t).await;
}
}
}
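Note (illustrative, not SurrealDB code): `Terms` hands out ids by first recycling entries from `available_ids` and only then advancing `next_term_id`, which is what the recycling assertions in `test_deletion` rely on. A dependency-free sketch of that contract, using a BTreeSet in place of the RoaringTreemap the real code keeps:

use std::collections::BTreeSet;

#[derive(Default)]
struct TermIds {
    available: BTreeSet<u64>,
    next: u64,
}

impl TermIds {
    fn acquire(&mut self) -> u64 {
        // Reuse a freed id before minting a new one.
        if let Some(id) = self.available.pop_first() {
            return id;
        }
        let id = self.next;
        self.next += 1;
        id
    }
    fn release(&mut self, id: u64) {
        self.available.insert(id);
    }
}

fn main() {
    let mut ids = TermIds::default();
    assert_eq!(ids.acquire(), 0); // "A"
    assert_eq!(ids.acquire(), 1); // "C"
    assert_eq!(ids.acquire(), 2); // "E"
    ids.release(0);
    ids.release(1);
    // Freed ids are recycled, matching the `test_deletion` assertions above.
    assert_eq!(ids.acquire(), 0); // "B"
    assert_eq!(ids.acquire(), 1); // "D"
}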
@ -14,10 +14,9 @@ use crate::idx::planner::plan::IndexOperator::Matches;
use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
use crate::idx::planner::tree::{IndexRef, IndexesMap};
use crate::idx::trees::mtree::MTreeIndex;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase;
use crate::kvs;
use crate::kvs::Key;
use crate::kvs::{Key, TransactionType};
use crate::sql::index::Index;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Array, Expression, Object, Table, Thing, Value};

@ -85,9 +84,16 @@ impl QueryExecutor {
}
} else {
let ikb = IndexKeyBase::new(opt, idx_def);
let ft =
FtIndex::new(opt, txn, p.az.as_str(), ikb, p, TreeStoreType::Read)
.await?;
let ft = FtIndex::new(
ctx.get_index_stores(),
opt,
txn,
p.az.as_str(),
ikb,
p,
TransactionType::Read,
)
.await?;
if ft_entry.is_none() {
ft_entry = FtEntry::new(ctx, opt, txn, &ft, io).await?;
}

@ -111,8 +117,14 @@ impl QueryExecutor {
MtEntry::new(&mut tx, mt, a.clone(), *k).await?
} else {
let ikb = IndexKeyBase::new(opt, idx_def);
let mt =
MTreeIndex::new(&mut tx, ikb, p, TreeStoreType::Read).await?;
let mt = MTreeIndex::new(
ctx.get_index_stores(),
&mut tx,
ikb,
p,
TransactionType::Read,
)
.await?;
let entry = MtEntry::new(&mut tx, &mt, a.clone(), *k).await?;
mt_map.insert(ix_ref, mt);
entry
@ -5,11 +5,11 @@ use fst::{IntoStreamer, Map, MapBuilder, Streamer};
use radix_trie::{SubTrie, Trie, TrieCommon};
use serde::ser;
use std::collections::VecDeque;
use std::fmt::{Display, Formatter};
use std::fmt::{Debug, Display, Formatter};
use std::io;
use std::io::Cursor;

pub trait BKeys: Default + Display + Sized {
pub trait BKeys: Default + Debug + Display + Sized {
fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error>;
fn len(&self) -> u32;
fn is_empty(&self) -> bool;

@ -19,7 +19,7 @@ pub trait BKeys: Default + Display + Sized {
// The size of the Node should be small, therefore one instance of
// BKeys would never store a large volume of keys.
fn collect_with_prefix(&self, prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error>;
fn insert(&mut self, key: Key, payload: Payload);
fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload>;
fn append(&mut self, keys: Self);
fn remove(&mut self, key: &Key) -> Option<Payload>;
fn split_keys(self) -> Result<SplitKeys<Self>, Error>;

@ -30,9 +30,6 @@ pub trait BKeys: Default + Display + Sized {
fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error>;
fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error>;
fn compile(&mut self) {}
fn debug<F>(&self, to_string: F) -> Result<(), Error>
where
F: Fn(Key) -> Result<String, Error>;
}

pub struct SplitKeys<BK>

@ -46,12 +43,12 @@ where
pub(in crate::idx) median_payload: Payload,
}

#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct FstKeys {
i: Inner,
}

#[derive(Debug)]
#[derive(Debug, Clone)]
enum Inner {
Map(Map<Vec<u8>>),
Trie(TrieKeys),

@ -104,14 +101,15 @@ impl BKeys for FstKeys {
}

fn collect_with_prefix(&self, _prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error> {
Err(Error::Unreachable)
Err(Error::Unreachable("BKeys/FSTKeys::collect_with_prefix"))
}

fn insert(&mut self, key: Key, payload: Payload) {
fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
self.edit();
if let Inner::Trie(t) = &mut self.i {
t.insert(key, payload);
return t.insert(key, payload);
}
unreachable!()
}

fn append(&mut self, keys: Self) {

@ -159,7 +157,7 @@ impl BKeys for FstKeys {
median_payload: s.median_payload,
})
} else {
Err(Error::Unreachable)
Err(Error::Unreachable("BKeys/FSTKeys::split_keys"))
}
}

@ -245,30 +243,6 @@ impl BKeys for FstKeys {
)))
}
}

fn debug<F>(&self, to_string: F) -> Result<(), Error>
where
F: Fn(Key) -> Result<String, Error>,
{
match &self.i {
Inner::Map(m) => {
let mut s = String::new();
let mut iter = m.stream();
let mut start = true;
while let Some((k, p)) = iter.next() {
if !start {
s.push(',');
} else {
start = false;
}
s.push_str(&format!("{}={}", to_string(k.to_vec())?.as_str(), p));
}
debug!("FSTKeys[{}]", s);
Ok(())
}
Inner::Trie(t) => t.debug(to_string),
}
}
}

impl TryFrom<MapBuilder<Vec<u8>>> for FstKeys {

@ -305,12 +279,12 @@ impl Display for FstKeys {
}
Ok(())
}
Inner::Trie(t) => t.fmt(f),
Inner::Trie(t) => write!(f, "{}", t),
}
}
}

#[derive(Default, Debug)]
#[derive(Default, Debug, Clone)]
pub struct TrieKeys {
keys: Trie<Key, Payload>,
}

@ -372,8 +346,8 @@ impl BKeys for TrieKeys {
Ok(r)
}

fn insert(&mut self, key: Key, payload: Payload) {
self.keys.insert(key, payload);
fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
self.keys.insert(key, payload)
}

fn append(&mut self, keys: Self) {

@ -400,7 +374,7 @@ impl BKeys for TrieKeys {
let (median_key, median_payload) = if let Some((k, v)) = s.next() {
(k.clone(), *v)
} else {
return Err(Error::Unreachable);
return Err(Error::Unreachable("BKeys/TrieKeys::split_keys"));
};
let mut right = Trie::default();
for (key, val) in s {

@ -468,24 +442,6 @@ impl BKeys for TrieKeys {
bincode::serialize_into(c, &compressed)?;
Ok(())
}

fn debug<F>(&self, to_string: F) -> Result<(), Error>
where
F: Fn(Key) -> Result<String, Error>,
{
let mut s = String::new();
let mut start = true;
for (k, p) in self.keys.iter() {
if !start {
s.push(',');
} else {
start = false;
}
s.push_str(&format!("{}={}", to_string(k.to_vec())?.as_str(), p));
}
debug!("TrieKeys[{}]", s);
Ok(())
}
}

impl From<Trie<Key, Payload>> for TrieKeys {
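Note: `BKeys::insert` now returns `Option<Payload>`, the standard map-insert contract, so the B-tree can tell an overwrite apart from a fresh insertion without a prior search. For illustration, the same contract with a std map:

use std::collections::HashMap;

fn main() {
    let mut keys: HashMap<Vec<u8>, u64> = HashMap::new();
    assert_eq!(keys.insert(b"term".to_vec(), 1), None); // new key
    assert_eq!(keys.insert(b"term".to_vec(), 2), Some(1)); // overwrite, old payload back
}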
File diff suppressed because it is too large
@ -1,322 +0,0 @@
use crate::err::Error;
use crate::idx::IndexKeyBase;
use crate::kvs::{Key, Transaction, Val};
use lru::LruCache;
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::sync::Arc;
use tokio::sync::Mutex;

pub type NodeId = u64;

#[derive(Clone, Copy, PartialEq)]
pub enum TreeStoreType {
Write,
Read,
Traversal,
}

pub enum TreeNodeStore<N>
where
N: TreeNode + Debug,
{
/// Caches every node read, and keeps track of updated and created nodes
Write(TreeWriteCache<N>),
/// Uses an LRU cache to keep the most recently read nodes in memory
Read(TreeReadCache<N>),
/// Reads the nodes from the KV store without any cache
Traversal(TreeNodeProvider),
}

impl<N> TreeNodeStore<N>
where
N: TreeNode + Debug,
{
pub fn new(
keys: TreeNodeProvider,
store_type: TreeStoreType,
read_size: usize,
) -> Arc<Mutex<Self>> {
Arc::new(Mutex::new(match store_type {
TreeStoreType::Write => Self::Write(TreeWriteCache::new(keys)),
TreeStoreType::Read => Self::Read(TreeReadCache::new(keys, read_size)),
TreeStoreType::Traversal => Self::Traversal(keys),
}))
}

pub(super) async fn get_node(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
match self {
TreeNodeStore::Write(w) => w.get_node(tx, node_id).await,
TreeNodeStore::Read(r) => r.get_node(tx, node_id).await,
TreeNodeStore::Traversal(keys) => keys.load::<N>(tx, node_id).await,
}
}

pub(super) fn set_node(&mut self, node: StoredNode<N>, updated: bool) -> Result<(), Error> {
match self {
TreeNodeStore::Write(w) => w.set_node(node, updated),
TreeNodeStore::Read(r) => {
if updated {
Err(Error::Unreachable)
} else {
r.set_node(node);
Ok(())
}
}
TreeNodeStore::Traversal(_) => Ok(()),
}
}

pub(super) fn new_node(&mut self, id: NodeId, node: N) -> Result<StoredNode<N>, Error> {
match self {
TreeNodeStore::Write(w) => Ok(w.new_node(id, node)),
_ => Err(Error::Unreachable),
}
}

pub(super) fn remove_node(&mut self, node_id: NodeId, node_key: Key) -> Result<(), Error> {
match self {
TreeNodeStore::Write(w) => w.remove_node(node_id, node_key),
_ => Err(Error::Unreachable),
}
}

pub(in crate::idx) async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
if let TreeNodeStore::Write(w) = self {
w.finish(tx).await
} else {
Err(Error::Unreachable)
}
}
}

pub struct TreeWriteCache<N>
where
N: TreeNode + Debug,
{
np: TreeNodeProvider,
nodes: HashMap<NodeId, StoredNode<N>>,
updated: HashSet<NodeId>,
removed: HashMap<NodeId, Key>,
#[cfg(debug_assertions)]
out: HashSet<NodeId>,
}

impl<N: Debug> TreeWriteCache<N>
where
N: TreeNode,
{
fn new(keys: TreeNodeProvider) -> Self {
Self {
np: keys,
nodes: HashMap::new(),
updated: HashSet::new(),
removed: HashMap::new(),
#[cfg(debug_assertions)]
out: HashSet::new(),
}
}

async fn get_node(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
#[cfg(debug_assertions)]
{
debug!("GET: {}", node_id);
self.out.insert(node_id);
}
if let Some(n) = self.nodes.remove(&node_id) {
return Ok(n);
}
self.np.load::<N>(tx, node_id).await
}

fn set_node(&mut self, node: StoredNode<N>, updated: bool) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
debug!("SET: {} {} {:?}", node.id, updated, node.n);
self.out.remove(&node.id);
}
if updated {
self.updated.insert(node.id);
}
if self.removed.contains_key(&node.id) {
return Err(Error::Unreachable);
}
self.nodes.insert(node.id, node);
Ok(())
}

fn new_node(&mut self, id: NodeId, node: N) -> StoredNode<N> {
#[cfg(debug_assertions)]
{
debug!("NEW: {}", id);
self.out.insert(id);
}
StoredNode {
n: node,
id,
key: self.np.get_key(id),
size: 0,
}
}

fn remove_node(&mut self, node_id: NodeId, node_key: Key) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
debug!("REMOVE: {}", node_id);
if self.nodes.contains_key(&node_id) {
return Err(Error::Unreachable);
}
self.out.remove(&node_id);
}
self.updated.remove(&node_id);
self.removed.insert(node_id, node_key);
Ok(())
}

async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
let update = !self.updated.is_empty() || !self.removed.is_empty();
#[cfg(debug_assertions)]
{
if !self.out.is_empty() {
debug!("OUT: {:?}", self.out);
return Err(Error::Unreachable);
}
}
for node_id in &self.updated {
if let Some(node) = self.nodes.remove(node_id) {
self.np.save(tx, node).await?;
} else {
return Err(Error::Unreachable);
}
}
self.updated.clear();
let node_ids: Vec<NodeId> = self.removed.keys().copied().collect();
for node_id in node_ids {
if let Some(node_key) = self.removed.remove(&node_id) {
tx.del(node_key).await?;
}
}
Ok(update)
}
}

pub struct TreeReadCache<N>
where
N: TreeNode,
{
keys: TreeNodeProvider,
nodes: LruCache<NodeId, StoredNode<N>>,
}

impl<N> TreeReadCache<N>
where
N: TreeNode,
{
fn new(keys: TreeNodeProvider, size: usize) -> Self {
Self {
keys,
nodes: LruCache::new(NonZeroUsize::new(size).unwrap()),
}
}

async fn get_node(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
if let Some(n) = self.nodes.pop(&node_id) {
return Ok(n);
}
self.keys.load::<N>(tx, node_id).await
}

fn set_node(&mut self, node: StoredNode<N>) {
self.nodes.put(node.id, node);
}
}

#[derive(Clone)]
pub enum TreeNodeProvider {
DocIds(IndexKeyBase),
DocLengths(IndexKeyBase),
Postings(IndexKeyBase),
Terms(IndexKeyBase),
Vector(IndexKeyBase),
Debug,
}

impl TreeNodeProvider {
pub(in crate::idx) fn get_key(&self, node_id: NodeId) -> Key {
match self {
TreeNodeProvider::DocIds(ikb) => ikb.new_bd_key(Some(node_id)),
TreeNodeProvider::DocLengths(ikb) => ikb.new_bl_key(Some(node_id)),
TreeNodeProvider::Postings(ikb) => ikb.new_bp_key(Some(node_id)),
TreeNodeProvider::Terms(ikb) => ikb.new_bt_key(Some(node_id)),
TreeNodeProvider::Vector(ikb) => ikb.new_vm_key(Some(node_id)),
TreeNodeProvider::Debug => node_id.to_be_bytes().to_vec(),
}
}

async fn load<N>(&self, tx: &mut Transaction, id: NodeId) -> Result<StoredNode<N>, Error>
where
N: TreeNode,
{
let key = self.get_key(id);
if let Some(val) = tx.get(key.clone()).await? {
let size = val.len() as u32;
let node = N::try_from_val(val)?;
Ok(StoredNode {
n: node,
id,
key,
size,
})
} else {
Err(Error::CorruptedIndex)
}
}

async fn save<N>(&self, tx: &mut Transaction, mut node: StoredNode<N>) -> Result<(), Error>
where
N: TreeNode,
{
let val = node.n.try_into_val()?;
tx.set(node.key, val).await?;
Ok(())
}
}

pub(super) struct StoredNode<N> {
pub(super) n: N,
pub(super) id: NodeId,
pub(super) key: Key,
pub(super) size: u32,
}

impl<N> StoredNode<N> {
pub(super) fn new(n: N, id: NodeId, key: Key, size: u32) -> Self {
Self {
n,
id,
key,
size,
}
}
}

pub trait TreeNode
where
Self: Sized,
{
fn try_from_val(val: Val) -> Result<Self, Error>;
fn try_into_val(&mut self) -> Result<Val, Error>;
}
217
lib/src/idx/trees/store/cache.rs
Normal file
@ -0,0 +1,217 @@
use crate::err::Error;
use crate::idx::trees::store::{NodeId, StoredNode, TreeNode, TreeNodeProvider};
use crate::kvs::{Key, Transaction};
use quick_cache::sync::Cache;
use quick_cache::GuardResult;
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::{Debug, Display};
use std::sync::Arc;
use tokio::sync::RwLock;

pub type CacheGen = u64;

pub(super) struct TreeCaches<N>(Arc<RwLock<HashMap<Key, TreeCache<N>>>>)
where
N: TreeNode + Debug + Clone + Display;

impl<N> TreeCaches<N>
where
N: TreeNode + Debug + Clone + Display,
{
pub(super) async fn get_cache(
&self,
generation: CacheGen,
keys: &TreeNodeProvider,
cache_size: usize,
) -> TreeCache<N> {
#[cfg(debug_assertions)]
debug!("get_cache {generation}");
// We take the key of node 0 as the identifier for the cache
let key = keys.get_key(0);
match self.0.write().await.entry(key) {
Entry::Occupied(mut e) => {
let c = e.get_mut();
// If the cache and the store generations match, we can send a clone of the cache.
match generation.cmp(&c.generation()) {
Ordering::Less => {
// The store generation is older than the current cache,
// so we return an empty cache, but we don't hold it
TreeCache::new(generation, keys.clone(), cache_size)
}
Ordering::Equal => c.clone(),
Ordering::Greater => {
// The store generation is more recent than the cache,
// so we create a new one and hold it
let c = TreeCache::new(generation, keys.clone(), cache_size);
e.insert(c.clone());
c
}
}
}
Entry::Vacant(e) => {
// There is no cache for this index, so we create one and hold it
let c = TreeCache::new(generation, keys.clone(), cache_size);
e.insert(c.clone());
c
}
}
}

pub(super) async fn remove_cache(&self, keys: &TreeNodeProvider) {
let key = keys.get_key(0);
self.0.write().await.remove(&key);
}

pub(crate) async fn is_empty(&self) -> bool {
self.0.read().await.is_empty()
}
}

impl<N> Default for TreeCaches<N>
where
N: TreeNode + Debug + Clone + Display,
{
fn default() -> Self {
Self(Arc::new(RwLock::new(HashMap::new())))
}
}

#[derive(Clone)]
pub enum TreeCache<N>
where
N: TreeNode + Debug + Clone + Display,
{
Lru(CacheGen, TreeLruCache<N>),
Full(CacheGen, TreeFullCache<N>),
}

impl<N> TreeCache<N>
where
N: TreeNode + Debug + Clone + Display,
{
pub fn new(generation: CacheGen, keys: TreeNodeProvider, cache_size: usize) -> Self {
if cache_size == 0 {
TreeCache::Full(generation, TreeFullCache::new(keys))
} else {
TreeCache::Lru(generation, TreeLruCache::new(keys, cache_size))
}
}

pub(super) async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
match self {
TreeCache::Lru(_, c) => c.get_node(tx, node_id).await,
TreeCache::Full(_, c) => c.get_node(tx, node_id).await,
}
}

fn generation(&self) -> CacheGen {
match self {
TreeCache::Lru(gen, _) | TreeCache::Full(gen, _) => *gen,
}
}
}

pub struct TreeLruCache<N>
where
N: TreeNode + Debug + Clone + Display,
{
keys: TreeNodeProvider,
cache: Arc<Cache<NodeId, Arc<StoredNode<N>>>>,
}

impl<N> TreeLruCache<N>
where
N: TreeNode + Debug + Clone,
{
fn new(keys: TreeNodeProvider, cache_size: usize) -> Self {
Self {
keys,
cache: Arc::new(Cache::new(cache_size)),
}
}

async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
match self.cache.get_value_or_guard(&node_id, None) {
GuardResult::Value(v) => Ok(v),
GuardResult::Guard(g) => {
let n = Arc::new(self.keys.load::<N>(tx, node_id).await?);
g.insert(n.clone()).ok();
Ok(n)
}
GuardResult::Timeout => Err(Error::Unreachable("TreeCache::get_node")),
}
}
}

impl<N> Clone for TreeLruCache<N>
where
N: TreeNode + Debug + Clone,
{
fn clone(&self) -> Self {
Self {
keys: self.keys.clone(),
cache: self.cache.clone(),
}
}
}

pub struct TreeFullCache<N>
where
N: TreeNode + Debug + Clone,
{
keys: TreeNodeProvider,
cache: Arc<RwLock<HashMap<NodeId, Arc<StoredNode<N>>>>>,
}

impl<N> TreeFullCache<N>
where
N: TreeNode + Debug + Clone,
{
pub fn new(keys: TreeNodeProvider) -> Self {
Self {
keys,
cache: Arc::new(RwLock::new(HashMap::new())),
}
}

pub(super) async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
// Let's first try with the read lock
if let Some(n) = self.cache.read().await.get(&node_id).cloned() {
return Ok(n);
}
match self.cache.write().await.entry(node_id) {
Entry::Occupied(e) => Ok(e.get().clone()),
Entry::Vacant(e) => {
let n = Arc::new(self.keys.load::<N>(tx, node_id).await?);
e.insert(n.clone());
Ok(n)
}
}
}
}

impl<N> Clone for TreeFullCache<N>
where
N: TreeNode + Debug + Clone,
{
fn clone(&self) -> Self {
Self {
keys: self.keys.clone(),
cache: self.cache.clone(),
}
}
}
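Note (a sketch, assuming quick_cache 0.4 as a dependency, which the imports above suggest): `TreeLruCache::get_node` uses the `get_value_or_guard` flow so that concurrent readers of a missing node do not all hit the KV store; the first reader holds a placeholder guard, loads once, and publishes the value. The same pattern in isolation, with illustrative names:

use quick_cache::sync::Cache;
use quick_cache::GuardResult;

fn load_from_kv(id: u64) -> String {
    // Stand-in for TreeNodeProvider::load reading the node from the KV store.
    format!("node-{id}")
}

fn get(cache: &Cache<u64, String>, id: u64) -> String {
    match cache.get_value_or_guard(&id, None) {
        // Hit: another reader already populated the entry.
        GuardResult::Value(v) => v,
        // Miss: we hold the placeholder guard, load once, then publish.
        GuardResult::Guard(g) => {
            let v = load_from_kv(id);
            g.insert(v.clone()).ok();
            v
        }
        // With a `None` timeout this branch is not expected, as in the code above.
        GuardResult::Timeout => unreachable!(),
    }
}

fn main() {
    let cache = Cache::new(100);
    assert_eq!(get(&cache, 7), "node-7"); // loads from the stand-in KV store
    assert_eq!(get(&cache, 7), "node-7"); // served from the cache
}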
306
lib/src/idx/trees/store/mod.rs
Normal file
@ -0,0 +1,306 @@
pub mod cache;
pub(crate) mod tree;

use crate::dbs::Options;
use crate::err::Error;
use crate::idx::trees::bkeys::{FstKeys, TrieKeys};
use crate::idx::trees::btree::{BTreeNode, BTreeStore};
use crate::idx::trees::mtree::{MTreeNode, MTreeStore};
use crate::idx::trees::store::cache::{TreeCache, TreeCaches};
use crate::idx::trees::store::tree::{TreeRead, TreeWrite};
use crate::idx::IndexKeyBase;
use crate::kvs::{Key, Transaction, TransactionType, Val};
use crate::sql::statements::DefineIndexStatement;
use crate::sql::Index;
use std::fmt::{Debug, Display, Formatter};
use std::sync::Arc;

pub type NodeId = u64;

pub enum TreeStore<N>
where
N: TreeNode + Debug + Clone,
{
/// Caches every node read, and keeps track of updated and created nodes
Write(TreeWrite<N>),
/// Caches read nodes in an LRU cache
Read(TreeRead<N>),
}

impl<N> TreeStore<N>
where
N: TreeNode + Debug + Display + Clone,
{
pub async fn new(keys: TreeNodeProvider, cache: TreeCache<N>, tt: TransactionType) -> Self {
match tt {
TransactionType::Read => Self::Read(TreeRead::new(cache)),
TransactionType::Write => Self::Write(TreeWrite::new(keys, cache)),
}
}

pub(in crate::idx) async fn get_node_mut(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
match self {
TreeStore::Write(w) => w.get_node_mut(tx, node_id).await,
_ => Err(Error::Unreachable("TreeStore::get_node_mut")),
}
}

pub(in crate::idx) async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
match self {
TreeStore::Read(r) => r.get_node(tx, node_id).await,
_ => Err(Error::Unreachable("TreeStore::get_node")),
}
}

pub(in crate::idx) async fn set_node(
&mut self,
node: StoredNode<N>,
updated: bool,
) -> Result<(), Error> {
match self {
TreeStore::Write(w) => w.set_node(node, updated),
_ => Err(Error::Unreachable("TreeStore::set_node")),
}
}

pub(in crate::idx) fn new_node(&mut self, id: NodeId, node: N) -> Result<StoredNode<N>, Error> {
match self {
TreeStore::Write(w) => Ok(w.new_node(id, node)),
_ => Err(Error::Unreachable("TreeStore::new_node")),
}
}

pub(in crate::idx) async fn remove_node(
&mut self,
node_id: NodeId,
node_key: Key,
) -> Result<(), Error> {
match self {
TreeStore::Write(w) => w.remove_node(node_id, node_key),
_ => Err(Error::Unreachable("TreeStore::remove_node")),
}
}

pub async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
match self {
TreeStore::Write(w) => w.finish(tx).await,
_ => Ok(false),
}
}
}

#[derive(Clone)]
pub enum TreeNodeProvider {
DocIds(IndexKeyBase),
DocLengths(IndexKeyBase),
Postings(IndexKeyBase),
Terms(IndexKeyBase),
Vector(IndexKeyBase),
Debug,
}

impl TreeNodeProvider {
pub(in crate::idx) fn get_key(&self, node_id: NodeId) -> Key {
match self {
TreeNodeProvider::DocIds(ikb) => ikb.new_bd_key(Some(node_id)),
TreeNodeProvider::DocLengths(ikb) => ikb.new_bl_key(Some(node_id)),
TreeNodeProvider::Postings(ikb) => ikb.new_bp_key(Some(node_id)),
TreeNodeProvider::Terms(ikb) => ikb.new_bt_key(Some(node_id)),
TreeNodeProvider::Vector(ikb) => ikb.new_vm_key(Some(node_id)),
TreeNodeProvider::Debug => node_id.to_be_bytes().to_vec(),
}
}

async fn load<N>(&self, tx: &mut Transaction, id: NodeId) -> Result<StoredNode<N>, Error>
where
N: TreeNode + Clone,
{
let key = self.get_key(id);
if let Some(val) = tx.get(key.clone()).await? {
let size = val.len() as u32;
let node = N::try_from_val(val)?;
Ok(StoredNode::new(node, id, key, size))
} else {
Err(Error::CorruptedIndex("TreeStore::load"))
}
}

async fn save<N>(&self, tx: &mut Transaction, mut node: StoredNode<N>) -> Result<(), Error>
where
N: TreeNode + Clone + Display,
{
let val = node.n.try_into_val()?;
tx.set(node.key, val).await?;
Ok(())
}
}

pub struct StoredNode<N>
where
N: Clone + Display,
{
pub(super) n: N,
pub(super) id: NodeId,
pub(super) key: Key,
pub(super) size: u32,
}

impl<N> StoredNode<N>
where
N: Clone + Display,
{
pub(super) fn new(n: N, id: NodeId, key: Key, size: u32) -> Self {
Self {
n,
id,
key,
size,
}
}
}

impl<N> Display for StoredNode<N>
where
N: Clone + Display,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "node_id: {} - {}", self.id, self.n)
}
}

pub trait TreeNode: Debug + Clone + Display {
fn try_from_val(val: Val) -> Result<Self, Error>
where
Self: Sized;
fn try_into_val(&mut self) -> Result<Val, Error>;
}

#[derive(Clone)]
pub struct IndexStores(Arc<Inner>);

struct Inner {
btree_fst_caches: TreeCaches<BTreeNode<FstKeys>>,
btree_trie_caches: TreeCaches<BTreeNode<TrieKeys>>,
mtree_caches: TreeCaches<MTreeNode>,
}
impl Default for IndexStores {
fn default() -> Self {
Self(Arc::new(Inner {
btree_fst_caches: TreeCaches::default(),
btree_trie_caches: TreeCaches::default(),
mtree_caches: TreeCaches::default(),
}))
}
}

impl IndexStores {
pub(in crate::idx) async fn get_store_btree_fst(
&self,
keys: TreeNodeProvider,
generation: u64,
tt: TransactionType,
cache_size: usize,
) -> BTreeStore<FstKeys> {
let cache = self.0.btree_fst_caches.get_cache(generation, &keys, cache_size).await;
TreeStore::new(keys, cache, tt).await
}

pub(in crate::idx) async fn get_store_btree_trie(
&self,
keys: TreeNodeProvider,
generation: u64,
tt: TransactionType,
cache_size: usize,
) -> BTreeStore<TrieKeys> {
let cache = self.0.btree_trie_caches.get_cache(generation, &keys, cache_size).await;
TreeStore::new(keys, cache, tt).await
}

pub(in crate::idx) async fn get_store_mtree(
&self,
keys: TreeNodeProvider,
generation: u64,
tt: TransactionType,
cache_size: usize,
) -> MTreeStore {
let cache = self.0.mtree_caches.get_cache(generation, &keys, cache_size).await;
TreeStore::new(keys, cache, tt).await
}

pub(crate) async fn index_removed(
&self,
opt: &Options,
tx: &mut Transaction,
tb: &str,
ix: &str,
) -> Result<(), Error> {
self.remove_index(
opt,
tx.get_and_cache_tb_index(opt.ns(), opt.db(), tb, ix).await?.as_ref(),
)
.await
}

pub(crate) async fn namespace_removed(
&self,
opt: &Options,
tx: &mut Transaction,
) -> Result<(), Error> {
for tb in tx.all_tb(opt.ns(), opt.db()).await?.iter() {
self.table_removed(opt, tx, &tb.name).await?;
}
Ok(())
}

pub(crate) async fn table_removed(
&self,
opt: &Options,
tx: &mut Transaction,
tb: &str,
) -> Result<(), Error> {
for ix in tx.all_tb_indexes(opt.ns(), opt.db(), tb).await?.iter() {
self.remove_index(opt, ix).await?;
}
Ok(())
}

async fn remove_index(&self, opt: &Options, ix: &DefineIndexStatement) -> Result<(), Error> {
let ikb = IndexKeyBase::new(opt, ix);
match ix.index {
Index::Search(_) => {
self.remove_search_cache(ikb).await;
}
Index::MTree(_) => {
self.remove_mtree_cache(ikb).await;
}
_ => {}
}
Ok(())
}

async fn remove_search_cache(&self, ikb: IndexKeyBase) {
self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::DocIds(ikb.clone())).await;
self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::DocLengths(ikb.clone())).await;
self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::Postings(ikb.clone())).await;
self.0.btree_fst_caches.remove_cache(&TreeNodeProvider::Terms(ikb)).await;
}

async fn remove_mtree_cache(&self, ikb: IndexKeyBase) {
self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::DocIds(ikb.clone())).await;
self.0.mtree_caches.remove_cache(&TreeNodeProvider::Vector(ikb.clone())).await;
}

pub async fn is_empty(&self) -> bool {
self.0.mtree_caches.is_empty().await
&& self.0.btree_fst_caches.is_empty().await
&& self.0.btree_trie_caches.is_empty().await
}
}
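Note: the generation check in `TreeCaches::get_cache` is the heart of the invalidation scheme: a cache is shared only when its generation matches the store's, replaced when the store has moved ahead, and a throwaway cache is handed to readers of an older generation. A std-only sketch of the same three-way comparison, with illustrative types:

use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::HashMap;

#[derive(Clone)]
struct Cache {
    generation: u64,
}

fn get_cache(held: &mut HashMap<Vec<u8>, Cache>, key: Vec<u8>, generation: u64) -> Cache {
    match held.entry(key) {
        Entry::Occupied(mut e) => match generation.cmp(&e.get().generation) {
            // An older reader: serve a throwaway cache, keep the held one.
            Ordering::Less => Cache { generation },
            // Same generation: share the held cache.
            Ordering::Equal => e.get().clone(),
            // The store moved ahead: replace the held cache.
            Ordering::Greater => {
                let c = Cache { generation };
                e.insert(c.clone());
                c
            }
        },
        Entry::Vacant(e) => {
            let c = Cache { generation };
            e.insert(c.clone());
            c
        }
    }
}

fn main() {
    let mut held = HashMap::new();
    let key = b"index-0".to_vec();
    assert_eq!(get_cache(&mut held, key.clone(), 1).generation, 1); // created and held
    assert_eq!(get_cache(&mut held, key.clone(), 2).generation, 2); // replaced
    assert_eq!(get_cache(&mut held, key, 1).generation, 1); // throwaway, held stays at 2
}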
177
lib/src/idx/trees/store/tree.rs
Normal file
@ -0,0 +1,177 @@
use crate::err::Error;
use crate::idx::trees::store::cache::TreeCache;
use crate::idx::trees::store::{NodeId, StoredNode, TreeNode, TreeNodeProvider};
use crate::kvs::{Key, Transaction};
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Display};
use std::sync::Arc;

pub struct TreeWrite<N>
where
N: TreeNode + Debug + Clone,
{
np: TreeNodeProvider,
cache: TreeCache<N>,
nodes: HashMap<NodeId, StoredNode<N>>,
updated: HashSet<NodeId>,
removed: HashMap<NodeId, Key>,
#[cfg(debug_assertions)]
out: HashSet<NodeId>,
}

impl<N> TreeWrite<N>
where
N: TreeNode + Clone + Debug + Display,
{
pub(super) fn new(keys: TreeNodeProvider, cache: TreeCache<N>) -> Self {
Self {
np: keys,
cache,
nodes: HashMap::new(),
updated: HashSet::new(),
removed: HashMap::new(),
#[cfg(debug_assertions)]
out: HashSet::new(),
}
}

pub(super) async fn get_node_mut(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
#[cfg(debug_assertions)]
{
debug!("GET: {}", node_id);
self.out.insert(node_id);
if self.removed.contains_key(&node_id) {
return Err(Error::Unreachable("TreeTransactionWrite::get_node_mut"));
}
}
if let Some(n) = self.nodes.remove(&node_id) {
#[cfg(debug_assertions)]
debug!("GET (NODES): {}", n.n);
return Ok(n);
}
let r = self.cache.get_node(tx, node_id).await?;
#[cfg(debug_assertions)]
debug!("GET (CACHE): {}", r.n);
Ok(StoredNode::new(r.n.clone(), r.id, r.key.clone(), r.size))
}

pub(super) fn set_node(&mut self, node: StoredNode<N>, updated: bool) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
if updated {
debug!("SET {updated}: {node}");
}
self.out.remove(&node.id);
}
if updated {
self.updated.insert(node.id);
}
if self.removed.contains_key(&node.id) {
return Err(Error::Unreachable("TreeTransactionWrite::set_node(2)"));
}
self.nodes.insert(node.id, node);
Ok(())
}

pub(super) fn new_node(&mut self, id: NodeId, node: N) -> StoredNode<N> {
#[cfg(debug_assertions)]
{
debug!("NEW: {}", id);
self.out.insert(id);
}
StoredNode::new(node, id, self.np.get_key(id), 0)
}

pub(super) fn remove_node(&mut self, node_id: NodeId, node_key: Key) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
debug!("REMOVE: {}", node_id);
if self.nodes.contains_key(&node_id) {
return Err(Error::Unreachable("TreeTransactionWrite::remove_node"));
}
self.out.remove(&node_id);
}
self.updated.remove(&node_id);
self.removed.insert(node_id, node_key);
Ok(())
}

pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
let update = !self.updated.is_empty() || !self.removed.is_empty();
#[cfg(debug_assertions)]
{
debug!("finish");
if !self.out.is_empty() {
debug!("OUT: {:?}", self.out);
return Err(Error::Unreachable("TreeTransactionWrite::finish(1)"));
}
}
for node_id in &self.updated {
if let Some(node) = self.nodes.remove(node_id) {
#[cfg(debug_assertions)]
debug!("finish: tx.save {node_id}");
self.np.save(tx, node).await?;
} else {
return Err(Error::Unreachable("TreeTransactionWrite::finish(2)"));
}
}
self.updated.clear();
let node_ids: Vec<NodeId> = self.removed.keys().copied().collect();
for node_id in node_ids {
if let Some(node_key) = self.removed.remove(&node_id) {
#[cfg(debug_assertions)]
debug!("finish: tx.del {node_id}");
tx.del(node_key).await?;
}
}
Ok(update)
}
}

#[cfg(debug_assertions)]
impl<N> Drop for TreeWrite<N>
where
N: TreeNode + Debug + Clone,
{
fn drop(&mut self) {
if !self.updated.is_empty() {
warn!("TreeWrite::finish not called?: updated not empty: {:?}", self.updated);
}
if !self.removed.is_empty() {
warn!("TreeWrite::finish not called?: removed not empty: {:?}", self.removed);
}
}
}

pub struct TreeRead<N>
where
N: TreeNode + Debug + Clone,
{
cache: TreeCache<N>,
}

impl<N> TreeRead<N>
where
N: TreeNode + Debug + Clone,
{
pub(super) fn new(cache: TreeCache<N>) -> Self {
Self {
cache,
}
}

pub(super) async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
let r = self.cache.get_node(tx, node_id).await?;
#[cfg(debug_assertions)]
debug!("GET: {}", node_id);
Ok(r)
}
}
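Note: under `debug_assertions`, `TreeWrite` tracks every node handed out via `get_node_mut` in the `out` set; a node must come back through `set_node` or `remove_node` before `finish`, or `finish` errors. A condensed sketch of that bookkeeping, with illustrative names:

use std::collections::HashSet;

#[derive(Default)]
struct WriteGuard {
    out: HashSet<u64>,
}

impl WriteGuard {
    fn get_node_mut(&mut self, id: u64) {
        // The node is now "out": the caller owns it until it is returned.
        self.out.insert(id);
    }
    fn set_node(&mut self, id: u64) {
        self.out.remove(&id);
    }
    fn finish(&self) -> Result<(), String> {
        if self.out.is_empty() {
            Ok(())
        } else {
            Err(format!("nodes still out: {:?}", self.out))
        }
    }
}

fn main() {
    let mut w = WriteGuard::default();
    w.get_node_mut(1);
    assert!(w.finish().is_err()); // node 1 was never returned
    w.set_node(1);
    assert!(w.finish().is_ok());
}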
@ -167,7 +167,7 @@ impl Vector {
(Vector::I16(a), Vector::I16(b)) => {
Ok((a.iter().zip(b.iter()).map(|(a, b)| (a - b).pow(2)).sum::<i16>() as f64).sqrt())
}
_ => Err(Error::Unreachable),
_ => Err(Error::Unreachable("Vector::euclidean_distance")),
}
}

@ -189,7 +189,7 @@ impl Vector {
(Vector::I16(a), Vector::I16(b)) => {
Ok(a.iter().zip(b.iter()).map(|(a, b)| (a - b).abs()).sum::<i16>() as f64)
}
_ => Err(Error::Unreachable),
_ => Err(Error::Unreachable("Vector::manhattan_distance")),
}
}
pub(super) fn minkowski_distance(&self, other: &Self, order: &Number) -> Result<f64, Error> {

@ -220,7 +220,7 @@ impl Vector {
.zip(b.iter())
.map(|(a, b)| (a - b).abs().pow(order.to_int() as u32))
.sum::<i16>() as f64,
_ => return Err(Error::Unreachable),
_ => return Err(Error::Unreachable("Vector::minkowski_distance")),
};
Ok(dist.powf(1.0 / order.to_float()))
}
@@ -7,6 +7,7 @@ use crate::dbs::{
 };
 use crate::err::Error;
 use crate::iam::{Action, Auth, Error as IamError, Resource, Role};
+use crate::idx::trees::store::IndexStores;
 use crate::key::root::hb::Hb;
 use crate::kvs::clock::SizedClock;
 #[allow(unused_imports)]

@@ -108,6 +109,8 @@ pub struct Datastore {
 	notification_channel: Option<(Sender<Notification>, Receiver<Notification>)>,
 	// Clock for tracking time. It is read only and accessible to all transactions. It is behind a mutex as tests may write to it.
 	clock: Arc<RwLock<SizedClock>>,
+	// The index store cache
+	index_stores: IndexStores,
 }

 /// We always want to be circulating the live query information

@@ -351,6 +354,7 @@ impl Datastore {
 			capabilities: Capabilities::default(),
 			versionstamp_oracle: Arc::new(Mutex::new(Oracle::systime_counter())),
 			clock,
+			index_stores: IndexStores::default(),
 		})
 	}

@@ -403,6 +407,10 @@ impl Datastore {
 		self
 	}

+	pub fn index_store(&self) -> &IndexStores {
+		&self.index_stores
+	}
+
 	/// Is authentication enabled for this Datastore?
 	pub fn is_auth_enabled(&self) -> bool {
 		self.auth_enabled

@@ -1046,12 +1054,11 @@ impl Datastore {
 		// Create a new query executor
 		let mut exe = Executor::new(self);
 		// Create a default context
-		let mut ctx = Context::default();
-		ctx.add_capabilities(self.capabilities.clone());
-		// Set the global query timeout
-		if let Some(timeout) = self.query_timeout {
-			ctx.add_timeout(timeout);
-		}
+		let mut ctx = Context::from_ds(
+			self.query_timeout,
+			self.capabilities.clone(),
+			self.index_stores.clone(),
+		);
 		// Setup the notification channel
 		if let Some(channel) = &self.notification_channel {
 			ctx.add_notifications(Some(&channel.0));
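Context::from_ds replaces the build-then-mutate sequence and threads the shared IndexStores handle into every query context. A sketch of why that per-query clone is cheap, assuming (as the Default and clone() usage suggests) that IndexStores is a handle over shared state; the types here are hypothetical stand-ins, and capabilities are omitted:

use std::sync::Arc;
use std::time::Duration;

// Stand-in: a cheaply clonable handle over shared cache state.
#[derive(Clone, Default)]
struct IndexStores(Arc<()>);

struct Context {
	query_timeout: Option<Duration>,
	index_stores: IndexStores,
}

impl Context {
	// Mirrors the shape of the Context::from_ds(...) call above: the datastore
	// hands each new context its settings in a single constructor step.
	fn from_ds(query_timeout: Option<Duration>, index_stores: IndexStores) -> Self {
		Self {
			query_timeout,
			index_stores,
		}
	}
}

fn main() {
	let stores = IndexStores::default();
	let ctx = Context::from_ds(Some(Duration::from_secs(30)), stores.clone());
	assert!(ctx.query_timeout.is_some());
	// Both handles point at the same shared state.
	assert!(Arc::ptr_eq(&stores.0, &ctx.index_stores.0));
}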
@@ -7,7 +7,6 @@ use crate::dbs::node::ClusterMembership;
 use crate::dbs::node::Timestamp;
 use crate::err::Error;
 use crate::idg::u32::U32;
-use crate::idx::trees::store::TreeStoreType;
 use crate::key::error::KeyCategory;
 use crate::key::key_req::KeyRequirements;
 use crate::kvs::cache::Cache;

@@ -76,7 +75,7 @@ pub(super) enum Inner {
 	#[cfg(feature = "kv-fdb")]
 	FoundationDB(super::fdb::Transaction),
 }

 #[derive(Copy, Clone)]
 pub enum TransactionType {
 	Read,
 	Write,

@@ -91,16 +90,6 @@ impl From<bool> for TransactionType {
 	}
 }

-impl From<TreeStoreType> for TransactionType {
-	fn from(value: TreeStoreType) -> Self {
-		match value {
-			TreeStoreType::Write => TransactionType::Write,
-			TreeStoreType::Read => TransactionType::Read,
-			TreeStoreType::Traversal => TransactionType::Read,
-		}
-	}
-}
-
 pub enum LockType {
 	Pessimistic,
 	Optimistic,
@@ -21,7 +21,7 @@ pub enum Index {
 }

 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
-#[revisioned(revision = 1)]
+#[revisioned(revision = 2)]
 pub struct SearchParams {
 	pub az: Ident,
 	pub hl: bool,

@@ -30,16 +30,28 @@ pub struct SearchParams {
 	pub doc_lengths_order: u32,
 	pub postings_order: u32,
 	pub terms_order: u32,
+	#[revision(start = 2)]
+	pub doc_ids_cache: u32,
+	#[revision(start = 2)]
+	pub doc_lengths_cache: u32,
+	#[revision(start = 2)]
+	pub postings_cache: u32,
+	#[revision(start = 2)]
+	pub terms_cache: u32,
 }

 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
-#[revisioned(revision = 1)]
+#[revisioned(revision = 2)]
 pub struct MTreeParams {
 	pub dimension: u16,
 	pub distance: Distance,
 	pub vector_type: VectorType,
 	pub capacity: u16,
 	pub doc_ids_order: u32,
+	#[revision(start = 2)]
+	pub doc_ids_cache: u32,
+	#[revision(start = 2)]
+	pub mtree_cache: u32,
 }

 #[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
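The `#[revisioned(revision = 2)]` bump together with the `#[revision(start = 2)]` markers keeps previously stored index definitions readable: fields introduced in revision 2 are simply absent from revision-1 payloads and come back as defaults. A hand-rolled sketch of that defaulting behavior (this is not the revision crate's actual machinery, just the idea):

#[derive(Debug, Default, PartialEq)]
struct SearchParamsV2 {
	postings_order: u32,
	postings_cache: u32, // field introduced in revision 2
}

// Decode a payload tagged with the revision it was written at.
fn decode(revision: u8, fields: &[u32]) -> SearchParamsV2 {
	let mut p = SearchParamsV2::default();
	p.postings_order = fields[0];
	if revision >= 2 {
		// Only revision-2 payloads carry the cache size; older ones default it.
		p.postings_cache = fields[1];
	}
	p
}

fn main() {
	// A definition stored before this commit (revision 1) still decodes.
	let old = decode(1, &[100]);
	assert_eq!(old, SearchParamsV2 { postings_order: 100, postings_cache: 0 });
	let new = decode(2, &[100, 250]);
	assert_eq!(new, SearchParamsV2 { postings_order: 100, postings_cache: 250 });
}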
@@ -94,13 +106,17 @@ impl Display for Index {
 			Self::Search(p) => {
 				write!(
 					f,
-					"SEARCH ANALYZER {} {} DOC_IDS_ORDER {} DOC_LENGTHS_ORDER {} POSTINGS_ORDER {} TERMS_ORDER {}",
+					"SEARCH ANALYZER {} {} DOC_IDS_ORDER {} DOC_LENGTHS_ORDER {} POSTINGS_ORDER {} TERMS_ORDER {} DOC_IDS_CACHE {} DOC_LENGTHS_CACHE {} POSTINGS_CACHE {} TERMS_CACHE {}",
 					p.az,
 					p.sc,
 					p.doc_ids_order,
 					p.doc_lengths_order,
 					p.postings_order,
-					p.terms_order
+					p.terms_order,
+					p.doc_ids_cache,
+					p.doc_lengths_cache,
+					p.postings_cache,
+					p.terms_cache
 				)?;
 				if p.hl {
 					f.write_str(" HIGHLIGHTS")?

@@ -110,8 +126,8 @@ impl Display for Index {
 			Self::MTree(p) => {
 				write!(
 					f,
-					"MTREE DIMENSION {} DIST {} TYPE {} CAPACITY {} DOC_IDS_ORDER {}",
-					p.dimension, p.distance, p.vector_type, p.capacity, p.doc_ids_order
+					"MTREE DIMENSION {} DIST {} TYPE {} CAPACITY {} DOC_IDS_ORDER {} DOC_IDS_CACHE {} MTREE_CACHE {}",
+					p.dimension, p.distance, p.vector_type, p.capacity, p.doc_ids_order, p.doc_ids_cache, p.mtree_cache
 				)
 			}
 		}
@@ -1,5 +1,6 @@
-use lru::LruCache;
 use once_cell::sync::Lazy;
+use quick_cache::sync::Cache;
+use quick_cache::GuardResult;
 use revision::revisioned;
 use serde::{
 	de::{self, Visitor},

@@ -9,9 +10,7 @@ use std::cmp::Ordering;
 use std::fmt::Debug;
 use std::fmt::{self, Display, Formatter};
 use std::hash::{Hash, Hasher};
-use std::num::NonZeroUsize;
 use std::str::FromStr;
-use std::sync::Mutex;
 use std::{env, str};

 pub(crate) const TOKEN: &str = "$surrealdb::private::sql::Regex";

@@ -28,22 +27,24 @@ impl Regex {
 }

 fn regex_new(str: &str) -> Result<regex::Regex, regex::Error> {
-	static REGEX_CACHE: Lazy<Mutex<LruCache<String, regex::Regex>>> = Lazy::new(|| {
+	static REGEX_CACHE: Lazy<Cache<String, regex::Regex>> = Lazy::new(|| {
 		let cache_size: usize = env::var("SURREAL_REGEX_CACHE_SIZE")
 			.map_or(1000, |v| v.parse().unwrap_or(1000))
 			.max(10); // The minimum cache size is 10
-		Mutex::new(LruCache::new(NonZeroUsize::new(cache_size).unwrap()))
+		Cache::new(cache_size)
 	});
-	let mut cache = match REGEX_CACHE.lock() {
-		Ok(guard) => guard,
-		Err(poisoned) => poisoned.into_inner(),
-	};
-	if let Some(re) = cache.get(str) {
-		return Ok(re.clone());
+	match REGEX_CACHE.get_value_or_guard(str, None) {
+		GuardResult::Value(v) => Ok(v),
+		GuardResult::Guard(g) => {
+			let re = regex::Regex::new(str)?;
+			g.insert(re.clone()).ok();
+			Ok(re)
+		}
+		GuardResult::Timeout => {
+			warn!("Regex cache timeout");
+			regex::Regex::new(str)
+		}
 	}
-	let re = regex::Regex::new(str)?;
-	cache.put(str.to_owned(), re.clone());
-	Ok(re)
 }

 impl FromStr for Regex {
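The switch from a Mutex<LruCache> to quick_cache's sync::Cache drops the global lock (and the poisoning recovery dance), and get_value_or_guard additionally ensures that when a key is missing, only one caller compiles the regex while concurrent callers wait on the guard. A standalone sketch of the same pattern with a cheap stand-in computation; the helper function is hypothetical, but the cache calls mirror the ones used above and require the quick_cache 0.4 dependency this commit adds:

use quick_cache::sync::Cache;
use quick_cache::GuardResult;

fn get_or_compute(cache: &Cache<String, usize>, key: &str) -> usize {
	// None = wait indefinitely for a concurrent inserter, as in regex_new above.
	match cache.get_value_or_guard(key, None) {
		GuardResult::Value(v) => v, // cache hit
		GuardResult::Guard(g) => {
			let v = key.len(); // stand-in for the expensive regex::Regex::new
			g.insert(v).ok(); // publish for other waiters; ignore insert races
			v
		}
		GuardResult::Timeout => key.len(), // fall through and compute without caching
	}
}

fn main() {
	let cache = Cache::new(10);
	assert_eq!(get_or_compute(&cache, "abc"), 3);
	assert_eq!(get_or_compute(&cache, "abc"), 3); // second call is a hit
}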
@@ -6,8 +6,8 @@ use crate::err::Error;
 use crate::iam::{Action, ResourceKind};
 use crate::idx::ft::FtIndex;
 use crate::idx::trees::mtree::MTreeIndex;
-use crate::idx::trees::store::TreeStoreType;
 use crate::idx::IndexKeyBase;
+use crate::kvs::TransactionType;
 use crate::sql::ident::Ident;
 use crate::sql::index::Index;
 use crate::sql::value::Value;

@@ -28,7 +28,7 @@ impl AnalyzeStatement {
 	/// Process this type returning a computed simple Value
 	pub(crate) async fn compute(
 		&self,
-		_ctx: &Context<'_>,
+		ctx: &Context<'_>,
 		opt: &Options,
 		txn: &Transaction,
 		_doc: Option<&CursorDoc<'_>>,

@@ -48,14 +48,28 @@ impl AnalyzeStatement {
 		// Index operation dispatching
 		let value: Value = match &ix.index {
 			Index::Search(p) => {
-				let ft =
-					FtIndex::new(opt, txn, p.az.as_str(), ikb, p, TreeStoreType::Traversal)
-						.await?;
+				let ft = FtIndex::new(
+					ctx.get_index_stores(),
+					opt,
+					txn,
+					p.az.as_str(),
+					ikb,
+					p,
+					TransactionType::Read,
+				)
+				.await?;
 				ft.statistics(txn).await?.into()
 			}
 			Index::MTree(p) => {
 				let mut tx = txn.lock().await;
-				let mt = MTreeIndex::new(&mut tx, ikb, p, TreeStoreType::Traversal).await?;
+				let mt = MTreeIndex::new(
+					ctx.get_index_stores(),
+					&mut tx,
+					ikb,
+					p,
+					TransactionType::Read,
+				)
+				.await?;
 				mt.statistics(&mut tx).await?.into()
 			}
 			_ => {
@@ -19,7 +19,7 @@ impl RemoveIndexStatement {
 	/// Process this type returning a computed simple Value
 	pub(crate) async fn compute(
 		&self,
-		_ctx: &Context<'_>,
+		ctx: &Context<'_>,
 		opt: &Options,
 		txn: &Transaction,
 	) -> Result<Value, Error> {

@@ -27,6 +27,8 @@ impl RemoveIndexStatement {
 		opt.is_allowed(Action::Edit, ResourceKind::Index, &Base::Db)?;
 		// Claim transaction
 		let mut run = txn.lock().await;
+		// Clear the index store cache
+		ctx.get_index_stores().index_removed(opt, &mut run, &self.what, &self.name).await?;
 		// Clear the cache
 		run.clear_cache();
 		// Delete the definition
@@ -18,7 +18,7 @@ impl RemoveNamespaceStatement {
 	/// Process this type returning a computed simple Value
 	pub(crate) async fn compute(
 		&self,
-		_ctx: &Context<'_>,
+		ctx: &Context<'_>,
 		opt: &Options,
 		txn: &Transaction,
 	) -> Result<Value, Error> {

@@ -26,6 +26,7 @@ impl RemoveNamespaceStatement {
 		opt.is_allowed(Action::Edit, ResourceKind::Namespace, &Base::Root)?;
 		// Claim transaction
 		let mut run = txn.lock().await;
+		ctx.get_index_stores().namespace_removed(opt, &mut run).await?;
 		// Clear the cache
 		run.clear_cache();
 		// Delete the definition
@@ -19,7 +19,7 @@ impl RemoveTableStatement {
 	/// Process this type returning a computed simple Value
 	pub(crate) async fn compute(
 		&self,
-		_ctx: &Context<'_>,
+		ctx: &Context<'_>,
 		opt: &Options,
 		txn: &Transaction,
 	) -> Result<Value, Error> {

@@ -27,6 +27,8 @@ impl RemoveTableStatement {
 		opt.is_allowed(Action::Edit, ResourceKind::Table, &Base::Db)?;
 		// Claim transaction
 		let mut run = txn.lock().await;
+		// Remove the index stores
+		ctx.get_index_stores().table_removed(opt, &mut run, &self.name).await?;
 		// Clear the cache
 		run.clear_cache();
 		// Get the defined table
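All three REMOVE statements follow the same shape: before the definition is deleted and the transaction cache cleared, the shared IndexStores is told which subtree of cached index stores to evict. A rough sketch of that hierarchical eviction; the types, keying, and method bodies are hypothetical (the real methods also take opt and the transaction, and are async):

use std::collections::HashMap;
use std::sync::{Arc, Mutex};

#[derive(Clone, Default)]
struct IndexStores {
	// key: (namespace, database, table, index name)
	caches: Arc<Mutex<HashMap<(String, String, String, String), Vec<u8>>>>,
}

impl IndexStores {
	fn index_removed(&self, ns: &str, db: &str, tb: &str, ix: &str) {
		self.caches.lock().unwrap().retain(|k, _| {
			(k.0.as_str(), k.1.as_str(), k.2.as_str(), k.3.as_str()) != (ns, db, tb, ix)
		});
	}
	fn table_removed(&self, ns: &str, db: &str, tb: &str) {
		self.caches.lock().unwrap().retain(|k, _| !(k.0 == ns && k.1 == db && k.2 == tb));
	}
	fn namespace_removed(&self, ns: &str) {
		self.caches.lock().unwrap().retain(|k, _| k.0 != ns);
	}
	fn is_empty(&self) -> bool {
		self.caches.lock().unwrap().is_empty()
	}
}

fn main() {
	let ixs = IndexStores::default();
	let key = ("test".to_string(), "test".to_string(), "book".to_string(), "ft_title".to_string());
	ixs.caches.lock().unwrap().insert(key, Vec::new());
	// Removing the table takes every index store under it with it.
	ixs.table_removed("test", "test", "book");
	assert!(ixs.is_empty());
}

The integration test at the end of this diff checks exactly this invariant: after the indexes are removed, dbs.index_store().is_empty() holds.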
@@ -89,10 +89,14 @@ mod tests {
 				k1: Default::default(),
 				b: Default::default(),
 			},
-			doc_ids_order: Default::default(),
-			doc_lengths_order: Default::default(),
-			postings_order: Default::default(),
-			terms_order: Default::default(),
+			doc_ids_order: 1,
+			doc_lengths_order: 2,
+			postings_order: 3,
+			terms_order: 4,
+			doc_ids_cache: 5,
+			doc_lengths_cache: 6,
+			postings_cache: 7,
+			terms_cache: 8,
 		});
 		let serialized = idx.serialize(Serializer.wrap()).unwrap();
 		assert_eq!(idx, serialized);
@@ -51,6 +51,8 @@ pub(super) struct SerializeMTree {
 	vector_type: VectorType,
 	capacity: u16,
 	doc_ids_order: u32,
+	doc_ids_cache: u32,
+	mtree_cache: u32,
 }
 impl serde::ser::SerializeStruct for SerializeMTree {
 	type Ok = MTreeParams;

@@ -76,6 +78,12 @@ impl serde::ser::SerializeStruct for SerializeMTree {
 			"doc_ids_order" => {
 				self.doc_ids_order = value.serialize(ser::primitive::u32::Serializer.wrap())?;
 			}
+			"doc_ids_cache" => {
+				self.doc_ids_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
+			}
+			"mtree_cache" => {
+				self.mtree_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
+			}
 			key => {
 				return Err(Error::custom(format!("unexpected field `MTreeParams {{ {key} }}`")));
 			}

@@ -90,6 +98,8 @@ impl serde::ser::SerializeStruct for SerializeMTree {
 			vector_type: self.vector_type,
 			capacity: self.capacity,
 			doc_ids_order: self.doc_ids_order,
+			doc_ids_cache: self.doc_ids_cache,
+			mtree_cache: self.mtree_cache,
 		})
 	}
 }

@@ -102,6 +112,8 @@ fn mtree_params() {
 		vector_type: Default::default(),
 		capacity: 2,
 		doc_ids_order: 3,
+		doc_ids_cache: 4,
+		mtree_cache: 5,
 	};
 	let serialized = params.serialize(Serializer.wrap()).unwrap();
 	assert_eq!(params, serialized);
@@ -54,6 +54,10 @@ pub(super) struct SerializeSearch {
 	doc_lengths_order: u32,
 	postings_order: u32,
 	terms_order: u32,
+	doc_ids_cache: u32,
+	doc_lengths_cache: u32,
+	postings_cache: u32,
+	terms_cache: u32,
 }

 impl serde::ser::SerializeStruct for SerializeSearch {

@@ -86,6 +90,18 @@ impl serde::ser::SerializeStruct for SerializeSearch {
 			"terms_order" => {
 				self.terms_order = value.serialize(ser::primitive::u32::Serializer.wrap())?;
 			}
+			"doc_ids_cache" => {
+				self.doc_ids_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
+			}
+			"doc_lengths_cache" => {
+				self.doc_lengths_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
+			}
+			"postings_cache" => {
+				self.postings_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
+			}
+			"terms_cache" => {
+				self.terms_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
+			}
 			key => {
 				return Err(Error::custom(format!("unexpected field `SearchParams {{ {key} }}`")));
 			}

@@ -103,6 +119,10 @@ impl serde::ser::SerializeStruct for SerializeSearch {
 				doc_lengths_order: self.doc_lengths_order,
 				postings_order: self.postings_order,
 				terms_order: self.terms_order,
+				doc_ids_cache: self.doc_ids_cache,
+				doc_lengths_cache: self.doc_lengths_cache,
+				postings_cache: self.postings_cache,
+				terms_cache: self.terms_cache,
 			}),
 			_ => Err(Error::custom("`SearchParams` missing required field(s)")),
 		}

@@ -115,10 +135,14 @@ fn search_params() {
 		az: Default::default(),
 		hl: false,
 		sc: Scoring::Vs,
-		doc_ids_order: 0,
-		doc_lengths_order: 0,
-		postings_order: 0,
-		terms_order: 0,
+		doc_ids_order: 1,
+		doc_lengths_order: 2,
+		postings_order: 3,
+		terms_order: 4,
+		doc_ids_cache: 5,
+		doc_lengths_cache: 6,
+		postings_cache: 7,
+		terms_cache: 8,
 	};
 	let serialized = params.serialize(Serializer.wrap()).unwrap();
 	assert_eq!(params, serialized);
@@ -7,6 +7,7 @@ use crate::sql::{
 	index::{Distance, MTreeParams, SearchParams, VectorType},
 	Ident, Index,
 };

 use nom::{
 	branch::alt,
 	bytes::complete::{tag, tag_no_case},

@@ -43,18 +44,34 @@ pub fn doc_ids_order(i: &str) -> IResult<&str, u32> {
 	order("DOC_IDS_ORDER", i)
 }

+pub fn doc_ids_cache(i: &str) -> IResult<&str, u32> {
+	order("DOC_IDS_CACHE", i)
+}
+
 pub fn doc_lengths_order(i: &str) -> IResult<&str, u32> {
 	order("DOC_LENGTHS_ORDER", i)
 }

+pub fn doc_lengths_cache(i: &str) -> IResult<&str, u32> {
+	order("DOC_LENGTHS_CACHE", i)
+}
+
 pub fn postings_order(i: &str) -> IResult<&str, u32> {
 	order("POSTINGS_ORDER", i)
 }

+pub fn postings_cache(i: &str) -> IResult<&str, u32> {
+	order("POSTINGS_CACHE", i)
+}
+
 pub fn terms_order(i: &str) -> IResult<&str, u32> {
 	order("TERMS_ORDER", i)
 }

+pub fn terms_cache(i: &str) -> IResult<&str, u32> {
+	order("TERMS_CACHE", i)
+}
+
 pub fn highlights(i: &str) -> IResult<&str, bool> {
 	let (i, _) = mightbespace(i)?;
 	map(opt(tag("HIGHLIGHTS")), |x| x.is_some())(i)

@@ -71,6 +88,10 @@ pub fn search(i: &str) -> IResult<&str, Index> {
 		let (i, o2) = opt(doc_lengths_order)(i)?;
 		let (i, o3) = opt(postings_order)(i)?;
 		let (i, o4) = opt(terms_order)(i)?;
+		let (i, c1) = opt(doc_ids_cache)(i)?;
+		let (i, c2) = opt(doc_lengths_cache)(i)?;
+		let (i, c3) = opt(postings_cache)(i)?;
+		let (i, c4) = opt(terms_cache)(i)?;
 		let (i, hl) = highlights(i)?;
 		Ok((
 			i,

@@ -82,6 +103,10 @@ pub fn search(i: &str) -> IResult<&str, Index> {
 				doc_lengths_order: o2.unwrap_or(100),
 				postings_order: o3.unwrap_or(100),
 				terms_order: o4.unwrap_or(100),
+				doc_ids_cache: c1.unwrap_or(100),
+				doc_lengths_cache: c2.unwrap_or(100),
+				postings_cache: c3.unwrap_or(100),
+				terms_cache: c4.unwrap_or(100),
 			}),
 		))
 	})(i)

@@ -134,6 +159,10 @@ pub fn capacity(i: &str) -> IResult<&str, u16> {
 	Ok((i, capacity))
 }

+pub fn mtree_cache(i: &str) -> IResult<&str, u32> {
+	order("MTREE_CACHE", i)
+}
+
 pub fn mtree(i: &str) -> IResult<&str, Index> {
 	let (i, _) = tag_no_case("MTREE")(i)?;
 	let (i, _) = shouldbespace(i)?;

@@ -143,6 +172,8 @@ pub fn mtree(i: &str) -> IResult<&str, Index> {
 	let (i, vector_type) = opt(vector_type)(i)?;
 	let (i, capacity) = opt(capacity)(i)?;
 	let (i, doc_ids_order) = opt(doc_ids_order)(i)?;
+	let (i, doc_ids_cache) = opt(doc_ids_cache)(i)?;
+	let (i, mtree_cache) = opt(mtree_cache)(i)?;
 	Ok((
 		i,
 		Index::MTree(MTreeParams {

@@ -151,6 +182,8 @@ pub fn mtree(i: &str) -> IResult<&str, Index> {
 			vector_type: vector_type.unwrap_or(VectorType::F64),
 			capacity: capacity.unwrap_or(40),
 			doc_ids_order: doc_ids_order.unwrap_or(100),
+			doc_ids_cache: doc_ids_cache.unwrap_or(100),
+			mtree_cache: mtree_cache.unwrap_or(100),
 		}),
 	))
 	})(i)
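Every new cache clause is optional and falls back to 100 via unwrap_or, just like the existing *_ORDER clauses. So, for example, the two MTREE definitions below should parse to the same MTreeParams; this is a sketch using the defaults visible in the parser above (TYPE F64, CAPACITY 40, DOC_IDS_ORDER 100, and the new DOC_IDS_CACHE 100 and MTREE_CACHE 100):

fn main() {
	// Shorthand: everything after DIST falls back to its default.
	let short = "DEFINE INDEX ix ON t FIELDS v MTREE DIMENSION 4 DIST EUCLIDEAN";
	// Fully spelled out with the defaults made explicit, including the new
	// cache clauses introduced by this commit.
	let full = "DEFINE INDEX ix ON t FIELDS v MTREE DIMENSION 4 DIST EUCLIDEAN \
	            TYPE F64 CAPACITY 40 DOC_IDS_ORDER 100 DOC_IDS_CACHE 100 MTREE_CACHE 100";
	// With the mtree parser above in scope, both should produce the same
	// Index::MTree value; we just show the statements here.
	println!("{short}\n{full}");
}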
@@ -138,7 +138,9 @@ mod tests {

 	#[test]
 	fn check_create_search_index_with_highlights() {
-		let sql = "INDEX my_index ON TABLE my_table COLUMNS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) DOC_IDS_ORDER 1000 DOC_LENGTHS_ORDER 1000 POSTINGS_ORDER 1000 TERMS_ORDER 1000 HIGHLIGHTS";
+		let sql = "INDEX my_index ON TABLE my_table COLUMNS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) \
+			DOC_IDS_ORDER 1100 DOC_LENGTHS_ORDER 1200 POSTINGS_ORDER 1300 TERMS_ORDER 1400 \
+			DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 200 POSTINGS_CACHE 300 TERMS_CACHE 400 HIGHLIGHTS";
 		let (_, idx) = index(sql).unwrap();
 		assert_eq!(
 			idx,

@@ -153,15 +155,21 @@ mod tests {
 					k1: 1.2,
 					b: 0.75,
 				},
-				doc_ids_order: 1000,
-				doc_lengths_order: 1000,
-				postings_order: 1000,
-				terms_order: 1000,
+				doc_ids_order: 1100,
+				doc_lengths_order: 1200,
+				postings_order: 1300,
+				terms_order: 1400,
+				doc_ids_cache: 100,
+				doc_lengths_cache: 200,
+				postings_cache: 300,
+				terms_cache: 400,
 			}),
 			comment: None,
 		}
 		);
-		assert_eq!(idx.to_string(), "DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) DOC_IDS_ORDER 1000 DOC_LENGTHS_ORDER 1000 POSTINGS_ORDER 1000 TERMS_ORDER 1000 HIGHLIGHTS");
+		assert_eq!(idx.to_string(), "DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) \
+			DOC_IDS_ORDER 1100 DOC_LENGTHS_ORDER 1200 POSTINGS_ORDER 1300 TERMS_ORDER 1400 \
+			DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 200 POSTINGS_CACHE 300 TERMS_CACHE 400 HIGHLIGHTS");
 	}

 	#[test]

@@ -182,13 +190,17 @@ mod tests {
 				doc_lengths_order: 100,
 				postings_order: 100,
 				terms_order: 100,
+				doc_ids_cache: 100,
+				doc_lengths_cache: 100,
+				postings_cache: 100,
+				terms_cache: 100,
 			}),
 			comment: None,
 		}
 		);
 		assert_eq!(
 			idx.to_string(),
-			"DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer VS DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100"
+			"DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer VS DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 100 POSTINGS_CACHE 100 TERMS_CACHE 100"
 		);
 	}

@@ -208,13 +220,15 @@ mod tests {
 			distance: Distance::Euclidean,
 			capacity: 40,
 			doc_ids_order: 100,
+			doc_ids_cache: 100,
+			mtree_cache: 100,
 		}),
 		comment: None,
 		}
 		);
 		assert_eq!(
 			idx.to_string(),
-			"DEFINE INDEX my_index ON my_table FIELDS my_col MTREE DIMENSION 4 DIST EUCLIDEAN TYPE F64 CAPACITY 40 DOC_IDS_ORDER 100"
+			"DEFINE INDEX my_index ON my_table FIELDS my_col MTREE DIMENSION 4 DIST EUCLIDEAN TYPE F64 CAPACITY 40 DOC_IDS_ORDER 100 DOC_IDS_CACHE 100 MTREE_CACHE 100"
 		);
 	}
 }
@@ -1224,11 +1224,12 @@ async fn define_statement_search_index() -> Result<(), Error> {
 			tables: {},
 			indexes: { blog_title: 'DEFINE INDEX blog_title ON blog FIELDS title \
 			SEARCH ANALYZER simple BM25(1.2,0.75) \
-			DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 HIGHLIGHTS' },
+			DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 \
+			DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 100 POSTINGS_CACHE 100 TERMS_CACHE 100 HIGHLIGHTS' },
 			lives: {},
 		}",
 	);
 	assert_eq!(tmp, val);
 	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));

 	let tmp = res.remove(0).result?;
@@ -39,7 +39,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	let tmp = res.remove(0).result?;
 	let val = Value::parse(
 		"[

@@ -49,7 +49,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	Ok(())
 }

@@ -89,7 +89,7 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
 			},
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	let tmp = res.remove(0).result?;
 	let val = Value::parse(
 		"[

@@ -99,7 +99,7 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	Ok(())
 }

@@ -143,7 +143,7 @@ async fn select_where_matches_using_index_and_arrays(parallel: bool) -> Result<(
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	//
 	let tmp = res.remove(0).result?;
 	let val = Value::parse(

@@ -158,7 +158,7 @@ async fn select_where_matches_using_index_and_arrays(parallel: bool) -> Result<(
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	Ok(())
 }

@@ -212,7 +212,7 @@ async fn select_where_matches_using_index_and_objects(parallel: bool) -> Result<
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	//
 	let tmp = res.remove(0).result?;
 	let val = Value::parse(

@@ -274,7 +274,7 @@ async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	Ok(())
 }

@@ -306,7 +306,7 @@ async fn select_where_matches_using_index_and_score() -> Result<(), Error> {
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	Ok(())
 }

@@ -345,11 +345,11 @@ async fn select_where_matches_without_using_index_and_score() -> Result<(), Erro
 			}
 		]",
 	);
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));

 	// This result should be empty, as we are looking for non-existing terms (dummy1 and dummy2).
 	let tmp = res.remove(0).result?;
 	let val = Value::parse("[]");
-	assert_eq!(tmp, val);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
 	Ok(())
 }
@@ -123,6 +123,9 @@ async fn remove_statement_index() -> Result<(), Error> {
 	for ix in ["uniq_isbn", "idx_author", "ft_title"] {
 		assert_empty_prefix!(&mut tx, surrealdb::key::index::all::new("test", "test", "book", ix));
 	}

+	// Every index store cache has been removed
+	assert!(dbs.index_store().is_empty().await);
+
 	Ok(())
 }