Feat: In Memory index store (#3020)

This commit is contained in:
Emmanuel Keller 2023-12-13 13:37:24 +00:00 committed by GitHub
parent 378df76cb0
commit a6c50cb5f5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 3009 additions and 1623 deletions

23
Cargo.lock generated
View file

@ -3014,15 +3014,6 @@ version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "lru"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7"
dependencies = [
"hashbrown 0.14.3",
]
[[package]] [[package]]
name = "lz4-sys" name = "lz4-sys"
version = "1.9.4" version = "1.9.4"
@ -3994,6 +3985,18 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "quick_cache"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f69f8d22fa3f34f3083d9a4375c038732c7a7e964de1beb81c544da92dfc40b8"
dependencies = [
"ahash 0.8.6",
"equivalent",
"hashbrown 0.14.3",
"parking_lot",
]
[[package]] [[package]]
name = "quote" name = "quote"
version = "1.0.33" version = "1.0.33"
@ -5311,7 +5314,6 @@ dependencies = [
"indxdb", "indxdb",
"ipnet", "ipnet",
"lexicmp", "lexicmp",
"lru",
"md-5", "md-5",
"nanoid", "nanoid",
"native-tls", "native-tls",
@ -5325,6 +5327,7 @@ dependencies = [
"pharos", "pharos",
"pin-project-lite", "pin-project-lite",
"pprof", "pprof",
"quick_cache",
"radix_trie", "radix_trie",
"rand 0.8.5", "rand 0.8.5",
"regex", "regex",

View file

@ -82,7 +82,6 @@ ipnet = "2.9.0"
js = { version = "=0.4.0-beta.4", package = "rquickjs", features = ["array-buffer", "bindgen", "classes", "futures", "loader", "macro", "parallel", "properties","rust-alloc"], optional = true } js = { version = "=0.4.0-beta.4", package = "rquickjs", features = ["array-buffer", "bindgen", "classes", "futures", "loader", "macro", "parallel", "properties","rust-alloc"], optional = true }
jsonwebtoken = { version = "8.3.0-surreal.1", package = "surrealdb-jsonwebtoken" } jsonwebtoken = { version = "8.3.0-surreal.1", package = "surrealdb-jsonwebtoken" }
lexicmp = "0.1.0" lexicmp = "0.1.0"
lru = "0.12.1"
md-5 = "0.10.6" md-5 = "0.10.6"
nanoid = "0.4.0" nanoid = "0.4.0"
native-tls = { version = "0.2.11", optional = true } native-tls = { version = "0.2.11", optional = true }
@ -94,6 +93,7 @@ once_cell = "1.18.0"
path-clean = "1.0.1" path-clean = "1.0.1"
pbkdf2 = { version = "0.12.2", features = ["simple"] } pbkdf2 = { version = "0.12.2", features = ["simple"] }
pin-project-lite = "0.2.13" pin-project-lite = "0.2.13"
quick_cache = "0.4.0"
radix_trie = { version = "0.2.1", features = ["serde"] } radix_trie = { version = "0.2.1", features = ["serde"] }
rand = "0.8.5" rand = "0.8.5"
regex = "1.10.2" regex = "1.10.2"

View file

@ -5,7 +5,8 @@ use std::fmt::Debug;
use std::time::Duration; use std::time::Duration;
use surrealdb::idx::trees::bkeys::{BKeys, FstKeys, TrieKeys}; use surrealdb::idx::trees::bkeys::{BKeys, FstKeys, TrieKeys};
use surrealdb::idx::trees::btree::{BState, BTree, Payload}; use surrealdb::idx::trees::btree::{BState, BTree, Payload};
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType}; use surrealdb::idx::trees::store::cache::TreeCache;
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeStore};
use surrealdb::kvs::{Datastore, Key, LockType::*, TransactionType::*}; use surrealdb::kvs::{Datastore, Key, LockType::*, TransactionType::*};
use tokio::runtime::Runtime; use tokio::runtime::Runtime;
macro_rules! get_key_value { macro_rules! get_key_value {
@ -24,12 +25,22 @@ fn bench_index_btree(c: &mut Criterion) {
group.bench_function("trees-insertion-fst", |b| { group.bench_function("trees-insertion-fst", |b| {
b.to_async(Runtime::new().unwrap()) b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, FstKeys>(samples_len, |i| get_key_value!(samples[i]))) .iter(|| bench::<_, FstKeys>(samples_len, 100, |i| get_key_value!(samples[i])))
}); });
group.bench_function("trees-insertion-trie", |b| { group.bench_function("trees-insertion-trie", |b| {
b.to_async(Runtime::new().unwrap()) b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, TrieKeys>(samples_len, |i| get_key_value!(samples[i]))) .iter(|| bench::<_, TrieKeys>(samples_len, 100, |i| get_key_value!(samples[i])))
});
group.bench_function("trees-insertion-fst-fullcache", |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, FstKeys>(samples_len, 0, |i| get_key_value!(samples[i])))
});
group.bench_function("trees-insertion-trie-fullcache", |b| {
b.to_async(Runtime::new().unwrap())
.iter(|| bench::<_, TrieKeys>(samples_len, 0, |i| get_key_value!(samples[i])))
}); });
group.finish(); group.finish();
@ -47,23 +58,24 @@ fn setup() -> (usize, Vec<usize>) {
(samples_len, samples) (samples_len, samples)
} }
async fn bench<F, BK>(samples_size: usize, sample_provider: F) async fn bench<F, BK>(samples_size: usize, cache_size: usize, sample_provider: F)
where where
F: Fn(usize) -> (Key, Payload), F: Fn(usize) -> (Key, Payload),
BK: BKeys + Default + Debug, BK: BKeys + Clone + Default + Debug,
{ {
let ds = Datastore::new("memory").await.unwrap(); let ds = Datastore::new("memory").await.unwrap();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut t = BTree::<BK>::new(BState::new(100)); let mut t = BTree::<BK>::new(BState::new(100));
let s = TreeNodeStore::new(TreeNodeProvider::Debug, TreeStoreType::Write, 20); let c = TreeCache::new(0, TreeNodeProvider::Debug, cache_size);
let mut s = s.lock().await; let mut s = TreeStore::new(TreeNodeProvider::Debug, c, Write).await;
for i in 0..samples_size { for i in 0..samples_size {
let (key, payload) = sample_provider(i); let (key, payload) = sample_provider(i);
// Insert the sample // Insert the sample
t.insert(&mut tx, &mut s, key.clone(), payload).await.unwrap(); t.insert(&mut tx, &mut s, key.clone(), payload).await.unwrap();
// Search for it // Search for it
black_box(t.search(&mut tx, &mut s, &key).await.unwrap()); black_box(t.search_mut(&mut tx, &mut s, &key).await.unwrap());
} }
s.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap(); tx.commit().await.unwrap();
} }

View file

@ -7,7 +7,8 @@ use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
use surrealdb::idx::docids::DocId; use surrealdb::idx::docids::DocId;
use surrealdb::idx::trees::mtree::{MState, MTree}; use surrealdb::idx::trees::mtree::{MState, MTree};
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType}; use surrealdb::idx::trees::store::cache::TreeCache;
use surrealdb::idx::trees::store::{TreeNodeProvider, TreeStore};
use surrealdb::idx::trees::vector::Vector; use surrealdb::idx::trees::vector::Vector;
use surrealdb::kvs::Datastore; use surrealdb::kvs::Datastore;
use surrealdb::kvs::LockType::Optimistic; use surrealdb::kvs::LockType::Optimistic;
@ -16,19 +17,35 @@ use surrealdb::sql::index::Distance;
use tokio::runtime::Runtime; use tokio::runtime::Runtime;
fn bench_index_mtree_dim_3(c: &mut Criterion) { fn bench_index_mtree_dim_3(c: &mut Criterion) {
bench_index_mtree(c, 1_000, 100_000, 3, 120); bench_index_mtree(c, 1_000, 100_000, 3, 120, 100);
}
fn bench_index_mtree_dim_3_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 1_000, 100_000, 3, 120, 0);
} }
fn bench_index_mtree_dim_50(c: &mut Criterion) { fn bench_index_mtree_dim_50(c: &mut Criterion) {
bench_index_mtree(c, 100, 10_000, 50, 20); bench_index_mtree(c, 100, 10_000, 50, 20, 100);
}
fn bench_index_mtree_dim_50_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 100, 10_000, 50, 20, 0);
} }
fn bench_index_mtree_dim_300(c: &mut Criterion) { fn bench_index_mtree_dim_300(c: &mut Criterion) {
bench_index_mtree(c, 50, 5_000, 300, 40); bench_index_mtree(c, 50, 5_000, 300, 40, 100);
}
fn bench_index_mtree_dim_300_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 50, 5_000, 300, 40, 0);
} }
fn bench_index_mtree_dim_2048(c: &mut Criterion) { fn bench_index_mtree_dim_2048(c: &mut Criterion) {
bench_index_mtree(c, 10, 1_000, 2048, 60); bench_index_mtree(c, 10, 1_000, 2048, 60, 100);
}
fn bench_index_mtree_dim_2048_full_cache(c: &mut Criterion) {
bench_index_mtree(c, 10, 1_000, 2048, 60, 0);
} }
fn bench_index_mtree( fn bench_index_mtree(
@ -37,6 +54,7 @@ fn bench_index_mtree(
release_samples_len: usize, release_samples_len: usize,
vector_dimension: usize, vector_dimension: usize,
measurement_secs: u64, measurement_secs: u64,
cache_size: usize,
) { ) {
let samples_len = if cfg!(debug_assertions) { let samples_len = if cfg!(debug_assertions) {
debug_samples_len // Debug is slow debug_samples_len // Debug is slow
@ -50,22 +68,26 @@ fn bench_index_mtree(
// Indexing benchmark group // Indexing benchmark group
{ {
let mut group = get_group(c, "index_mtree_insert", samples_len, measurement_secs); let mut group = get_group(c, "index_mtree_insert", samples_len, measurement_secs);
let id = format!("len_{}_dim_{}", samples_len, vector_dimension); let id = format!("len_{}_dim_{}_cache_{}", samples_len, vector_dimension, cache_size);
group.bench_function(id, |b| { group.bench_function(id, |b| {
b.to_async(Runtime::new().unwrap()) b.to_async(Runtime::new().unwrap())
.iter(|| insert_objects(&ds, samples_len, vector_dimension)); .iter(|| insert_objects(&ds, samples_len, vector_dimension, cache_size));
}); });
group.finish(); group.finish();
} }
// Knn lookup benchmark group // Knn lookup benchmark group
{ {
let mut group = get_group(c, "index_mtree_lookup", 100_000, 10); let mut group = get_group(c, "index_mtree_lookup", samples_len, 10);
for knn in [1, 10] { for knn in [1, 10] {
let id = format!("knn_{}_len_{}_dim_{}", knn, samples_len, vector_dimension); let id = format!(
"knn_{}_len_{}_dim_{}_cache_{}",
knn, samples_len, vector_dimension, cache_size
);
group.bench_function(id, |b| { group.bench_function(id, |b| {
b.to_async(Runtime::new().unwrap()) b.to_async(Runtime::new().unwrap()).iter(|| {
.iter(|| knn_lookup_objects(&ds, 100_000, vector_dimension, knn)); knn_lookup_objects(&ds, samples_len, vector_dimension, knn, cache_size)
});
}); });
} }
group.finish(); group.finish();
@ -96,26 +118,38 @@ fn mtree() -> MTree {
MTree::new(MState::new(40), Distance::Euclidean) MTree::new(MState::new(40), Distance::Euclidean)
} }
async fn insert_objects(ds: &Datastore, samples_size: usize, vector_size: usize) { async fn insert_objects(
ds: &Datastore,
samples_size: usize,
vector_size: usize,
cache_size: usize,
) {
let mut rng = thread_rng(); let mut rng = thread_rng();
let mut t = mtree(); let mut t = mtree();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let s = TreeNodeStore::new(TreeNodeProvider::Debug, TreeStoreType::Write, 20); let c = TreeCache::new(0, TreeNodeProvider::Debug, cache_size);
let mut s = s.lock().await; let mut s = TreeStore::new(TreeNodeProvider::Debug, c.clone(), Write).await;
for i in 0..samples_size { for i in 0..samples_size {
let object = random_object(&mut rng, vector_size); let object = random_object(&mut rng, vector_size);
// Insert the sample // Insert the sample
t.insert(&mut tx, &mut s, object, i as DocId).await.unwrap(); t.insert(&mut tx, &mut s, object, i as DocId).await.unwrap();
} }
s.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap(); tx.commit().await.unwrap();
} }
async fn knn_lookup_objects(ds: &Datastore, samples_size: usize, vector_size: usize, knn: usize) { async fn knn_lookup_objects(
ds: &Datastore,
samples_size: usize,
vector_size: usize,
knn: usize,
cache_size: usize,
) {
let mut rng = thread_rng(); let mut rng = thread_rng();
let t = mtree(); let t = mtree();
let mut tx = ds.transaction(Read, Optimistic).await.unwrap(); let mut tx = ds.transaction(Read, Optimistic).await.unwrap();
let s = TreeNodeStore::new(TreeNodeProvider::Debug, TreeStoreType::Read, 20); let c = TreeCache::new(0, TreeNodeProvider::Debug, cache_size);
let mut s = s.lock().await; let mut s = TreeStore::new(TreeNodeProvider::Debug, c, Read).await;
for _ in 0..samples_size { for _ in 0..samples_size {
let object = Arc::new(random_object(&mut rng, vector_size)); let object = Arc::new(random_object(&mut rng, vector_size));
// Insert the sample // Insert the sample
@ -127,8 +161,12 @@ async fn knn_lookup_objects(ds: &Datastore, samples_size: usize, vector_size: us
criterion_group!( criterion_group!(
benches, benches,
bench_index_mtree_dim_3, bench_index_mtree_dim_3,
bench_index_mtree_dim_3_full_cache,
bench_index_mtree_dim_50, bench_index_mtree_dim_50,
bench_index_mtree_dim_50_full_cache,
bench_index_mtree_dim_300, bench_index_mtree_dim_300,
bench_index_mtree_dim_300_full_cache,
bench_index_mtree_dim_2048, bench_index_mtree_dim_2048,
bench_index_mtree_dim_2048_full_cache
); );
criterion_main!(benches); criterion_main!(benches);

View file

@ -6,6 +6,7 @@ use crate::dbs::capabilities::NetTarget;
use crate::dbs::{Capabilities, Notification}; use crate::dbs::{Capabilities, Notification};
use crate::err::Error; use crate::err::Error;
use crate::idx::planner::QueryPlanner; use crate::idx::planner::QueryPlanner;
use crate::idx::trees::store::IndexStores;
use crate::sql::value::Value; use crate::sql::value::Value;
use channel::Sender; use channel::Sender;
use std::borrow::Cow; use std::borrow::Cow;
@ -43,6 +44,8 @@ pub struct Context<'a> {
notifications: Option<Sender<Notification>>, notifications: Option<Sender<Notification>>,
// An optional query planner // An optional query planner
query_planner: Option<&'a QueryPlanner<'a>>, query_planner: Option<&'a QueryPlanner<'a>>,
// The index store
index_stores: IndexStores,
// Capabilities // Capabilities
capabilities: Arc<Capabilities>, capabilities: Arc<Capabilities>,
} }
@ -65,9 +68,29 @@ impl<'a> Debug for Context<'a> {
} }
impl<'a> Context<'a> { impl<'a> Context<'a> {
pub(crate) fn from_ds(
time_out: Option<Duration>,
capabilities: Capabilities,
index_stores: IndexStores,
) -> Context<'a> {
let mut ctx = Self {
values: HashMap::default(),
parent: None,
deadline: None,
cancelled: Arc::new(AtomicBool::new(false)),
notifications: None,
query_planner: None,
capabilities: Arc::new(capabilities),
index_stores,
};
if let Some(timeout) = time_out {
ctx.add_timeout(timeout);
}
ctx
}
/// Create an empty background context. /// Create an empty background context.
pub fn background() -> Self { pub fn background() -> Self {
Context { Self {
values: HashMap::default(), values: HashMap::default(),
parent: None, parent: None,
deadline: None, deadline: None,
@ -75,6 +98,7 @@ impl<'a> Context<'a> {
notifications: None, notifications: None,
query_planner: None, query_planner: None,
capabilities: Arc::new(Capabilities::default()), capabilities: Arc::new(Capabilities::default()),
index_stores: IndexStores::default(),
} }
} }
@ -88,6 +112,7 @@ impl<'a> Context<'a> {
notifications: parent.notifications.clone(), notifications: parent.notifications.clone(),
query_planner: parent.query_planner, query_planner: parent.query_planner,
capabilities: parent.capabilities.clone(), capabilities: parent.capabilities.clone(),
index_stores: parent.index_stores.clone(),
} }
} }
@ -148,6 +173,11 @@ impl<'a> Context<'a> {
self.query_planner self.query_planner
} }
/// Get the index_store for this context/ds
pub(crate) fn get_index_stores(&self) -> &IndexStores {
&self.index_stores
}
/// Check if the context is done. If it returns `None` the operation may /// Check if the context is done. If it returns `None` the operation may
/// proceed, otherwise the operation should be stopped. /// proceed, otherwise the operation should be stopped.
pub fn done(&self) -> Option<Reason> { pub fn done(&self) -> Option<Reason> {

View file

@ -409,7 +409,7 @@ impl Options {
/// Get current Node ID /// Get current Node ID
pub fn id(&self) -> Result<Uuid, Error> { pub fn id(&self) -> Result<Uuid, Error> {
self.id.ok_or(Error::Unreachable) self.id.ok_or(Error::Unreachable("Options::id"))
} }
/// Get currently selected NS /// Get currently selected NS

View file

@ -5,9 +5,9 @@ use crate::doc::{CursorDoc, Document};
use crate::err::Error; use crate::err::Error;
use crate::idx::ft::FtIndex; use crate::idx::ft::FtIndex;
use crate::idx::trees::mtree::MTreeIndex; use crate::idx::trees::mtree::MTreeIndex;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::key; use crate::key;
use crate::kvs::TransactionType;
use crate::sql::array::Array; use crate::sql::array::Array;
use crate::sql::index::{Index, MTreeParams, SearchParams}; use crate::sql::index::{Index, MTreeParams, SearchParams};
use crate::sql::statements::DefineIndexStatement; use crate::sql::statements::DefineIndexStatement;
@ -53,7 +53,7 @@ impl<'a> Document<'a> {
Index::Uniq => ic.index_unique(txn).await?, Index::Uniq => ic.index_unique(txn).await?,
Index::Idx => ic.index_non_unique(txn).await?, Index::Idx => ic.index_non_unique(txn).await?,
Index::Search(p) => ic.index_full_text(ctx, txn, p).await?, Index::Search(p) => ic.index_full_text(ctx, txn, p).await?,
Index::MTree(p) => ic.index_mtree(txn, p).await?, Index::MTree(p) => ic.index_mtree(ctx, txn, p).await?,
}; };
} }
} }
@ -335,7 +335,16 @@ impl<'a> IndexOperation<'a> {
) -> Result<(), Error> { ) -> Result<(), Error> {
let ikb = IndexKeyBase::new(self.opt, self.ix); let ikb = IndexKeyBase::new(self.opt, self.ix);
let mut ft = FtIndex::new(self.opt, txn, &p.az, ikb, p, TreeStoreType::Write).await?; let mut ft = FtIndex::new(
ctx.get_index_stores(),
self.opt,
txn,
&p.az,
ikb,
p,
TransactionType::Write,
)
.await?;
if let Some(n) = self.n.take() { if let Some(n) = self.n.take() {
ft.index_document(ctx, self.opt, txn, self.rid, n).await?; ft.index_document(ctx, self.opt, txn, self.rid, n).await?;
@ -345,10 +354,17 @@ impl<'a> IndexOperation<'a> {
ft.finish(txn).await ft.finish(txn).await
} }
async fn index_mtree(&mut self, txn: &Transaction, p: &MTreeParams) -> Result<(), Error> { async fn index_mtree(
&mut self,
ctx: &Context<'_>,
txn: &Transaction,
p: &MTreeParams,
) -> Result<(), Error> {
let mut tx = txn.lock().await; let mut tx = txn.lock().await;
let ikb = IndexKeyBase::new(self.opt, self.ix); let ikb = IndexKeyBase::new(self.opt, self.ix);
let mut mt = MTreeIndex::new(&mut tx, ikb, p, TreeStoreType::Write).await?; let mut mt =
MTreeIndex::new(ctx.get_index_stores(), &mut tx, ikb, p, TransactionType::Write)
.await?;
// Delete the old index data // Delete the old index data
if let Some(o) = self.o.take() { if let Some(o) = self.o.take() {
mt.remove_document(&mut tx, self.rid, o).await?; mt.remove_document(&mut tx, self.rid, o).await?;

View file

@ -46,8 +46,8 @@ pub enum Error {
RetryWithId(Thing), RetryWithId(Thing),
/// The database encountered unreachable logic /// The database encountered unreachable logic
#[error("The database encountered unreachable logic")] #[error("The database encountered unreachable logic: {0}")]
Unreachable, Unreachable(&'static str),
/// Statement has been deprecated /// Statement has been deprecated
#[error("{0}")] #[error("{0}")]
@ -619,8 +619,8 @@ pub enum Error {
Revision(#[from] RevisionError), Revision(#[from] RevisionError),
/// The index has been found to be inconsistent /// The index has been found to be inconsistent
#[error("Index is corrupted")] #[error("Index is corrupted: {0}")]
CorruptedIndex, CorruptedIndex(&'static str),
/// The query planner did not find an index able to support the match @@ or knn <> operator for a given expression /// The query planner did not find an index able to support the match @@ or knn <> operator for a given expression
#[error("There was no suitable index supporting the expression '{value}'")] #[error("There was no suitable index supporting the expression '{value}'")]

View file

@ -1,14 +1,12 @@
use crate::err::Error; use crate::err::Error;
use crate::idx::trees::bkeys::TrieKeys; use crate::idx::trees::bkeys::TrieKeys;
use crate::idx::trees::btree::{BStatistics, BTree, BTreeNodeStore}; use crate::idx::trees::btree::{BStatistics, BTree, BTreeStore};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType}; use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{trees, IndexKeyBase, VersionedSerdeState}; use crate::idx::{trees, IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction}; use crate::kvs::{Key, Transaction, TransactionType};
use revision::revisioned; use revision::revisioned;
use roaring::RoaringTreemap; use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::Mutex;
pub type DocId = u64; pub type DocId = u64;
@ -18,35 +16,41 @@ pub(crate) struct DocIds {
state_key: Key, state_key: Key,
index_key_base: IndexKeyBase, index_key_base: IndexKeyBase,
btree: BTree<TrieKeys>, btree: BTree<TrieKeys>,
store: Arc<Mutex<BTreeNodeStore<TrieKeys>>>, store: BTreeStore<TrieKeys>,
available_ids: Option<RoaringTreemap>, available_ids: Option<RoaringTreemap>,
next_doc_id: DocId, next_doc_id: DocId,
updated: bool,
} }
impl DocIds { impl DocIds {
pub(in crate::idx) async fn new( pub(in crate::idx) async fn new(
ixs: &IndexStores,
tx: &mut Transaction, tx: &mut Transaction,
index_key_base: IndexKeyBase, tt: TransactionType,
ikb: IndexKeyBase,
default_btree_order: u32, default_btree_order: u32,
store_type: TreeStoreType, cache_size: u32,
) -> Result<Self, Error> { ) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bd_key(None); let state_key: Key = ikb.new_bd_key(None);
let state: State = if let Some(val) = tx.get(state_key.clone()).await? { let state: State = if let Some(val) = tx.get(state_key.clone()).await? {
State::try_from_val(val)? State::try_from_val(val)?
} else { } else {
State::new(default_btree_order) State::new(default_btree_order)
}; };
let store = let store = ixs
TreeNodeStore::new(TreeNodeProvider::DocIds(index_key_base.clone()), store_type, 20); .get_store_btree_trie(
TreeNodeProvider::DocIds(ikb.clone()),
state.btree.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self { Ok(Self {
state_key, state_key,
index_key_base, index_key_base: ikb,
btree: BTree::new(state.btree), btree: BTree::new(state.btree),
store, store,
available_ids: state.available_ids, available_ids: state.available_ids,
next_doc_id: state.next_doc_id, next_doc_id: state.next_doc_id,
updated: false,
}) })
} }
@ -72,8 +76,7 @@ impl DocIds {
tx: &mut Transaction, tx: &mut Transaction,
doc_key: Key, doc_key: Key,
) -> Result<Option<DocId>, Error> { ) -> Result<Option<DocId>, Error> {
let mut store = self.store.lock().await; self.btree.search(tx, &self.store, &doc_key).await
self.btree.search(tx, &mut store, &doc_key).await
} }
/// Returns the doc_id for the given doc_key. /// Returns the doc_id for the given doc_key.
@ -84,16 +87,13 @@ impl DocIds {
doc_key: Key, doc_key: Key,
) -> Result<Resolved, Error> { ) -> Result<Resolved, Error> {
{ {
let mut store = self.store.lock().await; if let Some(doc_id) = self.btree.search_mut(tx, &mut self.store, &doc_key).await? {
if let Some(doc_id) = self.btree.search(tx, &mut store, &doc_key).await? {
return Ok(Resolved::Existing(doc_id)); return Ok(Resolved::Existing(doc_id));
} }
} }
let doc_id = self.get_next_doc_id(); let doc_id = self.get_next_doc_id();
tx.set(self.index_key_base.new_bi_key(doc_id), doc_key.clone()).await?; tx.set(self.index_key_base.new_bi_key(doc_id), doc_key.clone()).await?;
let mut store = self.store.lock().await; self.btree.insert(tx, &mut self.store, doc_key, doc_id).await?;
self.btree.insert(tx, &mut store, doc_key, doc_id).await?;
self.updated = true;
Ok(Resolved::New(doc_id)) Ok(Resolved::New(doc_id))
} }
@ -102,8 +102,7 @@ impl DocIds {
tx: &mut Transaction, tx: &mut Transaction,
doc_key: Key, doc_key: Key,
) -> Result<Option<DocId>, Error> { ) -> Result<Option<DocId>, Error> {
let mut store = self.store.lock().await; if let Some(doc_id) = self.btree.delete(tx, &mut self.store, doc_key).await? {
if let Some(doc_id) = self.btree.delete(tx, &mut store, doc_key).await? {
tx.del(self.index_key_base.new_bi_key(doc_id)).await?; tx.del(self.index_key_base.new_bi_key(doc_id)).await?;
if let Some(available_ids) = &mut self.available_ids { if let Some(available_ids) = &mut self.available_ids {
available_ids.insert(doc_id); available_ids.insert(doc_id);
@ -112,7 +111,6 @@ impl DocIds {
available_ids.insert(doc_id); available_ids.insert(doc_id);
self.available_ids = Some(available_ids); self.available_ids = Some(available_ids);
} }
self.updated = true;
Ok(Some(doc_id)) Ok(Some(doc_id))
} else { } else {
Ok(None) Ok(None)
@ -136,15 +134,14 @@ impl DocIds {
&self, &self,
tx: &mut Transaction, tx: &mut Transaction,
) -> Result<BStatistics, Error> { ) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await; self.btree.statistics(tx, &self.store).await
self.btree.statistics(tx, &mut store).await
} }
pub(in crate::idx) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> { pub(in crate::idx) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
let updated = self.store.lock().await.finish(tx).await?; if self.store.finish(tx).await? {
if self.updated || updated { let btree = self.btree.inc_generation().clone();
let state = State { let state = State {
btree: self.btree.get_state().clone(), btree,
available_ids: self.available_ids.take(), available_ids: self.available_ids.take(),
next_doc_id: self.next_doc_id, next_doc_id: self.next_doc_id,
}; };
@ -199,16 +196,18 @@ impl Resolved {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::idx::docids::{DocIds, Resolved}; use crate::idx::docids::{DocIds, Resolved};
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType::*}; use crate::kvs::TransactionType::*;
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType};
const BTREE_ORDER: u32 = 7; const BTREE_ORDER: u32 = 7;
async fn get_doc_ids(ds: &Datastore, store_type: TreeStoreType) -> (Transaction, DocIds) { async fn new_operation(ds: &Datastore, tt: TransactionType) -> (Transaction, DocIds) {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let mut tx = ds.transaction(tt, Optimistic).await.unwrap();
let d = let d =
DocIds::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, store_type).await.unwrap(); DocIds::new(ds.index_store(), &mut tx, tt, IndexKeyBase::default(), BTREE_ORDER, 100)
.await
.unwrap();
(tx, d) (tx, d)
} }
@ -223,37 +222,43 @@ mod tests {
// Resolve a first doc key // Resolve a first doc key
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
let doc_id = d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(); let doc_id = d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap();
finish(tx, d).await;
let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 1); assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 1);
assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into())); assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into()));
finish(tx, d).await;
assert_eq!(doc_id, Resolved::New(0)); assert_eq!(doc_id, Resolved::New(0));
} }
// Resolve the same doc key // Resolve the same doc key
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
let doc_id = d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(); let doc_id = d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap();
finish(tx, d).await;
let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 1); assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 1);
assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into())); assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into()));
finish(tx, d).await;
assert_eq!(doc_id, Resolved::Existing(0)); assert_eq!(doc_id, Resolved::Existing(0));
} }
// Resolve another single doc key // Resolve another single doc key
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
let doc_id = d.resolve_doc_id(&mut tx, "Bar".into()).await.unwrap(); let doc_id = d.resolve_doc_id(&mut tx, "Bar".into()).await.unwrap();
finish(tx, d).await;
let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 2); assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 2);
assert_eq!(d.get_doc_key(&mut tx, 1).await.unwrap(), Some("Bar".into())); assert_eq!(d.get_doc_key(&mut tx, 1).await.unwrap(), Some("Bar".into()));
finish(tx, d).await;
assert_eq!(doc_id, Resolved::New(1)); assert_eq!(doc_id, Resolved::New(1));
} }
// Resolve another two existing doc keys and two new doc keys (interlaced) // Resolve another two existing doc keys and two new doc keys (interlaced)
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!( assert_eq!(
d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(), d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(),
Resolved::Existing(0) Resolved::Existing(0)
@ -264,12 +269,13 @@ mod tests {
Resolved::Existing(1) Resolved::Existing(1)
); );
assert_eq!(d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(), Resolved::New(3)); assert_eq!(d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(), Resolved::New(3));
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 4);
finish(tx, d).await; finish(tx, d).await;
let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 4);
} }
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!( assert_eq!(
d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(), d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(),
Resolved::Existing(0) Resolved::Existing(0)
@ -286,12 +292,13 @@ mod tests {
d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(), d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(),
Resolved::Existing(3) Resolved::Existing(3)
); );
finish(tx, d).await;
let (mut tx, d) = new_operation(&ds, Read).await;
assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into())); assert_eq!(d.get_doc_key(&mut tx, 0).await.unwrap(), Some("Foo".into()));
assert_eq!(d.get_doc_key(&mut tx, 1).await.unwrap(), Some("Bar".into())); assert_eq!(d.get_doc_key(&mut tx, 1).await.unwrap(), Some("Bar".into()));
assert_eq!(d.get_doc_key(&mut tx, 2).await.unwrap(), Some("Hello".into())); assert_eq!(d.get_doc_key(&mut tx, 2).await.unwrap(), Some("Hello".into()));
assert_eq!(d.get_doc_key(&mut tx, 3).await.unwrap(), Some("World".into())); assert_eq!(d.get_doc_key(&mut tx, 3).await.unwrap(), Some("World".into()));
assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 4); assert_eq!(d.statistics(&mut tx).await.unwrap().keys_count, 4);
finish(tx, d).await;
} }
} }
@ -301,7 +308,7 @@ mod tests {
// Create two docs // Create two docs
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(), Resolved::New(0)); assert_eq!(d.resolve_doc_id(&mut tx, "Foo".into()).await.unwrap(), Resolved::New(0));
assert_eq!(d.resolve_doc_id(&mut tx, "Bar".into()).await.unwrap(), Resolved::New(1)); assert_eq!(d.resolve_doc_id(&mut tx, "Bar".into()).await.unwrap(), Resolved::New(1));
finish(tx, d).await; finish(tx, d).await;
@ -309,7 +316,7 @@ mod tests {
// Remove doc 1 // Remove doc 1
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Dummy".into()).await.unwrap(), None); assert_eq!(d.remove_doc(&mut tx, "Dummy".into()).await.unwrap(), None);
assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), Some(0)); assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), Some(0));
finish(tx, d).await; finish(tx, d).await;
@ -317,21 +324,21 @@ mod tests {
// Check 'Foo' has been removed // Check 'Foo' has been removed
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), None); assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), None);
finish(tx, d).await; finish(tx, d).await;
} }
// Insert a new doc - should reuse the freed id 0 // Insert a new doc - should reuse the freed id 0
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.resolve_doc_id(&mut tx, "Hello".into()).await.unwrap(), Resolved::New(0)); assert_eq!(d.resolve_doc_id(&mut tx, "Hello".into()).await.unwrap(), Resolved::New(0));
finish(tx, d).await; finish(tx, d).await;
} }
// Remove doc 2 // Remove doc 2
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Dummy".into()).await.unwrap(), None); assert_eq!(d.remove_doc(&mut tx, "Dummy".into()).await.unwrap(), None);
assert_eq!(d.remove_doc(&mut tx, "Bar".into()).await.unwrap(), Some(1)); assert_eq!(d.remove_doc(&mut tx, "Bar".into()).await.unwrap(), Some(1));
finish(tx, d).await; finish(tx, d).await;
@ -339,14 +346,14 @@ mod tests {
// Check 'Bar' has been removed // Check 'Bar' has been removed
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), None); assert_eq!(d.remove_doc(&mut tx, "Foo".into()).await.unwrap(), None);
finish(tx, d).await; finish(tx, d).await;
} }
// Insert a new doc - should reuse the freed id 1 // Insert a new doc - should reuse the freed id 1
{ {
let (mut tx, mut d) = get_doc_ids(&ds, TreeStoreType::Write).await; let (mut tx, mut d) = new_operation(&ds, Write).await;
assert_eq!(d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(), Resolved::New(1)); assert_eq!(d.resolve_doc_id(&mut tx, "World".into()).await.unwrap(), Resolved::New(1));
finish(tx, d).await; finish(tx, d).await;
} }

View file

@ -1,36 +1,42 @@
use crate::err::Error; use crate::err::Error;
use crate::idx::docids::DocId; use crate::idx::docids::DocId;
use crate::idx::trees::bkeys::TrieKeys; use crate::idx::trees::bkeys::TrieKeys;
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore, Payload}; use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore, Payload};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType}; use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{IndexKeyBase, VersionedSerdeState}; use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction}; use crate::kvs::{Key, Transaction, TransactionType};
use std::sync::Arc;
use tokio::sync::Mutex;
pub(super) type DocLength = u64; pub(super) type DocLength = u64;
pub(super) struct DocLengths { pub(super) struct DocLengths {
state_key: Key, state_key: Key,
btree: BTree<TrieKeys>, btree: BTree<TrieKeys>,
store: Arc<Mutex<BTreeNodeStore<TrieKeys>>>, store: BTreeStore<TrieKeys>,
} }
impl DocLengths { impl DocLengths {
pub(super) async fn new( pub(super) async fn new(
ixs: &IndexStores,
tx: &mut Transaction, tx: &mut Transaction,
index_key_base: IndexKeyBase, ikb: IndexKeyBase,
default_btree_order: u32, default_btree_order: u32,
store_type: TreeStoreType, tt: TransactionType,
cache_size: u32,
) -> Result<Self, Error> { ) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bl_key(None); let state_key: Key = ikb.new_bl_key(None);
let state: BState = if let Some(val) = tx.get(state_key.clone()).await? { let state: BState = if let Some(val) = tx.get(state_key.clone()).await? {
BState::try_from_val(val)? BState::try_from_val(val)?
} else { } else {
BState::new(default_btree_order) BState::new(default_btree_order)
}; };
let store = let store = ixs
TreeNodeStore::new(TreeNodeProvider::DocLengths(index_key_base), store_type, 20); .get_store_btree_trie(
TreeNodeProvider::DocLengths(ikb),
state.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self { Ok(Self {
state_key, state_key,
btree: BTree::new(state), btree: BTree::new(state),
@ -43,8 +49,15 @@ impl DocLengths {
tx: &mut Transaction, tx: &mut Transaction,
doc_id: DocId, doc_id: DocId,
) -> Result<Option<DocLength>, Error> { ) -> Result<Option<DocLength>, Error> {
let mut store = self.store.lock().await; self.btree.search(tx, &self.store, &doc_id.to_be_bytes().to_vec()).await
self.btree.search(tx, &mut store, &doc_id.to_be_bytes().to_vec()).await }
pub(super) async fn get_doc_length_mut(
&mut self,
tx: &mut Transaction,
doc_id: DocId,
) -> Result<Option<DocLength>, Error> {
self.btree.search_mut(tx, &mut self.store, &doc_id.to_be_bytes().to_vec()).await
} }
pub(super) async fn set_doc_length( pub(super) async fn set_doc_length(
@ -53,8 +66,8 @@ impl DocLengths {
doc_id: DocId, doc_id: DocId,
doc_length: DocLength, doc_length: DocLength,
) -> Result<(), Error> { ) -> Result<(), Error> {
let mut store = self.store.lock().await; self.btree.insert(tx, &mut self.store, doc_id.to_be_bytes().to_vec(), doc_length).await?;
self.btree.insert(tx, &mut store, doc_id.to_be_bytes().to_vec(), doc_length).await Ok(())
} }
pub(super) async fn remove_doc_length( pub(super) async fn remove_doc_length(
@ -62,18 +75,18 @@ impl DocLengths {
tx: &mut Transaction, tx: &mut Transaction,
doc_id: DocId, doc_id: DocId,
) -> Result<Option<Payload>, Error> { ) -> Result<Option<Payload>, Error> {
let mut store = self.store.lock().await; self.btree.delete(tx, &mut self.store, doc_id.to_be_bytes().to_vec()).await
self.btree.delete(tx, &mut store, doc_id.to_be_bytes().to_vec()).await
} }
pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> { pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await; self.btree.statistics(tx, &self.store).await
self.btree.statistics(tx, &mut store).await
} }
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> { pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
self.store.lock().await.finish(tx).await?; if self.store.finish(tx).await? {
self.btree.get_state().finish(tx, &self.state_key).await?; let state = self.btree.inc_generation();
tx.set(self.state_key.clone(), state.try_to_val()?).await?;
}
Ok(()) Ok(())
} }
} }
@ -81,9 +94,26 @@ impl DocLengths {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::idx::ft::doclength::DocLengths; use crate::idx::ft::doclength::DocLengths;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, TransactionType::*}; use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType};
/// Test helper: opens an optimistic transaction on `ds` and builds a
/// `DocLengths` over the default `IndexKeyBase`, using `order` as the default
/// B-tree order and a cache size of 100.
///
/// Returns the transaction together with the `DocLengths` so the caller can
/// run operations and then commit or cancel.
async fn doc_length(
    ds: &Datastore,
    order: u32,
    tt: TransactionType,
) -> (Transaction, DocLengths) {
    // NOTE(review): the transaction itself is always opened as `Write`; `tt` is
    // only forwarded to `DocLengths::new`. Confirm whether that is intentional.
    let mut txn = ds.transaction(TransactionType::Write, Optimistic).await.unwrap();
    let built =
        DocLengths::new(ds.index_store(), &mut txn, IndexKeyBase::default(), order, tt, 100).await;
    let lengths = built.unwrap();
    (txn, lengths)
}
/// Test helper: calls `DocLengths::finish` on `l` with `tx`, then commits `tx`.
/// Panics if either step returns an error.
async fn finish(mut l: DocLengths, mut tx: Transaction) {
    let finished = l.finish(&mut tx).await;
    finished.unwrap();
    let committed = tx.commit().await;
    committed.unwrap()
}
#[tokio::test] #[tokio::test]
async fn test_doc_lengths() { async fn test_doc_lengths() {
@ -91,49 +121,58 @@ mod tests {
let ds = Datastore::new("memory").await.unwrap(); let ds = Datastore::new("memory").await.unwrap();
{
// Check empty state // Check empty state
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
let l = DocLengths::new(
&mut tx,
IndexKeyBase::default(),
BTREE_ORDER,
TreeStoreType::Traversal,
)
.await
.unwrap();
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 0); assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 0);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap(); let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
assert_eq!(dl, None); assert_eq!(dl, None);
tx.cancel().await.unwrap();
}
{
// Set a doc length // Set a doc length
let mut l = let (mut tx, mut l) = doc_length(&ds, BTREE_ORDER, TransactionType::Write).await;
DocLengths::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, TreeStoreType::Write)
.await
.unwrap();
l.set_doc_length(&mut tx, 99, 199).await.unwrap(); l.set_doc_length(&mut tx, 99, 199).await.unwrap();
finish(l, tx).await;
}
{
let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1); assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap(); let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
l.finish(&mut tx).await.unwrap();
assert_eq!(dl, Some(199)); assert_eq!(dl, Some(199));
tx.cancel().await.unwrap();
}
{
// Update doc length // Update doc length
let mut l = let (mut tx, mut l) = doc_length(&ds, BTREE_ORDER, TransactionType::Write).await;
DocLengths::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, TreeStoreType::Write)
.await
.unwrap();
l.set_doc_length(&mut tx, 99, 299).await.unwrap(); l.set_doc_length(&mut tx, 99, 299).await.unwrap();
finish(l, tx).await;
}
{
let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1); assert_eq!(l.statistics(&mut tx).await.unwrap().keys_count, 1);
let dl = l.get_doc_length(&mut tx, 99).await.unwrap(); let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
l.finish(&mut tx).await.unwrap();
assert_eq!(dl, Some(299)); assert_eq!(dl, Some(299));
tx.cancel().await.unwrap();
}
{
// Remove doc lengths // Remove doc lengths
let mut l = let (mut tx, mut l) = doc_length(&ds, BTREE_ORDER, TransactionType::Write).await;
DocLengths::new(&mut tx, IndexKeyBase::default(), BTREE_ORDER, TreeStoreType::Write)
.await
.unwrap();
assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), Some(299)); assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), Some(299));
assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), None); assert_eq!(l.remove_doc_length(&mut tx, 99).await.unwrap(), None);
tx.commit().await.unwrap() finish(l, tx).await;
}
{
let (mut tx, l) = doc_length(&ds, BTREE_ORDER, TransactionType::Read).await;
let dl = l.get_doc_length(&mut tx, 99).await.unwrap();
assert_eq!(dl, None);
tx.cancel().await.unwrap();
}
} }
} }

View file

@ -20,10 +20,10 @@ use crate::idx::ft::scorer::BM25Scorer;
use crate::idx::ft::termdocs::{TermDocs, TermsDocs}; use crate::idx::ft::termdocs::{TermDocs, TermsDocs};
use crate::idx::ft::terms::{TermId, Terms}; use crate::idx::ft::terms::{TermId, Terms};
use crate::idx::trees::btree::BStatistics; use crate::idx::trees::btree::BStatistics;
use crate::idx::trees::store::TreeStoreType; use crate::idx::trees::store::IndexStores;
use crate::idx::{IndexKeyBase, VersionedSerdeState}; use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs; use crate::kvs;
use crate::kvs::Key; use crate::kvs::{Key, TransactionType};
use crate::sql::index::SearchParams; use crate::sql::index::SearchParams;
use crate::sql::scoring::Scoring; use crate::sql::scoring::Scoring;
use crate::sql::statements::DefineAnalyzerStatement; use crate::sql::statements::DefineAnalyzerStatement;
@ -97,24 +97,25 @@ impl VersionedSerdeState for State {}
impl FtIndex { impl FtIndex {
pub(crate) async fn new( pub(crate) async fn new(
ixs: &IndexStores,
opt: &Options, opt: &Options,
txn: &Transaction, txn: &Transaction,
az: &str, az: &str,
index_key_base: IndexKeyBase, index_key_base: IndexKeyBase,
p: &SearchParams, p: &SearchParams,
store_type: TreeStoreType, tt: TransactionType,
) -> Result<Self, Error> { ) -> Result<Self, Error> {
let mut tx = txn.lock().await; let mut tx = txn.lock().await;
let az = tx.get_db_analyzer(opt.ns(), opt.db(), az).await?; let az = tx.get_db_analyzer(opt.ns(), opt.db(), az).await?;
Self::with_analyzer(&mut tx, az, index_key_base, p, store_type).await Self::with_analyzer(ixs, &mut tx, az, index_key_base, p, tt).await
} }
async fn with_analyzer( async fn with_analyzer(
ixs: &IndexStores,
run: &mut kvs::Transaction, run: &mut kvs::Transaction,
az: DefineAnalyzerStatement, az: DefineAnalyzerStatement,
index_key_base: IndexKeyBase, index_key_base: IndexKeyBase,
p: &SearchParams, p: &SearchParams,
store_type: TreeStoreType, tt: TransactionType,
) -> Result<Self, Error> { ) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bs_key(); let state_key: Key = index_key_base.new_bs_key();
let state: State = if let Some(val) = run.get(state_key.clone()).await? { let state: State = if let Some(val) = run.get(state_key.clone()).await? {
@ -123,16 +124,26 @@ impl FtIndex {
State::default() State::default()
}; };
let doc_ids = Arc::new(RwLock::new( let doc_ids = Arc::new(RwLock::new(
DocIds::new(run, index_key_base.clone(), p.doc_ids_order, store_type).await?, DocIds::new(ixs, run, tt, index_key_base.clone(), p.doc_ids_order, p.doc_ids_cache)
.await?,
)); ));
let doc_lengths = Arc::new(RwLock::new( let doc_lengths = Arc::new(RwLock::new(
DocLengths::new(run, index_key_base.clone(), p.doc_lengths_order, store_type).await?, DocLengths::new(
ixs,
run,
index_key_base.clone(),
p.doc_lengths_order,
tt,
p.doc_lengths_cache,
)
.await?,
)); ));
let postings = Arc::new(RwLock::new( let postings = Arc::new(RwLock::new(
Postings::new(run, index_key_base.clone(), p.postings_order, store_type).await?, Postings::new(ixs, run, index_key_base.clone(), p.postings_order, tt, p.postings_cache)
.await?,
)); ));
let terms = Arc::new(RwLock::new( let terms = Arc::new(RwLock::new(
Terms::new(run, index_key_base.clone(), p.terms_order, store_type).await?, Terms::new(ixs, run, index_key_base.clone(), p.terms_order, tt, p.terms_cache).await?,
)); ));
let termdocs = TermDocs::new(index_key_base.clone()); let termdocs = TermDocs::new(index_key_base.clone());
let offsets = Offsets::new(index_key_base.clone()); let offsets = Offsets::new(index_key_base.clone());
@ -244,7 +255,7 @@ impl FtIndex {
let mut tx = txn.lock().await; let mut tx = txn.lock().await;
let mut dl = self.doc_lengths.write().await; let mut dl = self.doc_lengths.write().await;
if resolved.was_existing() { if resolved.was_existing() {
if let Some(old_doc_length) = dl.get_doc_length(&mut tx, doc_id).await? { if let Some(old_doc_length) = dl.get_doc_length_mut(&mut tx, doc_id).await? {
self.state.total_docs_lengths -= old_doc_length as u128; self.state.total_docs_lengths -= old_doc_length as u128;
} }
} }
@ -442,7 +453,7 @@ impl FtIndex {
}) })
} }
pub(crate) async fn finish(self, tx: &Transaction) -> Result<(), Error> { pub(crate) async fn finish(&self, tx: &Transaction) -> Result<(), Error> {
let mut run = tx.lock().await; let mut run = tx.lock().await;
self.doc_ids.write().await.finish(&mut run).await?; self.doc_ids.write().await.finish(&mut run).await?;
self.doc_lengths.write().await.finish(&mut run).await?; self.doc_lengths.write().await.finish(&mut run).await?;
@ -484,13 +495,12 @@ mod tests {
use crate::dbs::{Options, Transaction}; use crate::dbs::{Options, Transaction};
use crate::idx::ft::scorer::{BM25Scorer, Score}; use crate::idx::ft::scorer::{BM25Scorer, Score};
use crate::idx::ft::{FtIndex, HitsIterator}; use crate::idx::ft::{FtIndex, HitsIterator};
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*}; use crate::kvs::{Datastore, LockType::*, TransactionType};
use crate::sql::index::SearchParams; use crate::sql::index::SearchParams;
use crate::sql::scoring::Scoring; use crate::sql::scoring::Scoring;
use crate::sql::statements::{DefineAnalyzerStatement, DefineStatement}; use crate::sql::statements::{DefineAnalyzerStatement, DefineStatement};
use crate::sql::{Statement, Thing, Value}; use crate::sql::{Array, Statement, Thing, Value};
use crate::syn; use crate::syn;
use futures::lock::Mutex; use futures::lock::Mutex;
use std::collections::HashMap; use std::collections::HashMap;
@ -537,16 +547,17 @@ mod tests {
pub(super) async fn tx_fti<'a>( pub(super) async fn tx_fti<'a>(
ds: &Datastore, ds: &Datastore,
store_type: TreeStoreType, tt: TransactionType,
az: &DefineAnalyzerStatement, az: &DefineAnalyzerStatement,
order: u32, order: u32,
hl: bool, hl: bool,
) -> (Context<'a>, Options, Transaction, FtIndex) { ) -> (Context<'a>, Options, Transaction, FtIndex) {
let write = matches!(store_type, TreeStoreType::Write); let ctx = Context::default();
let tx = ds.transaction(write.into(), Optimistic).await.unwrap(); let tx = ds.transaction(tt, Optimistic).await.unwrap();
let txn = Arc::new(Mutex::new(tx)); let txn = Arc::new(Mutex::new(tx));
let mut tx = txn.lock().await; let mut tx = txn.lock().await;
let fti = FtIndex::with_analyzer( let fti = FtIndex::with_analyzer(
ctx.get_index_stores(),
&mut tx, &mut tx,
az.clone(), az.clone(),
IndexKeyBase::default(), IndexKeyBase::default(),
@ -558,13 +569,17 @@ mod tests {
terms_order: order, terms_order: order,
sc: Scoring::bm25(), sc: Scoring::bm25(),
hl, hl,
doc_ids_cache: 100,
doc_lengths_cache: 100,
postings_cache: 100,
terms_cache: 100,
}, },
TreeStoreType::Write, tt,
) )
.await .await
.unwrap(); .unwrap();
drop(tx); drop(tx);
(Context::default(), Options::default(), txn, fti) (ctx, Options::default(), txn, fti)
} }
pub(super) async fn finish(txn: &Transaction, fti: FtIndex) { pub(super) async fn finish(txn: &Transaction, fti: FtIndex) {
@ -589,7 +604,7 @@ mod tests {
{ {
// Add one document // Add one document
let (ctx, opt, txn, mut fti) = let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await; tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.index_document(&ctx, &opt, &txn, &doc1, vec![Value::from("hello the world")]) fti.index_document(&ctx, &opt, &txn, &doc1, vec![Value::from("hello the world")])
.await .await
.unwrap(); .unwrap();
@ -599,7 +614,7 @@ mod tests {
{ {
// Add two documents // Add two documents
let (ctx, opt, txn, mut fti) = let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await; tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.index_document(&ctx, &opt, &txn, &doc2, vec![Value::from("a yellow hello")]) fti.index_document(&ctx, &opt, &txn, &doc2, vec![Value::from("a yellow hello")])
.await .await
.unwrap(); .unwrap();
@ -611,7 +626,7 @@ mod tests {
{ {
let (ctx, opt, txn, fti) = let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await; tx_fti(&ds, TransactionType::Read, &az, btree_order, false).await;
// Check the statistics // Check the statistics
let statistics = fti.statistics(&txn).await.unwrap(); let statistics = fti.statistics(&txn).await.unwrap();
assert_eq!(statistics.terms.keys_count, 7); assert_eq!(statistics.terms.keys_count, 7);
@ -643,14 +658,14 @@ mod tests {
{ {
// Reindex one document // Reindex one document
let (ctx, opt, txn, mut fti) = let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await; tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.index_document(&ctx, &opt, &txn, &doc3, vec![Value::from("nobar foo")]) fti.index_document(&ctx, &opt, &txn, &doc3, vec![Value::from("nobar foo")])
.await .await
.unwrap(); .unwrap();
finish(&txn, fti).await; finish(&txn, fti).await;
let (ctx, opt, txn, fti) = let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await; tx_fti(&ds, TransactionType::Read, &az, btree_order, false).await;
// We can still find 'foo' // We can still find 'foo'
let (hits, scr) = search(&ctx, &opt, &txn, &fti, "foo").await; let (hits, scr) = search(&ctx, &opt, &txn, &fti, "foo").await;
@ -668,7 +683,7 @@ mod tests {
{ {
// Remove documents // Remove documents
let (_, _, txn, mut fti) = let (_, _, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await; tx_fti(&ds, TransactionType::Write, &az, btree_order, false).await;
fti.remove_document(&txn, &doc1).await.unwrap(); fti.remove_document(&txn, &doc1).await.unwrap();
fti.remove_document(&txn, &doc2).await.unwrap(); fti.remove_document(&txn, &doc2).await.unwrap();
fti.remove_document(&txn, &doc3).await.unwrap(); fti.remove_document(&txn, &doc3).await.unwrap();
@ -677,7 +692,7 @@ mod tests {
{ {
let (ctx, opt, txn, fti) = let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await; tx_fti(&ds, TransactionType::Read, &az, btree_order, false).await;
let (hits, _) = search(&ctx, &opt, &txn, &fti, "hello").await; let (hits, _) = search(&ctx, &opt, &txn, &fti, "hello").await;
assert!(hits.is_none()); assert!(hits.is_none());
let (hits, _) = search(&ctx, &opt, &txn, &fti, "foo").await; let (hits, _) = search(&ctx, &opt, &txn, &fti, "foo").await;
@ -705,7 +720,7 @@ mod tests {
let btree_order = 5; let btree_order = 5;
{ {
let (ctx, opt, txn, mut fti) = let (ctx, opt, txn, mut fti) =
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, hl).await; tx_fti(&ds, TransactionType::Write, &az, btree_order, hl).await;
fti.index_document( fti.index_document(
&ctx, &ctx,
&opt, &opt,
@ -747,7 +762,7 @@ mod tests {
{ {
let (ctx, opt, txn, fti) = let (ctx, opt, txn, fti) =
tx_fti(&ds, TreeStoreType::Read, &az, btree_order, hl).await; tx_fti(&ds, TransactionType::Read, &az, btree_order, hl).await;
let statistics = fti.statistics(&txn).await.unwrap(); let statistics = fti.statistics(&txn).await.unwrap();
assert_eq!(statistics.terms.keys_count, 17); assert_eq!(statistics.terms.keys_count, 17);
@ -815,4 +830,80 @@ mod tests {
async fn test_ft_index_bm_25_with_highlighting() { async fn test_ft_index_bm_25_with_highlighting() {
test_ft_index_bm_25(true).await; test_ft_index_bm_25(true).await;
} }
/// Repeatedly runs `remove_insert_task` for the record `t:doc1` against `ds`,
/// looping until three seconds have elapsed since the first iteration started.
async fn concurrent_task(ds: Arc<Datastore>, az: DefineAnalyzerStatement) {
    const BTREE_ORDER: u32 = 5;
    let rid: Thing = ("t", "doc1").into();
    // The fields of the document that is removed and indexed on every iteration
    let body = Value::from(Array::from(vec![
        "Enter a search term",
        "Welcome",
        "Docusaurus blogging features are powered by the blog plugin.",
        "Simply add Markdown files (or folders) to the blog directory.",
        "blog",
        "Regular blog authors can be added to authors.yml.",
        "authors.yml",
        "The blog post date can be extracted from filenames, such as:",
        "2019-05-30-welcome.md",
        "2019-05-30-welcome/index.md",
        "A blog post folder can be convenient to co-locate blog post images:",
        "The blog supports tags as well!",
        "And if you don't want a blog: just delete this directory, and use blog: false in your Docusaurus config.",
        "blog: false",
        "MDX Blog Post",
        "Blog posts support Docusaurus Markdown features, such as MDX.",
        "Use the power of React to create interactive blog posts.",
        "Long Blog Post",
        "This is the summary of a very long blog post,",
        "Use a <!-- truncate --> comment to limit blog post size in the list view.",
        "<!--",
        "truncate",
        "-->",
        "First Blog Post",
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet",
    ]));
    let budget = std::time::Duration::from_secs(3);
    let started = std::time::Instant::now();
    while started.elapsed() < budget {
        remove_insert_task(&ds, &az, BTREE_ORDER, &rid, &body).await;
    }
}
/// Runs `concurrent_task` once on its own against a shared in-memory
/// datastore, then spawns two instances of it as Tokio tasks and waits for both.
#[test(tokio::test)]
async fn concurrent_test() {
    let ds = Arc::new(Datastore::new("memory").await.unwrap());
    let mut parsed = syn::parse("DEFINE ANALYZER test TOKENIZERS blank;").unwrap();
    // The parsed query must end with the analyzer definition
    let Statement::Define(DefineStatement::Analyzer(az)) = parsed.0 .0.pop().unwrap() else {
        panic!()
    };
    // One awaited run before the two spawned ones
    concurrent_task(Arc::clone(&ds), az.clone()).await;
    let first = tokio::spawn(concurrent_task(Arc::clone(&ds), az.clone()));
    let second = tokio::spawn(concurrent_task(Arc::clone(&ds), az.clone()));
    let _ = tokio::try_join!(first, second).expect("Tasks failed");
}
/// Opens a write-mode full-text index through `tx_fti`, removes the document
/// `rid` from it, indexes `rid` again with a copy of `content`, and hands the
/// transaction and index to the `finish` helper.
///
/// Panics if removing or indexing the document returns an error.
async fn remove_insert_task(
    ds: &Datastore,
    az: &DefineAnalyzerStatement,
    btree_order: u32,
    rid: &Thing,
    content: &Value,
) {
    // `az` and `rid` are already references: pass them through as-is rather
    // than borrowing them a second time.
    let (ctx, opt, txn, mut fti) =
        tx_fti(ds, TransactionType::Write, az, btree_order, false).await;
    fti.remove_document(&txn, rid).await.unwrap();
    fti.index_document(&ctx, &opt, &txn, rid, vec![content.clone()]).await.unwrap();
    finish(&txn, fti).await;
}
/// Five times in a row against one in-memory datastore: indexes the document
/// `t:doc1`, checks that the terms tree holds 113 keys, removes the document,
/// and checks that the terms tree is back to 0 keys.
#[test(tokio::test)]
async fn remove_insert_sequence() {
    const BTREE_ORDER: u32 = 5;
    let ds = Datastore::new("memory").await.unwrap();
    let mut parsed = syn::parse("DEFINE ANALYZER test TOKENIZERS blank;").unwrap();
    let Statement::Define(DefineStatement::Analyzer(az)) = parsed.0 .0.pop().unwrap() else {
        panic!()
    };
    let rid: Thing = ("t", "doc1").into();
    let body = Value::from(Array::from(vec![
        "Enter a search term",
        "Welcome",
        "Docusaurus blogging features are powered by the blog plugin.",
        "Simply add Markdown files (or folders) to the blog directory.",
        "blog",
        "Regular blog authors can be added to authors.yml.",
        "authors.yml",
        "The blog post date can be extracted from filenames, such as:",
        "2019-05-30-welcome.md",
        "2019-05-30-welcome/index.md",
        "A blog post folder can be convenient to co-locate blog post images:",
        "The blog supports tags as well!",
        "And if you don't want a blog: just delete this directory, and use blog: false in your Docusaurus config.",
        "blog: false",
        "MDX Blog Post",
        "Blog posts support Docusaurus Markdown features, such as MDX.",
        "Use the power of React to create interactive blog posts.",
        "Long Blog Post",
        "This is the summary of a very long blog post,",
        "Use a <!-- truncate --> comment to limit blog post size in the list view.",
        "<!--",
        "truncate",
        "-->",
        "First Blog Post",
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet",
    ]));
    for i in 0..5 {
        debug!("Attempt {i}");
        // Index the document
        {
            let (ctx, opt, txn, mut fti) =
                tx_fti(&ds, TransactionType::Write, &az, BTREE_ORDER, false).await;
            fti.index_document(&ctx, &opt, &txn, &rid, vec![body.clone()]).await.unwrap();
            finish(&txn, fti).await;
        }
        // The terms tree now holds the document's terms
        {
            let (_, _, txn, fti) =
                tx_fti(&ds, TransactionType::Read, &az, BTREE_ORDER, false).await;
            let stats = fti.statistics(&txn).await.unwrap();
            assert_eq!(stats.terms.keys_count, 113);
        }
        // Remove the document
        {
            let (_, _, txn, mut fti) =
                tx_fti(&ds, TransactionType::Write, &az, BTREE_ORDER, false).await;
            fti.remove_document(&txn, &rid).await.unwrap();
            finish(&txn, fti).await;
        }
        // The terms tree is empty again
        {
            let (_, _, txn, fti) =
                tx_fti(&ds, TransactionType::Read, &az, BTREE_ORDER, false).await;
            let stats = fti.statistics(&txn).await.unwrap();
            assert_eq!(stats.terms.keys_count, 0);
        }
    }
}
} }

View file

@ -109,16 +109,16 @@ impl TryFrom<Val> for OffsetRecords {
} }
let decompressed: Vec<u32> = bincode::deserialize(&val)?; let decompressed: Vec<u32> = bincode::deserialize(&val)?;
let mut iter = decompressed.iter(); let mut iter = decompressed.iter();
let s = *iter.next().ok_or(Error::CorruptedIndex)?; let s = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(1)"))?;
let mut indexes = Vec::with_capacity(s as usize); let mut indexes = Vec::with_capacity(s as usize);
for _ in 0..s { for _ in 0..s {
let index = *iter.next().ok_or(Error::CorruptedIndex)?; let index = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(2)"))?;
indexes.push(index); indexes.push(index);
} }
let mut res = Vec::with_capacity(s as usize); let mut res = Vec::with_capacity(s as usize);
for index in indexes { for index in indexes {
let start = *iter.next().ok_or(Error::CorruptedIndex)?; let start = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(3)"))?;
let end = *iter.next().ok_or(Error::CorruptedIndex)?; let end = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(4)"))?;
res.push(Offset::new(index, start, end)); res.push(Offset::new(index, start, end));
} }
Ok(OffsetRecords(res)) Ok(OffsetRecords(res))

View file

@ -2,12 +2,10 @@ use crate::err::Error;
use crate::idx::docids::DocId; use crate::idx::docids::DocId;
use crate::idx::ft::terms::TermId; use crate::idx::ft::terms::TermId;
use crate::idx::trees::bkeys::TrieKeys; use crate::idx::trees::bkeys::TrieKeys;
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore}; use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType}; use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{IndexKeyBase, VersionedSerdeState}; use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction}; use crate::kvs::{Key, Transaction, TransactionType};
use std::sync::Arc;
use tokio::sync::Mutex;
pub(super) type TermFrequency = u64; pub(super) type TermFrequency = u64;
@ -15,15 +13,17 @@ pub(super) struct Postings {
state_key: Key, state_key: Key,
index_key_base: IndexKeyBase, index_key_base: IndexKeyBase,
btree: BTree<TrieKeys>, btree: BTree<TrieKeys>,
store: Arc<Mutex<BTreeNodeStore<TrieKeys>>>, store: BTreeStore<TrieKeys>,
} }
impl Postings { impl Postings {
pub(super) async fn new( pub(super) async fn new(
ixs: &IndexStores,
tx: &mut Transaction, tx: &mut Transaction,
index_key_base: IndexKeyBase, index_key_base: IndexKeyBase,
order: u32, order: u32,
store_type: TreeStoreType, tt: TransactionType,
cache_size: u32,
) -> Result<Self, Error> { ) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bp_key(None); let state_key: Key = index_key_base.new_bp_key(None);
let state: BState = if let Some(val) = tx.get(state_key.clone()).await? { let state: BState = if let Some(val) = tx.get(state_key.clone()).await? {
@ -31,8 +31,14 @@ impl Postings {
} else { } else {
BState::new(order) BState::new(order)
}; };
let store = let store = ixs
TreeNodeStore::new(TreeNodeProvider::Postings(index_key_base.clone()), store_type, 20); .get_store_btree_trie(
TreeNodeProvider::Postings(index_key_base.clone()),
state.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self { Ok(Self {
state_key, state_key,
index_key_base, index_key_base,
@ -49,8 +55,7 @@ impl Postings {
term_freq: TermFrequency, term_freq: TermFrequency,
) -> Result<(), Error> { ) -> Result<(), Error> {
let key = self.index_key_base.new_bf_key(term_id, doc_id); let key = self.index_key_base.new_bf_key(term_id, doc_id);
let mut store = self.store.lock().await; self.btree.insert(tx, &mut self.store, key, term_freq).await
self.btree.insert(tx, &mut store, key, term_freq).await
} }
pub(super) async fn get_term_frequency( pub(super) async fn get_term_frequency(
@ -60,8 +65,7 @@ impl Postings {
doc_id: DocId, doc_id: DocId,
) -> Result<Option<TermFrequency>, Error> { ) -> Result<Option<TermFrequency>, Error> {
let key = self.index_key_base.new_bf_key(term_id, doc_id); let key = self.index_key_base.new_bf_key(term_id, doc_id);
let mut store = self.store.lock().await; self.btree.search(tx, &self.store, &key).await
self.btree.search(tx, &mut store, &key).await
} }
pub(super) async fn remove_posting( pub(super) async fn remove_posting(
@ -71,18 +75,18 @@ impl Postings {
doc_id: DocId, doc_id: DocId,
) -> Result<Option<TermFrequency>, Error> { ) -> Result<Option<TermFrequency>, Error> {
let key = self.index_key_base.new_bf_key(term_id, doc_id); let key = self.index_key_base.new_bf_key(term_id, doc_id);
let mut store = self.store.lock().await; self.btree.delete(tx, &mut self.store, key).await
self.btree.delete(tx, &mut store, key).await
} }
pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> { pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await; self.btree.statistics(tx, &self.store).await
self.btree.statistics(tx, &mut store).await
} }
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> { pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
self.store.lock().await.finish(tx).await?; if self.store.finish(tx).await? {
self.btree.get_state().finish(tx, &self.state_key).await?; let state = self.btree.inc_generation();
tx.set(self.state_key.clone(), state.try_to_val()?).await?;
}
Ok(()) Ok(())
} }
} }
@ -90,58 +94,65 @@ impl Postings {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::idx::ft::postings::Postings; use crate::idx::ft::postings::Postings;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, TransactionType::*}; use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType, TransactionType::*};
use test_log::test; use test_log::test;
async fn new_operation(
ds: &Datastore,
order: u32,
tt: TransactionType,
) -> (Transaction, Postings) {
let mut tx = ds.transaction(tt, Optimistic).await.unwrap();
let p = Postings::new(ds.index_store(), &mut tx, IndexKeyBase::default(), order, tt, 100)
.await
.unwrap();
(tx, p)
}
async fn finish(mut tx: Transaction, mut p: Postings) {
p.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}
#[test(tokio::test)] #[test(tokio::test)]
async fn test_postings() { async fn test_postings() {
const DEFAULT_BTREE_ORDER: u32 = 5; const DEFAULT_BTREE_ORDER: u32 = 5;
let ds = Datastore::new("memory").await.unwrap(); let ds = Datastore::new("memory").await.unwrap();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
// Check empty state
let mut p = Postings::new(
&mut tx,
IndexKeyBase::default(),
DEFAULT_BTREE_ORDER,
TreeStoreType::Write,
)
.await
.unwrap();
{
// Check empty state
let (tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Write).await;
finish(tx, p).await;
let (mut tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Read).await;
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0); assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0);
// Add postings
let (mut tx, mut p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Write).await;
p.update_posting(&mut tx, 1, 2, 3).await.unwrap(); p.update_posting(&mut tx, 1, 2, 3).await.unwrap();
p.update_posting(&mut tx, 1, 4, 5).await.unwrap(); p.update_posting(&mut tx, 1, 4, 5).await.unwrap();
finish(tx, p).await;
p.finish(&mut tx).await.unwrap(); let (mut tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Read).await;
tx.commit().await.unwrap();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
let mut p = Postings::new(
&mut tx,
IndexKeyBase::default(),
DEFAULT_BTREE_ORDER,
TreeStoreType::Write,
)
.await
.unwrap();
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 2); assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 2);
assert_eq!(p.get_term_frequency(&mut tx, 1, 2).await.unwrap(), Some(3)); assert_eq!(p.get_term_frequency(&mut tx, 1, 2).await.unwrap(), Some(3));
assert_eq!(p.get_term_frequency(&mut tx, 1, 4).await.unwrap(), Some(5)); assert_eq!(p.get_term_frequency(&mut tx, 1, 4).await.unwrap(), Some(5));
let (mut tx, mut p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Write).await;
// Check removal of doc 2 // Check removal of doc 2
assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), Some(3)); assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), Some(3));
// Again the same // Again the same
assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), None); assert_eq!(p.remove_posting(&mut tx, 1, 2).await.unwrap(), None);
// Remove doc 4 // Remove doc 4
assert_eq!(p.remove_posting(&mut tx, 1, 4).await.unwrap(), Some(5)); assert_eq!(p.remove_posting(&mut tx, 1, 4).await.unwrap(), Some(5));
finish(tx, p).await;
// The underlying b-tree should be empty now // The underlying b-tree should be empty now
let (mut tx, p) = new_operation(&ds, DEFAULT_BTREE_ORDER, Read).await;
assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0); assert_eq!(p.statistics(&mut tx).await.unwrap().keys_count, 0);
tx.commit().await.unwrap(); }
} }
} }

View file

@ -1,32 +1,32 @@
use crate::err::Error; use crate::err::Error;
use crate::idx::trees::bkeys::FstKeys; use crate::idx::trees::bkeys::FstKeys;
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore}; use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeStore};
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType}; use crate::idx::trees::store::{IndexStores, TreeNodeProvider};
use crate::idx::{IndexKeyBase, VersionedSerdeState}; use crate::idx::{IndexKeyBase, VersionedSerdeState};
use crate::kvs::{Key, Transaction}; use crate::kvs::{Key, Transaction, TransactionType};
use revision::revisioned; use revision::revisioned;
use roaring::RoaringTreemap; use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tokio::sync::Mutex;
pub(crate) type TermId = u64; pub(crate) type TermId = u64;
pub(super) struct Terms { pub(super) struct Terms {
state_key: Key, state_key: Key,
index_key_base: IndexKeyBase, index_key_base: IndexKeyBase,
btree: BTree<FstKeys>, btree: BTree<FstKeys>,
store: Arc<Mutex<BTreeNodeStore<FstKeys>>>, store: BTreeStore<FstKeys>,
available_ids: Option<RoaringTreemap>, available_ids: Option<RoaringTreemap>,
next_term_id: TermId, next_term_id: TermId,
updated: bool,
} }
impl Terms { impl Terms {
pub(super) async fn new( pub(super) async fn new(
ixs: &IndexStores,
tx: &mut Transaction, tx: &mut Transaction,
index_key_base: IndexKeyBase, index_key_base: IndexKeyBase,
default_btree_order: u32, default_btree_order: u32,
store_type: TreeStoreType, tt: TransactionType,
cache_size: u32,
) -> Result<Self, Error> { ) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bt_key(None); let state_key: Key = index_key_base.new_bt_key(None);
let state: State = if let Some(val) = tx.get(state_key.clone()).await? { let state: State = if let Some(val) = tx.get(state_key.clone()).await? {
@ -34,8 +34,14 @@ impl Terms {
} else { } else {
State::new(default_btree_order) State::new(default_btree_order)
}; };
let store = let store = ixs
TreeNodeStore::new(TreeNodeProvider::Terms(index_key_base.clone()), store_type, 20); .get_store_btree_fst(
TreeNodeProvider::Terms(index_key_base.clone()),
state.btree.generation(),
tt,
cache_size as usize,
)
.await;
Ok(Self { Ok(Self {
state_key, state_key,
index_key_base, index_key_base,
@ -43,7 +49,6 @@ impl Terms {
store, store,
available_ids: state.available_ids, available_ids: state.available_ids,
next_term_id: state.next_term_id, next_term_id: state.next_term_id,
updated: false,
}) })
} }
@ -71,16 +76,13 @@ impl Terms {
) -> Result<TermId, Error> { ) -> Result<TermId, Error> {
let term_key = term.into(); let term_key = term.into();
{ {
let mut store = self.store.lock().await; if let Some(term_id) = self.btree.search_mut(tx, &mut self.store, &term_key).await? {
if let Some(term_id) = self.btree.search(tx, &mut store, &term_key).await? {
return Ok(term_id); return Ok(term_id);
} }
} }
let term_id = self.get_next_term_id(); let term_id = self.get_next_term_id();
tx.set(self.index_key_base.new_bu_key(term_id), term_key.clone()).await?; tx.set(self.index_key_base.new_bu_key(term_id), term_key.clone()).await?;
let mut store = self.store.lock().await; self.btree.insert(tx, &mut self.store, term_key, term_id).await?;
self.btree.insert(tx, &mut store, term_key, term_id).await?;
self.updated = true;
Ok(term_id) Ok(term_id)
} }
@ -89,8 +91,7 @@ impl Terms {
tx: &mut Transaction, tx: &mut Transaction,
term: &str, term: &str,
) -> Result<Option<TermId>, Error> { ) -> Result<Option<TermId>, Error> {
let mut store = self.store.lock().await; self.btree.search(tx, &self.store, &term.into()).await
self.btree.search(tx, &mut store, &term.into()).await
} }
pub(super) async fn remove_term_id( pub(super) async fn remove_term_id(
@ -100,8 +101,7 @@ impl Terms {
) -> Result<(), Error> { ) -> Result<(), Error> {
let term_id_key = self.index_key_base.new_bu_key(term_id); let term_id_key = self.index_key_base.new_bu_key(term_id);
if let Some(term_key) = tx.get(term_id_key.clone()).await? { if let Some(term_key) = tx.get(term_id_key.clone()).await? {
let mut store = self.store.lock().await; self.btree.delete(tx, &mut self.store, term_key.clone()).await?;
self.btree.delete(tx, &mut store, term_key.clone()).await?;
tx.del(term_id_key).await?; tx.del(term_id_key).await?;
if let Some(available_ids) = &mut self.available_ids { if let Some(available_ids) = &mut self.available_ids {
available_ids.insert(term_id); available_ids.insert(term_id);
@ -110,21 +110,19 @@ impl Terms {
available_ids.insert(term_id); available_ids.insert(term_id);
self.available_ids = Some(available_ids); self.available_ids = Some(available_ids);
} }
self.updated = true;
} }
Ok(()) Ok(())
} }
pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> { pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
let mut store = self.store.lock().await; self.btree.statistics(tx, &self.store).await
self.btree.statistics(tx, &mut store).await
} }
pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> { pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
let updated = self.store.lock().await.finish(tx).await?; if self.store.finish(tx).await? {
if self.updated || updated { let btree = self.btree.inc_generation().clone();
let state = State { let state = State {
btree: self.btree.get_state().clone(), btree,
available_ids: self.available_ids.take(), available_ids: self.available_ids.take(),
next_term_id: self.next_term_id, next_term_id: self.next_term_id,
}; };
@ -158,11 +156,12 @@ impl State {
mod tests { mod tests {
use crate::idx::ft::postings::TermFrequency; use crate::idx::ft::postings::TermFrequency;
use crate::idx::ft::terms::Terms; use crate::idx::ft::terms::Terms;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, LockType::*, TransactionType::*}; use crate::kvs::TransactionType::{Read, Write};
use crate::kvs::{Datastore, LockType::*, Transaction, TransactionType};
use rand::{thread_rng, Rng}; use rand::{thread_rng, Rng};
use std::collections::HashSet; use std::collections::HashSet;
use test_log::test;
fn random_term(key_length: usize) -> String { fn random_term(key_length: usize) -> String {
thread_rng() thread_rng()
@ -180,84 +179,85 @@ mod tests {
set set
} }
#[tokio::test] async fn new_operation(
ds: &Datastore,
order: u32,
tt: TransactionType,
) -> (Transaction, Terms) {
let mut tx = ds.transaction(tt, Optimistic).await.unwrap();
let t = Terms::new(ds.index_store(), &mut tx, IndexKeyBase::default(), order, tt, 100)
.await
.unwrap();
(tx, t)
}
async fn finish(mut tx: Transaction, mut t: Terms) {
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
}
#[test(tokio::test)]
async fn test_resolve_terms() { async fn test_resolve_terms() {
const BTREE_ORDER: u32 = 7; const BTREE_ORDER: u32 = 7;
let idx = IndexKeyBase::default();
let ds = Datastore::new("memory").await.unwrap(); let ds = Datastore::new("memory").await.unwrap();
{ {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); // Empty operation
let mut t = let (tx, t) = new_operation(&ds, BTREE_ORDER, Write).await;
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap(); finish(tx, t).await;
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
} }
// Resolve a first term // Resolve a first term
{ {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0); assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0);
finish(tx, t).await;
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 1); assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 1);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
} }
// Resolve a second term // Resolve a second term
{ {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1); assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1);
finish(tx, t).await;
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 2); assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 2);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
} }
// Resolve two existing terms with new frequencies // Resolve two existing terms with new frequencies
{ {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0); assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0);
assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1); assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1);
finish(tx, t).await;
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 2); assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 2);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
} }
// Resolve one existing terms and two new terms // Resolve one existing terms and two new terms
{ {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
let mut t =
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
assert_eq!(t.resolve_term_id(&mut tx, "A").await.unwrap(), 2); assert_eq!(t.resolve_term_id(&mut tx, "A").await.unwrap(), 2);
assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0); assert_eq!(t.resolve_term_id(&mut tx, "C").await.unwrap(), 0);
assert_eq!(t.resolve_term_id(&mut tx, "E").await.unwrap(), 3); assert_eq!(t.resolve_term_id(&mut tx, "E").await.unwrap(), 3);
finish(tx, t).await;
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 4); assert_eq!(t.statistics(&mut tx).await.unwrap().keys_count, 4);
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
} }
} }
#[tokio::test] #[test(tokio::test)]
async fn test_deletion() { async fn test_deletion() {
const BTREE_ORDER: u32 = 7; const BTREE_ORDER: u32 = 7;
let idx = IndexKeyBase::default();
let ds = Datastore::new("memory").await.unwrap(); let ds = Datastore::new("memory").await.unwrap();
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); {
let mut t = let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
Terms::new(&mut tx, idx.clone(), BTREE_ORDER, TreeStoreType::Write).await.unwrap();
// Check removing an non-existing term id returns None // Check removing an non-existing term id returns None
assert!(t.remove_term_id(&mut tx, 0).await.is_ok()); assert!(t.remove_term_id(&mut tx, 0).await.is_ok());
@ -266,11 +266,19 @@ mod tests {
t.resolve_term_id(&mut tx, "A").await.unwrap(); t.resolve_term_id(&mut tx, "A").await.unwrap();
t.resolve_term_id(&mut tx, "C").await.unwrap(); t.resolve_term_id(&mut tx, "C").await.unwrap();
t.resolve_term_id(&mut tx, "E").await.unwrap(); t.resolve_term_id(&mut tx, "E").await.unwrap();
finish(tx, t).await;
}
for term in ["A", "C", "E"] { for term in ["A", "C", "E"] {
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
let term_id = t.get_term_id(&mut tx, term).await.unwrap(); let term_id = t.get_term_id(&mut tx, term).await.unwrap();
if let Some(term_id) = term_id { if let Some(term_id) = term_id {
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
t.remove_term_id(&mut tx, term_id).await.unwrap(); t.remove_term_id(&mut tx, term_id).await.unwrap();
finish(tx, t).await;
let (mut tx, t) = new_operation(&ds, BTREE_ORDER, Read).await;
assert_eq!(t.get_term_id(&mut tx, term).await.unwrap(), None); assert_eq!(t.get_term_id(&mut tx, term).await.unwrap(), None);
} else { } else {
panic!("Term ID not found: {}", term); panic!("Term ID not found: {}", term);
@ -278,11 +286,10 @@ mod tests {
} }
// Check id recycling // Check id recycling
let (mut tx, mut t) = new_operation(&ds, BTREE_ORDER, Write).await;
assert_eq!(t.resolve_term_id(&mut tx, "B").await.unwrap(), 0); assert_eq!(t.resolve_term_id(&mut tx, "B").await.unwrap(), 0);
assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1); assert_eq!(t.resolve_term_id(&mut tx, "D").await.unwrap(), 1);
finish(tx, t).await;
t.finish(&mut tx).await.unwrap();
tx.commit().await.unwrap();
} }
fn random_term_freq_vec(term_count: usize) -> Vec<(String, TermFrequency)> { fn random_term_freq_vec(term_count: usize) -> Vec<(String, TermFrequency)> {
@ -295,39 +302,31 @@ mod tests {
vec vec
} }
#[tokio::test] #[test(tokio::test)]
async fn test_resolve_100_docs_with_50_words_one_by_one() { async fn test_resolve_100_docs_with_50_words_one_by_one() {
let ds = Datastore::new("memory").await.unwrap(); let ds = Datastore::new("memory").await.unwrap();
for _ in 0..100 { for _ in 0..100 {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let (mut tx, mut t) = new_operation(&ds, 100, Write).await;
let mut t = Terms::new(&mut tx, IndexKeyBase::default(), 100, TreeStoreType::Write)
.await
.unwrap();
let terms_string = random_term_freq_vec(50); let terms_string = random_term_freq_vec(50);
for (term, _) in terms_string { for (term, _) in terms_string {
t.resolve_term_id(&mut tx, &term).await.unwrap(); t.resolve_term_id(&mut tx, &term).await.unwrap();
} }
t.finish(&mut tx).await.unwrap(); finish(tx, t).await;
tx.commit().await.unwrap();
} }
} }
#[tokio::test] #[test(tokio::test)]
async fn test_resolve_100_docs_with_50_words_batch_of_10() { async fn test_resolve_100_docs_with_50_words_batch_of_10() {
let ds = Datastore::new("memory").await.unwrap(); let ds = Datastore::new("memory").await.unwrap();
for _ in 0..10 { for _ in 0..10 {
let mut tx = ds.transaction(Write, Optimistic).await.unwrap(); let (mut tx, mut t) = new_operation(&ds, 100, Write).await;
let mut t = Terms::new(&mut tx, IndexKeyBase::default(), 100, TreeStoreType::Write)
.await
.unwrap();
for _ in 0..10 { for _ in 0..10 {
let terms_string = random_term_freq_vec(50); let terms_string = random_term_freq_vec(50);
for (term, _) in terms_string { for (term, _) in terms_string {
t.resolve_term_id(&mut tx, &term).await.unwrap(); t.resolve_term_id(&mut tx, &term).await.unwrap();
} }
} }
t.finish(&mut tx).await.unwrap(); finish(tx, t).await;
tx.commit().await.unwrap();
} }
} }
} }

View file

@ -14,10 +14,9 @@ use crate::idx::planner::plan::IndexOperator::Matches;
use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue}; use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
use crate::idx::planner::tree::{IndexRef, IndexesMap}; use crate::idx::planner::tree::{IndexRef, IndexesMap};
use crate::idx::trees::mtree::MTreeIndex; use crate::idx::trees::mtree::MTreeIndex;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::kvs; use crate::kvs;
use crate::kvs::Key; use crate::kvs::{Key, TransactionType};
use crate::sql::index::Index; use crate::sql::index::Index;
use crate::sql::statements::DefineIndexStatement; use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Array, Expression, Object, Table, Thing, Value}; use crate::sql::{Array, Expression, Object, Table, Thing, Value};
@ -85,8 +84,15 @@ impl QueryExecutor {
} }
} else { } else {
let ikb = IndexKeyBase::new(opt, idx_def); let ikb = IndexKeyBase::new(opt, idx_def);
let ft = let ft = FtIndex::new(
FtIndex::new(opt, txn, p.az.as_str(), ikb, p, TreeStoreType::Read) ctx.get_index_stores(),
opt,
txn,
p.az.as_str(),
ikb,
p,
TransactionType::Read,
)
.await?; .await?;
if ft_entry.is_none() { if ft_entry.is_none() {
ft_entry = FtEntry::new(ctx, opt, txn, &ft, io).await?; ft_entry = FtEntry::new(ctx, opt, txn, &ft, io).await?;
@ -111,8 +117,14 @@ impl QueryExecutor {
MtEntry::new(&mut tx, mt, a.clone(), *k).await? MtEntry::new(&mut tx, mt, a.clone(), *k).await?
} else { } else {
let ikb = IndexKeyBase::new(opt, idx_def); let ikb = IndexKeyBase::new(opt, idx_def);
let mt = let mt = MTreeIndex::new(
MTreeIndex::new(&mut tx, ikb, p, TreeStoreType::Read).await?; ctx.get_index_stores(),
&mut tx,
ikb,
p,
TransactionType::Read,
)
.await?;
let entry = MtEntry::new(&mut tx, &mt, a.clone(), *k).await?; let entry = MtEntry::new(&mut tx, &mt, a.clone(), *k).await?;
mt_map.insert(ix_ref, mt); mt_map.insert(ix_ref, mt);
entry entry

View file

@ -5,11 +5,11 @@ use fst::{IntoStreamer, Map, MapBuilder, Streamer};
use radix_trie::{SubTrie, Trie, TrieCommon}; use radix_trie::{SubTrie, Trie, TrieCommon};
use serde::ser; use serde::ser;
use std::collections::VecDeque; use std::collections::VecDeque;
use std::fmt::{Display, Formatter}; use std::fmt::{Debug, Display, Formatter};
use std::io; use std::io;
use std::io::Cursor; use std::io::Cursor;
pub trait BKeys: Default + Display + Sized { pub trait BKeys: Default + Debug + Display + Sized {
fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error>; fn with_key_val(key: Key, payload: Payload) -> Result<Self, Error>;
fn len(&self) -> u32; fn len(&self) -> u32;
fn is_empty(&self) -> bool; fn is_empty(&self) -> bool;
@ -19,7 +19,7 @@ pub trait BKeys: Default + Display + Sized {
// The size of the Node should be small, therefore one instance of // The size of the Node should be small, therefore one instance of
// BKeys would never be store a large volume of keys. // BKeys would never be store a large volume of keys.
fn collect_with_prefix(&self, prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error>; fn collect_with_prefix(&self, prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error>;
fn insert(&mut self, key: Key, payload: Payload); fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload>;
fn append(&mut self, keys: Self); fn append(&mut self, keys: Self);
fn remove(&mut self, key: &Key) -> Option<Payload>; fn remove(&mut self, key: &Key) -> Option<Payload>;
fn split_keys(self) -> Result<SplitKeys<Self>, Error>; fn split_keys(self) -> Result<SplitKeys<Self>, Error>;
@ -30,9 +30,6 @@ pub trait BKeys: Default + Display + Sized {
fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error>; fn read_from(c: &mut Cursor<Vec<u8>>) -> Result<Self, Error>;
fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error>; fn write_to(&self, c: &mut Cursor<Vec<u8>>) -> Result<(), Error>;
fn compile(&mut self) {} fn compile(&mut self) {}
fn debug<F>(&self, to_string: F) -> Result<(), Error>
where
F: Fn(Key) -> Result<String, Error>;
} }
pub struct SplitKeys<BK> pub struct SplitKeys<BK>
@ -46,12 +43,12 @@ where
pub(in crate::idx) median_payload: Payload, pub(in crate::idx) median_payload: Payload,
} }
#[derive(Debug)] #[derive(Debug, Clone)]
pub struct FstKeys { pub struct FstKeys {
i: Inner, i: Inner,
} }
#[derive(Debug)] #[derive(Debug, Clone)]
enum Inner { enum Inner {
Map(Map<Vec<u8>>), Map(Map<Vec<u8>>),
Trie(TrieKeys), Trie(TrieKeys),
@ -104,14 +101,15 @@ impl BKeys for FstKeys {
} }
fn collect_with_prefix(&self, _prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error> { fn collect_with_prefix(&self, _prefix_key: &Key) -> Result<VecDeque<(Key, Payload)>, Error> {
Err(Error::Unreachable) Err(Error::Unreachable("BKeys/FSTKeys::collect_with_prefix"))
} }
fn insert(&mut self, key: Key, payload: Payload) { fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
self.edit(); self.edit();
if let Inner::Trie(t) = &mut self.i { if let Inner::Trie(t) = &mut self.i {
t.insert(key, payload); return t.insert(key, payload);
} }
unreachable!()
} }
fn append(&mut self, keys: Self) { fn append(&mut self, keys: Self) {
@ -159,7 +157,7 @@ impl BKeys for FstKeys {
median_payload: s.median_payload, median_payload: s.median_payload,
}) })
} else { } else {
Err(Error::Unreachable) Err(Error::Unreachable("BKeys/FSTKeys::split_keys"))
} }
} }
@ -245,30 +243,6 @@ impl BKeys for FstKeys {
))) )))
} }
} }
fn debug<F>(&self, to_string: F) -> Result<(), Error>
where
F: Fn(Key) -> Result<String, Error>,
{
match &self.i {
Inner::Map(m) => {
let mut s = String::new();
let mut iter = m.stream();
let mut start = true;
while let Some((k, p)) = iter.next() {
if !start {
s.push(',');
} else {
start = false;
}
s.push_str(&format!("{}={}", to_string(k.to_vec())?.as_str(), p));
}
debug!("FSTKeys[{}]", s);
Ok(())
}
Inner::Trie(t) => t.debug(to_string),
}
}
} }
impl TryFrom<MapBuilder<Vec<u8>>> for FstKeys { impl TryFrom<MapBuilder<Vec<u8>>> for FstKeys {
@ -305,12 +279,12 @@ impl Display for FstKeys {
} }
Ok(()) Ok(())
} }
Inner::Trie(t) => t.fmt(f), Inner::Trie(t) => write!(f, "{}", t),
} }
} }
} }
#[derive(Default, Debug)] #[derive(Default, Debug, Clone)]
pub struct TrieKeys { pub struct TrieKeys {
keys: Trie<Key, Payload>, keys: Trie<Key, Payload>,
} }
@ -372,8 +346,8 @@ impl BKeys for TrieKeys {
Ok(r) Ok(r)
} }
fn insert(&mut self, key: Key, payload: Payload) { fn insert(&mut self, key: Key, payload: Payload) -> Option<Payload> {
self.keys.insert(key, payload); self.keys.insert(key, payload)
} }
fn append(&mut self, keys: Self) { fn append(&mut self, keys: Self) {
@ -400,7 +374,7 @@ impl BKeys for TrieKeys {
let (median_key, median_payload) = if let Some((k, v)) = s.next() { let (median_key, median_payload) = if let Some((k, v)) = s.next() {
(k.clone(), *v) (k.clone(), *v)
} else { } else {
return Err(Error::Unreachable); return Err(Error::Unreachable("BKeys/TrieKeys::split_keys"));
}; };
let mut right = Trie::default(); let mut right = Trie::default();
for (key, val) in s { for (key, val) in s {
@ -468,24 +442,6 @@ impl BKeys for TrieKeys {
bincode::serialize_into(c, &compressed)?; bincode::serialize_into(c, &compressed)?;
Ok(()) Ok(())
} }
fn debug<F>(&self, to_string: F) -> Result<(), Error>
where
F: Fn(Key) -> Result<String, Error>,
{
let mut s = String::new();
let mut start = true;
for (k, p) in self.keys.iter() {
if !start {
s.push(',');
} else {
start = false;
}
s.push_str(&format!("{}={}", to_string(k.to_vec())?.as_str(), p));
}
debug!("TrieKeys[{}]", s);
Ok(())
}
} }
impl From<Trie<Key, Payload>> for TrieKeys { impl From<Trie<Key, Payload>> for TrieKeys {

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,322 +0,0 @@
use crate::err::Error;
use crate::idx::IndexKeyBase;
use crate::kvs::{Key, Transaction, Val};
use lru::LruCache;
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::sync::Arc;
use tokio::sync::Mutex;
/// Identifier of a tree node; `TreeNodeProvider::get_key` turns it into the node's storage key.
pub type NodeId = u64;
/// Selects which caching strategy `TreeNodeStore::new` builds.
///
/// `Debug` and `Eq` are derived in addition to `Clone`, `Copy` and `PartialEq`
/// so the value can be logged and compared with `assert_eq!`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TreeStoreType {
    /// Builds a store that caches the nodes it reads and tracks updated and removed nodes.
    Write,
    /// Builds a store that keeps recently read nodes in an LRU cache.
    Read,
    /// Builds a store that reads nodes from the KV store without any cache.
    Traversal,
}
/// Node storage used by the index trees, in one of three access modes.
pub enum TreeNodeStore<N: TreeNode + Debug> {
    /// Write mode: nodes that are read are cached, and updated and removed
    /// nodes are tracked until the store is finished.
    Write(TreeWriteCache<N>),
    /// Read mode: an LRU cache keeps the most recently read nodes in memory.
    Read(TreeReadCache<N>),
    /// Traversal mode: nodes are read from the KV store without any cache.
    Traversal(TreeNodeProvider),
}
impl<N> TreeNodeStore<N>
where
N: TreeNode + Debug,
{
pub fn new(
keys: TreeNodeProvider,
store_type: TreeStoreType,
read_size: usize,
) -> Arc<Mutex<Self>> {
Arc::new(Mutex::new(match store_type {
TreeStoreType::Write => Self::Write(TreeWriteCache::new(keys)),
TreeStoreType::Read => Self::Read(TreeReadCache::new(keys, read_size)),
TreeStoreType::Traversal => Self::Traversal(keys),
}))
}
pub(super) async fn get_node(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
match self {
TreeNodeStore::Write(w) => w.get_node(tx, node_id).await,
TreeNodeStore::Read(r) => r.get_node(tx, node_id).await,
TreeNodeStore::Traversal(keys) => keys.load::<N>(tx, node_id).await,
}
}
pub(super) fn set_node(&mut self, node: StoredNode<N>, updated: bool) -> Result<(), Error> {
match self {
TreeNodeStore::Write(w) => w.set_node(node, updated),
TreeNodeStore::Read(r) => {
if updated {
Err(Error::Unreachable)
} else {
r.set_node(node);
Ok(())
}
}
TreeNodeStore::Traversal(_) => Ok(()),
}
}
pub(super) fn new_node(&mut self, id: NodeId, node: N) -> Result<StoredNode<N>, Error> {
match self {
TreeNodeStore::Write(w) => Ok(w.new_node(id, node)),
_ => Err(Error::Unreachable),
}
}
pub(super) fn remove_node(&mut self, node_id: NodeId, node_key: Key) -> Result<(), Error> {
match self {
TreeNodeStore::Write(w) => w.remove_node(node_id, node_key),
_ => Err(Error::Unreachable),
}
}
pub(in crate::idx) async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
if let TreeNodeStore::Write(w) = self {
w.finish(tx).await
} else {
Err(Error::Unreachable)
}
}
}
/// Cache backing the write mode of `TreeNodeStore`.
pub struct TreeWriteCache<N: TreeNode + Debug> {
    /// Provider used to build node keys and to load and save nodes.
    np: TreeNodeProvider,
    /// Nodes currently held by the cache, indexed by id.
    nodes: HashMap<NodeId, StoredNode<N>>,
    /// Ids of the nodes to be saved by `finish`.
    updated: HashSet<NodeId>,
    /// Keys of the nodes to be deleted by `finish`, indexed by id.
    removed: HashMap<NodeId, Key>,
    /// Debug builds only: ids obtained through `get_node`/`new_node` that have
    /// not yet been passed to `set_node` or `remove_node`.
    #[cfg(debug_assertions)]
    out: HashSet<NodeId>,
}
impl<N: Debug> TreeWriteCache<N>
where
N: TreeNode,
{
fn new(keys: TreeNodeProvider) -> Self {
Self {
np: keys,
nodes: HashMap::new(),
updated: HashSet::new(),
removed: HashMap::new(),
#[cfg(debug_assertions)]
out: HashSet::new(),
}
}
async fn get_node(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
#[cfg(debug_assertions)]
{
debug!("GET: {}", node_id);
self.out.insert(node_id);
}
if let Some(n) = self.nodes.remove(&node_id) {
return Ok(n);
}
self.np.load::<N>(tx, node_id).await
}
fn set_node(&mut self, node: StoredNode<N>, updated: bool) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
debug!("SET: {} {} {:?}", node.id, updated, node.n);
self.out.remove(&node.id);
}
if updated {
self.updated.insert(node.id);
}
if self.removed.contains_key(&node.id) {
return Err(Error::Unreachable);
}
self.nodes.insert(node.id, node);
Ok(())
}
fn new_node(&mut self, id: NodeId, node: N) -> StoredNode<N> {
#[cfg(debug_assertions)]
{
debug!("NEW: {}", id);
self.out.insert(id);
}
StoredNode {
n: node,
id,
key: self.np.get_key(id),
size: 0,
}
}
fn remove_node(&mut self, node_id: NodeId, node_key: Key) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
debug!("REMOVE: {}", node_id);
if self.nodes.contains_key(&node_id) {
return Err(Error::Unreachable);
}
self.out.remove(&node_id);
}
self.updated.remove(&node_id);
self.removed.insert(node_id, node_key);
Ok(())
}
async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
let update = !self.updated.is_empty() || !self.removed.is_empty();
#[cfg(debug_assertions)]
{
if !self.out.is_empty() {
debug!("OUT: {:?}", self.out);
return Err(Error::Unreachable);
}
}
for node_id in &self.updated {
if let Some(node) = self.nodes.remove(node_id) {
self.np.save(tx, node).await?;
} else {
return Err(Error::Unreachable);
}
}
self.updated.clear();
let node_ids: Vec<NodeId> = self.removed.keys().copied().collect();
for node_id in node_ids {
if let Some(node_key) = self.removed.remove(&node_id) {
tx.del(node_key).await?;
}
}
Ok(update)
}
}
/// Cache backing the read mode of `TreeNodeStore`.
pub struct TreeReadCache<N: TreeNode> {
    /// Provider used to load the nodes that are not cached.
    keys: TreeNodeProvider,
    /// LRU cache of nodes, indexed by id.
    nodes: LruCache<NodeId, StoredNode<N>>,
}
impl<N> TreeReadCache<N>
where
    N: TreeNode,
{
    /// Creates a read cache that loads nodes through `keys` and keeps up to
    /// `size` of them in an LRU cache.
    ///
    /// `LruCache::new` requires a non-zero capacity, so a `size` of 0 is
    /// clamped to 1 instead of panicking.
    fn new(keys: TreeNodeProvider, size: usize) -> Self {
        let capacity = NonZeroUsize::new(size.max(1))
            .expect("capacity is clamped to at least 1, so it is never zero");
        Self {
            keys,
            nodes: LruCache::new(capacity),
        }
    }

    /// Returns the node `node_id`.
    ///
    /// A cached node is popped, i.e. removed from the cache; it is only
    /// cached again once it is passed back to `set_node`. A node that is not
    /// cached is loaded through the provider.
    async fn get_node(
        &mut self,
        tx: &mut Transaction,
        node_id: NodeId,
    ) -> Result<StoredNode<N>, Error> {
        if let Some(n) = self.nodes.pop(&node_id) {
            return Ok(n);
        }
        self.keys.load::<N>(tx, node_id).await
    }

    /// Puts a node in the LRU cache under its id.
    fn set_node(&mut self, node: StoredNode<N>) {
        self.nodes.put(node.id, node);
    }
}
/// Selects which key builder is used to derive the storage key of a tree node.
///
/// Every variant except `Debug` carries the `IndexKeyBase` the key is built from.
#[derive(Clone)]
pub enum TreeNodeProvider {
/// Keys built with `IndexKeyBase::new_bd_key`.
DocIds(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_bl_key`.
DocLengths(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_bp_key`.
Postings(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_bt_key`.
Terms(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_vm_key`.
Vector(IndexKeyBase),
/// The key is the big-endian byte representation of the node id.
Debug,
}
impl TreeNodeProvider {
pub(in crate::idx) fn get_key(&self, node_id: NodeId) -> Key {
match self {
TreeNodeProvider::DocIds(ikb) => ikb.new_bd_key(Some(node_id)),
TreeNodeProvider::DocLengths(ikb) => ikb.new_bl_key(Some(node_id)),
TreeNodeProvider::Postings(ikb) => ikb.new_bp_key(Some(node_id)),
TreeNodeProvider::Terms(ikb) => ikb.new_bt_key(Some(node_id)),
TreeNodeProvider::Vector(ikb) => ikb.new_vm_key(Some(node_id)),
TreeNodeProvider::Debug => node_id.to_be_bytes().to_vec(),
}
}
async fn load<N>(&self, tx: &mut Transaction, id: NodeId) -> Result<StoredNode<N>, Error>
where
N: TreeNode,
{
let key = self.get_key(id);
if let Some(val) = tx.get(key.clone()).await? {
let size = val.len() as u32;
let node = N::try_from_val(val)?;
Ok(StoredNode {
n: node,
id,
key,
size,
})
} else {
Err(Error::CorruptedIndex)
}
}
async fn save<N>(&self, tx: &mut Transaction, mut node: StoredNode<N>) -> Result<(), Error>
where
N: TreeNode,
{
let val = node.n.try_into_val()?;
tx.set(node.key, val).await?;
Ok(())
}
}
/// A tree node together with its id, storage key and stored size.
pub(super) struct StoredNode<N> {
/// The node payload.
pub(super) n: N,
/// The node identifier.
pub(super) id: NodeId,
/// The key under which the node is stored.
pub(super) key: Key,
/// Byte length of the stored value the node was loaded from; 0 for a new node.
pub(super) size: u32,
}
impl<N> StoredNode<N> {
pub(super) fn new(n: N, id: NodeId, key: Key, size: u32) -> Self {
Self {
n,
id,
key,
size,
}
}
}
/// Conversion between a tree node and the `Val` stored for it.
pub trait TreeNode
where
Self: Sized,
{
/// Decodes a node from a stored value.
fn try_from_val(val: Val) -> Result<Self, Error>;
/// Encodes the node into a value to store; takes `&mut self`.
fn try_into_val(&mut self) -> Result<Val, Error>;
}

View file

@ -0,0 +1,217 @@
use crate::err::Error;
use crate::idx::trees::store::{NodeId, StoredNode, TreeNode, TreeNodeProvider};
use crate::kvs::{Key, Transaction};
use quick_cache::sync::Cache;
use quick_cache::GuardResult;
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::fmt::{Debug, Display};
use std::sync::Arc;
use tokio::sync::RwLock;
pub type CacheGen = u64;
/// Shared registry of tree caches.
///
/// The map key is the storage key of node 0 of the tree, as computed by
/// `TreeNodeProvider::get_key(0)`.
pub(super) struct TreeCaches<N>(Arc<RwLock<HashMap<Key, TreeCache<N>>>>)
where
N: TreeNode + Debug + Clone + Display;
impl<N> TreeCaches<N>
where
    N: TreeNode + Debug + Clone + Display,
{
    /// Returns the cache to use for the tree addressed by `keys` at the given generation.
    ///
    /// * No cache held yet: a new one is created, held and returned.
    /// * Held cache has the same generation: a clone of it is returned.
    /// * Held cache is newer than `generation`: a new cache is returned but not held.
    /// * Held cache is older than `generation`: it is replaced by a new cache.
    pub(super) async fn get_cache(
        &self,
        generation: CacheGen,
        keys: &TreeNodeProvider,
        cache_size: usize,
    ) -> TreeCache<N> {
        #[cfg(debug_assertions)]
        debug!("get_cache {generation}");
        let fresh = || -> TreeCache<N> { TreeCache::new(generation, keys.clone(), cache_size) };
        // The key of node 0 is used as the identifier of the cache
        let key = keys.get_key(0);
        match self.0.write().await.entry(key) {
            Entry::Vacant(slot) => {
                // There is no cache for this index yet, create one and hold it
                let cache = fresh();
                slot.insert(cache.clone());
                cache
            }
            Entry::Occupied(mut held) => match generation.cmp(&held.get().generation()) {
                // The cache and the store are matching, send a clone of the cache
                Ordering::Equal => held.get().clone(),
                // The store generation is older than the held cache:
                // return an empty cache without holding it
                Ordering::Less => fresh(),
                // The store generation is more recent than the held cache:
                // create a new cache and hold it
                Ordering::Greater => {
                    let cache = fresh();
                    held.insert(cache.clone());
                    cache
                }
            },
        }
    }

    /// Drops the cache held for the tree addressed by `keys`, if any.
    pub(super) async fn remove_cache(&self, keys: &TreeNodeProvider) {
        let key = keys.get_key(0);
        let mut caches = self.0.write().await;
        caches.remove(&key);
    }

    /// Tells whether no cache is currently held.
    pub(crate) async fn is_empty(&self) -> bool {
        let caches = self.0.read().await;
        caches.is_empty()
    }
}
impl<N> Default for TreeCaches<N>
where
    N: TreeNode + Debug + Clone + Display,
{
    /// Creates a registry that holds no cache.
    fn default() -> Self {
        let caches = HashMap::new();
        Self(Arc::new(RwLock::new(caches)))
    }
}
/// A node cache tagged with the generation it was created for.
///
/// `TreeCache::new` picks the variant from the requested cache size.
#[derive(Clone)]
pub enum TreeCache<N>
where
N: TreeNode + Debug + Clone + Display,
{
/// Size-bounded cache, used when the cache size is not 0.
Lru(CacheGen, TreeLruCache<N>),
/// Map-backed cache, used when the cache size is 0.
Full(CacheGen, TreeFullCache<N>),
}
impl<N> TreeCache<N>
where
    N: TreeNode + Debug + Clone + Display,
{
    /// Creates an empty cache for `generation`.
    ///
    /// A `cache_size` of 0 selects the `Full` cache; any other value selects
    /// the `Lru` cache created with that size.
    pub fn new(generation: CacheGen, keys: TreeNodeProvider, cache_size: usize) -> Self {
        match cache_size {
            0 => Self::Full(generation, TreeFullCache::new(keys)),
            size => Self::Lru(generation, TreeLruCache::new(keys, size)),
        }
    }

    /// Returns the node `node_id` by delegating to the underlying cache.
    pub(super) async fn get_node(
        &self,
        tx: &mut Transaction,
        node_id: NodeId,
    ) -> Result<Arc<StoredNode<N>>, Error> {
        match self {
            Self::Full(_, full) => full.get_node(tx, node_id).await,
            Self::Lru(_, lru) => lru.get_node(tx, node_id).await,
        }
    }

    /// The generation this cache was created for.
    fn generation(&self) -> CacheGen {
        let (Self::Lru(generation, _) | Self::Full(generation, _)) = self;
        *generation
    }
}
/// Node cache backed by a `quick_cache` `Cache` created with a fixed capacity.
pub struct TreeLruCache<N>
where
N: TreeNode + Debug + Clone + Display,
{
/// Provider used to load nodes that are not in the cache.
keys: TreeNodeProvider,
/// The cached nodes; the `Arc` lets clones of this struct share one cache.
cache: Arc<Cache<NodeId, Arc<StoredNode<N>>>>,
}
impl<N> TreeLruCache<N>
where
N: TreeNode + Debug + Clone,
{
fn new(keys: TreeNodeProvider, cache_size: usize) -> Self {
Self {
keys,
cache: Arc::new(Cache::new(cache_size)),
}
}
async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
match self.cache.get_value_or_guard(&node_id, None) {
GuardResult::Value(v) => Ok(v),
GuardResult::Guard(g) => {
let n = Arc::new(self.keys.load::<N>(tx, node_id).await?);
g.insert(n.clone()).ok();
Ok(n)
}
GuardResult::Timeout => Err(Error::Unreachable("TreeCache::get_node")),
}
}
}
impl<N> Clone for TreeLruCache<N>
where
N: TreeNode + Debug + Clone,
{
fn clone(&self) -> Self {
Self {
keys: self.keys.clone(),
cache: self.cache.clone(),
}
}
}
/// Node cache backed by a `HashMap` behind a `RwLock`.
pub struct TreeFullCache<N>
where
N: TreeNode + Debug + Clone,
{
/// Provider used to load nodes that are not in the cache.
keys: TreeNodeProvider,
/// The cached nodes; the `Arc` lets clones of this struct share one map.
cache: Arc<RwLock<HashMap<NodeId, Arc<StoredNode<N>>>>>,
}
impl<N> TreeFullCache<N>
where
    N: TreeNode + Debug + Clone,
{
    /// Creates an empty cache.
    pub fn new(keys: TreeNodeProvider) -> Self {
        let cache = Arc::new(RwLock::new(HashMap::new()));
        Self {
            keys,
            cache,
        }
    }

    /// Returns the node `node_id` from the cache, loading and inserting it on a miss.
    pub(super) async fn get_node(
        &self,
        tx: &mut Transaction,
        node_id: NodeId,
    ) -> Result<Arc<StoredNode<N>>, Error> {
        // Fast path: only the read lock is needed when the node is already cached
        let cached = self.cache.read().await.get(&node_id).cloned();
        if let Some(hit) = cached {
            return Ok(hit);
        }
        // Slow path: check again under the write lock, as the node may have been
        // inserted between the two lock acquisitions
        let mut nodes = self.cache.write().await;
        if let Some(hit) = nodes.get(&node_id) {
            return Ok(Arc::clone(hit));
        }
        let loaded = Arc::new(self.keys.load::<N>(tx, node_id).await?);
        nodes.insert(node_id, Arc::clone(&loaded));
        Ok(loaded)
    }
}
impl<N> Clone for TreeFullCache<N>
where
N: TreeNode + Debug + Clone,
{
fn clone(&self) -> Self {
Self {
keys: self.keys.clone(),
cache: self.cache.clone(),
}
}
}

View file

@ -0,0 +1,306 @@
pub mod cache;
pub(crate) mod tree;
use crate::dbs::Options;
use crate::err::Error;
use crate::idx::trees::bkeys::{FstKeys, TrieKeys};
use crate::idx::trees::btree::{BTreeNode, BTreeStore};
use crate::idx::trees::mtree::{MTreeNode, MTreeStore};
use crate::idx::trees::store::cache::{TreeCache, TreeCaches};
use crate::idx::trees::store::tree::{TreeRead, TreeWrite};
use crate::idx::IndexKeyBase;
use crate::kvs::{Key, Transaction, TransactionType, Val};
use crate::sql::statements::DefineIndexStatement;
use crate::sql::Index;
use std::fmt::{Debug, Display, Formatter};
use std::sync::Arc;
pub type NodeId = u64;
/// Access to the nodes of a tree, in one of two modes chosen from the
/// transaction type in `TreeStore::new`.
pub enum TreeStore<N>
where
N: TreeNode + Debug + Clone,
{
/// Keeps the nodes handed back to it and tracks which nodes were updated or removed
Write(TreeWrite<N>),
/// Serves read-only nodes from a `TreeCache`
Read(TreeRead<N>),
}
impl<N> TreeStore<N>
where
N: TreeNode + Debug + Display + Clone,
{
pub async fn new(keys: TreeNodeProvider, cache: TreeCache<N>, tt: TransactionType) -> Self {
match tt {
TransactionType::Read => Self::Read(TreeRead::new(cache)),
TransactionType::Write => Self::Write(TreeWrite::new(keys, cache)),
}
}
pub(in crate::idx) async fn get_node_mut(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
match self {
TreeStore::Write(w) => w.get_node_mut(tx, node_id).await,
_ => Err(Error::Unreachable("TreeStore::get_node_mut")),
}
}
pub(in crate::idx) async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
match self {
TreeStore::Read(r) => r.get_node(tx, node_id).await,
_ => Err(Error::Unreachable("TreeStore::get_node")),
}
}
pub(in crate::idx) async fn set_node(
&mut self,
node: StoredNode<N>,
updated: bool,
) -> Result<(), Error> {
match self {
TreeStore::Write(w) => w.set_node(node, updated),
_ => Err(Error::Unreachable("TreeStore::set_node")),
}
}
pub(in crate::idx) fn new_node(&mut self, id: NodeId, node: N) -> Result<StoredNode<N>, Error> {
match self {
TreeStore::Write(w) => Ok(w.new_node(id, node)),
_ => Err(Error::Unreachable("TreeStore::new_node")),
}
}
pub(in crate::idx) async fn remove_node(
&mut self,
node_id: NodeId,
node_key: Key,
) -> Result<(), Error> {
match self {
TreeStore::Write(w) => w.remove_node(node_id, node_key),
_ => Err(Error::Unreachable("TreeStore::remove_node")),
}
}
pub async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
match self {
TreeStore::Write(w) => w.finish(tx).await,
_ => Ok(false),
}
}
}
/// Selects which key builder is used to derive the storage key of a tree node.
///
/// Every variant except `Debug` carries the `IndexKeyBase` the key is built from.
#[derive(Clone)]
pub enum TreeNodeProvider {
/// Keys built with `IndexKeyBase::new_bd_key`.
DocIds(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_bl_key`.
DocLengths(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_bp_key`.
Postings(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_bt_key`.
Terms(IndexKeyBase),
/// Keys built with `IndexKeyBase::new_vm_key`.
Vector(IndexKeyBase),
/// The key is the big-endian byte representation of the node id.
Debug,
}
impl TreeNodeProvider {
pub(in crate::idx) fn get_key(&self, node_id: NodeId) -> Key {
match self {
TreeNodeProvider::DocIds(ikb) => ikb.new_bd_key(Some(node_id)),
TreeNodeProvider::DocLengths(ikb) => ikb.new_bl_key(Some(node_id)),
TreeNodeProvider::Postings(ikb) => ikb.new_bp_key(Some(node_id)),
TreeNodeProvider::Terms(ikb) => ikb.new_bt_key(Some(node_id)),
TreeNodeProvider::Vector(ikb) => ikb.new_vm_key(Some(node_id)),
TreeNodeProvider::Debug => node_id.to_be_bytes().to_vec(),
}
}
async fn load<N>(&self, tx: &mut Transaction, id: NodeId) -> Result<StoredNode<N>, Error>
where
N: TreeNode + Clone,
{
let key = self.get_key(id);
if let Some(val) = tx.get(key.clone()).await? {
let size = val.len() as u32;
let node = N::try_from_val(val)?;
Ok(StoredNode::new(node, id, key, size))
} else {
Err(Error::CorruptedIndex("TreeStore::load"))
}
}
async fn save<N>(&self, tx: &mut Transaction, mut node: StoredNode<N>) -> Result<(), Error>
where
N: TreeNode + Clone + Display,
{
let val = node.n.try_into_val()?;
tx.set(node.key, val).await?;
Ok(())
}
}
/// A tree node together with its id, storage key and stored size.
pub struct StoredNode<N>
where
N: Clone + Display,
{
/// The node payload.
pub(super) n: N,
/// The node identifier.
pub(super) id: NodeId,
/// The key under which the node is stored.
pub(super) key: Key,
/// Byte length of the stored value the node was loaded from; 0 for a new node.
pub(super) size: u32,
}
impl<N> StoredNode<N>
where
N: Clone + Display,
{
pub(super) fn new(n: N, id: NodeId, key: Key, size: u32) -> Self {
Self {
n,
id,
key,
size,
}
}
}
impl<N> Display for StoredNode<N>
where
    N: Clone + Display,
{
    /// Writes the node id followed by the node's own `Display` output.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            id,
            n,
            ..
        } = self;
        write!(f, "node_id: {} - {}", id, n)
    }
}
/// Conversion between a tree node and the `Val` stored for it.
///
/// Implementors must also be `Debug`, `Clone` and `Display`.
pub trait TreeNode: Debug + Clone + Display {
/// Decodes a node from a stored value.
fn try_from_val(val: Val) -> Result<Self, Error>
where
Self: Sized;
/// Encodes the node into a value to store; takes `&mut self`.
fn try_into_val(&mut self) -> Result<Val, Error>;
}
/// Cheaply clonable handle on the tree caches; clones share the same `Inner`.
#[derive(Clone)]
pub struct IndexStores(Arc<Inner>);
/// The cache registries, one per node type.
struct Inner {
/// Caches of B-tree nodes using `FstKeys`.
btree_fst_caches: TreeCaches<BTreeNode<FstKeys>>,
/// Caches of B-tree nodes using `TrieKeys`.
btree_trie_caches: TreeCaches<BTreeNode<TrieKeys>>,
/// Caches of M-tree nodes.
mtree_caches: TreeCaches<MTreeNode>,
}
impl Default for IndexStores {
    /// Creates the handle with three empty cache registries.
    fn default() -> Self {
        let inner = Inner {
            mtree_caches: TreeCaches::default(),
            btree_trie_caches: TreeCaches::default(),
            btree_fst_caches: TreeCaches::default(),
        };
        Self(Arc::new(inner))
    }
}
impl IndexStores {
    /// Builds a B-tree store (FST keys) backed by the cache matching `generation`.
    pub(in crate::idx) async fn get_store_btree_fst(
        &self,
        keys: TreeNodeProvider,
        generation: u64,
        tt: TransactionType,
        cache_size: usize,
    ) -> BTreeStore<FstKeys> {
        let cache = self.0.btree_fst_caches.get_cache(generation, &keys, cache_size).await;
        TreeStore::new(keys, cache, tt).await
    }

    /// Builds a B-tree store (trie keys) backed by the cache matching `generation`.
    pub(in crate::idx) async fn get_store_btree_trie(
        &self,
        keys: TreeNodeProvider,
        generation: u64,
        tt: TransactionType,
        cache_size: usize,
    ) -> BTreeStore<TrieKeys> {
        let cache = self.0.btree_trie_caches.get_cache(generation, &keys, cache_size).await;
        TreeStore::new(keys, cache, tt).await
    }

    /// Builds an M-tree store backed by the cache matching `generation`.
    pub(in crate::idx) async fn get_store_mtree(
        &self,
        keys: TreeNodeProvider,
        generation: u64,
        tt: TransactionType,
        cache_size: usize,
    ) -> MTreeStore {
        let cache = self.0.mtree_caches.get_cache(generation, &keys, cache_size).await;
        TreeStore::new(keys, cache, tt).await
    }

    /// Drops the caches of the index `ix` of table `tb`.
    ///
    /// The index definition is looked up through the transaction, so this
    /// fails if that lookup fails.
    pub(crate) async fn index_removed(
        &self,
        opt: &Options,
        tx: &mut Transaction,
        tb: &str,
        ix: &str,
    ) -> Result<(), Error> {
        self.remove_index(
            opt,
            tx.get_and_cache_tb_index(opt.ns(), opt.db(), tb, ix).await?.as_ref(),
        )
        .await
    }

    /// Drops the caches of every index of every table returned by
    /// `tx.all_tb(opt.ns(), opt.db())`.
    // NOTE(review): only the tables of the database selected in `opt` are
    // visited; confirm whether other databases of the namespace need clearing.
    pub(crate) async fn namespace_removed(
        &self,
        opt: &Options,
        tx: &mut Transaction,
    ) -> Result<(), Error> {
        for tb in tx.all_tb(opt.ns(), opt.db()).await?.iter() {
            self.table_removed(opt, tx, &tb.name).await?;
        }
        Ok(())
    }

    /// Drops the caches of every index defined on table `tb`.
    pub(crate) async fn table_removed(
        &self,
        opt: &Options,
        tx: &mut Transaction,
        tb: &str,
    ) -> Result<(), Error> {
        for ix in tx.all_tb_indexes(opt.ns(), opt.db(), tb).await?.iter() {
            self.remove_index(opt, ix).await?;
        }
        Ok(())
    }

    /// Drops the caches belonging to one index definition.
    ///
    /// Only `Search` and `MTree` indexes have caches here; other index kinds
    /// are left untouched.
    async fn remove_index(&self, opt: &Options, ix: &DefineIndexStatement) -> Result<(), Error> {
        let ikb = IndexKeyBase::new(opt, ix);
        match ix.index {
            Index::Search(_) => {
                self.remove_search_cache(ikb).await;
            }
            Index::MTree(_) => {
                self.remove_mtree_cache(ikb).await;
            }
            _ => {}
        }
        Ok(())
    }

    /// Drops the four caches of a search index: doc ids, doc lengths,
    /// postings (trie-keyed B-trees) and terms (FST-keyed B-tree).
    async fn remove_search_cache(&self, ikb: IndexKeyBase) {
        self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::DocIds(ikb.clone())).await;
        self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::DocLengths(ikb.clone())).await;
        self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::Postings(ikb.clone())).await;
        self.0.btree_fst_caches.remove_cache(&TreeNodeProvider::Terms(ikb)).await;
    }

    /// Drops the two caches of an M-tree index: doc ids and vectors.
    async fn remove_mtree_cache(&self, ikb: IndexKeyBase) {
        self.0.btree_trie_caches.remove_cache(&TreeNodeProvider::DocIds(ikb.clone())).await;
        // Last use of `ikb`: move it instead of cloning it again.
        self.0.mtree_caches.remove_cache(&TreeNodeProvider::Vector(ikb)).await;
    }

    /// Tells whether none of the three registries holds a cache.
    pub async fn is_empty(&self) -> bool {
        self.0.mtree_caches.is_empty().await
            && self.0.btree_fst_caches.is_empty().await
            && self.0.btree_trie_caches.is_empty().await
    }
}

View file

@ -0,0 +1,177 @@
use crate::err::Error;
use crate::idx::trees::store::cache::TreeCache;
use crate::idx::trees::store::{NodeId, StoredNode, TreeNode, TreeNodeProvider};
use crate::kvs::{Key, Transaction};
use std::collections::{HashMap, HashSet};
use std::fmt::{Debug, Display};
use std::sync::Arc;
/// Write-side node store: holds the nodes handed back to it and records
/// which nodes must be saved or deleted when `finish` is called.
pub struct TreeWrite<N>
where
N: TreeNode + Debug + Clone,
{
/// Provider used to derive node keys and to save nodes.
np: TreeNodeProvider,
/// Cache consulted for nodes that are not held in `nodes`.
cache: TreeCache<N>,
/// Nodes handed back through `set_node`, by id.
nodes: HashMap<NodeId, StoredNode<N>>,
/// Ids of the nodes to save on `finish`.
updated: HashSet<NodeId>,
/// Ids and keys of the nodes to delete on `finish`.
removed: HashMap<NodeId, Key>,
/// Debug builds only: ids obtained or created but not yet handed back or removed.
#[cfg(debug_assertions)]
out: HashSet<NodeId>,
}
impl<N> TreeWrite<N>
where
N: TreeNode + Clone + Debug + Display,
{
pub(super) fn new(keys: TreeNodeProvider, cache: TreeCache<N>) -> Self {
Self {
np: keys,
cache,
nodes: HashMap::new(),
updated: HashSet::new(),
removed: HashMap::new(),
#[cfg(debug_assertions)]
out: HashSet::new(),
}
}
pub(super) async fn get_node_mut(
&mut self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<StoredNode<N>, Error> {
#[cfg(debug_assertions)]
{
debug!("GET: {}", node_id);
self.out.insert(node_id);
if self.removed.contains_key(&node_id) {
return Err(Error::Unreachable("TreeTransactionWrite::get_node_mut"));
}
}
if let Some(n) = self.nodes.remove(&node_id) {
#[cfg(debug_assertions)]
debug!("GET (NODES): {}", n.n);
return Ok(n);
}
let r = self.cache.get_node(tx, node_id).await?;
#[cfg(debug_assertions)]
debug!("GET (CACHE): {}", r.n);
Ok(StoredNode::new(r.n.clone(), r.id, r.key.clone(), r.size))
}
pub(super) fn set_node(&mut self, node: StoredNode<N>, updated: bool) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
if updated {
debug!("SET {updated}: {node}");
}
self.out.remove(&node.id);
}
if updated {
self.updated.insert(node.id);
}
if self.removed.contains_key(&node.id) {
return Err(Error::Unreachable("TreeTransactionWrite::set_node(2)"));
}
self.nodes.insert(node.id, node);
Ok(())
}
pub(super) fn new_node(&mut self, id: NodeId, node: N) -> StoredNode<N> {
#[cfg(debug_assertions)]
{
debug!("NEW: {}", id);
self.out.insert(id);
}
StoredNode::new(node, id, self.np.get_key(id), 0)
}
pub(super) fn remove_node(&mut self, node_id: NodeId, node_key: Key) -> Result<(), Error> {
#[cfg(debug_assertions)]
{
debug!("REMOVE: {}", node_id);
if self.nodes.contains_key(&node_id) {
return Err(Error::Unreachable("TreeTransactionWrite::remove_node"));
}
self.out.remove(&node_id);
}
self.updated.remove(&node_id);
self.removed.insert(node_id, node_key);
Ok(())
}
pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<bool, Error> {
let update = !self.updated.is_empty() || !self.removed.is_empty();
#[cfg(debug_assertions)]
{
debug!("finish");
if !self.out.is_empty() {
debug!("OUT: {:?}", self.out);
return Err(Error::Unreachable("TreeTransactionWrite::finish(1)"));
}
}
for node_id in &self.updated {
if let Some(node) = self.nodes.remove(node_id) {
#[cfg(debug_assertions)]
debug!("finish: tx.save {node_id}");
self.np.save(tx, node).await?;
} else {
return Err(Error::Unreachable("TreeTransactionWrite::finish(2)"));
}
}
self.updated.clear();
let node_ids: Vec<NodeId> = self.removed.keys().copied().collect();
for node_id in node_ids {
if let Some(node_key) = self.removed.remove(&node_id) {
#[cfg(debug_assertions)]
debug!("finish: tx.del {node_id}");
tx.del(node_key).await?;
}
}
Ok(update)
}
}
#[cfg(debug_assertions)]
impl<N> Drop for TreeWrite<N>
where
    N: TreeNode + Debug + Clone,
{
    /// Debug builds only: warns when the store is dropped while updates or
    /// removals are still pending.
    fn drop(&mut self) {
        let pending_updates = !self.updated.is_empty();
        let pending_removals = !self.removed.is_empty();
        if pending_updates {
            warn!("TreeWrite::finish not called?: updated not empty: {:?}", self.updated);
        }
        if pending_removals {
            warn!("TreeWrite::finish not called?: removed not empty: {:?}", self.removed);
        }
    }
}
/// Read-side node store: every lookup is answered by the wrapped cache.
pub struct TreeRead<N>
where
N: TreeNode + Debug + Clone,
{
/// The cache the nodes are fetched from.
cache: TreeCache<N>,
}
impl<N> TreeRead<N>
where
N: TreeNode + Debug + Clone,
{
pub(super) fn new(cache: TreeCache<N>) -> Self {
Self {
cache,
}
}
pub(super) async fn get_node(
&self,
tx: &mut Transaction,
node_id: NodeId,
) -> Result<Arc<StoredNode<N>>, Error> {
let r = self.cache.get_node(tx, node_id).await?;
#[cfg(debug_assertions)]
debug!("GET: {}", node_id);
Ok(r)
}
}

View file

@ -167,7 +167,7 @@ impl Vector {
(Vector::I16(a), Vector::I16(b)) => { (Vector::I16(a), Vector::I16(b)) => {
Ok((a.iter().zip(b.iter()).map(|(a, b)| (a - b).pow(2)).sum::<i16>() as f64).sqrt()) Ok((a.iter().zip(b.iter()).map(|(a, b)| (a - b).pow(2)).sum::<i16>() as f64).sqrt())
} }
_ => Err(Error::Unreachable), _ => Err(Error::Unreachable("Vector::euclidean_distance")),
} }
} }
@ -189,7 +189,7 @@ impl Vector {
(Vector::I16(a), Vector::I16(b)) => { (Vector::I16(a), Vector::I16(b)) => {
Ok(a.iter().zip(b.iter()).map(|(a, b)| (a - b).abs()).sum::<i16>() as f64) Ok(a.iter().zip(b.iter()).map(|(a, b)| (a - b).abs()).sum::<i16>() as f64)
} }
_ => Err(Error::Unreachable), _ => Err(Error::Unreachable("Vector::manhattan_distance")),
} }
} }
pub(super) fn minkowski_distance(&self, other: &Self, order: &Number) -> Result<f64, Error> { pub(super) fn minkowski_distance(&self, other: &Self, order: &Number) -> Result<f64, Error> {
@ -220,7 +220,7 @@ impl Vector {
.zip(b.iter()) .zip(b.iter())
.map(|(a, b)| (a - b).abs().pow(order.to_int() as u32)) .map(|(a, b)| (a - b).abs().pow(order.to_int() as u32))
.sum::<i16>() as f64, .sum::<i16>() as f64,
_ => return Err(Error::Unreachable), _ => return Err(Error::Unreachable("Vector::minkowski_distance")),
}; };
Ok(dist.powf(1.0 / order.to_float())) Ok(dist.powf(1.0 / order.to_float()))
} }

View file

@ -7,6 +7,7 @@ use crate::dbs::{
}; };
use crate::err::Error; use crate::err::Error;
use crate::iam::{Action, Auth, Error as IamError, Resource, Role}; use crate::iam::{Action, Auth, Error as IamError, Resource, Role};
use crate::idx::trees::store::IndexStores;
use crate::key::root::hb::Hb; use crate::key::root::hb::Hb;
use crate::kvs::clock::SizedClock; use crate::kvs::clock::SizedClock;
#[allow(unused_imports)] #[allow(unused_imports)]
@ -108,6 +109,8 @@ pub struct Datastore {
notification_channel: Option<(Sender<Notification>, Receiver<Notification>)>, notification_channel: Option<(Sender<Notification>, Receiver<Notification>)>,
// Clock for tracking time. It is read only and accessible to all transactions. It is behind a mutex as tests may write to it. // Clock for tracking time. It is read only and accessible to all transactions. It is behind a mutex as tests may write to it.
clock: Arc<RwLock<SizedClock>>, clock: Arc<RwLock<SizedClock>>,
// The index store cache
index_stores: IndexStores,
} }
/// We always want to be circulating the live query information /// We always want to be circulating the live query information
@ -351,6 +354,7 @@ impl Datastore {
capabilities: Capabilities::default(), capabilities: Capabilities::default(),
versionstamp_oracle: Arc::new(Mutex::new(Oracle::systime_counter())), versionstamp_oracle: Arc::new(Mutex::new(Oracle::systime_counter())),
clock, clock,
index_stores: IndexStores::default(),
}) })
} }
@ -403,6 +407,10 @@ impl Datastore {
self self
} }
pub fn index_store(&self) -> &IndexStores {
&self.index_stores
}
/// Is authentication enabled for this Datastore? /// Is authentication enabled for this Datastore?
pub fn is_auth_enabled(&self) -> bool { pub fn is_auth_enabled(&self) -> bool {
self.auth_enabled self.auth_enabled
@ -1046,12 +1054,11 @@ impl Datastore {
// Create a new query executor // Create a new query executor
let mut exe = Executor::new(self); let mut exe = Executor::new(self);
// Create a default context // Create a default context
let mut ctx = Context::default(); let mut ctx = Context::from_ds(
ctx.add_capabilities(self.capabilities.clone()); self.query_timeout,
// Set the global query timeout self.capabilities.clone(),
if let Some(timeout) = self.query_timeout { self.index_stores.clone(),
ctx.add_timeout(timeout); );
}
// Setup the notification channel // Setup the notification channel
if let Some(channel) = &self.notification_channel { if let Some(channel) = &self.notification_channel {
ctx.add_notifications(Some(&channel.0)); ctx.add_notifications(Some(&channel.0));

View file

@ -7,7 +7,6 @@ use crate::dbs::node::ClusterMembership;
use crate::dbs::node::Timestamp; use crate::dbs::node::Timestamp;
use crate::err::Error; use crate::err::Error;
use crate::idg::u32::U32; use crate::idg::u32::U32;
use crate::idx::trees::store::TreeStoreType;
use crate::key::error::KeyCategory; use crate::key::error::KeyCategory;
use crate::key::key_req::KeyRequirements; use crate::key::key_req::KeyRequirements;
use crate::kvs::cache::Cache; use crate::kvs::cache::Cache;
@ -76,7 +75,7 @@ pub(super) enum Inner {
#[cfg(feature = "kv-fdb")] #[cfg(feature = "kv-fdb")]
FoundationDB(super::fdb::Transaction), FoundationDB(super::fdb::Transaction),
} }
#[derive(Copy, Clone)]
pub enum TransactionType { pub enum TransactionType {
Read, Read,
Write, Write,
@ -91,16 +90,6 @@ impl From<bool> for TransactionType {
} }
} }
impl From<TreeStoreType> for TransactionType {
fn from(value: TreeStoreType) -> Self {
match value {
TreeStoreType::Write => TransactionType::Write,
TreeStoreType::Read => TransactionType::Read,
TreeStoreType::Traversal => TransactionType::Read,
}
}
}
pub enum LockType { pub enum LockType {
Pessimistic, Pessimistic,
Optimistic, Optimistic,

View file

@ -21,7 +21,7 @@ pub enum Index {
} }
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[revisioned(revision = 1)] #[revisioned(revision = 2)]
pub struct SearchParams { pub struct SearchParams {
pub az: Ident, pub az: Ident,
pub hl: bool, pub hl: bool,
@ -30,16 +30,28 @@ pub struct SearchParams {
pub doc_lengths_order: u32, pub doc_lengths_order: u32,
pub postings_order: u32, pub postings_order: u32,
pub terms_order: u32, pub terms_order: u32,
#[revision(start = 2)]
pub doc_ids_cache: u32,
#[revision(start = 2)]
pub doc_lengths_cache: u32,
#[revision(start = 2)]
pub postings_cache: u32,
#[revision(start = 2)]
pub terms_cache: u32,
} }
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
#[revisioned(revision = 1)] #[revisioned(revision = 2)]
pub struct MTreeParams { pub struct MTreeParams {
pub dimension: u16, pub dimension: u16,
pub distance: Distance, pub distance: Distance,
pub vector_type: VectorType, pub vector_type: VectorType,
pub capacity: u16, pub capacity: u16,
pub doc_ids_order: u32, pub doc_ids_order: u32,
#[revision(start = 2)]
pub doc_ids_cache: u32,
#[revision(start = 2)]
pub mtree_cache: u32,
} }
#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] #[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
@ -94,13 +106,17 @@ impl Display for Index {
Self::Search(p) => { Self::Search(p) => {
write!( write!(
f, f,
"SEARCH ANALYZER {} {} DOC_IDS_ORDER {} DOC_LENGTHS_ORDER {} POSTINGS_ORDER {} TERMS_ORDER {}", "SEARCH ANALYZER {} {} DOC_IDS_ORDER {} DOC_LENGTHS_ORDER {} POSTINGS_ORDER {} TERMS_ORDER {} DOC_IDS_CACHE {} DOC_LENGTHS_CACHE {} POSTINGS_CACHE {} TERMS_CACHE {}",
p.az, p.az,
p.sc, p.sc,
p.doc_ids_order, p.doc_ids_order,
p.doc_lengths_order, p.doc_lengths_order,
p.postings_order, p.postings_order,
p.terms_order p.terms_order,
p.doc_ids_cache,
p.doc_lengths_cache,
p.postings_cache,
p.terms_cache
)?; )?;
if p.hl { if p.hl {
f.write_str(" HIGHLIGHTS")? f.write_str(" HIGHLIGHTS")?
@ -110,8 +126,8 @@ impl Display for Index {
Self::MTree(p) => { Self::MTree(p) => {
write!( write!(
f, f,
"MTREE DIMENSION {} DIST {} TYPE {} CAPACITY {} DOC_IDS_ORDER {}", "MTREE DIMENSION {} DIST {} TYPE {} CAPACITY {} DOC_IDS_ORDER {} DOC_IDS_CACHE {} MTREE_CACHE {}",
p.dimension, p.distance, p.vector_type, p.capacity, p.doc_ids_order p.dimension, p.distance, p.vector_type, p.capacity, p.doc_ids_order, p.doc_ids_cache, p.mtree_cache
) )
} }
} }

View file

@ -1,5 +1,6 @@
use lru::LruCache;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use quick_cache::sync::Cache;
use quick_cache::GuardResult;
use revision::revisioned; use revision::revisioned;
use serde::{ use serde::{
de::{self, Visitor}, de::{self, Visitor},
@ -9,9 +10,7 @@ use std::cmp::Ordering;
use std::fmt::Debug; use std::fmt::Debug;
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};
use std::hash::{Hash, Hasher}; use std::hash::{Hash, Hasher};
use std::num::NonZeroUsize;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Mutex;
use std::{env, str}; use std::{env, str};
pub(crate) const TOKEN: &str = "$surrealdb::private::sql::Regex"; pub(crate) const TOKEN: &str = "$surrealdb::private::sql::Regex";
@ -28,23 +27,25 @@ impl Regex {
} }
fn regex_new(str: &str) -> Result<regex::Regex, regex::Error> { fn regex_new(str: &str) -> Result<regex::Regex, regex::Error> {
static REGEX_CACHE: Lazy<Mutex<LruCache<String, regex::Regex>>> = Lazy::new(|| { static REGEX_CACHE: Lazy<Cache<String, regex::Regex>> = Lazy::new(|| {
let cache_size: usize = env::var("SURREAL_REGEX_CACHE_SIZE") let cache_size: usize = env::var("SURREAL_REGEX_CACHE_SIZE")
.map_or(1000, |v| v.parse().unwrap_or(1000)) .map_or(1000, |v| v.parse().unwrap_or(1000))
.max(10); // The minimum cache size is 10 .max(10); // The minimum cache size is 10
Mutex::new(LruCache::new(NonZeroUsize::new(cache_size).unwrap())) Cache::new(cache_size)
}); });
let mut cache = match REGEX_CACHE.lock() { match REGEX_CACHE.get_value_or_guard(str, None) {
Ok(guard) => guard, GuardResult::Value(v) => Ok(v),
Err(poisoned) => poisoned.into_inner(), GuardResult::Guard(g) => {
};
if let Some(re) = cache.get(str) {
return Ok(re.clone());
}
let re = regex::Regex::new(str)?; let re = regex::Regex::new(str)?;
cache.put(str.to_owned(), re.clone()); g.insert(re.clone()).ok();
Ok(re) Ok(re)
} }
GuardResult::Timeout => {
warn!("Regex cache timeout");
regex::Regex::new(str)
}
}
}
impl FromStr for Regex { impl FromStr for Regex {
type Err = <regex::Regex as FromStr>::Err; type Err = <regex::Regex as FromStr>::Err;

View file

@ -6,8 +6,8 @@ use crate::err::Error;
use crate::iam::{Action, ResourceKind}; use crate::iam::{Action, ResourceKind};
use crate::idx::ft::FtIndex; use crate::idx::ft::FtIndex;
use crate::idx::trees::mtree::MTreeIndex; use crate::idx::trees::mtree::MTreeIndex;
use crate::idx::trees::store::TreeStoreType;
use crate::idx::IndexKeyBase; use crate::idx::IndexKeyBase;
use crate::kvs::TransactionType;
use crate::sql::ident::Ident; use crate::sql::ident::Ident;
use crate::sql::index::Index; use crate::sql::index::Index;
use crate::sql::value::Value; use crate::sql::value::Value;
@ -28,7 +28,7 @@ impl AnalyzeStatement {
/// Process this type returning a computed simple Value /// Process this type returning a computed simple Value
pub(crate) async fn compute( pub(crate) async fn compute(
&self, &self,
_ctx: &Context<'_>, ctx: &Context<'_>,
opt: &Options, opt: &Options,
txn: &Transaction, txn: &Transaction,
_doc: Option<&CursorDoc<'_>>, _doc: Option<&CursorDoc<'_>>,
@ -48,14 +48,28 @@ impl AnalyzeStatement {
// Index operation dispatching // Index operation dispatching
let value: Value = match &ix.index { let value: Value = match &ix.index {
Index::Search(p) => { Index::Search(p) => {
let ft = let ft = FtIndex::new(
FtIndex::new(opt, txn, p.az.as_str(), ikb, p, TreeStoreType::Traversal) ctx.get_index_stores(),
opt,
txn,
p.az.as_str(),
ikb,
p,
TransactionType::Read,
)
.await?; .await?;
ft.statistics(txn).await?.into() ft.statistics(txn).await?.into()
} }
Index::MTree(p) => { Index::MTree(p) => {
let mut tx = txn.lock().await; let mut tx = txn.lock().await;
let mt = MTreeIndex::new(&mut tx, ikb, p, TreeStoreType::Traversal).await?; let mt = MTreeIndex::new(
ctx.get_index_stores(),
&mut tx,
ikb,
p,
TransactionType::Read,
)
.await?;
mt.statistics(&mut tx).await?.into() mt.statistics(&mut tx).await?.into()
} }
_ => { _ => {

View file

@ -19,7 +19,7 @@ impl RemoveIndexStatement {
/// Process this type returning a computed simple Value /// Process this type returning a computed simple Value
pub(crate) async fn compute( pub(crate) async fn compute(
&self, &self,
_ctx: &Context<'_>, ctx: &Context<'_>,
opt: &Options, opt: &Options,
txn: &Transaction, txn: &Transaction,
) -> Result<Value, Error> { ) -> Result<Value, Error> {
@ -27,6 +27,8 @@ impl RemoveIndexStatement {
opt.is_allowed(Action::Edit, ResourceKind::Index, &Base::Db)?; opt.is_allowed(Action::Edit, ResourceKind::Index, &Base::Db)?;
// Claim transaction // Claim transaction
let mut run = txn.lock().await; let mut run = txn.lock().await;
// Clear the index store cache
ctx.get_index_stores().index_removed(opt, &mut run, &self.what, &self.name).await?;
// Clear the cache // Clear the cache
run.clear_cache(); run.clear_cache();
// Delete the definition // Delete the definition

View file

@ -18,7 +18,7 @@ impl RemoveNamespaceStatement {
/// Process this type returning a computed simple Value /// Process this type returning a computed simple Value
pub(crate) async fn compute( pub(crate) async fn compute(
&self, &self,
_ctx: &Context<'_>, ctx: &Context<'_>,
opt: &Options, opt: &Options,
txn: &Transaction, txn: &Transaction,
) -> Result<Value, Error> { ) -> Result<Value, Error> {
@ -26,6 +26,7 @@ impl RemoveNamespaceStatement {
opt.is_allowed(Action::Edit, ResourceKind::Namespace, &Base::Root)?; opt.is_allowed(Action::Edit, ResourceKind::Namespace, &Base::Root)?;
// Claim transaction // Claim transaction
let mut run = txn.lock().await; let mut run = txn.lock().await;
ctx.get_index_stores().namespace_removed(opt, &mut run).await?;
// Clear the cache // Clear the cache
run.clear_cache(); run.clear_cache();
// Delete the definition // Delete the definition

View file

@ -19,7 +19,7 @@ impl RemoveTableStatement {
/// Process this type returning a computed simple Value /// Process this type returning a computed simple Value
pub(crate) async fn compute( pub(crate) async fn compute(
&self, &self,
_ctx: &Context<'_>, ctx: &Context<'_>,
opt: &Options, opt: &Options,
txn: &Transaction, txn: &Transaction,
) -> Result<Value, Error> { ) -> Result<Value, Error> {
@ -27,6 +27,8 @@ impl RemoveTableStatement {
opt.is_allowed(Action::Edit, ResourceKind::Table, &Base::Db)?; opt.is_allowed(Action::Edit, ResourceKind::Table, &Base::Db)?;
// Claim transaction // Claim transaction
let mut run = txn.lock().await; let mut run = txn.lock().await;
// Remove the index stores
ctx.get_index_stores().table_removed(opt, &mut run, &self.name).await?;
// Clear the cache // Clear the cache
run.clear_cache(); run.clear_cache();
// Get the defined table // Get the defined table

View file

@ -89,10 +89,14 @@ mod tests {
k1: Default::default(), k1: Default::default(),
b: Default::default(), b: Default::default(),
}, },
doc_ids_order: Default::default(), doc_ids_order: 1,
doc_lengths_order: Default::default(), doc_lengths_order: 2,
postings_order: Default::default(), postings_order: 3,
terms_order: Default::default(), terms_order: 4,
doc_ids_cache: 5,
doc_lengths_cache: 6,
postings_cache: 7,
terms_cache: 8,
}); });
let serialized = idx.serialize(Serializer.wrap()).unwrap(); let serialized = idx.serialize(Serializer.wrap()).unwrap();
assert_eq!(idx, serialized); assert_eq!(idx, serialized);

View file

@ -51,6 +51,8 @@ pub(super) struct SerializeMTree {
vector_type: VectorType, vector_type: VectorType,
capacity: u16, capacity: u16,
doc_ids_order: u32, doc_ids_order: u32,
doc_ids_cache: u32,
mtree_cache: u32,
} }
impl serde::ser::SerializeStruct for SerializeMTree { impl serde::ser::SerializeStruct for SerializeMTree {
type Ok = MTreeParams; type Ok = MTreeParams;
@ -76,6 +78,12 @@ impl serde::ser::SerializeStruct for SerializeMTree {
"doc_ids_order" => { "doc_ids_order" => {
self.doc_ids_order = value.serialize(ser::primitive::u32::Serializer.wrap())?; self.doc_ids_order = value.serialize(ser::primitive::u32::Serializer.wrap())?;
} }
"doc_ids_cache" => {
self.doc_ids_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
}
"mtree_cache" => {
self.mtree_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
}
key => { key => {
return Err(Error::custom(format!("unexpected field `MTreeParams {{ {key} }}`"))); return Err(Error::custom(format!("unexpected field `MTreeParams {{ {key} }}`")));
} }
@ -90,6 +98,8 @@ impl serde::ser::SerializeStruct for SerializeMTree {
vector_type: self.vector_type, vector_type: self.vector_type,
capacity: self.capacity, capacity: self.capacity,
doc_ids_order: self.doc_ids_order, doc_ids_order: self.doc_ids_order,
doc_ids_cache: self.doc_ids_cache,
mtree_cache: self.mtree_cache,
}) })
} }
} }
@ -102,6 +112,8 @@ fn mtree_params() {
vector_type: Default::default(), vector_type: Default::default(),
capacity: 2, capacity: 2,
doc_ids_order: 3, doc_ids_order: 3,
doc_ids_cache: 4,
mtree_cache: 5,
}; };
let serialized = params.serialize(Serializer.wrap()).unwrap(); let serialized = params.serialize(Serializer.wrap()).unwrap();
assert_eq!(params, serialized); assert_eq!(params, serialized);

View file

@ -54,6 +54,10 @@ pub(super) struct SerializeSearch {
doc_lengths_order: u32, doc_lengths_order: u32,
postings_order: u32, postings_order: u32,
terms_order: u32, terms_order: u32,
doc_ids_cache: u32,
doc_lengths_cache: u32,
postings_cache: u32,
terms_cache: u32,
} }
impl serde::ser::SerializeStruct for SerializeSearch { impl serde::ser::SerializeStruct for SerializeSearch {
@ -86,6 +90,18 @@ impl serde::ser::SerializeStruct for SerializeSearch {
"terms_order" => { "terms_order" => {
self.terms_order = value.serialize(ser::primitive::u32::Serializer.wrap())?; self.terms_order = value.serialize(ser::primitive::u32::Serializer.wrap())?;
} }
"doc_ids_cache" => {
self.doc_ids_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
}
"doc_lengths_cache" => {
self.doc_lengths_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
}
"postings_cache" => {
self.postings_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
}
"terms_cache" => {
self.terms_cache = value.serialize(ser::primitive::u32::Serializer.wrap())?;
}
key => { key => {
return Err(Error::custom(format!("unexpected field `SearchParams {{ {key} }}`"))); return Err(Error::custom(format!("unexpected field `SearchParams {{ {key} }}`")));
} }
@ -103,6 +119,10 @@ impl serde::ser::SerializeStruct for SerializeSearch {
doc_lengths_order: self.doc_lengths_order, doc_lengths_order: self.doc_lengths_order,
postings_order: self.postings_order, postings_order: self.postings_order,
terms_order: self.terms_order, terms_order: self.terms_order,
doc_ids_cache: self.doc_ids_cache,
doc_lengths_cache: self.doc_lengths_cache,
postings_cache: self.postings_cache,
terms_cache: self.terms_cache,
}), }),
_ => Err(Error::custom("`SearchParams` missing required field(s)")), _ => Err(Error::custom("`SearchParams` missing required field(s)")),
} }
@ -115,10 +135,14 @@ fn search_params() {
az: Default::default(), az: Default::default(),
hl: false, hl: false,
sc: Scoring::Vs, sc: Scoring::Vs,
doc_ids_order: 0, doc_ids_order: 1,
doc_lengths_order: 0, doc_lengths_order: 2,
postings_order: 0, postings_order: 3,
terms_order: 0, terms_order: 4,
doc_ids_cache: 5,
doc_lengths_cache: 6,
postings_cache: 7,
terms_cache: 8,
}; };
let serialized = params.serialize(Serializer.wrap()).unwrap(); let serialized = params.serialize(Serializer.wrap()).unwrap();
assert_eq!(params, serialized); assert_eq!(params, serialized);

View file

@ -7,6 +7,7 @@ use crate::sql::{
index::{Distance, MTreeParams, SearchParams, VectorType}, index::{Distance, MTreeParams, SearchParams, VectorType},
Ident, Index, Ident, Index,
}; };
use nom::{ use nom::{
branch::alt, branch::alt,
bytes::complete::{tag, tag_no_case}, bytes::complete::{tag, tag_no_case},
@ -43,18 +44,34 @@ pub fn doc_ids_order(i: &str) -> IResult<&str, u32> {
order("DOC_IDS_ORDER", i) order("DOC_IDS_ORDER", i)
} }
/// Parses the `DOC_IDS_CACHE` option of an index definition.
///
/// Thin wrapper: passes the keyword `DOC_IDS_CACHE` and the input to the
/// shared `order` helper and returns its result, i.e. the remaining input
/// together with a `u32` value.
///
/// NOTE(review): `order` is defined outside this view; presumably it parses
/// `<keyword> <unsigned integer>` — confirm its whitespace handling there.
pub fn doc_ids_cache(i: &str) -> IResult<&str, u32> {
order("DOC_IDS_CACHE", i)
}
pub fn doc_lengths_order(i: &str) -> IResult<&str, u32> { pub fn doc_lengths_order(i: &str) -> IResult<&str, u32> {
order("DOC_LENGTHS_ORDER", i) order("DOC_LENGTHS_ORDER", i)
} }
/// Parses the `DOC_LENGTHS_CACHE` option of an index definition.
///
/// Thin wrapper: passes the keyword `DOC_LENGTHS_CACHE` and the input to the
/// shared `order` helper and returns its result, i.e. the remaining input
/// together with a `u32` value.
///
/// NOTE(review): `order` is defined outside this view; presumably it parses
/// `<keyword> <unsigned integer>` — confirm its whitespace handling there.
pub fn doc_lengths_cache(i: &str) -> IResult<&str, u32> {
order("DOC_LENGTHS_CACHE", i)
}
pub fn postings_order(i: &str) -> IResult<&str, u32> { pub fn postings_order(i: &str) -> IResult<&str, u32> {
order("POSTINGS_ORDER", i) order("POSTINGS_ORDER", i)
} }
/// Parses the `POSTINGS_CACHE` option of an index definition.
///
/// Thin wrapper: passes the keyword `POSTINGS_CACHE` and the input to the
/// shared `order` helper and returns its result, i.e. the remaining input
/// together with a `u32` value.
///
/// NOTE(review): `order` is defined outside this view; presumably it parses
/// `<keyword> <unsigned integer>` — confirm its whitespace handling there.
pub fn postings_cache(i: &str) -> IResult<&str, u32> {
order("POSTINGS_CACHE", i)
}
pub fn terms_order(i: &str) -> IResult<&str, u32> { pub fn terms_order(i: &str) -> IResult<&str, u32> {
order("TERMS_ORDER", i) order("TERMS_ORDER", i)
} }
/// Parses the `TERMS_CACHE` option of an index definition.
///
/// Thin wrapper: passes the keyword `TERMS_CACHE` and the input to the
/// shared `order` helper and returns its result, i.e. the remaining input
/// together with a `u32` value.
///
/// NOTE(review): `order` is defined outside this view; presumably it parses
/// `<keyword> <unsigned integer>` — confirm its whitespace handling there.
pub fn terms_cache(i: &str) -> IResult<&str, u32> {
order("TERMS_CACHE", i)
}
pub fn highlights(i: &str) -> IResult<&str, bool> { pub fn highlights(i: &str) -> IResult<&str, bool> {
let (i, _) = mightbespace(i)?; let (i, _) = mightbespace(i)?;
map(opt(tag("HIGHLIGHTS")), |x| x.is_some())(i) map(opt(tag("HIGHLIGHTS")), |x| x.is_some())(i)
@ -71,6 +88,10 @@ pub fn search(i: &str) -> IResult<&str, Index> {
let (i, o2) = opt(doc_lengths_order)(i)?; let (i, o2) = opt(doc_lengths_order)(i)?;
let (i, o3) = opt(postings_order)(i)?; let (i, o3) = opt(postings_order)(i)?;
let (i, o4) = opt(terms_order)(i)?; let (i, o4) = opt(terms_order)(i)?;
let (i, c1) = opt(doc_ids_cache)(i)?;
let (i, c2) = opt(doc_lengths_cache)(i)?;
let (i, c3) = opt(postings_cache)(i)?;
let (i, c4) = opt(terms_cache)(i)?;
let (i, hl) = highlights(i)?; let (i, hl) = highlights(i)?;
Ok(( Ok((
i, i,
@ -82,6 +103,10 @@ pub fn search(i: &str) -> IResult<&str, Index> {
doc_lengths_order: o2.unwrap_or(100), doc_lengths_order: o2.unwrap_or(100),
postings_order: o3.unwrap_or(100), postings_order: o3.unwrap_or(100),
terms_order: o4.unwrap_or(100), terms_order: o4.unwrap_or(100),
doc_ids_cache: c1.unwrap_or(100),
doc_lengths_cache: c2.unwrap_or(100),
postings_cache: c3.unwrap_or(100),
terms_cache: c4.unwrap_or(100),
}), }),
)) ))
})(i) })(i)
@ -134,6 +159,10 @@ pub fn capacity(i: &str) -> IResult<&str, u16> {
Ok((i, capacity)) Ok((i, capacity))
} }
/// Parses the `MTREE_CACHE` option of an index definition.
///
/// Thin wrapper: passes the keyword `MTREE_CACHE` and the input to the
/// shared `order` helper and returns its result, i.e. the remaining input
/// together with a `u32` value.
///
/// NOTE(review): `order` is defined outside this view; presumably it parses
/// `<keyword> <unsigned integer>` — confirm its whitespace handling there.
pub fn mtree_cache(i: &str) -> IResult<&str, u32> {
order("MTREE_CACHE", i)
}
pub fn mtree(i: &str) -> IResult<&str, Index> { pub fn mtree(i: &str) -> IResult<&str, Index> {
let (i, _) = tag_no_case("MTREE")(i)?; let (i, _) = tag_no_case("MTREE")(i)?;
let (i, _) = shouldbespace(i)?; let (i, _) = shouldbespace(i)?;
@ -143,6 +172,8 @@ pub fn mtree(i: &str) -> IResult<&str, Index> {
let (i, vector_type) = opt(vector_type)(i)?; let (i, vector_type) = opt(vector_type)(i)?;
let (i, capacity) = opt(capacity)(i)?; let (i, capacity) = opt(capacity)(i)?;
let (i, doc_ids_order) = opt(doc_ids_order)(i)?; let (i, doc_ids_order) = opt(doc_ids_order)(i)?;
let (i, doc_ids_cache) = opt(doc_ids_cache)(i)?;
let (i, mtree_cache) = opt(mtree_cache)(i)?;
Ok(( Ok((
i, i,
Index::MTree(MTreeParams { Index::MTree(MTreeParams {
@ -151,6 +182,8 @@ pub fn mtree(i: &str) -> IResult<&str, Index> {
vector_type: vector_type.unwrap_or(VectorType::F64), vector_type: vector_type.unwrap_or(VectorType::F64),
capacity: capacity.unwrap_or(40), capacity: capacity.unwrap_or(40),
doc_ids_order: doc_ids_order.unwrap_or(100), doc_ids_order: doc_ids_order.unwrap_or(100),
doc_ids_cache: doc_ids_cache.unwrap_or(100),
mtree_cache: mtree_cache.unwrap_or(100),
}), }),
)) ))
})(i) })(i)

View file

@ -138,7 +138,9 @@ mod tests {
#[test] #[test]
fn check_create_search_index_with_highlights() { fn check_create_search_index_with_highlights() {
let sql = "INDEX my_index ON TABLE my_table COLUMNS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) DOC_IDS_ORDER 1000 DOC_LENGTHS_ORDER 1000 POSTINGS_ORDER 1000 TERMS_ORDER 1000 HIGHLIGHTS"; let sql = "INDEX my_index ON TABLE my_table COLUMNS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) \
DOC_IDS_ORDER 1100 DOC_LENGTHS_ORDER 1200 POSTINGS_ORDER 1300 TERMS_ORDER 1400 \
DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 200 POSTINGS_CACHE 300 TERMS_CACHE 400 HIGHLIGHTS";
let (_, idx) = index(sql).unwrap(); let (_, idx) = index(sql).unwrap();
assert_eq!( assert_eq!(
idx, idx,
@ -153,15 +155,21 @@ mod tests {
k1: 1.2, k1: 1.2,
b: 0.75, b: 0.75,
}, },
doc_ids_order: 1000, doc_ids_order: 1100,
doc_lengths_order: 1000, doc_lengths_order: 1200,
postings_order: 1000, postings_order: 1300,
terms_order: 1000, terms_order: 1400,
doc_ids_cache: 100,
doc_lengths_cache: 200,
postings_cache: 300,
terms_cache: 400,
}), }),
comment: None, comment: None,
} }
); );
assert_eq!(idx.to_string(), "DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) DOC_IDS_ORDER 1000 DOC_LENGTHS_ORDER 1000 POSTINGS_ORDER 1000 TERMS_ORDER 1000 HIGHLIGHTS"); assert_eq!(idx.to_string(), "DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer BM25(1.2,0.75) \
DOC_IDS_ORDER 1100 DOC_LENGTHS_ORDER 1200 POSTINGS_ORDER 1300 TERMS_ORDER 1400 \
DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 200 POSTINGS_CACHE 300 TERMS_CACHE 400 HIGHLIGHTS");
} }
#[test] #[test]
@ -182,13 +190,17 @@ mod tests {
doc_lengths_order: 100, doc_lengths_order: 100,
postings_order: 100, postings_order: 100,
terms_order: 100, terms_order: 100,
doc_ids_cache: 100,
doc_lengths_cache: 100,
postings_cache: 100,
terms_cache: 100,
}), }),
comment: None, comment: None,
} }
); );
assert_eq!( assert_eq!(
idx.to_string(), idx.to_string(),
"DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer VS DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100" "DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer VS DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 100 POSTINGS_CACHE 100 TERMS_CACHE 100"
); );
} }
@ -208,13 +220,15 @@ mod tests {
distance: Distance::Euclidean, distance: Distance::Euclidean,
capacity: 40, capacity: 40,
doc_ids_order: 100, doc_ids_order: 100,
doc_ids_cache: 100,
mtree_cache: 100,
}), }),
comment: None, comment: None,
} }
); );
assert_eq!( assert_eq!(
idx.to_string(), idx.to_string(),
"DEFINE INDEX my_index ON my_table FIELDS my_col MTREE DIMENSION 4 DIST EUCLIDEAN TYPE F64 CAPACITY 40 DOC_IDS_ORDER 100" "DEFINE INDEX my_index ON my_table FIELDS my_col MTREE DIMENSION 4 DIST EUCLIDEAN TYPE F64 CAPACITY 40 DOC_IDS_ORDER 100 DOC_IDS_CACHE 100 MTREE_CACHE 100"
); );
} }
} }

View file

@ -1224,11 +1224,12 @@ async fn define_statement_search_index() -> Result<(), Error> {
tables: {}, tables: {},
indexes: { blog_title: 'DEFINE INDEX blog_title ON blog FIELDS title \ indexes: { blog_title: 'DEFINE INDEX blog_title ON blog FIELDS title \
SEARCH ANALYZER simple BM25(1.2,0.75) \ SEARCH ANALYZER simple BM25(1.2,0.75) \
DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 HIGHLIGHTS' }, DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 \
DOC_IDS_CACHE 100 DOC_LENGTHS_CACHE 100 POSTINGS_CACHE 100 TERMS_CACHE 100 HIGHLIGHTS' },
lives: {}, lives: {},
}", }",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;

View file

@ -39,7 +39,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
let val = Value::parse( let val = Value::parse(
"[ "[
@ -49,7 +49,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
Ok(()) Ok(())
} }
@ -89,7 +89,7 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
}, },
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
let val = Value::parse( let val = Value::parse(
"[ "[
@ -99,7 +99,7 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
Ok(()) Ok(())
} }
@ -143,7 +143,7 @@ async fn select_where_matches_using_index_and_arrays(parallel: bool) -> Result<(
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
// //
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
let val = Value::parse( let val = Value::parse(
@ -158,7 +158,7 @@ async fn select_where_matches_using_index_and_arrays(parallel: bool) -> Result<(
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
Ok(()) Ok(())
} }
@ -212,7 +212,7 @@ async fn select_where_matches_using_index_and_objects(parallel: bool) -> Result<
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
// //
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
let val = Value::parse( let val = Value::parse(
@ -274,7 +274,7 @@ async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
Ok(()) Ok(())
} }
@ -306,7 +306,7 @@ async fn select_where_matches_using_index_and_score() -> Result<(), Error> {
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
Ok(()) Ok(())
} }
@ -345,11 +345,11 @@ async fn select_where_matches_without_using_index_and_score() -> Result<(), Erro
} }
]", ]",
); );
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
// This result should be empty, as we are looking for non-existing terms (dummy1 and dummy2). // This result should be empty, as we are looking for non-existing terms (dummy1 and dummy2).
let tmp = res.remove(0).result?; let tmp = res.remove(0).result?;
let val = Value::parse("[]"); let val = Value::parse("[]");
assert_eq!(tmp, val); assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
Ok(()) Ok(())
} }

View file

@ -123,6 +123,9 @@ async fn remove_statement_index() -> Result<(), Error> {
for ix in ["uniq_isbn", "idx_author", "ft_title"] { for ix in ["uniq_isbn", "idx_author", "ft_title"] {
assert_empty_prefix!(&mut tx, surrealdb::key::index::all::new("test", "test", "book", ix)); assert_empty_prefix!(&mut tx, surrealdb::key::index::all::new("test", "test", "book", ix));
} }
// Every index store cache has been removed
assert!(dbs.index_store().is_empty().await);
Ok(()) Ok(())
} }