Feature: Vector Search: mtree index + knn operator (#2546)
Co-authored-by: Tobie Morgan Hitchcock <tobie@surrealdb.com>
This commit is contained in:
parent
1a85f4967a
commit
0772a8c592
41 changed files with 2541 additions and 235 deletions
|
@ -8,7 +8,7 @@ use crate::dbs::Statement;
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::doc::Document;
|
||||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::planner::executor::IteratorRef;
|
||||
use crate::sql::array::Array;
|
||||
use crate::sql::edges::Edges;
|
||||
|
|
|
@ -594,7 +594,7 @@ impl<'a> Processor<'a> {
|
|||
}
|
||||
}
|
||||
Err(Error::QueryNotExecutedDetail {
|
||||
message: "No QueryExecutor has not been found.".to_string(),
|
||||
message: "No QueryExecutor has been found.".to_string(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ use crate::dbs::Workable;
|
|||
use crate::err::Error;
|
||||
use crate::iam::Action;
|
||||
use crate::iam::ResourceKind;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::planner::executor::IteratorRef;
|
||||
use crate::sql::statements::define::DefineEventStatement;
|
||||
use crate::sql::statements::define::DefineFieldStatement;
|
||||
|
|
|
@ -4,10 +4,11 @@ use crate::dbs::{Options, Transaction};
|
|||
use crate::doc::{CursorDoc, Document};
|
||||
use crate::err::Error;
|
||||
use crate::idx::ft::FtIndex;
|
||||
use crate::idx::trees::mtree::MTreeIndex;
|
||||
use crate::idx::trees::store::TreeStoreType;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::sql::array::Array;
|
||||
use crate::sql::index::{Index, SearchParams};
|
||||
use crate::sql::index::{Index, MTreeParams, SearchParams};
|
||||
use crate::sql::statements::DefineIndexStatement;
|
||||
use crate::sql::{Part, Thing, Value};
|
||||
use crate::{key, kvs};
|
||||
|
@ -55,11 +56,7 @@ impl<'a> Document<'a> {
|
|||
Index::Uniq => ic.index_unique(&mut run).await?,
|
||||
Index::Idx => ic.index_non_unique(&mut run).await?,
|
||||
Index::Search(p) => ic.index_full_text(&mut run, p).await?,
|
||||
Index::MTree(_) => {
|
||||
return Err(Error::FeatureNotYetImplemented {
|
||||
feature: "MTree indexing".to_string(),
|
||||
})
|
||||
}
|
||||
Index::MTree(p) => ic.index_mtree(&mut run, p).await?,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -332,18 +329,36 @@ impl<'a> IndexOperation<'a> {
|
|||
}
|
||||
|
||||
async fn index_full_text(
|
||||
&self,
|
||||
&mut self,
|
||||
run: &mut kvs::Transaction,
|
||||
p: &SearchParams,
|
||||
) -> Result<(), Error> {
|
||||
let ikb = IndexKeyBase::new(self.opt, self.ix);
|
||||
let az = run.get_db_analyzer(self.opt.ns(), self.opt.db(), p.az.as_str()).await?;
|
||||
let mut ft = FtIndex::new(run, az, ikb, p, TreeStoreType::Write).await?;
|
||||
if let Some(n) = &self.n {
|
||||
if let Some(n) = self.n.take() {
|
||||
ft.index_document(run, self.rid, n).await?;
|
||||
} else {
|
||||
ft.remove_document(run, self.rid).await?;
|
||||
}
|
||||
ft.finish(run).await
|
||||
}
|
||||
|
||||
async fn index_mtree(
|
||||
&mut self,
|
||||
run: &mut kvs::Transaction,
|
||||
p: &MTreeParams,
|
||||
) -> Result<(), Error> {
|
||||
let ikb = IndexKeyBase::new(self.opt, self.ix);
|
||||
let mut mt = MTreeIndex::new(run, ikb, p, TreeStoreType::Write).await?;
|
||||
// Delete the old index data
|
||||
if let Some(o) = self.o.take() {
|
||||
mt.remove_document(run, self.rid, o).await?;
|
||||
}
|
||||
// Create the new index data
|
||||
if let Some(n) = self.n.take() {
|
||||
mt.index_document(run, self.rid, n).await?;
|
||||
}
|
||||
mt.finish(run).await
|
||||
}
|
||||
}
|
||||
|
|
|
@ -208,6 +208,26 @@ pub enum Error {
|
|||
#[error("The URL `{0}` is invalid")]
|
||||
InvalidUrl(String),
|
||||
|
||||
/// The size of the vector is incorrect
|
||||
#[error("Incorrect vector dimension ({current}). Expected a vector of {expected} dimension.")]
|
||||
InvalidVectorDimension {
|
||||
current: usize,
|
||||
expected: usize,
|
||||
},
|
||||
|
||||
/// An element of the vector is not a number
|
||||
#[error("The vector element ({current}) is not a number.")]
|
||||
InvalidVectorType {
|
||||
current: String,
|
||||
expected: &'static str,
|
||||
},
|
||||
|
||||
/// The value is not a vector
|
||||
#[error("The value '{current}' is not a vector.")]
|
||||
InvalidVectorValue {
|
||||
current: String,
|
||||
},
|
||||
|
||||
/// The query timed out
|
||||
#[error("The query was not executed because it exceeded the timeout")]
|
||||
QueryTimedout,
|
||||
|
|
|
@ -2,6 +2,7 @@ use crate::ctx::Context;
|
|||
use crate::dbs::Transaction;
|
||||
use crate::doc::CursorDoc;
|
||||
use crate::err::Error;
|
||||
use crate::idx::planner::executor::QueryExecutor;
|
||||
use crate::sql::value::TryAdd;
|
||||
use crate::sql::value::TryDiv;
|
||||
use crate::sql::value::TryMul;
|
||||
|
@ -9,7 +10,7 @@ use crate::sql::value::TryNeg;
|
|||
use crate::sql::value::TryPow;
|
||||
use crate::sql::value::TrySub;
|
||||
use crate::sql::value::Value;
|
||||
use crate::sql::Expression;
|
||||
use crate::sql::{Expression, Thing};
|
||||
|
||||
pub fn neg(a: Value) -> Result<Value, Error> {
|
||||
a.try_neg()
|
||||
|
@ -167,32 +168,59 @@ pub fn intersects(a: &Value, b: &Value) -> Result<Value, Error> {
|
|||
Ok(a.intersects(b).into())
|
||||
}
|
||||
|
||||
enum IndexOption<'a> {
|
||||
PreMatch,
|
||||
None,
|
||||
Execute(&'a QueryExecutor, &'a Thing),
|
||||
}
|
||||
|
||||
fn get_index_option<'a>(
|
||||
ctx: &'a Context<'_>,
|
||||
doc: Option<&'a CursorDoc<'_>>,
|
||||
exp: &'a Expression,
|
||||
) -> IndexOption<'a> {
|
||||
if let Some(doc) = doc {
|
||||
if let Some(thg) = doc.rid {
|
||||
if let Some(pla) = ctx.get_query_planner() {
|
||||
if let Some(exe) = pla.get_query_executor(&thg.tb) {
|
||||
if let Some(ir) = doc.ir {
|
||||
if exe.is_iterator_expression(ir, exp) {
|
||||
return IndexOption::PreMatch;
|
||||
}
|
||||
}
|
||||
return IndexOption::Execute(exe, thg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
IndexOption::None
|
||||
}
|
||||
|
||||
pub(crate) async fn matches(
|
||||
ctx: &Context<'_>,
|
||||
txn: &Transaction,
|
||||
doc: Option<&CursorDoc<'_>>,
|
||||
exp: &Expression,
|
||||
) -> Result<Value, Error> {
|
||||
if let Some(doc) = doc {
|
||||
if let Some(thg) = doc.rid {
|
||||
if let Some(pla) = ctx.get_query_planner() {
|
||||
if let Some(exe) = pla.get_query_executor(&thg.tb) {
|
||||
// If we find the expression in `pre_match`,
|
||||
// it means that we are using an Iterator::Index
|
||||
// and we are iterating over documents that already match the expression.
|
||||
if let Some(ir) = doc.ir {
|
||||
if exe.is_iterator_expression(ir, exp) {
|
||||
return Ok(Value::Bool(true));
|
||||
match get_index_option(ctx, doc, exp) {
|
||||
IndexOption::PreMatch => Ok(Value::Bool(true)),
|
||||
IndexOption::None => Ok(Value::Bool(false)),
|
||||
IndexOption::Execute(exe, thg) => exe.matches(txn, thg, exp).await,
|
||||
}
|
||||
}
|
||||
// Evaluate the matches
|
||||
return exe.matches(txn, thg, exp).await;
|
||||
|
||||
pub(crate) async fn knn(
|
||||
ctx: &Context<'_>,
|
||||
txn: &Transaction,
|
||||
doc: Option<&CursorDoc<'_>>,
|
||||
exp: &Expression,
|
||||
) -> Result<Value, Error> {
|
||||
match get_index_option(ctx, doc, exp) {
|
||||
IndexOption::PreMatch => Ok(Value::Bool(true)),
|
||||
IndexOption::None => Ok(Value::Bool(false)),
|
||||
IndexOption::Execute(exe, thg) => exe.knn(txn, thg, exp).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Value::Bool(false))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
|
@ -30,6 +30,7 @@ impl_module_def!(
|
|||
"insert" => run,
|
||||
"intersect" => run,
|
||||
"join" => run,
|
||||
"knn" => run,
|
||||
"last" => run,
|
||||
"len" => run,
|
||||
"logical_and" => run,
|
||||
|
|
|
@ -132,11 +132,11 @@ impl ManhattanDistance for Vec<Number> {
|
|||
}
|
||||
|
||||
pub trait MinkowskiDistance {
|
||||
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error>;
|
||||
fn minkowski_distance(&self, other: &Self, order: &Number) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl MinkowskiDistance for Vec<Number> {
|
||||
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error> {
|
||||
fn minkowski_distance(&self, other: &Self, order: &Number) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::distance::minkowski", self, other)?;
|
||||
let p = order.to_float();
|
||||
let dist: f64 = self
|
||||
|
|
|
@ -75,7 +75,7 @@ pub mod distance {
|
|||
}
|
||||
|
||||
pub fn minkowski((a, b, o): (Vec<Number>, Vec<Number>, Number)) -> Result<Value, Error> {
|
||||
Ok(a.minkowski_distance(&b, o)?.into())
|
||||
Ok(a.minkowski_distance(&b, &o)?.into())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ pub(crate) struct DocIds {
|
|||
}
|
||||
|
||||
impl DocIds {
|
||||
pub(super) async fn new(
|
||||
pub(in crate::idx) async fn new(
|
||||
tx: &mut Transaction,
|
||||
index_key_base: IndexKeyBase,
|
||||
default_btree_order: u32,
|
||||
|
@ -78,7 +78,7 @@ impl DocIds {
|
|||
|
||||
/// Returns the doc_id for the given doc_key.
|
||||
/// If the doc_id does not exist, a new one is created and associated with the given key.
|
||||
pub(super) async fn resolve_doc_id(
|
||||
pub(in crate::idx) async fn resolve_doc_id(
|
||||
&mut self,
|
||||
tx: &mut Transaction,
|
||||
doc_key: Key,
|
||||
|
@ -97,7 +97,7 @@ impl DocIds {
|
|||
Ok(Resolved::New(doc_id))
|
||||
}
|
||||
|
||||
pub(super) async fn remove_doc(
|
||||
pub(in crate::idx) async fn remove_doc(
|
||||
&mut self,
|
||||
tx: &mut Transaction,
|
||||
doc_key: Key,
|
||||
|
@ -119,7 +119,7 @@ impl DocIds {
|
|||
}
|
||||
}
|
||||
|
||||
pub(super) async fn get_doc_key(
|
||||
pub(in crate::idx) async fn get_doc_key(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
doc_id: DocId,
|
||||
|
@ -132,12 +132,15 @@ impl DocIds {
|
|||
}
|
||||
}
|
||||
|
||||
pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
|
||||
pub(in crate::idx) async fn statistics(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
) -> Result<BStatistics, Error> {
|
||||
let mut store = self.store.lock().await;
|
||||
self.btree.statistics(tx, &mut store).await
|
||||
}
|
||||
|
||||
pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
|
||||
pub(in crate::idx) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
|
||||
let updated = self.store.lock().await.finish(tx).await?;
|
||||
if self.updated || updated {
|
||||
let state = State {
|
||||
|
@ -172,20 +175,20 @@ impl State {
|
|||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub(super) enum Resolved {
|
||||
pub(in crate::idx) enum Resolved {
|
||||
New(DocId),
|
||||
Existing(DocId),
|
||||
}
|
||||
|
||||
impl Resolved {
|
||||
pub(super) fn doc_id(&self) -> &DocId {
|
||||
pub(in crate::idx) fn doc_id(&self) -> &DocId {
|
||||
match self {
|
||||
Resolved::New(doc_id) => doc_id,
|
||||
Resolved::Existing(doc_id) => doc_id,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn was_existing(&self) -> bool {
|
||||
pub(in crate::idx) fn was_existing(&self) -> bool {
|
||||
match self {
|
||||
Resolved::New(_) => false,
|
||||
Resolved::Existing(_) => true,
|
||||
|
@ -195,7 +198,7 @@ impl Resolved {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::idx::ft::docids::{DocIds, Resolved};
|
||||
use crate::idx::docids::{DocIds, Resolved};
|
||||
use crate::idx::trees::store::TreeStoreType;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::kvs::{Datastore, Transaction};
|
|
@ -64,7 +64,7 @@ impl Analyzer {
|
|||
&self,
|
||||
terms: &mut Terms,
|
||||
tx: &mut Transaction,
|
||||
field_content: &[Value],
|
||||
field_content: Vec<Value>,
|
||||
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>), Error> {
|
||||
let mut dl = 0;
|
||||
// Let's first collect all the inputs, and collect the tokens.
|
||||
|
@ -101,7 +101,7 @@ impl Analyzer {
|
|||
&self,
|
||||
terms: &mut Terms,
|
||||
tx: &mut Transaction,
|
||||
content: &[Value],
|
||||
content: Vec<Value>,
|
||||
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>, Vec<(TermId, OffsetRecords)>), Error> {
|
||||
let mut dl = 0;
|
||||
// Let's first collect all the inputs, and collect the tokens.
|
||||
|
@ -135,25 +135,25 @@ impl Analyzer {
|
|||
Ok((dl, tfid, osid))
|
||||
}
|
||||
|
||||
fn analyze_content(&self, content: &[Value], tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||
fn analyze_content(&self, content: Vec<Value>, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||
for v in content {
|
||||
self.analyze_value(v, tks)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn analyze_value(&self, val: &Value, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||
fn analyze_value(&self, val: Value, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||
match val {
|
||||
Value::Strand(s) => tks.push(self.analyze(s.0.clone())?),
|
||||
Value::Strand(s) => tks.push(self.analyze(s.0)?),
|
||||
Value::Number(n) => tks.push(self.analyze(n.to_string())?),
|
||||
Value::Bool(b) => tks.push(self.analyze(b.to_string())?),
|
||||
Value::Array(a) => {
|
||||
for v in &a.0 {
|
||||
for v in a.0 {
|
||||
self.analyze_value(v, tks)?;
|
||||
}
|
||||
}
|
||||
Value::Object(o) => {
|
||||
for v in o.0.values() {
|
||||
for (_, v) in o.0 {
|
||||
self.analyze_value(v, tks)?;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::trees::bkeys::TrieKeys;
|
||||
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore, Payload};
|
||||
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
|
||||
|
@ -72,9 +72,8 @@ impl DocLengths {
|
|||
}
|
||||
|
||||
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
|
||||
if self.store.lock().await.finish(tx).await? {
|
||||
tx.set(self.state_key.clone(), self.btree.get_state().try_to_val()?).await?;
|
||||
}
|
||||
self.store.lock().await.finish(tx).await?;
|
||||
self.btree.get_state().finish(tx, &self.state_key).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
pub(crate) mod analyzer;
|
||||
pub(crate) mod docids;
|
||||
mod doclength;
|
||||
mod highlighter;
|
||||
mod offsets;
|
||||
|
@ -9,8 +8,8 @@ pub(super) mod termdocs;
|
|||
pub(crate) mod terms;
|
||||
|
||||
use crate::err::Error;
|
||||
use crate::idx::docids::{DocId, DocIds};
|
||||
use crate::idx::ft::analyzer::Analyzer;
|
||||
use crate::idx::ft::docids::{DocId, DocIds};
|
||||
use crate::idx::ft::doclength::DocLengths;
|
||||
use crate::idx::ft::highlighter::{Highlighter, Offseter};
|
||||
use crate::idx::ft::offsets::Offsets;
|
||||
|
@ -198,7 +197,7 @@ impl FtIndex {
|
|||
&mut self,
|
||||
tx: &mut Transaction,
|
||||
rid: &Thing,
|
||||
content: &[Value],
|
||||
content: Vec<Value>,
|
||||
) -> Result<(), Error> {
|
||||
// Resolve the doc_id
|
||||
let resolved = self.doc_ids.write().await.resolve_doc_id(tx, rid.into()).await?;
|
||||
|
@ -481,7 +480,7 @@ mod tests {
|
|||
}
|
||||
assert_eq!(map.len(), e.len());
|
||||
for (k, p) in e {
|
||||
assert_eq!(map.get(k), Some(&p));
|
||||
assert_eq!(map.get(k), Some(&p), "{}", k);
|
||||
}
|
||||
} else {
|
||||
panic!("hits is none");
|
||||
|
@ -549,9 +548,7 @@ mod tests {
|
|||
// Add one document
|
||||
let (mut tx, mut fti) =
|
||||
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
||||
fti.index_document(&mut tx, &doc1, &vec![Value::from("hello the world")])
|
||||
.await
|
||||
.unwrap();
|
||||
fti.index_document(&mut tx, &doc1, vec![Value::from("hello the world")]).await.unwrap();
|
||||
finish(tx, fti).await;
|
||||
}
|
||||
|
||||
|
@ -559,8 +556,8 @@ mod tests {
|
|||
// Add two documents
|
||||
let (mut tx, mut fti) =
|
||||
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
||||
fti.index_document(&mut tx, &doc2, &vec![Value::from("a yellow hello")]).await.unwrap();
|
||||
fti.index_document(&mut tx, &doc3, &vec![Value::from("foo bar")]).await.unwrap();
|
||||
fti.index_document(&mut tx, &doc2, vec![Value::from("a yellow hello")]).await.unwrap();
|
||||
fti.index_document(&mut tx, &doc3, vec![Value::from("foo bar")]).await.unwrap();
|
||||
finish(tx, fti).await;
|
||||
}
|
||||
|
||||
|
@ -575,7 +572,13 @@ mod tests {
|
|||
|
||||
// Search & score
|
||||
let (hits, scr) = search(&mut tx, &fti, "hello").await;
|
||||
check_hits(&mut tx, hits, scr, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
|
||||
check_hits(
|
||||
&mut tx,
|
||||
hits,
|
||||
scr,
|
||||
vec![(&doc1, Some(-0.4859746)), (&doc2, Some(-0.4859746))],
|
||||
)
|
||||
.await;
|
||||
|
||||
let (hits, scr) = search(&mut tx, &fti, "world").await;
|
||||
check_hits(&mut tx, hits, scr, vec![(&doc1, Some(0.4859746))]).await;
|
||||
|
@ -597,7 +600,7 @@ mod tests {
|
|||
// Reindex one document
|
||||
let (mut tx, mut fti) =
|
||||
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
||||
fti.index_document(&mut tx, &doc3, &vec![Value::from("nobar foo")]).await.unwrap();
|
||||
fti.index_document(&mut tx, &doc3, vec![Value::from("nobar foo")]).await.unwrap();
|
||||
finish(tx, fti).await;
|
||||
|
||||
let (mut tx, fti) = tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await;
|
||||
|
@ -655,28 +658,28 @@ mod tests {
|
|||
fti.index_document(
|
||||
&mut tx,
|
||||
&doc1,
|
||||
&vec![Value::from("the quick brown fox jumped over the lazy dog")],
|
||||
vec![Value::from("the quick brown fox jumped over the lazy dog")],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
fti.index_document(
|
||||
&mut tx,
|
||||
&doc2,
|
||||
&vec![Value::from("the fast fox jumped over the lazy dog")],
|
||||
vec![Value::from("the fast fox jumped over the lazy dog")],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
fti.index_document(
|
||||
&mut tx,
|
||||
&doc3,
|
||||
&vec![Value::from("the dog sat there and did nothing")],
|
||||
vec![Value::from("the dog sat there and did nothing")],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
fti.index_document(
|
||||
&mut tx,
|
||||
&doc4,
|
||||
&vec![Value::from("the other animals sat there watching")],
|
||||
vec![Value::from("the other animals sat there watching")],
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
@ -698,10 +701,10 @@ mod tests {
|
|||
hits,
|
||||
scr,
|
||||
vec![
|
||||
(&doc1, Some(0.0)),
|
||||
(&doc2, Some(0.0)),
|
||||
(&doc3, Some(0.0)),
|
||||
(&doc4, Some(0.0)),
|
||||
(&doc1, Some(-3.4388628)),
|
||||
(&doc2, Some(-3.621457)),
|
||||
(&doc3, Some(-2.258829)),
|
||||
(&doc4, Some(-2.393017)),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
@ -711,7 +714,11 @@ mod tests {
|
|||
&mut tx,
|
||||
hits,
|
||||
scr,
|
||||
vec![(&doc1, Some(0.0)), (&doc2, Some(0.0)), (&doc3, Some(0.0))],
|
||||
vec![
|
||||
(&doc1, Some(-0.7832165)),
|
||||
(&doc2, Some(-0.8248031)),
|
||||
(&doc3, Some(-0.87105393)),
|
||||
],
|
||||
)
|
||||
.await;
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::terms::TermId;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::kvs::{Transaction, Val};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::terms::TermId;
|
||||
use crate::idx::trees::bkeys::TrieKeys;
|
||||
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore};
|
||||
|
@ -81,10 +81,8 @@ impl Postings {
|
|||
}
|
||||
|
||||
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
|
||||
let updated = self.store.lock().await.finish(tx).await?;
|
||||
if self.btree.is_updated() || updated {
|
||||
tx.set(self.state_key.clone(), self.btree.get_state().try_to_val()?).await?;
|
||||
}
|
||||
self.store.lock().await.finish(tx).await?;
|
||||
self.btree.get_state().finish(tx, &self.state_key).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::doclength::{DocLength, DocLengths};
|
||||
use crate::idx::ft::postings::{Postings, TermFrequency};
|
||||
use crate::idx::ft::termdocs::TermsDocs;
|
||||
|
@ -76,8 +76,8 @@ impl BM25Scorer {
|
|||
// (N - n(qi) + 0.5)
|
||||
let numerator = self.doc_count - term_doc_count + 0.5;
|
||||
let idf = (numerator / denominator).ln();
|
||||
if idf.is_nan() || idf <= 0.0 {
|
||||
return 0.0;
|
||||
if idf.is_nan() {
|
||||
return f32::NAN;
|
||||
}
|
||||
let tf_prim = 1.0 + term_freq.ln();
|
||||
// idf * (k1 + 1)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::doclength::DocLength;
|
||||
use crate::idx::ft::terms::TermId;
|
||||
use crate::idx::IndexKeyBase;
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
pub(crate) mod docids;
|
||||
pub(crate) mod ft;
|
||||
pub(crate) mod planner;
|
||||
pub mod trees;
|
||||
|
||||
use crate::dbs::Options;
|
||||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::terms::TermId;
|
||||
use crate::idx::trees::store::NodeId;
|
||||
use crate::key::index::bc::Bc;
|
||||
|
@ -18,6 +19,7 @@ use crate::key::index::bp::Bp;
|
|||
use crate::key::index::bs::Bs;
|
||||
use crate::key::index::bt::Bt;
|
||||
use crate::key::index::bu::Bu;
|
||||
use crate::key::index::vm::Vm;
|
||||
use crate::kvs::{Key, Val};
|
||||
use crate::sql::statements::DefineIndexStatement;
|
||||
use revision::Revisioned;
|
||||
|
@ -171,6 +173,17 @@ impl IndexKeyBase {
|
|||
)
|
||||
.into()
|
||||
}
|
||||
|
||||
fn new_vm_key(&self, node_id: Option<NodeId>) -> Key {
|
||||
Vm::new(
|
||||
self.inner.ns.as_str(),
|
||||
self.inner.db.as_str(),
|
||||
self.inner.tb.as_str(),
|
||||
self.inner.ix.as_str(),
|
||||
node_id,
|
||||
)
|
||||
.into()
|
||||
}
|
||||
}
|
||||
|
||||
/// This trait provides `Revision` based default implementations for serialization/deserialization
|
||||
|
|
|
@ -1,25 +1,27 @@
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::{DocId, DocIds};
|
||||
use crate::idx::docids::{DocId, DocIds};
|
||||
use crate::idx::ft::scorer::BM25Scorer;
|
||||
use crate::idx::ft::termdocs::TermsDocs;
|
||||
use crate::idx::ft::terms::TermId;
|
||||
use crate::idx::ft::{FtIndex, MatchRef};
|
||||
use crate::idx::planner::iterators::{
|
||||
IndexEqualThingIterator, IndexRangeThingIterator, MatchesThingIterator, ThingIterator,
|
||||
UniqueEqualThingIterator, UniqueRangeThingIterator,
|
||||
IndexEqualThingIterator, IndexRangeThingIterator, KnnThingIterator, MatchesThingIterator,
|
||||
ThingIterator, UniqueEqualThingIterator, UniqueRangeThingIterator,
|
||||
};
|
||||
use crate::idx::planner::plan::IndexOperator::Matches;
|
||||
use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
|
||||
use crate::idx::planner::tree::{IndexMap, IndexRef};
|
||||
use crate::idx::trees::mtree::MTreeIndex;
|
||||
use crate::idx::trees::store::TreeStoreType;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::kvs;
|
||||
use crate::kvs::Key;
|
||||
use crate::sql::index::Index;
|
||||
use crate::sql::statements::DefineIndexStatement;
|
||||
use crate::sql::{Expression, Object, Table, Thing, Value};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use crate::sql::{Array, Expression, Object, Table, Thing, Value};
|
||||
use roaring::RoaringTreemap;
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
|
@ -30,6 +32,7 @@ pub(crate) struct QueryExecutor {
|
|||
exp_entries: HashMap<Arc<Expression>, FtEntry>,
|
||||
it_entries: Vec<IteratorEntry>,
|
||||
index_definitions: HashMap<IndexRef, DefineIndexStatement>,
|
||||
mt_exp: HashMap<Arc<Expression>, MtEntry>,
|
||||
}
|
||||
|
||||
pub(crate) type IteratorRef = u16;
|
||||
|
@ -66,31 +69,32 @@ impl QueryExecutor {
|
|||
let mut mr_entries = HashMap::default();
|
||||
let mut exp_entries = HashMap::default();
|
||||
let mut ft_map = HashMap::default();
|
||||
let mut mt_map: HashMap<IndexRef, MTreeIndex> = HashMap::default();
|
||||
let mut mt_exp = HashMap::default();
|
||||
|
||||
// Create all the instances of FtIndex
|
||||
// Build the FtEntries and map them to Expressions and MatchRef
|
||||
for (exp, io) in im.options {
|
||||
let mut entry = None;
|
||||
let ir = io.ir();
|
||||
if let Some(idx_def) = im.definitions.get(&ir) {
|
||||
if let Index::Search(p) = &idx_def.index {
|
||||
match &idx_def.index {
|
||||
Index::Search(p) => {
|
||||
let mut ft_entry = None;
|
||||
if let Some(ft) = ft_map.get(&ir) {
|
||||
if entry.is_none() {
|
||||
entry = FtEntry::new(&mut run, ft, io).await?;
|
||||
if ft_entry.is_none() {
|
||||
ft_entry = FtEntry::new(&mut run, ft, io).await?;
|
||||
}
|
||||
} else {
|
||||
let ikb = IndexKeyBase::new(opt, idx_def);
|
||||
let az = run.get_db_analyzer(opt.ns(), opt.db(), p.az.as_str()).await?;
|
||||
let ft = FtIndex::new(&mut run, az, ikb, p, TreeStoreType::Read).await?;
|
||||
if entry.is_none() {
|
||||
entry = FtEntry::new(&mut run, &ft, io).await?;
|
||||
let ft =
|
||||
FtIndex::new(&mut run, az, ikb, p, TreeStoreType::Read).await?;
|
||||
if ft_entry.is_none() {
|
||||
ft_entry = FtEntry::new(&mut run, &ft, io).await?;
|
||||
}
|
||||
ft_map.insert(ir, ft);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(e) = entry {
|
||||
if let Some(e) = ft_entry {
|
||||
if let Matches(_, Some(mr)) = e.0.index_option.op() {
|
||||
if mr_entries.insert(*mr, e.clone()).is_some() {
|
||||
return Err(Error::DuplicatedMatchRef {
|
||||
|
@ -101,6 +105,25 @@ impl QueryExecutor {
|
|||
exp_entries.insert(exp, e);
|
||||
}
|
||||
}
|
||||
Index::MTree(p) => {
|
||||
if let IndexOperator::Knn(a, k) = io.op() {
|
||||
let entry = if let Some(mt) = mt_map.get(&ir) {
|
||||
MtEntry::new(&mut run, mt, a.clone(), *k).await?
|
||||
} else {
|
||||
let ikb = IndexKeyBase::new(opt, idx_def);
|
||||
let mt =
|
||||
MTreeIndex::new(&mut run, ikb, p, TreeStoreType::Read).await?;
|
||||
let entry = MtEntry::new(&mut run, &mt, a.clone(), *k).await?;
|
||||
mt_map.insert(ir, mt);
|
||||
entry
|
||||
};
|
||||
mt_exp.insert(exp, entry);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
table: table.0.clone(),
|
||||
|
@ -109,6 +132,19 @@ impl QueryExecutor {
|
|||
exp_entries,
|
||||
it_entries: Vec::new(),
|
||||
index_definitions: im.definitions,
|
||||
mt_exp,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn knn(
|
||||
&self,
|
||||
_txn: &Transaction,
|
||||
_thg: &Thing,
|
||||
exp: &Expression,
|
||||
) -> Result<Value, Error> {
|
||||
// If no previous case were successful, we end up with a user error
|
||||
Err(Error::NoIndexFoundForMatch {
|
||||
value: exp.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -168,9 +204,7 @@ impl QueryExecutor {
|
|||
Index::Search {
|
||||
..
|
||||
} => self.new_search_index_iterator(ir, io.clone()).await,
|
||||
Index::MTree(_) => Err(Error::FeatureNotYetImplemented {
|
||||
feature: "VectorSearch iterator".to_string(),
|
||||
}),
|
||||
Index::MTree(_) => Ok(self.new_mtree_index_knn_iterator(ir)),
|
||||
}
|
||||
} else {
|
||||
Ok(None)
|
||||
|
@ -258,6 +292,16 @@ impl QueryExecutor {
|
|||
Ok(None)
|
||||
}
|
||||
|
||||
fn new_mtree_index_knn_iterator(&self, ir: IteratorRef) -> Option<ThingIterator> {
|
||||
if let Some(IteratorEntry::Single(exp, ..)) = self.it_entries.get(ir as usize) {
|
||||
if let Some(mte) = self.mt_exp.get(exp.as_ref()) {
|
||||
let it = KnnThingIterator::new(mte.doc_ids.clone(), mte.res.clone());
|
||||
return Some(ThingIterator::Knn(it));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub(crate) async fn matches(
|
||||
&self,
|
||||
txn: &Transaction,
|
||||
|
@ -406,3 +450,24 @@ impl FtEntry {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(super) struct MtEntry {
|
||||
doc_ids: Arc<RwLock<DocIds>>,
|
||||
res: VecDeque<RoaringTreemap>,
|
||||
}
|
||||
|
||||
impl MtEntry {
|
||||
async fn new(
|
||||
tx: &mut kvs::Transaction,
|
||||
mt: &MTreeIndex,
|
||||
a: Array,
|
||||
k: u32,
|
||||
) -> Result<Self, Error> {
|
||||
let res = mt.knn_search(tx, a, k as usize).await?;
|
||||
Ok(Self {
|
||||
res,
|
||||
doc_ids: mt.doc_ids(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::err::Error;
|
||||
use crate::idx::ft::docids::{DocId, NO_DOC_ID};
|
||||
use crate::idx::docids::{DocId, DocIds, NO_DOC_ID};
|
||||
use crate::idx::ft::termdocs::TermsDocs;
|
||||
use crate::idx::ft::{FtIndex, HitsIterator};
|
||||
use crate::idx::planner::plan::RangeValue;
|
||||
|
@ -8,6 +8,10 @@ use crate::key::index::Index;
|
|||
use crate::kvs::Key;
|
||||
use crate::sql::statements::DefineIndexStatement;
|
||||
use crate::sql::{Array, Thing, Value};
|
||||
use roaring::RoaringTreemap;
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub(crate) enum ThingIterator {
|
||||
IndexEqual(IndexEqualThingIterator),
|
||||
|
@ -15,6 +19,7 @@ pub(crate) enum ThingIterator {
|
|||
UniqueEqual(UniqueEqualThingIterator),
|
||||
UniqueRange(UniqueRangeThingIterator),
|
||||
Matches(MatchesThingIterator),
|
||||
Knn(KnnThingIterator),
|
||||
}
|
||||
|
||||
impl ThingIterator {
|
||||
|
@ -29,6 +34,7 @@ impl ThingIterator {
|
|||
ThingIterator::IndexRange(i) => i.next_batch(tx, size).await,
|
||||
ThingIterator::UniqueRange(i) => i.next_batch(tx, size).await,
|
||||
ThingIterator::Matches(i) => i.next_batch(tx, size).await,
|
||||
ThingIterator::Knn(i) => i.next_batch(tx, size).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -307,3 +313,52 @@ impl MatchesThingIterator {
|
|||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct KnnThingIterator {
|
||||
doc_ids: Arc<RwLock<DocIds>>,
|
||||
res: VecDeque<RoaringTreemap>,
|
||||
current: Option<RoaringTreemap>,
|
||||
skip: RoaringTreemap,
|
||||
}
|
||||
|
||||
impl KnnThingIterator {
|
||||
pub(super) fn new(doc_ids: Arc<RwLock<DocIds>>, mut res: VecDeque<RoaringTreemap>) -> Self {
|
||||
let current = res.pop_front();
|
||||
Self {
|
||||
doc_ids,
|
||||
res,
|
||||
current,
|
||||
skip: RoaringTreemap::new(),
|
||||
}
|
||||
}
|
||||
async fn next_batch(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
mut limit: u32,
|
||||
) -> Result<Vec<(Thing, DocId)>, Error> {
|
||||
let mut res = vec![];
|
||||
let mut tx = txn.lock().await;
|
||||
while self.current.is_some() && limit > 0 {
|
||||
if let Some(docs) = &mut self.current {
|
||||
if let Some(doc_id) = docs.iter().next() {
|
||||
docs.remove(doc_id);
|
||||
if self.skip.insert(doc_id) {
|
||||
if let Some(doc_key) =
|
||||
self.doc_ids.read().await.get_doc_key(&mut tx, doc_id).await?
|
||||
{
|
||||
res.push((doc_key.into(), doc_id));
|
||||
limit -= 1;
|
||||
}
|
||||
}
|
||||
if docs.is_empty() {
|
||||
self.current = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.current.is_none() {
|
||||
self.current = self.res.pop_front();
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -149,6 +149,7 @@ pub(super) enum IndexOperator {
|
|||
Equality(Array),
|
||||
RangePart(Operator, Value),
|
||||
Matches(String, Option<MatchRef>),
|
||||
Knn(Array, u32),
|
||||
}
|
||||
|
||||
impl IndexOption {
|
||||
|
@ -191,6 +192,10 @@ impl IndexOption {
|
|||
e.insert("operator", Value::from(op.to_string()));
|
||||
e.insert("value", v.to_owned());
|
||||
}
|
||||
IndexOperator::Knn(a, k) => {
|
||||
e.insert("operator", Value::from(format!("<{}>", k)));
|
||||
e.insert("value", Value::Array(a.clone()));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,10 +102,10 @@ impl<'a> TreeBuilder<'a> {
|
|||
match v {
|
||||
Value::Expression(e) => self.eval_expression(e).await,
|
||||
Value::Idiom(i) => self.eval_idiom(i).await,
|
||||
Value::Strand(_) => Ok(Node::Scalar(v.to_owned())),
|
||||
Value::Number(_) => Ok(Node::Scalar(v.to_owned())),
|
||||
Value::Bool(_) => Ok(Node::Scalar(v.to_owned())),
|
||||
Value::Thing(_) => Ok(Node::Scalar(v.to_owned())),
|
||||
Value::Strand(_) | Value::Number(_) | Value::Bool(_) | Value::Thing(_) => {
|
||||
Ok(Node::Scalar(v.to_owned()))
|
||||
}
|
||||
Value::Array(a) => Ok(self.eval_array(a)),
|
||||
Value::Subquery(s) => self.eval_subquery(s).await,
|
||||
Value::Param(p) => {
|
||||
let v = p.compute(self.ctx, self.opt, self.txn, None).await?;
|
||||
|
@ -115,6 +115,16 @@ impl<'a> TreeBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn eval_array(&mut self, a: &Array) -> Node {
|
||||
// Check if it is a numeric vector
|
||||
for v in &a.0 {
|
||||
if !v.is_number() {
|
||||
return Node::Unsupported(format!("Unsupported array: {}", a));
|
||||
}
|
||||
}
|
||||
Node::Vector(a.to_owned())
|
||||
}
|
||||
|
||||
async fn eval_idiom(&mut self, i: &Idiom) -> Result<Node, Error> {
|
||||
if let Some(irs) = self.find_indexes(i).await? {
|
||||
if !irs.is_empty() {
|
||||
|
@ -165,25 +175,38 @@ impl<'a> TreeBuilder<'a> {
|
|||
irs: &[IndexRef],
|
||||
op: &Operator,
|
||||
id: &Idiom,
|
||||
v: &Node,
|
||||
n: &Node,
|
||||
e: &Expression,
|
||||
) -> Option<IndexOption> {
|
||||
if let Some(v) = v.is_scalar() {
|
||||
for ir in irs {
|
||||
if let Some(ix) = self.index_map.definitions.get(ir) {
|
||||
let op = match &ix.index {
|
||||
Index::Idx => Self::eval_index_operator(op, v),
|
||||
Index::Uniq => Self::eval_index_operator(op, v),
|
||||
Index::Idx => Self::eval_index_operator(op, n),
|
||||
Index::Uniq => Self::eval_index_operator(op, n),
|
||||
Index::Search {
|
||||
..
|
||||
} => {
|
||||
if let Some(v) = n.is_scalar() {
|
||||
if let Operator::Matches(mr) = op {
|
||||
Some(IndexOperator::Matches(v.clone().to_raw_string(), *mr))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Index::MTree(_) => {
|
||||
if let Operator::Knn(k) = op {
|
||||
if let Node::Vector(a) = n {
|
||||
Some(IndexOperator::Knn(a.clone(), *k))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
Index::MTree(_) => None,
|
||||
};
|
||||
if let Some(op) = op {
|
||||
let io = IndexOption::new(*ir, id.clone(), op);
|
||||
|
@ -192,11 +215,11 @@ impl<'a> TreeBuilder<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn eval_index_operator(op: &Operator, v: &Value) -> Option<IndexOperator> {
|
||||
fn eval_index_operator(op: &Operator, n: &Node) -> Option<IndexOperator> {
|
||||
if let Some(v) = n.is_scalar() {
|
||||
match op {
|
||||
Operator::Equal => Some(IndexOperator::Equality(Array::from(v.clone()))),
|
||||
Operator::LessThan
|
||||
|
@ -205,6 +228,9 @@ impl<'a> TreeBuilder<'a> {
|
|||
| Operator::MoreThanOrEqual => Some(IndexOperator::RangePart(op.clone(), v.clone())),
|
||||
_ => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
async fn eval_subquery(&mut self, s: &Subquery) -> Result<Node, Error> {
|
||||
|
@ -235,6 +261,7 @@ pub(super) enum Node {
|
|||
IndexedField(Idiom, Arc<Vec<IndexRef>>),
|
||||
NonIndexedField,
|
||||
Scalar(Value),
|
||||
Vector(Array),
|
||||
Unsupported(String),
|
||||
}
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ where
|
|||
{
|
||||
state: BState,
|
||||
full_size: u32,
|
||||
updated: bool,
|
||||
bk: PhantomData<BK>,
|
||||
}
|
||||
|
||||
|
@ -31,6 +30,8 @@ pub struct BState {
|
|||
minimum_degree: u32,
|
||||
root: Option<NodeId>,
|
||||
next_node_id: NodeId,
|
||||
#[serde(skip)]
|
||||
updated: bool,
|
||||
}
|
||||
|
||||
impl VersionedSerdeState for BState {}
|
||||
|
@ -42,8 +43,34 @@ impl BState {
|
|||
minimum_degree,
|
||||
root: None,
|
||||
next_node_id: 0,
|
||||
updated: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces the root node id.
///
/// The state is only flagged as updated when the root actually changes.
fn set_root(&mut self, node_id: Option<NodeId>) {
    if self.root == node_id {
        // Same root: nothing to record
        return;
    }
    self.root = node_id;
    self.updated = true;
}
|
||||
|
||||
/// Allocates a fresh node id.
///
/// Returns the current value of the id counter, advances the counter by one
/// and flags the state as updated.
fn new_node_id(&mut self) -> NodeId {
    let allocated = self.next_node_id;
    self.next_node_id = allocated + 1;
    self.updated = true;
    allocated
}
|
||||
|
||||
pub(in crate::idx) async fn finish(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
key: &Key,
|
||||
) -> Result<(), Error> {
|
||||
if self.updated {
|
||||
tx.set(key.clone(), self.try_to_val()?).await?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq)]
|
||||
|
@ -166,7 +193,6 @@ where
|
|||
Self {
|
||||
full_size: state.minimum_degree * 2 - 1,
|
||||
state,
|
||||
updated: false,
|
||||
bk: PhantomData,
|
||||
}
|
||||
}
|
||||
|
@ -180,11 +206,11 @@ where
|
|||
let mut next_node = self.state.root;
|
||||
while let Some(node_id) = next_node.take() {
|
||||
let current = store.get_node(tx, node_id).await?;
|
||||
if let Some(payload) = current.node.keys().get(searched_key) {
|
||||
if let Some(payload) = current.n.keys().get(searched_key) {
|
||||
store.set_node(current, false)?;
|
||||
return Ok(Some(payload));
|
||||
}
|
||||
if let BTreeNode::Internal(keys, children) = ¤t.node {
|
||||
if let BTreeNode::Internal(keys, children) = ¤t.n {
|
||||
let child_idx = keys.get_child_idx(searched_key);
|
||||
next_node.replace(children[child_idx]);
|
||||
}
|
||||
|
@ -201,27 +227,30 @@ where
|
|||
payload: Payload,
|
||||
) -> Result<(), Error> {
|
||||
if let Some(root_id) = self.state.root {
|
||||
// We already have a root node
|
||||
let root = store.get_node(tx, root_id).await?;
|
||||
if root.node.keys().len() == self.full_size {
|
||||
let new_root_id = self.new_node_id();
|
||||
if root.n.keys().len() == self.full_size {
|
||||
// The root node is full, let's split it
|
||||
let new_root_id = self.state.new_node_id();
|
||||
let new_root = store
|
||||
.new_node(new_root_id, BTreeNode::Internal(BK::default(), vec![root_id]))?;
|
||||
self.state.root = Some(new_root.id);
|
||||
self.state.set_root(Some(new_root.id));
|
||||
self.split_child(store, new_root, 0, root).await?;
|
||||
self.insert_non_full(tx, store, new_root_id, key, payload).await?;
|
||||
} else {
|
||||
// The root node has place, let's insert the value
|
||||
let root_id = root.id;
|
||||
store.set_node(root, false)?;
|
||||
self.insert_non_full(tx, store, root_id, key, payload).await?;
|
||||
}
|
||||
} else {
|
||||
let new_root_id = self.new_node_id();
|
||||
// We don't have a root node, let's create it
|
||||
let new_root_id = self.state.new_node_id();
|
||||
let new_root_node =
|
||||
store.new_node(new_root_id, BTreeNode::Leaf(BK::with_key_val(key, payload)?))?;
|
||||
store.set_node(new_root_node, true)?;
|
||||
self.state.root = Some(new_root_id);
|
||||
self.state.set_root(Some(new_root_id));
|
||||
}
|
||||
self.updated = true;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -237,7 +266,7 @@ where
|
|||
while let Some(node_id) = next_node_id.take() {
|
||||
let mut node = store.get_node(tx, node_id).await?;
|
||||
let key: Key = key.clone();
|
||||
match &mut node.node {
|
||||
match &mut node.n {
|
||||
BTreeNode::Leaf(keys) => {
|
||||
keys.insert(key, payload);
|
||||
store.set_node(node, true)?;
|
||||
|
@ -250,7 +279,7 @@ where
|
|||
}
|
||||
let child_idx = keys.get_child_idx(&key);
|
||||
let child = store.get_node(tx, children[child_idx]).await?;
|
||||
let next_id = if child.node.keys().len() == self.full_size {
|
||||
let next_id = if child.n.keys().len() == self.full_size {
|
||||
let split_result = self.split_child(store, node, child_idx, child).await?;
|
||||
if key.gt(&split_result.median_key) {
|
||||
split_result.right_node_id
|
||||
|
@ -277,12 +306,12 @@ where
|
|||
idx: usize,
|
||||
child_node: BStoredNode<BK>,
|
||||
) -> Result<SplitResult, Error> {
|
||||
let (left_node, right_node, median_key, median_payload) = match child_node.node {
|
||||
let (left_node, right_node, median_key, median_payload) = match child_node.n {
|
||||
BTreeNode::Internal(keys, children) => self.split_internal_node(keys, children)?,
|
||||
BTreeNode::Leaf(keys) => self.split_leaf_node(keys)?,
|
||||
};
|
||||
let right_node_id = self.new_node_id();
|
||||
match parent_node.node {
|
||||
let right_node_id = self.state.new_node_id();
|
||||
match parent_node.n {
|
||||
BTreeNode::Internal(ref mut keys, ref mut children) => {
|
||||
keys.insert(median_key.clone(), median_payload);
|
||||
children.insert(idx + 1, right_node_id);
|
||||
|
@ -329,12 +358,6 @@ where
|
|||
Ok((left_node, right_node, r.median_key, r.median_payload))
|
||||
}
|
||||
|
||||
fn new_node_id(&mut self) -> NodeId {
|
||||
let new_node_id = self.state.next_node_id;
|
||||
self.state.next_node_id += 1;
|
||||
new_node_id
|
||||
}
|
||||
|
||||
pub(in crate::idx) async fn delete(
|
||||
&mut self,
|
||||
tx: &mut Transaction,
|
||||
|
@ -348,7 +371,7 @@ where
|
|||
|
||||
while let Some((is_main_key, key_to_delete, node_id)) = next_node.take() {
|
||||
let mut node = store.get_node(tx, node_id).await?;
|
||||
match &mut node.node {
|
||||
match &mut node.n {
|
||||
BTreeNode::Leaf(keys) => {
|
||||
// CLRS: 1
|
||||
if let Some(payload) = keys.get(&key_to_delete) {
|
||||
|
@ -361,12 +384,11 @@ where
|
|||
store.remove_node(node.id, node.key)?;
|
||||
// Check if this was the root node
|
||||
if Some(node_id) == self.state.root {
|
||||
self.state.root = None;
|
||||
self.state.set_root(None);
|
||||
}
|
||||
} else {
|
||||
store.set_node(node, true)?;
|
||||
}
|
||||
self.updated = true;
|
||||
} else {
|
||||
store.set_node(node, false)?;
|
||||
}
|
||||
|
@ -388,7 +410,6 @@ where
|
|||
.await?,
|
||||
);
|
||||
store.set_node(node, true)?;
|
||||
self.updated = true;
|
||||
} else {
|
||||
// CLRS: 3
|
||||
let (node_update, is_main_key, key_to_delete, next_stored_node) = self
|
||||
|
@ -409,11 +430,9 @@ where
|
|||
}
|
||||
}
|
||||
store.remove_node(node_id, node.key)?;
|
||||
self.state.root = Some(next_stored_node);
|
||||
self.updated = true;
|
||||
self.state.set_root(Some(next_stored_node));
|
||||
} else if node_update {
|
||||
store.set_node(node, true)?;
|
||||
self.updated = true;
|
||||
} else {
|
||||
store.set_node(node, false)?;
|
||||
}
|
||||
|
@ -437,9 +456,9 @@ where
|
|||
let left_idx = keys.get_child_idx(&key_to_delete);
|
||||
let left_id = children[left_idx];
|
||||
let mut left_node = store.get_node(tx, left_id).await?;
|
||||
if left_node.node.keys().len() >= self.state.minimum_degree {
|
||||
if left_node.n.keys().len() >= self.state.minimum_degree {
|
||||
// CLRS: 2a -> left_node is named `y` in the book
|
||||
if let Some((key_prim, payload_prim)) = left_node.node.keys().get_last_key() {
|
||||
if let Some((key_prim, payload_prim)) = left_node.n.keys().get_last_key() {
|
||||
keys.remove(&key_to_delete);
|
||||
keys.insert(key_prim.clone(), payload_prim);
|
||||
store.set_node(left_node, true)?;
|
||||
|
@ -450,9 +469,9 @@ where
|
|||
let right_idx = left_idx + 1;
|
||||
let right_id = children[right_idx];
|
||||
let right_node = store.get_node(tx, right_id).await?;
|
||||
if right_node.node.keys().len() >= self.state.minimum_degree {
|
||||
if right_node.n.keys().len() >= self.state.minimum_degree {
|
||||
// CLRS: 2b -> right_node is named `z` in the book
|
||||
if let Some((key_prim, payload_prim)) = right_node.node.keys().get_first_key() {
|
||||
if let Some((key_prim, payload_prim)) = right_node.n.keys().get_first_key() {
|
||||
keys.remove(&key_to_delete);
|
||||
keys.insert(key_prim.clone(), payload_prim);
|
||||
store.set_node(left_node, false)?;
|
||||
|
@ -464,7 +483,7 @@ where
|
|||
// CLRS: 2c
|
||||
// Merge children
|
||||
// The payload is set to 0. The value does not matter, as the key will be deleted after anyway.
|
||||
left_node.node.append(key_to_delete.clone(), 0, right_node.node)?;
|
||||
left_node.n.append(key_to_delete.clone(), 0, right_node.n)?;
|
||||
store.set_node(left_node, true)?;
|
||||
store.remove_node(right_id, right_node.key)?;
|
||||
keys.remove(&key_to_delete);
|
||||
|
@ -485,11 +504,11 @@ where
|
|||
let child_idx = keys.get_child_idx(&key_to_delete);
|
||||
let child_id = children[child_idx];
|
||||
let child_stored_node = store.get_node(tx, child_id).await?;
|
||||
if child_stored_node.node.keys().len() < self.state.minimum_degree {
|
||||
if child_stored_node.n.keys().len() < self.state.minimum_degree {
|
||||
// right child (successor)
|
||||
if child_idx < children.len() - 1 {
|
||||
let right_child_stored_node = store.get_node(tx, children[child_idx + 1]).await?;
|
||||
return if right_child_stored_node.node.keys().len() >= self.state.minimum_degree {
|
||||
return if right_child_stored_node.n.keys().len() >= self.state.minimum_degree {
|
||||
Self::delete_adjust_successor(
|
||||
store,
|
||||
keys,
|
||||
|
@ -520,7 +539,7 @@ where
|
|||
if child_idx > 0 {
|
||||
let child_idx = child_idx - 1;
|
||||
let left_child_stored_node = store.get_node(tx, children[child_idx]).await?;
|
||||
return if left_child_stored_node.node.keys().len() >= self.state.minimum_degree {
|
||||
return if left_child_stored_node.n.keys().len() >= self.state.minimum_degree {
|
||||
Self::delete_adjust_predecessor(
|
||||
store,
|
||||
keys,
|
||||
|
@ -562,12 +581,12 @@ where
|
|||
mut right_child_stored_node: BStoredNode<BK>,
|
||||
) -> Result<(bool, bool, Key, NodeId), Error> {
|
||||
if let Some((ascending_key, ascending_payload)) =
|
||||
right_child_stored_node.node.keys().get_first_key()
|
||||
right_child_stored_node.n.keys().get_first_key()
|
||||
{
|
||||
right_child_stored_node.node.keys_mut().remove(&ascending_key);
|
||||
right_child_stored_node.n.keys_mut().remove(&ascending_key);
|
||||
if let Some(descending_key) = keys.get_key(child_idx) {
|
||||
if let Some(descending_payload) = keys.remove(&descending_key) {
|
||||
child_stored_node.node.keys_mut().insert(descending_key, descending_payload);
|
||||
child_stored_node.n.keys_mut().insert(descending_key, descending_payload);
|
||||
keys.insert(ascending_key, ascending_payload);
|
||||
let child_id = child_stored_node.id;
|
||||
store.set_node(child_stored_node, true)?;
|
||||
|
@ -590,12 +609,12 @@ where
|
|||
mut left_child_stored_node: BStoredNode<BK>,
|
||||
) -> Result<(bool, bool, Key, NodeId), Error> {
|
||||
if let Some((ascending_key, ascending_payload)) =
|
||||
left_child_stored_node.node.keys().get_last_key()
|
||||
left_child_stored_node.n.keys().get_last_key()
|
||||
{
|
||||
left_child_stored_node.node.keys_mut().remove(&ascending_key);
|
||||
left_child_stored_node.n.keys_mut().remove(&ascending_key);
|
||||
if let Some(descending_key) = keys.get_key(child_idx) {
|
||||
if let Some(descending_payload) = keys.remove(&descending_key) {
|
||||
child_stored_node.node.keys_mut().insert(descending_key, descending_payload);
|
||||
child_stored_node.n.keys_mut().insert(descending_key, descending_payload);
|
||||
keys.insert(ascending_key, ascending_payload);
|
||||
let child_id = child_stored_node.id;
|
||||
store.set_node(child_stored_node, true)?;
|
||||
|
@ -623,7 +642,7 @@ where
|
|||
if let Some(descending_payload) = keys.remove(&descending_key) {
|
||||
children.remove(child_idx + 1);
|
||||
let left_id = left_child.id;
|
||||
left_child.node.append(descending_key, descending_payload, right_child.node)?;
|
||||
left_child.n.append(descending_key, descending_payload, right_child.n)?;
|
||||
store.set_node(left_child, true)?;
|
||||
store.remove_node(right_child.id, right_child.key)?;
|
||||
return Ok((true, is_main_key, key_to_delete, left_id));
|
||||
|
@ -645,13 +664,13 @@ where
|
|||
}
|
||||
while let Some((node_id, depth)) = node_queue.pop_front() {
|
||||
let stored = store.get_node(tx, node_id).await?;
|
||||
stats.keys_count += stored.node.keys().len() as u64;
|
||||
stats.keys_count += stored.n.keys().len() as u64;
|
||||
if depth > stats.max_depth {
|
||||
stats.max_depth = depth;
|
||||
}
|
||||
stats.nodes_count += 1;
|
||||
stats.total_size += stored.size as u64;
|
||||
if let BTreeNode::Internal(_, children) = &stored.node {
|
||||
if let BTreeNode::Internal(_, children) = &stored.n {
|
||||
let depth = depth + 1;
|
||||
for child_id in children.iter() {
|
||||
node_queue.push_front((*child_id, depth));
|
||||
|
@ -665,10 +684,6 @@ where
|
|||
pub(in crate::idx) fn get_state(&self) -> &BState {
|
||||
&self.state
|
||||
}
|
||||
|
||||
pub(in crate::idx) fn is_updated(&self) -> bool {
|
||||
self.updated
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -1032,13 +1047,13 @@ mod tests {
|
|||
0 => {
|
||||
assert_eq!(depth, 1);
|
||||
assert_eq!(node_id, 7);
|
||||
check_is_internal_node(node.node, vec![("p", 16)], vec![1, 8]);
|
||||
check_is_internal_node(node.n, vec![("p", 16)], vec![1, 8]);
|
||||
}
|
||||
1 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 1);
|
||||
check_is_internal_node(
|
||||
node.node,
|
||||
node.n,
|
||||
vec![("c", 3), ("g", 7), ("m", 13)],
|
||||
vec![0, 9, 2, 3],
|
||||
);
|
||||
|
@ -1046,42 +1061,42 @@ mod tests {
|
|||
2 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 8);
|
||||
check_is_internal_node(node.node, vec![("t", 20), ("x", 24)], vec![4, 6, 5]);
|
||||
check_is_internal_node(node.n, vec![("t", 20), ("x", 24)], vec![4, 6, 5]);
|
||||
}
|
||||
3 => {
|
||||
assert_eq!(depth, 3);
|
||||
assert_eq!(node_id, 0);
|
||||
check_is_leaf_node(node.node, vec![("a", 1), ("b", 2)]);
|
||||
check_is_leaf_node(node.n, vec![("a", 1), ("b", 2)]);
|
||||
}
|
||||
4 => {
|
||||
assert_eq!(depth, 3);
|
||||
assert_eq!(node_id, 9);
|
||||
check_is_leaf_node(node.node, vec![("d", 4), ("e", 5), ("f", 6)]);
|
||||
check_is_leaf_node(node.n, vec![("d", 4), ("e", 5), ("f", 6)]);
|
||||
}
|
||||
5 => {
|
||||
assert_eq!(depth, 3);
|
||||
assert_eq!(node_id, 2);
|
||||
check_is_leaf_node(node.node, vec![("j", 10), ("k", 11), ("l", 12)]);
|
||||
check_is_leaf_node(node.n, vec![("j", 10), ("k", 11), ("l", 12)]);
|
||||
}
|
||||
6 => {
|
||||
assert_eq!(depth, 3);
|
||||
assert_eq!(node_id, 3);
|
||||
check_is_leaf_node(node.node, vec![("n", 14), ("o", 15)]);
|
||||
check_is_leaf_node(node.n, vec![("n", 14), ("o", 15)]);
|
||||
}
|
||||
7 => {
|
||||
assert_eq!(depth, 3);
|
||||
assert_eq!(node_id, 4);
|
||||
check_is_leaf_node(node.node, vec![("q", 17), ("r", 18), ("s", 19)]);
|
||||
check_is_leaf_node(node.n, vec![("q", 17), ("r", 18), ("s", 19)]);
|
||||
}
|
||||
8 => {
|
||||
assert_eq!(depth, 3);
|
||||
assert_eq!(node_id, 6);
|
||||
check_is_leaf_node(node.node, vec![("u", 21), ("v", 22)]);
|
||||
check_is_leaf_node(node.n, vec![("u", 21), ("v", 22)]);
|
||||
}
|
||||
9 => {
|
||||
assert_eq!(depth, 3);
|
||||
assert_eq!(node_id, 5);
|
||||
check_is_leaf_node(node.node, vec![("y", 25), ("z", 26)]);
|
||||
check_is_leaf_node(node.n, vec![("y", 25), ("z", 26)]);
|
||||
}
|
||||
_ => panic!("This node should not exist {}", count),
|
||||
})
|
||||
|
@ -1135,13 +1150,13 @@ mod tests {
|
|||
let nodes_count = t
|
||||
.inspect_nodes(&mut tx, |count, depth, node_id, node| {
|
||||
debug!("{} -> {}", depth, node_id);
|
||||
node.node.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
||||
node.n.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
||||
match count {
|
||||
0 => {
|
||||
assert_eq!(depth, 1);
|
||||
assert_eq!(node_id, 1);
|
||||
check_is_internal_node(
|
||||
node.node,
|
||||
node.n,
|
||||
vec![("e", 5), ("l", 12), ("p", 16), ("t", 20), ("x", 24)],
|
||||
vec![0, 9, 3, 4, 6, 5],
|
||||
);
|
||||
|
@ -1149,32 +1164,32 @@ mod tests {
|
|||
1 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 0);
|
||||
check_is_leaf_node(node.node, vec![("a", 1), ("c", 3)]);
|
||||
check_is_leaf_node(node.n, vec![("a", 1), ("c", 3)]);
|
||||
}
|
||||
2 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 9);
|
||||
check_is_leaf_node(node.node, vec![("j", 10), ("k", 11)]);
|
||||
check_is_leaf_node(node.n, vec![("j", 10), ("k", 11)]);
|
||||
}
|
||||
3 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 3);
|
||||
check_is_leaf_node(node.node, vec![("n", 14), ("o", 15)]);
|
||||
check_is_leaf_node(node.n, vec![("n", 14), ("o", 15)]);
|
||||
}
|
||||
4 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 4);
|
||||
check_is_leaf_node(node.node, vec![("q", 17), ("r", 18), ("s", 19)]);
|
||||
check_is_leaf_node(node.n, vec![("q", 17), ("r", 18), ("s", 19)]);
|
||||
}
|
||||
5 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 6);
|
||||
check_is_leaf_node(node.node, vec![("u", 21), ("v", 22)]);
|
||||
check_is_leaf_node(node.n, vec![("u", 21), ("v", 22)]);
|
||||
}
|
||||
6 => {
|
||||
assert_eq!(depth, 2);
|
||||
assert_eq!(node_id, 5);
|
||||
check_is_leaf_node(node.node, vec![("y", 25), ("z", 26)]);
|
||||
check_is_leaf_node(node.n, vec![("y", 25), ("z", 26)]);
|
||||
}
|
||||
_ => panic!("This node should not exist {}", count),
|
||||
}
|
||||
|
@ -1316,7 +1331,7 @@ mod tests {
|
|||
debug!("----------------------------------");
|
||||
t.inspect_nodes(tx, |_count, depth, node_id, node| {
|
||||
debug!("{} -> {}", depth, node_id);
|
||||
node.node.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
||||
node.n.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
@ -1359,7 +1374,7 @@ mod tests {
|
|||
let mut s = TreeNodeStore::Traversal(TreeNodeProvider::Debug);
|
||||
while let Some((node_id, depth)) = node_queue.pop_front() {
|
||||
let stored_node = s.get_node(tx, node_id).await?;
|
||||
if let BTreeNode::Internal(_, children) = &stored_node.node {
|
||||
if let BTreeNode::Internal(_, children) = &stored_node.n {
|
||||
let depth = depth + 1;
|
||||
for child_id in children {
|
||||
node_queue.push_back((*child_id, depth));
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
pub mod bkeys;
|
||||
pub mod btree;
|
||||
pub mod mtree;
|
||||
pub mod store;
|
||||
|
|
1792
lib/src/idx/trees/mtree.rs
Normal file
1792
lib/src/idx/trees/mtree.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -9,7 +9,7 @@ use tokio::sync::Mutex;
|
|||
|
||||
pub type NodeId = u64;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
pub enum TreeStoreType {
|
||||
Write,
|
||||
Read,
|
||||
|
@ -151,7 +151,7 @@ where
|
|||
#[cfg(debug_assertions)]
|
||||
self.out.insert(id);
|
||||
StoredNode {
|
||||
node,
|
||||
n: node,
|
||||
id,
|
||||
key: self.np.get_key(id),
|
||||
size: 0,
|
||||
|
@ -238,6 +238,7 @@ pub enum TreeNodeProvider {
|
|||
DocLengths(IndexKeyBase),
|
||||
Postings(IndexKeyBase),
|
||||
Terms(IndexKeyBase),
|
||||
Vector(IndexKeyBase),
|
||||
Debug,
|
||||
}
|
||||
|
||||
|
@ -248,6 +249,7 @@ impl TreeNodeProvider {
|
|||
TreeNodeProvider::DocLengths(ikb) => ikb.new_bl_key(Some(node_id)),
|
||||
TreeNodeProvider::Postings(ikb) => ikb.new_bp_key(Some(node_id)),
|
||||
TreeNodeProvider::Terms(ikb) => ikb.new_bt_key(Some(node_id)),
|
||||
TreeNodeProvider::Vector(ikb) => ikb.new_vm_key(Some(node_id)),
|
||||
TreeNodeProvider::Debug => node_id.to_be_bytes().to_vec(),
|
||||
}
|
||||
}
|
||||
|
@ -261,7 +263,7 @@ impl TreeNodeProvider {
|
|||
let size = val.len() as u32;
|
||||
let node = N::try_from_val(val)?;
|
||||
Ok(StoredNode {
|
||||
node,
|
||||
n: node,
|
||||
id,
|
||||
key,
|
||||
size,
|
||||
|
@ -275,19 +277,30 @@ impl TreeNodeProvider {
|
|||
where
|
||||
N: TreeNode,
|
||||
{
|
||||
let val = node.node.try_into_val()?;
|
||||
let val = node.n.try_into_val()?;
|
||||
tx.set(node.key, val).await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct StoredNode<N> {
|
||||
pub(super) node: N,
|
||||
pub(super) n: N,
|
||||
pub(super) id: NodeId,
|
||||
pub(super) key: Key,
|
||||
pub(super) size: u32,
|
||||
}
|
||||
|
||||
impl<N> StoredNode<N> {
|
||||
pub(super) fn new(n: N, id: NodeId, key: Key, size: u32) -> Self {
|
||||
Self {
|
||||
n,
|
||||
id,
|
||||
key,
|
||||
size,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait TreeNode
|
||||
where
|
||||
Self: Sized,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//! Stores Term/Doc frequency
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::terms::TermId;
|
||||
use derive::Key;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//! Stores the term list for doc_ids
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use derive::Key;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
//! Stores the offsets
|
||||
use crate::idx::ft::docids::DocId;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::terms::TermId;
|
||||
use derive::Key;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
|
@ -11,6 +11,7 @@ pub mod bp;
|
|||
pub mod bs;
|
||||
pub mod bt;
|
||||
pub mod bu;
|
||||
pub mod vm;
|
||||
|
||||
use crate::sql::array::Array;
|
||||
use crate::sql::id::Id;
|
||||
|
|
68
lib/src/key/index/vm.rs
Normal file
68
lib/src/key/index/vm.rs
Normal file
|
@ -0,0 +1,68 @@
|
|||
//! Stores MTree state and nodes
|
||||
use crate::idx::trees::store::NodeId;
|
||||
use derive::Key;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Key)]
|
||||
pub struct Vm<'a> {
|
||||
__: u8,
|
||||
_a: u8,
|
||||
pub ns: &'a str,
|
||||
_b: u8,
|
||||
pub db: &'a str,
|
||||
_c: u8,
|
||||
pub tb: &'a str,
|
||||
_d: u8,
|
||||
pub ix: &'a str,
|
||||
_e: u8,
|
||||
_f: u8,
|
||||
_g: u8,
|
||||
pub node_id: Option<NodeId>,
|
||||
}
|
||||
|
||||
impl<'a> Vm<'a> {
|
||||
pub fn new(
|
||||
ns: &'a str,
|
||||
db: &'a str,
|
||||
tb: &'a str,
|
||||
ix: &'a str,
|
||||
node_id: Option<NodeId>,
|
||||
) -> Self {
|
||||
Self {
|
||||
__: b'/',
|
||||
_a: b'*',
|
||||
ns,
|
||||
_b: b'*',
|
||||
db,
|
||||
_c: b'*',
|
||||
tb,
|
||||
_d: b'+',
|
||||
ix,
|
||||
_e: b'!',
|
||||
_f: b'v',
|
||||
_g: b'm',
|
||||
node_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The key must encode to the expected byte layout and decode back to
    /// an equal value.
    #[test]
    fn key() {
        let val = Vm::new("testns", "testdb", "testtb", "testix", Some(8));

        // Encoding
        let enc = Vm::encode(&val).unwrap();
        assert_eq!(enc, b"/*testns\0*testdb\0*testtb\0+testix\0!vm\x01\0\0\0\0\0\0\0\x08");

        // Round trip
        let dec = Vm::decode(&enc).unwrap();
        assert_eq!(val, dec);
    }
}
|
|
@ -316,6 +316,9 @@ impl Datastore {
|
|||
}
|
||||
|
||||
/// Setup the initial credentials
|
||||
/// Trigger the `unreachable definition` compilation error, probably due to this issue:
|
||||
/// https://github.com/rust-lang/rust/issues/111370
|
||||
#[allow(unreachable_code, unused_variables)]
|
||||
pub async fn setup_initial_creds(&self, creds: Root<'_>) -> Result<(), Error> {
|
||||
// Start a new writeable transaction
|
||||
let txn = self.transaction(true, false).await?.rollback_with_panic().enclose();
|
||||
|
|
|
@ -191,6 +191,7 @@ impl Expression {
|
|||
Operator::Outside => fnc::operate::outside(&l, &r),
|
||||
Operator::Intersects => fnc::operate::intersects(&l, &r),
|
||||
Operator::Matches(_) => fnc::operate::matches(ctx, txn, doc, self).await,
|
||||
Operator::Knn(_) => fnc::operate::knn(ctx, txn, doc, self).await,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ pub struct MTreeParams {
|
|||
pub doc_ids_order: u32,
|
||||
}
|
||||
|
||||
#[derive(Default, Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
||||
#[revisioned(revision = 1)]
|
||||
pub enum Distance {
|
||||
#[default]
|
||||
|
@ -182,6 +182,7 @@ pub fn search(i: &str) -> IResult<&str, Index> {
|
|||
pub fn distance(i: &str) -> IResult<&str, Distance> {
|
||||
let (i, _) = mightbespace(i)?;
|
||||
let (i, _) = tag_no_case("DIST")(i)?;
|
||||
let (i, _) = shouldbespace(i)?;
|
||||
alt((
|
||||
map(tag_no_case("EUCLIDEAN"), |_| Distance::Euclidean),
|
||||
map(tag_no_case("MANHATTAN"), |_| Distance::Manhattan),
|
||||
|
@ -200,7 +201,7 @@ pub fn minkowski(i: &str) -> IResult<&str, Distance> {
|
|||
}
|
||||
|
||||
pub fn dimension(i: &str) -> IResult<&str, u16> {
|
||||
let (i, _) = shouldbespace(i)?;
|
||||
let (i, _) = mightbespace(i)?;
|
||||
let (i, _) = tag_no_case("DIMENSION")(i)?;
|
||||
let (i, _) = shouldbespace(i)?;
|
||||
let (i, dim) = uint16(i)?;
|
||||
|
|
|
@ -6,6 +6,7 @@ use nom::branch::alt;
|
|||
use nom::bytes::complete::tag;
|
||||
use nom::bytes::complete::tag_no_case;
|
||||
use nom::character::complete::char;
|
||||
use nom::character::complete::u32 as uint32;
|
||||
use nom::character::complete::u8 as uint8;
|
||||
use nom::combinator::cut;
|
||||
use nom::combinator::opt;
|
||||
|
@ -67,6 +68,8 @@ pub enum Operator {
|
|||
//
|
||||
Outside,
|
||||
Intersects,
|
||||
//
|
||||
Knn(u32), // <{k}>
|
||||
}
|
||||
|
||||
impl Default for Operator {
|
||||
|
@ -141,6 +144,7 @@ impl fmt::Display for Operator {
|
|||
f.write_str("@@")
|
||||
}
|
||||
}
|
||||
Self::Knn(k) => write!(f, "<{}>", k),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -191,12 +195,14 @@ pub fn binary_symbols(i: &str) -> IResult<&str, Operator> {
|
|||
value(Operator::AnyLike, tag("?~")),
|
||||
value(Operator::Like, char('~')),
|
||||
matches,
|
||||
knn,
|
||||
)),
|
||||
alt((
|
||||
value(Operator::LessThanOrEqual, tag("<=")),
|
||||
value(Operator::LessThan, char('<')),
|
||||
value(Operator::MoreThanOrEqual, tag(">=")),
|
||||
value(Operator::MoreThan, char('>')),
|
||||
knn,
|
||||
)),
|
||||
alt((
|
||||
value(Operator::Pow, tag("**")),
|
||||
|
@ -257,7 +263,6 @@ pub fn binary_phrases(i: &str) -> IResult<&str, Operator> {
|
|||
|
||||
pub fn matches(i: &str) -> IResult<&str, Operator> {
|
||||
let (i, _) = char('@')(i)?;
|
||||
// let (i, reference) = opt(|i| uint8(i))(i)?;
|
||||
cut(|i| {
|
||||
let (i, reference) = opt(uint8)(i)?;
|
||||
let (i, _) = char('@')(i)?;
|
||||
|
@ -265,6 +270,13 @@ pub fn matches(i: &str) -> IResult<&str, Operator> {
|
|||
})(i)
|
||||
}
|
||||
|
||||
pub fn knn(i: &str) -> IResult<&str, Operator> {
|
||||
let (i, _) = char('<')(i)?;
|
||||
let (i, k) = uint32(i)?;
|
||||
let (i, _) = char('>')(i)?;
|
||||
Ok((i, Operator::Knn(k)))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -290,4 +302,13 @@ mod tests {
|
|||
let res = matches("@256@");
|
||||
res.unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
fn test_knn() {
    // `<5>` must parse, print back identically and carry k = 5
    let parsed = knn("<5>");
    assert!(parsed.is_ok());
    let (_, out) = parsed.unwrap();
    assert_eq!("<5>", format!("{}", out));
    assert_eq!(out, Operator::Knn(5));
}
|
||||
}
|
||||
|
|
|
@ -5,6 +5,7 @@ use crate::doc::CursorDoc;
|
|||
use crate::err::Error;
|
||||
use crate::iam::{Action, ResourceKind};
|
||||
use crate::idx::ft::FtIndex;
|
||||
use crate::idx::trees::mtree::MTreeIndex;
|
||||
use crate::idx::trees::store::TreeStoreType;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::sql::comment::shouldbespace;
|
||||
|
@ -56,6 +57,11 @@ impl AnalyzeStatement {
|
|||
FtIndex::new(&mut run, az, ikb, p, TreeStoreType::Traversal).await?;
|
||||
ft.statistics(&mut run).await?.into()
|
||||
}
|
||||
Index::MTree(p) => {
|
||||
let mt =
|
||||
MTreeIndex::new(&mut run, ikb, p, TreeStoreType::Traversal).await?;
|
||||
mt.statistics(&mut run).await?.into()
|
||||
}
|
||||
_ => {
|
||||
return Err(Error::FeatureNotYetImplemented {
|
||||
feature: "Statistics on unique and non-unique indexes.".to_string(),
|
||||
|
|
|
@ -178,7 +178,7 @@ fn index_comment(i: &str) -> IResult<&str, DefineIndexOption> {
|
|||
mod tests {
|
||||
|
||||
use super::*;
|
||||
use crate::sql::index::SearchParams;
|
||||
use crate::sql::index::{Distance, MTreeParams, SearchParams};
|
||||
use crate::sql::Ident;
|
||||
use crate::sql::Idiom;
|
||||
use crate::sql::Idioms;
|
||||
|
@ -275,4 +275,29 @@ mod tests {
|
|||
"DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer VS DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100"
|
||||
);
|
||||
}
|
||||
|
||||
/// Parses an MTREE index definition that only specifies the dimension and
/// checks both the resulting statement and its textual form.
///
/// The expected statement uses `Distance::Euclidean`, a capacity of 40 and a
/// doc-ids order of 100 for the options that the input leaves out.
#[test]
fn check_create_mtree_index() {
    let sql = "INDEX my_index ON TABLE my_table COLUMNS my_col MTREE DIMENSION 4";
    let (_, idx) = index(sql).unwrap();

    let expected = DefineIndexStatement {
        name: Ident("my_index".to_string()),
        what: Ident("my_table".to_string()),
        cols: Idioms(vec![Idiom(vec![Part::Field(Ident("my_col".to_string()))])]),
        index: Index::MTree(MTreeParams {
            dimension: 4,
            distance: Distance::Euclidean,
            capacity: 40,
            doc_ids_order: 100,
        }),
        comment: None,
    };
    assert_eq!(idx, expected);

    // The display form spells out every option explicitly.
    assert_eq!(
        idx.to_string(),
        "DEFINE INDEX my_index ON my_table FIELDS my_col MTREE DIMENSION 4 DIST EUCLIDEAN CAPACITY 40 DOC_IDS_ORDER 100"
    );
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ use crate::sql::index::Distance;
|
|||
use crate::sql::value::serde::ser;
|
||||
use serde::ser::Error as _;
|
||||
use serde::ser::Impossible;
|
||||
use serde::Serialize;
|
||||
|
||||
pub(super) struct Serializer;
|
||||
|
||||
|
@ -29,9 +30,34 @@ impl ser::Serializer for Serializer {
|
|||
) -> Result<Self::Ok, Error> {
|
||||
match variant {
|
||||
"Euclidean" => Ok(Distance::Euclidean),
|
||||
"Manhattan" => Ok(Distance::Manhattan),
|
||||
"Cosine" => Ok(Distance::Cosine),
|
||||
"Hamming" => Ok(Distance::Hamming),
|
||||
"Mahalanobis" => Ok(Distance::Mahalanobis),
|
||||
variant => Err(Error::custom(format!("unexpected unit variant `{name}::{variant}`"))),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn serialize_newtype_variant<T>(
|
||||
self,
|
||||
name: &'static str,
|
||||
_variant_index: u32,
|
||||
variant: &'static str,
|
||||
value: &T,
|
||||
) -> Result<Self::Ok, Error>
|
||||
where
|
||||
T: ?Sized + Serialize,
|
||||
{
|
||||
match variant {
|
||||
"Minkowski" => {
|
||||
Ok(Distance::Minkowski(value.serialize(ser::number::Serializer.wrap())?))
|
||||
}
|
||||
variant => {
|
||||
Err(Error::custom(format!("unexpected newtype variant `{name}::{variant}`")))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -41,9 +67,44 @@ mod tests {
|
|||
use serde::Serialize;
|
||||
|
||||
/// Round-trips `Distance::Euclidean` through the serializer and expects the
/// same value back.
// Only the `distance_euclidean` header is kept: the span carried two
// consecutive `fn` headers for a single body, which cannot compile, and the
// sibling tests all use the `distance_` prefix.
#[test]
fn distance_euclidean() {
    let dist = Distance::Euclidean;
    let serialized = dist.serialize(Serializer.wrap()).unwrap();
    assert_eq!(dist, serialized);
}
|
||||
|
||||
/// Round-trips `Distance::Manhattan` through the serializer and expects the
/// same value back.
#[test]
fn distance_manhattan() {
    let expected = Distance::Manhattan;
    let actual = expected.serialize(Serializer.wrap()).unwrap();
    assert_eq!(expected, actual);
}
|
||||
|
||||
/// Round-trips `Distance::Mahalanobis` through the serializer and expects the
/// same value back.
#[test]
fn distance_mahalanobis() {
    let expected = Distance::Mahalanobis;
    let actual = expected.serialize(Serializer.wrap()).unwrap();
    assert_eq!(expected, actual);
}
|
||||
|
||||
/// Round-trips `Distance::Hamming` through the serializer and expects the
/// same value back.
#[test]
fn distance_hamming() {
    let expected = Distance::Hamming;
    let actual = expected.serialize(Serializer.wrap()).unwrap();
    assert_eq!(expected, actual);
}
|
||||
|
||||
/// Round-trips `Distance::Cosine` through the serializer and expects the
/// same value back.
#[test]
fn distance_cosine() {
    let expected = Distance::Cosine;
    let actual = expected.serialize(Serializer.wrap()).unwrap();
    assert_eq!(expected, actual);
}
|
||||
|
||||
/// Round-trips `Distance::Minkowski` (built from the number 7) through the
/// serializer and expects the same value back.
#[test]
fn distance_minkowski() {
    let expected = Distance::Minkowski(7.into());
    let actual = expected.serialize(Serializer.wrap()).unwrap();
    assert_eq!(expected, actual);
}
|
||||
}
|
||||
|
|
|
@ -366,7 +366,7 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
|||
let Value::Object(a) = a else {
|
||||
unreachable!()
|
||||
};
|
||||
let Value::Number(versionstamp1) = a.get("versionstamp").unwrap() else {
|
||||
let Value::Number(versionstamp2) = a.get("versionstamp").unwrap() else {
|
||||
unreachable!()
|
||||
};
|
||||
let changes = a.get("changes").unwrap().to_owned();
|
||||
|
@ -389,10 +389,10 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
|||
let Value::Object(a) = a else {
|
||||
unreachable!()
|
||||
};
|
||||
let Value::Number(versionstamp2) = a.get("versionstamp").unwrap() else {
|
||||
let Value::Number(versionstamp3) = a.get("versionstamp").unwrap() else {
|
||||
unreachable!()
|
||||
};
|
||||
assert!(versionstamp1 < versionstamp2);
|
||||
assert!(versionstamp2 < versionstamp3);
|
||||
let changes = a.get("changes").unwrap().to_owned();
|
||||
assert_eq!(
|
||||
changes,
|
||||
|
@ -413,10 +413,10 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
|||
let Value::Object(a) = a else {
|
||||
unreachable!()
|
||||
};
|
||||
let Value::Number(versionstamp3) = a.get("versionstamp").unwrap() else {
|
||||
let Value::Number(versionstamp4) = a.get("versionstamp").unwrap() else {
|
||||
unreachable!()
|
||||
};
|
||||
assert!(versionstamp2 < versionstamp3);
|
||||
assert!(versionstamp3 < versionstamp4);
|
||||
let changes = a.get("changes").unwrap().to_owned();
|
||||
assert_eq!(
|
||||
changes,
|
||||
|
@ -437,10 +437,10 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
|||
let Value::Object(a) = a else {
|
||||
unreachable!()
|
||||
};
|
||||
let Value::Number(versionstamp4) = a.get("versionstamp").unwrap() else {
|
||||
let Value::Number(versionstamp5) = a.get("versionstamp").unwrap() else {
|
||||
unreachable!()
|
||||
};
|
||||
assert!(versionstamp3 < versionstamp4);
|
||||
assert!(versionstamp4 < versionstamp5);
|
||||
let changes = a.get("changes").unwrap().to_owned();
|
||||
assert_eq!(
|
||||
changes,
|
||||
|
@ -487,7 +487,7 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
|||
let Value::Number(versionstamp1b) = a.get("versionstamp").unwrap() else {
|
||||
unreachable!()
|
||||
};
|
||||
assert!(versionstamp1 == versionstamp1b);
|
||||
assert!(versionstamp2 == versionstamp1b);
|
||||
let changes = a.get("changes").unwrap().to_owned();
|
||||
assert_eq!(
|
||||
changes,
|
||||
|
|
|
@ -1211,7 +1211,9 @@ async fn define_statement_search_index() -> Result<(), Error> {
|
|||
events: {},
|
||||
fields: {},
|
||||
tables: {},
|
||||
indexes: { blog_title: 'DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 HIGHLIGHTS' },
|
||||
indexes: { blog_title: 'DEFINE INDEX blog_title ON blog FIELDS title \
|
||||
SEARCH ANALYZER simple BM25(1.2,0.75) \
|
||||
DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 HIGHLIGHTS' },
|
||||
lives: {},
|
||||
}",
|
||||
);
|
||||
|
|
60
lib/tests/vector.rs
Normal file
60
lib/tests/vector.rs
Normal file
|
@ -0,0 +1,60 @@
|
|||
mod helpers;
|
||||
mod parse;
|
||||
use crate::helpers::new_ds;
|
||||
use parse::Parse;
|
||||
use surrealdb::dbs::Session;
|
||||
use surrealdb::err::Error;
|
||||
use surrealdb::sql::Value;
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_where_mtree_knn() -> Result<(), Error> {
|
||||
let sql = r"
|
||||
CREATE pts:1 SET point = [1,2,3,4];
|
||||
CREATE pts:2 SET point = [4,5,6,7];
|
||||
CREATE pts:3 SET point = [8,9,10,11];
|
||||
DEFINE INDEX mt_pts ON pts FIELDS point MTREE DIMENSION 4;
|
||||
LET $pt = [2,3,4,5];
|
||||
SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <2> $pt;
|
||||
SELECT id FROM pts WHERE point <2> $pt EXPLAIN;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
let ses = Session::owner().with_ns("test").with_db("test");
|
||||
let res = &mut dbs.execute(sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 7);
|
||||
//
|
||||
for _ in 0..5 {
|
||||
let _ = res.remove(0).result?;
|
||||
}
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
id: pts:1,
|
||||
dist: 2f
|
||||
},
|
||||
{
|
||||
id: pts:2,
|
||||
dist: 4f
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
detail: {
|
||||
plan: {
|
||||
index: 'mt_pts',
|
||||
operator: '<2>',
|
||||
value: [2,3,4,5]
|
||||
},
|
||||
table: 'pts',
|
||||
},
|
||||
operation: 'Iterate Index'
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in a new issue