Feature: Vector Search: mtree index + knn operator (#2546)
Co-authored-by: Tobie Morgan Hitchcock <tobie@surrealdb.com>
This commit is contained in:
parent
1a85f4967a
commit
0772a8c592
41 changed files with 2541 additions and 235 deletions
|
@ -8,7 +8,7 @@ use crate::dbs::Statement;
|
||||||
use crate::dbs::{Options, Transaction};
|
use crate::dbs::{Options, Transaction};
|
||||||
use crate::doc::Document;
|
use crate::doc::Document;
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::planner::executor::IteratorRef;
|
use crate::idx::planner::executor::IteratorRef;
|
||||||
use crate::sql::array::Array;
|
use crate::sql::array::Array;
|
||||||
use crate::sql::edges::Edges;
|
use crate::sql::edges::Edges;
|
||||||
|
|
|
@ -594,7 +594,7 @@ impl<'a> Processor<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(Error::QueryNotExecutedDetail {
|
Err(Error::QueryNotExecutedDetail {
|
||||||
message: "No QueryExecutor has not been found.".to_string(),
|
message: "No QueryExecutor has been found.".to_string(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@ use crate::dbs::Workable;
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::iam::Action;
|
use crate::iam::Action;
|
||||||
use crate::iam::ResourceKind;
|
use crate::iam::ResourceKind;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::planner::executor::IteratorRef;
|
use crate::idx::planner::executor::IteratorRef;
|
||||||
use crate::sql::statements::define::DefineEventStatement;
|
use crate::sql::statements::define::DefineEventStatement;
|
||||||
use crate::sql::statements::define::DefineFieldStatement;
|
use crate::sql::statements::define::DefineFieldStatement;
|
||||||
|
|
|
@ -4,10 +4,11 @@ use crate::dbs::{Options, Transaction};
|
||||||
use crate::doc::{CursorDoc, Document};
|
use crate::doc::{CursorDoc, Document};
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::FtIndex;
|
use crate::idx::ft::FtIndex;
|
||||||
|
use crate::idx::trees::mtree::MTreeIndex;
|
||||||
use crate::idx::trees::store::TreeStoreType;
|
use crate::idx::trees::store::TreeStoreType;
|
||||||
use crate::idx::IndexKeyBase;
|
use crate::idx::IndexKeyBase;
|
||||||
use crate::sql::array::Array;
|
use crate::sql::array::Array;
|
||||||
use crate::sql::index::{Index, SearchParams};
|
use crate::sql::index::{Index, MTreeParams, SearchParams};
|
||||||
use crate::sql::statements::DefineIndexStatement;
|
use crate::sql::statements::DefineIndexStatement;
|
||||||
use crate::sql::{Part, Thing, Value};
|
use crate::sql::{Part, Thing, Value};
|
||||||
use crate::{key, kvs};
|
use crate::{key, kvs};
|
||||||
|
@ -55,11 +56,7 @@ impl<'a> Document<'a> {
|
||||||
Index::Uniq => ic.index_unique(&mut run).await?,
|
Index::Uniq => ic.index_unique(&mut run).await?,
|
||||||
Index::Idx => ic.index_non_unique(&mut run).await?,
|
Index::Idx => ic.index_non_unique(&mut run).await?,
|
||||||
Index::Search(p) => ic.index_full_text(&mut run, p).await?,
|
Index::Search(p) => ic.index_full_text(&mut run, p).await?,
|
||||||
Index::MTree(_) => {
|
Index::MTree(p) => ic.index_mtree(&mut run, p).await?,
|
||||||
return Err(Error::FeatureNotYetImplemented {
|
|
||||||
feature: "MTree indexing".to_string(),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -332,18 +329,36 @@ impl<'a> IndexOperation<'a> {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn index_full_text(
|
async fn index_full_text(
|
||||||
&self,
|
&mut self,
|
||||||
run: &mut kvs::Transaction,
|
run: &mut kvs::Transaction,
|
||||||
p: &SearchParams,
|
p: &SearchParams,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let ikb = IndexKeyBase::new(self.opt, self.ix);
|
let ikb = IndexKeyBase::new(self.opt, self.ix);
|
||||||
let az = run.get_db_analyzer(self.opt.ns(), self.opt.db(), p.az.as_str()).await?;
|
let az = run.get_db_analyzer(self.opt.ns(), self.opt.db(), p.az.as_str()).await?;
|
||||||
let mut ft = FtIndex::new(run, az, ikb, p, TreeStoreType::Write).await?;
|
let mut ft = FtIndex::new(run, az, ikb, p, TreeStoreType::Write).await?;
|
||||||
if let Some(n) = &self.n {
|
if let Some(n) = self.n.take() {
|
||||||
ft.index_document(run, self.rid, n).await?;
|
ft.index_document(run, self.rid, n).await?;
|
||||||
} else {
|
} else {
|
||||||
ft.remove_document(run, self.rid).await?;
|
ft.remove_document(run, self.rid).await?;
|
||||||
}
|
}
|
||||||
ft.finish(run).await
|
ft.finish(run).await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Applies this index operation to an MTree index.
///
/// Opens the `MTreeIndex` in write mode (`TreeStoreType::Write`) using the key
/// base derived from `self.opt` and `self.ix`, removes the entry built from the
/// old values (`self.o`) when there are any, indexes the new values (`self.n`)
/// when there are any, and finally calls `finish` on the index.
///
/// Both `self.o` and `self.n` are moved out with `take()`, so they are `None`
/// once this method returns.
///
/// Any error from opening the index, removing, indexing or finishing is
/// propagated to the caller.
async fn index_mtree(
|
||||||
|
&mut self,
|
||||||
|
run: &mut kvs::Transaction,
|
||||||
|
p: &MTreeParams,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let ikb = IndexKeyBase::new(self.opt, self.ix);
|
||||||
|
let mut mt = MTreeIndex::new(run, ikb, p, TreeStoreType::Write).await?;
|
||||||
|
// Delete the old index data
|
||||||
|
// (`take()` hands ownership of the old values to the index and leaves `None` behind)
if let Some(o) = self.o.take() {
|
||||||
|
mt.remove_document(run, self.rid, o).await?;
|
||||||
|
}
|
||||||
|
// Create the new index data
|
||||||
|
// (skipped when there are no new values, i.e. `self.n` is `None`)
if let Some(n) = self.n.take() {
|
||||||
|
mt.index_document(run, self.rid, n).await?;
|
||||||
|
}
|
||||||
|
mt.finish(run).await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -208,6 +208,26 @@ pub enum Error {
|
||||||
#[error("The URL `{0}` is invalid")]
|
#[error("The URL `{0}` is invalid")]
|
||||||
InvalidUrl(String),
|
InvalidUrl(String),
|
||||||
|
|
||||||
|
/// The size of the vector is incorrect
|
||||||
|
#[error("Incorrect vector dimension ({current}). Expected a vector of {expected} dimension.")]
|
||||||
|
InvalidVectorDimension {
|
||||||
|
current: usize,
|
||||||
|
expected: usize,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// An element of the vector is not a number
|
||||||
|
#[error("The vector element ({current}) is not a number.")]
|
||||||
|
InvalidVectorType {
|
||||||
|
current: String,
|
||||||
|
expected: &'static str,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// The value is not a vector
|
||||||
|
#[error("The value '{current}' is not a vector.")]
|
||||||
|
InvalidVectorValue {
|
||||||
|
current: String,
|
||||||
|
},
|
||||||
|
|
||||||
/// The query timedout
|
/// The query timedout
|
||||||
#[error("The query was not executed because it exceeded the timeout")]
|
#[error("The query was not executed because it exceeded the timeout")]
|
||||||
QueryTimedout,
|
QueryTimedout,
|
||||||
|
|
|
@ -2,6 +2,7 @@ use crate::ctx::Context;
|
||||||
use crate::dbs::Transaction;
|
use crate::dbs::Transaction;
|
||||||
use crate::doc::CursorDoc;
|
use crate::doc::CursorDoc;
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
|
use crate::idx::planner::executor::QueryExecutor;
|
||||||
use crate::sql::value::TryAdd;
|
use crate::sql::value::TryAdd;
|
||||||
use crate::sql::value::TryDiv;
|
use crate::sql::value::TryDiv;
|
||||||
use crate::sql::value::TryMul;
|
use crate::sql::value::TryMul;
|
||||||
|
@ -9,7 +10,7 @@ use crate::sql::value::TryNeg;
|
||||||
use crate::sql::value::TryPow;
|
use crate::sql::value::TryPow;
|
||||||
use crate::sql::value::TrySub;
|
use crate::sql::value::TrySub;
|
||||||
use crate::sql::value::Value;
|
use crate::sql::value::Value;
|
||||||
use crate::sql::Expression;
|
use crate::sql::{Expression, Thing};
|
||||||
|
|
||||||
pub fn neg(a: Value) -> Result<Value, Error> {
|
pub fn neg(a: Value) -> Result<Value, Error> {
|
||||||
a.try_neg()
|
a.try_neg()
|
||||||
|
@ -167,31 +168,58 @@ pub fn intersects(a: &Value, b: &Value) -> Result<Value, Error> {
|
||||||
Ok(a.intersects(b).into())
|
Ok(a.intersects(b).into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Outcome of `get_index_option`: how an index-backed operator expression
/// should be evaluated for the current document.
enum IndexOption<'a> {
|
||||||
|
/// The expression is the one driving the current iterator
/// (`is_iterator_expression` returned true); callers answer `true` directly.
PreMatch,
|
||||||
|
/// No document, record id, query planner or query executor was available;
/// callers answer `false`.
None,
|
||||||
|
/// A query executor exists for the record's table; callers delegate the
/// evaluation to it, passing the record id.
Execute(&'a QueryExecutor, &'a Thing),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decides how an index-backed operator expression is evaluated for `doc`.
///
/// Returns:
/// - `IndexOption::PreMatch` when the document carries an iterator reference
///   (`doc.ir`) and the executor reports `exp` as that iterator's expression;
/// - `IndexOption::Execute(exe, thg)` when an executor exists for the record's
///   table but the expression is not the iterator's expression;
/// - `IndexOption::None` when the document, its record id, the query planner or
///   the table's query executor is missing.
fn get_index_option<'a>(
|
||||||
|
ctx: &'a Context<'_>,
|
||||||
|
doc: Option<&'a CursorDoc<'_>>,
|
||||||
|
exp: &'a Expression,
|
||||||
|
) -> IndexOption<'a> {
|
||||||
|
if let Some(doc) = doc {
|
||||||
|
if let Some(thg) = doc.rid {
|
||||||
|
if let Some(pla) = ctx.get_query_planner() {
|
||||||
|
// Executors are looked up per table, using the table of the record id
if let Some(exe) = pla.get_query_executor(&thg.tb) {
|
||||||
|
if let Some(ir) = doc.ir {
|
||||||
|
// The expression belongs to the iterator that produced this document,
// so the document is already known to satisfy it.
if exe.is_iterator_expression(ir, exp) {
|
||||||
|
return IndexOption::PreMatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Otherwise the caller has to evaluate the expression through the executor
return IndexOption::Execute(exe, thg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Fallback: one of the lookups above yielded nothing
IndexOption::None
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) async fn matches(
|
pub(crate) async fn matches(
|
||||||
ctx: &Context<'_>,
|
ctx: &Context<'_>,
|
||||||
txn: &Transaction,
|
txn: &Transaction,
|
||||||
doc: Option<&CursorDoc<'_>>,
|
doc: Option<&CursorDoc<'_>>,
|
||||||
exp: &Expression,
|
exp: &Expression,
|
||||||
) -> Result<Value, Error> {
|
) -> Result<Value, Error> {
|
||||||
if let Some(doc) = doc {
|
match get_index_option(ctx, doc, exp) {
|
||||||
if let Some(thg) = doc.rid {
|
IndexOption::PreMatch => Ok(Value::Bool(true)),
|
||||||
if let Some(pla) = ctx.get_query_planner() {
|
IndexOption::None => Ok(Value::Bool(false)),
|
||||||
if let Some(exe) = pla.get_query_executor(&thg.tb) {
|
IndexOption::Execute(exe, thg) => exe.matches(txn, thg, exp).await,
|
||||||
// If we find the expression in `pre_match`,
|
|
||||||
// it means that we are using an Iterator::Index
|
|
||||||
// and we are iterating over documents that already matches the expression.
|
|
||||||
if let Some(ir) = doc.ir {
|
|
||||||
if exe.is_iterator_expression(ir, exp) {
|
|
||||||
return Ok(Value::Bool(true));
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn knn(
|
||||||
|
ctx: &Context<'_>,
|
||||||
|
txn: &Transaction,
|
||||||
|
doc: Option<&CursorDoc<'_>>,
|
||||||
|
exp: &Expression,
|
||||||
|
) -> Result<Value, Error> {
|
||||||
|
match get_index_option(ctx, doc, exp) {
|
||||||
|
IndexOption::PreMatch => Ok(Value::Bool(true)),
|
||||||
|
IndexOption::None => Ok(Value::Bool(false)),
|
||||||
|
IndexOption::Execute(exe, thg) => exe.knn(txn, thg, exp).await,
|
||||||
}
|
}
|
||||||
// Evaluate the matches
|
|
||||||
return exe.matches(txn, thg, exp).await;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(Value::Bool(false))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
|
@ -30,6 +30,7 @@ impl_module_def!(
|
||||||
"insert" => run,
|
"insert" => run,
|
||||||
"intersect" => run,
|
"intersect" => run,
|
||||||
"join" => run,
|
"join" => run,
|
||||||
|
"knn" => run,
|
||||||
"last" => run,
|
"last" => run,
|
||||||
"len" => run,
|
"len" => run,
|
||||||
"logical_and" => run,
|
"logical_and" => run,
|
||||||
|
|
|
@ -132,11 +132,11 @@ impl ManhattanDistance for Vec<Number> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait MinkowskiDistance {
|
pub trait MinkowskiDistance {
|
||||||
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error>;
|
fn minkowski_distance(&self, other: &Self, order: &Number) -> Result<Number, Error>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MinkowskiDistance for Vec<Number> {
|
impl MinkowskiDistance for Vec<Number> {
|
||||||
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error> {
|
fn minkowski_distance(&self, other: &Self, order: &Number) -> Result<Number, Error> {
|
||||||
check_same_dimension("vector::distance::minkowski", self, other)?;
|
check_same_dimension("vector::distance::minkowski", self, other)?;
|
||||||
let p = order.to_float();
|
let p = order.to_float();
|
||||||
let dist: f64 = self
|
let dist: f64 = self
|
||||||
|
|
|
@ -75,7 +75,7 @@ pub mod distance {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn minkowski((a, b, o): (Vec<Number>, Vec<Number>, Number)) -> Result<Value, Error> {
|
pub fn minkowski((a, b, o): (Vec<Number>, Vec<Number>, Number)) -> Result<Value, Error> {
|
||||||
Ok(a.minkowski_distance(&b, o)?.into())
|
Ok(a.minkowski_distance(&b, &o)?.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,7 @@ pub(crate) struct DocIds {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DocIds {
|
impl DocIds {
|
||||||
pub(super) async fn new(
|
pub(in crate::idx) async fn new(
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
index_key_base: IndexKeyBase,
|
index_key_base: IndexKeyBase,
|
||||||
default_btree_order: u32,
|
default_btree_order: u32,
|
||||||
|
@ -78,7 +78,7 @@ impl DocIds {
|
||||||
|
|
||||||
/// Returns the doc_id for the given doc_key.
|
/// Returns the doc_id for the given doc_key.
|
||||||
/// If the doc_id does not exists, a new one is created, and associated to the given key.
|
/// If the doc_id does not exists, a new one is created, and associated to the given key.
|
||||||
pub(super) async fn resolve_doc_id(
|
pub(in crate::idx) async fn resolve_doc_id(
|
||||||
&mut self,
|
&mut self,
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
doc_key: Key,
|
doc_key: Key,
|
||||||
|
@ -97,7 +97,7 @@ impl DocIds {
|
||||||
Ok(Resolved::New(doc_id))
|
Ok(Resolved::New(doc_id))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn remove_doc(
|
pub(in crate::idx) async fn remove_doc(
|
||||||
&mut self,
|
&mut self,
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
doc_key: Key,
|
doc_key: Key,
|
||||||
|
@ -119,7 +119,7 @@ impl DocIds {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn get_doc_key(
|
pub(in crate::idx) async fn get_doc_key(
|
||||||
&self,
|
&self,
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
doc_id: DocId,
|
doc_id: DocId,
|
||||||
|
@ -132,12 +132,15 @@ impl DocIds {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn statistics(&self, tx: &mut Transaction) -> Result<BStatistics, Error> {
|
pub(in crate::idx) async fn statistics(
|
||||||
|
&self,
|
||||||
|
tx: &mut Transaction,
|
||||||
|
) -> Result<BStatistics, Error> {
|
||||||
let mut store = self.store.lock().await;
|
let mut store = self.store.lock().await;
|
||||||
self.btree.statistics(tx, &mut store).await
|
self.btree.statistics(tx, &mut store).await
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
|
pub(in crate::idx) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
|
||||||
let updated = self.store.lock().await.finish(tx).await?;
|
let updated = self.store.lock().await.finish(tx).await?;
|
||||||
if self.updated || updated {
|
if self.updated || updated {
|
||||||
let state = State {
|
let state = State {
|
||||||
|
@ -172,20 +175,20 @@ impl State {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
pub(super) enum Resolved {
|
pub(in crate::idx) enum Resolved {
|
||||||
New(DocId),
|
New(DocId),
|
||||||
Existing(DocId),
|
Existing(DocId),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Resolved {
|
impl Resolved {
|
||||||
pub(super) fn doc_id(&self) -> &DocId {
|
pub(in crate::idx) fn doc_id(&self) -> &DocId {
|
||||||
match self {
|
match self {
|
||||||
Resolved::New(doc_id) => doc_id,
|
Resolved::New(doc_id) => doc_id,
|
||||||
Resolved::Existing(doc_id) => doc_id,
|
Resolved::Existing(doc_id) => doc_id,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn was_existing(&self) -> bool {
|
pub(in crate::idx) fn was_existing(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Resolved::New(_) => false,
|
Resolved::New(_) => false,
|
||||||
Resolved::Existing(_) => true,
|
Resolved::Existing(_) => true,
|
||||||
|
@ -195,7 +198,7 @@ impl Resolved {
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use crate::idx::ft::docids::{DocIds, Resolved};
|
use crate::idx::docids::{DocIds, Resolved};
|
||||||
use crate::idx::trees::store::TreeStoreType;
|
use crate::idx::trees::store::TreeStoreType;
|
||||||
use crate::idx::IndexKeyBase;
|
use crate::idx::IndexKeyBase;
|
||||||
use crate::kvs::{Datastore, Transaction};
|
use crate::kvs::{Datastore, Transaction};
|
|
@ -64,7 +64,7 @@ impl Analyzer {
|
||||||
&self,
|
&self,
|
||||||
terms: &mut Terms,
|
terms: &mut Terms,
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
field_content: &[Value],
|
field_content: Vec<Value>,
|
||||||
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>), Error> {
|
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>), Error> {
|
||||||
let mut dl = 0;
|
let mut dl = 0;
|
||||||
// Let's first collect all the inputs, and collect the tokens.
|
// Let's first collect all the inputs, and collect the tokens.
|
||||||
|
@ -101,7 +101,7 @@ impl Analyzer {
|
||||||
&self,
|
&self,
|
||||||
terms: &mut Terms,
|
terms: &mut Terms,
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
content: &[Value],
|
content: Vec<Value>,
|
||||||
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>, Vec<(TermId, OffsetRecords)>), Error> {
|
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>, Vec<(TermId, OffsetRecords)>), Error> {
|
||||||
let mut dl = 0;
|
let mut dl = 0;
|
||||||
// Let's first collect all the inputs, and collect the tokens.
|
// Let's first collect all the inputs, and collect the tokens.
|
||||||
|
@ -135,25 +135,25 @@ impl Analyzer {
|
||||||
Ok((dl, tfid, osid))
|
Ok((dl, tfid, osid))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn analyze_content(&self, content: &[Value], tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
fn analyze_content(&self, content: Vec<Value>, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||||
for v in content {
|
for v in content {
|
||||||
self.analyze_value(v, tks)?;
|
self.analyze_value(v, tks)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn analyze_value(&self, val: &Value, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
fn analyze_value(&self, val: Value, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||||
match val {
|
match val {
|
||||||
Value::Strand(s) => tks.push(self.analyze(s.0.clone())?),
|
Value::Strand(s) => tks.push(self.analyze(s.0)?),
|
||||||
Value::Number(n) => tks.push(self.analyze(n.to_string())?),
|
Value::Number(n) => tks.push(self.analyze(n.to_string())?),
|
||||||
Value::Bool(b) => tks.push(self.analyze(b.to_string())?),
|
Value::Bool(b) => tks.push(self.analyze(b.to_string())?),
|
||||||
Value::Array(a) => {
|
Value::Array(a) => {
|
||||||
for v in &a.0 {
|
for v in a.0 {
|
||||||
self.analyze_value(v, tks)?;
|
self.analyze_value(v, tks)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Value::Object(o) => {
|
Value::Object(o) => {
|
||||||
for v in o.0.values() {
|
for (_, v) in o.0 {
|
||||||
self.analyze_value(v, tks)?;
|
self.analyze_value(v, tks)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::trees::bkeys::TrieKeys;
|
use crate::idx::trees::bkeys::TrieKeys;
|
||||||
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore, Payload};
|
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore, Payload};
|
||||||
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
|
use crate::idx::trees::store::{TreeNodeProvider, TreeNodeStore, TreeStoreType};
|
||||||
|
@ -72,9 +72,8 @@ impl DocLengths {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
|
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
|
||||||
if self.store.lock().await.finish(tx).await? {
|
self.store.lock().await.finish(tx).await?;
|
||||||
tx.set(self.state_key.clone(), self.btree.get_state().try_to_val()?).await?;
|
self.btree.get_state().finish(tx, &self.state_key).await?;
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
pub(crate) mod analyzer;
|
pub(crate) mod analyzer;
|
||||||
pub(crate) mod docids;
|
|
||||||
mod doclength;
|
mod doclength;
|
||||||
mod highlighter;
|
mod highlighter;
|
||||||
mod offsets;
|
mod offsets;
|
||||||
|
@ -9,8 +8,8 @@ pub(super) mod termdocs;
|
||||||
pub(crate) mod terms;
|
pub(crate) mod terms;
|
||||||
|
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
|
use crate::idx::docids::{DocId, DocIds};
|
||||||
use crate::idx::ft::analyzer::Analyzer;
|
use crate::idx::ft::analyzer::Analyzer;
|
||||||
use crate::idx::ft::docids::{DocId, DocIds};
|
|
||||||
use crate::idx::ft::doclength::DocLengths;
|
use crate::idx::ft::doclength::DocLengths;
|
||||||
use crate::idx::ft::highlighter::{Highlighter, Offseter};
|
use crate::idx::ft::highlighter::{Highlighter, Offseter};
|
||||||
use crate::idx::ft::offsets::Offsets;
|
use crate::idx::ft::offsets::Offsets;
|
||||||
|
@ -198,7 +197,7 @@ impl FtIndex {
|
||||||
&mut self,
|
&mut self,
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
rid: &Thing,
|
rid: &Thing,
|
||||||
content: &[Value],
|
content: Vec<Value>,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
// Resolve the doc_id
|
// Resolve the doc_id
|
||||||
let resolved = self.doc_ids.write().await.resolve_doc_id(tx, rid.into()).await?;
|
let resolved = self.doc_ids.write().await.resolve_doc_id(tx, rid.into()).await?;
|
||||||
|
@ -481,7 +480,7 @@ mod tests {
|
||||||
}
|
}
|
||||||
assert_eq!(map.len(), e.len());
|
assert_eq!(map.len(), e.len());
|
||||||
for (k, p) in e {
|
for (k, p) in e {
|
||||||
assert_eq!(map.get(k), Some(&p));
|
assert_eq!(map.get(k), Some(&p), "{}", k);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
panic!("hits is none");
|
panic!("hits is none");
|
||||||
|
@ -549,9 +548,7 @@ mod tests {
|
||||||
// Add one document
|
// Add one document
|
||||||
let (mut tx, mut fti) =
|
let (mut tx, mut fti) =
|
||||||
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
||||||
fti.index_document(&mut tx, &doc1, &vec![Value::from("hello the world")])
|
fti.index_document(&mut tx, &doc1, vec![Value::from("hello the world")]).await.unwrap();
|
||||||
.await
|
|
||||||
.unwrap();
|
|
||||||
finish(tx, fti).await;
|
finish(tx, fti).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -559,8 +556,8 @@ mod tests {
|
||||||
// Add two documents
|
// Add two documents
|
||||||
let (mut tx, mut fti) =
|
let (mut tx, mut fti) =
|
||||||
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
||||||
fti.index_document(&mut tx, &doc2, &vec![Value::from("a yellow hello")]).await.unwrap();
|
fti.index_document(&mut tx, &doc2, vec![Value::from("a yellow hello")]).await.unwrap();
|
||||||
fti.index_document(&mut tx, &doc3, &vec![Value::from("foo bar")]).await.unwrap();
|
fti.index_document(&mut tx, &doc3, vec![Value::from("foo bar")]).await.unwrap();
|
||||||
finish(tx, fti).await;
|
finish(tx, fti).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -575,7 +572,13 @@ mod tests {
|
||||||
|
|
||||||
// Search & score
|
// Search & score
|
||||||
let (hits, scr) = search(&mut tx, &fti, "hello").await;
|
let (hits, scr) = search(&mut tx, &fti, "hello").await;
|
||||||
check_hits(&mut tx, hits, scr, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
|
check_hits(
|
||||||
|
&mut tx,
|
||||||
|
hits,
|
||||||
|
scr,
|
||||||
|
vec![(&doc1, Some(-0.4859746)), (&doc2, Some(-0.4859746))],
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
let (hits, scr) = search(&mut tx, &fti, "world").await;
|
let (hits, scr) = search(&mut tx, &fti, "world").await;
|
||||||
check_hits(&mut tx, hits, scr, vec![(&doc1, Some(0.4859746))]).await;
|
check_hits(&mut tx, hits, scr, vec![(&doc1, Some(0.4859746))]).await;
|
||||||
|
@ -597,7 +600,7 @@ mod tests {
|
||||||
// Reindex one document
|
// Reindex one document
|
||||||
let (mut tx, mut fti) =
|
let (mut tx, mut fti) =
|
||||||
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
tx_fti(&ds, TreeStoreType::Write, &az, btree_order, false).await;
|
||||||
fti.index_document(&mut tx, &doc3, &vec![Value::from("nobar foo")]).await.unwrap();
|
fti.index_document(&mut tx, &doc3, vec![Value::from("nobar foo")]).await.unwrap();
|
||||||
finish(tx, fti).await;
|
finish(tx, fti).await;
|
||||||
|
|
||||||
let (mut tx, fti) = tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await;
|
let (mut tx, fti) = tx_fti(&ds, TreeStoreType::Read, &az, btree_order, false).await;
|
||||||
|
@ -655,28 +658,28 @@ mod tests {
|
||||||
fti.index_document(
|
fti.index_document(
|
||||||
&mut tx,
|
&mut tx,
|
||||||
&doc1,
|
&doc1,
|
||||||
&vec![Value::from("the quick brown fox jumped over the lazy dog")],
|
vec![Value::from("the quick brown fox jumped over the lazy dog")],
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
fti.index_document(
|
fti.index_document(
|
||||||
&mut tx,
|
&mut tx,
|
||||||
&doc2,
|
&doc2,
|
||||||
&vec![Value::from("the fast fox jumped over the lazy dog")],
|
vec![Value::from("the fast fox jumped over the lazy dog")],
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
fti.index_document(
|
fti.index_document(
|
||||||
&mut tx,
|
&mut tx,
|
||||||
&doc3,
|
&doc3,
|
||||||
&vec![Value::from("the dog sat there and did nothing")],
|
vec![Value::from("the dog sat there and did nothing")],
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
fti.index_document(
|
fti.index_document(
|
||||||
&mut tx,
|
&mut tx,
|
||||||
&doc4,
|
&doc4,
|
||||||
&vec![Value::from("the other animals sat there watching")],
|
vec![Value::from("the other animals sat there watching")],
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -698,10 +701,10 @@ mod tests {
|
||||||
hits,
|
hits,
|
||||||
scr,
|
scr,
|
||||||
vec![
|
vec![
|
||||||
(&doc1, Some(0.0)),
|
(&doc1, Some(-3.4388628)),
|
||||||
(&doc2, Some(0.0)),
|
(&doc2, Some(-3.621457)),
|
||||||
(&doc3, Some(0.0)),
|
(&doc3, Some(-2.258829)),
|
||||||
(&doc4, Some(0.0)),
|
(&doc4, Some(-2.393017)),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
@ -711,7 +714,11 @@ mod tests {
|
||||||
&mut tx,
|
&mut tx,
|
||||||
hits,
|
hits,
|
||||||
scr,
|
scr,
|
||||||
vec![(&doc1, Some(0.0)), (&doc2, Some(0.0)), (&doc3, Some(0.0))],
|
vec![
|
||||||
|
(&doc1, Some(-0.7832165)),
|
||||||
|
(&doc2, Some(-0.8248031)),
|
||||||
|
(&doc3, Some(-0.87105393)),
|
||||||
|
],
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::ft::terms::TermId;
|
use crate::idx::ft::terms::TermId;
|
||||||
use crate::idx::IndexKeyBase;
|
use crate::idx::IndexKeyBase;
|
||||||
use crate::kvs::{Transaction, Val};
|
use crate::kvs::{Transaction, Val};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::ft::terms::TermId;
|
use crate::idx::ft::terms::TermId;
|
||||||
use crate::idx::trees::bkeys::TrieKeys;
|
use crate::idx::trees::bkeys::TrieKeys;
|
||||||
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore};
|
use crate::idx::trees::btree::{BState, BStatistics, BTree, BTreeNodeStore};
|
||||||
|
@ -81,10 +81,8 @@ impl Postings {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
|
pub(super) async fn finish(&self, tx: &mut Transaction) -> Result<(), Error> {
|
||||||
let updated = self.store.lock().await.finish(tx).await?;
|
self.store.lock().await.finish(tx).await?;
|
||||||
if self.btree.is_updated() || updated {
|
self.btree.get_state().finish(tx, &self.state_key).await?;
|
||||||
tx.set(self.state_key.clone(), self.btree.get_state().try_to_val()?).await?;
|
|
||||||
}
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::ft::doclength::{DocLength, DocLengths};
|
use crate::idx::ft::doclength::{DocLength, DocLengths};
|
||||||
use crate::idx::ft::postings::{Postings, TermFrequency};
|
use crate::idx::ft::postings::{Postings, TermFrequency};
|
||||||
use crate::idx::ft::termdocs::TermsDocs;
|
use crate::idx::ft::termdocs::TermsDocs;
|
||||||
|
@ -76,8 +76,8 @@ impl BM25Scorer {
|
||||||
// (N - n(qi) + 0.5)
|
// (N - n(qi) + 0.5)
|
||||||
let numerator = self.doc_count - term_doc_count + 0.5;
|
let numerator = self.doc_count - term_doc_count + 0.5;
|
||||||
let idf = (numerator / denominator).ln();
|
let idf = (numerator / denominator).ln();
|
||||||
if idf.is_nan() || idf <= 0.0 {
|
if idf.is_nan() {
|
||||||
return 0.0;
|
return f32::NAN;
|
||||||
}
|
}
|
||||||
let tf_prim = 1.0 + term_freq.ln();
|
let tf_prim = 1.0 + term_freq.ln();
|
||||||
// idf * (k1 + 1)
|
// idf * (k1 + 1)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::ft::doclength::DocLength;
|
use crate::idx::ft::doclength::DocLength;
|
||||||
use crate::idx::ft::terms::TermId;
|
use crate::idx::ft::terms::TermId;
|
||||||
use crate::idx::IndexKeyBase;
|
use crate::idx::IndexKeyBase;
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
|
pub(crate) mod docids;
|
||||||
pub(crate) mod ft;
|
pub(crate) mod ft;
|
||||||
pub(crate) mod planner;
|
pub(crate) mod planner;
|
||||||
pub mod trees;
|
pub mod trees;
|
||||||
|
|
||||||
use crate::dbs::Options;
|
use crate::dbs::Options;
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::ft::terms::TermId;
|
use crate::idx::ft::terms::TermId;
|
||||||
use crate::idx::trees::store::NodeId;
|
use crate::idx::trees::store::NodeId;
|
||||||
use crate::key::index::bc::Bc;
|
use crate::key::index::bc::Bc;
|
||||||
|
@ -18,6 +19,7 @@ use crate::key::index::bp::Bp;
|
||||||
use crate::key::index::bs::Bs;
|
use crate::key::index::bs::Bs;
|
||||||
use crate::key::index::bt::Bt;
|
use crate::key::index::bt::Bt;
|
||||||
use crate::key::index::bu::Bu;
|
use crate::key::index::bu::Bu;
|
||||||
|
use crate::key::index::vm::Vm;
|
||||||
use crate::kvs::{Key, Val};
|
use crate::kvs::{Key, Val};
|
||||||
use crate::sql::statements::DefineIndexStatement;
|
use crate::sql::statements::DefineIndexStatement;
|
||||||
use revision::Revisioned;
|
use revision::Revisioned;
|
||||||
|
@ -171,6 +173,17 @@ impl IndexKeyBase {
|
||||||
)
|
)
|
||||||
.into()
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds a `Vm` key for this index from its namespace, database, table and
/// index names plus the optional `node_id`, and converts it into a `Key`.
// NOTE(review): presumably `None` addresses the MTree state and `Some(id)` a
// tree node — confirm against `key::index::vm`.
fn new_vm_key(&self, node_id: Option<NodeId>) -> Key {
|
||||||
|
Vm::new(
|
||||||
|
self.inner.ns.as_str(),
|
||||||
|
self.inner.db.as_str(),
|
||||||
|
self.inner.tb.as_str(),
|
||||||
|
self.inner.ix.as_str(),
|
||||||
|
node_id,
|
||||||
|
)
|
||||||
|
.into()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This trait provides `Revision` based default implementations for serialization/deserialization
|
/// This trait provides `Revision` based default implementations for serialization/deserialization
|
||||||
|
|
|
@ -1,25 +1,27 @@
|
||||||
use crate::dbs::{Options, Transaction};
|
use crate::dbs::{Options, Transaction};
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::{DocId, DocIds};
|
use crate::idx::docids::{DocId, DocIds};
|
||||||
use crate::idx::ft::scorer::BM25Scorer;
|
use crate::idx::ft::scorer::BM25Scorer;
|
||||||
use crate::idx::ft::termdocs::TermsDocs;
|
use crate::idx::ft::termdocs::TermsDocs;
|
||||||
use crate::idx::ft::terms::TermId;
|
use crate::idx::ft::terms::TermId;
|
||||||
use crate::idx::ft::{FtIndex, MatchRef};
|
use crate::idx::ft::{FtIndex, MatchRef};
|
||||||
use crate::idx::planner::iterators::{
|
use crate::idx::planner::iterators::{
|
||||||
IndexEqualThingIterator, IndexRangeThingIterator, MatchesThingIterator, ThingIterator,
|
IndexEqualThingIterator, IndexRangeThingIterator, KnnThingIterator, MatchesThingIterator,
|
||||||
UniqueEqualThingIterator, UniqueRangeThingIterator,
|
ThingIterator, UniqueEqualThingIterator, UniqueRangeThingIterator,
|
||||||
};
|
};
|
||||||
use crate::idx::planner::plan::IndexOperator::Matches;
|
use crate::idx::planner::plan::IndexOperator::Matches;
|
||||||
use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
|
use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
|
||||||
use crate::idx::planner::tree::{IndexMap, IndexRef};
|
use crate::idx::planner::tree::{IndexMap, IndexRef};
|
||||||
|
use crate::idx::trees::mtree::MTreeIndex;
|
||||||
use crate::idx::trees::store::TreeStoreType;
|
use crate::idx::trees::store::TreeStoreType;
|
||||||
use crate::idx::IndexKeyBase;
|
use crate::idx::IndexKeyBase;
|
||||||
use crate::kvs;
|
use crate::kvs;
|
||||||
use crate::kvs::Key;
|
use crate::kvs::Key;
|
||||||
use crate::sql::index::Index;
|
use crate::sql::index::Index;
|
||||||
use crate::sql::statements::DefineIndexStatement;
|
use crate::sql::statements::DefineIndexStatement;
|
||||||
use crate::sql::{Expression, Object, Table, Thing, Value};
|
use crate::sql::{Array, Expression, Object, Table, Thing, Value};
|
||||||
use std::collections::{HashMap, HashSet};
|
use roaring::RoaringTreemap;
|
||||||
|
use std::collections::{HashMap, HashSet, VecDeque};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
|
@ -30,6 +32,7 @@ pub(crate) struct QueryExecutor {
|
||||||
exp_entries: HashMap<Arc<Expression>, FtEntry>,
|
exp_entries: HashMap<Arc<Expression>, FtEntry>,
|
||||||
it_entries: Vec<IteratorEntry>,
|
it_entries: Vec<IteratorEntry>,
|
||||||
index_definitions: HashMap<IndexRef, DefineIndexStatement>,
|
index_definitions: HashMap<IndexRef, DefineIndexStatement>,
|
||||||
|
mt_exp: HashMap<Arc<Expression>, MtEntry>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) type IteratorRef = u16;
|
pub(crate) type IteratorRef = u16;
|
||||||
|
@ -66,31 +69,32 @@ impl QueryExecutor {
|
||||||
let mut mr_entries = HashMap::default();
|
let mut mr_entries = HashMap::default();
|
||||||
let mut exp_entries = HashMap::default();
|
let mut exp_entries = HashMap::default();
|
||||||
let mut ft_map = HashMap::default();
|
let mut ft_map = HashMap::default();
|
||||||
|
let mut mt_map: HashMap<IndexRef, MTreeIndex> = HashMap::default();
|
||||||
|
let mut mt_exp = HashMap::default();
|
||||||
|
|
||||||
// Create all the instances of FtIndex
|
// Create all the instances of FtIndex
|
||||||
// Build the FtEntries and map them to Expressions and MatchRef
|
// Build the FtEntries and map them to Expressions and MatchRef
|
||||||
for (exp, io) in im.options {
|
for (exp, io) in im.options {
|
||||||
let mut entry = None;
|
|
||||||
let ir = io.ir();
|
let ir = io.ir();
|
||||||
if let Some(idx_def) = im.definitions.get(&ir) {
|
if let Some(idx_def) = im.definitions.get(&ir) {
|
||||||
if let Index::Search(p) = &idx_def.index {
|
match &idx_def.index {
|
||||||
|
Index::Search(p) => {
|
||||||
|
let mut ft_entry = None;
|
||||||
if let Some(ft) = ft_map.get(&ir) {
|
if let Some(ft) = ft_map.get(&ir) {
|
||||||
if entry.is_none() {
|
if ft_entry.is_none() {
|
||||||
entry = FtEntry::new(&mut run, ft, io).await?;
|
ft_entry = FtEntry::new(&mut run, ft, io).await?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let ikb = IndexKeyBase::new(opt, idx_def);
|
let ikb = IndexKeyBase::new(opt, idx_def);
|
||||||
let az = run.get_db_analyzer(opt.ns(), opt.db(), p.az.as_str()).await?;
|
let az = run.get_db_analyzer(opt.ns(), opt.db(), p.az.as_str()).await?;
|
||||||
let ft = FtIndex::new(&mut run, az, ikb, p, TreeStoreType::Read).await?;
|
let ft =
|
||||||
if entry.is_none() {
|
FtIndex::new(&mut run, az, ikb, p, TreeStoreType::Read).await?;
|
||||||
entry = FtEntry::new(&mut run, &ft, io).await?;
|
if ft_entry.is_none() {
|
||||||
|
ft_entry = FtEntry::new(&mut run, &ft, io).await?;
|
||||||
}
|
}
|
||||||
ft_map.insert(ir, ft);
|
ft_map.insert(ir, ft);
|
||||||
}
|
}
|
||||||
}
|
if let Some(e) = ft_entry {
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(e) = entry {
|
|
||||||
if let Matches(_, Some(mr)) = e.0.index_option.op() {
|
if let Matches(_, Some(mr)) = e.0.index_option.op() {
|
||||||
if mr_entries.insert(*mr, e.clone()).is_some() {
|
if mr_entries.insert(*mr, e.clone()).is_some() {
|
||||||
return Err(Error::DuplicatedMatchRef {
|
return Err(Error::DuplicatedMatchRef {
|
||||||
|
@ -101,6 +105,25 @@ impl QueryExecutor {
|
||||||
exp_entries.insert(exp, e);
|
exp_entries.insert(exp, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Index::MTree(p) => {
|
||||||
|
if let IndexOperator::Knn(a, k) = io.op() {
|
||||||
|
let entry = if let Some(mt) = mt_map.get(&ir) {
|
||||||
|
MtEntry::new(&mut run, mt, a.clone(), *k).await?
|
||||||
|
} else {
|
||||||
|
let ikb = IndexKeyBase::new(opt, idx_def);
|
||||||
|
let mt =
|
||||||
|
MTreeIndex::new(&mut run, ikb, p, TreeStoreType::Read).await?;
|
||||||
|
let entry = MtEntry::new(&mut run, &mt, a.clone(), *k).await?;
|
||||||
|
mt_map.insert(ir, mt);
|
||||||
|
entry
|
||||||
|
};
|
||||||
|
mt_exp.insert(exp, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
table: table.0.clone(),
|
table: table.0.clone(),
|
||||||
|
@ -109,6 +132,19 @@ impl QueryExecutor {
|
||||||
exp_entries,
|
exp_entries,
|
||||||
it_entries: Vec::new(),
|
it_entries: Vec::new(),
|
||||||
index_definitions: im.definitions,
|
index_definitions: im.definitions,
|
||||||
|
mt_exp,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn knn(
|
||||||
|
&self,
|
||||||
|
_txn: &Transaction,
|
||||||
|
_thg: &Thing,
|
||||||
|
exp: &Expression,
|
||||||
|
) -> Result<Value, Error> {
|
||||||
|
// If no previous case were successful, we end up with a user error
|
||||||
|
Err(Error::NoIndexFoundForMatch {
|
||||||
|
value: exp.to_string(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,9 +204,7 @@ impl QueryExecutor {
|
||||||
Index::Search {
|
Index::Search {
|
||||||
..
|
..
|
||||||
} => self.new_search_index_iterator(ir, io.clone()).await,
|
} => self.new_search_index_iterator(ir, io.clone()).await,
|
||||||
Index::MTree(_) => Err(Error::FeatureNotYetImplemented {
|
Index::MTree(_) => Ok(self.new_mtree_index_knn_iterator(ir)),
|
||||||
feature: "VectorSearch iterator".to_string(),
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
|
@ -258,6 +292,16 @@ impl QueryExecutor {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn new_mtree_index_knn_iterator(&self, ir: IteratorRef) -> Option<ThingIterator> {
|
||||||
|
if let Some(IteratorEntry::Single(exp, ..)) = self.it_entries.get(ir as usize) {
|
||||||
|
if let Some(mte) = self.mt_exp.get(exp.as_ref()) {
|
||||||
|
let it = KnnThingIterator::new(mte.doc_ids.clone(), mte.res.clone());
|
||||||
|
return Some(ThingIterator::Knn(it));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) async fn matches(
|
pub(crate) async fn matches(
|
||||||
&self,
|
&self,
|
||||||
txn: &Transaction,
|
txn: &Transaction,
|
||||||
|
@ -406,3 +450,24 @@ impl FtEntry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub(super) struct MtEntry {
|
||||||
|
doc_ids: Arc<RwLock<DocIds>>,
|
||||||
|
res: VecDeque<RoaringTreemap>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MtEntry {
|
||||||
|
async fn new(
|
||||||
|
tx: &mut kvs::Transaction,
|
||||||
|
mt: &MTreeIndex,
|
||||||
|
a: Array,
|
||||||
|
k: u32,
|
||||||
|
) -> Result<Self, Error> {
|
||||||
|
let res = mt.knn_search(tx, a, k as usize).await?;
|
||||||
|
Ok(Self {
|
||||||
|
res,
|
||||||
|
doc_ids: mt.doc_ids(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
use crate::dbs::{Options, Transaction};
|
use crate::dbs::{Options, Transaction};
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::idx::ft::docids::{DocId, NO_DOC_ID};
|
use crate::idx::docids::{DocId, DocIds, NO_DOC_ID};
|
||||||
use crate::idx::ft::termdocs::TermsDocs;
|
use crate::idx::ft::termdocs::TermsDocs;
|
||||||
use crate::idx::ft::{FtIndex, HitsIterator};
|
use crate::idx::ft::{FtIndex, HitsIterator};
|
||||||
use crate::idx::planner::plan::RangeValue;
|
use crate::idx::planner::plan::RangeValue;
|
||||||
|
@ -8,6 +8,10 @@ use crate::key::index::Index;
|
||||||
use crate::kvs::Key;
|
use crate::kvs::Key;
|
||||||
use crate::sql::statements::DefineIndexStatement;
|
use crate::sql::statements::DefineIndexStatement;
|
||||||
use crate::sql::{Array, Thing, Value};
|
use crate::sql::{Array, Thing, Value};
|
||||||
|
use roaring::RoaringTreemap;
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
|
||||||
pub(crate) enum ThingIterator {
|
pub(crate) enum ThingIterator {
|
||||||
IndexEqual(IndexEqualThingIterator),
|
IndexEqual(IndexEqualThingIterator),
|
||||||
|
@ -15,6 +19,7 @@ pub(crate) enum ThingIterator {
|
||||||
UniqueEqual(UniqueEqualThingIterator),
|
UniqueEqual(UniqueEqualThingIterator),
|
||||||
UniqueRange(UniqueRangeThingIterator),
|
UniqueRange(UniqueRangeThingIterator),
|
||||||
Matches(MatchesThingIterator),
|
Matches(MatchesThingIterator),
|
||||||
|
Knn(KnnThingIterator),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ThingIterator {
|
impl ThingIterator {
|
||||||
|
@ -29,6 +34,7 @@ impl ThingIterator {
|
||||||
ThingIterator::IndexRange(i) => i.next_batch(tx, size).await,
|
ThingIterator::IndexRange(i) => i.next_batch(tx, size).await,
|
||||||
ThingIterator::UniqueRange(i) => i.next_batch(tx, size).await,
|
ThingIterator::UniqueRange(i) => i.next_batch(tx, size).await,
|
||||||
ThingIterator::Matches(i) => i.next_batch(tx, size).await,
|
ThingIterator::Matches(i) => i.next_batch(tx, size).await,
|
||||||
|
ThingIterator::Knn(i) => i.next_batch(tx, size).await,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -307,3 +313,52 @@ impl MatchesThingIterator {
|
||||||
Ok(res)
|
Ok(res)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) struct KnnThingIterator {
|
||||||
|
doc_ids: Arc<RwLock<DocIds>>,
|
||||||
|
res: VecDeque<RoaringTreemap>,
|
||||||
|
current: Option<RoaringTreemap>,
|
||||||
|
skip: RoaringTreemap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KnnThingIterator {
|
||||||
|
pub(super) fn new(doc_ids: Arc<RwLock<DocIds>>, mut res: VecDeque<RoaringTreemap>) -> Self {
|
||||||
|
let current = res.pop_front();
|
||||||
|
Self {
|
||||||
|
doc_ids,
|
||||||
|
res,
|
||||||
|
current,
|
||||||
|
skip: RoaringTreemap::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async fn next_batch(
|
||||||
|
&mut self,
|
||||||
|
txn: &Transaction,
|
||||||
|
mut limit: u32,
|
||||||
|
) -> Result<Vec<(Thing, DocId)>, Error> {
|
||||||
|
let mut res = vec![];
|
||||||
|
let mut tx = txn.lock().await;
|
||||||
|
while self.current.is_some() && limit > 0 {
|
||||||
|
if let Some(docs) = &mut self.current {
|
||||||
|
if let Some(doc_id) = docs.iter().next() {
|
||||||
|
docs.remove(doc_id);
|
||||||
|
if self.skip.insert(doc_id) {
|
||||||
|
if let Some(doc_key) =
|
||||||
|
self.doc_ids.read().await.get_doc_key(&mut tx, doc_id).await?
|
||||||
|
{
|
||||||
|
res.push((doc_key.into(), doc_id));
|
||||||
|
limit -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if docs.is_empty() {
|
||||||
|
self.current = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if self.current.is_none() {
|
||||||
|
self.current = self.res.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(res)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -149,6 +149,7 @@ pub(super) enum IndexOperator {
|
||||||
Equality(Array),
|
Equality(Array),
|
||||||
RangePart(Operator, Value),
|
RangePart(Operator, Value),
|
||||||
Matches(String, Option<MatchRef>),
|
Matches(String, Option<MatchRef>),
|
||||||
|
Knn(Array, u32),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IndexOption {
|
impl IndexOption {
|
||||||
|
@ -191,6 +192,10 @@ impl IndexOption {
|
||||||
e.insert("operator", Value::from(op.to_string()));
|
e.insert("operator", Value::from(op.to_string()));
|
||||||
e.insert("value", v.to_owned());
|
e.insert("value", v.to_owned());
|
||||||
}
|
}
|
||||||
|
IndexOperator::Knn(a, k) => {
|
||||||
|
e.insert("operator", Value::from(format!("<{}>", k)));
|
||||||
|
e.insert("value", Value::Array(a.clone()));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -102,10 +102,10 @@ impl<'a> TreeBuilder<'a> {
|
||||||
match v {
|
match v {
|
||||||
Value::Expression(e) => self.eval_expression(e).await,
|
Value::Expression(e) => self.eval_expression(e).await,
|
||||||
Value::Idiom(i) => self.eval_idiom(i).await,
|
Value::Idiom(i) => self.eval_idiom(i).await,
|
||||||
Value::Strand(_) => Ok(Node::Scalar(v.to_owned())),
|
Value::Strand(_) | Value::Number(_) | Value::Bool(_) | Value::Thing(_) => {
|
||||||
Value::Number(_) => Ok(Node::Scalar(v.to_owned())),
|
Ok(Node::Scalar(v.to_owned()))
|
||||||
Value::Bool(_) => Ok(Node::Scalar(v.to_owned())),
|
}
|
||||||
Value::Thing(_) => Ok(Node::Scalar(v.to_owned())),
|
Value::Array(a) => Ok(self.eval_array(a)),
|
||||||
Value::Subquery(s) => self.eval_subquery(s).await,
|
Value::Subquery(s) => self.eval_subquery(s).await,
|
||||||
Value::Param(p) => {
|
Value::Param(p) => {
|
||||||
let v = p.compute(self.ctx, self.opt, self.txn, None).await?;
|
let v = p.compute(self.ctx, self.opt, self.txn, None).await?;
|
||||||
|
@ -115,6 +115,16 @@ impl<'a> TreeBuilder<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn eval_array(&mut self, a: &Array) -> Node {
|
||||||
|
// Check if it is a numeric vector
|
||||||
|
for v in &a.0 {
|
||||||
|
if !v.is_number() {
|
||||||
|
return Node::Unsupported(format!("Unsupported array: {}", a));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Vector(a.to_owned())
|
||||||
|
}
|
||||||
|
|
||||||
async fn eval_idiom(&mut self, i: &Idiom) -> Result<Node, Error> {
|
async fn eval_idiom(&mut self, i: &Idiom) -> Result<Node, Error> {
|
||||||
if let Some(irs) = self.find_indexes(i).await? {
|
if let Some(irs) = self.find_indexes(i).await? {
|
||||||
if !irs.is_empty() {
|
if !irs.is_empty() {
|
||||||
|
@ -165,25 +175,38 @@ impl<'a> TreeBuilder<'a> {
|
||||||
irs: &[IndexRef],
|
irs: &[IndexRef],
|
||||||
op: &Operator,
|
op: &Operator,
|
||||||
id: &Idiom,
|
id: &Idiom,
|
||||||
v: &Node,
|
n: &Node,
|
||||||
e: &Expression,
|
e: &Expression,
|
||||||
) -> Option<IndexOption> {
|
) -> Option<IndexOption> {
|
||||||
if let Some(v) = v.is_scalar() {
|
|
||||||
for ir in irs {
|
for ir in irs {
|
||||||
if let Some(ix) = self.index_map.definitions.get(ir) {
|
if let Some(ix) = self.index_map.definitions.get(ir) {
|
||||||
let op = match &ix.index {
|
let op = match &ix.index {
|
||||||
Index::Idx => Self::eval_index_operator(op, v),
|
Index::Idx => Self::eval_index_operator(op, n),
|
||||||
Index::Uniq => Self::eval_index_operator(op, v),
|
Index::Uniq => Self::eval_index_operator(op, n),
|
||||||
Index::Search {
|
Index::Search {
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
|
if let Some(v) = n.is_scalar() {
|
||||||
if let Operator::Matches(mr) = op {
|
if let Operator::Matches(mr) = op {
|
||||||
Some(IndexOperator::Matches(v.clone().to_raw_string(), *mr))
|
Some(IndexOperator::Matches(v.clone().to_raw_string(), *mr))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Index::MTree(_) => {
|
||||||
|
if let Operator::Knn(k) = op {
|
||||||
|
if let Node::Vector(a) = n {
|
||||||
|
Some(IndexOperator::Knn(a.clone(), *k))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Index::MTree(_) => None,
|
|
||||||
};
|
};
|
||||||
if let Some(op) = op {
|
if let Some(op) = op {
|
||||||
let io = IndexOption::new(*ir, id.clone(), op);
|
let io = IndexOption::new(*ir, id.clone(), op);
|
||||||
|
@ -192,11 +215,11 @@ impl<'a> TreeBuilder<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
fn eval_index_operator(op: &Operator, v: &Value) -> Option<IndexOperator> {
|
fn eval_index_operator(op: &Operator, n: &Node) -> Option<IndexOperator> {
|
||||||
|
if let Some(v) = n.is_scalar() {
|
||||||
match op {
|
match op {
|
||||||
Operator::Equal => Some(IndexOperator::Equality(Array::from(v.clone()))),
|
Operator::Equal => Some(IndexOperator::Equality(Array::from(v.clone()))),
|
||||||
Operator::LessThan
|
Operator::LessThan
|
||||||
|
@ -205,6 +228,9 @@ impl<'a> TreeBuilder<'a> {
|
||||||
| Operator::MoreThanOrEqual => Some(IndexOperator::RangePart(op.clone(), v.clone())),
|
| Operator::MoreThanOrEqual => Some(IndexOperator::RangePart(op.clone(), v.clone())),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn eval_subquery(&mut self, s: &Subquery) -> Result<Node, Error> {
|
async fn eval_subquery(&mut self, s: &Subquery) -> Result<Node, Error> {
|
||||||
|
@ -235,6 +261,7 @@ pub(super) enum Node {
|
||||||
IndexedField(Idiom, Arc<Vec<IndexRef>>),
|
IndexedField(Idiom, Arc<Vec<IndexRef>>),
|
||||||
NonIndexedField,
|
NonIndexedField,
|
||||||
Scalar(Value),
|
Scalar(Value),
|
||||||
|
Vector(Array),
|
||||||
Unsupported(String),
|
Unsupported(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,6 @@ where
|
||||||
{
|
{
|
||||||
state: BState,
|
state: BState,
|
||||||
full_size: u32,
|
full_size: u32,
|
||||||
updated: bool,
|
|
||||||
bk: PhantomData<BK>,
|
bk: PhantomData<BK>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,6 +30,8 @@ pub struct BState {
|
||||||
minimum_degree: u32,
|
minimum_degree: u32,
|
||||||
root: Option<NodeId>,
|
root: Option<NodeId>,
|
||||||
next_node_id: NodeId,
|
next_node_id: NodeId,
|
||||||
|
#[serde(skip)]
|
||||||
|
updated: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl VersionedSerdeState for BState {}
|
impl VersionedSerdeState for BState {}
|
||||||
|
@ -42,8 +43,34 @@ impl BState {
|
||||||
minimum_degree,
|
minimum_degree,
|
||||||
root: None,
|
root: None,
|
||||||
next_node_id: 0,
|
next_node_id: 0,
|
||||||
|
updated: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn set_root(&mut self, node_id: Option<NodeId>) {
|
||||||
|
if node_id.ne(&self.root) {
|
||||||
|
self.root = node_id;
|
||||||
|
self.updated = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_node_id(&mut self) -> NodeId {
|
||||||
|
let new_node_id = self.next_node_id;
|
||||||
|
self.next_node_id += 1;
|
||||||
|
self.updated = true;
|
||||||
|
new_node_id
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(in crate::idx) async fn finish(
|
||||||
|
&self,
|
||||||
|
tx: &mut Transaction,
|
||||||
|
key: &Key,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
if self.updated {
|
||||||
|
tx.set(key.clone(), self.try_to_val()?).await?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Default, PartialEq)]
|
#[derive(Debug, Default, PartialEq)]
|
||||||
|
@ -166,7 +193,6 @@ where
|
||||||
Self {
|
Self {
|
||||||
full_size: state.minimum_degree * 2 - 1,
|
full_size: state.minimum_degree * 2 - 1,
|
||||||
state,
|
state,
|
||||||
updated: false,
|
|
||||||
bk: PhantomData,
|
bk: PhantomData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -180,11 +206,11 @@ where
|
||||||
let mut next_node = self.state.root;
|
let mut next_node = self.state.root;
|
||||||
while let Some(node_id) = next_node.take() {
|
while let Some(node_id) = next_node.take() {
|
||||||
let current = store.get_node(tx, node_id).await?;
|
let current = store.get_node(tx, node_id).await?;
|
||||||
if let Some(payload) = current.node.keys().get(searched_key) {
|
if let Some(payload) = current.n.keys().get(searched_key) {
|
||||||
store.set_node(current, false)?;
|
store.set_node(current, false)?;
|
||||||
return Ok(Some(payload));
|
return Ok(Some(payload));
|
||||||
}
|
}
|
||||||
if let BTreeNode::Internal(keys, children) = ¤t.node {
|
if let BTreeNode::Internal(keys, children) = ¤t.n {
|
||||||
let child_idx = keys.get_child_idx(searched_key);
|
let child_idx = keys.get_child_idx(searched_key);
|
||||||
next_node.replace(children[child_idx]);
|
next_node.replace(children[child_idx]);
|
||||||
}
|
}
|
||||||
|
@ -201,27 +227,30 @@ where
|
||||||
payload: Payload,
|
payload: Payload,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
if let Some(root_id) = self.state.root {
|
if let Some(root_id) = self.state.root {
|
||||||
|
// We already have a root node
|
||||||
let root = store.get_node(tx, root_id).await?;
|
let root = store.get_node(tx, root_id).await?;
|
||||||
if root.node.keys().len() == self.full_size {
|
if root.n.keys().len() == self.full_size {
|
||||||
let new_root_id = self.new_node_id();
|
// The root node is full, let's split it
|
||||||
|
let new_root_id = self.state.new_node_id();
|
||||||
let new_root = store
|
let new_root = store
|
||||||
.new_node(new_root_id, BTreeNode::Internal(BK::default(), vec![root_id]))?;
|
.new_node(new_root_id, BTreeNode::Internal(BK::default(), vec![root_id]))?;
|
||||||
self.state.root = Some(new_root.id);
|
self.state.set_root(Some(new_root.id));
|
||||||
self.split_child(store, new_root, 0, root).await?;
|
self.split_child(store, new_root, 0, root).await?;
|
||||||
self.insert_non_full(tx, store, new_root_id, key, payload).await?;
|
self.insert_non_full(tx, store, new_root_id, key, payload).await?;
|
||||||
} else {
|
} else {
|
||||||
|
// The root node has place, let's insert the value
|
||||||
let root_id = root.id;
|
let root_id = root.id;
|
||||||
store.set_node(root, false)?;
|
store.set_node(root, false)?;
|
||||||
self.insert_non_full(tx, store, root_id, key, payload).await?;
|
self.insert_non_full(tx, store, root_id, key, payload).await?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let new_root_id = self.new_node_id();
|
// We don't have a root node, let's create id
|
||||||
|
let new_root_id = self.state.new_node_id();
|
||||||
let new_root_node =
|
let new_root_node =
|
||||||
store.new_node(new_root_id, BTreeNode::Leaf(BK::with_key_val(key, payload)?))?;
|
store.new_node(new_root_id, BTreeNode::Leaf(BK::with_key_val(key, payload)?))?;
|
||||||
store.set_node(new_root_node, true)?;
|
store.set_node(new_root_node, true)?;
|
||||||
self.state.root = Some(new_root_id);
|
self.state.set_root(Some(new_root_id));
|
||||||
}
|
}
|
||||||
self.updated = true;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,7 +266,7 @@ where
|
||||||
while let Some(node_id) = next_node_id.take() {
|
while let Some(node_id) = next_node_id.take() {
|
||||||
let mut node = store.get_node(tx, node_id).await?;
|
let mut node = store.get_node(tx, node_id).await?;
|
||||||
let key: Key = key.clone();
|
let key: Key = key.clone();
|
||||||
match &mut node.node {
|
match &mut node.n {
|
||||||
BTreeNode::Leaf(keys) => {
|
BTreeNode::Leaf(keys) => {
|
||||||
keys.insert(key, payload);
|
keys.insert(key, payload);
|
||||||
store.set_node(node, true)?;
|
store.set_node(node, true)?;
|
||||||
|
@ -250,7 +279,7 @@ where
|
||||||
}
|
}
|
||||||
let child_idx = keys.get_child_idx(&key);
|
let child_idx = keys.get_child_idx(&key);
|
||||||
let child = store.get_node(tx, children[child_idx]).await?;
|
let child = store.get_node(tx, children[child_idx]).await?;
|
||||||
let next_id = if child.node.keys().len() == self.full_size {
|
let next_id = if child.n.keys().len() == self.full_size {
|
||||||
let split_result = self.split_child(store, node, child_idx, child).await?;
|
let split_result = self.split_child(store, node, child_idx, child).await?;
|
||||||
if key.gt(&split_result.median_key) {
|
if key.gt(&split_result.median_key) {
|
||||||
split_result.right_node_id
|
split_result.right_node_id
|
||||||
|
@ -277,12 +306,12 @@ where
|
||||||
idx: usize,
|
idx: usize,
|
||||||
child_node: BStoredNode<BK>,
|
child_node: BStoredNode<BK>,
|
||||||
) -> Result<SplitResult, Error> {
|
) -> Result<SplitResult, Error> {
|
||||||
let (left_node, right_node, median_key, median_payload) = match child_node.node {
|
let (left_node, right_node, median_key, median_payload) = match child_node.n {
|
||||||
BTreeNode::Internal(keys, children) => self.split_internal_node(keys, children)?,
|
BTreeNode::Internal(keys, children) => self.split_internal_node(keys, children)?,
|
||||||
BTreeNode::Leaf(keys) => self.split_leaf_node(keys)?,
|
BTreeNode::Leaf(keys) => self.split_leaf_node(keys)?,
|
||||||
};
|
};
|
||||||
let right_node_id = self.new_node_id();
|
let right_node_id = self.state.new_node_id();
|
||||||
match parent_node.node {
|
match parent_node.n {
|
||||||
BTreeNode::Internal(ref mut keys, ref mut children) => {
|
BTreeNode::Internal(ref mut keys, ref mut children) => {
|
||||||
keys.insert(median_key.clone(), median_payload);
|
keys.insert(median_key.clone(), median_payload);
|
||||||
children.insert(idx + 1, right_node_id);
|
children.insert(idx + 1, right_node_id);
|
||||||
|
@ -329,12 +358,6 @@ where
|
||||||
Ok((left_node, right_node, r.median_key, r.median_payload))
|
Ok((left_node, right_node, r.median_key, r.median_payload))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn new_node_id(&mut self) -> NodeId {
|
|
||||||
let new_node_id = self.state.next_node_id;
|
|
||||||
self.state.next_node_id += 1;
|
|
||||||
new_node_id
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(in crate::idx) async fn delete(
|
pub(in crate::idx) async fn delete(
|
||||||
&mut self,
|
&mut self,
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
|
@ -348,7 +371,7 @@ where
|
||||||
|
|
||||||
while let Some((is_main_key, key_to_delete, node_id)) = next_node.take() {
|
while let Some((is_main_key, key_to_delete, node_id)) = next_node.take() {
|
||||||
let mut node = store.get_node(tx, node_id).await?;
|
let mut node = store.get_node(tx, node_id).await?;
|
||||||
match &mut node.node {
|
match &mut node.n {
|
||||||
BTreeNode::Leaf(keys) => {
|
BTreeNode::Leaf(keys) => {
|
||||||
// CLRS: 1
|
// CLRS: 1
|
||||||
if let Some(payload) = keys.get(&key_to_delete) {
|
if let Some(payload) = keys.get(&key_to_delete) {
|
||||||
|
@ -361,12 +384,11 @@ where
|
||||||
store.remove_node(node.id, node.key)?;
|
store.remove_node(node.id, node.key)?;
|
||||||
// Check if this was the root node
|
// Check if this was the root node
|
||||||
if Some(node_id) == self.state.root {
|
if Some(node_id) == self.state.root {
|
||||||
self.state.root = None;
|
self.state.set_root(None);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
store.set_node(node, true)?;
|
store.set_node(node, true)?;
|
||||||
}
|
}
|
||||||
self.updated = true;
|
|
||||||
} else {
|
} else {
|
||||||
store.set_node(node, false)?;
|
store.set_node(node, false)?;
|
||||||
}
|
}
|
||||||
|
@ -388,7 +410,6 @@ where
|
||||||
.await?,
|
.await?,
|
||||||
);
|
);
|
||||||
store.set_node(node, true)?;
|
store.set_node(node, true)?;
|
||||||
self.updated = true;
|
|
||||||
} else {
|
} else {
|
||||||
// CLRS: 3
|
// CLRS: 3
|
||||||
let (node_update, is_main_key, key_to_delete, next_stored_node) = self
|
let (node_update, is_main_key, key_to_delete, next_stored_node) = self
|
||||||
|
@ -409,11 +430,9 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
store.remove_node(node_id, node.key)?;
|
store.remove_node(node_id, node.key)?;
|
||||||
self.state.root = Some(next_stored_node);
|
self.state.set_root(Some(next_stored_node));
|
||||||
self.updated = true;
|
|
||||||
} else if node_update {
|
} else if node_update {
|
||||||
store.set_node(node, true)?;
|
store.set_node(node, true)?;
|
||||||
self.updated = true;
|
|
||||||
} else {
|
} else {
|
||||||
store.set_node(node, false)?;
|
store.set_node(node, false)?;
|
||||||
}
|
}
|
||||||
|
@ -437,9 +456,9 @@ where
|
||||||
let left_idx = keys.get_child_idx(&key_to_delete);
|
let left_idx = keys.get_child_idx(&key_to_delete);
|
||||||
let left_id = children[left_idx];
|
let left_id = children[left_idx];
|
||||||
let mut left_node = store.get_node(tx, left_id).await?;
|
let mut left_node = store.get_node(tx, left_id).await?;
|
||||||
if left_node.node.keys().len() >= self.state.minimum_degree {
|
if left_node.n.keys().len() >= self.state.minimum_degree {
|
||||||
// CLRS: 2a -> left_node is named `y` in the book
|
// CLRS: 2a -> left_node is named `y` in the book
|
||||||
if let Some((key_prim, payload_prim)) = left_node.node.keys().get_last_key() {
|
if let Some((key_prim, payload_prim)) = left_node.n.keys().get_last_key() {
|
||||||
keys.remove(&key_to_delete);
|
keys.remove(&key_to_delete);
|
||||||
keys.insert(key_prim.clone(), payload_prim);
|
keys.insert(key_prim.clone(), payload_prim);
|
||||||
store.set_node(left_node, true)?;
|
store.set_node(left_node, true)?;
|
||||||
|
@ -450,9 +469,9 @@ where
|
||||||
let right_idx = left_idx + 1;
|
let right_idx = left_idx + 1;
|
||||||
let right_id = children[right_idx];
|
let right_id = children[right_idx];
|
||||||
let right_node = store.get_node(tx, right_id).await?;
|
let right_node = store.get_node(tx, right_id).await?;
|
||||||
if right_node.node.keys().len() >= self.state.minimum_degree {
|
if right_node.n.keys().len() >= self.state.minimum_degree {
|
||||||
// CLRS: 2b -> right_node is name `z` in the book
|
// CLRS: 2b -> right_node is name `z` in the book
|
||||||
if let Some((key_prim, payload_prim)) = right_node.node.keys().get_first_key() {
|
if let Some((key_prim, payload_prim)) = right_node.n.keys().get_first_key() {
|
||||||
keys.remove(&key_to_delete);
|
keys.remove(&key_to_delete);
|
||||||
keys.insert(key_prim.clone(), payload_prim);
|
keys.insert(key_prim.clone(), payload_prim);
|
||||||
store.set_node(left_node, false)?;
|
store.set_node(left_node, false)?;
|
||||||
|
@ -464,7 +483,7 @@ where
|
||||||
// CLRS: 2c
|
// CLRS: 2c
|
||||||
// Merge children
|
// Merge children
|
||||||
// The payload is set to 0. The value does not matter, as the key will be deleted after anyway.
|
// The payload is set to 0. The value does not matter, as the key will be deleted after anyway.
|
||||||
left_node.node.append(key_to_delete.clone(), 0, right_node.node)?;
|
left_node.n.append(key_to_delete.clone(), 0, right_node.n)?;
|
||||||
store.set_node(left_node, true)?;
|
store.set_node(left_node, true)?;
|
||||||
store.remove_node(right_id, right_node.key)?;
|
store.remove_node(right_id, right_node.key)?;
|
||||||
keys.remove(&key_to_delete);
|
keys.remove(&key_to_delete);
|
||||||
|
@ -485,11 +504,11 @@ where
|
||||||
let child_idx = keys.get_child_idx(&key_to_delete);
|
let child_idx = keys.get_child_idx(&key_to_delete);
|
||||||
let child_id = children[child_idx];
|
let child_id = children[child_idx];
|
||||||
let child_stored_node = store.get_node(tx, child_id).await?;
|
let child_stored_node = store.get_node(tx, child_id).await?;
|
||||||
if child_stored_node.node.keys().len() < self.state.minimum_degree {
|
if child_stored_node.n.keys().len() < self.state.minimum_degree {
|
||||||
// right child (successor)
|
// right child (successor)
|
||||||
if child_idx < children.len() - 1 {
|
if child_idx < children.len() - 1 {
|
||||||
let right_child_stored_node = store.get_node(tx, children[child_idx + 1]).await?;
|
let right_child_stored_node = store.get_node(tx, children[child_idx + 1]).await?;
|
||||||
return if right_child_stored_node.node.keys().len() >= self.state.minimum_degree {
|
return if right_child_stored_node.n.keys().len() >= self.state.minimum_degree {
|
||||||
Self::delete_adjust_successor(
|
Self::delete_adjust_successor(
|
||||||
store,
|
store,
|
||||||
keys,
|
keys,
|
||||||
|
@ -520,7 +539,7 @@ where
|
||||||
if child_idx > 0 {
|
if child_idx > 0 {
|
||||||
let child_idx = child_idx - 1;
|
let child_idx = child_idx - 1;
|
||||||
let left_child_stored_node = store.get_node(tx, children[child_idx]).await?;
|
let left_child_stored_node = store.get_node(tx, children[child_idx]).await?;
|
||||||
return if left_child_stored_node.node.keys().len() >= self.state.minimum_degree {
|
return if left_child_stored_node.n.keys().len() >= self.state.minimum_degree {
|
||||||
Self::delete_adjust_predecessor(
|
Self::delete_adjust_predecessor(
|
||||||
store,
|
store,
|
||||||
keys,
|
keys,
|
||||||
|
@ -562,12 +581,12 @@ where
|
||||||
mut right_child_stored_node: BStoredNode<BK>,
|
mut right_child_stored_node: BStoredNode<BK>,
|
||||||
) -> Result<(bool, bool, Key, NodeId), Error> {
|
) -> Result<(bool, bool, Key, NodeId), Error> {
|
||||||
if let Some((ascending_key, ascending_payload)) =
|
if let Some((ascending_key, ascending_payload)) =
|
||||||
right_child_stored_node.node.keys().get_first_key()
|
right_child_stored_node.n.keys().get_first_key()
|
||||||
{
|
{
|
||||||
right_child_stored_node.node.keys_mut().remove(&ascending_key);
|
right_child_stored_node.n.keys_mut().remove(&ascending_key);
|
||||||
if let Some(descending_key) = keys.get_key(child_idx) {
|
if let Some(descending_key) = keys.get_key(child_idx) {
|
||||||
if let Some(descending_payload) = keys.remove(&descending_key) {
|
if let Some(descending_payload) = keys.remove(&descending_key) {
|
||||||
child_stored_node.node.keys_mut().insert(descending_key, descending_payload);
|
child_stored_node.n.keys_mut().insert(descending_key, descending_payload);
|
||||||
keys.insert(ascending_key, ascending_payload);
|
keys.insert(ascending_key, ascending_payload);
|
||||||
let child_id = child_stored_node.id;
|
let child_id = child_stored_node.id;
|
||||||
store.set_node(child_stored_node, true)?;
|
store.set_node(child_stored_node, true)?;
|
||||||
|
@ -590,12 +609,12 @@ where
|
||||||
mut left_child_stored_node: BStoredNode<BK>,
|
mut left_child_stored_node: BStoredNode<BK>,
|
||||||
) -> Result<(bool, bool, Key, NodeId), Error> {
|
) -> Result<(bool, bool, Key, NodeId), Error> {
|
||||||
if let Some((ascending_key, ascending_payload)) =
|
if let Some((ascending_key, ascending_payload)) =
|
||||||
left_child_stored_node.node.keys().get_last_key()
|
left_child_stored_node.n.keys().get_last_key()
|
||||||
{
|
{
|
||||||
left_child_stored_node.node.keys_mut().remove(&ascending_key);
|
left_child_stored_node.n.keys_mut().remove(&ascending_key);
|
||||||
if let Some(descending_key) = keys.get_key(child_idx) {
|
if let Some(descending_key) = keys.get_key(child_idx) {
|
||||||
if let Some(descending_payload) = keys.remove(&descending_key) {
|
if let Some(descending_payload) = keys.remove(&descending_key) {
|
||||||
child_stored_node.node.keys_mut().insert(descending_key, descending_payload);
|
child_stored_node.n.keys_mut().insert(descending_key, descending_payload);
|
||||||
keys.insert(ascending_key, ascending_payload);
|
keys.insert(ascending_key, ascending_payload);
|
||||||
let child_id = child_stored_node.id;
|
let child_id = child_stored_node.id;
|
||||||
store.set_node(child_stored_node, true)?;
|
store.set_node(child_stored_node, true)?;
|
||||||
|
@ -623,7 +642,7 @@ where
|
||||||
if let Some(descending_payload) = keys.remove(&descending_key) {
|
if let Some(descending_payload) = keys.remove(&descending_key) {
|
||||||
children.remove(child_idx + 1);
|
children.remove(child_idx + 1);
|
||||||
let left_id = left_child.id;
|
let left_id = left_child.id;
|
||||||
left_child.node.append(descending_key, descending_payload, right_child.node)?;
|
left_child.n.append(descending_key, descending_payload, right_child.n)?;
|
||||||
store.set_node(left_child, true)?;
|
store.set_node(left_child, true)?;
|
||||||
store.remove_node(right_child.id, right_child.key)?;
|
store.remove_node(right_child.id, right_child.key)?;
|
||||||
return Ok((true, is_main_key, key_to_delete, left_id));
|
return Ok((true, is_main_key, key_to_delete, left_id));
|
||||||
|
@ -645,13 +664,13 @@ where
|
||||||
}
|
}
|
||||||
while let Some((node_id, depth)) = node_queue.pop_front() {
|
while let Some((node_id, depth)) = node_queue.pop_front() {
|
||||||
let stored = store.get_node(tx, node_id).await?;
|
let stored = store.get_node(tx, node_id).await?;
|
||||||
stats.keys_count += stored.node.keys().len() as u64;
|
stats.keys_count += stored.n.keys().len() as u64;
|
||||||
if depth > stats.max_depth {
|
if depth > stats.max_depth {
|
||||||
stats.max_depth = depth;
|
stats.max_depth = depth;
|
||||||
}
|
}
|
||||||
stats.nodes_count += 1;
|
stats.nodes_count += 1;
|
||||||
stats.total_size += stored.size as u64;
|
stats.total_size += stored.size as u64;
|
||||||
if let BTreeNode::Internal(_, children) = &stored.node {
|
if let BTreeNode::Internal(_, children) = &stored.n {
|
||||||
let depth = depth + 1;
|
let depth = depth + 1;
|
||||||
for child_id in children.iter() {
|
for child_id in children.iter() {
|
||||||
node_queue.push_front((*child_id, depth));
|
node_queue.push_front((*child_id, depth));
|
||||||
|
@ -665,10 +684,6 @@ where
|
||||||
pub(in crate::idx) fn get_state(&self) -> &BState {
|
pub(in crate::idx) fn get_state(&self) -> &BState {
|
||||||
&self.state
|
&self.state
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(in crate::idx) fn is_updated(&self) -> bool {
|
|
||||||
self.updated
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -1032,13 +1047,13 @@ mod tests {
|
||||||
0 => {
|
0 => {
|
||||||
assert_eq!(depth, 1);
|
assert_eq!(depth, 1);
|
||||||
assert_eq!(node_id, 7);
|
assert_eq!(node_id, 7);
|
||||||
check_is_internal_node(node.node, vec![("p", 16)], vec![1, 8]);
|
check_is_internal_node(node.n, vec![("p", 16)], vec![1, 8]);
|
||||||
}
|
}
|
||||||
1 => {
|
1 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 1);
|
assert_eq!(node_id, 1);
|
||||||
check_is_internal_node(
|
check_is_internal_node(
|
||||||
node.node,
|
node.n,
|
||||||
vec![("c", 3), ("g", 7), ("m", 13)],
|
vec![("c", 3), ("g", 7), ("m", 13)],
|
||||||
vec![0, 9, 2, 3],
|
vec![0, 9, 2, 3],
|
||||||
);
|
);
|
||||||
|
@ -1046,42 +1061,42 @@ mod tests {
|
||||||
2 => {
|
2 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 8);
|
assert_eq!(node_id, 8);
|
||||||
check_is_internal_node(node.node, vec![("t", 20), ("x", 24)], vec![4, 6, 5]);
|
check_is_internal_node(node.n, vec![("t", 20), ("x", 24)], vec![4, 6, 5]);
|
||||||
}
|
}
|
||||||
3 => {
|
3 => {
|
||||||
assert_eq!(depth, 3);
|
assert_eq!(depth, 3);
|
||||||
assert_eq!(node_id, 0);
|
assert_eq!(node_id, 0);
|
||||||
check_is_leaf_node(node.node, vec![("a", 1), ("b", 2)]);
|
check_is_leaf_node(node.n, vec![("a", 1), ("b", 2)]);
|
||||||
}
|
}
|
||||||
4 => {
|
4 => {
|
||||||
assert_eq!(depth, 3);
|
assert_eq!(depth, 3);
|
||||||
assert_eq!(node_id, 9);
|
assert_eq!(node_id, 9);
|
||||||
check_is_leaf_node(node.node, vec![("d", 4), ("e", 5), ("f", 6)]);
|
check_is_leaf_node(node.n, vec![("d", 4), ("e", 5), ("f", 6)]);
|
||||||
}
|
}
|
||||||
5 => {
|
5 => {
|
||||||
assert_eq!(depth, 3);
|
assert_eq!(depth, 3);
|
||||||
assert_eq!(node_id, 2);
|
assert_eq!(node_id, 2);
|
||||||
check_is_leaf_node(node.node, vec![("j", 10), ("k", 11), ("l", 12)]);
|
check_is_leaf_node(node.n, vec![("j", 10), ("k", 11), ("l", 12)]);
|
||||||
}
|
}
|
||||||
6 => {
|
6 => {
|
||||||
assert_eq!(depth, 3);
|
assert_eq!(depth, 3);
|
||||||
assert_eq!(node_id, 3);
|
assert_eq!(node_id, 3);
|
||||||
check_is_leaf_node(node.node, vec![("n", 14), ("o", 15)]);
|
check_is_leaf_node(node.n, vec![("n", 14), ("o", 15)]);
|
||||||
}
|
}
|
||||||
7 => {
|
7 => {
|
||||||
assert_eq!(depth, 3);
|
assert_eq!(depth, 3);
|
||||||
assert_eq!(node_id, 4);
|
assert_eq!(node_id, 4);
|
||||||
check_is_leaf_node(node.node, vec![("q", 17), ("r", 18), ("s", 19)]);
|
check_is_leaf_node(node.n, vec![("q", 17), ("r", 18), ("s", 19)]);
|
||||||
}
|
}
|
||||||
8 => {
|
8 => {
|
||||||
assert_eq!(depth, 3);
|
assert_eq!(depth, 3);
|
||||||
assert_eq!(node_id, 6);
|
assert_eq!(node_id, 6);
|
||||||
check_is_leaf_node(node.node, vec![("u", 21), ("v", 22)]);
|
check_is_leaf_node(node.n, vec![("u", 21), ("v", 22)]);
|
||||||
}
|
}
|
||||||
9 => {
|
9 => {
|
||||||
assert_eq!(depth, 3);
|
assert_eq!(depth, 3);
|
||||||
assert_eq!(node_id, 5);
|
assert_eq!(node_id, 5);
|
||||||
check_is_leaf_node(node.node, vec![("y", 25), ("z", 26)]);
|
check_is_leaf_node(node.n, vec![("y", 25), ("z", 26)]);
|
||||||
}
|
}
|
||||||
_ => panic!("This node should not exist {}", count),
|
_ => panic!("This node should not exist {}", count),
|
||||||
})
|
})
|
||||||
|
@ -1135,13 +1150,13 @@ mod tests {
|
||||||
let nodes_count = t
|
let nodes_count = t
|
||||||
.inspect_nodes(&mut tx, |count, depth, node_id, node| {
|
.inspect_nodes(&mut tx, |count, depth, node_id, node| {
|
||||||
debug!("{} -> {}", depth, node_id);
|
debug!("{} -> {}", depth, node_id);
|
||||||
node.node.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
node.n.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
||||||
match count {
|
match count {
|
||||||
0 => {
|
0 => {
|
||||||
assert_eq!(depth, 1);
|
assert_eq!(depth, 1);
|
||||||
assert_eq!(node_id, 1);
|
assert_eq!(node_id, 1);
|
||||||
check_is_internal_node(
|
check_is_internal_node(
|
||||||
node.node,
|
node.n,
|
||||||
vec![("e", 5), ("l", 12), ("p", 16), ("t", 20), ("x", 24)],
|
vec![("e", 5), ("l", 12), ("p", 16), ("t", 20), ("x", 24)],
|
||||||
vec![0, 9, 3, 4, 6, 5],
|
vec![0, 9, 3, 4, 6, 5],
|
||||||
);
|
);
|
||||||
|
@ -1149,32 +1164,32 @@ mod tests {
|
||||||
1 => {
|
1 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 0);
|
assert_eq!(node_id, 0);
|
||||||
check_is_leaf_node(node.node, vec![("a", 1), ("c", 3)]);
|
check_is_leaf_node(node.n, vec![("a", 1), ("c", 3)]);
|
||||||
}
|
}
|
||||||
2 => {
|
2 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 9);
|
assert_eq!(node_id, 9);
|
||||||
check_is_leaf_node(node.node, vec![("j", 10), ("k", 11)]);
|
check_is_leaf_node(node.n, vec![("j", 10), ("k", 11)]);
|
||||||
}
|
}
|
||||||
3 => {
|
3 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 3);
|
assert_eq!(node_id, 3);
|
||||||
check_is_leaf_node(node.node, vec![("n", 14), ("o", 15)]);
|
check_is_leaf_node(node.n, vec![("n", 14), ("o", 15)]);
|
||||||
}
|
}
|
||||||
4 => {
|
4 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 4);
|
assert_eq!(node_id, 4);
|
||||||
check_is_leaf_node(node.node, vec![("q", 17), ("r", 18), ("s", 19)]);
|
check_is_leaf_node(node.n, vec![("q", 17), ("r", 18), ("s", 19)]);
|
||||||
}
|
}
|
||||||
5 => {
|
5 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 6);
|
assert_eq!(node_id, 6);
|
||||||
check_is_leaf_node(node.node, vec![("u", 21), ("v", 22)]);
|
check_is_leaf_node(node.n, vec![("u", 21), ("v", 22)]);
|
||||||
}
|
}
|
||||||
6 => {
|
6 => {
|
||||||
assert_eq!(depth, 2);
|
assert_eq!(depth, 2);
|
||||||
assert_eq!(node_id, 5);
|
assert_eq!(node_id, 5);
|
||||||
check_is_leaf_node(node.node, vec![("y", 25), ("z", 26)]);
|
check_is_leaf_node(node.n, vec![("y", 25), ("z", 26)]);
|
||||||
}
|
}
|
||||||
_ => panic!("This node should not exist {}", count),
|
_ => panic!("This node should not exist {}", count),
|
||||||
}
|
}
|
||||||
|
@ -1316,7 +1331,7 @@ mod tests {
|
||||||
debug!("----------------------------------");
|
debug!("----------------------------------");
|
||||||
t.inspect_nodes(tx, |_count, depth, node_id, node| {
|
t.inspect_nodes(tx, |_count, depth, node_id, node| {
|
||||||
debug!("{} -> {}", depth, node_id);
|
debug!("{} -> {}", depth, node_id);
|
||||||
node.node.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
node.n.debug(|k| Ok(String::from_utf8(k)?)).unwrap();
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -1359,7 +1374,7 @@ mod tests {
|
||||||
let mut s = TreeNodeStore::Traversal(TreeNodeProvider::Debug);
|
let mut s = TreeNodeStore::Traversal(TreeNodeProvider::Debug);
|
||||||
while let Some((node_id, depth)) = node_queue.pop_front() {
|
while let Some((node_id, depth)) = node_queue.pop_front() {
|
||||||
let stored_node = s.get_node(tx, node_id).await?;
|
let stored_node = s.get_node(tx, node_id).await?;
|
||||||
if let BTreeNode::Internal(_, children) = &stored_node.node {
|
if let BTreeNode::Internal(_, children) = &stored_node.n {
|
||||||
let depth = depth + 1;
|
let depth = depth + 1;
|
||||||
for child_id in children {
|
for child_id in children {
|
||||||
node_queue.push_back((*child_id, depth));
|
node_queue.push_back((*child_id, depth));
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
pub mod bkeys;
|
pub mod bkeys;
|
||||||
pub mod btree;
|
pub mod btree;
|
||||||
|
pub mod mtree;
|
||||||
pub mod store;
|
pub mod store;
|
||||||
|
|
1792
lib/src/idx/trees/mtree.rs
Normal file
1792
lib/src/idx/trees/mtree.rs
Normal file
File diff suppressed because it is too large
Load diff
|
@ -9,7 +9,7 @@ use tokio::sync::Mutex;
|
||||||
|
|
||||||
pub type NodeId = u64;
|
pub type NodeId = u64;
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy, PartialEq)]
|
||||||
pub enum TreeStoreType {
|
pub enum TreeStoreType {
|
||||||
Write,
|
Write,
|
||||||
Read,
|
Read,
|
||||||
|
@ -151,7 +151,7 @@ where
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
self.out.insert(id);
|
self.out.insert(id);
|
||||||
StoredNode {
|
StoredNode {
|
||||||
node,
|
n: node,
|
||||||
id,
|
id,
|
||||||
key: self.np.get_key(id),
|
key: self.np.get_key(id),
|
||||||
size: 0,
|
size: 0,
|
||||||
|
@ -238,6 +238,7 @@ pub enum TreeNodeProvider {
|
||||||
DocLengths(IndexKeyBase),
|
DocLengths(IndexKeyBase),
|
||||||
Postings(IndexKeyBase),
|
Postings(IndexKeyBase),
|
||||||
Terms(IndexKeyBase),
|
Terms(IndexKeyBase),
|
||||||
|
Vector(IndexKeyBase),
|
||||||
Debug,
|
Debug,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -248,6 +249,7 @@ impl TreeNodeProvider {
|
||||||
TreeNodeProvider::DocLengths(ikb) => ikb.new_bl_key(Some(node_id)),
|
TreeNodeProvider::DocLengths(ikb) => ikb.new_bl_key(Some(node_id)),
|
||||||
TreeNodeProvider::Postings(ikb) => ikb.new_bp_key(Some(node_id)),
|
TreeNodeProvider::Postings(ikb) => ikb.new_bp_key(Some(node_id)),
|
||||||
TreeNodeProvider::Terms(ikb) => ikb.new_bt_key(Some(node_id)),
|
TreeNodeProvider::Terms(ikb) => ikb.new_bt_key(Some(node_id)),
|
||||||
|
TreeNodeProvider::Vector(ikb) => ikb.new_vm_key(Some(node_id)),
|
||||||
TreeNodeProvider::Debug => node_id.to_be_bytes().to_vec(),
|
TreeNodeProvider::Debug => node_id.to_be_bytes().to_vec(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -261,7 +263,7 @@ impl TreeNodeProvider {
|
||||||
let size = val.len() as u32;
|
let size = val.len() as u32;
|
||||||
let node = N::try_from_val(val)?;
|
let node = N::try_from_val(val)?;
|
||||||
Ok(StoredNode {
|
Ok(StoredNode {
|
||||||
node,
|
n: node,
|
||||||
id,
|
id,
|
||||||
key,
|
key,
|
||||||
size,
|
size,
|
||||||
|
@ -275,19 +277,30 @@ impl TreeNodeProvider {
|
||||||
where
|
where
|
||||||
N: TreeNode,
|
N: TreeNode,
|
||||||
{
|
{
|
||||||
let val = node.node.try_into_val()?;
|
let val = node.n.try_into_val()?;
|
||||||
tx.set(node.key, val).await?;
|
tx.set(node.key, val).await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) struct StoredNode<N> {
|
pub(super) struct StoredNode<N> {
|
||||||
pub(super) node: N,
|
pub(super) n: N,
|
||||||
pub(super) id: NodeId,
|
pub(super) id: NodeId,
|
||||||
pub(super) key: Key,
|
pub(super) key: Key,
|
||||||
pub(super) size: u32,
|
pub(super) size: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<N> StoredNode<N> {
|
||||||
|
pub(super) fn new(n: N, id: NodeId, key: Key, size: u32) -> Self {
|
||||||
|
Self {
|
||||||
|
n,
|
||||||
|
id,
|
||||||
|
key,
|
||||||
|
size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub trait TreeNode
|
pub trait TreeNode
|
||||||
where
|
where
|
||||||
Self: Sized,
|
Self: Sized,
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
//! Stores Term/Doc frequency
|
//! Stores Term/Doc frequency
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::ft::terms::TermId;
|
use crate::idx::ft::terms::TermId;
|
||||||
use derive::Key;
|
use derive::Key;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
//! Stores the term list for doc_ids
|
//! Stores the term list for doc_ids
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use derive::Key;
|
use derive::Key;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
//! Stores the offsets
|
//! Stores the offsets
|
||||||
use crate::idx::ft::docids::DocId;
|
use crate::idx::docids::DocId;
|
||||||
use crate::idx::ft::terms::TermId;
|
use crate::idx::ft::terms::TermId;
|
||||||
use derive::Key;
|
use derive::Key;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
|
@ -11,6 +11,7 @@ pub mod bp;
|
||||||
pub mod bs;
|
pub mod bs;
|
||||||
pub mod bt;
|
pub mod bt;
|
||||||
pub mod bu;
|
pub mod bu;
|
||||||
|
pub mod vm;
|
||||||
|
|
||||||
use crate::sql::array::Array;
|
use crate::sql::array::Array;
|
||||||
use crate::sql::id::Id;
|
use crate::sql::id::Id;
|
||||||
|
|
68
lib/src/key/index/vm.rs
Normal file
68
lib/src/key/index/vm.rs
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
//! Stores MTree state and nodes
|
||||||
|
use crate::idx::trees::store::NodeId;
|
||||||
|
use derive::Key;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Key)]
|
||||||
|
pub struct Vm<'a> {
|
||||||
|
__: u8,
|
||||||
|
_a: u8,
|
||||||
|
pub ns: &'a str,
|
||||||
|
_b: u8,
|
||||||
|
pub db: &'a str,
|
||||||
|
_c: u8,
|
||||||
|
pub tb: &'a str,
|
||||||
|
_d: u8,
|
||||||
|
pub ix: &'a str,
|
||||||
|
_e: u8,
|
||||||
|
_f: u8,
|
||||||
|
_g: u8,
|
||||||
|
pub node_id: Option<NodeId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Vm<'a> {
|
||||||
|
pub fn new(
|
||||||
|
ns: &'a str,
|
||||||
|
db: &'a str,
|
||||||
|
tb: &'a str,
|
||||||
|
ix: &'a str,
|
||||||
|
node_id: Option<NodeId>,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
__: b'/',
|
||||||
|
_a: b'*',
|
||||||
|
ns,
|
||||||
|
_b: b'*',
|
||||||
|
db,
|
||||||
|
_c: b'*',
|
||||||
|
tb,
|
||||||
|
_d: b'+',
|
||||||
|
ix,
|
||||||
|
_e: b'!',
|
||||||
|
_f: b'v',
|
||||||
|
_g: b'm',
|
||||||
|
node_id,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
#[test]
|
||||||
|
fn key() {
|
||||||
|
use super::*;
|
||||||
|
#[rustfmt::skip]
|
||||||
|
let val = Vm::new(
|
||||||
|
"testns",
|
||||||
|
"testdb",
|
||||||
|
"testtb",
|
||||||
|
"testix",
|
||||||
|
Some(8)
|
||||||
|
);
|
||||||
|
let enc = Vm::encode(&val).unwrap();
|
||||||
|
assert_eq!(enc, b"/*testns\0*testdb\0*testtb\0+testix\0!vm\x01\0\0\0\0\0\0\0\x08");
|
||||||
|
|
||||||
|
let dec = Vm::decode(&enc).unwrap();
|
||||||
|
assert_eq!(val, dec);
|
||||||
|
}
|
||||||
|
}
|
|
@ -316,6 +316,9 @@ impl Datastore {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Setup the initial credentials
|
/// Setup the initial credentials
|
||||||
|
/// Note: this function triggers the `unreachable definition` compilation error (hence the `#[allow]` below), probably due to this issue:
|
||||||
|
/// https://github.com/rust-lang/rust/issues/111370
|
||||||
|
#[allow(unreachable_code, unused_variables)]
|
||||||
pub async fn setup_initial_creds(&self, creds: Root<'_>) -> Result<(), Error> {
|
pub async fn setup_initial_creds(&self, creds: Root<'_>) -> Result<(), Error> {
|
||||||
// Start a new writeable transaction
|
// Start a new writeable transaction
|
||||||
let txn = self.transaction(true, false).await?.rollback_with_panic().enclose();
|
let txn = self.transaction(true, false).await?.rollback_with_panic().enclose();
|
||||||
|
|
|
@ -191,6 +191,7 @@ impl Expression {
|
||||||
Operator::Outside => fnc::operate::outside(&l, &r),
|
Operator::Outside => fnc::operate::outside(&l, &r),
|
||||||
Operator::Intersects => fnc::operate::intersects(&l, &r),
|
Operator::Intersects => fnc::operate::intersects(&l, &r),
|
||||||
Operator::Matches(_) => fnc::operate::matches(ctx, txn, doc, self).await,
|
Operator::Matches(_) => fnc::operate::matches(ctx, txn, doc, self).await,
|
||||||
|
Operator::Knn(_) => fnc::operate::knn(ctx, txn, doc, self).await,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,7 +49,7 @@ pub struct MTreeParams {
|
||||||
pub doc_ids_order: u32,
|
pub doc_ids_order: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
#[derive(Clone, Default, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
|
||||||
#[revisioned(revision = 1)]
|
#[revisioned(revision = 1)]
|
||||||
pub enum Distance {
|
pub enum Distance {
|
||||||
#[default]
|
#[default]
|
||||||
|
@ -182,6 +182,7 @@ pub fn search(i: &str) -> IResult<&str, Index> {
|
||||||
pub fn distance(i: &str) -> IResult<&str, Distance> {
|
pub fn distance(i: &str) -> IResult<&str, Distance> {
|
||||||
let (i, _) = mightbespace(i)?;
|
let (i, _) = mightbespace(i)?;
|
||||||
let (i, _) = tag_no_case("DIST")(i)?;
|
let (i, _) = tag_no_case("DIST")(i)?;
|
||||||
|
let (i, _) = shouldbespace(i)?;
|
||||||
alt((
|
alt((
|
||||||
map(tag_no_case("EUCLIDEAN"), |_| Distance::Euclidean),
|
map(tag_no_case("EUCLIDEAN"), |_| Distance::Euclidean),
|
||||||
map(tag_no_case("MANHATTAN"), |_| Distance::Manhattan),
|
map(tag_no_case("MANHATTAN"), |_| Distance::Manhattan),
|
||||||
|
@ -200,7 +201,7 @@ pub fn minkowski(i: &str) -> IResult<&str, Distance> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dimension(i: &str) -> IResult<&str, u16> {
|
pub fn dimension(i: &str) -> IResult<&str, u16> {
|
||||||
let (i, _) = shouldbespace(i)?;
|
let (i, _) = mightbespace(i)?;
|
||||||
let (i, _) = tag_no_case("DIMENSION")(i)?;
|
let (i, _) = tag_no_case("DIMENSION")(i)?;
|
||||||
let (i, _) = shouldbespace(i)?;
|
let (i, _) = shouldbespace(i)?;
|
||||||
let (i, dim) = uint16(i)?;
|
let (i, dim) = uint16(i)?;
|
||||||
|
|
|
@ -6,6 +6,7 @@ use nom::branch::alt;
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
use nom::bytes::complete::tag_no_case;
|
use nom::bytes::complete::tag_no_case;
|
||||||
use nom::character::complete::char;
|
use nom::character::complete::char;
|
||||||
|
use nom::character::complete::u32 as uint32;
|
||||||
use nom::character::complete::u8 as uint8;
|
use nom::character::complete::u8 as uint8;
|
||||||
use nom::combinator::cut;
|
use nom::combinator::cut;
|
||||||
use nom::combinator::opt;
|
use nom::combinator::opt;
|
||||||
|
@ -67,6 +68,8 @@ pub enum Operator {
|
||||||
//
|
//
|
||||||
Outside,
|
Outside,
|
||||||
Intersects,
|
Intersects,
|
||||||
|
//
|
||||||
|
Knn(u32), // <{k}>
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Operator {
|
impl Default for Operator {
|
||||||
|
@ -141,6 +144,7 @@ impl fmt::Display for Operator {
|
||||||
f.write_str("@@")
|
f.write_str("@@")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Self::Knn(k) => write!(f, "<{}>", k),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -191,12 +195,14 @@ pub fn binary_symbols(i: &str) -> IResult<&str, Operator> {
|
||||||
value(Operator::AnyLike, tag("?~")),
|
value(Operator::AnyLike, tag("?~")),
|
||||||
value(Operator::Like, char('~')),
|
value(Operator::Like, char('~')),
|
||||||
matches,
|
matches,
|
||||||
|
knn,
|
||||||
)),
|
)),
|
||||||
alt((
|
alt((
|
||||||
value(Operator::LessThanOrEqual, tag("<=")),
|
value(Operator::LessThanOrEqual, tag("<=")),
|
||||||
value(Operator::LessThan, char('<')),
|
value(Operator::LessThan, char('<')),
|
||||||
value(Operator::MoreThanOrEqual, tag(">=")),
|
value(Operator::MoreThanOrEqual, tag(">=")),
|
||||||
value(Operator::MoreThan, char('>')),
|
value(Operator::MoreThan, char('>')),
|
||||||
|
knn,
|
||||||
)),
|
)),
|
||||||
alt((
|
alt((
|
||||||
value(Operator::Pow, tag("**")),
|
value(Operator::Pow, tag("**")),
|
||||||
|
@ -257,7 +263,6 @@ pub fn binary_phrases(i: &str) -> IResult<&str, Operator> {
|
||||||
|
|
||||||
pub fn matches(i: &str) -> IResult<&str, Operator> {
|
pub fn matches(i: &str) -> IResult<&str, Operator> {
|
||||||
let (i, _) = char('@')(i)?;
|
let (i, _) = char('@')(i)?;
|
||||||
// let (i, reference) = opt(|i| uint8(i))(i)?;
|
|
||||||
cut(|i| {
|
cut(|i| {
|
||||||
let (i, reference) = opt(uint8)(i)?;
|
let (i, reference) = opt(uint8)(i)?;
|
||||||
let (i, _) = char('@')(i)?;
|
let (i, _) = char('@')(i)?;
|
||||||
|
@ -265,6 +270,13 @@ pub fn matches(i: &str) -> IResult<&str, Operator> {
|
||||||
})(i)
|
})(i)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn knn(i: &str) -> IResult<&str, Operator> {
|
||||||
|
let (i, _) = char('<')(i)?;
|
||||||
|
let (i, k) = uint32(i)?;
|
||||||
|
let (i, _) = char('>')(i)?;
|
||||||
|
Ok((i, Operator::Knn(k)))
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
@ -290,4 +302,13 @@ mod tests {
|
||||||
let res = matches("@256@");
|
let res = matches("@256@");
|
||||||
res.unwrap_err();
|
res.unwrap_err();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_knn() {
|
||||||
|
let res = knn("<5>");
|
||||||
|
assert!(res.is_ok());
|
||||||
|
let out = res.unwrap().1;
|
||||||
|
assert_eq!("<5>", format!("{}", out));
|
||||||
|
assert_eq!(out, Operator::Knn(5));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,6 +5,7 @@ use crate::doc::CursorDoc;
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::iam::{Action, ResourceKind};
|
use crate::iam::{Action, ResourceKind};
|
||||||
use crate::idx::ft::FtIndex;
|
use crate::idx::ft::FtIndex;
|
||||||
|
use crate::idx::trees::mtree::MTreeIndex;
|
||||||
use crate::idx::trees::store::TreeStoreType;
|
use crate::idx::trees::store::TreeStoreType;
|
||||||
use crate::idx::IndexKeyBase;
|
use crate::idx::IndexKeyBase;
|
||||||
use crate::sql::comment::shouldbespace;
|
use crate::sql::comment::shouldbespace;
|
||||||
|
@ -56,6 +57,11 @@ impl AnalyzeStatement {
|
||||||
FtIndex::new(&mut run, az, ikb, p, TreeStoreType::Traversal).await?;
|
FtIndex::new(&mut run, az, ikb, p, TreeStoreType::Traversal).await?;
|
||||||
ft.statistics(&mut run).await?.into()
|
ft.statistics(&mut run).await?.into()
|
||||||
}
|
}
|
||||||
|
Index::MTree(p) => {
|
||||||
|
let mt =
|
||||||
|
MTreeIndex::new(&mut run, ikb, p, TreeStoreType::Traversal).await?;
|
||||||
|
mt.statistics(&mut run).await?.into()
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
return Err(Error::FeatureNotYetImplemented {
|
return Err(Error::FeatureNotYetImplemented {
|
||||||
feature: "Statistics on unique and non-unique indexes.".to_string(),
|
feature: "Statistics on unique and non-unique indexes.".to_string(),
|
||||||
|
|
|
@ -178,7 +178,7 @@ fn index_comment(i: &str) -> IResult<&str, DefineIndexOption> {
|
||||||
mod tests {
|
mod tests {
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::sql::index::SearchParams;
|
use crate::sql::index::{Distance, MTreeParams, SearchParams};
|
||||||
use crate::sql::Ident;
|
use crate::sql::Ident;
|
||||||
use crate::sql::Idiom;
|
use crate::sql::Idiom;
|
||||||
use crate::sql::Idioms;
|
use crate::sql::Idioms;
|
||||||
|
@ -275,4 +275,29 @@ mod tests {
|
||||||
"DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer VS DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100"
|
"DEFINE INDEX my_index ON my_table FIELDS my_col SEARCH ANALYZER my_analyzer VS DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn check_create_mtree_index() {
|
||||||
|
let sql = "INDEX my_index ON TABLE my_table COLUMNS my_col MTREE DIMENSION 4";
|
||||||
|
let (_, idx) = index(sql).unwrap();
|
||||||
|
assert_eq!(
|
||||||
|
idx,
|
||||||
|
DefineIndexStatement {
|
||||||
|
name: Ident("my_index".to_string()),
|
||||||
|
what: Ident("my_table".to_string()),
|
||||||
|
cols: Idioms(vec![Idiom(vec![Part::Field(Ident("my_col".to_string()))])]),
|
||||||
|
index: Index::MTree(MTreeParams {
|
||||||
|
dimension: 4,
|
||||||
|
distance: Distance::Euclidean,
|
||||||
|
capacity: 40,
|
||||||
|
doc_ids_order: 100,
|
||||||
|
}),
|
||||||
|
comment: None,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
idx.to_string(),
|
||||||
|
"DEFINE INDEX my_index ON my_table FIELDS my_col MTREE DIMENSION 4 DIST EUCLIDEAN CAPACITY 40 DOC_IDS_ORDER 100"
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ use crate::sql::index::Distance;
|
||||||
use crate::sql::value::serde::ser;
|
use crate::sql::value::serde::ser;
|
||||||
use serde::ser::Error as _;
|
use serde::ser::Error as _;
|
||||||
use serde::ser::Impossible;
|
use serde::ser::Impossible;
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
pub(super) struct Serializer;
|
pub(super) struct Serializer;
|
||||||
|
|
||||||
|
@ -29,9 +30,34 @@ impl ser::Serializer for Serializer {
|
||||||
) -> Result<Self::Ok, Error> {
|
) -> Result<Self::Ok, Error> {
|
||||||
match variant {
|
match variant {
|
||||||
"Euclidean" => Ok(Distance::Euclidean),
|
"Euclidean" => Ok(Distance::Euclidean),
|
||||||
|
"Manhattan" => Ok(Distance::Manhattan),
|
||||||
|
"Cosine" => Ok(Distance::Cosine),
|
||||||
|
"Hamming" => Ok(Distance::Hamming),
|
||||||
|
"Mahalanobis" => Ok(Distance::Mahalanobis),
|
||||||
variant => Err(Error::custom(format!("unexpected unit variant `{name}::{variant}`"))),
|
variant => Err(Error::custom(format!("unexpected unit variant `{name}::{variant}`"))),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn serialize_newtype_variant<T>(
|
||||||
|
self,
|
||||||
|
name: &'static str,
|
||||||
|
_variant_index: u32,
|
||||||
|
variant: &'static str,
|
||||||
|
value: &T,
|
||||||
|
) -> Result<Self::Ok, Error>
|
||||||
|
where
|
||||||
|
T: ?Sized + Serialize,
|
||||||
|
{
|
||||||
|
match variant {
|
||||||
|
"Minkowski" => {
|
||||||
|
Ok(Distance::Minkowski(value.serialize(ser::number::Serializer.wrap())?))
|
||||||
|
}
|
||||||
|
variant => {
|
||||||
|
Err(Error::custom(format!("unexpected newtype variant `{name}::{variant}`")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -41,9 +67,44 @@ mod tests {
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn euclidean() {
|
fn distance_euclidean() {
|
||||||
let dist = Distance::Euclidean;
|
let dist = Distance::Euclidean;
|
||||||
let serialized = dist.serialize(Serializer.wrap()).unwrap();
|
let serialized = dist.serialize(Serializer.wrap()).unwrap();
|
||||||
assert_eq!(dist, serialized);
|
assert_eq!(dist, serialized);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn distance_manhattan() {
|
||||||
|
let dist = Distance::Manhattan;
|
||||||
|
let serialized = dist.serialize(Serializer.wrap()).unwrap();
|
||||||
|
assert_eq!(dist, serialized);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn distance_mahalanobis() {
|
||||||
|
let dist = Distance::Mahalanobis;
|
||||||
|
let serialized = dist.serialize(Serializer.wrap()).unwrap();
|
||||||
|
assert_eq!(dist, serialized);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn distance_hamming() {
|
||||||
|
let dist = Distance::Hamming;
|
||||||
|
let serialized = dist.serialize(Serializer.wrap()).unwrap();
|
||||||
|
assert_eq!(dist, serialized);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn distance_cosine() {
|
||||||
|
let dist = Distance::Cosine;
|
||||||
|
let serialized = dist.serialize(Serializer.wrap()).unwrap();
|
||||||
|
assert_eq!(dist, serialized);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn distance_minkowski() {
|
||||||
|
let dist = Distance::Minkowski(7.into());
|
||||||
|
let serialized = dist.serialize(Serializer.wrap()).unwrap();
|
||||||
|
assert_eq!(dist, serialized);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -366,7 +366,7 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
||||||
let Value::Object(a) = a else {
|
let Value::Object(a) = a else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
let Value::Number(versionstamp1) = a.get("versionstamp").unwrap() else {
|
let Value::Number(versionstamp2) = a.get("versionstamp").unwrap() else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
let changes = a.get("changes").unwrap().to_owned();
|
let changes = a.get("changes").unwrap().to_owned();
|
||||||
|
@ -389,10 +389,10 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
||||||
let Value::Object(a) = a else {
|
let Value::Object(a) = a else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
let Value::Number(versionstamp2) = a.get("versionstamp").unwrap() else {
|
let Value::Number(versionstamp3) = a.get("versionstamp").unwrap() else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
assert!(versionstamp1 < versionstamp2);
|
assert!(versionstamp2 < versionstamp3);
|
||||||
let changes = a.get("changes").unwrap().to_owned();
|
let changes = a.get("changes").unwrap().to_owned();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
changes,
|
changes,
|
||||||
|
@ -413,10 +413,10 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
||||||
let Value::Object(a) = a else {
|
let Value::Object(a) = a else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
let Value::Number(versionstamp3) = a.get("versionstamp").unwrap() else {
|
let Value::Number(versionstamp4) = a.get("versionstamp").unwrap() else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
assert!(versionstamp2 < versionstamp3);
|
assert!(versionstamp3 < versionstamp4);
|
||||||
let changes = a.get("changes").unwrap().to_owned();
|
let changes = a.get("changes").unwrap().to_owned();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
changes,
|
changes,
|
||||||
|
@ -437,10 +437,10 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
||||||
let Value::Object(a) = a else {
|
let Value::Object(a) = a else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
let Value::Number(versionstamp4) = a.get("versionstamp").unwrap() else {
|
let Value::Number(versionstamp5) = a.get("versionstamp").unwrap() else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
assert!(versionstamp3 < versionstamp4);
|
assert!(versionstamp4 < versionstamp5);
|
||||||
let changes = a.get("changes").unwrap().to_owned();
|
let changes = a.get("changes").unwrap().to_owned();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
changes,
|
changes,
|
||||||
|
@ -487,7 +487,7 @@ async fn changefeed_with_ts() -> Result<(), Error> {
|
||||||
let Value::Number(versionstamp1b) = a.get("versionstamp").unwrap() else {
|
let Value::Number(versionstamp1b) = a.get("versionstamp").unwrap() else {
|
||||||
unreachable!()
|
unreachable!()
|
||||||
};
|
};
|
||||||
assert!(versionstamp1 == versionstamp1b);
|
assert!(versionstamp2 == versionstamp1b);
|
||||||
let changes = a.get("changes").unwrap().to_owned();
|
let changes = a.get("changes").unwrap().to_owned();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
changes,
|
changes,
|
||||||
|
|
|
@ -1211,7 +1211,9 @@ async fn define_statement_search_index() -> Result<(), Error> {
|
||||||
events: {},
|
events: {},
|
||||||
fields: {},
|
fields: {},
|
||||||
tables: {},
|
tables: {},
|
||||||
indexes: { blog_title: 'DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 HIGHLIGHTS' },
|
indexes: { blog_title: 'DEFINE INDEX blog_title ON blog FIELDS title \
|
||||||
|
SEARCH ANALYZER simple BM25(1.2,0.75) \
|
||||||
|
DOC_IDS_ORDER 100 DOC_LENGTHS_ORDER 100 POSTINGS_ORDER 100 TERMS_ORDER 100 HIGHLIGHTS' },
|
||||||
lives: {},
|
lives: {},
|
||||||
}",
|
}",
|
||||||
);
|
);
|
||||||
|
|
60
lib/tests/vector.rs
Normal file
60
lib/tests/vector.rs
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
mod helpers;
|
||||||
|
mod parse;
|
||||||
|
use crate::helpers::new_ds;
|
||||||
|
use parse::Parse;
|
||||||
|
use surrealdb::dbs::Session;
|
||||||
|
use surrealdb::err::Error;
|
||||||
|
use surrealdb::sql::Value;
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn select_where_mtree_knn() -> Result<(), Error> {
|
||||||
|
let sql = r"
|
||||||
|
CREATE pts:1 SET point = [1,2,3,4];
|
||||||
|
CREATE pts:2 SET point = [4,5,6,7];
|
||||||
|
CREATE pts:3 SET point = [8,9,10,11];
|
||||||
|
DEFINE INDEX mt_pts ON pts FIELDS point MTREE DIMENSION 4;
|
||||||
|
LET $pt = [2,3,4,5];
|
||||||
|
SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <2> $pt;
|
||||||
|
SELECT id FROM pts WHERE point <2> $pt EXPLAIN;
|
||||||
|
";
|
||||||
|
let dbs = new_ds().await?;
|
||||||
|
let ses = Session::owner().with_ns("test").with_db("test");
|
||||||
|
let res = &mut dbs.execute(sql, &ses, None).await?;
|
||||||
|
assert_eq!(res.len(), 7);
|
||||||
|
//
|
||||||
|
for _ in 0..5 {
|
||||||
|
let _ = res.remove(0).result?;
|
||||||
|
}
|
||||||
|
let tmp = res.remove(0).result?;
|
||||||
|
let val = Value::parse(
|
||||||
|
"[
|
||||||
|
{
|
||||||
|
id: pts:1,
|
||||||
|
dist: 2f
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: pts:2,
|
||||||
|
dist: 4f
|
||||||
|
}
|
||||||
|
]",
|
||||||
|
);
|
||||||
|
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||||
|
let tmp = res.remove(0).result?;
|
||||||
|
let val = Value::parse(
|
||||||
|
"[
|
||||||
|
{
|
||||||
|
detail: {
|
||||||
|
plan: {
|
||||||
|
index: 'mt_pts',
|
||||||
|
operator: '<2>',
|
||||||
|
value: [2,3,4,5]
|
||||||
|
},
|
||||||
|
table: 'pts',
|
||||||
|
},
|
||||||
|
operation: 'Iterate Index'
|
||||||
|
}
|
||||||
|
]",
|
||||||
|
);
|
||||||
|
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||||
|
Ok(())
|
||||||
|
}
|
Loading…
Reference in a new issue