[Feat] KNN filtering with limit and KNN distance function (#4036)
This commit is contained in:
parent
23653e5fce
commit
7495611bc4
47 changed files with 2889 additions and 1103 deletions
|
@ -9,8 +9,7 @@ use crate::dbs::Statement;
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::doc::Document;
|
||||
use crate::err::Error;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::planner::executor::IteratorRef;
|
||||
use crate::idx::planner::iterators::{IteratorRecord, IteratorRef};
|
||||
use crate::idx::planner::IterationStage;
|
||||
use crate::sql::edges::Edges;
|
||||
use crate::sql::range::Range;
|
||||
|
@ -34,9 +33,8 @@ pub(crate) enum Iterable {
|
|||
}
|
||||
|
||||
pub(crate) struct Processed {
|
||||
pub(crate) ir: Option<IteratorRef>,
|
||||
pub(crate) rid: Option<Thing>,
|
||||
pub(crate) doc_id: Option<DocId>,
|
||||
pub(crate) ir: Option<IteratorRecord>,
|
||||
pub(crate) val: Operable,
|
||||
}
|
||||
|
||||
|
|
|
@ -5,9 +5,10 @@ use crate::dbs::distinct::AsyncDistinct;
|
|||
use crate::dbs::distinct::SyncDistinct;
|
||||
use crate::dbs::{Iterable, Iterator, Operable, Options, Processed, Statement, Transaction};
|
||||
use crate::err::Error;
|
||||
use crate::idx::planner::executor::IteratorRef;
|
||||
use crate::idx::planner::iterators::{CollectorRecord, IteratorRef, ThingIterator};
|
||||
use crate::idx::planner::IterationStage;
|
||||
use crate::key::{graph, thing};
|
||||
use crate::kvs;
|
||||
use crate::kvs::ScanPage;
|
||||
use crate::sql::dir::Dir;
|
||||
use crate::sql::{Edges, Range, Table, Thing, Value};
|
||||
|
@ -15,6 +16,7 @@ use crate::sql::{Edges, Range, Table, Thing, Value};
|
|||
use channel::Sender;
|
||||
use reblessive::tree::Stk;
|
||||
use std::ops::Bound;
|
||||
use std::vec;
|
||||
|
||||
impl Iterable {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
|
@ -60,7 +62,7 @@ impl Iterable {
|
|||
if let Some(IterationStage::BuildKnn) = ctx.get_iteration_stage() {
|
||||
if let Some(qp) = ctx.get_query_planner() {
|
||||
if let Some(exe) = qp.get_query_executor(tb) {
|
||||
return exe.has_knn();
|
||||
return exe.has_bruteforce_knn();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -71,7 +73,7 @@ impl Iterable {
|
|||
}
|
||||
}
|
||||
|
||||
enum Processor<'a> {
|
||||
pub(crate) enum Processor<'a> {
|
||||
Iterator(Option<&'a mut SyncDistinct>, &'a mut Iterator),
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
Channel(Option<AsyncDistinct>, Sender<Processed>),
|
||||
|
@ -141,17 +143,17 @@ impl<'a> Processor<'a> {
|
|||
}
|
||||
Iterable::Range(v) => self.process_range(stk, ctx, opt, txn, stm, v).await?,
|
||||
Iterable::Edges(e) => self.process_edge(stk, ctx, opt, txn, stm, e).await?,
|
||||
Iterable::Index(t, ir) => {
|
||||
Iterable::Index(t, irf) => {
|
||||
if let Some(qp) = ctx.get_query_planner() {
|
||||
if let Some(exe) = qp.get_query_executor(&t.0) {
|
||||
// We set the query executor matching the current table in the Context
|
||||
// Avoiding search in the hashmap of the query planner for each doc
|
||||
let mut ctx = Context::new(ctx);
|
||||
ctx.set_query_executor(exe.clone());
|
||||
return self.process_index(stk, &ctx, opt, txn, stm, &t, ir).await;
|
||||
return self.process_index(stk, &ctx, opt, txn, stm, &t, irf).await;
|
||||
}
|
||||
}
|
||||
self.process_index(stk, ctx, opt, txn, stm, &t, ir).await?
|
||||
self.process_index(stk, ctx, opt, txn, stm, &t, irf).await?
|
||||
}
|
||||
Iterable::Mergeable(v, o) => {
|
||||
self.process_mergeable(stk, ctx, opt, txn, stm, v, o).await?
|
||||
|
@ -175,9 +177,8 @@ impl<'a> Processor<'a> {
|
|||
) -> Result<(), Error> {
|
||||
// Pass the value through
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: None,
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val: Operable::Value(v),
|
||||
};
|
||||
// Process the document record
|
||||
|
@ -205,9 +206,8 @@ impl<'a> Processor<'a> {
|
|||
});
|
||||
// Process the document record
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: Some(v),
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val,
|
||||
};
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
|
@ -228,9 +228,8 @@ impl<'a> Processor<'a> {
|
|||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
|
||||
// Process the document record
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: Some(v),
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val: Operable::Value(Value::None),
|
||||
};
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
|
@ -263,9 +262,8 @@ impl<'a> Processor<'a> {
|
|||
let val = Operable::Mergeable(x, o);
|
||||
// Process the document record
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: Some(v),
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val,
|
||||
};
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
|
@ -299,9 +297,8 @@ impl<'a> Processor<'a> {
|
|||
let val = Operable::Relatable(f, x, w);
|
||||
// Process the document record
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: Some(v),
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val,
|
||||
};
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
|
@ -352,9 +349,8 @@ impl<'a> Processor<'a> {
|
|||
let val = Operable::Value(val);
|
||||
// Process the record
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: Some(rid),
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val,
|
||||
};
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
|
@ -425,9 +421,8 @@ impl<'a> Processor<'a> {
|
|||
let val = Operable::Value(val);
|
||||
// Process the record
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: Some(rid),
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val,
|
||||
};
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
|
@ -551,9 +546,8 @@ impl<'a> Processor<'a> {
|
|||
});
|
||||
// Process the record
|
||||
let pro = Processed {
|
||||
ir: None,
|
||||
rid: Some(rid),
|
||||
doc_id: None,
|
||||
ir: None,
|
||||
val,
|
||||
};
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
|
@ -574,53 +568,27 @@ impl<'a> Processor<'a> {
|
|||
txn: &Transaction,
|
||||
stm: &Statement<'_>,
|
||||
table: &Table,
|
||||
ir: IteratorRef,
|
||||
irf: IteratorRef,
|
||||
) -> Result<(), Error> {
|
||||
// Check that the table exists
|
||||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &table.0, opt.strict).await?;
|
||||
if let Some(exe) = ctx.get_query_executor() {
|
||||
if let Some(mut iterator) = exe.new_iterator(opt, ir).await? {
|
||||
let mut things = Vec::new();
|
||||
iterator.next_batch(txn, PROCESSOR_BATCH_SIZE, &mut things).await?;
|
||||
while !things.is_empty() {
|
||||
if let Some(mut iterator) = exe.new_iterator(opt, irf).await? {
|
||||
// Get the first batch
|
||||
let mut to_process = Self::next_batch(ctx, opt, txn, &mut iterator).await?;
|
||||
|
||||
while !to_process.is_empty() {
|
||||
// Check if the context is finished
|
||||
if ctx.is_done() {
|
||||
break;
|
||||
}
|
||||
|
||||
for (thing, doc_id) in things {
|
||||
// Check the context
|
||||
if ctx.is_done() {
|
||||
break;
|
||||
}
|
||||
|
||||
// If the record is from another table we can skip
|
||||
if !thing.tb.eq(table.as_str()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Fetch the data from the store
|
||||
let key = thing::new(opt.ns(), opt.db(), &table.0, &thing.id);
|
||||
let val = txn.lock().await.get(key.clone()).await?;
|
||||
let rid = Thing::from((key.tb, key.id));
|
||||
// Parse the data from the store
|
||||
let val = Operable::Value(match val {
|
||||
Some(v) => Value::from(v),
|
||||
None => Value::None,
|
||||
});
|
||||
// Process the document record
|
||||
let pro = Processed {
|
||||
ir: Some(ir),
|
||||
rid: Some(rid),
|
||||
doc_id,
|
||||
val,
|
||||
};
|
||||
// Process the records
|
||||
// TODO: par_iter
|
||||
for pro in to_process {
|
||||
self.process(stk, ctx, opt, txn, stm, pro).await?;
|
||||
}
|
||||
|
||||
// Collect the next batch of ids
|
||||
things = Vec::new();
|
||||
iterator.next_batch(txn, PROCESSOR_BATCH_SIZE, &mut things).await?;
|
||||
// Get the next batch
|
||||
to_process = Self::next_batch(ctx, opt, txn, &mut iterator).await?;
|
||||
}
|
||||
// Everything ok
|
||||
return Ok(());
|
||||
|
@ -634,4 +602,48 @@ impl<'a> Processor<'a> {
|
|||
message: "No QueryExecutor has been found.".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn next_batch(
|
||||
ctx: &Context<'_>,
|
||||
opt: &Options,
|
||||
txn: &Transaction,
|
||||
iterator: &mut ThingIterator,
|
||||
) -> Result<Vec<Processed>, Error> {
|
||||
let mut tx = txn.lock().await;
|
||||
let records: Vec<CollectorRecord> =
|
||||
iterator.next_batch(ctx, &mut tx, PROCESSOR_BATCH_SIZE).await?;
|
||||
let mut to_process = Vec::with_capacity(records.len());
|
||||
for r in records {
|
||||
let v = if let Some(v) = r.2 {
|
||||
// The value may already have been fetched by the KNN iterator to evaluate the condition
|
||||
v
|
||||
} else {
|
||||
// Otherwise we have to fetch the record
|
||||
Iterable::fetch_thing(&mut tx, opt, &r.0).await?
|
||||
};
|
||||
let p = Processed {
|
||||
rid: Some(r.0),
|
||||
ir: Some(r.1),
|
||||
val: Operable::Value(v),
|
||||
};
|
||||
to_process.push(p);
|
||||
}
|
||||
Ok(to_process)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterable {
|
||||
/// Returns the value from the store, or Value::None if the value does not exist.
|
||||
pub(crate) async fn fetch_thing(
|
||||
tx: &mut kvs::Transaction,
|
||||
opt: &Options,
|
||||
thg: &Thing,
|
||||
) -> Result<Value, Error> {
|
||||
// Fetch the data from the store
|
||||
let key = thing::new(opt.ns(), opt.db(), &thg.tb, &thg.id);
|
||||
// Fetch and parse the data from the store
|
||||
let val = tx.get(key).await?.map(Value::from).unwrap_or(Value::None);
|
||||
// Return the result
|
||||
Ok(val)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
use crate::ctx::Context;
|
||||
use crate::dbs::Statement;
|
||||
use crate::dbs::{Options, Transaction};
|
||||
use crate::doc::Document;
|
||||
use crate::doc::{CursorDoc, Document};
|
||||
use crate::err::Error;
|
||||
use crate::sql::Cond;
|
||||
use reblessive::tree::Stk;
|
||||
|
||||
impl<'a> Document<'a> {
|
||||
|
@ -13,11 +14,22 @@ impl<'a> Document<'a> {
|
|||
opt: &Options,
|
||||
txn: &Transaction,
|
||||
stm: &Statement<'_>,
|
||||
) -> Result<(), Error> {
|
||||
Self::check_cond(stk, ctx, opt, txn, stm.conds(), &self.current).await
|
||||
}
|
||||
|
||||
pub(crate) async fn check_cond(
|
||||
stk: &mut Stk,
|
||||
ctx: &Context<'_>,
|
||||
opt: &Options,
|
||||
txn: &Transaction,
|
||||
cond: Option<&Cond>,
|
||||
doc: &CursorDoc<'_>,
|
||||
) -> Result<(), Error> {
|
||||
// Check where condition
|
||||
if let Some(cond) = stm.conds() {
|
||||
if let Some(cond) = cond {
|
||||
// Check if the expression is truthy
|
||||
if !cond.compute(stk, ctx, opt, txn, Some(&self.current)).await?.is_truthy() {
|
||||
if !cond.compute(stk, ctx, opt, txn, Some(doc)).await?.is_truthy() {
|
||||
// Ignore this document
|
||||
return Err(Error::Ignore);
|
||||
}
|
||||
|
|
|
@ -33,7 +33,7 @@ impl<'a> Document<'a> {
|
|||
Operable::Relatable(f, v, w) => (v, Workable::Relate(f, w)),
|
||||
};
|
||||
// Setup a new document
|
||||
let mut doc = Document::new(pro.ir, pro.rid.as_ref(), pro.doc_id, &ins.0, ins.1);
|
||||
let mut doc = Document::new(pro.rid.as_ref(), pro.ir.as_ref(), &ins.0, ins.1);
|
||||
// Process the statement
|
||||
let res = match stm {
|
||||
Statement::Select(_) => doc.select(stk, ctx, opt, txn, stm).await,
|
||||
|
@ -59,9 +59,8 @@ impl<'a> Document<'a> {
|
|||
None => Value::None,
|
||||
};
|
||||
pro = Processed {
|
||||
ir: None,
|
||||
doc_id: None,
|
||||
rid: Some(v),
|
||||
ir: None,
|
||||
val: match doc.extras {
|
||||
Workable::Normal => Operable::Value(val),
|
||||
Workable::Insert(o) => Operable::Mergeable(val, o),
|
||||
|
|
|
@ -4,8 +4,7 @@ use crate::dbs::Workable;
|
|||
use crate::err::Error;
|
||||
use crate::iam::Action;
|
||||
use crate::iam::ResourceKind;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::planner::executor::IteratorRef;
|
||||
use crate::idx::planner::iterators::IteratorRecord;
|
||||
use crate::sql::statements::define::DefineEventStatement;
|
||||
use crate::sql::statements::define::DefineFieldStatement;
|
||||
use crate::sql::statements::define::DefineIndexStatement;
|
||||
|
@ -28,24 +27,21 @@ pub(crate) struct Document<'a> {
|
|||
#[non_exhaustive]
|
||||
#[cfg_attr(debug_assertions, derive(Debug))]
|
||||
pub struct CursorDoc<'a> {
|
||||
pub(crate) ir: Option<IteratorRef>,
|
||||
pub(crate) rid: Option<&'a Thing>,
|
||||
pub(crate) ir: Option<&'a IteratorRecord>,
|
||||
pub(crate) doc: Cow<'a, Value>,
|
||||
pub(crate) doc_id: Option<DocId>,
|
||||
}
|
||||
|
||||
impl<'a> CursorDoc<'a> {
|
||||
pub(crate) fn new(
|
||||
ir: Option<IteratorRef>,
|
||||
rid: Option<&'a Thing>,
|
||||
doc_id: Option<DocId>,
|
||||
ir: Option<&'a IteratorRecord>,
|
||||
doc: Cow<'a, Value>,
|
||||
) -> Self {
|
||||
Self {
|
||||
ir,
|
||||
rid,
|
||||
ir,
|
||||
doc,
|
||||
doc_id,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -53,10 +49,9 @@ impl<'a> CursorDoc<'a> {
|
|||
impl<'a> From<&'a Value> for CursorDoc<'a> {
|
||||
fn from(doc: &'a Value) -> Self {
|
||||
Self {
|
||||
ir: None,
|
||||
rid: None,
|
||||
ir: None,
|
||||
doc: Cow::Borrowed(doc),
|
||||
doc_id: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -64,10 +59,9 @@ impl<'a> From<&'a Value> for CursorDoc<'a> {
|
|||
impl<'a> From<&'a mut Value> for CursorDoc<'a> {
|
||||
fn from(doc: &'a mut Value) -> Self {
|
||||
Self {
|
||||
ir: None,
|
||||
rid: None,
|
||||
ir: None,
|
||||
doc: Cow::Borrowed(doc),
|
||||
doc_id: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -86,17 +80,16 @@ impl<'a> From<&Document<'a>> for Vec<u8> {
|
|||
|
||||
impl<'a> Document<'a> {
|
||||
pub fn new(
|
||||
ir: Option<IteratorRef>,
|
||||
id: Option<&'a Thing>,
|
||||
doc_id: Option<DocId>,
|
||||
ir: Option<&'a IteratorRecord>,
|
||||
val: &'a Value,
|
||||
extras: Workable,
|
||||
) -> Self {
|
||||
Document {
|
||||
id,
|
||||
extras,
|
||||
current: CursorDoc::new(ir, id, doc_id, Cow::Borrowed(val)),
|
||||
initial: CursorDoc::new(ir, id, doc_id, Cow::Borrowed(val)),
|
||||
current: CursorDoc::new(id, ir, Cow::Borrowed(val)),
|
||||
initial: CursorDoc::new(id, ir, Cow::Borrowed(val)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,9 +98,8 @@ impl<'a> Document<'a> {
|
|||
/// This allows for it to be crafted without needing statements to operate on it
|
||||
#[doc(hidden)]
|
||||
pub fn new_artificial(
|
||||
ir: Option<IteratorRef>,
|
||||
id: Option<&'a Thing>,
|
||||
doc_id: Option<DocId>,
|
||||
ir: Option<&'a IteratorRecord>,
|
||||
val: Cow<'a, Value>,
|
||||
initial: Cow<'a, Value>,
|
||||
extras: Workable,
|
||||
|
@ -115,8 +107,8 @@ impl<'a> Document<'a> {
|
|||
Document {
|
||||
id,
|
||||
extras,
|
||||
current: CursorDoc::new(ir, id, doc_id, val),
|
||||
initial: CursorDoc::new(ir, id, doc_id, initial),
|
||||
current: CursorDoc::new(id, ir, val),
|
||||
initial: CursorDoc::new(id, ir, initial),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,7 @@ impl<'a> Document<'a> {
|
|||
Operable::Relatable(f, v, w) => (v, Workable::Relate(f, w)),
|
||||
};
|
||||
// Setup a new document
|
||||
let mut doc = Document::new(pro.ir, pro.rid.as_ref(), pro.doc_id, &ins.0, ins.1);
|
||||
let mut doc = Document::new(pro.rid.as_ref(), pro.ir.as_ref(), &ins.0, ins.1);
|
||||
// Process the statement
|
||||
let res = match stm {
|
||||
Statement::Select(_) => doc.select(stk, ctx, opt, txn, stm).await,
|
||||
|
@ -53,9 +53,8 @@ impl<'a> Document<'a> {
|
|||
None => Value::None,
|
||||
};
|
||||
pro = Processed {
|
||||
ir: None,
|
||||
doc_id: None,
|
||||
rid: Some(v),
|
||||
ir: None,
|
||||
val: match doc.extras {
|
||||
Workable::Normal => Operable::Value(val),
|
||||
Workable::Insert(o) => Operable::Mergeable(val, o),
|
||||
|
|
|
@ -4,7 +4,9 @@ use crate::dbs::Options;
|
|||
use crate::dbs::Transaction;
|
||||
use crate::doc::CursorDoc;
|
||||
use crate::err::Error;
|
||||
use crate::idx::planner::executor::QueryExecutor;
|
||||
use crate::sql::value::Value;
|
||||
use crate::sql::Thing;
|
||||
use reblessive::tree::Stk;
|
||||
|
||||
pub mod args;
|
||||
|
@ -55,7 +57,7 @@ pub async fn run(
|
|||
{
|
||||
stk.run(|stk| asynchronous(stk, ctx, Some(opt), Some(txn), doc, name, args)).await
|
||||
} else {
|
||||
synchronous(ctx, name, args)
|
||||
synchronous(ctx, doc, name, args)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -85,7 +87,12 @@ macro_rules! dispatch {
|
|||
}
|
||||
|
||||
/// Attempts to run any synchronous function.
|
||||
pub fn synchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Value, Error> {
|
||||
pub fn synchronous(
|
||||
ctx: &Context<'_>,
|
||||
doc: Option<&CursorDoc<'_>>,
|
||||
name: &str,
|
||||
args: Vec<Value>,
|
||||
) -> Result<Value, Error> {
|
||||
dispatch!(
|
||||
name,
|
||||
args,
|
||||
|
@ -362,6 +369,7 @@ pub fn synchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Va
|
|||
"vector::distance::chebyshev" => vector::distance::chebyshev,
|
||||
"vector::distance::euclidean" => vector::distance::euclidean,
|
||||
"vector::distance::hamming" => vector::distance::hamming,
|
||||
"vector::distance::knn" => vector::distance::knn((ctx, doc)),
|
||||
"vector::distance::mahalanobis" => vector::distance::mahalanobis,
|
||||
"vector::distance::manhattan" => vector::distance::manhattan,
|
||||
"vector::distance::minkowski" => vector::distance::minkowski,
|
||||
|
@ -509,3 +517,19 @@ mod tests {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get_execution_context<'a>(
|
||||
ctx: &'a Context<'_>,
|
||||
doc: Option<&'a CursorDoc<'_>>,
|
||||
) -> Option<(&'a QueryExecutor, &'a CursorDoc<'a>, &'a Thing)> {
|
||||
if let Some(doc) = doc {
|
||||
if let Some(thg) = doc.rid {
|
||||
if let Some(pla) = ctx.get_query_planner() {
|
||||
if let Some(exe) = pla.get_query_executor(&thg.tb) {
|
||||
return Some((exe, doc, thg));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
|
|
@ -201,7 +201,7 @@ fn get_executor_option<'a>(
|
|||
if let Some(doc) = doc {
|
||||
if let Some((exe, thg)) = get_executor_and_thing(ctx, doc) {
|
||||
if let Some(ir) = doc.ir {
|
||||
if exe.is_iterator_expression(ir, exp) {
|
||||
if exe.is_iterator_expression(ir.irf(), exp) {
|
||||
return ExecutorOption::PreMatch;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ impl_module_def!(
|
|||
fn run(js_ctx: js::Ctx<'_>, name: &str, args: Vec<Value>) -> Result<Value> {
|
||||
let this = js_ctx.globals().get::<_, OwnedBorrow<QueryContext>>(QUERY_DATA_PROP_NAME)?;
|
||||
// Process the called function
|
||||
let res = fnc::synchronous(this.context, name, args);
|
||||
let res = fnc::synchronous(this.context, this.doc, name, args);
|
||||
// Convert any response error
|
||||
res.map_err(|err| {
|
||||
js::Exception::from_message(js_ctx, &err.to_string())
|
||||
|
|
|
@ -10,6 +10,7 @@ impl_module_def!(
|
|||
"chebyshev" => run,
|
||||
"euclidean" => run,
|
||||
"hamming" => run,
|
||||
"knn" => run,
|
||||
"mahalanobis" => run,
|
||||
"manhattan" => run,
|
||||
"minkowski" => run
|
||||
|
|
|
@ -2,30 +2,11 @@ use crate::ctx::Context;
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::doc::CursorDoc;
|
||||
use crate::err::Error;
|
||||
use crate::fnc::get_execution_context;
|
||||
use crate::idx::ft::analyzer::Analyzer;
|
||||
use crate::idx::planner::executor::QueryExecutor;
|
||||
use crate::sql::{Thing, Value};
|
||||
use crate::sql::Value;
|
||||
use reblessive::tree::Stk;
|
||||
|
||||
fn get_execution_context<'a>(
|
||||
ctx: &'a Context<'_>,
|
||||
txn: Option<&'a Transaction>,
|
||||
doc: Option<&'a CursorDoc<'_>>,
|
||||
) -> Option<(&'a Transaction, &'a QueryExecutor, &'a CursorDoc<'a>, &'a Thing)> {
|
||||
if let Some(txn) = txn {
|
||||
if let Some(doc) = doc {
|
||||
if let Some(thg) = doc.rid {
|
||||
if let Some(pla) = ctx.get_query_planner() {
|
||||
if let Some(exe) = pla.get_query_executor(&thg.tb) {
|
||||
return Some((txn, exe, doc, thg));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub async fn analyze(
|
||||
(stk, ctx, txn, opt): (&mut Stk, &Context<'_>, Option<&Transaction>, Option<&Options>),
|
||||
(az, val): (Value, Value),
|
||||
|
@ -43,33 +24,38 @@ pub async fn score(
|
|||
(ctx, txn, doc): (&Context<'_>, Option<&Transaction>, Option<&CursorDoc<'_>>),
|
||||
(match_ref,): (Value,),
|
||||
) -> Result<Value, Error> {
|
||||
if let Some((txn, exe, doc, thg)) = get_execution_context(ctx, txn, doc) {
|
||||
exe.score(txn, &match_ref, thg, doc.doc_id).await
|
||||
} else {
|
||||
Ok(Value::None)
|
||||
if let Some(txn) = txn {
|
||||
if let Some((exe, doc, thg)) = get_execution_context(ctx, doc) {
|
||||
return exe.score(txn, &match_ref, thg, doc.ir).await;
|
||||
}
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
||||
pub async fn highlight(
|
||||
(ctx, txn, doc): (&Context<'_>, Option<&Transaction>, Option<&CursorDoc<'_>>),
|
||||
(prefix, suffix, match_ref, partial): (Value, Value, Value, Option<Value>),
|
||||
) -> Result<Value, Error> {
|
||||
if let Some((txn, exe, doc, thg)) = get_execution_context(ctx, txn, doc) {
|
||||
let partial = partial.map(|p| p.convert_to_bool()).unwrap_or(Ok(false))?;
|
||||
exe.highlight(txn, thg, prefix, suffix, match_ref, partial, doc.doc.as_ref()).await
|
||||
} else {
|
||||
Ok(Value::None)
|
||||
if let Some(txn) = txn {
|
||||
if let Some((exe, doc, thg)) = get_execution_context(ctx, doc) {
|
||||
let partial = partial.map(|p| p.convert_to_bool()).unwrap_or(Ok(false))?;
|
||||
return exe
|
||||
.highlight(txn, thg, prefix, suffix, match_ref, partial, doc.doc.as_ref())
|
||||
.await;
|
||||
}
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
||||
pub async fn offsets(
|
||||
(ctx, txn, doc): (&Context<'_>, Option<&Transaction>, Option<&CursorDoc<'_>>),
|
||||
(match_ref, partial): (Value, Option<Value>),
|
||||
) -> Result<Value, Error> {
|
||||
if let Some((txn, exe, _, thg)) = get_execution_context(ctx, txn, doc) {
|
||||
let partial = partial.map(|p| p.convert_to_bool()).unwrap_or(Ok(false))?;
|
||||
exe.offsets(txn, thg, match_ref, partial).await
|
||||
} else {
|
||||
Ok(Value::None)
|
||||
if let Some(txn) = txn {
|
||||
if let Some((exe, _, thg)) = get_execution_context(ctx, doc) {
|
||||
let partial = partial.map(|p| p.convert_to_bool()).unwrap_or(Ok(false))?;
|
||||
return exe.offsets(txn, thg, match_ref, partial).await;
|
||||
}
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
|
|
@ -45,11 +45,14 @@ pub fn subtract((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
|||
}
|
||||
|
||||
pub mod distance {
|
||||
|
||||
use crate::ctx::Context;
|
||||
use crate::doc::CursorDoc;
|
||||
use crate::err::Error;
|
||||
use crate::fnc::get_execution_context;
|
||||
use crate::fnc::util::math::vector::{
|
||||
ChebyshevDistance, EuclideanDistance, HammingDistance, ManhattanDistance, MinkowskiDistance,
|
||||
};
|
||||
use crate::idx::planner::IterationStage;
|
||||
use crate::sql::{Number, Value};
|
||||
|
||||
pub fn chebyshev((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
|
@ -64,6 +67,30 @@ pub mod distance {
|
|||
Ok(a.hamming_distance(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn knn(
|
||||
(ctx, doc): (&Context<'_>, Option<&CursorDoc<'_>>),
|
||||
(knn_ref,): (Option<Value>,),
|
||||
) -> Result<Value, Error> {
|
||||
if let Some((_exe, doc, thg)) = get_execution_context(ctx, doc) {
|
||||
if let Some(ir) = doc.ir {
|
||||
if let Some(d) = ir.dist() {
|
||||
return Ok(d.into());
|
||||
}
|
||||
}
|
||||
if let Some(IterationStage::Iterate(Some(results))) = ctx.get_iteration_stage() {
|
||||
let n = if let Some(Value::Number(n)) = knn_ref {
|
||||
n.as_usize()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
if let Some(d) = results.get_dist(n, thg) {
|
||||
return Ok(d.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
||||
pub fn mahalanobis((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "vector::distance::mahalanobis() function".to_string(),
|
||||
|
|
|
@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
|
|||
|
||||
pub type DocId = u64;
|
||||
|
||||
pub(crate) struct DocIds {
|
||||
pub struct DocIds {
|
||||
ixs: IndexStores,
|
||||
state_key: Key,
|
||||
index_key_base: IndexKeyBase,
|
||||
|
@ -21,7 +21,7 @@ pub(crate) struct DocIds {
|
|||
}
|
||||
|
||||
impl DocIds {
|
||||
pub(in crate::idx) async fn new(
|
||||
pub async fn new(
|
||||
ixs: &IndexStores,
|
||||
tx: &mut Transaction,
|
||||
tt: TransactionType,
|
||||
|
|
|
@ -89,6 +89,7 @@ impl Analyzer {
|
|||
}
|
||||
}
|
||||
}
|
||||
drop(tx);
|
||||
Ok((
|
||||
list,
|
||||
TermsSet {
|
||||
|
@ -123,6 +124,7 @@ impl Analyzer {
|
|||
}
|
||||
}
|
||||
}
|
||||
drop(tx);
|
||||
Ok(TermsSet {
|
||||
set,
|
||||
has_unknown_terms,
|
||||
|
@ -176,6 +178,7 @@ impl Analyzer {
|
|||
for (t, f) in tf {
|
||||
tfid.push((terms.resolve_term_id(&mut tx, t).await?, f));
|
||||
}
|
||||
drop(tx);
|
||||
Ok((dl, tfid))
|
||||
}
|
||||
|
||||
|
@ -221,6 +224,7 @@ impl Analyzer {
|
|||
tfid.push((id, o.len() as TermFrequency));
|
||||
osid.push((id, OffsetRecords(o)));
|
||||
}
|
||||
drop(tx);
|
||||
Ok((dl, tfid, osid))
|
||||
}
|
||||
|
||||
|
|
|
@ -108,7 +108,9 @@ impl FtIndex {
|
|||
) -> Result<Self, Error> {
|
||||
let mut tx = txn.lock().await;
|
||||
let az = tx.get_db_analyzer(opt.ns(), opt.db(), az).await?;
|
||||
Self::with_analyzer(ixs, &mut tx, az, index_key_base, p, tt).await
|
||||
let res = Self::with_analyzer(ixs, &mut tx, az, index_key_base, p, tt).await;
|
||||
drop(tx);
|
||||
res
|
||||
}
|
||||
async fn with_analyzer(
|
||||
ixs: &IndexStores,
|
||||
|
@ -194,13 +196,17 @@ impl FtIndex {
|
|||
) -> Result<(), Error> {
|
||||
let mut tx = txn.lock().await;
|
||||
// Extract and remove the doc_id (if any)
|
||||
if let Some(doc_id) = self.doc_ids.write().await.remove_doc(&mut tx, rid.into()).await? {
|
||||
let mut doc_ids = self.doc_ids.write().await;
|
||||
let doc_id = doc_ids.remove_doc(&mut tx, rid.into()).await?;
|
||||
drop(doc_ids);
|
||||
if let Some(doc_id) = doc_id {
|
||||
self.state.doc_count -= 1;
|
||||
|
||||
// Remove the doc length
|
||||
if let Some(doc_lengths) =
|
||||
self.doc_lengths.write().await.remove_doc_length(&mut tx, doc_id).await?
|
||||
{
|
||||
let mut doc_lengths = self.doc_lengths.write().await;
|
||||
let dl = doc_lengths.remove_doc_length(&mut tx, doc_id).await?;
|
||||
drop(doc_lengths);
|
||||
if let Some(doc_lengths) = dl {
|
||||
self.state.total_docs_lengths -= doc_lengths as u128;
|
||||
}
|
||||
|
||||
|
@ -218,6 +224,8 @@ impl FtIndex {
|
|||
t.remove_term_id(&mut tx, term_id).await?;
|
||||
}
|
||||
}
|
||||
drop(p);
|
||||
drop(t);
|
||||
// Remove the offsets if any
|
||||
if self.highlighting {
|
||||
for term_id in term_list {
|
||||
|
@ -227,6 +235,7 @@ impl FtIndex {
|
|||
}
|
||||
}
|
||||
}
|
||||
drop(tx);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -241,9 +250,11 @@ impl FtIndex {
|
|||
) -> Result<(), Error> {
|
||||
// Resolve the doc_id
|
||||
let mut tx = txn.lock().await;
|
||||
let resolved = self.doc_ids.write().await.resolve_doc_id(&mut tx, rid.into()).await?;
|
||||
let doc_id = *resolved.doc_id();
|
||||
let mut doc_ids = self.doc_ids.write().await;
|
||||
let resolved = doc_ids.resolve_doc_id(&mut tx, rid.into()).await?;
|
||||
drop(doc_ids);
|
||||
drop(tx);
|
||||
let doc_id = *resolved.doc_id();
|
||||
|
||||
// Extract the doc_lengths, terms and frequencies (and offset)
|
||||
let mut t = self.terms.write().await;
|
||||
|
@ -270,6 +281,7 @@ impl FtIndex {
|
|||
}
|
||||
}
|
||||
dl.set_doc_length(&mut tx, doc_id, doc_length).await?;
|
||||
drop(dl);
|
||||
|
||||
// Retrieve the existing terms for this document (if any)
|
||||
let term_ids_key = self.index_key_base.new_bk_key(doc_id);
|
||||
|
@ -302,6 +314,8 @@ impl FtIndex {
|
|||
}
|
||||
}
|
||||
}
|
||||
drop(p);
|
||||
drop(t);
|
||||
|
||||
if self.highlighting {
|
||||
// Set the offset if any
|
||||
|
@ -333,6 +347,7 @@ impl FtIndex {
|
|||
|
||||
// Update the states
|
||||
tx.set(self.state_key.clone(), self.state.try_to_val()?).await?;
|
||||
drop(tx);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -347,6 +362,7 @@ impl FtIndex {
|
|||
let t = self.terms.read().await;
|
||||
let res =
|
||||
self.analyzer.extract_querying_terms(stk, ctx, opt, txn, &t, query_string).await?;
|
||||
drop(t);
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
|
@ -422,7 +438,10 @@ impl FtIndex {
|
|||
doc: &Value,
|
||||
) -> Result<Value, Error> {
|
||||
let doc_key: Key = thg.into();
|
||||
if let Some(doc_id) = self.doc_ids.read().await.get_doc_id(tx, doc_key).await? {
|
||||
let di = self.doc_ids.read().await;
|
||||
let doc_id = di.get_doc_id(tx, doc_key).await?;
|
||||
drop(di);
|
||||
if let Some(doc_id) = doc_id {
|
||||
let mut hl = Highlighter::new(prefix, suffix, partial, idiom, doc);
|
||||
for (term_id, term_len) in terms.iter().flatten() {
|
||||
let o = self.offsets.get_offsets(tx, doc_id, *term_id).await?;
|
||||
|
@ -443,7 +462,10 @@ impl FtIndex {
|
|||
partial: bool,
|
||||
) -> Result<Value, Error> {
|
||||
let doc_key: Key = thg.into();
|
||||
if let Some(doc_id) = self.doc_ids.read().await.get_doc_id(tx, doc_key).await? {
|
||||
let di = self.doc_ids.read().await;
|
||||
let doc_id = di.get_doc_id(tx, doc_key).await?;
|
||||
drop(di);
|
||||
if let Some(doc_id) = doc_id {
|
||||
let mut or = Offseter::new(partial);
|
||||
for (term_id, term_len) in terms.iter().flatten() {
|
||||
let o = self.offsets.get_offsets(tx, doc_id, *term_id).await?;
|
||||
|
@ -459,12 +481,14 @@ impl FtIndex {
|
|||
pub(crate) async fn statistics(&self, txn: &Transaction) -> Result<FtStatistics, Error> {
|
||||
// TODO do parallel execution
|
||||
let mut run = txn.lock().await;
|
||||
Ok(FtStatistics {
|
||||
let res = FtStatistics {
|
||||
doc_ids: self.doc_ids.read().await.statistics(&mut run).await?,
|
||||
terms: self.terms.read().await.statistics(&mut run).await?,
|
||||
doc_lengths: self.doc_lengths.read().await.statistics(&mut run).await?,
|
||||
postings: self.postings.read().await.statistics(&mut run).await?,
|
||||
})
|
||||
};
|
||||
drop(run);
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
pub(crate) async fn finish(&self, tx: &Transaction) -> Result<(), Error> {
|
||||
|
@ -473,6 +497,7 @@ impl FtIndex {
|
|||
self.doc_lengths.write().await.finish(&mut run).await?;
|
||||
self.postings.write().await.finish(&mut run).await?;
|
||||
self.terms.write().await.finish(&mut run).await?;
|
||||
drop(run);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -490,15 +515,27 @@ impl HitsIterator {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
pub(crate) fn len(&self) -> usize {
|
||||
self.iter.len()
|
||||
}
|
||||
#[cfg(target_arch = "wasm32")]
|
||||
pub(crate) fn len(&self) -> usize {
|
||||
self.iter.size_hint().0
|
||||
}
|
||||
|
||||
pub(crate) async fn next(
|
||||
&mut self,
|
||||
tx: &mut kvs::Transaction,
|
||||
) -> Result<Option<(Thing, DocId)>, Error> {
|
||||
let di = self.doc_ids.read().await;
|
||||
for doc_id in self.iter.by_ref() {
|
||||
if let Some(doc_key) = self.doc_ids.read().await.get_doc_key(tx, doc_id).await? {
|
||||
if let Some(doc_key) = di.get_doc_key(tx, doc_id).await? {
|
||||
drop(di);
|
||||
return Ok(Some((doc_key.into(), doc_id)));
|
||||
}
|
||||
}
|
||||
drop(di);
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
@ -541,6 +578,7 @@ mod tests {
|
|||
} else {
|
||||
panic!("hits is none");
|
||||
}
|
||||
drop(tx);
|
||||
}
|
||||
|
||||
async fn search(
|
||||
|
@ -600,7 +638,9 @@ mod tests {
|
|||
|
||||
pub(super) async fn finish(txn: &Transaction, fti: FtIndex) {
|
||||
fti.finish(txn).await.unwrap();
|
||||
txn.lock().await.commit().await.unwrap();
|
||||
let mut tx = txn.lock().await;
|
||||
tx.commit().await.unwrap();
|
||||
drop(tx);
|
||||
}
|
||||
|
||||
#[test(tokio::test)]
|
||||
|
|
|
@ -45,8 +45,9 @@ impl BM25Scorer {
|
|||
term_doc_count: DocLength,
|
||||
term_frequency: TermFrequency,
|
||||
) -> Result<Score, Error> {
|
||||
let doc_length =
|
||||
self.doc_lengths.read().await.get_doc_length(tx, doc_id).await?.unwrap_or(0);
|
||||
let dl = self.doc_lengths.read().await;
|
||||
let doc_length = dl.get_doc_length(tx, doc_id).await?.unwrap_or(0);
|
||||
drop(dl);
|
||||
Ok(self.compute_bm25_score(term_frequency as f32, term_doc_count as f32, doc_length as f32))
|
||||
}
|
||||
|
||||
|
@ -56,15 +57,16 @@ impl BM25Scorer {
|
|||
doc_id: DocId,
|
||||
) -> Result<Option<Score>, Error> {
|
||||
let mut sc = 0.0;
|
||||
let p = self.postings.read().await;
|
||||
for (term_id, docs) in self.terms_docs.iter().flatten() {
|
||||
if docs.contains(doc_id) {
|
||||
if let Some(term_freq) =
|
||||
self.postings.read().await.get_term_frequency(tx, *term_id, doc_id).await?
|
||||
{
|
||||
let tf = p.get_term_frequency(tx, *term_id, doc_id).await?;
|
||||
if let Some(term_freq) = tf {
|
||||
sc += self.term_score(tx, doc_id, docs.len(), term_freq).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
drop(p);
|
||||
Ok(Some(sc))
|
||||
}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
pub mod docids;
|
||||
pub(crate) mod ft;
|
||||
pub(crate) mod planner;
|
||||
pub mod planner;
|
||||
pub mod trees;
|
||||
|
||||
use crate::dbs::Options;
|
||||
|
|
352
core/src/idx/planner/checker.rs
Normal file
352
core/src/idx/planner/checker.rs
Normal file
|
@ -0,0 +1,352 @@
|
|||
use crate::ctx::Context;
|
||||
use crate::dbs::{Iterable, Options, Transaction};
|
||||
use crate::doc::CursorDoc;
|
||||
use crate::err::Error;
|
||||
use crate::idx::docids::{DocId, DocIds};
|
||||
use crate::idx::planner::iterators::KnnIteratorResult;
|
||||
use crate::idx::trees::hnsw::docs::HnswDocs;
|
||||
use crate::idx::trees::knn::Ids64;
|
||||
use crate::sql::{Cond, Thing, Value};
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::HashMap;
|
||||
use reblessive::tree::Stk;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
|
||||
pub enum HnswConditionChecker<'a> {
|
||||
Hnsw(HnswChecker),
|
||||
HnswCondition(HnswCondChecker<'a>),
|
||||
}
|
||||
|
||||
pub enum MTreeConditionChecker<'a> {
|
||||
MTree(MTreeChecker<'a>),
|
||||
MTreeCondition(MTreeCondChecker<'a>),
|
||||
}
|
||||
|
||||
impl<'a> Default for HnswConditionChecker<'a> {
|
||||
fn default() -> Self {
|
||||
Self::Hnsw(HnswChecker {})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> HnswConditionChecker<'a> {
|
||||
pub(in crate::idx) fn new_cond(
|
||||
ctx: &'a Context<'a>,
|
||||
opt: &'a Options,
|
||||
txn: &'a Transaction,
|
||||
cond: Arc<Cond>,
|
||||
) -> Self {
|
||||
Self::HnswCondition(HnswCondChecker {
|
||||
ctx,
|
||||
opt,
|
||||
txn,
|
||||
cond,
|
||||
cache: Default::default(),
|
||||
})
|
||||
}
|
||||
|
||||
pub(in crate::idx) async fn check_truthy(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
docs: &HnswDocs,
|
||||
doc_ids: &Ids64,
|
||||
) -> Result<bool, Error> {
|
||||
match self {
|
||||
Self::HnswCondition(c) => c.check_any_truthy(stk, docs, doc_ids).await,
|
||||
Self::Hnsw(_) => Ok(true),
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) fn expire(&mut self, doc_id: u64) {
|
||||
if let Self::HnswCondition(c) = self {
|
||||
c.expire(doc_id)
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) fn expires(&mut self, doc_ids: Ids64) {
|
||||
if let Self::HnswCondition(c) = self {
|
||||
c.expires(doc_ids)
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) async fn convert_result(
|
||||
&mut self,
|
||||
docs: &HnswDocs,
|
||||
res: VecDeque<(DocId, f64)>,
|
||||
) -> Result<VecDeque<KnnIteratorResult>, Error> {
|
||||
match self {
|
||||
Self::Hnsw(c) => c.convert_result(docs, res).await,
|
||||
Self::HnswCondition(c) => Ok(c.convert_result(res)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> MTreeConditionChecker<'a> {
|
||||
pub fn new_cond(
|
||||
ctx: &'a Context<'_>,
|
||||
opt: &'a Options,
|
||||
txn: &'a Transaction,
|
||||
cond: Arc<Cond>,
|
||||
) -> Self {
|
||||
if Cond(Value::Bool(true)).ne(cond.as_ref()) {
|
||||
return Self::MTreeCondition(MTreeCondChecker {
|
||||
ctx,
|
||||
opt,
|
||||
txn,
|
||||
cond,
|
||||
cache: Default::default(),
|
||||
});
|
||||
} else {
|
||||
Self::new(txn)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(txn: &'a Transaction) -> Self {
|
||||
Self::MTree(MTreeChecker {
|
||||
txn,
|
||||
})
|
||||
}
|
||||
|
||||
pub(in crate::idx) async fn check_truthy(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
doc_ids: &DocIds,
|
||||
doc_id: DocId,
|
||||
) -> Result<bool, Error> {
|
||||
match self {
|
||||
Self::MTreeCondition(c) => c.check_truthy(stk, doc_ids, doc_id).await,
|
||||
Self::MTree(_) => Ok(true),
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) fn expires(&mut self, ids: Ids64) {
|
||||
if let Self::MTreeCondition(c) = self {
|
||||
c.expires(ids)
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) async fn convert_result(
|
||||
&mut self,
|
||||
doc_ids: &DocIds,
|
||||
res: VecDeque<(DocId, f64)>,
|
||||
) -> Result<VecDeque<KnnIteratorResult>, Error> {
|
||||
match self {
|
||||
Self::MTree(c) => c.convert_result(doc_ids, res).await,
|
||||
Self::MTreeCondition(c) => Ok(c.convert_result(res)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MTreeChecker<'a> {
|
||||
txn: &'a Transaction,
|
||||
}
|
||||
|
||||
impl<'a> MTreeChecker<'a> {
|
||||
async fn convert_result(
|
||||
&self,
|
||||
doc_ids: &DocIds,
|
||||
res: VecDeque<(DocId, f64)>,
|
||||
) -> Result<VecDeque<KnnIteratorResult>, Error> {
|
||||
if res.is_empty() {
|
||||
return Ok(VecDeque::from([]));
|
||||
}
|
||||
let mut result = VecDeque::with_capacity(res.len());
|
||||
let mut tx = self.txn.lock().await;
|
||||
for (doc_id, dist) in res {
|
||||
if let Some(key) = doc_ids.get_doc_key(&mut tx, doc_id).await? {
|
||||
result.push_back((key.into(), dist, None));
|
||||
}
|
||||
}
|
||||
drop(tx);
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
struct CheckerCacheEntry {
|
||||
record: Option<(Thing, Value)>,
|
||||
truthy: bool,
|
||||
}
|
||||
|
||||
impl CheckerCacheEntry {
|
||||
fn convert_result(
|
||||
res: VecDeque<(DocId, f64)>,
|
||||
cache: &mut HashMap<DocId, CheckerCacheEntry>,
|
||||
) -> VecDeque<KnnIteratorResult> {
|
||||
let mut result = VecDeque::with_capacity(res.len());
|
||||
for (doc_id, dist) in res {
|
||||
if let Some(e) = cache.remove(&doc_id) {
|
||||
if e.truthy {
|
||||
if let Some((rid, value)) = e.record {
|
||||
result.push_back((rid, dist, Some(value)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
async fn build(
|
||||
stk: &mut Stk,
|
||||
ctx: &Context<'_>,
|
||||
opt: &Options,
|
||||
txn: &Transaction,
|
||||
rid: Option<Thing>,
|
||||
cond: &Cond,
|
||||
) -> Result<Self, Error> {
|
||||
if let Some(rid) = rid {
|
||||
let mut tx = txn.lock().await;
|
||||
let val = Iterable::fetch_thing(&mut tx, opt, &rid).await?;
|
||||
drop(tx);
|
||||
if !val.is_none_or_null() {
|
||||
let (value, truthy) = {
|
||||
let cursor_doc = CursorDoc {
|
||||
rid: Some(&rid),
|
||||
ir: None,
|
||||
doc: Cow::Owned(val),
|
||||
};
|
||||
let truthy =
|
||||
cond.compute(stk, ctx, opt, txn, Some(&cursor_doc)).await?.is_truthy();
|
||||
(cursor_doc.doc.into_owned(), truthy)
|
||||
};
|
||||
return Ok(CheckerCacheEntry {
|
||||
record: Some((rid, value)),
|
||||
truthy,
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(CheckerCacheEntry {
|
||||
record: None,
|
||||
truthy: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MTreeCondChecker<'a> {
|
||||
ctx: &'a Context<'a>,
|
||||
opt: &'a Options,
|
||||
txn: &'a Transaction,
|
||||
cond: Arc<Cond>,
|
||||
cache: HashMap<DocId, CheckerCacheEntry>,
|
||||
}
|
||||
|
||||
impl<'a> MTreeCondChecker<'a> {
|
||||
async fn check_truthy(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
doc_ids: &DocIds,
|
||||
doc_id: u64,
|
||||
) -> Result<bool, Error> {
|
||||
match self.cache.entry(doc_id) {
|
||||
Entry::Occupied(e) => Ok(e.get().truthy),
|
||||
Entry::Vacant(e) => {
|
||||
let mut tx = self.txn.lock().await;
|
||||
let rid = doc_ids.get_doc_key(&mut tx, doc_id).await?.map(|k| k.into());
|
||||
drop(tx);
|
||||
let ent = CheckerCacheEntry::build(
|
||||
stk,
|
||||
self.ctx,
|
||||
self.opt,
|
||||
self.txn,
|
||||
rid,
|
||||
self.cond.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
let truthy = ent.truthy;
|
||||
e.insert(ent);
|
||||
Ok(truthy)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn expire(&mut self, doc_id: DocId) {
|
||||
self.cache.remove(&doc_id);
|
||||
}
|
||||
|
||||
fn expires(&mut self, doc_ids: Ids64) {
|
||||
for doc_id in doc_ids.iter() {
|
||||
self.expire(doc_id);
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_result(&mut self, res: VecDeque<(DocId, f64)>) -> VecDeque<KnnIteratorResult> {
|
||||
CheckerCacheEntry::convert_result(res, &mut self.cache)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HnswChecker {}
|
||||
|
||||
impl<'a> HnswChecker {
|
||||
async fn convert_result(
|
||||
&self,
|
||||
docs: &HnswDocs,
|
||||
res: VecDeque<(DocId, f64)>,
|
||||
) -> Result<VecDeque<KnnIteratorResult>, Error> {
|
||||
if res.is_empty() {
|
||||
return Ok(VecDeque::from([]));
|
||||
}
|
||||
let mut result = VecDeque::with_capacity(res.len());
|
||||
for (doc_id, dist) in res {
|
||||
if let Some(rid) = docs.get_thing(doc_id) {
|
||||
result.push_back((rid.clone(), dist, None));
|
||||
}
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct HnswCondChecker<'a> {
|
||||
ctx: &'a Context<'a>,
|
||||
opt: &'a Options,
|
||||
txn: &'a Transaction,
|
||||
cond: Arc<Cond>,
|
||||
cache: HashMap<DocId, CheckerCacheEntry>,
|
||||
}
|
||||
|
||||
impl<'a> HnswCondChecker<'a> {
|
||||
fn convert_result(&mut self, res: VecDeque<(DocId, f64)>) -> VecDeque<KnnIteratorResult> {
|
||||
CheckerCacheEntry::convert_result(res, &mut self.cache)
|
||||
}
|
||||
|
||||
async fn check_any_truthy(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
docs: &HnswDocs,
|
||||
doc_ids: &Ids64,
|
||||
) -> Result<bool, Error> {
|
||||
let mut res = false;
|
||||
for doc_id in doc_ids.iter() {
|
||||
if match self.cache.entry(doc_id) {
|
||||
Entry::Occupied(e) => e.get().truthy,
|
||||
Entry::Vacant(e) => {
|
||||
let rid: Option<Thing> = docs.get_thing(doc_id).cloned();
|
||||
let ent = CheckerCacheEntry::build(
|
||||
stk,
|
||||
self.ctx,
|
||||
self.opt,
|
||||
self.txn,
|
||||
rid,
|
||||
self.cond.as_ref(),
|
||||
)
|
||||
.await?;
|
||||
let truthy = ent.truthy;
|
||||
e.insert(ent);
|
||||
truthy
|
||||
}
|
||||
} {
|
||||
res = true;
|
||||
}
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn expire(&mut self, doc_id: DocId) {
|
||||
self.cache.remove(&doc_id);
|
||||
}
|
||||
|
||||
fn expires(&mut self, doc_ids: Ids64) {
|
||||
for doc_id in doc_ids.iter() {
|
||||
self.expire(doc_id);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -2,40 +2,60 @@ use crate::ctx::Context;
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::doc::CursorDoc;
|
||||
use crate::err::Error;
|
||||
use crate::idx::docids::{DocId, DocIds};
|
||||
use crate::idx::docids::DocIds;
|
||||
use crate::idx::ft::analyzer::{Analyzer, TermsList, TermsSet};
|
||||
use crate::idx::ft::scorer::BM25Scorer;
|
||||
use crate::idx::ft::termdocs::TermsDocs;
|
||||
use crate::idx::ft::terms::Terms;
|
||||
use crate::idx::ft::{FtIndex, MatchRef};
|
||||
use crate::idx::planner::checker::{HnswConditionChecker, MTreeConditionChecker};
|
||||
use crate::idx::planner::iterators::{
|
||||
DocIdsIterator, IndexEqualThingIterator, IndexJoinThingIterator, IndexRangeThingIterator,
|
||||
IndexUnionThingIterator, MatchesThingIterator, ThingIterator, ThingsIterator,
|
||||
UniqueEqualThingIterator, UniqueJoinThingIterator, UniqueRangeThingIterator,
|
||||
UniqueUnionThingIterator,
|
||||
IndexEqualThingIterator, IndexJoinThingIterator, IndexRangeThingIterator,
|
||||
IndexUnionThingIterator, IteratorRecord, IteratorRef, KnnIterator, KnnIteratorResult,
|
||||
MatchesThingIterator, ThingIterator, UniqueEqualThingIterator, UniqueJoinThingIterator,
|
||||
UniqueRangeThingIterator, UniqueUnionThingIterator,
|
||||
};
|
||||
use crate::idx::planner::knn::KnnPriorityList;
|
||||
use crate::idx::planner::knn::{KnnBruteForceResult, KnnPriorityList};
|
||||
use crate::idx::planner::plan::IndexOperator::Matches;
|
||||
use crate::idx::planner::plan::{IndexOperator, IndexOption, RangeValue};
|
||||
use crate::idx::planner::tree::{IdiomPosition, IndexRef, IndexesMap};
|
||||
use crate::idx::planner::{IterationStage, KnnSet};
|
||||
use crate::idx::planner::IterationStage;
|
||||
use crate::idx::trees::mtree::MTreeIndex;
|
||||
use crate::idx::trees::store::hnsw::SharedHnswIndex;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::kvs;
|
||||
use crate::kvs::{Key, TransactionType};
|
||||
use crate::sql::index::{Distance, Index};
|
||||
use crate::sql::statements::DefineIndexStatement;
|
||||
use crate::sql::{Array, Expression, Idiom, Number, Object, Table, Thing, Value};
|
||||
use crate::sql::{Cond, Expression, Idiom, Number, Object, Table, Thing, Value};
|
||||
use reblessive::tree::Stk;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub(super) type KnnEntry = (KnnPriorityList, Idiom, Arc<Vec<Number>>, Distance);
|
||||
pub(super) type KnnExpressions = HashMap<Arc<Expression>, (u32, Idiom, Arc<Vec<Number>>, Distance)>;
|
||||
pub(super) type AnnExpressions = HashMap<Arc<Expression>, (usize, Idiom, Arc<Vec<Number>>, usize)>;
|
||||
pub(super) type KnnBruteForceEntry = (KnnPriorityList, Idiom, Arc<Vec<Number>>, Distance);
|
||||
|
||||
pub(super) struct KnnBruteForceExpression {
|
||||
k: u32,
|
||||
id: Idiom,
|
||||
obj: Arc<Vec<Number>>,
|
||||
d: Distance,
|
||||
}
|
||||
|
||||
impl KnnBruteForceExpression {
|
||||
pub(super) fn new(k: u32, id: Idiom, obj: Arc<Vec<Number>>, d: Distance) -> Self {
|
||||
Self {
|
||||
k,
|
||||
id,
|
||||
obj,
|
||||
d,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) type KnnBruteForceExpressions = HashMap<Arc<Expression>, KnnBruteForceExpression>;
|
||||
|
||||
pub(super) type KnnExpressions = HashSet<Arc<Expression>>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct QueryExecutor(Arc<InnerQueryExecutor>);
|
||||
|
@ -49,7 +69,7 @@ pub(super) struct InnerQueryExecutor {
|
|||
index_definitions: Vec<DefineIndexStatement>,
|
||||
mt_entries: HashMap<Arc<Expression>, MtEntry>,
|
||||
hnsw_entries: HashMap<Arc<Expression>, HnswEntry>,
|
||||
knn_entries: HashMap<Arc<Expression>, KnnEntry>,
|
||||
knn_bruteforce_entries: HashMap<Arc<Expression>, KnnBruteForceEntry>,
|
||||
}
|
||||
|
||||
impl From<InnerQueryExecutor> for QueryExecutor {
|
||||
|
@ -58,8 +78,6 @@ impl From<InnerQueryExecutor> for QueryExecutor {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) type IteratorRef = u16;
|
||||
|
||||
pub(super) enum IteratorEntry {
|
||||
Single(Arc<Expression>, IndexOption),
|
||||
Range(HashSet<Arc<Expression>>, IndexRef, RangeValue, RangeValue),
|
||||
|
@ -82,6 +100,7 @@ impl IteratorEntry {
|
|||
}
|
||||
}
|
||||
impl InnerQueryExecutor {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) async fn new(
|
||||
stk: &mut Stk,
|
||||
ctx: &Context<'_>,
|
||||
|
@ -90,6 +109,8 @@ impl InnerQueryExecutor {
|
|||
table: &Table,
|
||||
im: IndexesMap,
|
||||
knns: KnnExpressions,
|
||||
kbtes: KnnBruteForceExpressions,
|
||||
knn_condition: Option<Cond>,
|
||||
) -> Result<Self, Error> {
|
||||
let mut mr_entries = HashMap::default();
|
||||
let mut exp_entries = HashMap::default();
|
||||
|
@ -98,7 +119,8 @@ impl InnerQueryExecutor {
|
|||
let mut mt_entries = HashMap::default();
|
||||
let mut hnsw_map: HashMap<IndexRef, SharedHnswIndex> = HashMap::default();
|
||||
let mut hnsw_entries = HashMap::default();
|
||||
let mut knn_entries = HashMap::with_capacity(knns.len());
|
||||
let mut knn_bruteforce_entries = HashMap::with_capacity(knns.len());
|
||||
let knn_condition = knn_condition.map(Arc::new);
|
||||
|
||||
// Create all the instances of FtIndex
|
||||
// Build the FtEntries and map them to Idioms and MatchRef
|
||||
|
@ -141,11 +163,23 @@ impl InnerQueryExecutor {
|
|||
}
|
||||
Index::MTree(p) => {
|
||||
if let IndexOperator::Knn(a, k) = io.op() {
|
||||
let mut tx = txn.lock().await;
|
||||
let entry = match mt_map.entry(ix_ref) {
|
||||
Entry::Occupied(e) => MtEntry::new(&mut tx, e.get(), a, *k).await?,
|
||||
Entry::Occupied(e) => {
|
||||
MtEntry::new(
|
||||
stk,
|
||||
ctx,
|
||||
opt,
|
||||
txn,
|
||||
e.get(),
|
||||
a,
|
||||
*k,
|
||||
knn_condition.clone(),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
Entry::Vacant(e) => {
|
||||
let ikb = IndexKeyBase::new(opt, idx_def);
|
||||
let mut tx = txn.lock().await;
|
||||
let mt = MTreeIndex::new(
|
||||
ctx.get_index_stores(),
|
||||
&mut tx,
|
||||
|
@ -154,7 +188,18 @@ impl InnerQueryExecutor {
|
|||
TransactionType::Read,
|
||||
)
|
||||
.await?;
|
||||
let entry = MtEntry::new(&mut tx, &mt, a, *k).await?;
|
||||
drop(tx);
|
||||
let entry = MtEntry::new(
|
||||
stk,
|
||||
ctx,
|
||||
opt,
|
||||
txn,
|
||||
&mt,
|
||||
a,
|
||||
*k,
|
||||
knn_condition.clone(),
|
||||
)
|
||||
.await?;
|
||||
e.insert(mt);
|
||||
entry
|
||||
}
|
||||
|
@ -163,17 +208,39 @@ impl InnerQueryExecutor {
|
|||
}
|
||||
}
|
||||
Index::Hnsw(p) => {
|
||||
if let IndexOperator::Ann(a, n, ef) = io.op() {
|
||||
if let IndexOperator::Ann(a, k, ef) = io.op() {
|
||||
let entry = match hnsw_map.entry(ix_ref) {
|
||||
Entry::Occupied(e) => {
|
||||
HnswEntry::new(e.get().clone(), a, *n, *ef).await?
|
||||
HnswEntry::new(
|
||||
stk,
|
||||
ctx,
|
||||
opt,
|
||||
txn,
|
||||
e.get().clone(),
|
||||
a,
|
||||
*k,
|
||||
*ef,
|
||||
knn_condition.clone(),
|
||||
)
|
||||
.await?
|
||||
}
|
||||
Entry::Vacant(e) => {
|
||||
let hnsw = ctx
|
||||
.get_index_stores()
|
||||
.get_index_hnsw(opt, idx_def, p)
|
||||
.await;
|
||||
let entry = HnswEntry::new(hnsw.clone(), a, *n, *ef).await?;
|
||||
let entry = HnswEntry::new(
|
||||
stk,
|
||||
ctx,
|
||||
opt,
|
||||
txn,
|
||||
hnsw.clone(),
|
||||
a,
|
||||
*k,
|
||||
*ef,
|
||||
knn_condition.clone(),
|
||||
)
|
||||
.await?;
|
||||
e.insert(hnsw);
|
||||
entry
|
||||
}
|
||||
|
@ -186,8 +253,9 @@ impl InnerQueryExecutor {
|
|||
}
|
||||
}
|
||||
|
||||
for (exp, (knn, id, obj, dist)) in knns {
|
||||
knn_entries.insert(exp, (KnnPriorityList::new(knn as usize), id, obj, dist));
|
||||
for (exp, knn) in kbtes {
|
||||
knn_bruteforce_entries
|
||||
.insert(exp, (KnnPriorityList::new(knn.k as usize), knn.id, knn.obj, knn.d));
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
|
@ -199,7 +267,7 @@ impl InnerQueryExecutor {
|
|||
index_definitions: im.definitions,
|
||||
mt_entries,
|
||||
hnsw_entries,
|
||||
knn_entries,
|
||||
knn_bruteforce_entries,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -223,18 +291,12 @@ impl QueryExecutor {
|
|||
exp: &Expression,
|
||||
) -> Result<Value, Error> {
|
||||
if let Some(IterationStage::Iterate(e)) = ctx.get_iteration_stage() {
|
||||
if let Some(e) = e {
|
||||
if let Some(e) = e.get(thg.tb.as_str()) {
|
||||
if let Some(things) = e.get(exp) {
|
||||
if things.contains(thg) {
|
||||
return Ok(Value::Bool(true));
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(results) = e {
|
||||
return Ok(results.contains(exp, thg).into());
|
||||
}
|
||||
Ok(Value::Bool(false))
|
||||
} else {
|
||||
if let Some((p, id, val, dist)) = self.0.knn_entries.get(exp) {
|
||||
if let Some((p, id, val, dist)) = self.0.knn_bruteforce_entries.get(exp) {
|
||||
let v: Vec<Number> = id.compute(stk, ctx, opt, txn, doc).await?.try_into()?;
|
||||
let dist = dist.compute(&v, val.as_ref())?;
|
||||
p.add(dist, thg).await;
|
||||
|
@ -243,25 +305,25 @@ impl QueryExecutor {
|
|||
}
|
||||
}
|
||||
|
||||
pub(super) async fn build_knn_set(&self) -> KnnSet {
|
||||
let mut set = HashMap::with_capacity(self.0.knn_entries.len());
|
||||
for (exp, (p, _, _, _)) in &self.0.knn_entries {
|
||||
set.insert(exp.clone(), p.build().await);
|
||||
pub(super) async fn build_bruteforce_knn_result(&self) -> KnnBruteForceResult {
|
||||
let mut result = KnnBruteForceResult::with_capacity(self.0.knn_bruteforce_entries.len());
|
||||
for (e, (p, _, _, _)) in &self.0.knn_bruteforce_entries {
|
||||
result.insert(e.clone(), p.build().await);
|
||||
}
|
||||
set
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn is_table(&self, tb: &str) -> bool {
|
||||
self.0.table.eq(tb)
|
||||
}
|
||||
|
||||
pub(crate) fn has_knn(&self) -> bool {
|
||||
!self.0.knn_entries.is_empty()
|
||||
pub(crate) fn has_bruteforce_knn(&self) -> bool {
|
||||
!self.0.knn_bruteforce_entries.is_empty()
|
||||
}
|
||||
|
||||
/// Returns `true` if the expression is matching the current iterator.
|
||||
pub(crate) fn is_iterator_expression(&self, ir: IteratorRef, exp: &Expression) -> bool {
|
||||
match self.0.it_entries.get(ir as usize) {
|
||||
pub(crate) fn is_iterator_expression(&self, irf: IteratorRef, exp: &Expression) -> bool {
|
||||
match self.0.it_entries.get(irf as usize) {
|
||||
Some(IteratorEntry::Single(e, ..)) => exp.eq(e.as_ref()),
|
||||
Some(IteratorEntry::Range(es, ..)) => es.contains(exp),
|
||||
_ => false,
|
||||
|
@ -287,13 +349,13 @@ impl QueryExecutor {
|
|||
pub(crate) async fn new_iterator(
|
||||
&self,
|
||||
opt: &Options,
|
||||
it_ref: IteratorRef,
|
||||
irf: IteratorRef,
|
||||
) -> Result<Option<ThingIterator>, Error> {
|
||||
if let Some(it_entry) = self.0.it_entries.get(it_ref as usize) {
|
||||
if let Some(it_entry) = self.0.it_entries.get(irf as usize) {
|
||||
match it_entry {
|
||||
IteratorEntry::Single(_, io) => self.new_single_iterator(opt, it_ref, io).await,
|
||||
IteratorEntry::Range(_, ir, from, to) => {
|
||||
Ok(self.new_range_iterator(opt, *ir, from, to))
|
||||
IteratorEntry::Single(_, io) => self.new_single_iterator(opt, irf, io).await,
|
||||
IteratorEntry::Range(_, ixr, from, to) => {
|
||||
Ok(self.new_range_iterator(opt, *ixr, from, to))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -304,20 +366,18 @@ impl QueryExecutor {
|
|||
async fn new_single_iterator(
|
||||
&self,
|
||||
opt: &Options,
|
||||
it_ref: IteratorRef,
|
||||
irf: IteratorRef,
|
||||
io: &IndexOption,
|
||||
) -> Result<Option<ThingIterator>, Error> {
|
||||
if let Some(ix) = self.get_index_def(io.ix_ref()) {
|
||||
match ix.index {
|
||||
Index::Idx => Ok(self.new_index_iterator(opt, it_ref, ix, io.clone()).await?),
|
||||
Index::Uniq => {
|
||||
Ok(self.new_unique_index_iterator(opt, it_ref, ix, io.clone()).await?)
|
||||
}
|
||||
Index::Idx => Ok(self.new_index_iterator(opt, irf, ix, io.clone()).await?),
|
||||
Index::Uniq => Ok(self.new_unique_index_iterator(opt, irf, ix, io.clone()).await?),
|
||||
Index::Search {
|
||||
..
|
||||
} => self.new_search_index_iterator(it_ref, io.clone()).await,
|
||||
Index::MTree(_) => Ok(self.new_mtree_index_knn_iterator(it_ref)),
|
||||
Index::Hnsw(_) => Ok(self.new_hnsw_index_ann_iterator(it_ref)),
|
||||
} => self.new_search_index_iterator(irf, io.clone()).await,
|
||||
Index::MTree(_) => Ok(self.new_mtree_index_knn_iterator(irf)),
|
||||
Index::Hnsw(_) => Ok(self.new_hnsw_index_ann_iterator(irf)),
|
||||
}
|
||||
} else {
|
||||
Ok(None)
|
||||
|
@ -327,13 +387,14 @@ impl QueryExecutor {
|
|||
async fn new_index_iterator(
|
||||
&self,
|
||||
opt: &Options,
|
||||
it_ref: IteratorRef,
|
||||
irf: IteratorRef,
|
||||
ix: &DefineIndexStatement,
|
||||
io: IndexOption,
|
||||
) -> Result<Option<ThingIterator>, Error> {
|
||||
Ok(match io.op() {
|
||||
IndexOperator::Equality(value) | IndexOperator::Exactness(value) => {
|
||||
Some(ThingIterator::IndexEqual(IndexEqualThingIterator::new(
|
||||
irf,
|
||||
opt.ns(),
|
||||
opt.db(),
|
||||
&ix.what,
|
||||
|
@ -342,11 +403,11 @@ impl QueryExecutor {
|
|||
)))
|
||||
}
|
||||
IndexOperator::Union(value) => Some(ThingIterator::IndexUnion(
|
||||
IndexUnionThingIterator::new(opt.ns(), opt.db(), &ix.what, &ix.name, value),
|
||||
IndexUnionThingIterator::new(irf, opt.ns(), opt.db(), &ix.what, &ix.name, value),
|
||||
)),
|
||||
IndexOperator::Join(ios) => {
|
||||
let iterators = self.build_iterators(opt, it_ref, ios).await?;
|
||||
let index_join = Box::new(IndexJoinThingIterator::new(opt, ix, iterators));
|
||||
let iterators = self.build_iterators(opt, irf, ios).await?;
|
||||
let index_join = Box::new(IndexJoinThingIterator::new(irf, opt, ix, iterators));
|
||||
Some(ThingIterator::IndexJoin(index_join))
|
||||
}
|
||||
_ => None,
|
||||
|
@ -364,6 +425,7 @@ impl QueryExecutor {
|
|||
match ix.index {
|
||||
Index::Idx => {
|
||||
return Some(ThingIterator::IndexRange(IndexRangeThingIterator::new(
|
||||
ir,
|
||||
opt.ns(),
|
||||
opt.db(),
|
||||
&ix.what,
|
||||
|
@ -374,6 +436,7 @@ impl QueryExecutor {
|
|||
}
|
||||
Index::Uniq => {
|
||||
return Some(ThingIterator::UniqueRange(UniqueRangeThingIterator::new(
|
||||
ir,
|
||||
opt.ns(),
|
||||
opt.db(),
|
||||
&ix.what,
|
||||
|
@ -391,20 +454,20 @@ impl QueryExecutor {
|
|||
async fn new_unique_index_iterator(
|
||||
&self,
|
||||
opt: &Options,
|
||||
it_ref: IteratorRef,
|
||||
irf: IteratorRef,
|
||||
ix: &DefineIndexStatement,
|
||||
io: IndexOption,
|
||||
) -> Result<Option<ThingIterator>, Error> {
|
||||
Ok(match io.op() {
|
||||
IndexOperator::Equality(value) => Some(ThingIterator::UniqueEqual(
|
||||
UniqueEqualThingIterator::new(opt.ns(), opt.db(), &ix.what, &ix.name, value),
|
||||
UniqueEqualThingIterator::new(irf, opt.ns(), opt.db(), &ix.what, &ix.name, value),
|
||||
)),
|
||||
IndexOperator::Union(value) => {
|
||||
Some(ThingIterator::UniqueUnion(UniqueUnionThingIterator::new(opt, ix, value)))
|
||||
Some(ThingIterator::UniqueUnion(UniqueUnionThingIterator::new(irf, opt, ix, value)))
|
||||
}
|
||||
IndexOperator::Join(ios) => {
|
||||
let iterators = self.build_iterators(opt, it_ref, ios).await?;
|
||||
let unique_join = Box::new(UniqueJoinThingIterator::new(opt, ix, iterators));
|
||||
let iterators = self.build_iterators(opt, irf, ios).await?;
|
||||
let unique_join = Box::new(UniqueJoinThingIterator::new(irf, opt, ix, iterators));
|
||||
Some(ThingIterator::UniqueJoin(unique_join))
|
||||
}
|
||||
_ => None,
|
||||
|
@ -413,14 +476,15 @@ impl QueryExecutor {
|
|||
|
||||
async fn new_search_index_iterator(
|
||||
&self,
|
||||
it_ref: IteratorRef,
|
||||
irf: IteratorRef,
|
||||
io: IndexOption,
|
||||
) -> Result<Option<ThingIterator>, Error> {
|
||||
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(it_ref as usize) {
|
||||
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(irf as usize) {
|
||||
if let Matches(_, _) = io.op() {
|
||||
if let Some(fti) = self.0.ft_map.get(&io.ix_ref()) {
|
||||
if let Some(fte) = self.0.exp_entries.get(exp) {
|
||||
let it = MatchesThingIterator::new(fti, fte.0.terms_docs.clone()).await?;
|
||||
let it =
|
||||
MatchesThingIterator::new(irf, fti, fte.0.terms_docs.clone()).await?;
|
||||
return Ok(Some(ThingIterator::Matches(it)));
|
||||
}
|
||||
}
|
||||
|
@ -429,24 +493,21 @@ impl QueryExecutor {
|
|||
Ok(None)
|
||||
}
|
||||
|
||||
fn new_mtree_index_knn_iterator(&self, it_ref: IteratorRef) -> Option<ThingIterator> {
|
||||
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(it_ref as usize) {
|
||||
fn new_mtree_index_knn_iterator(&self, irf: IteratorRef) -> Option<ThingIterator> {
|
||||
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(irf as usize) {
|
||||
if let Some(mte) = self.0.mt_entries.get(exp) {
|
||||
let it = DocIdsIterator::new(
|
||||
mte.doc_ids.clone(),
|
||||
mte.res.iter().map(|(d, _)| *d).collect(),
|
||||
);
|
||||
let it = KnnIterator::new(irf, mte.res.clone());
|
||||
return Some(ThingIterator::Knn(it));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn new_hnsw_index_ann_iterator(&self, it_ref: IteratorRef) -> Option<ThingIterator> {
|
||||
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(it_ref as usize) {
|
||||
fn new_hnsw_index_ann_iterator(&self, irf: IteratorRef) -> Option<ThingIterator> {
|
||||
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(irf as usize) {
|
||||
if let Some(he) = self.0.hnsw_entries.get(exp) {
|
||||
let it = ThingsIterator::new(he.res.iter().map(|(thg, _)| thg.clone()).collect());
|
||||
return Some(ThingIterator::Things(it));
|
||||
let it = KnnIterator::new(irf, he.res.clone());
|
||||
return Some(ThingIterator::Knn(it));
|
||||
}
|
||||
}
|
||||
None
|
||||
|
@ -455,12 +516,12 @@ impl QueryExecutor {
|
|||
async fn build_iterators(
|
||||
&self,
|
||||
opt: &Options,
|
||||
it_ref: IteratorRef,
|
||||
irf: IteratorRef,
|
||||
ios: &[IndexOption],
|
||||
) -> Result<VecDeque<ThingIterator>, Error> {
|
||||
let mut iterators = VecDeque::with_capacity(ios.len());
|
||||
for io in ios {
|
||||
if let Some(it) = Box::pin(self.new_single_iterator(opt, it_ref, io)).await? {
|
||||
if let Some(it) = Box::pin(self.new_single_iterator(opt, irf, io)).await? {
|
||||
iterators.push_back(it);
|
||||
}
|
||||
}
|
||||
|
@ -504,9 +565,13 @@ impl QueryExecutor {
|
|||
thg: &Thing,
|
||||
ft: &FtEntry,
|
||||
) -> Result<bool, Error> {
|
||||
let mut run = txn.lock().await;
|
||||
let doc_key: Key = thg.into();
|
||||
if let Some(doc_id) = ft.0.doc_ids.read().await.get_doc_id(&mut run, doc_key).await? {
|
||||
let mut run = txn.lock().await;
|
||||
let di = ft.0.doc_ids.read().await;
|
||||
let doc_id = di.get_doc_id(&mut run, doc_key).await?;
|
||||
drop(di);
|
||||
drop(run);
|
||||
if let Some(doc_id) = doc_id {
|
||||
let term_goals = ft.0.terms_docs.len();
|
||||
// If there is no terms, it can't be a match
|
||||
if term_goals == 0 {
|
||||
|
@ -551,6 +616,7 @@ impl QueryExecutor {
|
|||
let terms = ft.0.terms.read().await;
|
||||
// Extract the terms set from the record
|
||||
let t = ft.0.analyzer.extract_indexing_terms(stk, ctx, opt, txn, &terms, v).await?;
|
||||
drop(terms);
|
||||
Ok(ft.0.query_terms_set.is_subset(&t))
|
||||
}
|
||||
|
||||
|
@ -584,7 +650,7 @@ impl QueryExecutor {
|
|||
) -> Result<Value, Error> {
|
||||
if let Some((e, ft)) = self.get_ft_entry_and_index(&match_ref) {
|
||||
let mut run = txn.lock().await;
|
||||
return ft
|
||||
let res = ft
|
||||
.highlight(
|
||||
&mut run,
|
||||
thg,
|
||||
|
@ -596,6 +662,8 @@ impl QueryExecutor {
|
|||
doc,
|
||||
)
|
||||
.await;
|
||||
drop(run);
|
||||
return res;
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
@ -609,7 +677,9 @@ impl QueryExecutor {
|
|||
) -> Result<Value, Error> {
|
||||
if let Some((e, ft)) = self.get_ft_entry_and_index(&match_ref) {
|
||||
let mut run = txn.lock().await;
|
||||
return ft.extract_offsets(&mut run, thg, &e.0.query_terms_list, partial).await;
|
||||
let res = ft.extract_offsets(&mut run, thg, &e.0.query_terms_list, partial).await;
|
||||
drop(run);
|
||||
return res;
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
@ -619,21 +689,30 @@ impl QueryExecutor {
|
|||
txn: &Transaction,
|
||||
match_ref: &Value,
|
||||
rid: &Thing,
|
||||
mut doc_id: Option<DocId>,
|
||||
ir: Option<&IteratorRecord>,
|
||||
) -> Result<Value, Error> {
|
||||
if let Some(e) = self.get_ft_entry(match_ref) {
|
||||
if let Some(scorer) = &e.0.scorer {
|
||||
let mut run = txn.lock().await;
|
||||
let mut doc_id = if let Some(ir) = ir {
|
||||
ir.doc_id()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
if doc_id.is_none() {
|
||||
let key: Key = rid.into();
|
||||
doc_id = e.0.doc_ids.read().await.get_doc_id(&mut run, key).await?;
|
||||
};
|
||||
let di = e.0.doc_ids.read().await;
|
||||
doc_id = di.get_doc_id(&mut run, key).await?;
|
||||
drop(di);
|
||||
}
|
||||
if let Some(doc_id) = doc_id {
|
||||
let score = scorer.score(&mut run, doc_id).await?;
|
||||
if let Some(score) = score {
|
||||
drop(run);
|
||||
return Ok(Value::from(score));
|
||||
}
|
||||
}
|
||||
drop(run);
|
||||
}
|
||||
}
|
||||
Ok(Value::None)
|
||||
|
@ -668,6 +747,7 @@ impl FtEntry {
|
|||
ft.extract_querying_terms(stk, ctx, opt, txn, qs.to_owned()).await?;
|
||||
let mut tx = txn.lock().await;
|
||||
let terms_docs = Arc::new(ft.get_terms_docs(&mut tx, &terms_list).await?);
|
||||
drop(tx);
|
||||
Ok(Some(Self(Arc::new(Inner {
|
||||
index_option: io,
|
||||
doc_ids: ft.doc_ids(),
|
||||
|
@ -686,33 +766,59 @@ impl FtEntry {
|
|||
|
||||
#[derive(Clone)]
|
||||
pub(super) struct MtEntry {
|
||||
doc_ids: Arc<RwLock<DocIds>>,
|
||||
res: VecDeque<(DocId, f64)>,
|
||||
res: VecDeque<KnnIteratorResult>,
|
||||
}
|
||||
|
||||
impl MtEntry {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn new(
|
||||
tx: &mut kvs::Transaction,
|
||||
stk: &mut Stk,
|
||||
ctx: &Context<'_>,
|
||||
opt: &Options,
|
||||
txn: &Transaction,
|
||||
mt: &MTreeIndex,
|
||||
a: &Array,
|
||||
o: &[Number],
|
||||
k: u32,
|
||||
cond: Option<Arc<Cond>>,
|
||||
) -> Result<Self, Error> {
|
||||
let res = mt.knn_search(tx, a, k as usize).await?;
|
||||
let cond_checker = if let Some(cond) = cond {
|
||||
MTreeConditionChecker::new_cond(ctx, opt, txn, cond)
|
||||
} else {
|
||||
MTreeConditionChecker::new(txn)
|
||||
};
|
||||
let res = mt.knn_search(stk, txn, o, k as usize, cond_checker).await?;
|
||||
Ok(Self {
|
||||
res,
|
||||
doc_ids: mt.doc_ids(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(super) struct HnswEntry {
|
||||
res: VecDeque<(Thing, f64)>,
|
||||
res: VecDeque<KnnIteratorResult>,
|
||||
}
|
||||
|
||||
impl HnswEntry {
|
||||
async fn new(h: SharedHnswIndex, a: &Array, n: usize, ef: usize) -> Result<Self, Error> {
|
||||
let res = h.read().await.knn_search(a, n, ef)?;
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn new(
|
||||
stk: &mut Stk,
|
||||
ctx: &Context<'_>,
|
||||
opt: &Options,
|
||||
txn: &Transaction,
|
||||
h: SharedHnswIndex,
|
||||
v: &[Number],
|
||||
n: u32,
|
||||
ef: u32,
|
||||
cond: Option<Arc<Cond>>,
|
||||
) -> Result<Self, Error> {
|
||||
let cond_checker = if let Some(cond) = cond {
|
||||
HnswConditionChecker::new_cond(ctx, opt, txn, cond)
|
||||
} else {
|
||||
HnswConditionChecker::default()
|
||||
};
|
||||
let h = h.read().await;
|
||||
let res = h.knn_search(v, n as usize, ef as usize, stk, cond_checker).await?;
|
||||
drop(h);
|
||||
Ok(Self {
|
||||
res,
|
||||
})
|
||||
|
|
|
@ -1,17 +1,105 @@
|
|||
use crate::dbs::{Options, Transaction};
|
||||
use crate::ctx::Context;
|
||||
use crate::dbs::Options;
|
||||
use crate::err::Error;
|
||||
use crate::idx::docids::{DocId, DocIds};
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::ft::termdocs::TermsDocs;
|
||||
use crate::idx::ft::{FtIndex, HitsIterator};
|
||||
use crate::idx::planner::plan::RangeValue;
|
||||
use crate::key::index::Index;
|
||||
use crate::kvs;
|
||||
use crate::kvs::{Key, Limit, ScanPage};
|
||||
use crate::sql::statements::DefineIndexStatement;
|
||||
use crate::sql::{Array, Ident, Thing, Value};
|
||||
use radix_trie::Trie;
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub(crate) type IteratorRef = u16;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct IteratorRecord {
|
||||
irf: IteratorRef,
|
||||
doc_id: Option<DocId>,
|
||||
dist: Option<f64>,
|
||||
}
|
||||
|
||||
impl IteratorRecord {
|
||||
pub(crate) fn irf(&self) -> IteratorRef {
|
||||
self.irf
|
||||
}
|
||||
pub(crate) fn doc_id(&self) -> Option<DocId> {
|
||||
self.doc_id
|
||||
}
|
||||
|
||||
pub(crate) fn dist(&self) -> Option<f64> {
|
||||
self.dist
|
||||
}
|
||||
}
|
||||
impl From<IteratorRef> for IteratorRecord {
|
||||
fn from(irf: IteratorRef) -> Self {
|
||||
IteratorRecord {
|
||||
irf,
|
||||
doc_id: None,
|
||||
dist: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait IteratorBatch {
|
||||
fn empty() -> Self;
|
||||
fn with_capacity(capacity: usize) -> Self;
|
||||
fn from_one(record: CollectorRecord) -> Self;
|
||||
fn add(&mut self, record: CollectorRecord);
|
||||
fn len(&self) -> usize;
|
||||
fn is_empty(&self) -> bool;
|
||||
}
|
||||
|
||||
impl IteratorBatch for Vec<CollectorRecord> {
|
||||
fn empty() -> Self {
|
||||
Vec::from([])
|
||||
}
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
Vec::with_capacity(capacity)
|
||||
}
|
||||
fn from_one(record: CollectorRecord) -> Self {
|
||||
Vec::from([record])
|
||||
}
|
||||
|
||||
fn add(&mut self, record: CollectorRecord) {
|
||||
self.push(record)
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
Vec::len(self)
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
Vec::is_empty(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl IteratorBatch for VecDeque<CollectorRecord> {
|
||||
fn empty() -> Self {
|
||||
VecDeque::from([])
|
||||
}
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
VecDeque::with_capacity(capacity)
|
||||
}
|
||||
fn from_one(record: CollectorRecord) -> Self {
|
||||
VecDeque::from([record])
|
||||
}
|
||||
|
||||
fn add(&mut self, record: CollectorRecord) {
|
||||
self.push_back(record)
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
VecDeque::len(self)
|
||||
}
|
||||
fn is_empty(&self) -> bool {
|
||||
VecDeque::is_empty(self)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum ThingIterator {
|
||||
IndexEqual(IndexEqualThingIterator),
|
||||
|
@ -23,77 +111,68 @@ pub(crate) enum ThingIterator {
|
|||
UniqueUnion(UniqueUnionThingIterator),
|
||||
UniqueJoin(Box<UniqueJoinThingIterator>),
|
||||
Matches(MatchesThingIterator),
|
||||
Knn(DocIdsIterator),
|
||||
Things(ThingsIterator),
|
||||
Knn(KnnIterator),
|
||||
}
|
||||
|
||||
impl ThingIterator {
|
||||
pub(crate) async fn next_batch<T: ThingCollector>(
|
||||
pub(crate) async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
tx: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
size: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
) -> Result<B, Error> {
|
||||
match self {
|
||||
Self::IndexEqual(i) => i.next_batch(tx, size, collector).await,
|
||||
Self::UniqueEqual(i) => i.next_batch(tx, collector).await,
|
||||
Self::IndexRange(i) => i.next_batch(tx, size, collector).await,
|
||||
Self::UniqueRange(i) => i.next_batch(tx, size, collector).await,
|
||||
Self::IndexUnion(i) => i.next_batch(tx, size, collector).await,
|
||||
Self::UniqueUnion(i) => i.next_batch(tx, size, collector).await,
|
||||
Self::Matches(i) => i.next_batch(tx, size, collector).await,
|
||||
Self::Knn(i) => i.next_batch(tx, size, collector).await,
|
||||
Self::IndexJoin(i) => Box::pin(i.next_batch(tx, size, collector)).await,
|
||||
Self::UniqueJoin(i) => Box::pin(i.next_batch(tx, size, collector)).await,
|
||||
Self::Things(i) => Ok(i.next_batch(size, collector)),
|
||||
Self::IndexEqual(i) => i.next_batch(tx, size).await,
|
||||
Self::UniqueEqual(i) => i.next_batch(tx).await,
|
||||
Self::IndexRange(i) => i.next_batch(tx, size).await,
|
||||
Self::UniqueRange(i) => i.next_batch(tx, size).await,
|
||||
Self::IndexUnion(i) => i.next_batch(ctx, tx, size).await,
|
||||
Self::UniqueUnion(i) => i.next_batch(ctx, tx, size).await,
|
||||
Self::Matches(i) => i.next_batch(ctx, tx, size).await,
|
||||
Self::Knn(i) => i.next_batch(ctx, size).await,
|
||||
Self::IndexJoin(i) => Box::pin(i.next_batch(ctx, tx, size)).await,
|
||||
Self::UniqueJoin(i) => Box::pin(i.next_batch(ctx, tx, size)).await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) trait ThingCollector {
|
||||
fn add(&mut self, thing: Thing, doc_id: Option<DocId>);
|
||||
}
|
||||
|
||||
impl ThingCollector for Vec<(Thing, Option<DocId>)> {
|
||||
fn add(&mut self, thing: Thing, doc_id: Option<DocId>) {
|
||||
self.push((thing, doc_id));
|
||||
}
|
||||
}
|
||||
|
||||
impl ThingCollector for VecDeque<(Thing, Option<DocId>)> {
|
||||
fn add(&mut self, thing: Thing, doc_id: Option<DocId>) {
|
||||
self.push_back((thing, doc_id));
|
||||
}
|
||||
}
|
||||
pub(crate) type CollectorRecord = (Thing, IteratorRecord, Option<Value>);
|
||||
|
||||
pub(crate) struct IndexEqualThingIterator {
|
||||
irf: IteratorRef,
|
||||
beg: Vec<u8>,
|
||||
end: Vec<u8>,
|
||||
}
|
||||
|
||||
impl IndexEqualThingIterator {
|
||||
pub(super) fn new(ns: &str, db: &str, ix_what: &Ident, ix_name: &Ident, v: &Value) -> Self {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
ns: &str,
|
||||
db: &str,
|
||||
ix_what: &Ident,
|
||||
ix_name: &Ident,
|
||||
v: &Value,
|
||||
) -> Self {
|
||||
let a = Array::from(v.clone());
|
||||
let beg = Index::prefix_ids_beg(ns, db, ix_what, ix_name, &a);
|
||||
let end = Index::prefix_ids_end(ns, db, ix_what, ix_name, &a);
|
||||
Self {
|
||||
irf,
|
||||
beg,
|
||||
end,
|
||||
}
|
||||
}
|
||||
|
||||
async fn next_scan<T: ThingCollector>(
|
||||
txn: &Transaction,
|
||||
async fn next_scan<B: IteratorBatch>(
|
||||
tx: &mut kvs::Transaction,
|
||||
irf: IteratorRef,
|
||||
beg: &mut Vec<u8>,
|
||||
end: &[u8],
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
) -> Result<B, Error> {
|
||||
let min = beg.clone();
|
||||
let max = end.to_owned();
|
||||
let res = txn
|
||||
.lock()
|
||||
.await
|
||||
let res = tx
|
||||
.scan_paged(
|
||||
ScanPage {
|
||||
range: min..max,
|
||||
|
@ -108,18 +187,17 @@ impl IndexEqualThingIterator {
|
|||
key.push(0x00);
|
||||
*beg = key;
|
||||
}
|
||||
let count = res.len();
|
||||
res.into_iter().for_each(|(_, val)| collector.add(val.into(), None));
|
||||
Ok(count)
|
||||
let mut records = B::with_capacity(res.len());
|
||||
res.into_iter().for_each(|(_, val)| records.add((val.into(), irf.into(), None)));
|
||||
Ok(records)
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
Self::next_scan(txn, &mut self.beg, &self.end, limit, collector).await
|
||||
) -> Result<B, Error> {
|
||||
Self::next_scan(tx, self.irf, &mut self.beg, &self.end, limit).await
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -168,11 +246,13 @@ impl RangeScan {
|
|||
}
|
||||
|
||||
pub(crate) struct IndexRangeThingIterator {
|
||||
irf: IteratorRef,
|
||||
r: RangeScan,
|
||||
}
|
||||
|
||||
impl IndexRangeThingIterator {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
ns: &str,
|
||||
db: &str,
|
||||
ix_what: &Ident,
|
||||
|
@ -183,6 +263,7 @@ impl IndexRangeThingIterator {
|
|||
let beg = Self::compute_beg(ns, db, ix_what, ix_name, from);
|
||||
let end = Self::compute_end(ns, db, ix_what, ix_name, to);
|
||||
Self {
|
||||
irf,
|
||||
r: RangeScan::new(beg, from.inclusive, end, to.inclusive),
|
||||
}
|
||||
}
|
||||
|
@ -223,17 +304,14 @@ impl IndexRangeThingIterator {
|
|||
}
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
) -> Result<B, Error> {
|
||||
let min = self.r.beg.clone();
|
||||
let max = self.r.end.clone();
|
||||
let res = txn
|
||||
.lock()
|
||||
.await
|
||||
let res = tx
|
||||
.scan_paged(
|
||||
ScanPage {
|
||||
range: min..max,
|
||||
|
@ -247,24 +325,29 @@ impl IndexRangeThingIterator {
|
|||
self.r.beg.clone_from(key);
|
||||
self.r.beg.push(0x00);
|
||||
}
|
||||
let mut count = 0;
|
||||
for (k, v) in res {
|
||||
if self.r.matches(&k) {
|
||||
collector.add(v.into(), None);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
Ok(count)
|
||||
let mut records = B::with_capacity(res.len());
|
||||
res.into_iter()
|
||||
.filter(|(k, _)| self.r.matches(k))
|
||||
.for_each(|(_, v)| records.add((v.into(), self.irf.into(), None)));
|
||||
Ok(records)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct IndexUnionThingIterator {
|
||||
irf: IteratorRef,
|
||||
values: VecDeque<(Vec<u8>, Vec<u8>)>,
|
||||
current: Option<(Vec<u8>, Vec<u8>)>,
|
||||
}
|
||||
|
||||
impl IndexUnionThingIterator {
|
||||
pub(super) fn new(ns: &str, db: &str, ix_what: &Ident, ix_name: &Ident, a: &Array) -> Self {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
ns: &str,
|
||||
db: &str,
|
||||
ix_what: &Ident,
|
||||
ix_name: &Ident,
|
||||
a: &Array,
|
||||
) -> Self {
|
||||
// We create a VecDeque to hold the prefix keys (begin and end) for each value in the array.
|
||||
let mut values: VecDeque<(Vec<u8>, Vec<u8>)> =
|
||||
a.0.iter()
|
||||
|
@ -277,26 +360,30 @@ impl IndexUnionThingIterator {
|
|||
.collect();
|
||||
let current = values.pop_front();
|
||||
Self {
|
||||
irf,
|
||||
values,
|
||||
current,
|
||||
}
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
) -> Result<B, Error> {
|
||||
while let Some(r) = &mut self.current {
|
||||
let count =
|
||||
IndexEqualThingIterator::next_scan(txn, &mut r.0, &r.1, limit, collector).await?;
|
||||
if count != 0 {
|
||||
return Ok(count);
|
||||
if ctx.is_done() {
|
||||
break;
|
||||
}
|
||||
let records: B =
|
||||
IndexEqualThingIterator::next_scan(tx, self.irf, &mut r.0, &r.1, limit).await?;
|
||||
if !records.is_empty() {
|
||||
return Ok(records);
|
||||
}
|
||||
self.current = self.values.pop_front();
|
||||
}
|
||||
Ok(0)
|
||||
Ok(B::empty())
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -307,7 +394,7 @@ struct JoinThingIterator {
|
|||
ix_name: Ident,
|
||||
remote_iterators: VecDeque<ThingIterator>,
|
||||
current_remote: Option<ThingIterator>,
|
||||
current_remote_batch: VecDeque<(Thing, Option<DocId>)>,
|
||||
current_remote_batch: VecDeque<CollectorRecord>,
|
||||
current_local: Option<ThingIterator>,
|
||||
distinct: Trie<Key, bool>,
|
||||
}
|
||||
|
@ -324,7 +411,7 @@ impl JoinThingIterator {
|
|||
ix_what: ix.what.clone(),
|
||||
ix_name: ix.name.clone(),
|
||||
current_remote: None,
|
||||
current_remote_batch: VecDeque::with_capacity(0),
|
||||
current_remote_batch: VecDeque::with_capacity(1),
|
||||
remote_iterators,
|
||||
current_local: None,
|
||||
distinct: Default::default(),
|
||||
|
@ -335,34 +422,37 @@ impl JoinThingIterator {
|
|||
impl JoinThingIterator {
|
||||
async fn next_current_remote_batch(
|
||||
&mut self,
|
||||
tx: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
) -> Result<bool, Error> {
|
||||
loop {
|
||||
while !ctx.is_done() {
|
||||
if let Some(it) = &mut self.current_remote {
|
||||
self.current_remote_batch.clear();
|
||||
if it.next_batch(tx, limit, &mut self.current_remote_batch).await? > 0 {
|
||||
self.current_remote_batch = it.next_batch(ctx, tx, limit).await?;
|
||||
if !self.current_remote_batch.is_empty() {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
self.current_remote = self.remote_iterators.pop_front();
|
||||
if self.current_remote.is_none() {
|
||||
return Ok(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
async fn next_current_local<F>(
|
||||
&mut self,
|
||||
tx: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
new_iter: F,
|
||||
) -> Result<bool, Error>
|
||||
where
|
||||
F: Fn(&str, &str, &Ident, &Ident, Value) -> ThingIterator,
|
||||
{
|
||||
loop {
|
||||
while let Some((thing, _)) = self.current_remote_batch.pop_front() {
|
||||
while !ctx.is_done() {
|
||||
while let Some((thing, _, _)) = self.current_remote_batch.pop_front() {
|
||||
let k: Key = (&thing).into();
|
||||
let value = Value::from(thing);
|
||||
if self.distinct.insert(k, true).is_none() {
|
||||
|
@ -371,98 +461,109 @@ impl JoinThingIterator {
|
|||
return Ok(true);
|
||||
}
|
||||
}
|
||||
if !self.next_current_remote_batch(tx, limit).await? {
|
||||
if !self.next_current_remote_batch(ctx, tx, limit).await? {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector, F>(
|
||||
async fn next_batch<F, B: IteratorBatch>(
|
||||
&mut self,
|
||||
tx: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
new_iter: F,
|
||||
) -> Result<usize, Error>
|
||||
) -> Result<B, Error>
|
||||
where
|
||||
F: Fn(&str, &str, &Ident, &Ident, Value) -> ThingIterator + Copy,
|
||||
{
|
||||
loop {
|
||||
while !ctx.is_done() {
|
||||
if let Some(current_local) = &mut self.current_local {
|
||||
let n = current_local.next_batch(tx, limit, collector).await?;
|
||||
if n > 0 {
|
||||
return Ok(n);
|
||||
let records: B = current_local.next_batch(ctx, tx, limit).await?;
|
||||
if !records.is_empty() {
|
||||
return Ok(records);
|
||||
}
|
||||
}
|
||||
if !self.next_current_local(tx, limit, new_iter).await? {
|
||||
return Ok(0);
|
||||
if !self.next_current_local(ctx, tx, limit, new_iter).await? {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(B::empty())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct IndexJoinThingIterator(JoinThingIterator);
|
||||
pub(crate) struct IndexJoinThingIterator(IteratorRef, JoinThingIterator);
|
||||
|
||||
impl IndexJoinThingIterator {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
opt: &Options,
|
||||
ix: &DefineIndexStatement,
|
||||
remote_iterators: VecDeque<ThingIterator>,
|
||||
) -> Self {
|
||||
Self(JoinThingIterator::new(opt, ix, remote_iterators))
|
||||
Self(irf, JoinThingIterator::new(opt, ix, remote_iterators))
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
tx: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
) -> Result<B, Error> {
|
||||
let new_iter = |ns: &str, db: &str, ix_what: &Ident, ix_name: &Ident, value: Value| {
|
||||
let it = IndexEqualThingIterator::new(ns, db, ix_what, ix_name, &value);
|
||||
let it = IndexEqualThingIterator::new(self.0, ns, db, ix_what, ix_name, &value);
|
||||
ThingIterator::IndexEqual(it)
|
||||
};
|
||||
self.0.next_batch(tx, limit, collector, new_iter).await
|
||||
self.1.next_batch(ctx, tx, limit, new_iter).await
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct UniqueEqualThingIterator {
|
||||
irf: IteratorRef,
|
||||
key: Option<Key>,
|
||||
}
|
||||
|
||||
impl UniqueEqualThingIterator {
|
||||
pub(super) fn new(ns: &str, db: &str, ix_what: &Ident, ix_name: &Ident, v: &Value) -> Self {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
ns: &str,
|
||||
db: &str,
|
||||
ix_what: &Ident,
|
||||
ix_name: &Ident,
|
||||
v: &Value,
|
||||
) -> Self {
|
||||
let a = Array::from(v.to_owned());
|
||||
let key = Index::new(ns, db, ix_what, ix_name, &a, None).into();
|
||||
Self {
|
||||
irf,
|
||||
key: Some(key),
|
||||
}
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
let mut count = 0;
|
||||
tx: &mut kvs::Transaction,
|
||||
) -> Result<B, Error> {
|
||||
if let Some(key) = self.key.take() {
|
||||
if let Some(val) = txn.lock().await.get(key).await? {
|
||||
collector.add(val.into(), None);
|
||||
count += 1;
|
||||
if let Some(val) = tx.get(key).await? {
|
||||
let record = (val.into(), self.irf.into(), None);
|
||||
return Ok(B::from_one(record));
|
||||
}
|
||||
}
|
||||
Ok(count)
|
||||
Ok(B::empty())
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct UniqueRangeThingIterator {
|
||||
irf: IteratorRef,
|
||||
r: RangeScan,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl UniqueRangeThingIterator {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
ns: &str,
|
||||
db: &str,
|
||||
ix_what: &Ident,
|
||||
|
@ -473,6 +574,7 @@ impl UniqueRangeThingIterator {
|
|||
let beg = Self::compute_beg(ns, db, ix_what, ix_name, from);
|
||||
let end = Self::compute_end(ns, db, ix_what, ix_name, to);
|
||||
Self {
|
||||
irf,
|
||||
r: RangeScan::new(beg, from.inclusive, end, to.inclusive),
|
||||
done: false,
|
||||
}
|
||||
|
@ -508,19 +610,17 @@ impl UniqueRangeThingIterator {
|
|||
.unwrap()
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
tx: &mut kvs::Transaction,
|
||||
mut limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
) -> Result<B, Error> {
|
||||
if self.done {
|
||||
return Ok(0);
|
||||
return Ok(B::empty());
|
||||
}
|
||||
let min = self.r.beg.clone();
|
||||
let max = self.r.end.clone();
|
||||
limit += 1;
|
||||
let mut tx = txn.lock().await;
|
||||
let res = tx
|
||||
.scan_paged(
|
||||
ScanPage {
|
||||
|
@ -530,36 +630,40 @@ impl UniqueRangeThingIterator {
|
|||
limit,
|
||||
)
|
||||
.await?;
|
||||
let mut count = 0;
|
||||
let mut records = B::with_capacity(res.values.len());
|
||||
for (k, v) in res.values {
|
||||
limit -= 1;
|
||||
if limit == 0 {
|
||||
self.r.beg = k;
|
||||
return Ok(count);
|
||||
return Ok(records);
|
||||
}
|
||||
if self.r.matches(&k) {
|
||||
collector.add(v.into(), None);
|
||||
count += 1;
|
||||
records.add((v.into(), self.irf.into(), None));
|
||||
}
|
||||
}
|
||||
let end = self.r.end.clone();
|
||||
if self.r.matches(&end) {
|
||||
if let Some(v) = tx.get(end).await? {
|
||||
collector.add(v.into(), None);
|
||||
count += 1;
|
||||
records.add((v.into(), self.irf.into(), None));
|
||||
}
|
||||
}
|
||||
self.done = true;
|
||||
Ok(count)
|
||||
Ok(records)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct UniqueUnionThingIterator {
|
||||
irf: IteratorRef,
|
||||
keys: VecDeque<Key>,
|
||||
}
|
||||
|
||||
impl UniqueUnionThingIterator {
|
||||
pub(super) fn new(opt: &Options, ix: &DefineIndexStatement, a: &Array) -> Self {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
opt: &Options,
|
||||
ix: &DefineIndexStatement,
|
||||
a: &Array,
|
||||
) -> Self {
|
||||
// We create a VecDeque to hold the key for each value in the array.
|
||||
let keys: VecDeque<Key> =
|
||||
a.0.iter()
|
||||
|
@ -570,146 +674,147 @@ impl UniqueUnionThingIterator {
|
|||
})
|
||||
.collect();
|
||||
Self {
|
||||
irf,
|
||||
keys,
|
||||
}
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
let mut run = txn.lock().await;
|
||||
let mut count = 0;
|
||||
) -> Result<B, Error> {
|
||||
let limit = limit as usize;
|
||||
let mut results = B::with_capacity(limit.min(self.keys.len()));
|
||||
while let Some(key) = self.keys.pop_front() {
|
||||
if let Some(val) = run.get(key).await? {
|
||||
collector.add(val.into(), None);
|
||||
count += 1;
|
||||
if count >= limit {
|
||||
if ctx.is_done() {
|
||||
break;
|
||||
}
|
||||
if let Some(val) = tx.get(key).await? {
|
||||
results.add((val.into(), self.irf.into(), None));
|
||||
if results.len() >= limit {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(count as usize)
|
||||
Ok(results)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct UniqueJoinThingIterator(JoinThingIterator);
|
||||
pub(crate) struct UniqueJoinThingIterator(IteratorRef, JoinThingIterator);
|
||||
|
||||
impl UniqueJoinThingIterator {
|
||||
pub(super) fn new(
|
||||
irf: IteratorRef,
|
||||
opt: &Options,
|
||||
ix: &DefineIndexStatement,
|
||||
remote_iterators: VecDeque<ThingIterator>,
|
||||
) -> Self {
|
||||
Self(JoinThingIterator::new(opt, ix, remote_iterators))
|
||||
Self(irf, JoinThingIterator::new(opt, ix, remote_iterators))
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
tx: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
) -> Result<B, Error> {
|
||||
let new_iter = |ns: &str, db: &str, ix_what: &Ident, ix_name: &Ident, value: Value| {
|
||||
let it = UniqueEqualThingIterator::new(ns, db, ix_what, ix_name, &value);
|
||||
let it = UniqueEqualThingIterator::new(self.0, ns, db, ix_what, ix_name, &value);
|
||||
ThingIterator::UniqueEqual(it)
|
||||
};
|
||||
self.0.next_batch(tx, limit, collector, new_iter).await
|
||||
self.1.next_batch(ctx, tx, limit, new_iter).await
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct MatchesThingIterator {
|
||||
irf: IteratorRef,
|
||||
hits_left: usize,
|
||||
hits: Option<HitsIterator>,
|
||||
}
|
||||
|
||||
impl MatchesThingIterator {
|
||||
pub(super) async fn new(fti: &FtIndex, terms_docs: TermsDocs) -> Result<Self, Error> {
|
||||
pub(super) async fn new(
|
||||
irf: IteratorRef,
|
||||
fti: &FtIndex,
|
||||
terms_docs: TermsDocs,
|
||||
) -> Result<Self, Error> {
|
||||
let hits = fti.new_hits_iterator(terms_docs)?;
|
||||
let hits_left = if let Some(h) = &hits {
|
||||
h.len()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
Ok(Self {
|
||||
irf,
|
||||
hits,
|
||||
hits_left,
|
||||
})
|
||||
}
|
||||
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
tx: &mut kvs::Transaction,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
let mut count = 0;
|
||||
) -> Result<B, Error> {
|
||||
if let Some(hits) = &mut self.hits {
|
||||
let mut run = txn.lock().await;
|
||||
while limit > count {
|
||||
if let Some((thg, doc_id)) = hits.next(&mut run).await? {
|
||||
collector.add(thg, Some(doc_id));
|
||||
count += 1;
|
||||
let limit = limit as usize;
|
||||
let mut records = B::with_capacity(limit.min(self.hits_left));
|
||||
while limit > records.len() && !ctx.is_done() {
|
||||
if let Some((thg, doc_id)) = hits.next(tx).await? {
|
||||
let ir = IteratorRecord {
|
||||
irf: self.irf,
|
||||
doc_id: Some(doc_id),
|
||||
dist: None,
|
||||
};
|
||||
records.add((thg, ir, None));
|
||||
self.hits_left -= 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(records)
|
||||
} else {
|
||||
Ok(B::empty())
|
||||
}
|
||||
Ok(count as usize)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct DocIdsIterator {
|
||||
doc_ids: Arc<RwLock<DocIds>>,
|
||||
res: VecDeque<DocId>,
|
||||
pub(crate) type KnnIteratorResult = (Thing, f64, Option<Value>);
|
||||
|
||||
pub(crate) struct KnnIterator {
|
||||
irf: IteratorRef,
|
||||
res: VecDeque<KnnIteratorResult>,
|
||||
}
|
||||
|
||||
impl DocIdsIterator {
|
||||
pub(super) fn new(doc_ids: Arc<RwLock<DocIds>>, res: VecDeque<DocId>) -> Self {
|
||||
impl KnnIterator {
|
||||
pub(super) fn new(irf: IteratorRef, res: VecDeque<KnnIteratorResult>) -> Self {
|
||||
Self {
|
||||
doc_ids,
|
||||
irf,
|
||||
res,
|
||||
}
|
||||
}
|
||||
async fn next_batch<T: ThingCollector>(
|
||||
async fn next_batch<B: IteratorBatch>(
|
||||
&mut self,
|
||||
txn: &Transaction,
|
||||
ctx: &Context<'_>,
|
||||
limit: u32,
|
||||
collector: &mut T,
|
||||
) -> Result<usize, Error> {
|
||||
let mut tx = txn.lock().await;
|
||||
let mut count = 0;
|
||||
while limit > count {
|
||||
if let Some(doc_id) = self.res.pop_front() {
|
||||
if let Some(doc_key) =
|
||||
self.doc_ids.read().await.get_doc_key(&mut tx, doc_id).await?
|
||||
{
|
||||
collector.add(doc_key.into(), Some(doc_id));
|
||||
count += 1;
|
||||
}
|
||||
) -> Result<B, Error> {
|
||||
let limit = limit as usize;
|
||||
let mut records = B::with_capacity(limit.min(self.res.len()));
|
||||
while limit > records.len() && !ctx.is_done() {
|
||||
if let Some((thing, dist, val)) = self.res.pop_front() {
|
||||
let ir = IteratorRecord {
|
||||
irf: self.irf,
|
||||
doc_id: None,
|
||||
dist: Some(dist),
|
||||
};
|
||||
records.add((thing, ir, val));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(count as usize)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ThingsIterator {
|
||||
res: VecDeque<Thing>,
|
||||
}
|
||||
|
||||
impl ThingsIterator {
|
||||
pub(super) fn new(res: VecDeque<Thing>) -> Self {
|
||||
Self {
|
||||
res,
|
||||
}
|
||||
}
|
||||
fn next_batch<T: ThingCollector>(&mut self, limit: u32, collector: &mut T) -> usize {
|
||||
let mut count = 0;
|
||||
while limit > count {
|
||||
if let Some(thg) = self.res.pop_front() {
|
||||
collector.add(thg, None);
|
||||
count += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
count as usize
|
||||
Ok(records)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use crate::sql::{Number, Thing};
|
||||
use crate::sql::{Expression, Number, Thing};
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, HashSet, VecDeque};
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
|
@ -25,10 +26,14 @@ impl KnnPriorityList {
|
|||
if i.check_add(&dist) {
|
||||
i.add(dist, thing);
|
||||
}
|
||||
drop(i);
|
||||
}
|
||||
|
||||
pub(super) async fn build(&self) -> HashSet<Arc<Thing>> {
|
||||
self.0.lock().await.build()
|
||||
pub(super) async fn build(&self) -> HashMap<Arc<Thing>, Number> {
|
||||
let l = self.0.lock().await;
|
||||
let r = l.build();
|
||||
drop(l);
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -73,21 +78,21 @@ impl Inner {
|
|||
}
|
||||
}
|
||||
|
||||
fn build(&self) -> HashSet<Arc<Thing>> {
|
||||
let mut sorted_docs = VecDeque::with_capacity(self.knn);
|
||||
fn build(&self) -> HashMap<Arc<Thing>, Number> {
|
||||
let mut result = HashMap::with_capacity(self.knn);
|
||||
#[cfg(debug_assertions)]
|
||||
debug!("self.priority_list: {:?} - self.docs: {:?}", self.priority_list, self.docs);
|
||||
let mut left = self.knn;
|
||||
for docs in self.priority_list.values() {
|
||||
for (dist, docs) in &self.priority_list {
|
||||
let dl = docs.len();
|
||||
if dl > left {
|
||||
for doc_id in docs.iter().take(left) {
|
||||
sorted_docs.push_back(doc_id);
|
||||
result.insert(doc_id.clone(), dist.clone());
|
||||
}
|
||||
break;
|
||||
}
|
||||
for doc_id in docs {
|
||||
sorted_docs.push_back(doc_id);
|
||||
result.insert(doc_id.clone(), dist.clone());
|
||||
}
|
||||
left -= dl;
|
||||
// We don't expect anymore result, we can leave
|
||||
|
@ -95,12 +100,55 @@ impl Inner {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
debug!("sorted_docs: {:?}", sorted_docs);
|
||||
let mut r = HashSet::with_capacity(sorted_docs.len());
|
||||
for id in sorted_docs {
|
||||
r.insert(id.clone());
|
||||
}
|
||||
r
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct KnnBruteForceResult {
|
||||
exp: HashMap<Arc<Expression>, usize>,
|
||||
res: Vec<HashMap<Arc<Thing>, Number>>,
|
||||
}
|
||||
|
||||
impl KnnBruteForceResult {
|
||||
pub(super) fn with_capacity(capacity: usize) -> Self {
|
||||
Self {
|
||||
exp: HashMap::with_capacity(capacity),
|
||||
res: Vec::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn insert(&mut self, e: Arc<Expression>, m: HashMap<Arc<Thing>, Number>) {
|
||||
self.exp.insert(e.clone(), self.res.len());
|
||||
self.res.push(m);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct KnnBruteForceResults(Arc<std::collections::HashMap<String, KnnBruteForceResult>>);
|
||||
|
||||
impl From<std::collections::HashMap<String, KnnBruteForceResult>> for KnnBruteForceResults {
|
||||
fn from(map: std::collections::HashMap<String, KnnBruteForceResult>) -> Self {
|
||||
Self(map.into())
|
||||
}
|
||||
}
|
||||
impl KnnBruteForceResults {
|
||||
pub(super) fn contains(&self, exp: &Expression, thg: &Thing) -> bool {
|
||||
if let Some(result) = self.0.get(thg.tb.as_str()) {
|
||||
if let Some(&pos) = result.exp.get(exp) {
|
||||
if let Some(things) = result.res.get(pos) {
|
||||
return things.contains_key(thg);
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub(crate) fn get_dist(&self, pos: usize, thg: &Thing) -> Option<Number> {
|
||||
if let Some(result) = self.0.get(thg.tb.as_str()) {
|
||||
if let Some(things) = result.res.get(pos) {
|
||||
return things.get(thg).cloned();
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,23 +1,24 @@
|
|||
pub mod checker;
|
||||
pub(crate) mod executor;
|
||||
pub(crate) mod iterators;
|
||||
pub(in crate::idx) mod knn;
|
||||
pub(crate) mod plan;
|
||||
mod tree;
|
||||
pub(in crate::idx) mod rewriter;
|
||||
pub(in crate::idx) mod tree;
|
||||
|
||||
use crate::ctx::Context;
|
||||
use crate::dbs::{Iterable, Iterator, Options, Transaction};
|
||||
use crate::err::Error;
|
||||
use crate::idx::planner::executor::{
|
||||
InnerQueryExecutor, IteratorEntry, IteratorRef, QueryExecutor,
|
||||
};
|
||||
use crate::idx::planner::executor::{InnerQueryExecutor, IteratorEntry, QueryExecutor};
|
||||
use crate::idx::planner::iterators::IteratorRef;
|
||||
use crate::idx::planner::knn::KnnBruteForceResults;
|
||||
use crate::idx::planner::plan::{Plan, PlanBuilder};
|
||||
use crate::idx::planner::tree::Tree;
|
||||
use crate::sql::with::With;
|
||||
use crate::sql::{Cond, Expression, Table, Thing};
|
||||
use crate::sql::{Cond, Table};
|
||||
use reblessive::tree::Stk;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicU8, Ordering};
|
||||
use std::sync::Arc;
|
||||
|
||||
pub(crate) struct QueryPlanner<'a> {
|
||||
opt: &'a Options,
|
||||
|
@ -66,6 +67,8 @@ impl<'a> QueryPlanner<'a> {
|
|||
&t,
|
||||
tree.index_map,
|
||||
tree.knn_expressions,
|
||||
tree.knn_brute_force_expressions,
|
||||
tree.knn_condition,
|
||||
)
|
||||
.await?;
|
||||
match PlanBuilder::build(tree.root, self.with, tree.with_indexes)? {
|
||||
|
@ -149,27 +152,24 @@ impl<'a> QueryPlanner<'a> {
|
|||
let pos = self.iteration_index.fetch_add(1, Ordering::Relaxed);
|
||||
match self.iteration_workflow.get(pos as usize) {
|
||||
Some(IterationStage::BuildKnn) => {
|
||||
Some(IterationStage::Iterate(Some(self.build_knn_sets().await)))
|
||||
Some(IterationStage::Iterate(Some(self.build_bruteforce_knn_results().await)))
|
||||
}
|
||||
is => is.cloned(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn build_knn_sets(&self) -> KnnSets {
|
||||
async fn build_bruteforce_knn_results(&self) -> KnnBruteForceResults {
|
||||
let mut results = HashMap::with_capacity(self.executors.len());
|
||||
for (tb, exe) in &self.executors {
|
||||
results.insert(tb.clone(), exe.build_knn_set().await);
|
||||
results.insert(tb.clone(), exe.build_bruteforce_knn_result().await);
|
||||
}
|
||||
Arc::new(results)
|
||||
results.into()
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) type KnnSet = HashMap<Arc<Expression>, HashSet<Arc<Thing>>>;
|
||||
pub(crate) type KnnSets = Arc<HashMap<String, KnnSet>>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) enum IterationStage {
|
||||
Iterate(Option<KnnSets>),
|
||||
Iterate(Option<KnnBruteForceResults>),
|
||||
CollectKnn,
|
||||
BuildKnn,
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ use crate::idx::ft::MatchRef;
|
|||
use crate::idx::planner::tree::{GroupRef, IdiomPosition, IndexRef, Node};
|
||||
use crate::sql::statements::DefineIndexStatement;
|
||||
use crate::sql::with::With;
|
||||
use crate::sql::{Array, Expression, Idiom, Object};
|
||||
use crate::sql::{Array, Expression, Idiom, Number, Object};
|
||||
use crate::sql::{Operator, Value};
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{BTreeMap, HashMap, HashSet};
|
||||
|
@ -166,7 +166,7 @@ pub(super) enum Plan {
|
|||
#[derive(Debug, Eq, PartialEq, Hash, Clone)]
|
||||
pub(super) struct IndexOption {
|
||||
/// A reference to the index definition
|
||||
ir: IndexRef,
|
||||
ix_ref: IndexRef,
|
||||
id: Idiom,
|
||||
id_pos: IdiomPosition,
|
||||
op: Arc<IndexOperator>,
|
||||
|
@ -180,14 +180,19 @@ pub(super) enum IndexOperator {
|
|||
Join(Vec<IndexOption>),
|
||||
RangePart(Operator, Value),
|
||||
Matches(String, Option<MatchRef>),
|
||||
Knn(Array, u32),
|
||||
Ann(Array, usize, usize),
|
||||
Knn(Arc<Vec<Number>>, u32),
|
||||
Ann(Arc<Vec<Number>>, u32, u32),
|
||||
}
|
||||
|
||||
impl IndexOption {
|
||||
pub(super) fn new(ir: IndexRef, id: Idiom, id_pos: IdiomPosition, op: IndexOperator) -> Self {
|
||||
pub(super) fn new(
|
||||
ix_ref: IndexRef,
|
||||
id: Idiom,
|
||||
id_pos: IdiomPosition,
|
||||
op: IndexOperator,
|
||||
) -> Self {
|
||||
Self {
|
||||
ir,
|
||||
ix_ref,
|
||||
id,
|
||||
id_pos,
|
||||
op: Arc::new(op),
|
||||
|
@ -199,7 +204,7 @@ impl IndexOption {
|
|||
}
|
||||
|
||||
pub(super) fn ix_ref(&self) -> IndexRef {
|
||||
self.ir
|
||||
self.ix_ref
|
||||
}
|
||||
|
||||
pub(super) fn op(&self) -> &IndexOperator {
|
||||
|
@ -225,7 +230,7 @@ impl IndexOption {
|
|||
|
||||
pub(crate) fn explain(&self, ix_def: &[DefineIndexStatement]) -> Value {
|
||||
let mut e = HashMap::new();
|
||||
if let Some(ix) = ix_def.get(self.ir as usize) {
|
||||
if let Some(ix) = ix_def.get(self.ix_ref as usize) {
|
||||
e.insert("index", Value::from(ix.name.0.to_owned()));
|
||||
}
|
||||
match self.op() {
|
||||
|
@ -259,12 +264,16 @@ impl IndexOption {
|
|||
e.insert("value", v.to_owned());
|
||||
}
|
||||
IndexOperator::Knn(a, k) => {
|
||||
e.insert("operator", Value::from(format!("<{}>", k)));
|
||||
e.insert("value", Value::Array(a.clone()));
|
||||
let op = Value::from(Operator::Knn(*k, None).to_string());
|
||||
let val = Value::Array(Array::from(a.as_ref().clone()));
|
||||
e.insert("operator", op);
|
||||
e.insert("value", val);
|
||||
}
|
||||
IndexOperator::Ann(a, n, ef) => {
|
||||
e.insert("operator", Value::from(format!("<{},{}>", n, ef)));
|
||||
e.insert("value", Value::Array(a.clone()));
|
||||
IndexOperator::Ann(a, k, ef) => {
|
||||
let op = Value::from(Operator::Ann(*k, *ef).to_string());
|
||||
let val = Value::Array(Array::from(a.as_ref().clone()));
|
||||
e.insert("operator", op);
|
||||
e.insert("value", val);
|
||||
}
|
||||
};
|
||||
Value::from(e)
|
||||
|
|
232
core/src/idx/planner/rewriter.rs
Normal file
232
core/src/idx/planner/rewriter.rs
Normal file
|
@ -0,0 +1,232 @@
|
|||
use crate::idx::planner::executor::KnnExpressions;
|
||||
use crate::sql::{
|
||||
Array, Cast, Cond, Expression, Function, Id, Idiom, Model, Object, Part, Range, Thing, Value,
|
||||
};
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::ops::Bound;
|
||||
|
||||
pub(super) struct KnnConditionRewriter<'a>(&'a KnnExpressions);
|
||||
|
||||
impl<'a> KnnConditionRewriter<'a> {
|
||||
// This function rebuild the same condition, but replaces any KnnExpression by a `true` value
|
||||
pub(super) fn build(expressions: &'a KnnExpressions, cond: &Cond) -> Option<Cond> {
|
||||
let b = Self(expressions);
|
||||
b.eval_value(&cond.0).map(Cond)
|
||||
}
|
||||
|
||||
fn eval_value(&self, v: &Value) -> Option<Value> {
|
||||
match v {
|
||||
Value::Array(a) => self.eval_value_array(a),
|
||||
Value::Object(o) => self.eval_value_object(o),
|
||||
Value::Thing(t) => self.eval_value_thing(t),
|
||||
Value::Idiom(i) => self.eval_value_idiom(i),
|
||||
Value::Cast(c) => self.eval_value_cast(c),
|
||||
Value::Range(r) => self.eval_value_range(r),
|
||||
Value::Edges(_)
|
||||
| Value::Block(_)
|
||||
| Value::Future(_)
|
||||
| Value::Subquery(_)
|
||||
| Value::Query(_) => None,
|
||||
Value::Function(f) => self.eval_value_function(f),
|
||||
Value::Expression(e) => self.eval_value_expression(e),
|
||||
Value::Model(m) => self.eval_value_model(m),
|
||||
Value::None
|
||||
| Value::Null
|
||||
| Value::Bool(_)
|
||||
| Value::Number(_)
|
||||
| Value::Strand(_)
|
||||
| Value::Duration(_)
|
||||
| Value::Datetime(_)
|
||||
| Value::Uuid(_)
|
||||
| Value::Geometry(_)
|
||||
| Value::Bytes(_)
|
||||
| Value::Param(_)
|
||||
| Value::Table(_)
|
||||
| Value::Mock(_)
|
||||
| Value::Regex(_)
|
||||
| Value::Constant(_) => Some(v.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_value_array(&self, a: &Array) -> Option<Value> {
|
||||
self.eval_array(a).map(|a| a.into())
|
||||
}
|
||||
|
||||
fn eval_array(&self, a: &Array) -> Option<Array> {
|
||||
self.eval_values(&a.0).map(|v| v.into())
|
||||
}
|
||||
|
||||
fn eval_values(&self, values: &[Value]) -> Option<Vec<Value>> {
|
||||
let mut new_vec = Vec::with_capacity(values.len());
|
||||
for v in values {
|
||||
if let Some(v) = self.eval_value(v) {
|
||||
new_vec.push(v);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(new_vec)
|
||||
}
|
||||
|
||||
fn eval_value_object(&self, o: &Object) -> Option<Value> {
|
||||
self.eval_object(o).map(|o| o.into())
|
||||
}
|
||||
fn eval_object(&self, o: &Object) -> Option<Object> {
|
||||
let mut new_o = BTreeMap::new();
|
||||
for (k, v) in &o.0 {
|
||||
if let Some(v) = self.eval_value(v) {
|
||||
new_o.insert(k.to_owned(), v);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(new_o.into())
|
||||
}
|
||||
|
||||
fn eval_value_thing(&self, t: &Thing) -> Option<Value> {
|
||||
self.eval_thing(t).map(|t| t.into())
|
||||
}
|
||||
|
||||
fn eval_thing(&self, t: &Thing) -> Option<Thing> {
|
||||
self.eval_id(&t.id).map(|id| Thing {
|
||||
tb: t.tb.clone(),
|
||||
id,
|
||||
})
|
||||
}
|
||||
|
||||
fn eval_id(&self, id: &Id) -> Option<Id> {
|
||||
match id {
|
||||
Id::Number(_) | Id::String(_) | Id::Generate(_) => Some(id.clone()),
|
||||
Id::Array(a) => self.eval_array(a).map(Id::Array),
|
||||
Id::Object(o) => self.eval_object(o).map(Id::Object),
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_value_idiom(&self, i: &Idiom) -> Option<Value> {
|
||||
self.eval_idiom(i).map(|i| i.into())
|
||||
}
|
||||
|
||||
fn eval_idiom(&self, i: &Idiom) -> Option<Idiom> {
|
||||
let mut new_i = Vec::with_capacity(i.0.len());
|
||||
for p in &i.0 {
|
||||
if let Some(p) = self.eval_part(p) {
|
||||
new_i.push(p);
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(new_i.into())
|
||||
}
|
||||
fn eval_part(&self, p: &Part) -> Option<Part> {
|
||||
match p {
|
||||
Part::All
|
||||
| Part::Flatten
|
||||
| Part::Last
|
||||
| Part::First
|
||||
| Part::Field(_)
|
||||
| Part::Index(_) => Some(p.clone()),
|
||||
Part::Where(v) => self.eval_value(v).map(Part::Where),
|
||||
Part::Graph(_) => None,
|
||||
Part::Value(v) => self.eval_value(v).map(Part::Value),
|
||||
Part::Start(v) => self.eval_value(v).map(Part::Start),
|
||||
Part::Method(n, p) => self.eval_values(p).map(|v| Part::Method(n.clone(), v)),
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_value_cast(&self, c: &Cast) -> Option<Value> {
|
||||
self.eval_cast(c).map(|c| c.into())
|
||||
}
|
||||
|
||||
fn eval_cast(&self, c: &Cast) -> Option<Cast> {
|
||||
self.eval_value(&c.1).map(|v| Cast(c.0.clone(), v))
|
||||
}
|
||||
|
||||
fn eval_value_range(&self, r: &Range) -> Option<Value> {
|
||||
self.eval_range(r).map(|r| r.into())
|
||||
}
|
||||
|
||||
fn eval_range(&self, r: &Range) -> Option<Range> {
|
||||
if let Some(beg) = self.eval_bound(&r.beg) {
|
||||
self.eval_bound(&r.end).map(|end| Range {
|
||||
tb: r.tb.clone(),
|
||||
beg,
|
||||
end,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_bound(&self, b: &Bound<Id>) -> Option<Bound<Id>> {
|
||||
match b {
|
||||
Bound::Included(id) => self.eval_id(id).map(Bound::Included),
|
||||
Bound::Excluded(id) => self.eval_id(id).map(Bound::Excluded),
|
||||
Bound::Unbounded => Some(Bound::Unbounded),
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_value_function(&self, f: &Function) -> Option<Value> {
|
||||
self.eval_function(f).map(|f| f.into())
|
||||
}
|
||||
|
||||
fn eval_function(&self, f: &Function) -> Option<Function> {
|
||||
match f {
|
||||
Function::Normal(s, args) => {
|
||||
self.eval_values(args).map(|args| Function::Normal(s.clone(), args))
|
||||
}
|
||||
Function::Custom(s, args) => {
|
||||
self.eval_values(args).map(|args| Function::Custom(s.clone(), args))
|
||||
}
|
||||
Function::Script(s, args) => {
|
||||
self.eval_values(args).map(|args| Function::Script(s.clone(), args))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn eval_value_model(&self, m: &Model) -> Option<Value> {
|
||||
self.eval_model(m).map(|m| m.into())
|
||||
}
|
||||
|
||||
fn eval_model(&self, m: &Model) -> Option<Model> {
|
||||
self.eval_values(&m.args).map(|args| Model {
|
||||
name: m.name.clone(),
|
||||
version: m.version.clone(),
|
||||
args,
|
||||
})
|
||||
}
|
||||
|
||||
fn eval_value_expression(&self, e: &Expression) -> Option<Value> {
|
||||
if self.0.contains(e) {
|
||||
return Some(Value::Bool(true));
|
||||
}
|
||||
self.eval_expression(e).map(|e| e.into())
|
||||
}
|
||||
|
||||
fn eval_expression(&self, e: &Expression) -> Option<Expression> {
|
||||
match e {
|
||||
Expression::Unary {
|
||||
o,
|
||||
v,
|
||||
} => self.eval_value(v).map(|v| Expression::Unary {
|
||||
o: o.clone(),
|
||||
v,
|
||||
}),
|
||||
Expression::Binary {
|
||||
l,
|
||||
o,
|
||||
r,
|
||||
} => {
|
||||
if let Some(l) = self.eval_value(l) {
|
||||
self.eval_value(r).map(|r| Expression::Binary {
|
||||
l,
|
||||
o: o.clone(),
|
||||
r,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,10 +1,13 @@
|
|||
use crate::ctx::Context;
|
||||
use crate::dbs::{Options, Transaction};
|
||||
use crate::err::Error;
|
||||
use crate::idx::planner::executor::{AnnExpressions, KnnExpressions};
|
||||
use crate::idx::planner::executor::{
|
||||
KnnBruteForceExpression, KnnBruteForceExpressions, KnnExpressions,
|
||||
};
|
||||
use crate::idx::planner::plan::{IndexOperator, IndexOption};
|
||||
use crate::idx::planner::rewriter::KnnConditionRewriter;
|
||||
use crate::kvs;
|
||||
use crate::sql::index::{Distance, Index};
|
||||
use crate::sql::index::Index;
|
||||
use crate::sql::statements::{DefineFieldStatement, DefineIndexStatement};
|
||||
use crate::sql::{
|
||||
Array, Cond, Expression, Idiom, Kind, Number, Operator, Part, Subquery, Table, Value, With,
|
||||
|
@ -18,6 +21,8 @@ pub(super) struct Tree {
|
|||
pub(super) index_map: IndexesMap,
|
||||
pub(super) with_indexes: Vec<IndexRef>,
|
||||
pub(super) knn_expressions: KnnExpressions,
|
||||
pub(super) knn_brute_force_expressions: KnnBruteForceExpressions,
|
||||
pub(super) knn_condition: Option<Cond>,
|
||||
}
|
||||
|
||||
impl Tree {
|
||||
|
@ -35,11 +40,18 @@ impl Tree {
|
|||
let mut b = TreeBuilder::new(ctx, opt, txn, table, with);
|
||||
if let Some(cond) = cond {
|
||||
let root = b.eval_value(stk, 0, &cond.0).await?;
|
||||
let knn_condition = if b.knn_expressions.is_empty() {
|
||||
None
|
||||
} else {
|
||||
KnnConditionRewriter::build(&b.knn_expressions, cond)
|
||||
};
|
||||
Ok(Some(Self {
|
||||
root,
|
||||
index_map: b.index_map,
|
||||
with_indexes: b.with_indexes,
|
||||
knn_expressions: b.knn_expressions,
|
||||
knn_brute_force_expressions: b.knn_brute_force_expressions,
|
||||
knn_condition,
|
||||
}))
|
||||
} else {
|
||||
Ok(None)
|
||||
|
@ -59,8 +71,8 @@ struct TreeBuilder<'a> {
|
|||
resolved_idioms: HashMap<Idiom, Node>,
|
||||
index_map: IndexesMap,
|
||||
with_indexes: Vec<IndexRef>,
|
||||
knn_brute_force_expressions: HashMap<Arc<Expression>, KnnBruteForceExpression>,
|
||||
knn_expressions: KnnExpressions,
|
||||
ann_expressions: AnnExpressions,
|
||||
idioms_record_options: HashMap<Idiom, RecordOptions>,
|
||||
group_sequence: GroupRef,
|
||||
}
|
||||
|
@ -98,8 +110,8 @@ impl<'a> TreeBuilder<'a> {
|
|||
resolved_idioms: Default::default(),
|
||||
index_map: Default::default(),
|
||||
with_indexes,
|
||||
knn_brute_force_expressions: Default::default(),
|
||||
knn_expressions: Default::default(),
|
||||
ann_expressions: Default::default(),
|
||||
idioms_record_options: Default::default(),
|
||||
group_sequence: 0,
|
||||
}
|
||||
|
@ -201,9 +213,11 @@ impl<'a> TreeBuilder<'a> {
|
|||
}
|
||||
// Try to detect an indexed record field
|
||||
if let Some(ro) = self.resolve_record_field(&mut tx, schema.fields.as_ref(), i).await? {
|
||||
drop(tx);
|
||||
return Ok(Node::RecordField(i.clone(), ro));
|
||||
}
|
||||
}
|
||||
drop(tx);
|
||||
Ok(Node::NonIndexedField(i.clone()))
|
||||
}
|
||||
|
||||
|
@ -329,9 +343,9 @@ impl<'a> TreeBuilder<'a> {
|
|||
remote_irs,
|
||||
)?;
|
||||
} else if let Some(id) = left.is_non_indexed_field() {
|
||||
self.eval_knn(id, &right, &exp)?;
|
||||
self.eval_bruteforce_knn(id, &right, &exp)?;
|
||||
} else if let Some(id) = right.is_non_indexed_field() {
|
||||
self.eval_knn(id, &left, &exp)?;
|
||||
self.eval_bruteforce_knn(id, &left, &exp)?;
|
||||
}
|
||||
let re = ResolvedExpression {
|
||||
group,
|
||||
|
@ -393,8 +407,8 @@ impl<'a> TreeBuilder<'a> {
|
|||
Index::Search {
|
||||
..
|
||||
} => Self::eval_matches_operator(op, n),
|
||||
Index::MTree(_) => self.eval_indexed_knn(e, op, n, id)?,
|
||||
Index::Hnsw(_) => self.eval_indexed_ann(e, op, n, id)?,
|
||||
Index::MTree(_) => self.eval_mtree_knn(e, op, n)?,
|
||||
Index::Hnsw(_) => self.eval_hnsw_knn(e, op, n)?,
|
||||
};
|
||||
if let Some(op) = op {
|
||||
let io = IndexOption::new(*ir, id.clone(), p, op);
|
||||
|
@ -427,61 +441,51 @@ impl<'a> TreeBuilder<'a> {
|
|||
None
|
||||
}
|
||||
|
||||
fn eval_indexed_knn(
|
||||
fn eval_mtree_knn(
|
||||
&mut self,
|
||||
exp: &Arc<Expression>,
|
||||
op: &Operator,
|
||||
n: &Node,
|
||||
id: &Idiom,
|
||||
) -> Result<Option<IndexOperator>, Error> {
|
||||
if let Operator::Knn(k, d) = op {
|
||||
if let Operator::Knn(k, None) = op {
|
||||
if let Node::Computed(v) = n {
|
||||
let vec: Vec<Number> = v.as_ref().try_into()?;
|
||||
self.knn_expressions.insert(
|
||||
exp.clone(),
|
||||
(*k, id.clone(), Arc::new(vec), d.clone().unwrap_or(Distance::Euclidean)),
|
||||
);
|
||||
if let Value::Array(a) = v.as_ref() {
|
||||
match d {
|
||||
None | Some(Distance::Euclidean) | Some(Distance::Manhattan) => {
|
||||
return Ok(Some(IndexOperator::Knn(a.clone(), *k)))
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let vec: Arc<Vec<Number>> = Arc::new(v.as_ref().try_into()?);
|
||||
self.knn_expressions.insert(exp.clone());
|
||||
return Ok(Some(IndexOperator::Knn(vec, *k)));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn eval_indexed_ann(
|
||||
fn eval_hnsw_knn(
|
||||
&mut self,
|
||||
exp: &Arc<Expression>,
|
||||
op: &Operator,
|
||||
nd: &Node,
|
||||
id: &Idiom,
|
||||
n: &Node,
|
||||
) -> Result<Option<IndexOperator>, Error> {
|
||||
if let Operator::Ann(n, ef) = op {
|
||||
if let Node::Computed(v) = nd {
|
||||
let vec: Vec<Number> = v.as_ref().try_into()?;
|
||||
let n = *n as usize;
|
||||
let ef = *ef as usize;
|
||||
self.ann_expressions.insert(exp.clone(), (n, id.clone(), Arc::new(vec), ef));
|
||||
if let Value::Array(a) = v.as_ref() {
|
||||
return Ok(Some(IndexOperator::Ann(a.clone(), n, ef)));
|
||||
}
|
||||
if let Operator::Ann(k, ef) = op {
|
||||
if let Node::Computed(v) = n {
|
||||
let vec: Arc<Vec<Number>> = Arc::new(v.as_ref().try_into()?);
|
||||
self.knn_expressions.insert(exp.clone());
|
||||
return Ok(Some(IndexOperator::Ann(vec, *k, *ef)));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn eval_knn(&mut self, id: &Idiom, val: &Node, exp: &Arc<Expression>) -> Result<(), Error> {
|
||||
if let Operator::Knn(k, d) = exp.operator() {
|
||||
fn eval_bruteforce_knn(
|
||||
&mut self,
|
||||
id: &Idiom,
|
||||
val: &Node,
|
||||
exp: &Arc<Expression>,
|
||||
) -> Result<(), Error> {
|
||||
if let Operator::Knn(k, Some(d)) = exp.operator() {
|
||||
if let Node::Computed(v) = val {
|
||||
let vec: Vec<Number> = v.as_ref().try_into()?;
|
||||
self.knn_expressions.insert(
|
||||
let vec: Arc<Vec<Number>> = Arc::new(v.as_ref().try_into()?);
|
||||
self.knn_expressions.insert(exp.clone());
|
||||
self.knn_brute_force_expressions.insert(
|
||||
exp.clone(),
|
||||
(*k, id.clone(), Arc::new(vec), d.clone().unwrap_or(Distance::Euclidean)),
|
||||
KnnBruteForceExpression::new(*k, id.clone(), vec, d.clone()),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
56
core/src/idx/trees/hnsw/docs.rs
Normal file
56
core/src/idx/trees/hnsw/docs.rs
Normal file
|
@ -0,0 +1,56 @@
|
|||
use crate::idx::docids::DocId;
|
||||
use crate::kvs::Key;
|
||||
use crate::sql::Thing;
|
||||
use radix_trie::Trie;
|
||||
use roaring::RoaringTreemap;
|
||||
|
||||
#[derive(Default)]
|
||||
pub(in crate::idx) struct HnswDocs {
|
||||
doc_ids: Trie<Key, DocId>,
|
||||
ids_doc: Vec<Option<Thing>>,
|
||||
available: RoaringTreemap,
|
||||
}
|
||||
|
||||
impl HnswDocs {
|
||||
pub(super) fn resolve(&mut self, rid: &Thing) -> DocId {
|
||||
let doc_key: Key = rid.into();
|
||||
if let Some(doc_id) = self.doc_ids.get(&doc_key) {
|
||||
*doc_id
|
||||
} else {
|
||||
let doc_id = self.next_doc_id();
|
||||
self.ids_doc.push(Some(rid.clone()));
|
||||
self.doc_ids.insert(doc_key, doc_id);
|
||||
doc_id
|
||||
}
|
||||
}
|
||||
|
||||
fn next_doc_id(&mut self) -> DocId {
|
||||
if let Some(doc_id) = self.available.iter().next() {
|
||||
self.available.remove(doc_id);
|
||||
doc_id
|
||||
} else {
|
||||
self.ids_doc.len() as DocId
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) fn get_thing(&self, doc_id: DocId) -> Option<&Thing> {
|
||||
if let Some(r) = self.ids_doc.get(doc_id as usize) {
|
||||
r.as_ref()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
pub(super) fn remove(&mut self, rid: &Thing) -> Option<DocId> {
|
||||
let doc_key: Key = rid.into();
|
||||
if let Some(doc_id) = self.doc_ids.remove(&doc_key) {
|
||||
let n = doc_id as usize;
|
||||
if n < self.ids_doc.len() {
|
||||
self.ids_doc[n] = None;
|
||||
}
|
||||
self.available.insert(doc_id);
|
||||
Some(doc_id)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
58
core/src/idx/trees/hnsw/elements.rs
Normal file
58
core/src/idx/trees/hnsw/elements.rs
Normal file
|
@ -0,0 +1,58 @@
|
|||
use crate::idx::trees::hnsw::ElementId;
|
||||
use crate::idx::trees::vector::SharedVector;
|
||||
use crate::sql::index::Distance;
|
||||
use hashbrown::HashMap;
|
||||
|
||||
pub(super) struct HnswElements {
|
||||
elements: HashMap<ElementId, SharedVector>,
|
||||
next_element_id: ElementId,
|
||||
dist: Distance,
|
||||
}
|
||||
|
||||
impl HnswElements {
|
||||
pub(super) fn new(dist: Distance) -> Self {
|
||||
Self {
|
||||
elements: Default::default(),
|
||||
next_element_id: 0,
|
||||
dist,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn next_element_id(&self) -> ElementId {
|
||||
self.next_element_id
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) fn len(&self) -> usize {
|
||||
self.elements.len()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) fn contains(&self, e_id: &ElementId) -> bool {
|
||||
self.elements.contains_key(e_id)
|
||||
}
|
||||
|
||||
pub(super) fn inc_next_element_id(&mut self) {
|
||||
self.next_element_id += 1;
|
||||
}
|
||||
|
||||
pub(super) fn insert(&mut self, id: ElementId, pt: SharedVector) {
|
||||
self.elements.insert(id, pt);
|
||||
}
|
||||
|
||||
pub(super) fn get_vector(&self, e_id: &ElementId) -> Option<&SharedVector> {
|
||||
self.elements.get(e_id)
|
||||
}
|
||||
|
||||
pub(super) fn distance(&self, a: &SharedVector, b: &SharedVector) -> f64 {
|
||||
self.dist.calculate(a, b)
|
||||
}
|
||||
|
||||
pub(super) fn get_distance(&self, q: &SharedVector, e_id: &ElementId) -> Option<f64> {
|
||||
self.elements.get(e_id).map(|e_pt| self.dist.calculate(e_pt, q))
|
||||
}
|
||||
|
||||
pub(super) fn remove(&mut self, e_id: &ElementId) {
|
||||
self.elements.remove(e_id);
|
||||
}
|
||||
}
|
201
core/src/idx/trees/hnsw/flavor.rs
Normal file
201
core/src/idx/trees/hnsw/flavor.rs
Normal file
|
@ -0,0 +1,201 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::planner::checker::HnswConditionChecker;
|
||||
use crate::idx::trees::dynamicset::{ArraySet, HashBrownSet};
|
||||
use crate::idx::trees::hnsw::docs::HnswDocs;
|
||||
use crate::idx::trees::hnsw::index::VecDocs;
|
||||
use crate::idx::trees::hnsw::{ElementId, Hnsw, HnswSearch};
|
||||
use crate::idx::trees::vector::SharedVector;
|
||||
use crate::sql::index::HnswParams;
|
||||
use reblessive::tree::Stk;
|
||||
|
||||
pub(super) type ASet<const N: usize> = ArraySet<ElementId, N>;
|
||||
pub(super) type HSet = HashBrownSet<ElementId>;
|
||||
|
||||
pub(super) enum HnswFlavor {
|
||||
H5_9(Hnsw<ASet<9>, ASet<5>>),
|
||||
H5_17(Hnsw<ASet<17>, ASet<5>>),
|
||||
H5_25(Hnsw<ASet<25>, ASet<5>>),
|
||||
H5set(Hnsw<HSet, ASet<5>>),
|
||||
H9_17(Hnsw<ASet<17>, ASet<9>>),
|
||||
H9_25(Hnsw<ASet<25>, ASet<9>>),
|
||||
H9set(Hnsw<HSet, ASet<9>>),
|
||||
H13_25(Hnsw<ASet<25>, ASet<13>>),
|
||||
H13set(Hnsw<HSet, ASet<13>>),
|
||||
H17set(Hnsw<HSet, ASet<17>>),
|
||||
H21set(Hnsw<HSet, ASet<21>>),
|
||||
H25set(Hnsw<HSet, ASet<25>>),
|
||||
H29set(Hnsw<HSet, ASet<29>>),
|
||||
Hset(Hnsw<HSet, HSet>),
|
||||
}
|
||||
|
||||
impl HnswFlavor {
|
||||
pub(super) fn new(p: &HnswParams) -> Self {
|
||||
match p.m {
|
||||
1..=4 => match p.m0 {
|
||||
1..=8 => Self::H5_9(Hnsw::<ASet<9>, ASet<5>>::new(p)),
|
||||
9..=16 => Self::H5_17(Hnsw::<ASet<17>, ASet<5>>::new(p)),
|
||||
17..=24 => Self::H5_25(Hnsw::<ASet<25>, ASet<5>>::new(p)),
|
||||
_ => Self::H5set(Hnsw::<HSet, ASet<5>>::new(p)),
|
||||
},
|
||||
5..=8 => match p.m0 {
|
||||
1..=16 => Self::H9_17(Hnsw::<ASet<17>, ASet<9>>::new(p)),
|
||||
17..=24 => Self::H9_25(Hnsw::<ASet<25>, ASet<9>>::new(p)),
|
||||
_ => Self::H9set(Hnsw::<HSet, ASet<9>>::new(p)),
|
||||
},
|
||||
9..=12 => match p.m0 {
|
||||
17..=24 => Self::H13_25(Hnsw::<ASet<25>, ASet<13>>::new(p)),
|
||||
_ => Self::H13set(Hnsw::<HSet, ASet<13>>::new(p)),
|
||||
},
|
||||
13..=16 => Self::H17set(Hnsw::<HSet, ASet<17>>::new(p)),
|
||||
17..=20 => Self::H21set(Hnsw::<HSet, ASet<21>>::new(p)),
|
||||
21..=24 => Self::H25set(Hnsw::<HSet, ASet<25>>::new(p)),
|
||||
25..=28 => Self::H29set(Hnsw::<HSet, ASet<29>>::new(p)),
|
||||
_ => Self::Hset(Hnsw::<HSet, HSet>::new(p)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn insert(&mut self, q_pt: SharedVector) -> ElementId {
|
||||
match self {
|
||||
HnswFlavor::H5_9(h) => h.insert(q_pt),
|
||||
HnswFlavor::H5_17(h) => h.insert(q_pt),
|
||||
HnswFlavor::H5_25(h) => h.insert(q_pt),
|
||||
HnswFlavor::H5set(h) => h.insert(q_pt),
|
||||
HnswFlavor::H9_17(h) => h.insert(q_pt),
|
||||
HnswFlavor::H9_25(h) => h.insert(q_pt),
|
||||
HnswFlavor::H9set(h) => h.insert(q_pt),
|
||||
HnswFlavor::H13_25(h) => h.insert(q_pt),
|
||||
HnswFlavor::H13set(h) => h.insert(q_pt),
|
||||
HnswFlavor::H17set(h) => h.insert(q_pt),
|
||||
HnswFlavor::H21set(h) => h.insert(q_pt),
|
||||
HnswFlavor::H25set(h) => h.insert(q_pt),
|
||||
HnswFlavor::H29set(h) => h.insert(q_pt),
|
||||
HnswFlavor::Hset(h) => h.insert(q_pt),
|
||||
}
|
||||
}
|
||||
pub(super) fn remove(&mut self, e_id: ElementId) -> bool {
|
||||
match self {
|
||||
HnswFlavor::H5_9(h) => h.remove(e_id),
|
||||
HnswFlavor::H5_17(h) => h.remove(e_id),
|
||||
HnswFlavor::H5_25(h) => h.remove(e_id),
|
||||
HnswFlavor::H5set(h) => h.remove(e_id),
|
||||
HnswFlavor::H9_17(h) => h.remove(e_id),
|
||||
HnswFlavor::H9_25(h) => h.remove(e_id),
|
||||
HnswFlavor::H9set(h) => h.remove(e_id),
|
||||
HnswFlavor::H13_25(h) => h.remove(e_id),
|
||||
HnswFlavor::H13set(h) => h.remove(e_id),
|
||||
HnswFlavor::H17set(h) => h.remove(e_id),
|
||||
HnswFlavor::H21set(h) => h.remove(e_id),
|
||||
HnswFlavor::H25set(h) => h.remove(e_id),
|
||||
HnswFlavor::H29set(h) => h.remove(e_id),
|
||||
HnswFlavor::Hset(h) => h.remove(e_id),
|
||||
}
|
||||
}
|
||||
pub(super) fn knn_search(&self, search: &HnswSearch) -> Vec<(f64, ElementId)> {
|
||||
match self {
|
||||
HnswFlavor::H5_9(h) => h.knn_search(search),
|
||||
HnswFlavor::H5_17(h) => h.knn_search(search),
|
||||
HnswFlavor::H5_25(h) => h.knn_search(search),
|
||||
HnswFlavor::H5set(h) => h.knn_search(search),
|
||||
HnswFlavor::H9_17(h) => h.knn_search(search),
|
||||
HnswFlavor::H9_25(h) => h.knn_search(search),
|
||||
HnswFlavor::H9set(h) => h.knn_search(search),
|
||||
HnswFlavor::H13_25(h) => h.knn_search(search),
|
||||
HnswFlavor::H13set(h) => h.knn_search(search),
|
||||
HnswFlavor::H17set(h) => h.knn_search(search),
|
||||
HnswFlavor::H21set(h) => h.knn_search(search),
|
||||
HnswFlavor::H25set(h) => h.knn_search(search),
|
||||
HnswFlavor::H29set(h) => h.knn_search(search),
|
||||
HnswFlavor::Hset(h) => h.knn_search(search),
|
||||
}
|
||||
}
|
||||
pub(super) async fn knn_search_checked(
|
||||
&self,
|
||||
search: &HnswSearch,
|
||||
hnsw_docs: &HnswDocs,
|
||||
vec_docs: &VecDocs,
|
||||
stk: &mut Stk,
|
||||
chk: &mut HnswConditionChecker<'_>,
|
||||
) -> Result<Vec<(f64, ElementId)>, Error> {
|
||||
match self {
|
||||
HnswFlavor::H5_9(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H5_17(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H5_25(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H5set(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H9_17(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H9_25(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H9set(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H13_25(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H13set(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H17set(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H21set(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H25set(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::H29set(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
HnswFlavor::Hset(h) => {
|
||||
h.knn_search_checked(search, hnsw_docs, vec_docs, stk, chk).await
|
||||
}
|
||||
}
|
||||
}
|
||||
pub(super) fn get_vector(&self, e_id: &ElementId) -> Option<&SharedVector> {
|
||||
match self {
|
||||
HnswFlavor::H5_9(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H5_17(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H5_25(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H5set(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H9_17(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H9_25(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H9set(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H13_25(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H13set(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H17set(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H21set(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H25set(h) => h.get_vector(e_id),
|
||||
HnswFlavor::H29set(h) => h.get_vector(e_id),
|
||||
HnswFlavor::Hset(h) => h.get_vector(e_id),
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
pub(super) fn check_hnsw_properties(&self, expected_count: usize) {
|
||||
match self {
|
||||
HnswFlavor::H5_9(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H5_17(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H5_25(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H5set(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H9_17(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H9_25(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H9set(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H13_25(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H13set(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H17set(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H21set(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H25set(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::H29set(h) => h.check_hnsw_properties(expected_count),
|
||||
HnswFlavor::Hset(h) => h.check_hnsw_properties(expected_count),
|
||||
}
|
||||
}
|
||||
}
|
208
core/src/idx/trees/hnsw/index.rs
Normal file
208
core/src/idx/trees/hnsw/index.rs
Normal file
|
@ -0,0 +1,208 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::planner::checker::HnswConditionChecker;
|
||||
use crate::idx::planner::iterators::KnnIteratorResult;
|
||||
use crate::idx::trees::hnsw::docs::HnswDocs;
|
||||
use crate::idx::trees::hnsw::elements::HnswElements;
|
||||
use crate::idx::trees::hnsw::flavor::HnswFlavor;
|
||||
use crate::idx::trees::hnsw::{ElementId, HnswSearch};
|
||||
use crate::idx::trees::knn::{Ids64, KnnResult, KnnResultBuilder};
|
||||
use crate::idx::trees::vector::{SharedVector, Vector};
|
||||
use crate::sql::index::{HnswParams, VectorType};
|
||||
use crate::sql::{Number, Thing, Value};
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::HashMap;
|
||||
use reblessive::tree::Stk;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
pub struct HnswIndex {
|
||||
dim: usize,
|
||||
vector_type: VectorType,
|
||||
hnsw: HnswFlavor,
|
||||
docs: HnswDocs,
|
||||
vec_docs: VecDocs,
|
||||
}
|
||||
|
||||
/// Maps each indexed vector to the set of doc ids that hold it and to the
/// element id the vector was given when it was inserted into the HNSW graph.
pub(super) type VecDocs = HashMap<SharedVector, (Ids64, ElementId)>;
|
||||
|
||||
pub(super) struct HnswCheckedSearchContext<'a> {
|
||||
elements: &'a HnswElements,
|
||||
docs: &'a HnswDocs,
|
||||
vec_docs: &'a VecDocs,
|
||||
pt: &'a SharedVector,
|
||||
ef: usize,
|
||||
}
|
||||
|
||||
impl<'a> HnswCheckedSearchContext<'a> {
|
||||
pub(super) fn new(
|
||||
elements: &'a HnswElements,
|
||||
docs: &'a HnswDocs,
|
||||
vec_docs: &'a VecDocs,
|
||||
pt: &'a SharedVector,
|
||||
ef: usize,
|
||||
) -> Self {
|
||||
Self {
|
||||
elements,
|
||||
docs,
|
||||
vec_docs,
|
||||
pt,
|
||||
ef,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn pt(&self) -> &SharedVector {
|
||||
self.pt
|
||||
}
|
||||
|
||||
pub(super) fn ef(&self) -> usize {
|
||||
self.ef
|
||||
}
|
||||
|
||||
pub(super) fn docs(&self) -> &HnswDocs {
|
||||
self.docs
|
||||
}
|
||||
|
||||
pub(super) fn get_docs(&self, pt: &SharedVector) -> Option<&Ids64> {
|
||||
self.vec_docs.get(pt).map(|(doc_ids, _)| doc_ids)
|
||||
}
|
||||
|
||||
pub(super) fn elements(&self) -> &HnswElements {
|
||||
self.elements
|
||||
}
|
||||
}
|
||||
|
||||
impl HnswIndex {
|
||||
pub fn new(p: &HnswParams) -> Self {
|
||||
Self {
|
||||
dim: p.dimension as usize,
|
||||
vector_type: p.vector_type,
|
||||
hnsw: HnswFlavor::new(p),
|
||||
docs: HnswDocs::default(),
|
||||
vec_docs: HashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_document(&mut self, rid: &Thing, content: &Vec<Value>) -> Result<(), Error> {
|
||||
// Resolve the doc_id
|
||||
let doc_id = self.docs.resolve(rid);
|
||||
// Index the values
|
||||
for value in content {
|
||||
// Extract the vector
|
||||
let vector = Vector::try_from_value(self.vector_type, self.dim, value)?;
|
||||
vector.check_dimension(self.dim)?;
|
||||
self.insert(vector.into(), doc_id);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) fn insert(&mut self, o: SharedVector, d: DocId) {
|
||||
match self.vec_docs.entry(o) {
|
||||
Entry::Occupied(mut e) => {
|
||||
let (docs, element_id) = e.get_mut();
|
||||
if let Some(new_docs) = docs.insert(d) {
|
||||
let element_id = *element_id;
|
||||
e.insert((new_docs, element_id));
|
||||
}
|
||||
}
|
||||
Entry::Vacant(e) => {
|
||||
let o = e.key().clone();
|
||||
let element_id = self.hnsw.insert(o);
|
||||
e.insert((Ids64::One(d), element_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn remove(&mut self, o: SharedVector, d: DocId) {
|
||||
if let Entry::Occupied(mut e) = self.vec_docs.entry(o) {
|
||||
let (docs, e_id) = e.get_mut();
|
||||
if let Some(new_docs) = docs.remove(d) {
|
||||
let e_id = *e_id;
|
||||
if new_docs.is_empty() {
|
||||
e.remove();
|
||||
self.hnsw.remove(e_id);
|
||||
} else {
|
||||
e.insert((new_docs, e_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn remove_document(
|
||||
&mut self,
|
||||
rid: &Thing,
|
||||
content: &Vec<Value>,
|
||||
) -> Result<(), Error> {
|
||||
if let Some(doc_id) = self.docs.remove(rid) {
|
||||
for v in content {
|
||||
// Extract the vector
|
||||
let vector = Vector::try_from_value(self.vector_type, self.dim, v)?;
|
||||
vector.check_dimension(self.dim)?;
|
||||
// Remove the vector
|
||||
self.remove(vector.into(), doc_id);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn knn_search(
|
||||
&self,
|
||||
pt: &[Number],
|
||||
k: usize,
|
||||
ef: usize,
|
||||
stk: &mut Stk,
|
||||
mut chk: HnswConditionChecker<'_>,
|
||||
) -> Result<VecDeque<KnnIteratorResult>, Error> {
|
||||
// Extract the vector
|
||||
let vector: SharedVector = Vector::try_from_vector(self.vector_type, pt)?.into();
|
||||
vector.check_dimension(self.dim)?;
|
||||
let search = HnswSearch::new(vector, k, ef);
|
||||
// Do the search
|
||||
let result = self.search(&search, stk, &mut chk).await?;
|
||||
let res = chk.convert_result(&self.docs, result.docs).await?;
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
pub(super) async fn search(
|
||||
&self,
|
||||
search: &HnswSearch,
|
||||
stk: &mut Stk,
|
||||
chk: &mut HnswConditionChecker<'_>,
|
||||
) -> Result<KnnResult, Error> {
|
||||
// Do the search
|
||||
let neighbors = match chk {
|
||||
HnswConditionChecker::Hnsw(_) => self.hnsw.knn_search(search),
|
||||
HnswConditionChecker::HnswCondition(_) => {
|
||||
self.hnsw.knn_search_checked(search, &self.docs, &self.vec_docs, stk, chk).await?
|
||||
}
|
||||
};
|
||||
Ok(self.build_result(neighbors, search.k, chk))
|
||||
}
|
||||
|
||||
fn build_result(
|
||||
&self,
|
||||
neighbors: Vec<(f64, ElementId)>,
|
||||
n: usize,
|
||||
chk: &mut HnswConditionChecker<'_>,
|
||||
) -> KnnResult {
|
||||
let mut builder = KnnResultBuilder::new(n);
|
||||
for (e_dist, e_id) in neighbors {
|
||||
if builder.check_add(e_dist) {
|
||||
if let Some(v) = self.hnsw.get_vector(&e_id) {
|
||||
if let Some((docs, _)) = self.vec_docs.get(v) {
|
||||
let evicted_docs = builder.add(e_dist, docs);
|
||||
chk.expires(evicted_docs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
builder.build(
|
||||
#[cfg(debug_assertions)]
|
||||
HashMap::new(),
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(super) fn check_hnsw_properties(&self, expected_count: usize) {
|
||||
self.hnsw.check_hnsw_properties(expected_count)
|
||||
}
|
||||
}
|
|
@ -1,10 +1,14 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::planner::checker::HnswConditionChecker;
|
||||
use crate::idx::trees::dynamicset::DynamicSet;
|
||||
use crate::idx::trees::graph::UndirectedGraph;
|
||||
use crate::idx::trees::hnsw::heuristic::Heuristic;
|
||||
use crate::idx::trees::hnsw::index::HnswCheckedSearchContext;
|
||||
use crate::idx::trees::hnsw::{ElementId, HnswElements};
|
||||
use crate::idx::trees::knn::DoublePriorityQueue;
|
||||
use crate::idx::trees::vector::SharedVector;
|
||||
use hashbrown::HashSet;
|
||||
use reblessive::tree::Stk;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct HnswLayer<S>
|
||||
|
@ -40,7 +44,7 @@ where
|
|||
pub(super) fn search_single(
|
||||
&self,
|
||||
elements: &HnswElements,
|
||||
q: &SharedVector,
|
||||
pt: &SharedVector,
|
||||
ep_dist: f64,
|
||||
ep_id: ElementId,
|
||||
ef: usize,
|
||||
|
@ -48,51 +52,67 @@ where
|
|||
let visited = HashSet::from([ep_id]);
|
||||
let candidates = DoublePriorityQueue::from(ep_dist, ep_id);
|
||||
let w = candidates.clone();
|
||||
self.search(elements, q, candidates, visited, w, ef)
|
||||
self.search(elements, pt, candidates, visited, w, ef)
|
||||
}
|
||||
|
||||
pub(super) async fn search_single_checked(
|
||||
&self,
|
||||
search: &HnswCheckedSearchContext<'_>,
|
||||
ep_pt: &SharedVector,
|
||||
ep_dist: f64,
|
||||
ep_id: ElementId,
|
||||
stk: &mut Stk,
|
||||
chk: &mut HnswConditionChecker<'_>,
|
||||
) -> Result<DoublePriorityQueue, Error> {
|
||||
let visited = HashSet::from([ep_id]);
|
||||
let candidates = DoublePriorityQueue::from(ep_dist, ep_id);
|
||||
let mut w = DoublePriorityQueue::default();
|
||||
Self::add_if_truthy(search, &mut w, ep_pt, ep_dist, ep_id, stk, chk).await?;
|
||||
self.search_checked(search, candidates, visited, w, stk, chk).await
|
||||
}
|
||||
|
||||
pub(super) fn search_multi(
|
||||
&self,
|
||||
elements: &HnswElements,
|
||||
q: &SharedVector,
|
||||
pt: &SharedVector,
|
||||
candidates: DoublePriorityQueue,
|
||||
ef: usize,
|
||||
) -> DoublePriorityQueue {
|
||||
let w = candidates.clone();
|
||||
let visited = w.to_set();
|
||||
self.search(elements, q, candidates, visited, w, ef)
|
||||
self.search(elements, pt, candidates, visited, w, ef)
|
||||
}
|
||||
|
||||
pub(super) fn search_single_ignore_ep(
|
||||
&self,
|
||||
elements: &HnswElements,
|
||||
q: &SharedVector,
|
||||
pt: &SharedVector,
|
||||
ep_id: ElementId,
|
||||
) -> Option<(f64, ElementId)> {
|
||||
let visited = HashSet::from([ep_id]);
|
||||
let candidates = DoublePriorityQueue::from(0.0, ep_id);
|
||||
let w = candidates.clone();
|
||||
let q = self.search(elements, q, candidates, visited, w, 1);
|
||||
let q = self.search(elements, pt, candidates, visited, w, 1);
|
||||
q.peek_first()
|
||||
}
|
||||
|
||||
pub(super) fn search_multi_ignore_ep(
|
||||
&self,
|
||||
elements: &HnswElements,
|
||||
q: &SharedVector,
|
||||
pt: &SharedVector,
|
||||
ep_id: ElementId,
|
||||
ef: usize,
|
||||
efc: usize,
|
||||
) -> DoublePriorityQueue {
|
||||
let visited = HashSet::from([ep_id]);
|
||||
let candidates = DoublePriorityQueue::from(0.0, ep_id);
|
||||
let w = DoublePriorityQueue::default();
|
||||
self.search(elements, q, candidates, visited, w, ef)
|
||||
self.search(elements, pt, candidates, visited, w, efc)
|
||||
}
|
||||
|
||||
pub(super) fn search(
|
||||
&self,
|
||||
elements: &HnswElements,
|
||||
q: &SharedVector,
|
||||
pt: &SharedVector,
|
||||
mut candidates: DoublePriorityQueue,
|
||||
mut visited: HashSet<ElementId>,
|
||||
mut w: DoublePriorityQueue,
|
||||
|
@ -109,15 +129,59 @@ where
|
|||
}
|
||||
if let Some(neighbourhood) = self.graph.get_edges(&doc) {
|
||||
for &e_id in neighbourhood.iter() {
|
||||
if visited.insert(e_id) {
|
||||
if let Some(e_pt) = elements.get_vector(&e_id) {
|
||||
let e_dist = elements.distance(e_pt, q);
|
||||
if e_dist < f_dist || w.len() < ef {
|
||||
candidates.push(e_dist, e_id);
|
||||
w.push(e_dist, e_id);
|
||||
if w.len() > ef {
|
||||
w.pop_last();
|
||||
}
|
||||
// Did we already visit it?
|
||||
if !visited.insert(e_id) {
|
||||
continue;
|
||||
}
|
||||
if let Some(e_pt) = elements.get_vector(&e_id) {
|
||||
let e_dist = elements.distance(e_pt, pt);
|
||||
if e_dist < f_dist || w.len() < ef {
|
||||
candidates.push(e_dist, e_id);
|
||||
w.push(e_dist, e_id);
|
||||
if w.len() > ef {
|
||||
w.pop_last();
|
||||
}
|
||||
f_dist = w.peek_last_dist().unwrap(); // w can't be empty
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
w
|
||||
}
|
||||
|
||||
pub(super) async fn search_checked(
|
||||
&self,
|
||||
search: &HnswCheckedSearchContext<'_>,
|
||||
mut candidates: DoublePriorityQueue,
|
||||
mut visited: HashSet<ElementId>,
|
||||
mut w: DoublePriorityQueue,
|
||||
stk: &mut Stk,
|
||||
chk: &mut HnswConditionChecker<'_>,
|
||||
) -> Result<DoublePriorityQueue, Error> {
|
||||
let mut f_dist = w.peek_last_dist().unwrap_or(f64::MAX);
|
||||
|
||||
let ef = search.ef();
|
||||
let pt = search.pt();
|
||||
let elements = search.elements();
|
||||
|
||||
while let Some((dist, doc)) = candidates.pop_first() {
|
||||
if dist > f_dist {
|
||||
break;
|
||||
}
|
||||
if let Some(neighbourhood) = self.graph.get_edges(&doc) {
|
||||
for &e_id in neighbourhood.iter() {
|
||||
// Did we already visit it?
|
||||
if !visited.insert(e_id) {
|
||||
continue;
|
||||
}
|
||||
if let Some(e_pt) = elements.get_vector(&e_id) {
|
||||
let e_dist = elements.distance(e_pt, pt);
|
||||
if e_dist < f_dist || w.len() < ef {
|
||||
candidates.push(e_dist, e_id);
|
||||
if Self::add_if_truthy(search, &mut w, e_pt, e_dist, e_id, stk, chk)
|
||||
.await?
|
||||
{
|
||||
f_dist = w.peek_last_dist().unwrap(); // w can't be empty
|
||||
}
|
||||
}
|
||||
|
@ -125,7 +189,30 @@ where
|
|||
}
|
||||
}
|
||||
}
|
||||
w
|
||||
Ok(w)
|
||||
}
|
||||
|
||||
pub(super) async fn add_if_truthy(
|
||||
search: &HnswCheckedSearchContext<'_>,
|
||||
w: &mut DoublePriorityQueue,
|
||||
e_pt: &SharedVector,
|
||||
e_dist: f64,
|
||||
e_id: ElementId,
|
||||
stk: &mut Stk,
|
||||
chk: &mut HnswConditionChecker<'_>,
|
||||
) -> Result<bool, Error> {
|
||||
if let Some(docs) = search.get_docs(e_pt) {
|
||||
if chk.check_truthy(stk, search.docs(), docs).await? {
|
||||
w.push(e_dist, e_id);
|
||||
if w.len() > search.ef() {
|
||||
if let Some((_, id)) = w.pop_last() {
|
||||
chk.expire(id);
|
||||
}
|
||||
}
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub(super) fn insert(
|
||||
|
@ -223,12 +310,7 @@ where
|
|||
S: DynamicSet<ElementId>,
|
||||
{
|
||||
pub(in crate::idx::trees::hnsw) fn check_props(&self, elements: &HnswElements) {
|
||||
assert!(
|
||||
self.graph.len() <= elements.elements.len(),
|
||||
"{} - {}",
|
||||
self.graph.len(),
|
||||
elements.elements.len()
|
||||
);
|
||||
assert!(self.graph.len() <= elements.len(), "{} - {}", self.graph.len(), elements.len());
|
||||
for (e_id, f_ids) in self.graph.nodes() {
|
||||
assert!(
|
||||
f_ids.len() <= self.m_max,
|
||||
|
@ -238,7 +320,7 @@ where
|
|||
);
|
||||
assert!(!f_ids.contains(e_id), "!f_ids.contains(e_id) - el: {e_id} - f_ids: {f_ids:?}");
|
||||
assert!(
|
||||
elements.elements.contains_key(e_id),
|
||||
elements.contains(e_id),
|
||||
"h.elements.contains_key(e_id) - el: {e_id} - f_ids: {f_ids:?}"
|
||||
);
|
||||
}
|
||||
|
|
|
@ -1,279 +1,40 @@
|
|||
pub(in crate::idx) mod docs;
|
||||
mod elements;
|
||||
mod flavor;
|
||||
mod heuristic;
|
||||
pub mod index;
|
||||
mod layer;
|
||||
|
||||
use crate::err::Error;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::trees::dynamicset::{ArraySet, DynamicSet, HashBrownSet};
|
||||
use crate::idx::planner::checker::HnswConditionChecker;
|
||||
use crate::idx::trees::dynamicset::DynamicSet;
|
||||
use crate::idx::trees::hnsw::docs::HnswDocs;
|
||||
use crate::idx::trees::hnsw::elements::HnswElements;
|
||||
use crate::idx::trees::hnsw::heuristic::Heuristic;
|
||||
use crate::idx::trees::hnsw::index::{HnswCheckedSearchContext, VecDocs};
|
||||
use crate::idx::trees::hnsw::layer::HnswLayer;
|
||||
use crate::idx::trees::knn::{DoublePriorityQueue, Ids64, KnnResult, KnnResultBuilder};
|
||||
use crate::idx::trees::vector::{SharedVector, Vector};
|
||||
use crate::kvs::Key;
|
||||
use crate::sql::index::{Distance, HnswParams, VectorType};
|
||||
use crate::sql::{Array, Thing, Value};
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::HashMap;
|
||||
use radix_trie::Trie;
|
||||
use crate::idx::trees::knn::DoublePriorityQueue;
|
||||
use crate::idx::trees::vector::SharedVector;
|
||||
use crate::sql::index::HnswParams;
|
||||
use rand::prelude::SmallRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringTreemap;
|
||||
use std::collections::VecDeque;
|
||||
use reblessive::tree::Stk;
|
||||
|
||||
pub struct HnswIndex {
|
||||
dim: usize,
|
||||
vector_type: VectorType,
|
||||
hnsw: Box<dyn HnswMethods>,
|
||||
docs: HnswDocs,
|
||||
vec_docs: HashMap<SharedVector, (Ids64, ElementId)>,
|
||||
struct HnswSearch {
|
||||
pt: SharedVector,
|
||||
k: usize,
|
||||
ef: usize,
|
||||
}
|
||||
|
||||
type ASet<const N: usize> = ArraySet<ElementId, N>;
|
||||
type HSet = HashBrownSet<ElementId>;
|
||||
|
||||
impl HnswIndex {
|
||||
pub fn new(p: &HnswParams) -> Self {
|
||||
impl HnswSearch {
|
||||
pub(super) fn new(pt: SharedVector, k: usize, ef: usize) -> Self {
|
||||
Self {
|
||||
dim: p.dimension as usize,
|
||||
vector_type: p.vector_type,
|
||||
hnsw: Self::new_hnsw(p),
|
||||
docs: HnswDocs::default(),
|
||||
vec_docs: HashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_hnsw(p: &HnswParams) -> Box<dyn HnswMethods> {
|
||||
match p.m {
|
||||
1..=4 => match p.m0 {
|
||||
1..=8 => Box::new(Hnsw::<ASet<9>, ASet<5>>::new(p)),
|
||||
9..=16 => Box::new(Hnsw::<ASet<17>, ASet<5>>::new(p)),
|
||||
17..=24 => Box::new(Hnsw::<ASet<25>, ASet<5>>::new(p)),
|
||||
_ => Box::new(Hnsw::<HSet, ASet<5>>::new(p)),
|
||||
},
|
||||
5..=8 => match p.m0 {
|
||||
1..=16 => Box::new(Hnsw::<ASet<17>, ASet<9>>::new(p)),
|
||||
17..=24 => Box::new(Hnsw::<ASet<25>, ASet<9>>::new(p)),
|
||||
_ => Box::new(Hnsw::<HSet, ASet<9>>::new(p)),
|
||||
},
|
||||
9..=12 => match p.m0 {
|
||||
17..=24 => Box::new(Hnsw::<ASet<25>, ASet<13>>::new(p)),
|
||||
_ => Box::new(Hnsw::<HSet, ASet<13>>::new(p)),
|
||||
},
|
||||
13..=16 => Box::new(Hnsw::<HSet, ASet<17>>::new(p)),
|
||||
17..=20 => Box::new(Hnsw::<HSet, ASet<21>>::new(p)),
|
||||
21..=24 => Box::new(Hnsw::<HSet, ASet<25>>::new(p)),
|
||||
25..=28 => Box::new(Hnsw::<HSet, ASet<29>>::new(p)),
|
||||
_ => Box::new(Hnsw::<HSet, HSet>::new(p)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_document(&mut self, rid: &Thing, content: &Vec<Value>) -> Result<(), Error> {
|
||||
// Resolve the doc_id
|
||||
let doc_id = self.docs.resolve(rid);
|
||||
// Index the values
|
||||
for value in content {
|
||||
// Extract the vector
|
||||
let vector = Vector::try_from_value(self.vector_type, self.dim, value)?;
|
||||
vector.check_dimension(self.dim)?;
|
||||
self.insert(vector.into(), doc_id);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert(&mut self, o: SharedVector, d: DocId) {
|
||||
match self.vec_docs.entry(o) {
|
||||
Entry::Occupied(mut e) => {
|
||||
let (docs, element_id) = e.get_mut();
|
||||
if let Some(new_docs) = docs.insert(d) {
|
||||
let element_id = *element_id;
|
||||
e.insert((new_docs, element_id));
|
||||
}
|
||||
}
|
||||
Entry::Vacant(e) => {
|
||||
let o = e.key().clone();
|
||||
let element_id = self.hnsw.insert(o);
|
||||
e.insert((Ids64::One(d), element_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove(&mut self, o: SharedVector, d: DocId) {
|
||||
if let Entry::Occupied(mut e) = self.vec_docs.entry(o) {
|
||||
let (docs, e_id) = e.get_mut();
|
||||
if let Some(new_docs) = docs.remove(d) {
|
||||
let e_id = *e_id;
|
||||
if new_docs.is_empty() {
|
||||
e.remove();
|
||||
self.hnsw.remove(e_id);
|
||||
} else {
|
||||
e.insert((new_docs, e_id));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn remove_document(
|
||||
&mut self,
|
||||
rid: &Thing,
|
||||
content: &Vec<Value>,
|
||||
) -> Result<(), Error> {
|
||||
if let Some(doc_id) = self.docs.remove(rid) {
|
||||
for v in content {
|
||||
// Extract the vector
|
||||
let vector = Vector::try_from_value(self.vector_type, self.dim, v)?;
|
||||
vector.check_dimension(self.dim)?;
|
||||
// Remove the vector
|
||||
self.remove(vector.into(), doc_id);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn knn_search(
|
||||
&self,
|
||||
a: &Array,
|
||||
n: usize,
|
||||
ef: usize,
|
||||
) -> Result<VecDeque<(Thing, f64)>, Error> {
|
||||
// Extract the vector
|
||||
let vector = Vector::try_from_array(self.vector_type, a)?;
|
||||
vector.check_dimension(self.dim)?;
|
||||
// Do the search
|
||||
let res = self.search(&vector.into(), n, ef);
|
||||
Ok(self.result(res))
|
||||
}
|
||||
|
||||
fn result(&self, res: KnnResult) -> VecDeque<(Thing, f64)> {
|
||||
res.docs
|
||||
.into_iter()
|
||||
.filter_map(|(doc_id, dist)| self.docs.get(doc_id).map(|t| (t.clone(), dist)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn search(&self, o: &SharedVector, n: usize, ef: usize) -> KnnResult {
|
||||
let neighbors = self.hnsw.knn_search(o, n, ef);
|
||||
|
||||
let mut builder = KnnResultBuilder::new(n);
|
||||
for (e_dist, e_id) in neighbors {
|
||||
if builder.check_add(e_dist) {
|
||||
if let Some(v) = self.hnsw.get_vector(&e_id) {
|
||||
if let Some((docs, _)) = self.vec_docs.get(v) {
|
||||
builder.add(e_dist, docs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
builder.build(
|
||||
#[cfg(debug_assertions)]
|
||||
HashMap::new(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct HnswDocs {
|
||||
doc_ids: Trie<Key, DocId>,
|
||||
ids_doc: Vec<Option<Thing>>,
|
||||
available: RoaringTreemap,
|
||||
}
|
||||
|
||||
impl HnswDocs {
|
||||
fn resolve(&mut self, rid: &Thing) -> DocId {
|
||||
let doc_key: Key = rid.into();
|
||||
if let Some(doc_id) = self.doc_ids.get(&doc_key) {
|
||||
*doc_id
|
||||
} else {
|
||||
let doc_id = self.next_doc_id();
|
||||
self.ids_doc.push(Some(rid.clone()));
|
||||
self.doc_ids.insert(doc_key, doc_id);
|
||||
doc_id
|
||||
}
|
||||
}
|
||||
|
||||
fn next_doc_id(&mut self) -> DocId {
|
||||
if let Some(doc_id) = self.available.iter().next() {
|
||||
self.available.remove(doc_id);
|
||||
doc_id
|
||||
} else {
|
||||
self.ids_doc.len() as DocId
|
||||
}
|
||||
}
|
||||
|
||||
fn get(&self, doc_id: DocId) -> Option<Thing> {
|
||||
if let Some(t) = self.ids_doc.get(doc_id as usize) {
|
||||
t.clone()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn remove(&mut self, rid: &Thing) -> Option<DocId> {
|
||||
let doc_key: Key = rid.into();
|
||||
if let Some(doc_id) = self.doc_ids.remove(&doc_key) {
|
||||
let n = doc_id as usize;
|
||||
if n < self.ids_doc.len() {
|
||||
self.ids_doc[n] = None;
|
||||
}
|
||||
self.available.insert(doc_id);
|
||||
Some(doc_id)
|
||||
} else {
|
||||
None
|
||||
pt,
|
||||
k,
|
||||
ef,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trait HnswMethods: Send + Sync {
|
||||
fn insert(&mut self, q_pt: SharedVector) -> ElementId;
|
||||
fn remove(&mut self, e_id: ElementId) -> bool;
|
||||
fn knn_search(&self, q: &SharedVector, k: usize, efs: usize) -> Vec<(f64, ElementId)>;
|
||||
fn get_vector(&self, e_id: &ElementId) -> Option<&SharedVector>;
|
||||
#[cfg(test)]
|
||||
fn check_hnsw_properties(&self, expected_count: usize);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn check_hnsw_props<L0, L>(h: &Hnsw<L0, L>, expected_count: usize)
|
||||
where
|
||||
L0: DynamicSet<ElementId>,
|
||||
L: DynamicSet<ElementId>,
|
||||
{
|
||||
assert_eq!(h.elements.elements.len(), expected_count);
|
||||
for layer in h.layers.iter() {
|
||||
layer.check_props(&h.elements);
|
||||
}
|
||||
}
|
||||
|
||||
struct HnswElements {
|
||||
elements: HashMap<ElementId, SharedVector>,
|
||||
next_element_id: ElementId,
|
||||
dist: Distance,
|
||||
}
|
||||
|
||||
impl HnswElements {
|
||||
fn new(dist: Distance) -> Self {
|
||||
Self {
|
||||
elements: Default::default(),
|
||||
next_element_id: 0,
|
||||
dist,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_vector(&self, e_id: &ElementId) -> Option<&SharedVector> {
|
||||
self.elements.get(e_id)
|
||||
}
|
||||
|
||||
fn distance(&self, a: &SharedVector, b: &SharedVector) -> f64 {
|
||||
self.dist.calculate(a, b)
|
||||
}
|
||||
fn get_distance(&self, q: &SharedVector, e_id: &ElementId) -> Option<f64> {
|
||||
self.elements.get(e_id).map(|e_pt| self.dist.calculate(e_pt, q))
|
||||
}
|
||||
|
||||
fn remove(&mut self, e_id: &ElementId) {
|
||||
self.elements.remove(e_id);
|
||||
}
|
||||
}
|
||||
|
||||
struct Hnsw<L0, L>
|
||||
where
|
||||
L0: DynamicSet<ElementId>,
|
||||
|
@ -314,7 +75,7 @@ where
|
|||
|
||||
fn insert_level(&mut self, q_pt: SharedVector, q_level: usize) -> ElementId {
|
||||
// Attribute an ID to the vector
|
||||
let q_id = self.elements.next_element_id;
|
||||
let q_id = self.elements.next_element_id();
|
||||
let top_up_layers = self.layers.len();
|
||||
|
||||
// Be sure we have existing (up) layers if required
|
||||
|
@ -323,7 +84,7 @@ where
|
|||
}
|
||||
|
||||
// Store the vector
|
||||
self.elements.elements.insert(q_id, q_pt.clone());
|
||||
self.elements.insert(q_id, q_pt.clone());
|
||||
|
||||
if let Some(ep_id) = self.enter_point {
|
||||
// We already have an enter_point, let's insert the element in the layers
|
||||
|
@ -333,7 +94,7 @@ where
|
|||
self.insert_first_element(q_id, q_level);
|
||||
}
|
||||
|
||||
self.elements.next_element_id += 1;
|
||||
self.elements.inc_next_element_id();
|
||||
q_id
|
||||
}
|
||||
|
||||
|
@ -395,13 +156,7 @@ where
|
|||
self.enter_point = Some(q_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<L0, L> HnswMethods for Hnsw<L0, L>
|
||||
where
|
||||
L0: DynamicSet<ElementId>,
|
||||
L: DynamicSet<ElementId>,
|
||||
{
|
||||
fn insert(&mut self, q_pt: SharedVector) -> ElementId {
|
||||
let q_level = self.get_random_level();
|
||||
self.insert_level(q_pt, q_level)
|
||||
|
@ -448,31 +203,56 @@ where
|
|||
removed
|
||||
}
|
||||
|
||||
fn knn_search(&self, q: &SharedVector, k: usize, efs: usize) -> Vec<(f64, ElementId)> {
|
||||
#[cfg(debug_assertions)]
|
||||
let expected_w_len = self.elements.elements.len().min(k);
|
||||
/// Unconditioned k-NN search.
///
/// Locates an entry point with `search_ep`, searches layer 0 from there with
/// a candidate list of `search.ef`, and returns at most `search.k`
/// `(distance, element id)` pairs. Returns an empty vector when no entry
/// point is found.
fn knn_search(&self, search: &HnswSearch) -> Vec<(f64, ElementId)> {
    match self.search_ep(&search.pt) {
        Some((ep_dist, ep_id)) => self
            .layer0
            .search_single(&self.elements, &search.pt, ep_dist, ep_id, search.ef)
            .to_vec_limit(search.k),
        None => Vec::new(),
    }
}
|
||||
|
||||
async fn knn_search_checked(
|
||||
&self,
|
||||
search: &HnswSearch,
|
||||
hnsw_docs: &HnswDocs,
|
||||
vec_docs: &VecDocs,
|
||||
stk: &mut Stk,
|
||||
chk: &mut HnswConditionChecker<'_>,
|
||||
) -> Result<Vec<(f64, ElementId)>, Error> {
|
||||
if let Some((ep_dist, ep_id)) = self.search_ep(&search.pt) {
|
||||
if let Some(ep_pt) = self.elements.get_vector(&ep_id) {
|
||||
let search_ctx = HnswCheckedSearchContext::new(
|
||||
&self.elements,
|
||||
hnsw_docs,
|
||||
vec_docs,
|
||||
&search.pt,
|
||||
search.ef,
|
||||
);
|
||||
let w = self
|
||||
.layer0
|
||||
.search_single_checked(&search_ctx, ep_pt, ep_dist, ep_id, stk, chk)
|
||||
.await?;
|
||||
return Ok(w.to_vec_limit(search.k));
|
||||
}
|
||||
}
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
fn search_ep(&self, pt: &SharedVector) -> Option<(f64, ElementId)> {
|
||||
if let Some(mut ep_id) = self.enter_point {
|
||||
let mut ep_dist =
|
||||
self.elements.get_distance(q, &ep_id).unwrap_or_else(|| unreachable!());
|
||||
self.elements.get_distance(pt, &ep_id).unwrap_or_else(|| unreachable!());
|
||||
for layer in self.layers.iter().rev() {
|
||||
(ep_dist, ep_id) = layer
|
||||
.search_single(&self.elements, q, ep_dist, ep_id, 1)
|
||||
.search_single(&self.elements, pt, ep_dist, ep_id, 1)
|
||||
.peek_first()
|
||||
.unwrap_or_else(|| unreachable!());
|
||||
}
|
||||
{
|
||||
let w = self.layer0.search_single(&self.elements, q, ep_dist, ep_id, efs);
|
||||
#[cfg(debug_assertions)]
|
||||
if w.len() < expected_w_len {
|
||||
debug!(
|
||||
"0 search_layer - ep_id: {ep_id:?} - ef_search: {efs} - k: {k} - w.len: {} < {expected_w_len}",
|
||||
w.len()
|
||||
);
|
||||
}
|
||||
w.to_vec_limit(k)
|
||||
}
|
||||
Some((ep_dist, ep_id))
|
||||
} else {
|
||||
vec![]
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -485,23 +265,39 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
/// Test-only consistency check: asserts that the graph stores exactly
/// `expected_count` elements, then runs `check_props` on every layer in
/// `h.layers`.
#[cfg(test)]
fn check_hnsw_props<L0, L>(h: &Hnsw<L0, L>, expected_count: usize)
where
    L0: DynamicSet<ElementId>,
    L: DynamicSet<ElementId>,
{
    assert_eq!(h.elements.len(), expected_count);
    h.layers.iter().for_each(|layer| layer.check_props(&h.elements));
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::err::Error;
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::trees::hnsw::{HnswIndex, HnswMethods};
|
||||
use crate::idx::planner::checker::HnswConditionChecker;
|
||||
use crate::idx::trees::hnsw::flavor::HnswFlavor;
|
||||
use crate::idx::trees::hnsw::index::HnswIndex;
|
||||
use crate::idx::trees::hnsw::HnswSearch;
|
||||
use crate::idx::trees::knn::tests::{new_vectors_from_file, TestCollection};
|
||||
use crate::idx::trees::knn::{Ids64, KnnResult, KnnResultBuilder};
|
||||
use crate::idx::trees::vector::{SharedVector, Vector};
|
||||
use crate::sql::index::{Distance, HnswParams, VectorType};
|
||||
use hashbrown::{hash_map::Entry, HashMap, HashSet};
|
||||
use ndarray::Array1;
|
||||
use reblessive::tree::Stk;
|
||||
use roaring::RoaringTreemap;
|
||||
use std::sync::Arc;
|
||||
use test_log::test;
|
||||
|
||||
fn insert_collection_hnsw(
|
||||
h: &mut Box<dyn HnswMethods>,
|
||||
h: &mut HnswFlavor,
|
||||
collection: &TestCollection,
|
||||
) -> HashSet<SharedVector> {
|
||||
let mut set = HashSet::new();
|
||||
|
@ -513,12 +309,12 @@ mod tests {
|
|||
}
|
||||
set
|
||||
}
|
||||
fn find_collection_hnsw(h: &Box<dyn HnswMethods>, collection: &TestCollection) {
|
||||
fn find_collection_hnsw(h: &HnswFlavor, collection: &TestCollection) {
|
||||
let max_knn = 20.min(collection.len());
|
||||
for (_, obj) in collection.to_vec_ref() {
|
||||
let obj = obj.clone().into();
|
||||
for knn in 1..max_knn {
|
||||
let res = h.knn_search(&obj, knn, 80);
|
||||
let search = HnswSearch::new(obj.clone(), knn, 80);
|
||||
let res = h.knn_search(&search);
|
||||
if collection.is_unique() {
|
||||
let mut found = false;
|
||||
for (_, e_id) in &res {
|
||||
|
@ -556,7 +352,7 @@ mod tests {
|
|||
}
|
||||
|
||||
fn test_hnsw_collection(p: &HnswParams, collection: &TestCollection) {
|
||||
let mut h = HnswIndex::new_hnsw(p);
|
||||
let mut h = HnswFlavor::new(p);
|
||||
insert_collection_hnsw(&mut h, collection);
|
||||
find_collection_hnsw(&h, &collection);
|
||||
}
|
||||
|
@ -648,17 +444,22 @@ mod tests {
|
|||
e.insert(HashSet::from([*doc_id]));
|
||||
}
|
||||
}
|
||||
h.hnsw.check_hnsw_properties(map.len());
|
||||
h.check_hnsw_properties(map.len());
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
fn find_collection_hnsw_index(h: &mut HnswIndex, collection: &TestCollection) {
|
||||
async fn find_collection_hnsw_index(
|
||||
stk: &mut Stk,
|
||||
h: &mut HnswIndex,
|
||||
collection: &TestCollection,
|
||||
) {
|
||||
let max_knn = 20.min(collection.len());
|
||||
for (doc_id, obj) in collection.to_vec_ref() {
|
||||
for knn in 1..max_knn {
|
||||
let obj: SharedVector = obj.clone().into();
|
||||
let res = h.search(&obj, knn, 500);
|
||||
let mut chk = HnswConditionChecker::default();
|
||||
let search = HnswSearch::new(obj.clone(), knn, 500);
|
||||
let res = h.search(&search, stk, &mut chk).await.unwrap();
|
||||
if knn == 1 && res.docs.len() == 1 && res.docs[0].1 > 0.0 {
|
||||
let docs: Vec<DocId> = res.docs.iter().map(|(d, _)| *d).collect();
|
||||
if collection.is_unique() {
|
||||
|
@ -701,11 +502,11 @@ mod tests {
|
|||
e.remove();
|
||||
}
|
||||
}
|
||||
h.hnsw.check_hnsw_properties(map.len());
|
||||
h.check_hnsw_properties(map.len());
|
||||
}
|
||||
}
|
||||
|
||||
fn test_hnsw_index(collection_size: usize, unique: bool, p: HnswParams) {
|
||||
async fn test_hnsw_index(collection_size: usize, unique: bool, p: HnswParams) {
|
||||
info!("test_hnsw_index - coll size: {collection_size} - params: {p:?}");
|
||||
let collection = TestCollection::new(
|
||||
unique,
|
||||
|
@ -716,7 +517,13 @@ mod tests {
|
|||
);
|
||||
let mut h = HnswIndex::new(&p);
|
||||
let map = insert_collection_hnsw_index(&mut h, &collection);
|
||||
find_collection_hnsw_index(&mut h, &collection);
|
||||
let mut stack = reblessive::tree::TreeStack::new();
|
||||
stack
|
||||
.enter(|stk| async {
|
||||
find_collection_hnsw_index(stk, &mut h, &collection).await;
|
||||
})
|
||||
.finish()
|
||||
.await;
|
||||
delete_hnsw_index_collection(&mut h, &collection, map);
|
||||
}
|
||||
|
||||
|
@ -744,7 +551,7 @@ mod tests {
|
|||
for unique in [false, true] {
|
||||
let p = new_params(dim, vt, dist.clone(), 8, 150, extend, keep);
|
||||
let f = tokio::spawn(async move {
|
||||
test_hnsw_index(30, unique, p);
|
||||
test_hnsw_index(30, unique, p).await;
|
||||
});
|
||||
futures.push(f);
|
||||
}
|
||||
|
@ -773,13 +580,11 @@ mod tests {
|
|||
(10, new_i16_vec(0, 3)),
|
||||
]);
|
||||
let p = new_params(2, VectorType::I16, Distance::Euclidean, 3, 500, true, true);
|
||||
let mut h = HnswIndex::new_hnsw(&p);
|
||||
let mut h = HnswFlavor::new(&p);
|
||||
insert_collection_hnsw(&mut h, &collection);
|
||||
let pt = new_i16_vec(-2, -3);
|
||||
let knn = 10;
|
||||
let efs = 501;
|
||||
let res = h.knn_search(&pt, knn, efs);
|
||||
assert_eq!(res.len(), knn);
|
||||
let search = HnswSearch::new(new_i16_vec(-2, -3), 10, 501);
|
||||
let res = h.knn_search(&search);
|
||||
assert_eq!(res.len(), 10);
|
||||
}
|
||||
|
||||
async fn test_recall(
|
||||
|
@ -820,24 +625,32 @@ mod tests {
|
|||
let collection = collection.clone();
|
||||
let h = h.clone();
|
||||
let f = tokio::spawn(async move {
|
||||
let mut total_recall = 0.0;
|
||||
for (_, pt) in queries.to_vec_ref() {
|
||||
let knn = 10;
|
||||
let hnsw_res = h.search(pt, knn, efs);
|
||||
assert_eq!(hnsw_res.docs.len(), knn, "Different size - knn: {knn}",);
|
||||
let brute_force_res = collection.knn(pt, Distance::Euclidean, knn);
|
||||
let rec = brute_force_res.recall(&hnsw_res);
|
||||
if rec == 1.0 {
|
||||
assert_eq!(brute_force_res.docs, hnsw_res.docs);
|
||||
}
|
||||
total_recall += rec;
|
||||
}
|
||||
let recall = total_recall / queries.to_vec_ref().len() as f64;
|
||||
info!("EFS: {efs} - Recall: {recall}");
|
||||
assert!(
|
||||
recall >= expected_recall,
|
||||
"EFS: {efs} - Recall: {recall} - Expected: {expected_recall}"
|
||||
);
|
||||
let mut stack = reblessive::tree::TreeStack::new();
|
||||
stack
|
||||
.enter(|stk| async {
|
||||
let mut total_recall = 0.0;
|
||||
for (_, pt) in queries.to_vec_ref() {
|
||||
let knn = 10;
|
||||
let mut chk = HnswConditionChecker::default();
|
||||
let search = HnswSearch::new(pt.clone(), knn, efs);
|
||||
let hnsw_res = h.search(&search, stk, &mut chk).await.unwrap();
|
||||
assert_eq!(hnsw_res.docs.len(), knn, "Different size - knn: {knn}",);
|
||||
let brute_force_res = collection.knn(pt, Distance::Euclidean, knn);
|
||||
let rec = brute_force_res.recall(&hnsw_res);
|
||||
if rec == 1.0 {
|
||||
assert_eq!(brute_force_res.docs, hnsw_res.docs);
|
||||
}
|
||||
total_recall += rec;
|
||||
}
|
||||
let recall = total_recall / queries.to_vec_ref().len() as f64;
|
||||
info!("EFS: {efs} - Recall: {recall}");
|
||||
assert!(
|
||||
recall >= expected_recall,
|
||||
"EFS: {efs} - Recall: {recall} - Expected: {expected_recall}"
|
||||
);
|
||||
})
|
||||
.finish()
|
||||
.await;
|
||||
});
|
||||
futures.push(f);
|
||||
}
|
||||
|
|
|
@ -175,7 +175,7 @@ impl Ord for FloatKey {
|
|||
/// When identifiers are added or removed, the method returned the most appropriate
|
||||
/// variant (if required).
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub(super) enum Ids64 {
|
||||
pub(in crate::idx) enum Ids64 {
|
||||
#[allow(dead_code)] // Will be used with HNSW
|
||||
Empty,
|
||||
One(u64),
|
||||
|
@ -354,7 +354,7 @@ impl Ids64 {
|
|||
}
|
||||
}
|
||||
|
||||
fn iter(&self) -> Box<dyn Iterator<Item = DocId> + '_> {
|
||||
pub(in crate::idx) fn iter(&self) -> Box<dyn Iterator<Item = DocId> + '_> {
|
||||
match &self {
|
||||
Self::Empty => Box::new(EmptyIterator {}),
|
||||
Self::One(d) => Box::new(OneDocIterator(Some(*d))),
|
||||
|
@ -531,16 +531,17 @@ impl KnnResultBuilder {
|
|||
}
|
||||
}
|
||||
pub(super) fn check_add(&self, dist: f64) -> bool {
|
||||
if self.docs.len() < self.knn {
|
||||
true
|
||||
} else if let Some(pr) = self.priority_list.keys().last() {
|
||||
dist <= pr.0
|
||||
} else {
|
||||
true
|
||||
if self.docs.len() >= self.knn {
|
||||
if let Some(pr) = self.priority_list.keys().last() {
|
||||
if dist > pr.0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
pub(super) fn add(&mut self, dist: f64, docs: &Ids64) {
|
||||
pub(super) fn add(&mut self, dist: f64, docs: &Ids64) -> Ids64 {
|
||||
let pr = FloatKey(dist);
|
||||
docs.append_to(&mut self.docs);
|
||||
match self.priority_list.entry(pr) {
|
||||
|
@ -562,10 +563,12 @@ impl KnnResultBuilder {
|
|||
if docs_len - d.len() >= self.knn {
|
||||
if let Some((_, evicted_docs)) = self.priority_list.pop_last() {
|
||||
evicted_docs.remove_to(&mut self.docs);
|
||||
return evicted_docs;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ids64::Empty
|
||||
}
|
||||
|
||||
pub(super) fn build(
|
||||
|
@ -614,7 +617,7 @@ pub(super) mod tests {
|
|||
use crate::idx::trees::knn::{DoublePriorityQueue, FloatKey, Ids64, KnnResultBuilder};
|
||||
use crate::idx::trees::vector::{SharedVector, Vector};
|
||||
use crate::sql::index::{Distance, VectorType};
|
||||
use crate::sql::{Array, Number};
|
||||
use crate::sql::{Array, Number, Value};
|
||||
use crate::syn::Parse;
|
||||
use flate2::read::GzDecoder;
|
||||
#[cfg(debug_assertions)]
|
||||
|
@ -683,7 +686,7 @@ pub(super) mod tests {
|
|||
}
|
||||
let line = line_result?;
|
||||
let array = Array::parse(&line);
|
||||
let vec = Vector::try_from_array(t, &array)?.into();
|
||||
let vec = Vector::try_from_value(t, array.len(), &Value::Array(array))?.into();
|
||||
res.push((i as DocId, vec));
|
||||
}
|
||||
Ok(res)
|
||||
|
@ -699,7 +702,7 @@ pub(super) mod tests {
|
|||
for _ in 0..dim {
|
||||
vec.push(gen.generate(rng));
|
||||
}
|
||||
let vec = Vector::try_from_array(t, &Array::from(vec)).unwrap();
|
||||
let vec = Vector::try_from_vector(t, &vec).unwrap();
|
||||
if vec.is_null() {
|
||||
// Some similarities (cosine) is undefined for null vector.
|
||||
new_random_vec(rng, t, dim, gen)
|
||||
|
|
|
@ -3,7 +3,7 @@ pub mod btree;
|
|||
pub mod dynamicset;
|
||||
mod graph;
|
||||
pub mod hnsw;
|
||||
mod knn;
|
||||
pub(in crate::idx) mod knn;
|
||||
pub mod mtree;
|
||||
pub mod store;
|
||||
pub mod vector;
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use crate::dbs;
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use reblessive::tree::Stk;
|
||||
|
@ -13,6 +14,8 @@ use tokio::sync::RwLock;
|
|||
use crate::err::Error;
|
||||
|
||||
use crate::idx::docids::{DocId, DocIds};
|
||||
use crate::idx::planner::checker::MTreeConditionChecker;
|
||||
use crate::idx::planner::iterators::KnnIteratorResult;
|
||||
use crate::idx::trees::btree::BStatistics;
|
||||
use crate::idx::trees::knn::{Ids64, KnnResult, KnnResultBuilder, PriorityNode};
|
||||
use crate::idx::trees::store::{
|
||||
|
@ -22,9 +25,10 @@ use crate::idx::trees::vector::{SharedVector, Vector};
|
|||
use crate::idx::{IndexKeyBase, VersionedSerdeState};
|
||||
use crate::kvs::{Key, Transaction, TransactionType, Val};
|
||||
use crate::sql::index::{Distance, MTreeParams, VectorType};
|
||||
use crate::sql::{Array, Object, Thing, Value};
|
||||
use crate::sql::{Number, Object, Thing, Value};
|
||||
|
||||
pub(crate) struct MTreeIndex {
|
||||
#[non_exhaustive]
|
||||
pub struct MTreeIndex {
|
||||
ixs: IndexStores,
|
||||
state_key: Key,
|
||||
dim: usize,
|
||||
|
@ -34,8 +38,15 @@ pub(crate) struct MTreeIndex {
|
|||
mtree: Arc<RwLock<MTree>>,
|
||||
}
|
||||
|
||||
struct MTreeSearchContext<'a> {
|
||||
txn: &'a dbs::Transaction,
|
||||
pt: SharedVector,
|
||||
k: usize,
|
||||
store: &'a MTreeStore,
|
||||
}
|
||||
|
||||
impl MTreeIndex {
|
||||
pub(crate) async fn new(
|
||||
pub async fn new(
|
||||
ixs: &IndexStores,
|
||||
tx: &mut Transaction,
|
||||
ikb: IndexKeyBase,
|
||||
|
@ -70,7 +81,7 @@ impl MTreeIndex {
|
|||
store,
|
||||
})
|
||||
}
|
||||
pub(crate) async fn index_document(
|
||||
pub async fn index_document(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
tx: &mut Transaction,
|
||||
|
@ -78,8 +89,10 @@ impl MTreeIndex {
|
|||
content: &Vec<Value>,
|
||||
) -> Result<(), Error> {
|
||||
// Resolve the doc_id
|
||||
let resolved = self.doc_ids.write().await.resolve_doc_id(tx, rid.into()).await?;
|
||||
let mut doc_ids = self.doc_ids.write().await;
|
||||
let resolved = doc_ids.resolve_doc_id(tx, rid.into()).await?;
|
||||
let doc_id = *resolved.doc_id();
|
||||
drop(doc_ids);
|
||||
// Index the values
|
||||
let mut mtree = self.mtree.write().await;
|
||||
for v in content {
|
||||
|
@ -89,34 +102,51 @@ impl MTreeIndex {
|
|||
// Insert the vector in the index
|
||||
mtree.insert(stk, tx, &mut self.store, vector.into(), doc_id).await?;
|
||||
}
|
||||
drop(mtree);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) async fn knn_search(
|
||||
pub async fn knn_search(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
a: &Array,
|
||||
stk: &mut Stk,
|
||||
txn: &dbs::Transaction,
|
||||
v: &[Number],
|
||||
k: usize,
|
||||
) -> Result<VecDeque<(DocId, f64)>, Error> {
|
||||
mut chk: MTreeConditionChecker<'_>,
|
||||
) -> Result<VecDeque<KnnIteratorResult>, Error> {
|
||||
// Extract the vector
|
||||
let vector = Vector::try_from_array(self.vector_type, a)?;
|
||||
let vector = Vector::try_from_vector(self.vector_type, v)?;
|
||||
vector.check_dimension(self.dim)?;
|
||||
let vector: SharedVector = vector.into();
|
||||
// Lock the index
|
||||
// Build the search context
|
||||
let search = MTreeSearchContext {
|
||||
txn,
|
||||
pt: vector.into(),
|
||||
k,
|
||||
store: &self.store,
|
||||
};
|
||||
// Lock the tree and the docs
|
||||
let mtree = self.mtree.read().await;
|
||||
let doc_ids = self.doc_ids.read().await;
|
||||
// Do the search
|
||||
let res = mtree.knn_search(tx, &self.store, &vector, k).await?;
|
||||
Ok(res.docs)
|
||||
let res = mtree.knn_search(&search, &doc_ids, stk, &mut chk).await?;
|
||||
drop(mtree);
|
||||
// Resolve the doc_id to Thing and the optional value
|
||||
let res = chk.convert_result(&doc_ids, res.docs).await;
|
||||
drop(doc_ids);
|
||||
res
|
||||
}
|
||||
|
||||
pub(crate) async fn remove_document(
|
||||
pub async fn remove_document(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
tx: &mut Transaction,
|
||||
rid: &Thing,
|
||||
content: &Vec<Value>,
|
||||
) -> Result<(), Error> {
|
||||
if let Some(doc_id) = self.doc_ids.write().await.remove_doc(tx, rid.into()).await? {
|
||||
let mut doc_ids = self.doc_ids.write().await;
|
||||
let doc_id = doc_ids.remove_doc(tx, rid.into()).await?;
|
||||
drop(doc_ids);
|
||||
if let Some(doc_id) = doc_id {
|
||||
// Lock the index
|
||||
let mut mtree = self.mtree.write().await;
|
||||
for v in content {
|
||||
|
@ -126,28 +156,28 @@ impl MTreeIndex {
|
|||
// Remove the vector
|
||||
mtree.delete(stk, tx, &mut self.store, vector.into(), doc_id).await?;
|
||||
}
|
||||
drop(mtree);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(in crate::idx) fn doc_ids(&self) -> Arc<RwLock<DocIds>> {
|
||||
self.doc_ids.clone()
|
||||
}
|
||||
|
||||
pub(crate) async fn statistics(&self, tx: &mut Transaction) -> Result<MtStatistics, Error> {
|
||||
Ok(MtStatistics {
|
||||
doc_ids: self.doc_ids.read().await.statistics(tx).await?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
|
||||
self.doc_ids.write().await.finish(tx).await?;
|
||||
pub async fn finish(&mut self, tx: &mut Transaction) -> Result<(), Error> {
|
||||
let mut doc_ids = self.doc_ids.write().await;
|
||||
doc_ids.finish(tx).await?;
|
||||
drop(doc_ids);
|
||||
let mut mtree = self.mtree.write().await;
|
||||
if let Some(new_cache) = self.store.finish(tx).await? {
|
||||
mtree.state.generation += 1;
|
||||
tx.set(self.state_key.clone(), mtree.state.try_to_val()?).await?;
|
||||
self.ixs.advance_store_mtree(new_cache);
|
||||
}
|
||||
drop(mtree);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -155,14 +185,14 @@ impl MTreeIndex {
|
|||
// https://en.wikipedia.org/wiki/M-tree
|
||||
// https://arxiv.org/pdf/1004.4216.pdf
|
||||
#[non_exhaustive]
|
||||
pub struct MTree {
|
||||
struct MTree {
|
||||
state: MState,
|
||||
distance: Distance,
|
||||
minimum: usize,
|
||||
}
|
||||
|
||||
impl MTree {
|
||||
pub fn new(state: MState, distance: Distance) -> Self {
|
||||
fn new(state: MState, distance: Distance) -> Self {
|
||||
let minimum = (state.capacity + 1) as usize / 2;
|
||||
Self {
|
||||
state,
|
||||
|
@ -171,17 +201,17 @@ impl MTree {
|
|||
}
|
||||
}
|
||||
|
||||
pub async fn knn_search(
|
||||
async fn knn_search(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
store: &MTreeStore,
|
||||
v: &SharedVector,
|
||||
k: usize,
|
||||
search: &MTreeSearchContext<'_>,
|
||||
doc_ids: &DocIds,
|
||||
stk: &mut Stk,
|
||||
chk: &mut MTreeConditionChecker<'_>,
|
||||
) -> Result<KnnResult, Error> {
|
||||
#[cfg(debug_assertions)]
|
||||
debug!("knn_search - v: {:?} - k: {}", v, k);
|
||||
debug!("knn_search - pt: {:?} - k: {}", search.pt, search.k);
|
||||
let mut queue = BinaryHeap::new();
|
||||
let mut res = KnnResultBuilder::new(k);
|
||||
let mut res = KnnResultBuilder::new(search.k);
|
||||
if let Some(root_id) = self.state.root {
|
||||
queue.push(PriorityNode::new(0.0, root_id));
|
||||
}
|
||||
|
@ -189,7 +219,7 @@ impl MTree {
|
|||
let mut visited_nodes = HashMap::new();
|
||||
while let Some(e) = queue.pop() {
|
||||
let id = e.id();
|
||||
let node = store.get_node(tx, id).await?;
|
||||
let node = search.store.get_node_txn(search.txn, id).await?;
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
debug!("Visit node id: {}", id);
|
||||
|
@ -202,11 +232,22 @@ impl MTree {
|
|||
#[cfg(debug_assertions)]
|
||||
debug!("Leaf found - id: {} - len: {}", node.id, n.len(),);
|
||||
for (o, p) in n {
|
||||
let d = self.calculate_distance(o, v)?;
|
||||
let d = self.calculate_distance(o, &search.pt)?;
|
||||
if res.check_add(d) {
|
||||
#[cfg(debug_assertions)]
|
||||
debug!("Add: {d} - obj: {o:?} - docs: {:?}", p.docs);
|
||||
res.add(d, &Ids64::Bits(p.docs.clone()));
|
||||
let mut docs = Ids64::Empty;
|
||||
for doc in &p.docs {
|
||||
if chk.check_truthy(stk, doc_ids, doc).await? {
|
||||
if let Some(new_docs) = docs.insert(doc) {
|
||||
docs = new_docs;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !docs.is_empty() {
|
||||
let evicted_docs = res.add(d, &docs);
|
||||
chk.expires(evicted_docs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -214,7 +255,7 @@ impl MTree {
|
|||
#[cfg(debug_assertions)]
|
||||
debug!("Internal found - id: {} - {:?}", node.id, n);
|
||||
for (o, p) in n {
|
||||
let d = self.calculate_distance(o, v)?;
|
||||
let d = self.calculate_distance(o, &search.pt)?;
|
||||
let min_dist = (d - p.radius).max(0.0);
|
||||
if res.check_add(min_dist) {
|
||||
debug!("Queue add - dist: {} - node: {}", min_dist, p.node);
|
||||
|
@ -252,7 +293,7 @@ impl MTree {
|
|||
new_node_id
|
||||
}
|
||||
|
||||
pub async fn insert(
|
||||
async fn insert(
|
||||
&mut self,
|
||||
stk: &mut Stk,
|
||||
tx: &mut Transaction,
|
||||
|
@ -1430,45 +1471,50 @@ impl VersionedSerdeState for MState {}
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use futures::lock::Mutex;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use reblessive::tree::Stk;
|
||||
use std::collections::VecDeque;
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::dbs;
|
||||
use crate::err::Error;
|
||||
use test_log::test;
|
||||
|
||||
use crate::idx::docids::DocId;
|
||||
use crate::idx::docids::{DocId, DocIds};
|
||||
use crate::idx::planner::checker::MTreeConditionChecker;
|
||||
use crate::idx::trees::knn::tests::TestCollection;
|
||||
use crate::idx::trees::mtree::{MState, MTree, MTreeNode, MTreeStore};
|
||||
use crate::idx::trees::mtree::{MState, MTree, MTreeNode, MTreeSearchContext, MTreeStore};
|
||||
use crate::idx::trees::store::{NodeId, TreeNodeProvider, TreeStore};
|
||||
use crate::idx::trees::vector::SharedVector;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::kvs::LockType::*;
|
||||
use crate::kvs::Transaction;
|
||||
use crate::kvs::{Datastore, TransactionType};
|
||||
use crate::sql::index::{Distance, VectorType};
|
||||
|
||||
async fn new_operation(
|
||||
async fn new_operation<'a>(
|
||||
ds: &Datastore,
|
||||
t: &MTree,
|
||||
tt: TransactionType,
|
||||
cache_size: usize,
|
||||
) -> (TreeStore<MTreeNode>, Transaction) {
|
||||
) -> (dbs::Transaction, TreeStore<MTreeNode>) {
|
||||
let st = ds
|
||||
.index_store()
|
||||
.get_store_mtree(TreeNodeProvider::Debug, t.state.generation, tt, cache_size)
|
||||
.await;
|
||||
let tx = ds.transaction(tt, Optimistic).await.unwrap();
|
||||
(st, tx)
|
||||
let tx = Arc::new(Mutex::new(ds.transaction(tt, Optimistic).await.unwrap()));
|
||||
(tx, st)
|
||||
}
|
||||
|
||||
async fn finish_operation(
|
||||
ds: &Datastore,
|
||||
t: &mut MTree,
|
||||
mut tx: Transaction,
|
||||
tx: &mut Transaction,
|
||||
mut st: TreeStore<MTreeNode>,
|
||||
commit: bool,
|
||||
) -> Result<(), Error> {
|
||||
if let Some(new_cache) = st.finish(&mut tx).await? {
|
||||
if let Some(new_cache) = st.finish(tx).await? {
|
||||
assert!(new_cache.len() > 0, "new_cache.len() = {}", new_cache.len());
|
||||
t.state.generation += 1;
|
||||
ds.index_store().advance_store_mtree(new_cache);
|
||||
|
@ -1492,17 +1538,19 @@ mod tests {
|
|||
let mut c = 0;
|
||||
for (doc_id, obj) in collection.to_vec_ref() {
|
||||
{
|
||||
let (mut st, mut tx) =
|
||||
new_operation(ds, t, TransactionType::Write, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Write, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
t.insert(stk, &mut tx, &mut st, obj.clone(), *doc_id).await?;
|
||||
finish_operation(ds, t, tx, st, true).await?;
|
||||
finish_operation(ds, t, &mut tx, st, true).await?;
|
||||
drop(tx);
|
||||
map.insert(*doc_id, obj.clone());
|
||||
}
|
||||
c += 1;
|
||||
{
|
||||
let (mut st, mut tx) =
|
||||
new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
let p = check_tree_properties(&mut tx, &mut st, t).await?;
|
||||
drop(tx);
|
||||
assert_eq!(p.doc_count, c);
|
||||
}
|
||||
}
|
||||
|
@ -1518,16 +1566,20 @@ mod tests {
|
|||
) -> Result<HashMap<DocId, SharedVector>, Error> {
|
||||
let mut map = HashMap::with_capacity(collection.len());
|
||||
{
|
||||
let (mut st, mut tx) = new_operation(ds, t, TransactionType::Write, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Write, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
for (doc_id, obj) in collection.to_vec_ref() {
|
||||
t.insert(stk, &mut tx, &mut st, obj.clone(), *doc_id).await?;
|
||||
map.insert(*doc_id, obj.clone());
|
||||
}
|
||||
finish_operation(ds, t, tx, st, true).await?;
|
||||
finish_operation(ds, t, &mut tx, st, true).await?;
|
||||
drop(tx);
|
||||
}
|
||||
{
|
||||
let (mut st, mut tx) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
check_tree_properties(&mut tx, &mut st, t).await?;
|
||||
drop(tx);
|
||||
}
|
||||
Ok(map)
|
||||
}
|
||||
|
@ -1535,6 +1587,7 @@ mod tests {
|
|||
async fn delete_collection(
|
||||
stk: &mut Stk,
|
||||
ds: &Datastore,
|
||||
doc_ids: &DocIds,
|
||||
t: &mut MTree,
|
||||
collection: &TestCollection,
|
||||
cache_size: usize,
|
||||
|
@ -1543,16 +1596,24 @@ mod tests {
|
|||
for (doc_id, obj) in collection.to_vec_ref() {
|
||||
let deleted = {
|
||||
debug!("### Remove {} {:?}", doc_id, obj);
|
||||
let (mut st, mut tx) =
|
||||
new_operation(ds, t, TransactionType::Write, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Write, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
let deleted = t.delete(stk, &mut tx, &mut st, obj.clone(), *doc_id).await?;
|
||||
finish_operation(ds, t, tx, st, true).await?;
|
||||
finish_operation(ds, t, &mut tx, st, true).await?;
|
||||
drop(tx);
|
||||
deleted
|
||||
};
|
||||
all_deleted = all_deleted && deleted;
|
||||
if deleted {
|
||||
let (st, mut tx) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let res = t.knn_search(&mut tx, &st, obj, 1).await?;
|
||||
let (txn, st) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let mut chk = MTreeConditionChecker::new(&txn);
|
||||
let search = MTreeSearchContext {
|
||||
txn: &txn,
|
||||
pt: obj.clone(),
|
||||
k: 1,
|
||||
store: &st,
|
||||
};
|
||||
let res = t.knn_search(&search, doc_ids, stk, &mut chk).await?;
|
||||
assert!(
|
||||
!res.docs.iter().any(|(id, _)| id == doc_id),
|
||||
"Found: {} {:?}",
|
||||
|
@ -1564,30 +1625,42 @@ mod tests {
|
|||
warn!("Delete failed: {} {:?}", doc_id, obj);
|
||||
}
|
||||
{
|
||||
let (mut st, mut tx) =
|
||||
new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
check_tree_properties(&mut tx, &mut st, t).await?;
|
||||
drop(tx);
|
||||
}
|
||||
}
|
||||
|
||||
if all_deleted {
|
||||
let (mut st, mut tx) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
check_tree_properties(&mut tx, &mut st, t).await?.check(0, 0, None, None, 0, 0);
|
||||
drop(tx);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn find_collection(
|
||||
stk: &mut Stk,
|
||||
ds: &Datastore,
|
||||
doc_ids: &DocIds,
|
||||
t: &mut MTree,
|
||||
collection: &TestCollection,
|
||||
cache_size: usize,
|
||||
) -> Result<(), Error> {
|
||||
let (mut st, mut tx) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let (txn, mut st) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let max_knn = 20.max(collection.len());
|
||||
for (doc_id, obj) in collection.to_vec_ref() {
|
||||
for knn in 1..max_knn {
|
||||
let res = t.knn_search(&mut tx, &st, obj, knn).await?;
|
||||
let mut chk = MTreeConditionChecker::new(&txn);
|
||||
let search = MTreeSearchContext {
|
||||
txn: &txn,
|
||||
pt: obj.clone(),
|
||||
k: knn,
|
||||
store: &st,
|
||||
};
|
||||
let res = t.knn_search(&search, doc_ids, stk, &mut chk).await?;
|
||||
let docs: Vec<DocId> = res.docs.iter().map(|(d, _)| *d).collect();
|
||||
if collection.is_unique() {
|
||||
assert!(
|
||||
|
@ -1603,7 +1676,9 @@ mod tests {
|
|||
if expected_len != res.docs.len() {
|
||||
#[cfg(debug_assertions)]
|
||||
debug!("{:?}", res.visited_nodes);
|
||||
let mut tx = txn.lock().await;
|
||||
check_tree_properties(&mut tx, &mut st, t).await?;
|
||||
drop(tx);
|
||||
}
|
||||
assert_eq!(
|
||||
expected_len,
|
||||
|
@ -1619,14 +1694,23 @@ mod tests {
|
|||
}
|
||||
|
||||
async fn check_full_knn(
|
||||
stk: &mut Stk,
|
||||
ds: &Datastore,
|
||||
doc_ids: &DocIds,
|
||||
t: &mut MTree,
|
||||
map: &HashMap<DocId, SharedVector>,
|
||||
cache_size: usize,
|
||||
) -> Result<(), Error> {
|
||||
let (st, mut tx) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
let (txn, st) = new_operation(ds, t, TransactionType::Read, cache_size).await;
|
||||
for obj in map.values() {
|
||||
let res = t.knn_search(&mut tx, &st, obj, map.len()).await?;
|
||||
let mut chk = MTreeConditionChecker::new(&txn);
|
||||
let search = MTreeSearchContext {
|
||||
txn: &txn,
|
||||
pt: obj.clone(),
|
||||
k: map.len(),
|
||||
store: &st,
|
||||
};
|
||||
let res = t.knn_search(&search, doc_ids, stk, &mut chk).await?;
|
||||
assert_eq!(
|
||||
map.len(),
|
||||
res.docs.len(),
|
||||
|
@ -1671,21 +1755,36 @@ mod tests {
|
|||
vector_type,
|
||||
);
|
||||
let ds = Datastore::new("memory").await?;
|
||||
|
||||
let mut t = MTree::new(MState::new(*capacity), distance.clone());
|
||||
|
||||
let (txn, _st) = new_operation(&ds, &t, TransactionType::Read, cache_size).await;
|
||||
let mut tx = txn.lock().await;
|
||||
let doc_ids = DocIds::new(
|
||||
ds.index_store(),
|
||||
&mut tx,
|
||||
TransactionType::Read,
|
||||
IndexKeyBase::default(),
|
||||
7,
|
||||
100,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
drop(tx);
|
||||
|
||||
let map = if collection.len() < 1000 {
|
||||
insert_collection_one_by_one(stk, &ds, &mut t, &collection, cache_size).await?
|
||||
} else {
|
||||
insert_collection_batch(stk, &ds, &mut t, &collection, cache_size).await?
|
||||
};
|
||||
if check_find {
|
||||
find_collection(&ds, &mut t, &collection, cache_size).await?;
|
||||
find_collection(stk, &ds, &doc_ids, &mut t, &collection, cache_size).await?;
|
||||
}
|
||||
if check_full {
|
||||
check_full_knn(&ds, &mut t, &map, cache_size).await?;
|
||||
check_full_knn(stk, &ds, &doc_ids, &mut t, &map, cache_size).await?;
|
||||
}
|
||||
if check_delete {
|
||||
delete_collection(stk, &ds, &mut t, &collection, cache_size).await?;
|
||||
delete_collection(stk, &ds, &doc_ids, &mut t, &collection, cache_size).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
use crate::idx::trees::hnsw::HnswIndex;
|
||||
use crate::idx::trees::hnsw::index::HnswIndex;
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::kvs::Key;
|
||||
use crate::sql::index::HnswParams;
|
||||
|
@ -20,30 +20,36 @@ impl Default for HnswIndexes {
|
|||
impl HnswIndexes {
|
||||
pub(super) async fn get(&self, ikb: &IndexKeyBase, p: &HnswParams) -> SharedHnswIndex {
|
||||
let key = ikb.new_vm_key(None);
|
||||
{
|
||||
let r = self.0.read().await;
|
||||
if let Some(h) = r.get(&key).cloned() {
|
||||
return h;
|
||||
}
|
||||
let r = self.0.read().await;
|
||||
let h = r.get(&key).cloned();
|
||||
drop(r);
|
||||
if let Some(h) = h {
|
||||
return h;
|
||||
}
|
||||
let mut w = self.0.write().await;
|
||||
match w.entry(key) {
|
||||
let ix = match w.entry(key) {
|
||||
Entry::Occupied(e) => e.get().clone(),
|
||||
Entry::Vacant(e) => {
|
||||
let h = Arc::new(RwLock::new(HnswIndex::new(p)));
|
||||
e.insert(h.clone());
|
||||
h
|
||||
}
|
||||
}
|
||||
};
|
||||
drop(w);
|
||||
ix
|
||||
}
|
||||
|
||||
pub(super) async fn remove(&self, ikb: &IndexKeyBase) {
|
||||
let key = ikb.new_vm_key(None);
|
||||
let mut w = self.0.write().await;
|
||||
w.remove(&key);
|
||||
drop(w);
|
||||
}
|
||||
|
||||
pub(super) async fn is_empty(&self) -> bool {
|
||||
self.0.read().await.is_empty()
|
||||
let h = self.0.read().await;
|
||||
let r = h.is_empty();
|
||||
drop(h);
|
||||
r
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,7 +47,10 @@ where
|
|||
// Locate the shard
|
||||
let n = key as usize % self.shards_count;
|
||||
// Get and promote the key
|
||||
self.shards[n].lock().await.get_and_promote(key)
|
||||
let mut shard = self.shards[n].lock().await;
|
||||
let v = shard.get_and_promote(key);
|
||||
drop(shard);
|
||||
v
|
||||
}
|
||||
|
||||
pub(super) async fn insert<K: Into<CacheKey>>(&self, key: K, val: V) {
|
||||
|
@ -55,7 +58,9 @@ where
|
|||
// Locate the shard
|
||||
let shard = key as usize % self.shards_count;
|
||||
// Insert the key/object in the shard and get the new length
|
||||
let new_length = self.shards[shard].lock().await.insert(key, val, self.full.load(Relaxed));
|
||||
let mut s = self.shards[shard].lock().await;
|
||||
let new_length = s.insert(key, val, self.full.load(Relaxed));
|
||||
drop(s);
|
||||
// Update lengths
|
||||
self.check_length(new_length, shard);
|
||||
}
|
||||
|
@ -65,7 +70,9 @@ where
|
|||
// Locate the shard
|
||||
let shard = key as usize % self.shards_count;
|
||||
// Remove the key
|
||||
let new_length = self.shards[shard].lock().await.remove(key);
|
||||
let mut s = self.shards[shard].lock().await;
|
||||
let new_length = s.remove(key);
|
||||
drop(s);
|
||||
// Update lengths
|
||||
self.check_length(new_length, shard);
|
||||
}
|
||||
|
@ -94,7 +101,9 @@ where
|
|||
.shards
|
||||
.iter()
|
||||
.map(|s| async {
|
||||
let shard = s.lock().await.duplicate(filter);
|
||||
let s = s.lock().await;
|
||||
let shard = s.duplicate(filter);
|
||||
drop(s);
|
||||
(shard.map.len(), Mutex::new(shard))
|
||||
})
|
||||
.collect();
|
||||
|
|
|
@ -3,6 +3,7 @@ pub(crate) mod hnsw;
|
|||
mod lru;
|
||||
pub(crate) mod tree;
|
||||
|
||||
use crate::dbs;
|
||||
use crate::dbs::Options;
|
||||
use crate::err::Error;
|
||||
use crate::idx::trees::bkeys::{FstKeys, TrieKeys};
|
||||
|
@ -66,6 +67,22 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) async fn get_node_txn(
|
||||
&self,
|
||||
txn: &dbs::Transaction,
|
||||
node_id: NodeId,
|
||||
) -> Result<Arc<StoredNode<N>>, Error> {
|
||||
match self {
|
||||
Self::Read(r) => {
|
||||
let mut tx = txn.lock().await;
|
||||
let n = r.get_node(&mut tx, node_id).await;
|
||||
drop(tx);
|
||||
n
|
||||
}
|
||||
_ => Err(Error::Unreachable("TreeStore::get_node_txn")),
|
||||
}
|
||||
}
|
||||
|
||||
pub(in crate::idx) async fn set_node(
|
||||
&mut self,
|
||||
node: StoredNode<N>,
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::err::Error;
|
||||
use crate::fnc::util::math::ToFloat;
|
||||
use crate::sql::index::{Distance, VectorType};
|
||||
use crate::sql::{Array, Number, Value};
|
||||
use crate::sql::{Number, Value};
|
||||
use ahash::AHasher;
|
||||
use hashbrown::HashSet;
|
||||
use linfa_linalg::norm::Norm;
|
||||
|
@ -446,50 +446,43 @@ impl Vector {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn try_from_array(t: VectorType, a: &Array) -> Result<Self, Error> {
|
||||
pub fn try_from_vector(t: VectorType, v: &[Number]) -> Result<Self, Error> {
|
||||
let res = match t {
|
||||
VectorType::F64 => {
|
||||
let mut vec = Vec::with_capacity(a.len());
|
||||
Self::check_vector_array(a, &mut vec)?;
|
||||
let mut vec = Vec::with_capacity(v.len());
|
||||
Self::check_vector_number(v, &mut vec)?;
|
||||
Vector::F64(Array1::from_vec(vec))
|
||||
}
|
||||
VectorType::F32 => {
|
||||
let mut vec = Vec::with_capacity(a.len());
|
||||
Self::check_vector_array(a, &mut vec)?;
|
||||
let mut vec = Vec::with_capacity(v.len());
|
||||
Self::check_vector_number(v, &mut vec)?;
|
||||
Vector::F32(Array1::from_vec(vec))
|
||||
}
|
||||
VectorType::I64 => {
|
||||
let mut vec = Vec::with_capacity(a.len());
|
||||
Self::check_vector_array(a, &mut vec)?;
|
||||
let mut vec = Vec::with_capacity(v.len());
|
||||
Self::check_vector_number(v, &mut vec)?;
|
||||
Vector::I64(Array1::from_vec(vec))
|
||||
}
|
||||
VectorType::I32 => {
|
||||
let mut vec = Vec::with_capacity(a.len());
|
||||
Self::check_vector_array(a, &mut vec)?;
|
||||
let mut vec = Vec::with_capacity(v.len());
|
||||
Self::check_vector_number(v, &mut vec)?;
|
||||
Vector::I32(Array1::from_vec(vec))
|
||||
}
|
||||
VectorType::I16 => {
|
||||
let mut vec = Vec::with_capacity(a.len());
|
||||
Self::check_vector_array(a, &mut vec)?;
|
||||
let mut vec = Vec::with_capacity(v.len());
|
||||
Self::check_vector_number(v, &mut vec)?;
|
||||
Vector::I16(Array1::from_vec(vec))
|
||||
}
|
||||
};
|
||||
Ok(res)
|
||||
}
|
||||
|
||||
fn check_vector_array<T>(a: &Array, vec: &mut Vec<T>) -> Result<(), Error>
|
||||
fn check_vector_number<T>(v: &[Number], vec: &mut Vec<T>) -> Result<(), Error>
|
||||
where
|
||||
T: for<'a> TryFrom<&'a Number, Error = Error>,
|
||||
{
|
||||
for v in &a.0 {
|
||||
if let Value::Number(n) = v {
|
||||
vec.push(n.try_into()?);
|
||||
} else {
|
||||
return Err(Error::InvalidVectorType {
|
||||
current: v.clone().to_string(),
|
||||
expected: "Number",
|
||||
});
|
||||
}
|
||||
for n in v {
|
||||
vec.push(n.try_into()?);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -540,7 +533,6 @@ mod tests {
|
|||
use crate::idx::trees::knn::tests::{get_seed_rnd, new_random_vec, RandomItemGenerator};
|
||||
use crate::idx::trees::vector::{SharedVector, Vector};
|
||||
use crate::sql::index::{Distance, VectorType};
|
||||
use crate::sql::Array;
|
||||
|
||||
fn test_distance(dist: Distance, a1: &[f64], a2: &[f64], res: f64) {
|
||||
// Convert the arrays to Vec<Number>
|
||||
|
@ -554,10 +546,8 @@ mod tests {
|
|||
|
||||
// Check the "Vector" optimised implementations
|
||||
for t in [VectorType::F64] {
|
||||
let v1: SharedVector =
|
||||
Vector::try_from_array(t, &Array::from(v1.clone())).unwrap().into();
|
||||
let v2: SharedVector =
|
||||
Vector::try_from_array(t, &Array::from(v2.clone())).unwrap().into();
|
||||
let v1: SharedVector = Vector::try_from_vector(t, &v1).unwrap().into();
|
||||
let v2: SharedVector = Vector::try_from_vector(t, &v2).unwrap().into();
|
||||
assert_eq!(dist.calculate(&v1, &v2), res);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,7 +16,6 @@ pub(in crate::kvs) fn construct_document(
|
|||
match mutation {
|
||||
TableMutation::Set(id, current_value) => {
|
||||
let doc = Document::new_artificial(
|
||||
None,
|
||||
Some(id),
|
||||
None,
|
||||
Cow::Borrowed(current_value),
|
||||
|
@ -31,7 +30,6 @@ pub(in crate::kvs) fn construct_document(
|
|||
"id" => Value::Thing(id.clone()),
|
||||
}));
|
||||
let doc = Document::new_artificial(
|
||||
None,
|
||||
Some(id),
|
||||
None,
|
||||
Cow::Owned(Value::None),
|
||||
|
@ -49,7 +47,6 @@ pub(in crate::kvs) fn construct_document(
|
|||
operations.iter().map(|op| Value::Object(Object::from(op.clone()))).collect(),
|
||||
)))?;
|
||||
let doc = Document::new_artificial(
|
||||
None,
|
||||
Some(id),
|
||||
None,
|
||||
Cow::Borrowed(current_value),
|
||||
|
@ -62,7 +59,6 @@ pub(in crate::kvs) fn construct_document(
|
|||
}
|
||||
TableMutation::DelWithOriginal(id, val) => {
|
||||
let doc = Document::new_artificial(
|
||||
None,
|
||||
Some(id),
|
||||
None,
|
||||
Cow::Owned(Value::None),
|
||||
|
|
|
@ -73,6 +73,27 @@ pub struct MTreeParams {
|
|||
}
|
||||
|
||||
impl MTreeParams {
|
||||
pub fn new(
|
||||
dimension: u16,
|
||||
distance: Distance,
|
||||
vector_type: VectorType,
|
||||
capacity: u16,
|
||||
doc_ids_order: u32,
|
||||
doc_ids_cache: u32,
|
||||
mtree_cache: u32,
|
||||
) -> Self {
|
||||
Self {
|
||||
dimension,
|
||||
_distance: Default::default(),
|
||||
distance,
|
||||
vector_type,
|
||||
capacity,
|
||||
doc_ids_order,
|
||||
doc_ids_cache,
|
||||
mtree_cache,
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_old_distance(
|
||||
&mut self,
|
||||
_revision: u16,
|
||||
|
|
|
@ -149,7 +149,7 @@ impl fmt::Display for Operator {
|
|||
}
|
||||
}
|
||||
Self::Ann(k, ef) => {
|
||||
write!(f, "<{k},{ef}>")
|
||||
write!(f, "<|{k},{ef}|>")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -110,16 +110,7 @@ impl serde::ser::SerializeStruct for SerializeMTree {
|
|||
|
||||
#[test]
|
||||
fn mtree_params() {
|
||||
let params = MTreeParams {
|
||||
dimension: 1,
|
||||
_distance: Default::default(),
|
||||
distance: Default::default(),
|
||||
vector_type: Default::default(),
|
||||
capacity: 2,
|
||||
doc_ids_order: 3,
|
||||
doc_ids_cache: 4,
|
||||
mtree_cache: 5,
|
||||
};
|
||||
let params = MTreeParams::new(1, Default::default(), Default::default(), 2, 3, 4, 5);
|
||||
let serialized = params.serialize(Serializer.wrap()).unwrap();
|
||||
assert_eq!(params, serialized);
|
||||
}
|
||||
|
|
|
@ -353,6 +353,7 @@ pub(crate) static PATHS: phf::Map<UniCase<&'static str>, PathKind> = phf_map! {
|
|||
UniCase::ascii("vector::distance::chebyshev") => PathKind::Function,
|
||||
UniCase::ascii("vector::distance::euclidean") => PathKind::Function,
|
||||
UniCase::ascii("vector::distance::hamming") => PathKind::Function,
|
||||
UniCase::ascii("vector::distance::knn") => PathKind::Function,
|
||||
UniCase::ascii("vector::distance::mahalanobis") => PathKind::Function,
|
||||
UniCase::ascii("vector::distance::manhattan") => PathKind::Function,
|
||||
UniCase::ascii("vector::distance::minkowski") => PathKind::Function,
|
||||
|
|
|
@ -707,7 +707,7 @@ impl Parser<'_> {
|
|||
|
||||
loop {
|
||||
match self.peek_kind() {
|
||||
// COLUMS and FIELDS are the same tokenkind
|
||||
// COLUMNS and FIELDS are the same tokenkind
|
||||
t!("FIELDS") => {
|
||||
self.pop_peek();
|
||||
res.cols = Idioms(vec![self.parse_local_idiom()?]);
|
||||
|
@ -852,16 +852,15 @@ impl Parser<'_> {
|
|||
_ => break,
|
||||
}
|
||||
}
|
||||
res.index = Index::MTree(crate::sql::index::MTreeParams {
|
||||
res.index = Index::MTree(crate::sql::index::MTreeParams::new(
|
||||
dimension,
|
||||
_distance: Default::default(),
|
||||
distance,
|
||||
vector_type,
|
||||
capacity,
|
||||
doc_ids_order,
|
||||
doc_ids_cache,
|
||||
mtree_cache,
|
||||
vector_type,
|
||||
})
|
||||
))
|
||||
}
|
||||
t!("HNSW") => {
|
||||
self.pop_peek();
|
||||
|
@ -909,8 +908,7 @@ impl Parser<'_> {
|
|||
self.pop_peek();
|
||||
keep_pruned_connections = true;
|
||||
}
|
||||
t => {
|
||||
println!("TOKEN: {t:?}");
|
||||
_ => {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,15 +1,17 @@
|
|||
use criterion::measurement::WallTime;
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion, Throughput};
|
||||
use flate2::read::GzDecoder;
|
||||
use reblessive::TreeStack;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::time::Duration;
|
||||
use surrealdb::idx::trees::hnsw::HnswIndex;
|
||||
use surrealdb::sql::index::Distance;
|
||||
use surrealdb_core::dbs::Session;
|
||||
use surrealdb_core::idx::planner::checker::HnswConditionChecker;
|
||||
use surrealdb_core::idx::trees::hnsw::index::HnswIndex;
|
||||
use surrealdb_core::kvs::Datastore;
|
||||
use surrealdb_core::sql::index::{HnswParams, VectorType};
|
||||
use surrealdb_core::sql::{value, Array, Id, Thing, Value};
|
||||
use surrealdb_core::sql::{value, Array, Id, Number, Thing, Value};
|
||||
use tokio::runtime::{Builder, Runtime};
|
||||
|
||||
const EF_CONSTRUCTION: u16 = 150;
|
||||
|
@ -45,14 +47,15 @@ fn bench_hnsw_no_db(c: &mut Criterion) {
|
|||
let hnsw = insert_objects(&samples);
|
||||
|
||||
let samples = new_vectors_from_file(QUERYING_SOURCE);
|
||||
let samples: Vec<Array> = samples.into_iter().map(|(_, a)| a).collect();
|
||||
let samples: Vec<Vec<Number>> =
|
||||
samples.into_iter().map(|(_, a)| convert_array_to_vec_number(a)).collect();
|
||||
|
||||
// Knn lookup benchmark group
|
||||
{
|
||||
let mut group = get_group(c, GROUP_NAME, samples.len(), 10);
|
||||
let id = format!("lookup len: {}", samples.len());
|
||||
group.bench_function(id, |b| {
|
||||
b.iter(|| knn_lookup_objects(&hnsw, &samples));
|
||||
b.to_async(Runtime::new().unwrap()).iter(|| knn_lookup_objects(&hnsw, &samples));
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
@ -175,6 +178,19 @@ fn new_vectors_from_file(path: &str) -> Vec<(Thing, Array)> {
|
|||
}
|
||||
res
|
||||
}
|
||||
|
||||
fn convert_array_to_vec_number(a: Array) -> Vec<Number> {
|
||||
a.into_iter()
|
||||
.map(|v| {
|
||||
if let Value::Number(n) = v {
|
||||
n
|
||||
} else {
|
||||
panic!("Wrong value {}", v);
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
async fn init_datastore(session: &Session, with_index: bool) -> Datastore {
|
||||
let ds = Datastore::new("memory").await.unwrap();
|
||||
if with_index {
|
||||
|
@ -215,11 +231,20 @@ async fn insert_objects_db(session: &Session, create_index: bool, inserts: &[Str
|
|||
ds
|
||||
}
|
||||
|
||||
fn knn_lookup_objects(h: &HnswIndex, samples: &[Array]) {
|
||||
for a in samples {
|
||||
let r = h.knn_search(a, NN, EF_SEARCH).unwrap();
|
||||
assert_eq!(r.len(), NN);
|
||||
}
|
||||
async fn knn_lookup_objects(h: &HnswIndex, samples: &[Vec<Number>]) {
|
||||
let mut stack = TreeStack::new();
|
||||
stack
|
||||
.enter(|stk| async {
|
||||
for v in samples {
|
||||
let r = h
|
||||
.knn_search(v, NN, EF_SEARCH, stk, HnswConditionChecker::default())
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(r.len(), NN);
|
||||
}
|
||||
})
|
||||
.finish()
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn knn_lookup_objects_db(ds: &Datastore, session: &Session, selects: &[String]) {
|
||||
|
|
|
@ -1,50 +1,77 @@
|
|||
use criterion::measurement::WallTime;
|
||||
use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion, Throughput};
|
||||
use futures::executor::block_on;
|
||||
use rand::prelude::ThreadRng;
|
||||
use rand::{thread_rng, Rng};
|
||||
use futures::lock::Mutex;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use reblessive::TreeStack;
|
||||
use std::time::Duration;
|
||||
use surrealdb::idx::docids::DocId;
|
||||
use surrealdb::idx::trees::mtree::{MState, MTree};
|
||||
use surrealdb::idx::trees::store::TreeNodeProvider;
|
||||
use surrealdb::idx::trees::vector::Vector;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::Arc;
|
||||
use surrealdb::kvs::Datastore;
|
||||
use surrealdb::kvs::LockType::Optimistic;
|
||||
use surrealdb::kvs::TransactionType::{Read, Write};
|
||||
use surrealdb::sql::index::Distance;
|
||||
use tokio::runtime::Runtime;
|
||||
use surrealdb_core::idx::planner::checker::MTreeConditionChecker;
|
||||
use surrealdb_core::idx::trees::mtree::MTreeIndex;
|
||||
use surrealdb_core::idx::IndexKeyBase;
|
||||
use surrealdb_core::kvs::{Transaction, TransactionType};
|
||||
use surrealdb_core::sql::index::{Distance, MTreeParams, VectorType};
|
||||
use surrealdb_core::sql::{Id, Number, Thing, Value};
|
||||
use tokio::runtime::{Builder, Runtime};
|
||||
use tokio::task;
|
||||
|
||||
fn bench_index_mtree_dim_3(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 1_000, 100_000, 3, 120, 100);
|
||||
bench_index_mtree(c, 250, 25_000, 3, 100);
|
||||
}
|
||||
|
||||
fn bench_index_mtree_dim_3_full_cache(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 1_000, 100_000, 3, 120, 0);
|
||||
bench_index_mtree(c, 250, 25_000, 3, 0);
|
||||
}
|
||||
|
||||
fn bench_index_mtree_dim_50(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 100, 10_000, 50, 20, 100);
|
||||
bench_index_mtree(c, 100, 10_000, 50, 100);
|
||||
}
|
||||
|
||||
fn bench_index_mtree_dim_50_full_cache(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 100, 10_000, 50, 20, 0);
|
||||
bench_index_mtree(c, 100, 10_000, 50, 0);
|
||||
}
|
||||
|
||||
fn bench_index_mtree_dim_300(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 50, 5_000, 300, 40, 100);
|
||||
bench_index_mtree(c, 50, 5_000, 300, 100);
|
||||
}
|
||||
|
||||
fn bench_index_mtree_dim_300_full_cache(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 50, 5_000, 300, 40, 0);
|
||||
bench_index_mtree(c, 50, 5_000, 300, 0);
|
||||
}
|
||||
|
||||
fn bench_index_mtree_dim_2048(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 10, 1_000, 2048, 60, 100);
|
||||
bench_index_mtree(c, 10, 1_000, 2048, 100);
|
||||
}
|
||||
|
||||
fn bench_index_mtree_dim_2048_full_cache(c: &mut Criterion) {
|
||||
bench_index_mtree(c, 10, 1_000, 2048, 60, 0);
|
||||
bench_index_mtree(c, 10, 1_000, 2048, 0);
|
||||
}
|
||||
|
||||
async fn mtree_index(
|
||||
ds: &Datastore,
|
||||
tx: &mut Transaction,
|
||||
dimension: usize,
|
||||
cache_size: usize,
|
||||
tt: TransactionType,
|
||||
) -> MTreeIndex {
|
||||
let p = MTreeParams::new(
|
||||
dimension as u16,
|
||||
Distance::Euclidean,
|
||||
VectorType::F64,
|
||||
40,
|
||||
100,
|
||||
cache_size as u32,
|
||||
cache_size as u32,
|
||||
);
|
||||
MTreeIndex::new(ds.index_store(), tx, IndexKeyBase::default(), &p, tt).await.unwrap()
|
||||
}
|
||||
|
||||
fn runtime() -> Runtime {
|
||||
Builder::new_multi_thread().worker_threads(4).enable_all().build().unwrap()
|
||||
}
|
||||
|
||||
fn bench_index_mtree(
|
||||
|
@ -52,7 +79,6 @@ fn bench_index_mtree(
|
|||
debug_samples_len: usize,
|
||||
release_samples_len: usize,
|
||||
vector_dimension: usize,
|
||||
measurement_secs: u64,
|
||||
cache_size: usize,
|
||||
) {
|
||||
let samples_len = if cfg!(debug_assertions) {
|
||||
|
@ -66,10 +92,10 @@ fn bench_index_mtree(
|
|||
|
||||
// Indexing benchmark group
|
||||
{
|
||||
let mut group = get_group(c, "index_mtree_insert", samples_len, measurement_secs);
|
||||
let mut group = get_group(c, "index_mtree_insert", samples_len);
|
||||
let id = format!("len_{}_dim_{}_cache_{}", samples_len, vector_dimension, cache_size);
|
||||
group.bench_function(id, |b| {
|
||||
b.to_async(Runtime::new().unwrap())
|
||||
b.to_async(runtime())
|
||||
.iter(|| insert_objects(&ds, samples_len, vector_dimension, cache_size));
|
||||
});
|
||||
group.finish();
|
||||
|
@ -77,15 +103,15 @@ fn bench_index_mtree(
|
|||
|
||||
// Knn lookup benchmark group
|
||||
{
|
||||
let mut group = get_group(c, "index_mtree_lookup", samples_len, 10);
|
||||
let mut group = get_group(c, "index_mtree_lookup", samples_len);
|
||||
for knn in [1, 10] {
|
||||
let id = format!(
|
||||
"knn_{}_len_{}_dim_{}_cache_{}",
|
||||
knn, samples_len, vector_dimension, cache_size
|
||||
);
|
||||
group.bench_function(id, |b| {
|
||||
b.to_async(Runtime::new().unwrap()).iter(|| {
|
||||
knn_lookup_objects(&ds, samples_len, vector_dimension, knn, cache_size)
|
||||
b.to_async(runtime()).iter(|| {
|
||||
knn_lookup_objects(&ds, samples_len, vector_dimension, cache_size, knn)
|
||||
});
|
||||
});
|
||||
}
|
||||
|
@ -97,24 +123,18 @@ fn get_group<'a>(
|
|||
c: &'a mut Criterion,
|
||||
group_name: &str,
|
||||
samples_len: usize,
|
||||
measurement_secs: u64,
|
||||
) -> BenchmarkGroup<'a, WallTime> {
|
||||
let mut group = c.benchmark_group(group_name);
|
||||
group.throughput(Throughput::Elements(samples_len as u64));
|
||||
group.sample_size(10);
|
||||
group.measurement_time(Duration::from_secs(measurement_secs));
|
||||
group
|
||||
}
|
||||
fn random_object(rng: &mut ThreadRng, vector_size: usize) -> Vector {
|
||||
fn random_object(rng: &mut StdRng, vector_size: usize) -> Vec<Number> {
|
||||
let mut vec = Vec::with_capacity(vector_size);
|
||||
for _ in 0..vector_size {
|
||||
vec.push(rng.gen_range(-1.0..=1.0));
|
||||
vec.push(rng.gen_range(-1.0..=1.0).into());
|
||||
}
|
||||
Vector::F32(vec.into())
|
||||
}
|
||||
|
||||
fn mtree() -> MTree {
|
||||
MTree::new(MState::new(40), Distance::Euclidean)
|
||||
vec
|
||||
}
|
||||
|
||||
async fn insert_objects(
|
||||
|
@ -123,27 +143,22 @@ async fn insert_objects(
|
|||
vector_size: usize,
|
||||
cache_size: usize,
|
||||
) {
|
||||
let mut rng = thread_rng();
|
||||
let mut t = mtree();
|
||||
let mut tx = ds.transaction(Write, Optimistic).await.unwrap();
|
||||
let mut s =
|
||||
ds.index_store().get_store_mtree(TreeNodeProvider::Debug, 0, Write, cache_size).await;
|
||||
|
||||
let mut mt = mtree_index(ds, &mut tx, vector_size, cache_size, Write).await;
|
||||
let mut stack = TreeStack::new();
|
||||
let mut rng = StdRng::from_entropy();
|
||||
stack
|
||||
.enter(|stk| async {
|
||||
for i in 0..samples_size {
|
||||
let object = random_object(&mut rng, vector_size).into();
|
||||
let vector: Vec<Number> = random_object(&mut rng, vector_size);
|
||||
// Insert the sample
|
||||
t.insert(stk, &mut tx, &mut s, object, i as DocId).await.unwrap();
|
||||
let rid = Thing::from(("test", Id::from(i as i64)));
|
||||
mt.index_document(stk, &mut tx, &rid, &vec![Value::from(vector)]).await.unwrap();
|
||||
}
|
||||
})
|
||||
.finish()
|
||||
.await;
|
||||
|
||||
if let Some(new_cache) = s.finish(&mut tx).await.unwrap() {
|
||||
ds.index_store().advance_store_mtree(new_cache);
|
||||
}
|
||||
mt.finish(&mut tx).await.unwrap();
|
||||
tx.commit().await.unwrap();
|
||||
}
|
||||
|
||||
|
@ -151,19 +166,48 @@ async fn knn_lookup_objects(
|
|||
ds: &Datastore,
|
||||
samples_size: usize,
|
||||
vector_size: usize,
|
||||
knn: usize,
|
||||
cache_size: usize,
|
||||
knn: usize,
|
||||
) {
|
||||
let mut rng = thread_rng();
|
||||
let t = mtree();
|
||||
let mut tx = ds.transaction(Read, Optimistic).await.unwrap();
|
||||
let s = ds.index_store().get_store_mtree(TreeNodeProvider::Debug, 0, Read, cache_size).await;
|
||||
for _ in 0..samples_size {
|
||||
let object = random_object(&mut rng, vector_size).into();
|
||||
// Insert the sample
|
||||
t.knn_search(&mut tx, &s, &object, knn).await.unwrap();
|
||||
let txn = Arc::new(Mutex::new(ds.transaction(Read, Optimistic).await.unwrap()));
|
||||
let mut tx = txn.lock().await;
|
||||
let mt = Arc::new(mtree_index(ds, &mut tx, vector_size, cache_size, Read).await);
|
||||
drop(tx);
|
||||
|
||||
let counter = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
let mut consumers = Vec::with_capacity(4);
|
||||
for _ in 0..4 {
|
||||
let (txn, mt, counter) = (txn.clone(), mt.clone(), counter.clone());
|
||||
let c = task::spawn(async move {
|
||||
let mut rng = StdRng::from_entropy();
|
||||
while counter.fetch_add(1, Ordering::Relaxed) < samples_size {
|
||||
let object = random_object(&mut rng, vector_size);
|
||||
knn_lookup_object(mt.as_ref(), &txn, object, knn).await;
|
||||
}
|
||||
});
|
||||
consumers.push(c);
|
||||
}
|
||||
tx.rollback_with_panic();
|
||||
for c in consumers {
|
||||
c.await.unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
async fn knn_lookup_object(
|
||||
mt: &MTreeIndex,
|
||||
txn: &Arc<Mutex<Transaction>>,
|
||||
object: Vec<Number>,
|
||||
knn: usize,
|
||||
) {
|
||||
let mut stack = TreeStack::new();
|
||||
stack
|
||||
.enter(|stk| async {
|
||||
let chk = MTreeConditionChecker::new(&txn);
|
||||
let r = mt.knn_search(stk, txn, &object, knn, chk).await.unwrap();
|
||||
assert_eq!(r.len(), knn);
|
||||
})
|
||||
.finish()
|
||||
.await;
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
|
|
|
@ -14,7 +14,7 @@ async fn select_where_mtree_knn() -> Result<(), Error> {
|
|||
CREATE pts:3 SET point = [8,9,10,11];
|
||||
DEFINE INDEX mt_pts ON pts FIELDS point MTREE DIMENSION 4 TYPE F32;
|
||||
LET $pt = [2,3,4,5];
|
||||
SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <|2|> $pt;
|
||||
SELECT id, vector::distance::knn() AS dist FROM pts WHERE point <|2|> $pt;
|
||||
SELECT id FROM pts WHERE point <|2|> $pt EXPLAIN;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
|
@ -46,7 +46,7 @@ async fn select_where_mtree_knn() -> Result<(), Error> {
|
|||
detail: {
|
||||
plan: {
|
||||
index: 'mt_pts',
|
||||
operator: '<2>',
|
||||
operator: '<|2|>',
|
||||
value: [2,3,4,5]
|
||||
},
|
||||
table: 'pts',
|
||||
|
@ -76,7 +76,7 @@ async fn delete_update_mtree_index() -> Result<(), Error> {
|
|||
DELETE pts:2;
|
||||
UPDATE pts:3 SET point = [12,13,14,15];
|
||||
LET $pt = [2,3,4,5];
|
||||
SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <|5|> $pt ORDER BY dist;
|
||||
SELECT id, vector::distance::knn() AS dist FROM pts WHERE point <|5|> $pt ORDER BY dist;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
let ses = Session::owner().with_ns("test").with_db("test");
|
||||
|
@ -162,8 +162,8 @@ async fn select_where_brute_force_knn() -> Result<(), Error> {
|
|||
CREATE pts:3 SET point = [8,9,10,11];
|
||||
LET $pt = [2,3,4,5];
|
||||
SELECT id FROM pts WHERE point <|2,EUCLIDEAN|> $pt EXPLAIN;
|
||||
SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <|2,EUCLIDEAN|> $pt;
|
||||
SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <|2,EUCLIDEAN|> $pt PARALLEL;
|
||||
SELECT id, vector::distance::knn() AS dist FROM pts WHERE point <|2,EUCLIDEAN|> $pt;
|
||||
SELECT id, vector::distance::knn() AS dist FROM pts WHERE point <|2,EUCLIDEAN|> $pt PARALLEL;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
let ses = Session::owner().with_ns("test").with_db("test");
|
||||
|
@ -224,7 +224,7 @@ async fn select_where_hnsw_knn() -> Result<(), Error> {
|
|||
CREATE pts:3 SET point = [8,9,10,11];
|
||||
DEFINE INDEX hnsw_pts ON pts FIELDS point HNSW DIMENSION 4 DIST EUCLIDEAN TYPE F32 EFC 500 M 12;
|
||||
LET $pt = [2,3,4,5];
|
||||
SELECT id, vector::distance::euclidean(point, $pt) AS dist FROM pts WHERE point <|2,100|> $pt;
|
||||
SELECT id, vector::distance::knn() AS dist FROM pts WHERE point <|2,100|> $pt;
|
||||
SELECT id FROM pts WHERE point <|2,100|> $pt EXPLAIN;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
|
@ -256,7 +256,7 @@ async fn select_where_hnsw_knn() -> Result<(), Error> {
|
|||
detail: {
|
||||
plan: {
|
||||
index: 'hnsw_pts',
|
||||
operator: '<2,100>',
|
||||
operator: '<|2,100|>',
|
||||
value: [2,3,4,5]
|
||||
},
|
||||
table: 'pts',
|
||||
|
@ -274,3 +274,219 @@ async fn select_where_hnsw_knn() -> Result<(), Error> {
|
|||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_mtree_knn_with_condition() -> Result<(), Error> {
|
||||
let sql = r"
|
||||
DEFINE INDEX mt_pt1 ON pts FIELDS point MTREE DIMENSION 1;
|
||||
INSERT INTO pts [
|
||||
{ id: pts:1, point: [ 10f ], flag: true },
|
||||
{ id: pts:2, point: [ 20f ], flag: false },
|
||||
{ id: pts:3, point: [ 30f ], flag: true },
|
||||
{ id: pts:4, point: [ 40f ], flag: false },
|
||||
{ id: pts:5, point: [ 50f ], flag: true },
|
||||
{ id: pts:6, point: [ 60f ], flag: false },
|
||||
{ id: pts:7, point: [ 70f ], flag: true }
|
||||
];
|
||||
LET $pt = [44f];
|
||||
SELECT id, flag, vector::distance::knn() AS distance FROM pts
|
||||
WHERE flag = true && point <|2|> $pt
|
||||
ORDER BY distance EXPLAIN;
|
||||
SELECT id, flag, vector::distance::knn() AS distance FROM pts
|
||||
WHERE flag = true && point <|2|> $pt
|
||||
ORDER BY distance;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
let ses = Session::owner().with_ns("test").with_db("test");
|
||||
let mut res = &mut dbs.execute(sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 5);
|
||||
//
|
||||
skip_ok(&mut res, 3)?;
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
detail: {
|
||||
plan: {
|
||||
index: 'mt_pt1',
|
||||
operator: '<|2|>',
|
||||
value: [44f]
|
||||
},
|
||||
table: 'pts',
|
||||
},
|
||||
operation: 'Iterate Index'
|
||||
},
|
||||
{
|
||||
detail: {
|
||||
type: 'Memory'
|
||||
},
|
||||
operation: 'Collector'
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
id: pts:5,
|
||||
flag: true,
|
||||
distance: 6f
|
||||
},
|
||||
{
|
||||
id: pts:3,
|
||||
flag: true,
|
||||
distance: 14f
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
//
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_log::test(tokio::test)]
|
||||
async fn select_hnsw_knn_with_condition() -> Result<(), Error> {
|
||||
let sql = r"
|
||||
DEFINE INDEX hn_pt1 ON pts FIELDS point HNSW DIMENSION 1;
|
||||
INSERT INTO pts [
|
||||
{ id: pts:1, point: [ 10f ], flag: true },
|
||||
{ id: pts:2, point: [ 20f ], flag: false },
|
||||
{ id: pts:3, point: [ 30f ], flag: true },
|
||||
{ id: pts:4, point: [ 40f ], flag: false },
|
||||
{ id: pts:5, point: [ 50f ], flag: true },
|
||||
{ id: pts:6, point: [ 60f ], flag: false },
|
||||
{ id: pts:7, point: [ 70f ], flag: true }
|
||||
];
|
||||
LET $pt = [44f];
|
||||
SELECT id, flag, vector::distance::knn() AS distance FROM pts
|
||||
WHERE flag = true AND point <|2,40|> $pt
|
||||
ORDER BY distance EXPLAIN;
|
||||
SELECT id, flag, vector::distance::knn() AS distance FROM pts
|
||||
WHERE flag = true AND point <|2,40|> $pt
|
||||
ORDER BY distance;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
let ses = Session::owner().with_ns("test").with_db("test");
|
||||
let mut res = &mut dbs.execute(sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 5);
|
||||
//
|
||||
skip_ok(&mut res, 3)?;
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
detail: {
|
||||
plan: {
|
||||
index: 'hn_pt1',
|
||||
operator: '<|2,40|>',
|
||||
value: [44f]
|
||||
},
|
||||
table: 'pts',
|
||||
},
|
||||
operation: 'Iterate Index'
|
||||
},
|
||||
{
|
||||
detail: {
|
||||
type: 'Memory'
|
||||
},
|
||||
operation: 'Collector'
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
distance: 6f,
|
||||
flag: true,
|
||||
id: pts:5
|
||||
},
|
||||
{
|
||||
distance: 14f,
|
||||
flag: true,
|
||||
id: pts:3
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
//
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test_log::test(tokio::test)]
|
||||
async fn select_bruteforce_knn_with_condition() -> Result<(), Error> {
|
||||
let sql = r"
|
||||
INSERT INTO pts [
|
||||
{ id: pts:1, point: [ 10f ], flag: true },
|
||||
{ id: pts:2, point: [ 20f ], flag: false },
|
||||
{ id: pts:3, point: [ 30f ], flag: true },
|
||||
{ id: pts:4, point: [ 40f ], flag: false },
|
||||
{ id: pts:5, point: [ 50f ], flag: true },
|
||||
{ id: pts:6, point: [ 60f ], flag: false },
|
||||
{ id: pts:7, point: [ 70f ], flag: true }
|
||||
];
|
||||
LET $pt = [44f];
|
||||
SELECT id, flag, vector::distance::knn() AS distance FROM pts
|
||||
WHERE flag = true AND point <|2,EUCLIDEAN|> $pt
|
||||
ORDER BY distance EXPLAIN;
|
||||
SELECT id, flag, vector::distance::knn() AS distance FROM pts
|
||||
WHERE flag = true AND point <|2,EUCLIDEAN|> $pt
|
||||
ORDER BY distance;
|
||||
";
|
||||
let dbs = new_ds().await?;
|
||||
let ses = Session::owner().with_ns("test").with_db("test");
|
||||
let mut res = &mut dbs.execute(sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 4);
|
||||
//
|
||||
skip_ok(&mut res, 2)?;
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
detail: {
|
||||
table: 'pts'
|
||||
},
|
||||
operation: 'Iterate Table'
|
||||
},
|
||||
{
|
||||
detail: {
|
||||
reason: 'NO INDEX FOUND'
|
||||
},
|
||||
operation: 'Fallback'
|
||||
},
|
||||
{
|
||||
detail: {
|
||||
type: 'Memory'
|
||||
},
|
||||
operation: 'Collector'
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::parse(
|
||||
"[
|
||||
{
|
||||
distance: 6f,
|
||||
flag: true,
|
||||
id: pts:5
|
||||
},
|
||||
{
|
||||
distance: 14f,
|
||||
flag: true,
|
||||
id: pts:3
|
||||
}
|
||||
]",
|
||||
);
|
||||
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
|
||||
//
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue