2023-06-19 18:41:13 +00:00
|
|
|
use crate::dbs::{Options, Transaction};
|
|
|
|
use crate::err::Error;
|
2023-07-11 18:22:31 +00:00
|
|
|
use crate::idx::btree::store::BTreeStoreType;
|
2023-06-23 20:26:19 +00:00
|
|
|
use crate::idx::ft::docids::{DocId, NO_DOC_ID};
|
2023-06-26 18:23:05 +00:00
|
|
|
use crate::idx::ft::termdocs::TermsDocs;
|
2023-06-21 18:31:15 +00:00
|
|
|
use crate::idx::ft::{FtIndex, HitsIterator, MatchRef};
|
2023-06-19 18:41:13 +00:00
|
|
|
use crate::idx::planner::executor::QueryExecutor;
|
|
|
|
use crate::idx::IndexKeyBase;
|
|
|
|
use crate::key;
|
|
|
|
use crate::kvs::Key;
|
|
|
|
use crate::sql::index::Index;
|
|
|
|
use crate::sql::scoring::Scoring;
|
|
|
|
use crate::sql::statements::DefineIndexStatement;
|
2023-06-23 20:26:19 +00:00
|
|
|
use crate::sql::{Array, Expression, Ident, Idiom, Object, Operator, Thing, Value};
|
2023-06-19 18:41:13 +00:00
|
|
|
use std::collections::HashMap;
|
2023-06-23 20:26:19 +00:00
|
|
|
use std::hash::Hash;
|
|
|
|
use std::sync::Arc;
|
2023-06-19 18:41:13 +00:00
|
|
|
|
|
|
|
#[derive(Default)]
|
|
|
|
pub(super) struct PlanBuilder {
|
2023-06-23 20:26:19 +00:00
|
|
|
indexes: Vec<(Expression, IndexOption)>,
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl PlanBuilder {
|
2023-06-23 20:26:19 +00:00
|
|
|
pub(super) fn add_index_option(&mut self, e: Expression, i: IndexOption) {
|
|
|
|
self.indexes.push((e, i));
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub(super) fn build(mut self) -> Result<Plan, Error> {
|
|
|
|
// TODO select the best option if there are several (cost based)
|
2023-06-23 20:26:19 +00:00
|
|
|
if let Some((e, i)) = self.indexes.pop() {
|
|
|
|
Ok(Plan::new(e, i))
|
2023-06-19 18:41:13 +00:00
|
|
|
} else {
|
|
|
|
Err(Error::BypassQueryPlanner)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) struct Plan {
|
2023-06-23 20:26:19 +00:00
|
|
|
pub(super) e: Expression,
|
2023-06-19 18:41:13 +00:00
|
|
|
pub(super) i: IndexOption,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Plan {
|
2023-06-23 20:26:19 +00:00
|
|
|
pub(super) fn new(e: Expression, i: IndexOption) -> Self {
|
|
|
|
Self {
|
|
|
|
e,
|
|
|
|
i,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-19 18:41:13 +00:00
|
|
|
pub(crate) async fn new_iterator(
|
|
|
|
&self,
|
|
|
|
opt: &Options,
|
|
|
|
txn: &Transaction,
|
2023-06-23 20:26:19 +00:00
|
|
|
exe: &QueryExecutor,
|
2023-07-06 14:57:42 +00:00
|
|
|
) -> Result<ThingIterator, Error> {
|
2023-06-23 20:26:19 +00:00
|
|
|
self.i.new_iterator(opt, txn, exe).await
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) fn explain(&self) -> Value {
|
2023-06-20 11:48:20 +00:00
|
|
|
Value::Object(Object::from(HashMap::from([
|
2023-06-23 20:26:19 +00:00
|
|
|
("index", Value::from(self.i.ix().name.0.to_owned())),
|
|
|
|
("operator", Value::from(self.i.op().to_string())),
|
|
|
|
("value", self.i.value().clone()),
|
2023-06-20 11:48:20 +00:00
|
|
|
])))
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
|
2023-06-23 20:26:19 +00:00
|
|
|
pub(super) struct IndexOption(Arc<Inner>);
|
|
|
|
|
|
|
|
#[derive(Debug, Eq, PartialEq, Hash)]
|
|
|
|
pub(super) struct Inner {
|
|
|
|
ix: DefineIndexStatement,
|
|
|
|
id: Idiom,
|
|
|
|
v: Value,
|
|
|
|
qs: Option<String>,
|
|
|
|
op: Operator,
|
|
|
|
mr: Option<MatchRef>,
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl IndexOption {
|
2023-06-23 20:26:19 +00:00
|
|
|
pub(super) fn new(
|
|
|
|
ix: DefineIndexStatement,
|
|
|
|
id: Idiom,
|
|
|
|
op: Operator,
|
|
|
|
v: Value,
|
|
|
|
qs: Option<String>,
|
|
|
|
mr: Option<MatchRef>,
|
|
|
|
) -> Self {
|
|
|
|
Self(Arc::new(Inner {
|
2023-06-19 18:41:13 +00:00
|
|
|
ix,
|
2023-06-23 20:26:19 +00:00
|
|
|
id,
|
2023-06-19 18:41:13 +00:00
|
|
|
op,
|
|
|
|
v,
|
2023-06-23 20:26:19 +00:00
|
|
|
qs,
|
|
|
|
mr,
|
|
|
|
}))
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
|
2023-06-23 20:26:19 +00:00
|
|
|
pub(super) fn ix(&self) -> &DefineIndexStatement {
|
|
|
|
&self.0.ix
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(super) fn op(&self) -> &Operator {
|
|
|
|
&self.0.op
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(super) fn value(&self) -> &Value {
|
|
|
|
&self.0.v
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(super) fn qs(&self) -> Option<&String> {
|
|
|
|
self.0.qs.as_ref()
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(super) fn id(&self) -> &Idiom {
|
|
|
|
&self.0.id
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(super) fn match_ref(&self) -> Option<&MatchRef> {
|
|
|
|
self.0.mr.as_ref()
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
async fn new_iterator(
|
|
|
|
&self,
|
|
|
|
opt: &Options,
|
|
|
|
txn: &Transaction,
|
2023-06-23 20:26:19 +00:00
|
|
|
exe: &QueryExecutor,
|
2023-07-06 14:57:42 +00:00
|
|
|
) -> Result<ThingIterator, Error> {
|
2023-06-23 20:26:19 +00:00
|
|
|
match &self.ix().index {
|
|
|
|
Index::Idx => {
|
|
|
|
if self.op() == &Operator::Equal {
|
2023-07-06 14:57:42 +00:00
|
|
|
return Ok(ThingIterator::NonUniqueEqual(NonUniqueEqualThingIterator::new(
|
2023-06-23 20:26:19 +00:00
|
|
|
opt,
|
|
|
|
self.ix(),
|
|
|
|
self.value(),
|
|
|
|
)?));
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
2023-06-23 20:26:19 +00:00
|
|
|
}
|
|
|
|
Index::Uniq => {
|
|
|
|
if self.op() == &Operator::Equal {
|
2023-07-06 14:57:42 +00:00
|
|
|
return Ok(ThingIterator::UniqueEqual(UniqueEqualThingIterator::new(
|
2023-06-23 20:26:19 +00:00
|
|
|
opt,
|
|
|
|
self.ix(),
|
|
|
|
self.value(),
|
|
|
|
)?));
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
2023-06-23 20:26:19 +00:00
|
|
|
}
|
2023-06-19 18:41:13 +00:00
|
|
|
Index::Search {
|
|
|
|
az,
|
|
|
|
hl,
|
|
|
|
sc,
|
|
|
|
order,
|
2023-06-23 20:26:19 +00:00
|
|
|
} => {
|
|
|
|
if let Operator::Matches(_) = self.op() {
|
|
|
|
let td = exe.pre_match_terms_docs();
|
2023-07-06 14:57:42 +00:00
|
|
|
return Ok(ThingIterator::Matches(
|
2023-06-23 20:26:19 +00:00
|
|
|
MatchesThingIterator::new(opt, txn, self.ix(), az, *hl, sc, *order, td)
|
|
|
|
.await?,
|
|
|
|
));
|
|
|
|
}
|
|
|
|
}
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
2023-06-23 20:26:19 +00:00
|
|
|
Err(Error::BypassQueryPlanner)
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-06 14:57:42 +00:00
|
|
|
pub(crate) enum ThingIterator {
|
|
|
|
NonUniqueEqual(NonUniqueEqualThingIterator),
|
|
|
|
UniqueEqual(UniqueEqualThingIterator),
|
|
|
|
Matches(MatchesThingIterator),
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ThingIterator {
|
|
|
|
pub(crate) async fn next_batch(
|
2023-06-23 20:26:19 +00:00
|
|
|
&mut self,
|
|
|
|
tx: &Transaction,
|
|
|
|
size: u32,
|
2023-07-06 14:57:42 +00:00
|
|
|
) -> Result<Vec<(Thing, DocId)>, Error> {
|
|
|
|
match self {
|
|
|
|
ThingIterator::NonUniqueEqual(i) => i.next_batch(tx, size).await,
|
|
|
|
ThingIterator::UniqueEqual(i) => i.next_batch(tx, size).await,
|
|
|
|
ThingIterator::Matches(i) => i.next_batch(tx, size).await,
|
|
|
|
}
|
|
|
|
}
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
|
2023-07-06 14:57:42 +00:00
|
|
|
/// Iterator scanning a key range, one batch at a time.
pub(crate) struct NonUniqueEqualThingIterator {
    /// Start of the range still to be scanned; moved forward after every non-empty batch.
    beg: Vec<u8>,
    /// End of the scanned range.
    end: Vec<u8>,
}
|
|
|
|
|
|
|
|
impl NonUniqueEqualThingIterator {
|
2023-07-06 14:57:42 +00:00
|
|
|
fn new(
|
|
|
|
opt: &Options,
|
|
|
|
ix: &DefineIndexStatement,
|
|
|
|
v: &Value,
|
|
|
|
) -> Result<NonUniqueEqualThingIterator, Error> {
|
2023-06-19 18:41:13 +00:00
|
|
|
let v = Array::from(v.clone());
|
2023-07-10 08:24:47 +00:00
|
|
|
let (beg, end) =
|
|
|
|
key::index::Index::range_all_ids(opt.ns(), opt.db(), &ix.what, &ix.name, &v);
|
2023-06-19 18:41:13 +00:00
|
|
|
Ok(Self {
|
|
|
|
beg,
|
|
|
|
end,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-06-23 20:26:19 +00:00
|
|
|
async fn next_batch(
|
|
|
|
&mut self,
|
|
|
|
txn: &Transaction,
|
|
|
|
limit: u32,
|
|
|
|
) -> Result<Vec<(Thing, DocId)>, Error> {
|
2023-06-19 18:41:13 +00:00
|
|
|
let min = self.beg.clone();
|
|
|
|
let max = self.end.clone();
|
|
|
|
let res = txn.lock().await.scan(min..max, limit).await?;
|
|
|
|
if let Some((key, _)) = res.last() {
|
|
|
|
self.beg = key.clone();
|
|
|
|
self.beg.push(0x00);
|
|
|
|
}
|
2023-06-23 20:26:19 +00:00
|
|
|
let res = res.iter().map(|(_, val)| (val.into(), NO_DOC_ID)).collect();
|
2023-06-19 18:41:13 +00:00
|
|
|
Ok(res)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-06 14:57:42 +00:00
|
|
|
pub(crate) struct UniqueEqualThingIterator {
|
2023-06-19 18:41:13 +00:00
|
|
|
key: Option<Key>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl UniqueEqualThingIterator {
|
|
|
|
fn new(opt: &Options, ix: &DefineIndexStatement, v: &Value) -> Result<Self, Error> {
|
|
|
|
let v = Array::from(v.clone());
|
2023-07-10 08:24:47 +00:00
|
|
|
let key = key::index::Index::new(opt.ns(), opt.db(), &ix.what, &ix.name, v, None).into();
|
2023-06-19 18:41:13 +00:00
|
|
|
Ok(Self {
|
|
|
|
key: Some(key),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2023-06-23 20:26:19 +00:00
|
|
|
async fn next_batch(
|
|
|
|
&mut self,
|
|
|
|
txn: &Transaction,
|
|
|
|
_limit: u32,
|
|
|
|
) -> Result<Vec<(Thing, DocId)>, Error> {
|
2023-06-19 18:41:13 +00:00
|
|
|
if let Some(key) = self.key.take() {
|
|
|
|
if let Some(val) = txn.lock().await.get(key).await? {
|
2023-06-23 20:26:19 +00:00
|
|
|
return Ok(vec![(val.into(), NO_DOC_ID)]);
|
2023-06-19 18:41:13 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(vec![])
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-07-06 14:57:42 +00:00
|
|
|
pub(crate) struct MatchesThingIterator {
|
2023-06-19 18:41:13 +00:00
|
|
|
hits: Option<HitsIterator>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl MatchesThingIterator {
|
2023-06-20 11:48:20 +00:00
|
|
|
#[allow(clippy::too_many_arguments)]
|
2023-06-19 18:41:13 +00:00
|
|
|
async fn new(
|
|
|
|
opt: &Options,
|
|
|
|
txn: &Transaction,
|
|
|
|
ix: &DefineIndexStatement,
|
|
|
|
az: &Ident,
|
2023-06-21 18:31:15 +00:00
|
|
|
hl: bool,
|
2023-06-19 18:41:13 +00:00
|
|
|
sc: &Scoring,
|
|
|
|
order: u32,
|
2023-06-26 18:23:05 +00:00
|
|
|
terms_docs: Option<TermsDocs>,
|
2023-06-19 18:41:13 +00:00
|
|
|
) -> Result<Self, Error> {
|
|
|
|
let ikb = IndexKeyBase::new(opt, ix);
|
|
|
|
if let Scoring::Bm {
|
|
|
|
..
|
|
|
|
} = sc
|
|
|
|
{
|
2023-06-23 20:26:19 +00:00
|
|
|
let mut run = txn.lock().await;
|
2023-06-19 18:41:13 +00:00
|
|
|
let az = run.get_az(opt.ns(), opt.db(), az.as_str()).await?;
|
2023-07-11 18:22:31 +00:00
|
|
|
let fti = FtIndex::new(&mut run, az, ikb, order, sc, hl, BTreeStoreType::Read).await?;
|
2023-06-23 20:26:19 +00:00
|
|
|
if let Some(terms_docs) = terms_docs {
|
2023-07-11 18:22:31 +00:00
|
|
|
let hits = fti.new_hits_iterator(terms_docs)?;
|
2023-06-23 20:26:19 +00:00
|
|
|
Ok(Self {
|
|
|
|
hits,
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
Ok(Self {
|
|
|
|
hits: None,
|
|
|
|
})
|
|
|
|
}
|
2023-06-19 18:41:13 +00:00
|
|
|
} else {
|
|
|
|
Err(Error::FeatureNotYetImplemented {
|
|
|
|
feature: "Vector Search",
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-23 20:26:19 +00:00
|
|
|
async fn next_batch(
|
|
|
|
&mut self,
|
|
|
|
txn: &Transaction,
|
|
|
|
mut limit: u32,
|
|
|
|
) -> Result<Vec<(Thing, DocId)>, Error> {
|
2023-06-19 18:41:13 +00:00
|
|
|
let mut res = vec![];
|
|
|
|
if let Some(hits) = &mut self.hits {
|
|
|
|
let mut run = txn.lock().await;
|
|
|
|
while limit > 0 {
|
2023-06-23 20:26:19 +00:00
|
|
|
if let Some(hit) = hits.next(&mut run).await? {
|
2023-06-19 18:41:13 +00:00
|
|
|
res.push(hit);
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
limit -= 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(res)
|
|
|
|
}
|
|
|
|
}
|
2023-06-23 20:26:19 +00:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::idx::planner::plan::IndexOption;
    use crate::sql::statements::DefineIndexStatement;
    use crate::sql::{Idiom, Operator, Value};
    use std::collections::HashSet;

    /// Builds a fresh index option; every call produces an equal but separately allocated value.
    fn sample_option() -> IndexOption {
        IndexOption::new(
            DefineIndexStatement::default(),
            Idiom::from("a.b".to_string()),
            Operator::Equal,
            Value::from("test"),
            None,
            None,
        )
    }

    /// Equal index options must hash identically, whether built separately or cloned.
    #[test]
    fn test_hash_index_option() {
        let io1 = sample_option();
        let io2 = sample_option();

        let mut set = HashSet::new();
        set.insert(io1);
        set.insert(io2.clone());
        set.insert(io2);

        assert_eq!(set.len(), 1);
    }
}
|