[Feat] SELECT ORDER should use the index iterator when available (#4525)

This commit is contained in:
Emmanuel Keller 2024-08-23 14:40:34 +01:00 committed by GitHub
parent a16f850c4f
commit 0b5d79cae0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 606 additions and 243 deletions

View file

@ -58,15 +58,17 @@ pub(crate) struct Iterator {
// Iterator status
run: Canceller,
// Iterator limit value
limit: Option<usize>,
limit: Option<u32>,
// Iterator start value
start: Option<usize>,
start: Option<u32>,
// Iterator runtime error
error: Option<Error>,
// Iterator output results
results: Results,
// Iterator input values
entries: Vec<Iterable>,
// Set if the iterator can be cancelled once it reaches start/limit
cancel_on_limit: Option<u32>,
}
impl Clone for Iterator {
@ -78,6 +80,7 @@ impl Clone for Iterator {
error: None,
results: Results::default(),
entries: self.entries.clone(),
cancel_on_limit: None,
}
}
}
@ -338,7 +341,7 @@ impl Iterator {
}
// Process any START & LIMIT clause
self.results.start_limit(self.start.as_ref(), self.limit.as_ref());
self.results.start_limit(self.start, self.limit);
if let Some(e) = &mut plan.explanation {
e.add_fetch(self.results.len());
@ -364,17 +367,19 @@ impl Iterator {
}
#[inline]
async fn setup_limit(
pub(crate) async fn setup_limit(
&mut self,
stk: &mut Stk,
ctx: &Context,
opt: &Options,
stm: &Statement<'_>,
) -> Result<(), Error> {
if let Some(v) = stm.limit() {
self.limit = Some(v.process(stk, ctx, opt, None).await?);
) -> Result<Option<u32>, Error> {
if self.limit.is_none() {
if let Some(v) = stm.limit() {
self.limit = Some(v.process(stk, ctx, opt, None).await?);
}
}
Ok(())
Ok(self.limit)
}
#[inline]
@ -391,6 +396,44 @@ impl Iterator {
Ok(())
}
/// Check if the iteration can be limited per iterator
fn check_set_start_limit(&mut self, ctx: &Context, stm: &Statement<'_>) -> bool {
// If there are groups we can't
if stm.group().is_some() {
return false;
}
// If there is no specified order, we can
if stm.order().is_none() {
return true;
}
// If there is more than 1 iterator, we can't
if self.entries.len() != 1 {
return false;
}
// If the iterator is backed by a sorted index
// and the sorting matches the first ORDER entry, we can
if let Some(Iterable::Index(_, irf)) = self.entries.first() {
if let Some(qp) = ctx.get_query_planner() {
if qp.is_order(irf) {
return true;
}
}
}
false
}
fn compute_start_limit(&mut self, ctx: &Context, stm: &Statement<'_>) {
if self.check_set_start_limit(ctx, stm) {
if let Some(l) = self.limit {
if let Some(s) = self.start {
self.cancel_on_limit = Some(l + s);
} else {
self.cancel_on_limit = Some(l);
}
}
}
}
#[inline]
async fn output_split(
&mut self,
@ -489,13 +532,15 @@ impl Iterator {
opt: &Options,
stm: &Statement<'_>,
) -> Result<(), Error> {
// Compute iteration limits
self.compute_start_limit(ctx, stm);
// Prevent deep recursion
let opt = &opt.dive(4)?;
// Check if iterating in parallel
match stm.parallel() {
// Run statements sequentially
false => {
// If any iterator requires distinct, we new to create a global distinct instance
// If any iterator requires distinct, we need to create a global distinct instance
let mut distinct = SyncDistinct::new(ctx);
// Process all prepared values
for v in mem::take(&mut self.entries) {
@ -621,16 +666,10 @@ impl Iterator {
}
}
}
// Check if we can exit
if stm.group().is_none() && stm.order().is_none() {
if let Some(l) = self.limit {
if let Some(s) = self.start {
if self.results.len() == l + s {
self.run.cancel()
}
} else if self.results.len() == l {
self.run.cancel()
}
// Check if we have enough results
if let Some(l) = self.cancel_on_limit {
if self.results.len() == l as usize {
self.run.cancel()
}
}
}

View file

@ -105,7 +105,7 @@ impl Results {
}
}
pub(super) fn start_limit(&mut self, start: Option<&usize>, limit: Option<&usize>) {
pub(super) fn start_limit(&mut self, start: Option<u32>, limit: Option<u32>) {
match self {
Self::None => {}
Self::Memory(m) => m.start_limit(start, limit),

View file

@ -19,16 +19,20 @@ impl MemoryCollector {
self.0.len()
}
pub(super) fn start_limit(&mut self, start: Option<&usize>, limit: Option<&usize>) {
pub(super) fn start_limit(&mut self, start: Option<u32>, limit: Option<u32>) {
match (start, limit) {
(Some(&start), Some(&limit)) => {
self.0 = mem::take(&mut self.0).into_iter().skip(start).take(limit).collect()
(Some(start), Some(limit)) => {
self.0 = mem::take(&mut self.0)
.into_iter()
.skip(start as usize)
.take(limit as usize)
.collect()
}
(Some(&start), None) => {
self.0 = mem::take(&mut self.0).into_iter().skip(start).collect()
(Some(start), None) => {
self.0 = mem::take(&mut self.0).into_iter().skip(start as usize).collect()
}
(None, Some(&limit)) => {
self.0 = mem::take(&mut self.0).into_iter().take(limit).collect()
(None, Some(limit)) => {
self.0 = mem::take(&mut self.0).into_iter().take(limit as usize).collect()
}
(None, None) => {}
}
@ -124,15 +128,15 @@ pub(super) mod file_store {
self.len
}
pub(in crate::dbs) fn start_limit(&mut self, start: Option<&usize>, limit: Option<&usize>) {
self.paging.start = start.cloned();
self.paging.limit = limit.cloned();
pub(in crate::dbs) fn start_limit(&mut self, start: Option<u32>, limit: Option<u32>) {
self.paging.start = start;
self.paging.limit = limit;
}
pub(in crate::dbs) fn take_vec(&mut self) -> Result<Vec<Value>, Error> {
self.check_reader()?;
if let Some(mut reader) = self.reader.take() {
if let Some((start, num)) = self.paging.get_start_num(reader.len) {
if let Some((start, num)) = self.paging.get_start_num(reader.len as u32) {
if let Some(orders) = self.orders.take() {
return self.sort_and_take_vec(reader, orders, start, num);
}
@ -146,8 +150,8 @@ pub(super) mod file_store {
&mut self,
reader: FileReader,
orders: Orders,
start: usize,
num: usize,
start: u32,
num: u32,
) -> Result<Vec<Value>, Error> {
let sort_dir = self.dir.path().join(Self::SORT_DIRECTORY_NAME);
fs::create_dir(&sort_dir)?;
@ -160,7 +164,7 @@ pub(super) mod file_store {
let sorted = sorter.sort_by(reader, |a, b| orders.compare(a, b))?;
let iter = sorted.map(Result::unwrap);
let r: Vec<Value> = iter.skip(start).take(num).collect();
let r: Vec<Value> = iter.skip(start as usize).take(num as usize).collect();
Ok(r)
}
pub(in crate::dbs) fn explain(&self, exp: &mut Explanation) {
@ -259,20 +263,22 @@ pub(super) mod file_store {
Ok(u)
}
fn take_vec(&mut self, start: usize, num: usize) -> Result<Vec<Value>, Error> {
fn take_vec(&mut self, start: u32, num: u32) -> Result<Vec<Value>, Error> {
let mut iter = FileRecordsIterator::new(self.records.clone(), self.len);
if start > 0 {
// Get the start offset of the first record
let mut index = OpenOptions::new().read(true).open(&self.index)?;
index.seek(SeekFrom::Start(((start - 1) * FileCollector::USIZE_SIZE) as u64))?;
index.seek(SeekFrom::Start(
((start as usize - 1) * FileCollector::USIZE_SIZE) as u64,
))?;
let start_offset = Self::read_usize(&mut index)?;
// Set records to the position of the first record
iter.seek(start_offset, start)?;
iter.seek(start_offset, start as usize)?;
}
// Collect the records
let mut res = Vec::with_capacity(num);
let mut res = Vec::with_capacity(num as usize);
for _ in 0..num {
debug!("READ");
if let Some(val) = iter.next() {
@ -356,12 +362,12 @@ pub(super) mod file_store {
#[derive(Default)]
struct FilePaging {
start: Option<usize>,
limit: Option<usize>,
start: Option<u32>,
limit: Option<u32>,
}
impl FilePaging {
fn get_start_num(&self, len: usize) -> Option<(usize, usize)> {
fn get_start_num(&self, len: u32) -> Option<(u32, u32)> {
let start = self.start.unwrap_or(0);
if start >= len {
return None;

View file

@ -80,7 +80,7 @@ impl From<InnerQueryExecutor> for QueryExecutor {
}
pub(super) enum IteratorEntry {
Single(Arc<Expression>, IndexOption),
Single(Option<Arc<Expression>>, IndexOption),
Range(HashSet<Arc<Expression>>, IndexRef, RangeValue, RangeValue),
}
@ -319,7 +319,7 @@ impl QueryExecutor {
/// Returns `true` if the expression is matching the current iterator.
pub(crate) fn is_iterator_expression(&self, irf: IteratorRef, exp: &Expression) -> bool {
match self.0.it_entries.get(irf as usize) {
Some(IteratorEntry::Single(e, ..)) => exp.eq(e.as_ref()),
Some(IteratorEntry::Single(Some(e), ..)) => exp.eq(e.as_ref()),
Some(IteratorEntry::Range(es, ..)) => es.contains(exp),
_ => false,
}
@ -405,6 +405,19 @@ impl QueryExecutor {
let index_join = Box::new(IndexJoinThingIterator::new(irf, opt, ix, iterators)?);
Some(ThingIterator::IndexJoin(index_join))
}
IndexOperator::Order(asc) => {
if *asc {
Some(ThingIterator::IndexRange(IndexRangeThingIterator::full_range(
irf,
opt.ns()?,
opt.db()?,
&ix.what,
&ix.name,
)))
} else {
None
}
}
_ => None,
})
}
@ -472,6 +485,19 @@ impl QueryExecutor {
let unique_join = Box::new(UniqueJoinThingIterator::new(irf, opt, ix, iterators)?);
Some(ThingIterator::UniqueJoin(unique_join))
}
IndexOperator::Order(asc) => {
if *asc {
Some(ThingIterator::UniqueRange(UniqueRangeThingIterator::full_range(
irf,
opt.ns()?,
opt.db()?,
&ix.what,
&ix.name,
)))
} else {
None
}
}
_ => None,
})
}
@ -481,7 +507,7 @@ impl QueryExecutor {
irf: IteratorRef,
io: IndexOption,
) -> Result<Option<ThingIterator>, Error> {
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(irf as usize) {
if let Some(IteratorEntry::Single(Some(exp), ..)) = self.0.it_entries.get(irf as usize) {
if let Matches(_, _) = io.op() {
if let Some(fti) = self.0.ft_map.get(&io.ix_ref()) {
if let Some(fte) = self.0.exp_entries.get(exp) {
@ -496,7 +522,7 @@ impl QueryExecutor {
}
fn new_mtree_index_knn_iterator(&self, irf: IteratorRef) -> Option<ThingIterator> {
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(irf as usize) {
if let Some(IteratorEntry::Single(Some(exp), ..)) = self.0.it_entries.get(irf as usize) {
if let Some(mte) = self.0.mt_entries.get(exp) {
let it = KnnIterator::new(irf, mte.res.clone());
return Some(ThingIterator::Knn(it));
@ -506,7 +532,7 @@ impl QueryExecutor {
}
fn new_hnsw_index_ann_iterator(&self, irf: IteratorRef) -> Option<ThingIterator> {
if let Some(IteratorEntry::Single(exp, ..)) = self.0.it_entries.get(irf as usize) {
if let Some(IteratorEntry::Single(Some(exp), ..)) = self.0.it_entries.get(irf as usize) {
if let Some(he) = self.0.hnsw_entries.get(exp) {
let it = KnnIterator::new(irf, he.res.clone());
return Some(ThingIterator::Knn(it));
@ -602,7 +628,7 @@ impl QueryExecutor {
r: Value,
) -> Result<bool, Error> {
// If the query terms contains terms that are unknown in the index
// of if there is not terms in the query
// of if there are no terms in the query
// we are sure that it does not match any document
if !ft.0.query_terms_set.is_matchable() {
return Ok(false);
@ -610,6 +636,7 @@ impl QueryExecutor {
let v = match ft.0.index_option.id_pos() {
IdiomPosition::Left => r,
IdiomPosition::Right => l,
IdiomPosition::None => return Ok(false),
};
let terms = ft.0.terms.read().await;
// Extract the terms set from the record

View file

@ -260,6 +260,20 @@ impl IndexRangeThingIterator {
}
}
pub(super) fn full_range(
irf: IteratorRef,
ns: &str,
db: &str,
ix_what: &Ident,
ix_name: &Ident,
) -> Self {
let full_range = RangeValue {
value: Value::None,
inclusive: true,
};
Self::new(irf, ns, db, ix_what, ix_name, &full_range, &full_range)
}
fn compute_beg(
ns: &str,
db: &str,
@ -329,10 +343,10 @@ impl IndexUnionThingIterator {
db: &str,
ix_what: &Ident,
ix_name: &Ident,
a: &Array,
a: &Value,
) -> Self {
// We create a VecDeque to hold the prefix keys (begin and end) for each value in the array.
let mut values: VecDeque<(Vec<u8>, Vec<u8>)> =
let mut values: VecDeque<(Vec<u8>, Vec<u8>)> = if let Value::Array(a) = a {
a.0.iter()
.map(|v| {
let a = Array::from(v.clone());
@ -340,7 +354,10 @@ impl IndexUnionThingIterator {
let end = Index::prefix_ids_end(ns, db, ix_what, ix_name, &a);
(beg, end)
})
.collect();
.collect()
} else {
VecDeque::with_capacity(0)
};
let current = values.pop_front();
Self {
irf,
@ -561,6 +578,20 @@ impl UniqueRangeThingIterator {
}
}
pub(super) fn full_range(
irf: IteratorRef,
ns: &str,
db: &str,
ix_what: &Ident,
ix_name: &Ident,
) -> Self {
let full_range = RangeValue {
value: Value::None,
inclusive: true,
};
Self::new(irf, ns, db, ix_what, ix_name, &full_range, &full_range)
}
fn compute_beg(
ns: &str,
db: &str,
@ -637,17 +668,20 @@ impl UniqueUnionThingIterator {
irf: IteratorRef,
opt: &Options,
ix: &DefineIndexStatement,
a: &Array,
a: &Value,
) -> Result<Self, Error> {
// We create a VecDeque to hold the key for each value in the array.
let keys: VecDeque<Key> =
let keys: VecDeque<Key> = if let Value::Array(a) = a {
a.0.iter()
.map(|v| -> Result<Key, Error> {
let a = Array::from(v.clone());
let key = Index::new(opt.ns()?, opt.db()?, &ix.what, &ix.name, &a, None).into();
Ok(key)
})
.collect::<Result<VecDeque<Key>, Error>>()?;
.collect::<Result<VecDeque<Key>, Error>>()?
} else {
VecDeque::with_capacity(0)
};
Ok(Self {
irf,
keys,

View file

@ -15,7 +15,7 @@ use crate::idx::planner::knn::KnnBruteForceResults;
use crate::idx::planner::plan::{Plan, PlanBuilder};
use crate::idx::planner::tree::Tree;
use crate::sql::with::With;
use crate::sql::{Cond, Table};
use crate::sql::{Cond, Orders, Table};
use reblessive::tree::Stk;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU8, Ordering};
@ -25,25 +25,34 @@ pub(crate) struct QueryPlanner {
opt: Arc<Options>,
with: Option<Arc<With>>,
cond: Option<Arc<Cond>>,
order: Option<Arc<Orders>>,
/// There is one executor per table
executors: HashMap<String, QueryExecutor>,
requires_distinct: bool,
fallbacks: Vec<String>,
iteration_workflow: Vec<IterationStage>,
iteration_index: AtomicU8,
orders: Vec<IteratorRef>,
}
impl QueryPlanner {
pub(crate) fn new(opt: Arc<Options>, with: Option<Arc<With>>, cond: Option<Arc<Cond>>) -> Self {
pub(crate) fn new(
opt: Arc<Options>,
with: Option<Arc<With>>,
cond: Option<Arc<Cond>>,
order: Option<Arc<Orders>>,
) -> Self {
Self {
opt,
with,
cond,
order,
executors: HashMap::default(),
requires_distinct: false,
fallbacks: vec![],
iteration_workflow: Vec::default(),
iteration_index: AtomicU8::new(0),
orders: vec![],
}
}
@ -55,73 +64,72 @@ impl QueryPlanner {
it: &mut Iterator,
) -> Result<(), Error> {
let mut is_table_iterator = false;
let mut is_knn = false;
match Tree::build(
let mut tree = Tree::build(
stk,
ctx,
&self.opt,
&t,
self.cond.as_ref().map(|w| w.as_ref()),
self.with.as_ref().map(|c| c.as_ref()),
self.order.as_ref().map(|o| o.as_ref()),
)
.await?
{
Some(tree) => {
is_knn = is_knn || !tree.knn_expressions.is_empty();
let mut exe = InnerQueryExecutor::new(
stk,
ctx,
&self.opt,
&t,
tree.index_map,
tree.knn_expressions,
tree.knn_brute_force_expressions,
tree.knn_condition,
)
.await?;
match PlanBuilder::build(
tree.root,
self.with.as_ref().map(|w| w.as_ref()),
tree.with_indexes,
)? {
Plan::SingleIndex(exp, io) => {
if io.require_distinct() {
self.requires_distinct = true;
}
let ir = exe.add_iterator(IteratorEntry::Single(exp, io));
self.add(t.clone(), Some(ir), exe, it);
}
Plan::MultiIndex(non_range_indexes, ranges_indexes) => {
for (exp, io) in non_range_indexes {
let ie = IteratorEntry::Single(exp, io);
let ir = exe.add_iterator(ie);
it.ingest(Iterable::Index(t.clone(), ir));
}
for (ixn, rq) in ranges_indexes {
let ie = IteratorEntry::Range(rq.exps, ixn, rq.from, rq.to);
let ir = exe.add_iterator(ie);
it.ingest(Iterable::Index(t.clone(), ir));
}
self.requires_distinct = true;
self.add(t.clone(), None, exe, it);
}
Plan::SingleIndexRange(ixn, rq) => {
let ir =
exe.add_iterator(IteratorEntry::Range(rq.exps, ixn, rq.from, rq.to));
self.add(t.clone(), Some(ir), exe, it);
}
Plan::TableIterator(fallback) => {
if let Some(fallback) = fallback {
self.fallbacks.push(fallback);
}
self.add(t.clone(), None, exe, it);
it.ingest(Iterable::Table(t));
is_table_iterator = true;
}
.await?;
let is_knn = !tree.knn_expressions.is_empty();
let order = tree.index_map.order_limit.take();
let mut exe = InnerQueryExecutor::new(
stk,
ctx,
&self.opt,
&t,
tree.index_map,
tree.knn_expressions,
tree.knn_brute_force_expressions,
tree.knn_condition,
)
.await?;
match PlanBuilder::build(
tree.root,
self.with.as_ref().map(|w| w.as_ref()),
tree.with_indexes,
order,
)? {
Plan::SingleIndex(exp, io) => {
if io.require_distinct() {
self.requires_distinct = true;
}
let is_order = exp.is_none();
let ir = exe.add_iterator(IteratorEntry::Single(exp, io));
self.add(t.clone(), Some(ir), exe, it);
if is_order {
self.orders.push(ir);
}
}
None => {
Plan::MultiIndex(non_range_indexes, ranges_indexes) => {
for (exp, io) in non_range_indexes {
let ie = IteratorEntry::Single(Some(exp), io);
let ir = exe.add_iterator(ie);
it.ingest(Iterable::Index(t.clone(), ir));
}
for (ixr, rq) in ranges_indexes {
let ie = IteratorEntry::Range(rq.exps, ixr, rq.from, rq.to);
let ir = exe.add_iterator(ie);
it.ingest(Iterable::Index(t.clone(), ir));
}
self.requires_distinct = true;
self.add(t.clone(), None, exe, it);
}
Plan::SingleIndexRange(ixn, rq) => {
let ir = exe.add_iterator(IteratorEntry::Range(rq.exps, ixn, rq.from, rq.to));
self.add(t.clone(), Some(ir), exe, it);
}
Plan::TableIterator(fallback) => {
if let Some(fallback) = fallback {
self.fallbacks.push(fallback);
}
self.add(t.clone(), None, exe, it);
it.ingest(Iterable::Table(t));
is_table_iterator = true;
}
}
if is_knn && is_table_iterator {
@ -160,6 +168,10 @@ impl QueryPlanner {
&self.fallbacks
}
pub(crate) fn is_order(&self, irf: &IteratorRef) -> bool {
self.orders.contains(irf)
}
pub(crate) async fn next_iteration_stage(&self) -> Option<IterationStage> {
let pos = self.iteration_index.fetch_add(1, Ordering::Relaxed);
match self.iteration_workflow.get(pos as usize) {

View file

@ -30,9 +30,10 @@ pub(super) struct PlanBuilder {
impl PlanBuilder {
pub(super) fn build(
root: Node,
root: Option<Node>,
with: Option<&With>,
with_indexes: Vec<IndexRef>,
order: Option<IndexOption>,
) -> Result<Plan, Error> {
if let Some(With::NoIndex) = with {
return Ok(Plan::TableIterator(Some("WITH NOINDEX".to_string())));
@ -47,12 +48,10 @@ impl PlanBuilder {
all_exp_with_index: true,
};
// Browse the AST and collect information
if let Err(e) = b.eval_node(&root) {
return Ok(Plan::TableIterator(Some(e.to_string())));
}
// If we didn't find any index, we're done with no index plan
if !b.has_indexes {
return Ok(Plan::TableIterator(Some("NO INDEX FOUND".to_string())));
if let Some(root) = &root {
if let Err(e) = b.eval_node(root) {
return Ok(Plan::TableIterator(Some(e.to_string())));
}
}
// If every boolean operator are AND then we can use the single index plan
@ -66,7 +65,11 @@ impl PlanBuilder {
}
// Otherwise we take the first single index option
if let Some((e, i)) = b.non_range_indexes.pop() {
return Ok(Plan::SingleIndex(e, i));
return Ok(Plan::SingleIndex(Some(e), i));
}
// If there is an order option
if let Some(o) = order {
return Ok(Plan::SingleIndex(None, o.clone()));
}
}
// If every expression is backed by an index with can use the MultiIndex plan
@ -157,15 +160,19 @@ impl PlanBuilder {
}
pub(super) enum Plan {
/// Table full scan
TableIterator(Option<String>),
SingleIndex(Arc<Expression>, IndexOption),
/// Index scan filtered on records matching a given expression
SingleIndex(Option<Arc<Expression>>, IndexOption),
/// Union of filtered index scans
MultiIndex(Vec<(Arc<Expression>, IndexOption)>, Vec<(IndexRef, UnionRangeQueryBuilder)>),
/// Index scan for record matching a given range
SingleIndexRange(IndexRef, UnionRangeQueryBuilder),
}
#[derive(Debug, Eq, PartialEq, Hash, Clone)]
pub(super) struct IndexOption {
/// A reference o the index definition
/// A reference to the index definition
ix_ref: IndexRef,
id: Idiom,
id_pos: IdiomPosition,
@ -174,14 +181,15 @@ pub(super) struct IndexOption {
#[derive(Debug, Eq, PartialEq, Hash)]
pub(super) enum IndexOperator {
Equality(Value),
Exactness(Value),
Union(Array),
Equality(Arc<Value>),
Exactness(Arc<Value>),
Union(Arc<Value>),
Join(Vec<IndexOption>),
RangePart(Operator, Value),
RangePart(Operator, Arc<Value>),
Matches(String, Option<MatchRef>),
Knn(Arc<Vec<Number>>, u32),
Ann(Arc<Vec<Number>>, u32, u32),
Order(bool),
}
impl IndexOption {
@ -242,9 +250,9 @@ impl IndexOption {
e.insert("operator", Value::from(Operator::Exact.to_string()));
e.insert("value", Self::reduce_array(v));
}
IndexOperator::Union(a) => {
IndexOperator::Union(v) => {
e.insert("operator", Value::from("union"));
e.insert("value", Value::Array(a.clone()));
e.insert("value", v.as_ref().clone());
}
IndexOperator::Join(ios) => {
e.insert("operator", Value::from("join"));
@ -261,7 +269,7 @@ impl IndexOption {
}
IndexOperator::RangePart(op, v) => {
e.insert("operator", Value::from(op.to_string()));
e.insert("value", v.to_owned());
e.insert("value", v.as_ref().to_owned());
}
IndexOperator::Knn(a, k) => {
let op = Value::from(Operator::Knn(*k, None).to_string());
@ -275,6 +283,10 @@ impl IndexOption {
e.insert("operator", op);
e.insert("value", val);
}
IndexOperator::Order(asc) => {
e.insert("operator", Value::from("Order"));
e.insert("ascending", Value::from(*asc));
}
};
Value::from(e)
}
@ -443,14 +455,14 @@ mod tests {
1,
Idiom::parse("test"),
IdiomPosition::Right,
IndexOperator::Equality(Value::Array(Array::from(vec!["test"]))),
IndexOperator::Equality(Value::Array(Array::from(vec!["test"])).into()),
);
let io2 = IndexOption::new(
1,
Idiom::parse("test"),
IdiomPosition::Right,
IndexOperator::Equality(Value::Array(Array::from(vec!["test"]))),
IndexOperator::Equality(Value::Array(Array::from(vec!["test"])).into()),
);
set.insert(io1);

View file

@ -10,14 +10,15 @@ use crate::kvs::Transaction;
use crate::sql::index::Index;
use crate::sql::statements::{DefineFieldStatement, DefineIndexStatement};
use crate::sql::{
Array, Cond, Expression, Idiom, Kind, Number, Operator, Part, Subquery, Table, Value, With,
Array, Cond, Expression, Idiom, Kind, Number, Operator, Order, Orders, Part, Subquery, Table,
Value, With,
};
use reblessive::tree::Stk;
use std::collections::HashMap;
use std::sync::Arc;
pub(super) struct Tree {
pub(super) root: Node,
pub(super) root: Option<Node>,
pub(super) index_map: IndexesMap,
pub(super) with_indexes: Vec<IndexRef>,
pub(super) knn_expressions: KnnExpressions,
@ -35,26 +36,21 @@ impl Tree {
table: &'a Table,
cond: Option<&Cond>,
with: Option<&With>,
) -> Result<Option<Self>, Error> {
let mut b = TreeBuilder::new(ctx, opt, table, with);
order: Option<&Orders>,
) -> Result<Self, Error> {
let mut b = TreeBuilder::new(ctx, opt, table, with, order);
if let Some(cond) = cond {
let root = b.eval_value(stk, 0, &cond.0).await?;
let knn_condition = if b.knn_expressions.is_empty() {
None
} else {
KnnConditionRewriter::build(&b.knn_expressions, cond)
};
Ok(Some(Self {
root,
index_map: b.index_map,
with_indexes: b.with_indexes,
knn_expressions: b.knn_expressions,
knn_brute_force_expressions: b.knn_brute_force_expressions,
knn_condition,
}))
} else {
Ok(None)
b.eval_cond(stk, cond).await?;
}
b.eval_order().await?;
Ok(Self {
root: b.root,
index_map: b.index_map,
with_indexes: b.with_indexes,
knn_expressions: b.knn_expressions,
knn_brute_force_expressions: b.knn_brute_force_expressions,
knn_condition: b.knn_condition,
})
}
}
@ -63,6 +59,7 @@ struct TreeBuilder<'a> {
opt: &'a Options,
table: &'a Table,
with: Option<&'a With>,
first_order: Option<&'a Order>,
schemas: HashMap<Table, SchemaCache>,
idioms_indexes: HashMap<Table, HashMap<Idiom, LocalIndexRefs>>,
resolved_expressions: HashMap<Arc<Expression>, ResolvedExpression>,
@ -73,6 +70,8 @@ struct TreeBuilder<'a> {
knn_expressions: KnnExpressions,
idioms_record_options: HashMap<Idiom, RecordOptions>,
group_sequence: GroupRef,
root: Option<Node>,
knn_condition: Option<Cond>,
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
@ -85,16 +84,28 @@ pub(super) type LocalIndexRefs = Vec<IndexRef>;
pub(super) type RemoteIndexRefs = Arc<Vec<(Idiom, LocalIndexRefs)>>;
impl<'a> TreeBuilder<'a> {
fn new(ctx: &'a Context, opt: &'a Options, table: &'a Table, with: Option<&'a With>) -> Self {
fn new(
ctx: &'a Context,
opt: &'a Options,
table: &'a Table,
with: Option<&'a With>,
orders: Option<&'a Orders>,
) -> Self {
let with_indexes = match with {
Some(With::Index(ixs)) => Vec::with_capacity(ixs.len()),
_ => vec![],
};
let first_order = if let Some(o) = orders {
o.0.first()
} else {
None
};
Self {
ctx,
opt,
table,
with,
first_order,
schemas: Default::default(),
idioms_indexes: Default::default(),
resolved_expressions: Default::default(),
@ -105,6 +116,8 @@ impl<'a> TreeBuilder<'a> {
knn_expressions: Default::default(),
idioms_record_options: Default::default(),
group_sequence: 0,
root: None,
knn_condition: None,
}
}
@ -121,7 +134,34 @@ impl<'a> TreeBuilder<'a> {
Ok(())
}
/// Was marked recursive
async fn eval_order(&mut self) -> Result<(), Error> {
if let Some(o) = self.first_order {
if !o.random {
if let Node::IndexedField(id, irf) = self.resolve_idiom(&o.order).await? {
if let Some(ix_ref) = irf.first().cloned() {
self.index_map.order_limit = Some(IndexOption::new(
ix_ref,
id,
IdiomPosition::None,
IndexOperator::Order(o.direction),
));
}
}
}
}
Ok(())
}
async fn eval_cond(&mut self, stk: &mut Stk, cond: &Cond) -> Result<(), Error> {
self.root = Some(self.eval_value(stk, 0, &cond.0).await?);
self.knn_condition = if self.knn_expressions.is_empty() {
None
} else {
KnnConditionRewriter::build(&self.knn_expressions, cond)
};
Ok(())
}
async fn eval_value(
&mut self,
stk: &mut Stk,
@ -141,6 +181,8 @@ impl<'a> TreeBuilder<'a> {
| Value::Geometry(_)
| Value::Datetime(_)
| Value::Param(_)
| Value::Null
| Value::None
| Value::Function(_) => Ok(Node::Computable),
Value::Array(a) => self.eval_array(stk, a).await,
Value::Subquery(s) => self.eval_subquery(stk, s).await,
@ -151,7 +193,7 @@ impl<'a> TreeBuilder<'a> {
async fn compute(&self, stk: &mut Stk, v: &Value, n: Node) -> Result<Node, Error> {
Ok(if n == Node::Computable {
match v.compute(stk, self.ctx, self.opt, None).await {
Ok(v) => Node::Computed(Arc::new(v)),
Ok(v) => Node::Computed(v.into()),
Err(_) => Node::Unsupported(format!("Unsupported value: {}", v)),
}
} else {
@ -187,8 +229,6 @@ impl<'a> TreeBuilder<'a> {
}
let n = self.resolve_idiom(i).await?;
self.resolved_idioms.insert(i.clone(), n.clone());
Ok(n)
}
@ -197,17 +237,23 @@ impl<'a> TreeBuilder<'a> {
self.lazy_load_schema_resolver(&tx, self.table).await?;
// Try to detect if it matches an index
if let Some(schema) = self.schemas.get(self.table).cloned() {
let n = if let Some(schema) = self.schemas.get(self.table).cloned() {
let irs = self.resolve_indexes(self.table, i, &schema);
if !irs.is_empty() {
return Ok(Node::IndexedField(i.clone(), irs));
Node::IndexedField(i.clone(), irs)
} else if let Some(ro) =
self.resolve_record_field(&tx, schema.fields.as_ref(), i).await?
{
// Try to detect an indexed record field
Node::RecordField(i.clone(), ro)
} else {
Node::NonIndexedField(i.clone())
}
// Try to detect an indexed record field
if let Some(ro) = self.resolve_record_field(&tx, schema.fields.as_ref(), i).await? {
return Ok(Node::RecordField(i.clone(), ro));
}
}
Ok(Node::NonIndexedField(i.clone()))
} else {
Node::NonIndexedField(i.clone())
};
self.resolved_idioms.insert(i.clone(), n.clone());
Ok(n)
}
fn resolve_indexes(&mut self, t: &Table, i: &Idiom, schema: &SchemaCache) -> Vec<IndexRef> {
@ -392,8 +438,8 @@ impl<'a> TreeBuilder<'a> {
for ir in irs {
if let Some(ix) = self.index_map.definitions.get(*ir as usize) {
let op = match &ix.index {
Index::Idx => Self::eval_index_operator(op, n, p),
Index::Uniq => Self::eval_index_operator(op, n, p),
Index::Idx => self.eval_index_operator(op, n, p),
Index::Uniq => self.eval_index_operator(op, n, p),
Index::Search {
..
} => Self::eval_matches_operator(op, n),
@ -425,7 +471,7 @@ impl<'a> TreeBuilder<'a> {
fn eval_matches_operator(op: &Operator, n: &Node) -> Option<IndexOperator> {
if let Some(v) = n.is_computed() {
if let Operator::Matches(mr) = op {
return Some(IndexOperator::Matches(v.clone().to_raw_string(), *mr));
return Some(IndexOperator::Matches(v.to_raw_string(), *mr));
}
}
None
@ -482,22 +528,31 @@ impl<'a> TreeBuilder<'a> {
Ok(())
}
fn eval_index_operator(op: &Operator, n: &Node, p: IdiomPosition) -> Option<IndexOperator> {
fn eval_index_operator(
&self,
op: &Operator,
n: &Node,
p: IdiomPosition,
) -> Option<IndexOperator> {
if let Some(v) = n.is_computed() {
match (op, v, p) {
(Operator::Equal, v, _) => Some(IndexOperator::Equality(v.clone())),
(Operator::Exact, v, _) => Some(IndexOperator::Exactness(v.clone())),
(Operator::Equal, v, _) => return Some(IndexOperator::Equality(v)),
(Operator::Exact, v, _) => return Some(IndexOperator::Exactness(v)),
(Operator::Contain, v, IdiomPosition::Left) => {
Some(IndexOperator::Equality(v.clone()))
return Some(IndexOperator::Equality(v))
}
(Operator::Inside, v, IdiomPosition::Right) => {
Some(IndexOperator::Equality(v.clone()))
return Some(IndexOperator::Equality(v))
}
(
Operator::ContainAny | Operator::ContainAll | Operator::Inside,
Value::Array(a),
v,
IdiomPosition::Left,
) => Some(IndexOperator::Union(a.clone())),
) => {
if let Value::Array(_) = v.as_ref() {
return Some(IndexOperator::Union(v));
}
}
(
Operator::LessThan
| Operator::LessThanOrEqual
@ -505,12 +560,11 @@ impl<'a> TreeBuilder<'a> {
| Operator::MoreThanOrEqual,
v,
p,
) => Some(IndexOperator::RangePart(p.transform(op), v.clone())),
_ => None,
) => return Some(IndexOperator::RangePart(p.transform(op), v)),
_ => {}
}
} else {
None
}
None
}
async fn eval_subquery(&mut self, stk: &mut Stk, s: &Subquery) -> Result<Node, Error> {
@ -528,6 +582,7 @@ pub(super) type IndexRef = u16;
pub(super) struct IndexesMap {
pub(super) options: Vec<(Arc<Expression>, IndexOption)>,
pub(super) definitions: Vec<DefineIndexStatement>,
pub(super) order_limit: Option<IndexOption>,
}
#[derive(Clone)]
@ -567,9 +622,9 @@ pub(super) enum Node {
}
impl Node {
pub(super) fn is_computed(&self) -> Option<&Value> {
if let Node::Computed(v) = self {
Some(v)
pub(super) fn is_computed(&self) -> Option<Arc<Value>> {
if let Self::Computed(v) = self {
Some(v.clone())
} else {
None
}
@ -579,17 +634,17 @@ impl Node {
&self,
) -> Option<(&Idiom, LocalIndexRefs, Option<RemoteIndexRefs>)> {
match self {
Node::IndexedField(id, irs) => Some((id, irs.clone(), None)),
Node::RecordField(id, ro) => Some((id, ro.locals.clone(), Some(ro.remotes.clone()))),
Self::IndexedField(id, irs) => Some((id, irs.clone(), None)),
Self::RecordField(id, ro) => Some((id, ro.locals.clone(), Some(ro.remotes.clone()))),
_ => None,
}
}
pub(super) fn is_field(&self) -> Option<&Idiom> {
match self {
Node::IndexedField(id, _) => Some(id),
Node::RecordField(id, _) => Some(id),
Node::NonIndexedField(id) => Some(id),
Self::IndexedField(id, _) => Some(id),
Self::RecordField(id, _) => Some(id),
Self::NonIndexedField(id) => Some(id),
_ => None,
}
}
@ -597,8 +652,12 @@ impl Node {
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)]
pub(super) enum IdiomPosition {
/// The idiom is on the left of the condition clause
Left,
/// The idiom is on the right tf the condition clause
Right,
/// Eg. ORDER LIMIT
None,
}
impl IdiomPosition {
@ -613,6 +672,7 @@ impl IdiomPosition {
Operator::MoreThanOrEqual => Operator::LessThanOrEqual,
_ => op.clone(),
},
IdiomPosition::None => op.clone(),
}
}
}

View file

@ -26,7 +26,6 @@ use crate::sql::id::Id;
use derive::Key;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::ops::Range;
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Key)]
struct Prefix<'a> {
@ -156,10 +155,6 @@ impl<'a> Index<'a> {
beg
}
pub fn range(ns: &str, db: &str, tb: &str, ix: &str) -> Range<Vec<u8>> {
Self::prefix_beg(ns, db, tb, ix)..Self::prefix_end(ns, db, tb, ix)
}
fn prefix_ids(ns: &str, db: &str, tb: &str, ix: &str, fd: &Array) -> Vec<u8> {
PrefixIds::new(ns, db, tb, ix, fd).encode().unwrap()
}

View file

@ -22,10 +22,18 @@ impl Limit {
ctx: &Context,
opt: &Options,
doc: Option<&CursorDoc>,
) -> Result<usize, Error> {
) -> Result<u32, Error> {
match self.0.compute(stk, ctx, opt, doc).await {
// This is a valid limiting number
Ok(Value::Number(Number::Int(v))) if v >= 0 => Ok(v as usize),
Ok(Value::Number(Number::Int(v))) if v >= 0 => {
if v > u32::MAX as i64 {
Err(Error::InvalidLimit {
value: v.to_string(),
})
} else {
Ok(v as u32)
}
}
// An invalid value was specified
Ok(v) => Err(Error::InvalidLimit {
value: v.as_string(),

View file

@ -22,10 +22,18 @@ impl Start {
ctx: &Context,
opt: &Options,
doc: Option<&CursorDoc>,
) -> Result<usize, Error> {
) -> Result<u32, Error> {
match self.0.compute(stk, ctx, opt, doc).await {
// This is a valid starting number
Ok(Value::Number(Number::Int(v))) if v >= 0 => Ok(v as usize),
Ok(Value::Number(Number::Int(v))) if v >= 0 => {
if v > u32::MAX as i64 {
Err(Error::InvalidStart {
value: v.to_string(),
})
} else {
Ok(v as u32)
}
}
// An invalid value was specified
Ok(v) => Err(Error::InvalidStart {
value: v.as_string(),

View file

@ -68,22 +68,26 @@ impl SelectStatement {
) -> Result<Value, Error> {
// Valid options?
opt.valid_for_db()?;
// Assign the statement
let stm = Statement::from(self);
// Create a new iterator
let mut i = Iterator::new();
// Ensure futures are stored and the version is set if specified
let version = self.version.as_ref().map(|v| v.to_u64());
let opt =
Arc::new(opt.new_with_futures(false).with_projections(true).with_version(version));
//;
// Get a query planner
let mut planner = QueryPlanner::new(
opt.clone(),
self.with.as_ref().cloned().map(|w| w.into()),
self.cond.as_ref().cloned().map(|c| c.into()),
self.order.as_ref().cloned().map(|o| o.into()),
);
// Extract the limit
let limit = i.setup_limit(stk, ctx, &opt, &stm).await?;
// Used for ONLY: is the limit 1?
let limit_is_one_or_zero = match &self.limit {
Some(l) => l.process(stk, ctx, &opt, doc).await? <= 1,
let limit_is_one_or_zero = match limit {
Some(l) => l <= 1,
_ => false,
};
// Fail for multiple targets without a limit
@ -98,7 +102,6 @@ impl SelectStatement {
if self.only && !limit_is_one_or_zero {
return Err(Error::SingleOnlyOutput);
}
planner.add_iterables(stk, ctx, t, &mut i).await?;
}
Value::Thing(v) => match &v.id {
@ -147,8 +150,6 @@ impl SelectStatement {
}
// Create a new context
let mut ctx = MutableContext::new(ctx);
// Assign the statement
let stm = Statement::from(self);
// Add query executors if any
if planner.has_executors() {
ctx.set_query_planner(planner);

View file

@ -2,6 +2,7 @@ mod parse;
use parse::Parse;
mod helpers;
use crate::helpers::Test;
use helpers::{new_ds, skip_ok};
use surrealdb::dbs::{Response, Session};
use surrealdb::err::Error;
@ -1047,12 +1048,6 @@ const CONTAINS_TABLE_EXPLAIN: &str = r"[
},
operation: 'Iterate Table'
},
{
detail: {
reason: 'NO INDEX FOUND'
},
operation: 'Fallback'
},
{
detail: {
type: 'Memory'
@ -1953,12 +1948,6 @@ async fn select_with_record_id_link_no_index() -> Result<(), Error> {
},
operation: 'Iterate Table'
},
{
detail: {
reason: 'NO INDEX FOUND'
},
operation: 'Fallback'
},
{
detail: {
type: 'Memory'
@ -2256,12 +2245,6 @@ async fn select_with_record_id_link_full_text_no_record_index() -> Result<(), Er
},
operation: 'Iterate Table'
},
{
detail: {
reason: 'NO INDEX FOUND'
},
operation: 'Fallback'
},
{
detail: {
type: 'Memory'
@ -2328,12 +2311,6 @@ async fn select_with_record_id_index() -> Result<(), Error> {
},
operation: 'Iterate Table'
},
{
detail: {
reason: 'NO INDEX FOUND'
},
operation: 'Fallback'
},
{
detail: {
type: 'Memory'
@ -2600,3 +2577,205 @@ async fn select_with_non_boolean_expression() -> Result<(), Error> {
//
Ok(())
}
#[tokio::test]
async fn select_from_standard_index_ascending() -> Result<(), Error> {
//
let sql = "
DEFINE INDEX time ON TABLE session COLUMNS time;
CREATE session:1 SET time = d'2024-07-01T01:00:00Z';
CREATE session:2 SET time = d'2024-06-30T23:00:00Z';
CREATE session:3 SET other = 'test';
CREATE session:4 SET time = null;
CREATE session:5 SET time = d'2024-07-01T02:00:00Z';
CREATE session:6 SET time = d'2024-06-30T23:30:00Z';
SELECT * FROM session ORDER BY time ASC LIMIT 4 EXPLAIN;
SELECT * FROM session ORDER BY time ASC LIMIT 4;
SELECT * FROM session ORDER BY time ASC EXPLAIN;
SELECT * FROM session ORDER BY time ASC;
";
let mut t = Test::new(sql).await?;
t.skip_ok(7)?;
//
t.expect_vals(&vec![
"[
{
detail: {
plan: {
ascending: true,
index: 'time',
operator: 'Order'
},
table: 'session'
},
operation: 'Iterate Index'
},
{
detail: {
type: 'Memory'
},
operation: 'Collector'
}
]",
"[
{
id: session:3,
other: 'test'
},
{
id: session:4,
time: NULL
},
{
id: session:2,
time: d'2024-06-30T23:00:00Z'
},
{
id: session:6,
time: d'2024-06-30T23:30:00Z'
}
]",
"[
{
detail: {
plan: {
ascending: true,
index: 'time',
operator: 'Order'
},
table: 'session'
},
operation: 'Iterate Index'
},
{
detail: {
type: 'Memory'
},
operation: 'Collector'
}
]",
"[
{
id: session:3,
other: 'test'
},
{
id: session:4,
time: NULL
},
{
id: session:2,
time: d'2024-06-30T23:00:00Z'
},
{
id: session:6,
time: d'2024-06-30T23:30:00Z'
},
{
id: session:1,
time: d'2024-07-01T01:00:00Z'
},
{
id: session:5,
time: d'2024-07-01T02:00:00Z'
}
]",
])?;
//
Ok(())
}
#[tokio::test]
async fn select_from_unique_index_ascending() -> Result<(), Error> {
//
let sql = "
DEFINE INDEX time ON TABLE session COLUMNS time UNIQUE;
CREATE session:1 SET time = d'2024-07-01T01:00:00Z';
CREATE session:2 SET time = d'2024-06-30T23:00:00Z';
CREATE session:3 SET other = 'test';
CREATE session:4 SET time = null;
CREATE session:5 SET time = d'2024-07-01T02:00:00Z';
CREATE session:6 SET time = d'2024-06-30T23:30:00Z';
SELECT * FROM session ORDER BY time ASC LIMIT 3 EXPLAIN;
SELECT * FROM session ORDER BY time ASC LIMIT 3;
SELECT * FROM session ORDER BY time ASC EXPLAIN;
SELECT * FROM session ORDER BY time ASC;
";
let mut t = Test::new(sql).await?;
t.skip_ok(7)?;
//
t.expect_vals(&vec![
"[
{
detail: {
plan: {
ascending: true,
index: 'time',
operator: 'Order'
},
table: 'session'
},
operation: 'Iterate Index'
},
{
detail: {
type: 'Memory'
},
operation: 'Collector'
}
]",
"[
{
id: session:2,
time: d'2024-06-30T23:00:00Z'
},
{
id: session:6,
time: d'2024-06-30T23:30:00Z'
},
{
id: session:1,
time: d'2024-07-01T01:00:00Z'
}
]",
"[
{
detail: {
plan: {
ascending: true,
index: 'time',
operator: 'Order'
},
table: 'session'
},
operation: 'Iterate Index'
},
{
detail: {
type: 'Memory'
},
operation: 'Collector'
}
]",
"[
{
id: session:2,
time: d'2024-06-30T23:00:00Z'
},
{
id: session:6,
time: d'2024-06-30T23:30:00Z'
},
{
id: session:1,
time: d'2024-07-01T01:00:00Z'
},
{
id: session:5,
time: d'2024-07-01T02:00:00Z'
}
]",
])?;
//
Ok(())
}

View file

@ -181,12 +181,6 @@ async fn select_where_brute_force_knn() -> Result<(), Error> {
},
operation: 'Iterate Table'
},
{
detail: {
reason: 'NO INDEX FOUND'
},
operation: 'Fallback'
},
{
detail: {
type: 'Memory'
@ -288,12 +282,6 @@ async fn select_where_hnsw_knn() -> Result<(), Error> {
},
operation: 'Iterate Table'
},
{
detail: {
reason: 'NO INDEX FOUND'
},
operation: 'Fallback'
},
{
detail: {
type: 'Memory'
@ -485,12 +473,6 @@ async fn select_bruteforce_knn_with_condition() -> Result<(), Error> {
},
operation: 'Iterate Table'
},
{
detail: {
reason: 'NO INDEX FOUND'
},
operation: 'Fallback'
},
{
detail: {
type: 'Memory'