From ebec244b01ca84db02a8f41df2d80c8b63e5236d Mon Sep 17 00:00:00 2001 From: Emmanuel Keller Date: Thu, 19 Sep 2024 16:17:25 +0100 Subject: [PATCH] Fix Query Planner Ignoring Isolated predicates in WHERE Clause (#4806) --- core/src/idx/planner/mod.rs | 10 +++- core/src/idx/planner/plan.rs | 57 ++++++---------------- core/src/idx/planner/tree.rs | 92 ++++++++++++++++++++++++++++++------ sdk/tests/helpers.rs | 27 +++++++++-- sdk/tests/planner.rs | 34 ++++++++++++- 5 files changed, 157 insertions(+), 63 deletions(-) diff --git a/core/src/idx/planner/mod.rs b/core/src/idx/planner/mod.rs index 2ec9ab84..a2fc1828 100644 --- a/core/src/idx/planner/mod.rs +++ b/core/src/idx/planner/mod.rs @@ -112,7 +112,15 @@ impl QueryPlanner { tree.knn_condition, ) .await?; - match PlanBuilder::build(tree.root, params, tree.with_indexes, order)? { + match PlanBuilder::build( + tree.root, + params, + tree.with_indexes, + order, + tree.all_and_groups, + tree.all_and, + tree.all_expressions_with_index, + )? { Plan::SingleIndex(exp, io) => { if io.require_distinct() { self.requires_distinct = true; diff --git a/core/src/idx/planner/plan.rs b/core/src/idx/planner/plan.rs index 627e776e..2ca15428 100644 --- a/core/src/idx/planner/plan.rs +++ b/core/src/idx/planner/plan.rs @@ -17,33 +17,27 @@ pub(super) struct PlanBuilder { has_indexes: bool, /// List of expressions that are not ranges, backed by an index non_range_indexes: Vec<(Arc, IndexOption)>, - /// List of indexes involved in this plan - with_indexes: Vec, + /// List of indexes allowed in this plan + with_indexes: Option>, /// Group each possible optimisations local to a SubQuery groups: BTreeMap, // The order matters because we want the plan to be consistent across repeated queries. - /// Does a group contains only AND relations? - all_and_groups: HashMap, - /// Does the whole query contains only AND relations? - all_and: bool, - /// Is every expression backed by an index? - all_exp_with_index: bool, } impl PlanBuilder { pub(super) fn build( root: Option, params: &QueryPlannerParams, - with_indexes: Vec, + with_indexes: Option>, order: Option, + all_and_groups: HashMap, + all_and: bool, + all_expressions_with_index: bool, ) -> Result { let mut b = PlanBuilder { has_indexes: false, non_range_indexes: Default::default(), groups: Default::default(), with_indexes, - all_and_groups: Default::default(), - all_and: true, - all_exp_with_index: true, }; // If we only count and there are no conditions and no aggregations, then we can only scan keys @@ -61,7 +55,7 @@ impl PlanBuilder { } // If every boolean operator are AND then we can use the single index plan - if b.all_and { + if all_and { // TODO: This is currently pretty arbitrary // We take the "first" range query if one is available if let Some((_, group)) = b.groups.into_iter().next() { @@ -79,10 +73,10 @@ impl PlanBuilder { } } // If every expression is backed by an index with can use the MultiIndex plan - else if b.all_exp_with_index { + else if all_expressions_with_index { let mut ranges = Vec::with_capacity(b.groups.len()); - for (depth, group) in b.groups { - if b.all_and_groups.get(&depth) == Some(&true) { + for (gr, group) in b.groups { + if all_and_groups.get(&gr) == Some(&true) { group.take_union_ranges(&mut ranges); } else { group.take_intersect_ranges(&mut ranges); @@ -100,9 +94,11 @@ impl PlanBuilder { // Check if we have an explicit list of index we can use fn filter_index_option(&self, io: Option<&IndexOption>) -> Option { - if let Some(io) = &io { - if !self.with_indexes.is_empty() && !self.with_indexes.contains(&io.ix_ref()) { - return None; + if let Some(io) = io { + if let Some(wi) = &self.with_indexes { + if !wi.contains(&io.ix_ref()) { + return None; + } } } io.cloned() @@ -117,11 +113,8 @@ impl PlanBuilder { right, exp, } => { - let is_bool = self.check_boolean_operator(*group, exp.operator()); if let Some(io) = self.filter_index_option(io.as_ref()) { self.add_index_option(*group, exp.clone(), io); - } else if self.all_exp_with_index && !is_bool { - self.all_exp_with_index = false; } self.eval_node(left)?; self.eval_node(right)?; @@ -132,26 +125,6 @@ impl PlanBuilder { } } - fn check_boolean_operator(&mut self, gr: GroupRef, op: &Operator) -> bool { - match op { - Operator::Neg | Operator::Or => { - if self.all_and { - self.all_and = false; - } - self.all_and_groups.entry(gr).and_modify(|b| *b = false).or_insert(false); - true - } - Operator::And => { - self.all_and_groups.entry(gr).or_insert(true); - true - } - _ => { - self.all_and_groups.entry(gr).or_insert(true); - false - } - } - } - fn add_index_option(&mut self, group_ref: GroupRef, exp: Arc, io: IndexOption) { if let IndexOperator::RangePart(_, _) = io.op() { let level = self.groups.entry(group_ref).or_default(); diff --git a/core/src/idx/planner/tree.rs b/core/src/idx/planner/tree.rs index ea637a7a..aa366287 100644 --- a/core/src/idx/planner/tree.rs +++ b/core/src/idx/planner/tree.rs @@ -20,10 +20,16 @@ use std::sync::Arc; pub(super) struct Tree { pub(super) root: Option, pub(super) index_map: IndexesMap, - pub(super) with_indexes: Vec, + pub(super) with_indexes: Option>, pub(super) knn_expressions: KnnExpressions, pub(super) knn_brute_force_expressions: KnnBruteForceExpressions, pub(super) knn_condition: Option, + /// Is every expression backed by an index? + pub(super) all_expressions_with_index: bool, + /// Does the whole query contains only AND relations? + pub(super) all_and: bool, + /// Does a group contains only AND relations? + pub(super) all_and_groups: HashMap, } impl Tree { @@ -50,6 +56,10 @@ impl Tree { knn_expressions: b.knn_expressions, knn_brute_force_expressions: b.knn_brute_force_expressions, knn_condition: b.knn_condition, + all_expressions_with_index: b.leaf_nodes_count > 0 + && b.leaf_nodes_with_index_count == b.leaf_nodes_count, + all_and: b.all_and.unwrap_or(true), + all_and_groups: b.all_and_groups, }) } } @@ -65,13 +75,17 @@ struct TreeBuilder<'a> { resolved_expressions: HashMap, ResolvedExpression>, resolved_idioms: HashMap, Node>, index_map: IndexesMap, - with_indexes: Vec, + with_indexes: Option>, knn_brute_force_expressions: HashMap, KnnBruteForceExpression>, knn_expressions: KnnExpressions, idioms_record_options: HashMap, RecordOptions>, group_sequence: GroupRef, root: Option, knn_condition: Option, + leaf_nodes_count: usize, + leaf_nodes_with_index_count: usize, + all_and: Option, + all_and_groups: HashMap, } #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -93,8 +107,8 @@ impl<'a> TreeBuilder<'a> { orders: Option<&'a Orders>, ) -> Self { let with_indexes = match with { - Some(With::Index(ixs)) => Vec::with_capacity(ixs.len()), - _ => vec![], + Some(With::Index(ixs)) => Some(Vec::with_capacity(ixs.len())), + _ => None, }; let first_order = if let Some(o) = orders { o.0.first() @@ -119,6 +133,10 @@ impl<'a> TreeBuilder<'a> { group_sequence: 0, root: None, knn_condition: None, + all_and: None, + all_and_groups: Default::default(), + leaf_nodes_count: 0, + leaf_nodes_with_index_count: 0, } } @@ -188,7 +206,10 @@ impl<'a> TreeBuilder<'a> { | Value::Param(_) | Value::Null | Value::None - | Value::Function(_) => Ok(Node::Computable), + | Value::Function(_) => { + self.leaf_nodes_count += 1; + Ok(Node::Computable) + } Value::Array(a) => self.eval_array(stk, a).await, Value::Subquery(s) => self.eval_subquery(stk, s).await, _ => Ok(Node::Unsupported(format!("Unsupported value: {}", v))), @@ -207,6 +228,7 @@ impl<'a> TreeBuilder<'a> { } async fn eval_array(&mut self, stk: &mut Stk, a: &Array) -> Result { + self.leaf_nodes_count += 1; let mut values = Vec::with_capacity(a.len()); for v in &a.0 { values.push(stk.run(|stk| v.compute(stk, self.ctx, self.opt, None)).await?); @@ -220,6 +242,7 @@ impl<'a> TreeBuilder<'a> { group: GroupRef, i: &Idiom, ) -> Result { + self.leaf_nodes_count += 1; // Check if the idiom has already been resolved if let Some(node) = self.resolved_idioms.get(i).cloned() { return Ok(node); @@ -273,7 +296,11 @@ impl<'a> TreeBuilder<'a> { let ixr = self.index_map.definitions.len() as IndexRef; if let Some(With::Index(ixs)) = &self.with { if ixs.contains(&ix.name.0) { - self.with_indexes.push(ixr); + if let Some(wi) = &mut self.with_indexes { + wi.push(ixr); + } else { + self.with_indexes = Some(vec![ixr]); + } } } self.index_map.definitions.push(ix.clone().into()); @@ -343,7 +370,10 @@ impl<'a> TreeBuilder<'a> { match e { Expression::Unary { .. - } => Ok(Node::Unsupported("unary expressions not supported".to_string())), + } => { + self.leaf_nodes_count += 1; + Ok(Node::Unsupported("unary expressions not supported".to_string())) + } Expression::Binary { l, o, @@ -353,6 +383,7 @@ impl<'a> TreeBuilder<'a> { if let Some(re) = self.resolved_expressions.get(e).cloned() { return Ok(re.into()); } + self.check_boolean_operator(group, o); let left = stk.run(|stk| self.eval_value(stk, group, l)).await?; let right = stk.run(|stk| self.eval_value(stk, group, r)).await?; // If both values are computable, then we can delegate the computation to the parent @@ -362,9 +393,8 @@ impl<'a> TreeBuilder<'a> { let exp = Arc::new(e.clone()); let left = Arc::new(self.compute(stk, l, left).await?); let right = Arc::new(self.compute(stk, r, right).await?); - let mut io = None; - if let Some((id, local_irs, remote_irs)) = left.is_indexed_field() { - io = self.lookup_index_options( + let io = if let Some((id, local_irs, remote_irs)) = left.is_indexed_field() { + self.lookup_index_options( o, id, &right, @@ -372,9 +402,9 @@ impl<'a> TreeBuilder<'a> { IdiomPosition::Left, local_irs, remote_irs, - )?; + )? } else if let Some((id, local_irs, remote_irs)) = right.is_indexed_field() { - io = self.lookup_index_options( + self.lookup_index_options( o, id, &left, @@ -382,13 +412,16 @@ impl<'a> TreeBuilder<'a> { IdiomPosition::Right, local_irs, remote_irs, - )?; - } + )? + } else { + None + }; if let Some(id) = left.is_field() { self.eval_bruteforce_knn(id, &right, &exp)?; } else if let Some(id) = right.is_field() { self.eval_bruteforce_knn(id, &left, &exp)?; } + self.check_leaf_node_with_index(io.as_ref()); let re = ResolvedExpression { group, exp: exp.clone(), @@ -402,6 +435,37 @@ impl<'a> TreeBuilder<'a> { } } + fn check_boolean_operator(&mut self, gr: GroupRef, op: &Operator) { + match op { + Operator::Neg | Operator::Or => { + if self.all_and != Some(false) { + self.all_and = Some(false); + } + self.all_and_groups.entry(gr).and_modify(|b| *b = false).or_insert(false); + } + Operator::And => { + if self.all_and.is_none() { + self.all_and = Some(true); + } + self.all_and_groups.entry(gr).or_insert(true); + } + _ => { + self.all_and_groups.entry(gr).or_insert(true); + } + } + } + + fn check_leaf_node_with_index(&mut self, io: Option<&IndexOption>) { + if let Some(io) = io { + if let Some(wi) = &self.with_indexes { + if !wi.contains(&io.ix_ref()) { + return; + } + } + self.leaf_nodes_with_index_count += 2; + } + } + #[allow(clippy::too_many_arguments)] fn lookup_index_options( &mut self, diff --git a/sdk/tests/helpers.rs b/sdk/tests/helpers.rs index dbfa5d00..54b0dc74 100644 --- a/sdk/tests/helpers.rs +++ b/sdk/tests/helpers.rs @@ -1,5 +1,5 @@ use std::collections::HashMap; -use std::fmt::{Debug, Formatter}; +use std::fmt::{Debug, Display, Formatter}; use std::future::Future; use std::sync::Arc; use std::thread::Builder; @@ -313,7 +313,11 @@ impl Test { /// Panics if the expected value is not equal to the actual value. /// Compliant with NaN and Constants. #[allow(dead_code)] - pub fn expect_value(&mut self, val: Value) -> Result<&mut Self, Error> { + pub fn expect_value_info( + &mut self, + val: Value, + info: I, + ) -> Result<&mut Self, Error> { let tmp = self.next_value()?; // Then check they are indeed the same values // @@ -324,14 +328,19 @@ impl Test { val }; if val.is_nan() { - assert!(tmp.is_nan(), "Expected NaN but got: {tmp}"); + assert!(tmp.is_nan(), "Expected NaN but got {info}: {tmp}"); } else { - assert_eq!(tmp, val, "{tmp:#}"); + assert_eq!(tmp, val, "{info} {tmp:#}"); } // Ok(self) } + #[allow(dead_code)] + pub fn expect_value(&mut self, val: Value) -> Result<&mut Self, Error> { + self.expect_value_info(val, "") + } + /// Expect values in the given slice to be present in the responses, following the same order. #[allow(dead_code)] pub fn expect_values(&mut self, values: &[Value]) -> Result<&mut Self, Error> { @@ -344,7 +353,15 @@ impl Test { /// Expect the given value to be equals to the next response. #[allow(dead_code)] pub fn expect_val(&mut self, val: &str) -> Result<&mut Self, Error> { - self.expect_value(value(val).unwrap_or_else(|_| panic!("INVALID VALUE:\n{val}"))) + self.expect_val_info(val, "") + } + + #[allow(dead_code)] + pub fn expect_val_info(&mut self, val: &str, info: I) -> Result<&mut Self, Error> { + self.expect_value_info( + value(val).unwrap_or_else(|_| panic!("INVALID VALUE {info}:\n{val}")), + info, + ) } #[allow(dead_code)] diff --git a/sdk/tests/planner.rs b/sdk/tests/planner.rs index 74373ee0..49c2fe6f 100644 --- a/sdk/tests/planner.rs +++ b/sdk/tests/planner.rs @@ -188,7 +188,7 @@ fn three_multi_index_query(with: &str, parallel: &str) -> String { DEFINE INDEX uniq_name ON TABLE person COLUMNS name UNIQUE; DEFINE INDEX idx_genre ON TABLE person COLUMNS genre; SELECT name FROM person {with} WHERE name = 'Jaime' OR genre = 'm' OR company @@ 'surrealdb' {parallel}; - SELECT name FROM person {with} WHERE name = 'Jaime' OR genre = 'm' OR company @@ 'surrealdb' {parallel} EXPLAIN FULL; + SELECT name FROM person {with} WHERE name = 'Jaime' OR genre = 'm' OR company @@ 'surrealdb' {parallel} EXPLAIN FULL; SELECT name FROM person {with} WHERE name = 'Jaime' AND genre = 'm' AND company @@ 'surrealdb' {parallel}; SELECT name FROM person {with} WHERE name = 'Jaime' AND genre = 'm' AND company @@ 'surrealdb' {parallel} EXPLAIN FULL;") } @@ -3063,3 +3063,35 @@ async fn select_composite_standard_index() -> Result<(), Error> { async fn select_composite_unique_index() -> Result<(), Error> { select_composite_index(true).await } + +#[tokio::test] +async fn select_where_index_boolean_behaviour() -> Result<(), Error> { + let sql = r" + DEFINE INDEX flagIndex ON TABLE test COLUMNS flag; + CREATE test:t CONTENT { flag:true }; + CREATE test:f CONTENT { flag:false }; + SELECT * FROM test; + SELECT * FROM test WITH NOINDEX WHERE (true OR flag=true); + SELECT * FROM test WITH NOINDEX WHERE (true OR flag==true); + SELECT * FROM test WHERE (true OR flag=true); + SELECT * FROM test WHERE (true OR flag==true);"; + let mut t = Test::new(sql).await?; + t.expect_size(8)?; + t.skip_ok(3)?; + for i in 0..5 { + t.expect_val_info( + "[ + { + flag: false, + id: test:f + }, + { + flag: true, + id: test:t + } + ]", + i, + )?; + } + Ok(()) +}