Fix Query Planner Ignoring Isolated predicates in WHERE Clause (#4806)

This commit is contained in:
Emmanuel Keller 2024-09-19 16:17:25 +01:00 committed by GitHub
parent a3b6b2c5db
commit ebec244b01
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 157 additions and 63 deletions

View file

@ -112,7 +112,15 @@ impl QueryPlanner {
tree.knn_condition, tree.knn_condition,
) )
.await?; .await?;
match PlanBuilder::build(tree.root, params, tree.with_indexes, order)? { match PlanBuilder::build(
tree.root,
params,
tree.with_indexes,
order,
tree.all_and_groups,
tree.all_and,
tree.all_expressions_with_index,
)? {
Plan::SingleIndex(exp, io) => { Plan::SingleIndex(exp, io) => {
if io.require_distinct() { if io.require_distinct() {
self.requires_distinct = true; self.requires_distinct = true;

View file

@ -17,33 +17,27 @@ pub(super) struct PlanBuilder {
has_indexes: bool, has_indexes: bool,
/// List of expressions that are not ranges, backed by an index /// List of expressions that are not ranges, backed by an index
non_range_indexes: Vec<(Arc<Expression>, IndexOption)>, non_range_indexes: Vec<(Arc<Expression>, IndexOption)>,
/// List of indexes involved in this plan /// List of indexes allowed in this plan
with_indexes: Vec<IndexRef>, with_indexes: Option<Vec<IndexRef>>,
/// Group each possible optimisations local to a SubQuery /// Group each possible optimisations local to a SubQuery
groups: BTreeMap<GroupRef, Group>, // The order matters because we want the plan to be consistent across repeated queries. groups: BTreeMap<GroupRef, Group>, // The order matters because we want the plan to be consistent across repeated queries.
/// Does a group contains only AND relations?
all_and_groups: HashMap<GroupRef, bool>,
/// Does the whole query contains only AND relations?
all_and: bool,
/// Is every expression backed by an index?
all_exp_with_index: bool,
} }
impl PlanBuilder { impl PlanBuilder {
pub(super) fn build( pub(super) fn build(
root: Option<Node>, root: Option<Node>,
params: &QueryPlannerParams, params: &QueryPlannerParams,
with_indexes: Vec<IndexRef>, with_indexes: Option<Vec<IndexRef>>,
order: Option<IndexOption>, order: Option<IndexOption>,
all_and_groups: HashMap<GroupRef, bool>,
all_and: bool,
all_expressions_with_index: bool,
) -> Result<Plan, Error> { ) -> Result<Plan, Error> {
let mut b = PlanBuilder { let mut b = PlanBuilder {
has_indexes: false, has_indexes: false,
non_range_indexes: Default::default(), non_range_indexes: Default::default(),
groups: Default::default(), groups: Default::default(),
with_indexes, with_indexes,
all_and_groups: Default::default(),
all_and: true,
all_exp_with_index: true,
}; };
// If we only count and there are no conditions and no aggregations, then we can only scan keys // If we only count and there are no conditions and no aggregations, then we can only scan keys
@ -61,7 +55,7 @@ impl PlanBuilder {
} }
// If every boolean operator are AND then we can use the single index plan // If every boolean operator are AND then we can use the single index plan
if b.all_and { if all_and {
// TODO: This is currently pretty arbitrary // TODO: This is currently pretty arbitrary
// We take the "first" range query if one is available // We take the "first" range query if one is available
if let Some((_, group)) = b.groups.into_iter().next() { if let Some((_, group)) = b.groups.into_iter().next() {
@ -79,10 +73,10 @@ impl PlanBuilder {
} }
} }
// If every expression is backed by an index with can use the MultiIndex plan // If every expression is backed by an index with can use the MultiIndex plan
else if b.all_exp_with_index { else if all_expressions_with_index {
let mut ranges = Vec::with_capacity(b.groups.len()); let mut ranges = Vec::with_capacity(b.groups.len());
for (depth, group) in b.groups { for (gr, group) in b.groups {
if b.all_and_groups.get(&depth) == Some(&true) { if all_and_groups.get(&gr) == Some(&true) {
group.take_union_ranges(&mut ranges); group.take_union_ranges(&mut ranges);
} else { } else {
group.take_intersect_ranges(&mut ranges); group.take_intersect_ranges(&mut ranges);
@ -100,9 +94,11 @@ impl PlanBuilder {
// Check if we have an explicit list of index we can use // Check if we have an explicit list of index we can use
fn filter_index_option(&self, io: Option<&IndexOption>) -> Option<IndexOption> { fn filter_index_option(&self, io: Option<&IndexOption>) -> Option<IndexOption> {
if let Some(io) = &io { if let Some(io) = io {
if !self.with_indexes.is_empty() && !self.with_indexes.contains(&io.ix_ref()) { if let Some(wi) = &self.with_indexes {
return None; if !wi.contains(&io.ix_ref()) {
return None;
}
} }
} }
io.cloned() io.cloned()
@ -117,11 +113,8 @@ impl PlanBuilder {
right, right,
exp, exp,
} => { } => {
let is_bool = self.check_boolean_operator(*group, exp.operator());
if let Some(io) = self.filter_index_option(io.as_ref()) { if let Some(io) = self.filter_index_option(io.as_ref()) {
self.add_index_option(*group, exp.clone(), io); self.add_index_option(*group, exp.clone(), io);
} else if self.all_exp_with_index && !is_bool {
self.all_exp_with_index = false;
} }
self.eval_node(left)?; self.eval_node(left)?;
self.eval_node(right)?; self.eval_node(right)?;
@ -132,26 +125,6 @@ impl PlanBuilder {
} }
} }
fn check_boolean_operator(&mut self, gr: GroupRef, op: &Operator) -> bool {
match op {
Operator::Neg | Operator::Or => {
if self.all_and {
self.all_and = false;
}
self.all_and_groups.entry(gr).and_modify(|b| *b = false).or_insert(false);
true
}
Operator::And => {
self.all_and_groups.entry(gr).or_insert(true);
true
}
_ => {
self.all_and_groups.entry(gr).or_insert(true);
false
}
}
}
fn add_index_option(&mut self, group_ref: GroupRef, exp: Arc<Expression>, io: IndexOption) { fn add_index_option(&mut self, group_ref: GroupRef, exp: Arc<Expression>, io: IndexOption) {
if let IndexOperator::RangePart(_, _) = io.op() { if let IndexOperator::RangePart(_, _) = io.op() {
let level = self.groups.entry(group_ref).or_default(); let level = self.groups.entry(group_ref).or_default();

View file

@ -20,10 +20,16 @@ use std::sync::Arc;
pub(super) struct Tree { pub(super) struct Tree {
pub(super) root: Option<Node>, pub(super) root: Option<Node>,
pub(super) index_map: IndexesMap, pub(super) index_map: IndexesMap,
pub(super) with_indexes: Vec<IndexRef>, pub(super) with_indexes: Option<Vec<IndexRef>>,
pub(super) knn_expressions: KnnExpressions, pub(super) knn_expressions: KnnExpressions,
pub(super) knn_brute_force_expressions: KnnBruteForceExpressions, pub(super) knn_brute_force_expressions: KnnBruteForceExpressions,
pub(super) knn_condition: Option<Cond>, pub(super) knn_condition: Option<Cond>,
/// Is every expression backed by an index?
pub(super) all_expressions_with_index: bool,
/// Does the whole query contains only AND relations?
pub(super) all_and: bool,
/// Does a group contains only AND relations?
pub(super) all_and_groups: HashMap<GroupRef, bool>,
} }
impl Tree { impl Tree {
@ -50,6 +56,10 @@ impl Tree {
knn_expressions: b.knn_expressions, knn_expressions: b.knn_expressions,
knn_brute_force_expressions: b.knn_brute_force_expressions, knn_brute_force_expressions: b.knn_brute_force_expressions,
knn_condition: b.knn_condition, knn_condition: b.knn_condition,
all_expressions_with_index: b.leaf_nodes_count > 0
&& b.leaf_nodes_with_index_count == b.leaf_nodes_count,
all_and: b.all_and.unwrap_or(true),
all_and_groups: b.all_and_groups,
}) })
} }
} }
@ -65,13 +75,17 @@ struct TreeBuilder<'a> {
resolved_expressions: HashMap<Arc<Expression>, ResolvedExpression>, resolved_expressions: HashMap<Arc<Expression>, ResolvedExpression>,
resolved_idioms: HashMap<Arc<Idiom>, Node>, resolved_idioms: HashMap<Arc<Idiom>, Node>,
index_map: IndexesMap, index_map: IndexesMap,
with_indexes: Vec<IndexRef>, with_indexes: Option<Vec<IndexRef>>,
knn_brute_force_expressions: HashMap<Arc<Expression>, KnnBruteForceExpression>, knn_brute_force_expressions: HashMap<Arc<Expression>, KnnBruteForceExpression>,
knn_expressions: KnnExpressions, knn_expressions: KnnExpressions,
idioms_record_options: HashMap<Arc<Idiom>, RecordOptions>, idioms_record_options: HashMap<Arc<Idiom>, RecordOptions>,
group_sequence: GroupRef, group_sequence: GroupRef,
root: Option<Node>, root: Option<Node>,
knn_condition: Option<Cond>, knn_condition: Option<Cond>,
leaf_nodes_count: usize,
leaf_nodes_with_index_count: usize,
all_and: Option<bool>,
all_and_groups: HashMap<GroupRef, bool>,
} }
#[derive(Debug, Clone, Eq, PartialEq, Hash)] #[derive(Debug, Clone, Eq, PartialEq, Hash)]
@ -93,8 +107,8 @@ impl<'a> TreeBuilder<'a> {
orders: Option<&'a Orders>, orders: Option<&'a Orders>,
) -> Self { ) -> Self {
let with_indexes = match with { let with_indexes = match with {
Some(With::Index(ixs)) => Vec::with_capacity(ixs.len()), Some(With::Index(ixs)) => Some(Vec::with_capacity(ixs.len())),
_ => vec![], _ => None,
}; };
let first_order = if let Some(o) = orders { let first_order = if let Some(o) = orders {
o.0.first() o.0.first()
@ -119,6 +133,10 @@ impl<'a> TreeBuilder<'a> {
group_sequence: 0, group_sequence: 0,
root: None, root: None,
knn_condition: None, knn_condition: None,
all_and: None,
all_and_groups: Default::default(),
leaf_nodes_count: 0,
leaf_nodes_with_index_count: 0,
} }
} }
@ -188,7 +206,10 @@ impl<'a> TreeBuilder<'a> {
| Value::Param(_) | Value::Param(_)
| Value::Null | Value::Null
| Value::None | Value::None
| Value::Function(_) => Ok(Node::Computable), | Value::Function(_) => {
self.leaf_nodes_count += 1;
Ok(Node::Computable)
}
Value::Array(a) => self.eval_array(stk, a).await, Value::Array(a) => self.eval_array(stk, a).await,
Value::Subquery(s) => self.eval_subquery(stk, s).await, Value::Subquery(s) => self.eval_subquery(stk, s).await,
_ => Ok(Node::Unsupported(format!("Unsupported value: {}", v))), _ => Ok(Node::Unsupported(format!("Unsupported value: {}", v))),
@ -207,6 +228,7 @@ impl<'a> TreeBuilder<'a> {
} }
async fn eval_array(&mut self, stk: &mut Stk, a: &Array) -> Result<Node, Error> { async fn eval_array(&mut self, stk: &mut Stk, a: &Array) -> Result<Node, Error> {
self.leaf_nodes_count += 1;
let mut values = Vec::with_capacity(a.len()); let mut values = Vec::with_capacity(a.len());
for v in &a.0 { for v in &a.0 {
values.push(stk.run(|stk| v.compute(stk, self.ctx, self.opt, None)).await?); values.push(stk.run(|stk| v.compute(stk, self.ctx, self.opt, None)).await?);
@ -220,6 +242,7 @@ impl<'a> TreeBuilder<'a> {
group: GroupRef, group: GroupRef,
i: &Idiom, i: &Idiom,
) -> Result<Node, Error> { ) -> Result<Node, Error> {
self.leaf_nodes_count += 1;
// Check if the idiom has already been resolved // Check if the idiom has already been resolved
if let Some(node) = self.resolved_idioms.get(i).cloned() { if let Some(node) = self.resolved_idioms.get(i).cloned() {
return Ok(node); return Ok(node);
@ -273,7 +296,11 @@ impl<'a> TreeBuilder<'a> {
let ixr = self.index_map.definitions.len() as IndexRef; let ixr = self.index_map.definitions.len() as IndexRef;
if let Some(With::Index(ixs)) = &self.with { if let Some(With::Index(ixs)) = &self.with {
if ixs.contains(&ix.name.0) { if ixs.contains(&ix.name.0) {
self.with_indexes.push(ixr); if let Some(wi) = &mut self.with_indexes {
wi.push(ixr);
} else {
self.with_indexes = Some(vec![ixr]);
}
} }
} }
self.index_map.definitions.push(ix.clone().into()); self.index_map.definitions.push(ix.clone().into());
@ -343,7 +370,10 @@ impl<'a> TreeBuilder<'a> {
match e { match e {
Expression::Unary { Expression::Unary {
.. ..
} => Ok(Node::Unsupported("unary expressions not supported".to_string())), } => {
self.leaf_nodes_count += 1;
Ok(Node::Unsupported("unary expressions not supported".to_string()))
}
Expression::Binary { Expression::Binary {
l, l,
o, o,
@ -353,6 +383,7 @@ impl<'a> TreeBuilder<'a> {
if let Some(re) = self.resolved_expressions.get(e).cloned() { if let Some(re) = self.resolved_expressions.get(e).cloned() {
return Ok(re.into()); return Ok(re.into());
} }
self.check_boolean_operator(group, o);
let left = stk.run(|stk| self.eval_value(stk, group, l)).await?; let left = stk.run(|stk| self.eval_value(stk, group, l)).await?;
let right = stk.run(|stk| self.eval_value(stk, group, r)).await?; let right = stk.run(|stk| self.eval_value(stk, group, r)).await?;
// If both values are computable, then we can delegate the computation to the parent // If both values are computable, then we can delegate the computation to the parent
@ -362,9 +393,8 @@ impl<'a> TreeBuilder<'a> {
let exp = Arc::new(e.clone()); let exp = Arc::new(e.clone());
let left = Arc::new(self.compute(stk, l, left).await?); let left = Arc::new(self.compute(stk, l, left).await?);
let right = Arc::new(self.compute(stk, r, right).await?); let right = Arc::new(self.compute(stk, r, right).await?);
let mut io = None; let io = if let Some((id, local_irs, remote_irs)) = left.is_indexed_field() {
if let Some((id, local_irs, remote_irs)) = left.is_indexed_field() { self.lookup_index_options(
io = self.lookup_index_options(
o, o,
id, id,
&right, &right,
@ -372,9 +402,9 @@ impl<'a> TreeBuilder<'a> {
IdiomPosition::Left, IdiomPosition::Left,
local_irs, local_irs,
remote_irs, remote_irs,
)?; )?
} else if let Some((id, local_irs, remote_irs)) = right.is_indexed_field() { } else if let Some((id, local_irs, remote_irs)) = right.is_indexed_field() {
io = self.lookup_index_options( self.lookup_index_options(
o, o,
id, id,
&left, &left,
@ -382,13 +412,16 @@ impl<'a> TreeBuilder<'a> {
IdiomPosition::Right, IdiomPosition::Right,
local_irs, local_irs,
remote_irs, remote_irs,
)?; )?
} } else {
None
};
if let Some(id) = left.is_field() { if let Some(id) = left.is_field() {
self.eval_bruteforce_knn(id, &right, &exp)?; self.eval_bruteforce_knn(id, &right, &exp)?;
} else if let Some(id) = right.is_field() { } else if let Some(id) = right.is_field() {
self.eval_bruteforce_knn(id, &left, &exp)?; self.eval_bruteforce_knn(id, &left, &exp)?;
} }
self.check_leaf_node_with_index(io.as_ref());
let re = ResolvedExpression { let re = ResolvedExpression {
group, group,
exp: exp.clone(), exp: exp.clone(),
@ -402,6 +435,37 @@ impl<'a> TreeBuilder<'a> {
} }
} }
fn check_boolean_operator(&mut self, gr: GroupRef, op: &Operator) {
match op {
Operator::Neg | Operator::Or => {
if self.all_and != Some(false) {
self.all_and = Some(false);
}
self.all_and_groups.entry(gr).and_modify(|b| *b = false).or_insert(false);
}
Operator::And => {
if self.all_and.is_none() {
self.all_and = Some(true);
}
self.all_and_groups.entry(gr).or_insert(true);
}
_ => {
self.all_and_groups.entry(gr).or_insert(true);
}
}
}
fn check_leaf_node_with_index(&mut self, io: Option<&IndexOption>) {
if let Some(io) = io {
if let Some(wi) = &self.with_indexes {
if !wi.contains(&io.ix_ref()) {
return;
}
}
self.leaf_nodes_with_index_count += 2;
}
}
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
fn lookup_index_options( fn lookup_index_options(
&mut self, &mut self,

View file

@ -1,5 +1,5 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::{Debug, Formatter}; use std::fmt::{Debug, Display, Formatter};
use std::future::Future; use std::future::Future;
use std::sync::Arc; use std::sync::Arc;
use std::thread::Builder; use std::thread::Builder;
@ -313,7 +313,11 @@ impl Test {
/// Panics if the expected value is not equal to the actual value. /// Panics if the expected value is not equal to the actual value.
/// Compliant with NaN and Constants. /// Compliant with NaN and Constants.
#[allow(dead_code)] #[allow(dead_code)]
pub fn expect_value(&mut self, val: Value) -> Result<&mut Self, Error> { pub fn expect_value_info<I: Display>(
&mut self,
val: Value,
info: I,
) -> Result<&mut Self, Error> {
let tmp = self.next_value()?; let tmp = self.next_value()?;
// Then check they are indeed the same values // Then check they are indeed the same values
// //
@ -324,14 +328,19 @@ impl Test {
val val
}; };
if val.is_nan() { if val.is_nan() {
assert!(tmp.is_nan(), "Expected NaN but got: {tmp}"); assert!(tmp.is_nan(), "Expected NaN but got {info}: {tmp}");
} else { } else {
assert_eq!(tmp, val, "{tmp:#}"); assert_eq!(tmp, val, "{info} {tmp:#}");
} }
// //
Ok(self) Ok(self)
} }
#[allow(dead_code)]
pub fn expect_value(&mut self, val: Value) -> Result<&mut Self, Error> {
self.expect_value_info(val, "")
}
/// Expect values in the given slice to be present in the responses, following the same order. /// Expect values in the given slice to be present in the responses, following the same order.
#[allow(dead_code)] #[allow(dead_code)]
pub fn expect_values(&mut self, values: &[Value]) -> Result<&mut Self, Error> { pub fn expect_values(&mut self, values: &[Value]) -> Result<&mut Self, Error> {
@ -344,7 +353,15 @@ impl Test {
/// Expect the given value to be equals to the next response. /// Expect the given value to be equals to the next response.
#[allow(dead_code)] #[allow(dead_code)]
pub fn expect_val(&mut self, val: &str) -> Result<&mut Self, Error> { pub fn expect_val(&mut self, val: &str) -> Result<&mut Self, Error> {
self.expect_value(value(val).unwrap_or_else(|_| panic!("INVALID VALUE:\n{val}"))) self.expect_val_info(val, "")
}
#[allow(dead_code)]
pub fn expect_val_info<I: Display>(&mut self, val: &str, info: I) -> Result<&mut Self, Error> {
self.expect_value_info(
value(val).unwrap_or_else(|_| panic!("INVALID VALUE {info}:\n{val}")),
info,
)
} }
#[allow(dead_code)] #[allow(dead_code)]

View file

@ -188,7 +188,7 @@ fn three_multi_index_query(with: &str, parallel: &str) -> String {
DEFINE INDEX uniq_name ON TABLE person COLUMNS name UNIQUE; DEFINE INDEX uniq_name ON TABLE person COLUMNS name UNIQUE;
DEFINE INDEX idx_genre ON TABLE person COLUMNS genre; DEFINE INDEX idx_genre ON TABLE person COLUMNS genre;
SELECT name FROM person {with} WHERE name = 'Jaime' OR genre = 'm' OR company @@ 'surrealdb' {parallel}; SELECT name FROM person {with} WHERE name = 'Jaime' OR genre = 'm' OR company @@ 'surrealdb' {parallel};
SELECT name FROM person {with} WHERE name = 'Jaime' OR genre = 'm' OR company @@ 'surrealdb' {parallel} EXPLAIN FULL; SELECT name FROM person {with} WHERE name = 'Jaime' OR genre = 'm' OR company @@ 'surrealdb' {parallel} EXPLAIN FULL;
SELECT name FROM person {with} WHERE name = 'Jaime' AND genre = 'm' AND company @@ 'surrealdb' {parallel}; SELECT name FROM person {with} WHERE name = 'Jaime' AND genre = 'm' AND company @@ 'surrealdb' {parallel};
SELECT name FROM person {with} WHERE name = 'Jaime' AND genre = 'm' AND company @@ 'surrealdb' {parallel} EXPLAIN FULL;") SELECT name FROM person {with} WHERE name = 'Jaime' AND genre = 'm' AND company @@ 'surrealdb' {parallel} EXPLAIN FULL;")
} }
@ -3063,3 +3063,35 @@ async fn select_composite_standard_index() -> Result<(), Error> {
async fn select_composite_unique_index() -> Result<(), Error> { async fn select_composite_unique_index() -> Result<(), Error> {
select_composite_index(true).await select_composite_index(true).await
} }
#[tokio::test]
async fn select_where_index_boolean_behaviour() -> Result<(), Error> {
let sql = r"
DEFINE INDEX flagIndex ON TABLE test COLUMNS flag;
CREATE test:t CONTENT { flag:true };
CREATE test:f CONTENT { flag:false };
SELECT * FROM test;
SELECT * FROM test WITH NOINDEX WHERE (true OR flag=true);
SELECT * FROM test WITH NOINDEX WHERE (true OR flag==true);
SELECT * FROM test WHERE (true OR flag=true);
SELECT * FROM test WHERE (true OR flag==true);";
let mut t = Test::new(sql).await?;
t.expect_size(8)?;
t.skip_ok(3)?;
for i in 0..5 {
t.expect_val_info(
"[
{
flag: false,
id: test:f
},
{
flag: true,
id: test:t
}
]",
i,
)?;
}
Ok(())
}