From efb7000583ca552eebde11ec5ae57177f745d7d2 Mon Sep 17 00:00:00 2001 From: Emmanuel Keller Date: Tue, 17 Sep 2024 12:50:08 +0100 Subject: [PATCH] select/count optimisation on table range (#4786) --- core/src/dbs/iterator.rs | 4 +- core/src/dbs/plan.rs | 9 ++- core/src/dbs/processor.rs | 91 +++++++++++++++++++++++++------ core/src/idx/planner/mod.rs | 22 ++++++++ core/src/idx/planner/plan.rs | 22 +------- core/src/sql/statements/select.rs | 4 +- sdk/tests/group.rs | 81 +++++++++++++++++++++++++++ 7 files changed, 189 insertions(+), 44 deletions(-) diff --git a/core/src/dbs/iterator.rs b/core/src/dbs/iterator.rs index dbb7d81f..800ee26c 100644 --- a/core/src/dbs/iterator.rs +++ b/core/src/dbs/iterator.rs @@ -29,7 +29,7 @@ pub(crate) enum Iterable { Value(Value), Table(Table, bool), // true = keys only Thing(Thing), - TableRange(String, IdRange), + TableRange(String, IdRange, bool), // true = keys_only Edges(Edges), Defer(Thing), Mergeable(Thing, Value), @@ -169,7 +169,7 @@ impl Iterator { }); } _ => { - self.ingest(Iterable::TableRange(v.tb, *r.to_owned())); + self.ingest(Iterable::TableRange(v.tb, *r.to_owned(), false)); } }; } diff --git a/core/src/dbs/plan.rs b/core/src/dbs/plan.rs index 05cc12a8..126a65e0 100644 --- a/core/src/dbs/plan.rs +++ b/core/src/dbs/plan.rs @@ -110,8 +110,13 @@ impl ExplainItem { name: "Iterate Defer".into(), details: vec![("thing", Value::Thing(t.to_owned()))], }, - Iterable::TableRange(tb, r) => Self { - name: "Iterate Range".into(), + Iterable::TableRange(tb, r, keys_only) => Self { + name: if *keys_only { + "Iterate Range Keys" + } else { + "Iterate Range" + } + .into(), details: vec![("table", tb.to_owned().into()), ("range", r.to_owned().into())], }, Iterable::Edges(e) => Self { diff --git a/core/src/dbs/processor.rs b/core/src/dbs/processor.rs index b4aacb34..c2b5f6c9 100644 --- a/core/src/dbs/processor.rs +++ b/core/src/dbs/processor.rs @@ -138,8 +138,12 @@ impl<'a> Processor<'a> { Iterable::Value(v) => self.process_value(stk, ctx, opt, stm, v).await?, Iterable::Thing(v) => self.process_thing(stk, ctx, opt, stm, v).await?, Iterable::Defer(v) => self.process_defer(stk, ctx, opt, stm, v).await?, - Iterable::TableRange(tb, v) => { - self.process_range(stk, ctx, opt, stm, tb, v).await? + Iterable::TableRange(tb, v, keys_only) => { + if keys_only { + self.process_range_keys(stk, ctx, opt, stm, &tb, v).await? + } else { + self.process_range(stk, ctx, opt, stm, &tb, v).await? + } } Iterable::Edges(e) => self.process_edge(stk, ctx, opt, stm, e).await?, Iterable::Table(v, keys_only) => { @@ -388,39 +392,50 @@ impl<'a> Processor<'a> { Ok(()) } - async fn process_range( - &mut self, - stk: &mut Stk, - ctx: &Context, + async fn process_range_prepare( + txn: &Transaction, opt: &Options, - stm: &Statement<'_>, - tb: String, + tb: &str, r: IdRange, - ) -> Result<(), Error> { - // Get the transaction - let txn = ctx.tx(); + ) -> Result<(Vec, Vec), Error> { // Check that the table exists - txn.check_ns_db_tb(opt.ns()?, opt.db()?, &tb, opt.strict).await?; + txn.check_ns_db_tb(opt.ns()?, opt.db()?, tb, opt.strict).await?; // Prepare the range start key let beg = match &r.beg { - Bound::Unbounded => thing::prefix(opt.ns()?, opt.db()?, &tb), - Bound::Included(v) => thing::new(opt.ns()?, opt.db()?, &tb, v).encode().unwrap(), + Bound::Unbounded => thing::prefix(opt.ns()?, opt.db()?, tb), + Bound::Included(v) => thing::new(opt.ns()?, opt.db()?, tb, v).encode().unwrap(), Bound::Excluded(v) => { - let mut key = thing::new(opt.ns()?, opt.db()?, &tb, v).encode().unwrap(); + let mut key = thing::new(opt.ns()?, opt.db()?, tb, v).encode().unwrap(); key.push(0x00); key } }; // Prepare the range end key let end = match &r.end { - Bound::Unbounded => thing::suffix(opt.ns()?, opt.db()?, &tb), - Bound::Excluded(v) => thing::new(opt.ns()?, opt.db()?, &tb, v).encode().unwrap(), + Bound::Unbounded => thing::suffix(opt.ns()?, opt.db()?, tb), + Bound::Excluded(v) => thing::new(opt.ns()?, opt.db()?, tb, v).encode().unwrap(), Bound::Included(v) => { - let mut key = thing::new(opt.ns()?, opt.db()?, &tb, v).encode().unwrap(); + let mut key = thing::new(opt.ns()?, opt.db()?, tb, v).encode().unwrap(); key.push(0x00); key } }; + Ok((beg, end)) + } + + async fn process_range( + &mut self, + stk: &mut Stk, + ctx: &Context, + opt: &Options, + stm: &Statement<'_>, + tb: &str, + r: IdRange, + ) -> Result<(), Error> { + // Get the transaction + let txn = ctx.tx(); + // Prepare + let (beg, end) = Self::process_range_prepare(&txn, opt, tb, r).await?; // Create a new iterable range let mut stream = txn.stream(beg..end, None); // Loop until no more entries @@ -448,6 +463,46 @@ impl<'a> Processor<'a> { Ok(()) } + async fn process_range_keys( + &mut self, + stk: &mut Stk, + ctx: &Context, + opt: &Options, + stm: &Statement<'_>, + tb: &str, + r: IdRange, + ) -> Result<(), Error> { + // Get the transaction + let txn = ctx.tx(); + // Prepare + let (beg, end) = Self::process_range_prepare(&txn, opt, tb, r).await?; + // Create a new iterable range + let mut stream = txn.stream_keys(beg..end); + // Loop until no more entries + while let Some(res) = stream.next().await { + // Check if the context is finished + if ctx.is_done() { + break; + } + // Parse the data from the store + let k = res?; + let key: thing::Thing = (&k).into(); + let val = Value::Null; + let rid = Thing::from((key.tb, key.id)); + // Create a new operable value + let val = Operable::Value(val.into()); + // Process the record + let pro = Processed { + rid: Some(rid.into()), + ir: None, + val, + }; + self.process(stk, ctx, opt, stm, pro).await?; + } + // Everything ok + Ok(()) + } + async fn process_edge( &mut self, stk: &mut Stk, diff --git a/core/src/idx/planner/mod.rs b/core/src/idx/planner/mod.rs index db006551..2ec9ab84 100644 --- a/core/src/idx/planner/mod.rs +++ b/core/src/idx/planner/mod.rs @@ -29,6 +29,28 @@ pub(crate) struct QueryPlannerParams<'a> { group: Option<&'a Groups>, } +impl<'a> QueryPlannerParams<'a> { + pub(crate) fn is_keys_only(&self) -> bool { + if !self.fields.is_count_all_only() { + return false; + } + if self.cond.is_some() { + return false; + } + if let Some(g) = self.group { + if !g.is_empty() { + return false; + } + } + if let Some(p) = self.order { + if !p.is_empty() { + return false; + } + } + true + } +} + impl<'a> From<&'a SelectStatement> for QueryPlannerParams<'a> { fn from(stmt: &'a SelectStatement) -> Self { QueryPlannerParams { diff --git a/core/src/idx/planner/plan.rs b/core/src/idx/planner/plan.rs index 4b44af47..627e776e 100644 --- a/core/src/idx/planner/plan.rs +++ b/core/src/idx/planner/plan.rs @@ -47,7 +47,7 @@ impl PlanBuilder { }; // If we only count and there are no conditions and no aggregations, then we can only scan keys - let keys_only = Self::is_keys_only(params); + let keys_only = params.is_keys_only(); if let Some(With::NoIndex) = params.with { return Ok(Self::table_iterator(Some("WITH NOINDEX"), keys_only)); @@ -93,26 +93,6 @@ impl PlanBuilder { Ok(Self::table_iterator(None, keys_only)) } - fn is_keys_only(p: &QueryPlannerParams) -> bool { - if !p.fields.is_count_all_only() { - return false; - } - if p.cond.is_some() { - return false; - } - if let Some(g) = p.group { - if !g.is_empty() { - return false; - } - } - if let Some(p) = p.order { - if !p.is_empty() { - return false; - } - } - true - } - fn table_iterator(reason: Option<&str>, keys_only: bool) -> Plan { let reason = reason.map(|s| s.to_string()); Plan::TableIterator(reason, keys_only) diff --git a/core/src/sql/statements/select.rs b/core/src/sql/statements/select.rs index e380d34b..772ce69f 100644 --- a/core/src/sql/statements/select.rs +++ b/core/src/sql/statements/select.rs @@ -112,7 +112,9 @@ impl SelectStatement { planner.add_iterables(stk, &ctx, &opt, t, ¶ms, &mut i).await?; } Value::Thing(v) => match &v.id { - Id::Range(r) => i.ingest(Iterable::TableRange(v.tb, *r.to_owned())), + Id::Range(r) => { + i.ingest(Iterable::TableRange(v.tb, *r.to_owned(), params.is_keys_only())) + } _ => i.ingest(Iterable::Thing(v)), }, Value::Edges(v) => { diff --git a/sdk/tests/group.rs b/sdk/tests/group.rs index 3c3378fb..8aa59b4f 100644 --- a/sdk/tests/group.rs +++ b/sdk/tests/group.rs @@ -814,3 +814,84 @@ async fn select_count_group_all() -> Result<(), Error> { )?; Ok(()) } + +#[tokio::test] +async fn select_count_range_keys_only() -> Result<(), Error> { + let sql = r#" + CREATE table:1 CONTENT { bar: "hello", foo: "Man"}; + CREATE table:2 CONTENT { bar: "hello", foo: "World"}; + CREATE table:3 CONTENT { bar: "world"}; + SELECT COUNT() FROM table:1..4 GROUP ALL EXPLAIN; + SELECT COUNT() FROM table:1..4 GROUP ALL; + SELECT COUNT() FROM table:1..4 EXPLAIN; + SELECT COUNT() FROM table:1..4; + "#; + let mut t = Test::new(sql).await?; + t.expect_size(7)?; + // + t.skip_ok(3)?; + // + t.expect_val( + r#"[ + { + detail: { + range: 1..4, + table: 'table' + }, + operation: 'Iterate Range Keys' + }, + { + detail: { + idioms: { + count: [ + 'count' + ] + }, + type: 'Group' + }, + operation: 'Collector' + } + ]"#, + )?; + // + t.expect_val( + r#"[ + { + count: 3 + } + ]"#, + )?; + // + t.expect_val( + r#"[ + { + detail: { + range: 1..4, + table: 'table' + }, + operation: 'Iterate Range Keys' + }, + { + detail: { + type: 'Memory' + }, + operation: 'Collector' + } + ]"#, + )?; + // + t.expect_val( + r#"[ + { + count: 1 + }, + { + count: 1 + }, + { + count: 1 + } + ]"#, + )?; + Ok(()) +}