From b485d9cc3d004ddb36c46a3b8a632329b96b944c Mon Sep 17 00:00:00 2001 From: Emmanuel Keller Date: Tue, 1 Aug 2023 08:30:13 +0100 Subject: [PATCH] Feat: Support of (un)flattened fields in indexing (#2327) --- lib/src/api/opt/config.rs | 8 + lib/src/doc/index.rs | 256 +++++++++++++++++++++++-------- lib/src/idx/ft/analyzer/mod.rs | 19 ++- lib/src/idx/ft/highlighter.rs | 5 + lib/src/idx/ft/mod.rs | 26 ++-- lib/src/idx/planner/executor.rs | 4 +- lib/src/idx/planner/iterators.rs | 12 +- lib/src/idx/planner/plan.rs | 25 +-- lib/src/idx/planner/tree.rs | 11 +- lib/src/key/index/mod.rs | 30 ++-- lib/src/sql/ending.rs | 1 + lib/src/sql/idiom.rs | 13 +- lib/src/sql/part.rs | 30 +++- lib/src/sql/value/get.rs | 12 +- lib/src/sql/value/walk.rs | 5 - lib/tests/create.rs | 115 +++++++++++++- lib/tests/define.rs | 40 +++-- lib/tests/matches.rs | 70 +++++++++ 18 files changed, 531 insertions(+), 151 deletions(-) diff --git a/lib/src/api/opt/config.rs b/lib/src/api/opt/config.rs index a976594b..efb80efb 100644 --- a/lib/src/api/opt/config.rs +++ b/lib/src/api/opt/config.rs @@ -1,3 +1,11 @@ +#[cfg(any( + feature = "kv-mem", + feature = "kv-tikv", + feature = "kv-rocksdb", + feature = "kv-speedb", + feature = "kv-fdb", + feature = "kv-indxdb", +))] use std::time::Duration; /// Configuration for server connection, including: strictness, notifications, query_timeout, transaction_timeout diff --git a/lib/src/doc/index.rs b/lib/src/doc/index.rs index 45288821..ceeb5073 100644 --- a/lib/src/doc/index.rs +++ b/lib/src/doc/index.rs @@ -10,7 +10,7 @@ use crate::sql::array::Array; use crate::sql::index::Index; use crate::sql::scoring::Scoring; use crate::sql::statements::DefineIndexStatement; -use crate::sql::{Ident, Thing}; +use crate::sql::{Ident, Part, Thing, Value}; use crate::{key, kvs}; impl<'a> Document<'a> { @@ -38,10 +38,10 @@ impl<'a> Document<'a> { // Loop through all index statements for ix in self.ix(opt, txn).await?.iter() { // Calculate old values - let o = Self::build_opt_array(ctx, opt, txn, ix, &self.initial).await?; + let o = build_opt_values(ctx, opt, txn, ix, &self.initial).await?; // Calculate new values - let n = Self::build_opt_array(ctx, opt, txn, ix, &self.current).await?; + let n = build_opt_values(ctx, opt, txn, ix, &self.current).await?; // Update the index entries if opt.force || o != n { @@ -49,7 +49,7 @@ impl<'a> Document<'a> { let mut run = txn.lock().await; // Store all the variable and parameters required by the index operation - let ic = IndexOperation::new(opt, ix, o, n, rid); + let mut ic = IndexOperation::new(opt, ix, o, n, rid); // Index operation dispatching match &ix.index { @@ -67,37 +67,158 @@ impl<'a> Document<'a> { // Carry on Ok(()) } +} - /// Extract from the given document, the values required by the index and put then in an array. - /// Eg. IF the index is composed of the columns `name` and `instrument` - /// Given this doc: { "id": 1, "instrument":"piano", "name":"Tobie" } - /// It will return: ["Tobie", "piano"] - async fn build_opt_array( - ctx: &Context<'_>, - opt: &Options, - txn: &Transaction, - ix: &DefineIndexStatement, - doc: &CursorDoc<'_>, - ) -> Result, Error> { - if !doc.doc.is_some() { - return Ok(None); +/// Extract from the given document, the values required by the index and put then in an array. +/// Eg. IF the index is composed of the columns `name` and `instrument` +/// Given this doc: { "id": 1, "instrument":"piano", "name":"Tobie" } +/// It will return: ["Tobie", "piano"] +async fn build_opt_values( + ctx: &Context<'_>, + opt: &Options, + txn: &Transaction, + ix: &DefineIndexStatement, + doc: &CursorDoc<'_>, +) -> Result>, Error> { + if !doc.doc.is_some() { + return Ok(None); + } + let mut o = Vec::with_capacity(ix.cols.len()); + for i in ix.cols.iter() { + let v = i.compute(ctx, opt, txn, Some(doc)).await?; + o.push(v); + } + Ok(Some(o)) +} + +/// Extract from the given document, the values required by the index and put then in an array. +/// Eg. IF the index is composed of the columns `name` and `instrument` +/// Given this doc: { "id": 1, "instrument":"piano", "name":"Tobie" } +/// It will return: ["Tobie", "piano"] +struct Indexable(Vec<(Value, bool)>); + +impl Indexable { + fn new(vals: Vec, ix: &DefineIndexStatement) -> Self { + let mut source = Vec::with_capacity(vals.len()); + for (v, i) in vals.into_iter().zip(ix.cols.0.iter()) { + let f = matches!(i.0.last(), Some(&Part::Flatten)); + source.push((v, f)); } - let mut o = Array::with_capacity(ix.cols.len()); - for i in ix.cols.iter() { - let v = i.compute(ctx, opt, txn, Some(doc)).await?; - o.push(v); + Self(source) + } +} + +impl IntoIterator for Indexable { + type Item = Array; + type IntoIter = Combinator; + + fn into_iter(self) -> Self::IntoIter { + Combinator::new(self.0) + } +} + +struct Combinator { + iterators: Vec>, + has_next: bool, +} + +impl Combinator { + fn new(source: Vec<(Value, bool)>) -> Self { + let mut iterators: Vec> = Vec::new(); + // We create an iterator for each idiom + for (v, f) in source { + if !f { + // Iterator for not flattened values + if let Value::Array(v) = v { + iterators.push(Box::new(MultiValuesIterator { + vals: v.0, + done: false, + current: 0, + })); + continue; + } + } + iterators.push(Box::new(SingleValueIterator(v))); } - Ok(Some(o)) + Self { + iterators, + has_next: true, + } + } +} + +impl Iterator for Combinator { + type Item = Array; + + fn next(&mut self) -> Option { + if !self.has_next { + return None; + } + let mut o = Vec::with_capacity(self.iterators.len()); + // Create the combination and advance to the next + self.has_next = false; + for i in &mut self.iterators { + o.push(i.current().clone()); + if !self.has_next { + // We advance only one iterator per iteration + if i.next() { + self.has_next = true; + } + } + } + let o = Array::from(o); + Some(o) + } +} + +trait ValuesIterator: Send { + fn next(&mut self) -> bool; + fn current(&self) -> &Value; +} + +struct MultiValuesIterator { + vals: Vec, + done: bool, + current: usize, +} + +impl ValuesIterator for MultiValuesIterator { + fn next(&mut self) -> bool { + if self.done { + return false; + } + if self.current == self.vals.len() - 1 { + self.done = true; + return false; + } + self.current += 1; + true + } + + fn current(&self) -> &Value { + self.vals.get(self.current).unwrap_or(&Value::Null) + } +} + +struct SingleValueIterator(Value); + +impl ValuesIterator for SingleValueIterator { + fn next(&mut self) -> bool { + false + } + + fn current(&self) -> &Value { + &self.0 } } struct IndexOperation<'a> { opt: &'a Options, ix: &'a DefineIndexStatement, - /// The old value (if existing) - o: Option, - /// The new value (if existing) - n: Option, + /// The old values (if existing) + o: Option>, + /// The new values (if existing) + n: Option>, rid: &'a Thing, } @@ -105,8 +226,8 @@ impl<'a> IndexOperation<'a> { fn new( opt: &'a Options, ix: &'a DefineIndexStatement, - o: Option, - n: Option, + o: Option>, + n: Option>, rid: &'a Thing, ) -> Self { Self { @@ -118,54 +239,31 @@ impl<'a> IndexOperation<'a> { } } - fn get_non_unique_index_key(&self, v: &Array) -> key::index::Index { + fn get_non_unique_index_key(&self, v: &'a Array) -> key::index::Index { key::index::Index::new( self.opt.ns(), self.opt.db(), &self.ix.what, &self.ix.name, - v.to_owned(), - Some(self.rid.id.to_owned()), + v, + Some(&self.rid.id), ) } - async fn index_non_unique(&self, run: &mut kvs::Transaction) -> Result<(), Error> { + async fn index_non_unique(&mut self, run: &mut kvs::Transaction) -> Result<(), Error> { // Delete the old index data - if let Some(o) = &self.o { - let key = self.get_non_unique_index_key(o); - let _ = run.delc(key, Some(self.rid)).await; // Ignore this error - } - // Create the new index data - if let Some(n) = &self.n { - let key = self.get_non_unique_index_key(n); - if run.putc(key, self.rid, None).await.is_err() { - return self.err_index_exists(n); + if let Some(o) = self.o.take() { + let i = Indexable::new(o, self.ix); + for o in i { + let key = self.get_non_unique_index_key(&o); + let _ = run.delc(key, Some(self.rid)).await; // Ignore this error } } - Ok(()) - } - - fn get_unique_index_key(&self, v: &Array) -> key::index::Index { - key::index::Index::new( - self.opt.ns(), - self.opt.db(), - &self.ix.what, - &self.ix.name, - v.to_owned(), - None, - ) - } - - async fn index_unique(&self, run: &mut kvs::Transaction) -> Result<(), Error> { - // Delete the old index data - if let Some(o) = &self.o { - let key = self.get_unique_index_key(o); - let _ = run.delc(key, Some(self.rid)).await; // Ignore this error - } // Create the new index data - if let Some(n) = &self.n { - if !n.is_all_none_or_null() { - let key = self.get_unique_index_key(n); + if let Some(n) = self.n.take() { + let i = Indexable::new(n, self.ix); + for n in i { + let key = self.get_non_unique_index_key(&n); if run.putc(key, self.rid, None).await.is_err() { return self.err_index_exists(n); } @@ -174,7 +272,35 @@ impl<'a> IndexOperation<'a> { Ok(()) } - fn err_index_exists(&self, n: &Array) -> Result<(), Error> { + fn get_unique_index_key(&self, v: &'a Array) -> key::index::Index { + key::index::Index::new(self.opt.ns(), self.opt.db(), &self.ix.what, &self.ix.name, v, None) + } + + async fn index_unique(&mut self, run: &mut kvs::Transaction) -> Result<(), Error> { + // Delete the old index data + if let Some(o) = self.o.take() { + let i = Indexable::new(o, self.ix); + for o in i { + let key = self.get_unique_index_key(&o); + let _ = run.delc(key, Some(self.rid)).await; // Ignore this error + } + } + // Create the new index data + if let Some(n) = self.n.take() { + let i = Indexable::new(n, self.ix); + for n in i { + if !n.is_all_none_or_null() { + let key = self.get_unique_index_key(&n); + if run.putc(key, self.rid, None).await.is_err() { + return self.err_index_exists(n); + } + } + } + } + Ok(()) + } + + fn err_index_exists(&self, n: Array) -> Result<(), Error> { Err(Error::IndexExists { thing: self.rid.to_string(), index: self.ix.name.to_string(), diff --git a/lib/src/idx/ft/analyzer/mod.rs b/lib/src/idx/ft/analyzer/mod.rs index 582f9cc8..f03c04c7 100644 --- a/lib/src/idx/ft/analyzer/mod.rs +++ b/lib/src/idx/ft/analyzer/mod.rs @@ -7,7 +7,7 @@ use crate::idx::ft::terms::{TermId, Terms}; use crate::kvs::Transaction; use crate::sql::statements::DefineAnalyzerStatement; use crate::sql::tokenizer::Tokenizer as SqlTokenizer; -use crate::sql::{Array, Value}; +use crate::sql::Value; use filter::Filter; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; @@ -64,7 +64,7 @@ impl Analyzer { &self, terms: &mut Terms, tx: &mut Transaction, - field_content: &Array, + field_content: &[Value], ) -> Result<(DocLength, Vec<(TermId, TermFrequency)>), Error> { let mut dl = 0; // Let's first collect all the inputs, and collect the tokens. @@ -101,13 +101,13 @@ impl Analyzer { &self, terms: &mut Terms, tx: &mut Transaction, - field_content: &Array, + content: &[Value], ) -> Result<(DocLength, Vec<(TermId, TermFrequency)>, Vec<(TermId, OffsetRecords)>), Error> { let mut dl = 0; // Let's first collect all the inputs, and collect the tokens. // We need to store them because everything after is zero-copy - let mut inputs = Vec::with_capacity(field_content.len()); - self.analyze_content(field_content, &mut inputs)?; + let mut inputs = Vec::with_capacity(content.len()); + self.analyze_content(content, &mut inputs)?; // We then collect every unique terms and count the frequency and extract the offsets let mut tfos: HashMap<&str, Vec> = HashMap::new(); for (i, tks) in inputs.iter().enumerate() { @@ -135,8 +135,8 @@ impl Analyzer { Ok((dl, tfid, osid)) } - fn analyze_content(&self, field_content: &Array, tks: &mut Vec) -> Result<(), Error> { - for v in &field_content.0 { + fn analyze_content(&self, content: &[Value], tks: &mut Vec) -> Result<(), Error> { + for v in content { self.analyze_value(v, tks)?; } Ok(()) @@ -152,6 +152,11 @@ impl Analyzer { self.analyze_value(v, tks)?; } } + Value::Object(o) => { + for v in o.0.values() { + self.analyze_value(v, tks)?; + } + } _ => {} }; Ok(()) diff --git a/lib/src/idx/ft/highlighter.rs b/lib/src/idx/ft/highlighter.rs index a9c5af15..3ebb6b48 100644 --- a/lib/src/idx/ft/highlighter.rs +++ b/lib/src/idx/ft/highlighter.rs @@ -42,6 +42,11 @@ impl Highlighter { Self::extract(v, vals); } } + Value::Object(a) => { + for (_, v) in a.0.into_iter() { + Self::extract(v, vals); + } + } _ => {} } } diff --git a/lib/src/idx/ft/mod.rs b/lib/src/idx/ft/mod.rs index 7384c86a..57c259aa 100644 --- a/lib/src/idx/ft/mod.rs +++ b/lib/src/idx/ft/mod.rs @@ -23,7 +23,7 @@ use crate::idx::{btree, IndexKeyBase, SerdeState}; use crate::kvs::{Key, Transaction}; use crate::sql::scoring::Scoring; use crate::sql::statements::DefineAnalyzerStatement; -use crate::sql::{Array, Idiom, Object, Thing, Value}; +use crate::sql::{Idiom, Object, Thing, Value}; use roaring::treemap::IntoIter; use roaring::RoaringTreemap; use serde::{Deserialize, Serialize}; @@ -195,7 +195,7 @@ impl FtIndex { &mut self, tx: &mut Transaction, rid: &Thing, - field_content: &Array, + content: &[Value], ) -> Result<(), Error> { // Resolve the doc_id let resolved = self.doc_ids.write().await.resolve_doc_id(tx, rid.into()).await?; @@ -206,12 +206,12 @@ impl FtIndex { let (doc_length, terms_and_frequencies, offsets) = if self.highlighting { let (dl, tf, ofs) = self .analyzer - .extract_terms_with_frequencies_with_offsets(&mut t, tx, field_content) + .extract_terms_with_frequencies_with_offsets(&mut t, tx, content) .await?; (dl, tf, Some(ofs)) } else { let (dl, tf) = - self.analyzer.extract_terms_with_frequencies(&mut t, tx, field_content).await?; + self.analyzer.extract_terms_with_frequencies(&mut t, tx, content).await?; (dl, tf, None) }; @@ -456,7 +456,7 @@ mod tests { use crate::sql::scoring::Scoring; use crate::sql::statements::define::analyzer; use crate::sql::statements::DefineAnalyzerStatement; - use crate::sql::{Array, Thing}; + use crate::sql::{Thing, Value}; use std::collections::HashMap; use std::sync::Arc; use test_log::test; @@ -537,7 +537,7 @@ mod tests { // Add one document let (mut tx, mut fti) = tx_fti(&ds, BTreeStoreType::Write, &az, btree_order, false).await; - fti.index_document(&mut tx, &doc1, &Array::from(vec!["hello the world"])) + fti.index_document(&mut tx, &doc1, &vec![Value::from("hello the world")]) .await .unwrap(); finish(tx, fti).await; @@ -547,8 +547,8 @@ mod tests { // Add two documents let (mut tx, mut fti) = tx_fti(&ds, BTreeStoreType::Write, &az, btree_order, false).await; - fti.index_document(&mut tx, &doc2, &Array::from(vec!["a yellow hello"])).await.unwrap(); - fti.index_document(&mut tx, &doc3, &Array::from(vec!["foo bar"])).await.unwrap(); + fti.index_document(&mut tx, &doc2, &vec![Value::from("a yellow hello")]).await.unwrap(); + fti.index_document(&mut tx, &doc3, &vec![Value::from("foo bar")]).await.unwrap(); finish(tx, fti).await; } @@ -585,7 +585,7 @@ mod tests { // Reindex one document let (mut tx, mut fti) = tx_fti(&ds, BTreeStoreType::Write, &az, btree_order, false).await; - fti.index_document(&mut tx, &doc3, &Array::from(vec!["nobar foo"])).await.unwrap(); + fti.index_document(&mut tx, &doc3, &vec![Value::from("nobar foo")]).await.unwrap(); finish(tx, fti).await; let (mut tx, fti) = tx_fti(&ds, BTreeStoreType::Read, &az, btree_order, false).await; @@ -643,28 +643,28 @@ mod tests { fti.index_document( &mut tx, &doc1, - &Array::from(vec!["the quick brown fox jumped over the lazy dog"]), + &vec![Value::from("the quick brown fox jumped over the lazy dog")], ) .await .unwrap(); fti.index_document( &mut tx, &doc2, - &Array::from(vec!["the fast fox jumped over the lazy dog"]), + &vec![Value::from("the fast fox jumped over the lazy dog")], ) .await .unwrap(); fti.index_document( &mut tx, &doc3, - &Array::from(vec!["the dog sat there and did nothing"]), + &vec![Value::from("the dog sat there and did nothing")], ) .await .unwrap(); fti.index_document( &mut tx, &doc4, - &Array::from(vec!["the other animals sat there watching"]), + &vec![Value::from("the other animals sat there watching")], ) .await .unwrap(); diff --git a/lib/src/idx/planner/executor.rs b/lib/src/idx/planner/executor.rs index 8d2f5296..22cb67d3 100644 --- a/lib/src/idx/planner/executor.rs +++ b/lib/src/idx/planner/executor.rs @@ -136,7 +136,7 @@ impl QueryExecutor { return Ok(Some(ThingIterator::NonUniqueEqual(NonUniqueEqualThingIterator::new( opt, io.ix(), - io.value(), + io.array(), )?))); } Ok(None) @@ -150,7 +150,7 @@ impl QueryExecutor { return Ok(Some(ThingIterator::UniqueEqual(UniqueEqualThingIterator::new( opt, io.ix(), - io.value(), + io.array(), )?))); } Ok(None) diff --git a/lib/src/idx/planner/iterators.rs b/lib/src/idx/planner/iterators.rs index f3c999de..10565c2e 100644 --- a/lib/src/idx/planner/iterators.rs +++ b/lib/src/idx/planner/iterators.rs @@ -6,7 +6,7 @@ use crate::idx::ft::{FtIndex, HitsIterator}; use crate::key; use crate::kvs::Key; use crate::sql::statements::DefineIndexStatement; -use crate::sql::{Array, Thing, Value}; +use crate::sql::{Array, Thing}; pub(crate) enum ThingIterator { NonUniqueEqual(NonUniqueEqualThingIterator), @@ -37,11 +37,10 @@ impl NonUniqueEqualThingIterator { pub(super) fn new( opt: &Options, ix: &DefineIndexStatement, - v: &Value, + v: &Array, ) -> Result { - let v = Array::from(v.clone()); let (beg, end) = - key::index::Index::range_all_ids(opt.ns(), opt.db(), &ix.what, &ix.name, &v); + key::index::Index::range_all_ids(opt.ns(), opt.db(), &ix.what, &ix.name, v); Ok(Self { beg, end, @@ -70,9 +69,8 @@ pub(crate) struct UniqueEqualThingIterator { } impl UniqueEqualThingIterator { - pub(super) fn new(opt: &Options, ix: &DefineIndexStatement, v: &Value) -> Result { - let v = Array::from(v.clone()); - let key = key::index::Index::new(opt.ns(), opt.db(), &ix.what, &ix.name, v, None).into(); + pub(super) fn new(opt: &Options, ix: &DefineIndexStatement, a: &Array) -> Result { + let key = key::index::Index::new(opt.ns(), opt.db(), &ix.what, &ix.name, a, None).into(); Ok(Self { key: Some(key), }) diff --git a/lib/src/idx/planner/plan.rs b/lib/src/idx/planner/plan.rs index 5f51a0b9..4b7d67ae 100644 --- a/lib/src/idx/planner/plan.rs +++ b/lib/src/idx/planner/plan.rs @@ -3,7 +3,7 @@ use crate::idx::ft::MatchRef; use crate::idx::planner::tree::Node; use crate::sql::statements::DefineIndexStatement; use crate::sql::with::With; -use crate::sql::Object; +use crate::sql::{Array, Object}; use crate::sql::{Expression, Idiom, Operator, Value}; use std::collections::HashMap; use std::hash::Hash; @@ -127,7 +127,7 @@ pub(crate) struct IndexOption(Arc); pub(super) struct Inner { ix: DefineIndexStatement, id: Idiom, - v: Value, + a: Array, qs: Option, op: Operator, mr: Option, @@ -138,7 +138,7 @@ impl IndexOption { ix: DefineIndexStatement, id: Idiom, op: Operator, - v: Value, + a: Array, qs: Option, mr: Option, ) -> Self { @@ -146,7 +146,7 @@ impl IndexOption { ix, id, op, - v, + a, qs, mr, })) @@ -160,8 +160,8 @@ impl IndexOption { &self.0.op } - pub(super) fn value(&self) -> &Value { - &self.0.v + pub(super) fn array(&self) -> &Array { + &self.0.a } pub(super) fn qs(&self) -> Option<&String> { @@ -177,10 +177,15 @@ impl IndexOption { } pub(crate) fn explain(&self) -> Value { + let v = if self.0.a.len() == 1 { + self.0.a[0].clone() + } else { + Value::Array(self.0.a.clone()) + }; Value::Object(Object::from(HashMap::from([ ("index", Value::from(self.ix().name.0.to_owned())), ("operator", Value::from(self.op().to_string())), - ("value", self.value().clone()), + ("value", v), ]))) } } @@ -189,7 +194,7 @@ impl IndexOption { mod tests { use crate::idx::planner::plan::IndexOption; use crate::sql::statements::DefineIndexStatement; - use crate::sql::{Idiom, Operator, Value}; + use crate::sql::{Array, Idiom, Operator}; use std::collections::HashSet; #[test] @@ -199,7 +204,7 @@ mod tests { DefineIndexStatement::default(), Idiom::from("a.b".to_string()), Operator::Equal, - Value::from("test"), + Array::from(vec!["test"]), None, None, ); @@ -208,7 +213,7 @@ mod tests { DefineIndexStatement::default(), Idiom::from("a.b".to_string()), Operator::Equal, - Value::from("test"), + Array::from(vec!["test"]), None, None, ); diff --git a/lib/src/idx/planner/tree.rs b/lib/src/idx/planner/tree.rs index 78196a79..e70730d3 100644 --- a/lib/src/idx/planner/tree.rs +++ b/lib/src/idx/planner/tree.rs @@ -4,7 +4,7 @@ use crate::err::Error; use crate::idx::planner::plan::IndexOption; use crate::sql::index::Index; use crate::sql::statements::DefineIndexStatement; -use crate::sql::{Cond, Expression, Idiom, Operator, Subquery, Table, Value}; +use crate::sql::{Array, Cond, Expression, Idiom, Operator, Subquery, Table, Value}; use async_recursion::async_recursion; use std::collections::HashMap; use std::sync::Arc; @@ -156,7 +156,14 @@ impl<'a> TreeBuilder<'a> { } }; if found { - let io = IndexOption::new(ix.clone(), id.clone(), op.to_owned(), v.clone(), qs, mr); + let io = IndexOption::new( + ix.clone(), + id.clone(), + op.to_owned(), + Array::from(v.clone()), + qs, + mr, + ); self.index_map.0.insert(e.clone(), io.clone()); return Some(io); } diff --git a/lib/src/key/index/mod.rs b/lib/src/key/index/mod.rs index 5d851ae0..8a1c7918 100644 --- a/lib/src/key/index/mod.rs +++ b/lib/src/key/index/mod.rs @@ -16,6 +16,7 @@ use crate::sql::array::Array; use crate::sql::id::Id; use derive::Key; use serde::{Deserialize, Serialize}; +use std::borrow::Cow; use std::ops::Range; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Key)] @@ -61,11 +62,11 @@ struct PrefixIds<'a> { _d: u8, pub ix: &'a str, _e: u8, - pub fd: Array, + pub fd: Cow<'a, Array>, } impl<'a> PrefixIds<'a> { - fn new(ns: &'a str, db: &'a str, tb: &'a str, ix: &'a str, fd: &Array) -> Self { + fn new(ns: &'a str, db: &'a str, tb: &'a str, ix: &'a str, fd: &'a Array) -> Self { Self { __: b'/', _a: b'*', @@ -77,7 +78,7 @@ impl<'a> PrefixIds<'a> { _d: b'+', ix, _e: b'*', - fd: fd.to_owned(), + fd: Cow::Borrowed(fd), } } } @@ -94,8 +95,8 @@ pub struct Index<'a> { _d: u8, pub ix: &'a str, _e: u8, - pub fd: Array, - pub id: Option, + pub fd: Cow<'a, Array>, + pub id: Option>, } impl<'a> Index<'a> { @@ -104,8 +105,8 @@ impl<'a> Index<'a> { db: &'a str, tb: &'a str, ix: &'a str, - fd: Array, - id: Option, + fd: &'a Array, + id: Option<&'a Id>, ) -> Self { Self { __: b'/', @@ -118,8 +119,8 @@ impl<'a> Index<'a> { _d: b'+', ix, _e: b'*', - fd, - id, + fd: Cow::Borrowed(fd), + id: id.map(Cow::Borrowed), } } @@ -146,14 +147,9 @@ mod tests { fn key() { use super::*; #[rustfmt::skip] - let val = Index::new( - "testns", - "testdb", - "testtb", - "testix", - vec!["testfd1", "testfd2"].into(), - Some("testid".into()), - ); + let fd = vec!["testfd1", "testfd2"].into(); + let id = "testid".into(); + let val = Index::new("testns", "testdb", "testtb", "testix", &fd, Some(&id)); let enc = Index::encode(&val).unwrap(); assert_eq!( enc, diff --git a/lib/src/sql/ending.rs b/lib/src/sql/ending.rs index 988234e6..5546c981 100644 --- a/lib/src/sql/ending.rs +++ b/lib/src/sql/ending.rs @@ -42,6 +42,7 @@ pub fn ident(i: &str) -> IResult<&str, ()> { map(char(';'), |_| ()), // SET k = a; map(char(','), |_| ()), // [a, b] map(char('.'), |_| ()), // a.k + map(char('…'), |_| ()), // a… map(char('['), |_| ()), // a[0] map(eof, |_| ()), // SET k = a )))(i) diff --git a/lib/src/sql/idiom.rs b/lib/src/sql/idiom.rs index 0749892b..8c18eeca 100644 --- a/lib/src/sql/idiom.rs +++ b/lib/src/sql/idiom.rs @@ -5,13 +5,14 @@ use crate::err::Error; use crate::sql::common::commas; use crate::sql::error::IResult; use crate::sql::fmt::{fmt_separated_by, Fmt}; -use crate::sql::part::Next; use crate::sql::part::{all, field, first, graph, index, last, part, start, Part}; +use crate::sql::part::{flatten, Next}; use crate::sql::paths::{ID, IN, META, OUT}; use crate::sql::value::Value; use md5::Digest; use md5::Md5; use nom::branch::alt; +use nom::combinator::opt; use nom::multi::separated_list1; use nom::multi::{many0, many1}; use serde::{Deserialize, Serialize}; @@ -181,6 +182,11 @@ impl Display for Idiom { pub fn local(i: &str) -> IResult<&str, Idiom> { let (i, p) = first(i)?; let (i, mut v) = many0(alt((all, index, field)))(i)?; + // Flatten is only allowed at the end + let (i, flat) = opt(flatten)(i)?; + if let Some(p) = flat { + v.push(p); + } v.insert(0, p); Ok((i, Idiom::from(v))) } @@ -375,11 +381,11 @@ mod tests { #[test] fn idiom_start_param_local_field() { - let sql = "$test.temporary[0].embedded"; + let sql = "$test.temporary[0].embedded…"; let res = idiom(sql); assert!(res.is_ok()); let out = res.unwrap().1; - assert_eq!("$test.temporary[0].embedded", format!("{}", out)); + assert_eq!("$test.temporary[0].embedded…", format!("{}", out)); assert_eq!( out, Idiom(vec![ @@ -387,6 +393,7 @@ mod tests { Part::from("temporary"), Part::Index(Number::Int(0)), Part::from("embedded"), + Part::Flatten, ]) ); } diff --git a/lib/src/sql/part.rs b/lib/src/sql/part.rs index 5d3fcc44..24249e97 100644 --- a/lib/src/sql/part.rs +++ b/lib/src/sql/part.rs @@ -22,6 +22,7 @@ use std::str; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)] pub enum Part { All, + Flatten, Last, First, Field(Ident), @@ -112,6 +113,7 @@ impl fmt::Display for Part { Part::First => f.write_str("[0]"), Part::Start(v) => write!(f, "{v}"), Part::Field(v) => write!(f, ".{v}"), + Part::Flatten => f.write_str("…"), Part::Index(v) => write!(f, "[{v}]"), Part::Where(v) => write!(f, "[WHERE {v}]"), Part::Graph(v) => write!(f, "{v}"), @@ -139,7 +141,7 @@ impl<'a> Next<'a> for &'a [Part] { // ------------------------------ pub fn part(i: &str) -> IResult<&str, Part> { - alt((all, last, index, field, value, graph, filter))(i) + alt((all, flatten, last, index, field, value, graph, filter))(i) } pub fn first(i: &str) -> IResult<&str, Part> { @@ -180,6 +182,12 @@ pub fn index(i: &str) -> IResult<&str, Part> { Ok((i, Part::Index(v))) } +pub fn flatten(i: &str) -> IResult<&str, Part> { + let (i, _) = alt((tag("…"), tag("...")))(i)?; + let (i, _) = ending(i)?; + Ok((i, Part::Flatten)) +} + pub fn field(i: &str) -> IResult<&str, Part> { let (i, _) = char('.')(i)?; let (i, v) = ident::ident(i)?; @@ -244,6 +252,26 @@ mod tests { assert_eq!(out, Part::Last); } + #[test] + fn part_flatten() { + let sql = "..."; + let res = part(sql); + assert!(res.is_ok()); + let out = res.unwrap().1; + assert_eq!("…", format!("{}", out)); + assert_eq!(out, Part::Flatten); + } + + #[test] + fn part_flatten_ellipsis() { + let sql = "…"; + let res = part(sql); + assert!(res.is_ok()); + let out = res.unwrap().1; + assert_eq!("…", format!("{}", out)); + assert_eq!(out, Part::Flatten); + } + #[test] fn part_number() { let sql = "[0]"; diff --git a/lib/src/sql/value/get.rs b/lib/src/sql/value/get.rs index 4850090a..84b37f8b 100644 --- a/lib/src/sql/value/get.rs +++ b/lib/src/sql/value/get.rs @@ -103,7 +103,7 @@ impl Value { // Current value at path is an array Value::Array(v) => match p { // Current path is an `*` part - Part::All => { + Part::All | Part::Flatten => { let path = path.next(); let futs = v.iter().map(|v| v.get(ctx, opt, txn, doc, path)); try_join_all_buffered(futs).await.map(Into::into) @@ -222,8 +222,14 @@ impl Value { }, } } - // Ignore everything else - _ => Ok(Value::None), + v => { + if matches!(p, Part::Flatten) { + v.get(ctx, opt, txn, None, path.next()).await + } else { + // Ignore everything else + Ok(Value::None) + } + } }, // No more parts so get the value None => Ok(self.clone()), diff --git a/lib/src/sql/value/walk.rs b/lib/src/sql/value/walk.rs index c51ec7ef..3b465129 100644 --- a/lib/src/sql/value/walk.rs +++ b/lib/src/sql/value/walk.rs @@ -26,11 +26,6 @@ impl Value { }, // Current path part is an array Value::Array(v) => match p { - Part::All => v - .iter() - .enumerate() - .flat_map(|(i, v)| v._walk(path.next(), prev.clone().push(Part::from(i)))) - .collect::>(), Part::First => match v.first() { Some(v) => v._walk(path.next(), prev.push(p.clone())), None => vec![], diff --git a/lib/tests/create.rs b/lib/tests/create.rs index 6d4eb2a5..d50747e3 100644 --- a/lib/tests/create.rs +++ b/lib/tests/create.rs @@ -103,7 +103,7 @@ async fn create_with_id() -> Result<(), Error> { } #[tokio::test] -async fn create_on_non_values_with_unique_index() -> Result<(), Error> { +async fn create_on_none_values_with_unique_index() -> Result<(), Error> { let sql = " DEFINE INDEX national_id_idx ON foo FIELDS national_id UNIQUE; CREATE foo SET name = 'John Doe'; @@ -121,6 +121,119 @@ async fn create_on_non_values_with_unique_index() -> Result<(), Error> { Ok(()) } +#[tokio::test] +async fn create_with_unique_index_with_two_flattened_fields() -> Result<(), Error> { + let sql = " + DEFINE INDEX test ON user FIELDS account, tags…, emails... UNIQUE; + CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com']; + CREATE user:2 SET account = 'Apple', tags = ['two', 'three'], emails = ['a@example.com', 'b@example.com']; + CREATE user:3 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com']; + CREATE user:4 SET account = 'Apple', tags = ['two', 'three'], emails = ['a@example.com', 'b@example.com']; + "; + + let dbs = Datastore::new("memory").await?; + let ses = Session::owner().with_ns("test").with_db("test"); + let res = &mut dbs.execute(sql, &ses, None).await?; + assert_eq!(res.len(), 5); + // + for _ in 0..3 { + let _ = res.remove(0).result?; + } + // + let tmp = res.remove(0).result; + if let Err(e) = tmp { + assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', ['one', 'two'], ['a@example.com', 'b@example.com']], with record `user:3`"); + } else { + panic!("An error was expected.") + } + // + let tmp = res.remove(0).result; + if let Err(e) = tmp { + assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', ['two', 'three'], ['a@example.com', 'b@example.com']], with record `user:4`"); + } else { + panic!("An error was expected.") + } + Ok(()) +} + +#[tokio::test] +async fn create_with_unique_index_with_one_flattened_field() -> Result<(), Error> { + let sql = " + DEFINE INDEX test ON user FIELDS account, tags, emails... UNIQUE; + CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com']; + CREATE user:2 SET account = 'Apple', tags = ['two', 'three'], emails = ['a@example.com', 'b@example.com']; + "; + + let dbs = Datastore::new("memory").await?; + let ses = Session::owner().with_ns("test").with_db("test"); + let res = &mut dbs.execute(sql, &ses, None).await?; + assert_eq!(res.len(), 3); + // + for _ in 0..2 { + let _ = res.remove(0).result?; + } + // + let tmp = res.remove(0).result; + if let Err(e) = tmp { + assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', 'two', ['a@example.com', 'b@example.com']], with record `user:2`"); + } else { + panic!("An error was expected.") + } + Ok(()) +} + +#[tokio::test] +async fn create_with_unique_index_on_one_field_with_flattened_sub_values() -> Result<(), Error> { + let sql = " + DEFINE INDEX test ON user FIELDS account, tags, emails.*.value… UNIQUE; + CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = [ { value:'a@example.com'} , { value:'b@example.com' } ]; + CREATE user:2 SET account = 'Apple', tags = ['two', 'three'], emails = [ { value:'a@example.com'} , { value:'b@example.com' } ]; + "; + + let dbs = Datastore::new("memory").await?; + let ses = Session::owner().with_ns("test").with_db("test"); + let res = &mut dbs.execute(sql, &ses, None).await?; + assert_eq!(res.len(), 3); + // + for _ in 0..2 { + let _ = res.remove(0).result?; + } + // + let tmp = res.remove(0).result; + if let Err(e) = tmp { + assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', 'two', ['a@example.com', 'b@example.com']], with record `user:2`"); + } else { + panic!("An error was expected.") + } + Ok(()) +} + +#[tokio::test] +async fn create_with_unique_index_on_two_fields() -> Result<(), Error> { + let sql = " + DEFINE INDEX test ON user FIELDS account, tags, emails UNIQUE; + CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com']; + CREATE user:2 SET account = 'Apple', tags = ['two', 'one'], emails = ['b@example.com', 'c@example.com']; + "; + + let dbs = Datastore::new("memory").await?; + let ses = Session::owner().with_ns("test").with_db("test"); + let res = &mut dbs.execute(sql, &ses, None).await?; + assert_eq!(res.len(), 3); + // + for _ in 0..2 { + let _ = res.remove(0).result?; + } + let tmp = res.remove(0).result; + // + if let Err(e) = tmp { + assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', 'two', 'b@example.com'], with record `user:2`"); + } else { + panic!("An error was expected.") + } + Ok(()) +} + // // Permissions // diff --git a/lib/tests/define.rs b/lib/tests/define.rs index f40e23fb..f8118494 100644 --- a/lib/tests/define.rs +++ b/lib/tests/define.rs @@ -938,7 +938,6 @@ async fn define_statement_index_multiple_unique_existing() -> Result<(), Error> } #[tokio::test] -#[ignore] async fn define_statement_index_single_unique_embedded_multiple() -> Result<(), Error> { let sql = " DEFINE INDEX test ON user FIELDS tags UNIQUE; @@ -974,16 +973,19 @@ async fn define_statement_index_single_unique_embedded_multiple() -> Result<(), assert_eq!(tmp, val); // let tmp = res.remove(0).result; - assert!(matches!( - tmp.err(), - Some(e) if e.to_string() == "Database index `test` already contains `user:2`" - )); + if let Err(e) = tmp { + assert_eq!( + e.to_string(), + "Database index `test` already contains 'two', with record `user:2`" + ); + } else { + panic!("An error was expected.") + } // Ok(()) } #[tokio::test] -#[ignore] async fn define_statement_index_multiple_unique_embedded_multiple() -> Result<(), Error> { let sql = " DEFINE INDEX test ON user FIELDS account, tags UNIQUE; @@ -1021,20 +1023,28 @@ async fn define_statement_index_multiple_unique_embedded_multiple() -> Result<() assert_eq!(tmp, val); // let tmp = res.remove(0).result?; - let val = Value::parse("[{ id: user:1, account: 'tesla', tags: ['one', 'two'] }]"); + let val = Value::parse("[{ id: user:2, account: 'tesla', tags: ['one', 'two'] }]"); assert_eq!(tmp, val); // let tmp = res.remove(0).result; - assert!(matches!( - tmp.err(), - Some(e) if e.to_string() == "Database index `test` already contains `user:3`" - )); + if let Err(e) = tmp { + assert_eq!( + e.to_string(), + "Database index `test` already contains ['apple', 'two'], with record `user:3`" + ); + } else { + panic!("An error was expected.") + } // let tmp = res.remove(0).result; - assert!(matches!( - tmp.err(), - Some(e) if e.to_string() == "Database index `test` already contains `user:4`" - )); + if let Err(e) = tmp { + assert_eq!( + e.to_string(), + "Database index `test` already contains ['tesla', 'two'], with record `user:4`" + ); + } else { + panic!("An error was expected.") + } // Ok(()) } diff --git a/lib/tests/matches.rs b/lib/tests/matches.rs index b4b60b36..21157220 100644 --- a/lib/tests/matches.rs +++ b/lib/tests/matches.rs @@ -171,6 +171,76 @@ async fn select_where_matches_using_index_and_arrays_with_parallel() -> Result<( select_where_matches_using_index_and_arrays(true).await } +async fn select_where_matches_using_index_and_objects(parallel: bool) -> Result<(), Error> { + let p = if parallel { + "PARALLEL" + } else { + "" + }; + let sql = format!( + r" + CREATE blog:1 SET content = {{ 'title':'Hello World!', 'content':'Be Bop', 'tags': ['Foo', 'Bãr'] }}; + DEFINE ANALYZER simple TOKENIZERS blank,class; + DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25 HIGHLIGHTS; + SELECT id FROM blog WHERE content @1@ 'Hello Bãr' {p} EXPLAIN; + SELECT id, search::highlight('', '', 1) AS content FROM blog WHERE content @1@ 'Hello Bãr' {p}; + " + ); + let dbs = Datastore::new("memory").await?; + let ses = Session::owner().with_ns("test").with_db("test"); + let res = &mut dbs.execute(&sql, &ses, None).await?; + assert_eq!(res.len(), 5); + // + let _ = res.remove(0).result?; + let _ = res.remove(0).result?; + let _ = res.remove(0).result?; + // + let tmp = res.remove(0).result?; + let val = Value::parse( + "[ + { + detail: { + plan: { + index: 'blog_content', + operator: '@1@', + value: 'Hello Bãr' + }, + table: 'blog', + }, + operation: 'Iterate Index' + } + ]", + ); + assert_eq!(tmp, val); + // + let tmp = res.remove(0).result?; + let val = Value::parse( + "[ + { + id: blog:1, + content: [ + 'Be Bop', + 'Foo', + 'Bãr', + 'Hello World!' + ] + } + ]", + ); + assert_eq!(format!("{:#}", tmp), format!("{:#}", val)); + Ok(()) +} + +#[tokio::test] +async fn select_where_matches_using_index_and_objects_non_parallel() -> Result<(), Error> { + select_where_matches_using_index_and_objects(false).await +} + +#[tokio::test] +async fn select_where_matches_using_index_and_objects_with_parallel() -> Result<(), Error> { + select_where_matches_using_index_and_objects(true).await +} + #[tokio::test] async fn select_where_matches_using_index_offsets() -> Result<(), Error> { let sql = r"