Feat: Support of (un)flattened fields in indexing (#2327)

This commit is contained in:
Emmanuel Keller 2023-08-01 08:30:13 +01:00 committed by GitHub
parent bb8c1cd478
commit b485d9cc3d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 531 additions and 151 deletions

View file

@ -1,3 +1,11 @@
#[cfg(any(
feature = "kv-mem",
feature = "kv-tikv",
feature = "kv-rocksdb",
feature = "kv-speedb",
feature = "kv-fdb",
feature = "kv-indxdb",
))]
use std::time::Duration;
/// Configuration for server connection, including: strictness, notifications, query_timeout, transaction_timeout

View file

@ -10,7 +10,7 @@ use crate::sql::array::Array;
use crate::sql::index::Index;
use crate::sql::scoring::Scoring;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Ident, Thing};
use crate::sql::{Ident, Part, Thing, Value};
use crate::{key, kvs};
impl<'a> Document<'a> {
@ -38,10 +38,10 @@ impl<'a> Document<'a> {
// Loop through all index statements
for ix in self.ix(opt, txn).await?.iter() {
// Calculate old values
let o = Self::build_opt_array(ctx, opt, txn, ix, &self.initial).await?;
let o = build_opt_values(ctx, opt, txn, ix, &self.initial).await?;
// Calculate new values
let n = Self::build_opt_array(ctx, opt, txn, ix, &self.current).await?;
let n = build_opt_values(ctx, opt, txn, ix, &self.current).await?;
// Update the index entries
if opt.force || o != n {
@ -49,7 +49,7 @@ impl<'a> Document<'a> {
let mut run = txn.lock().await;
// Store all the variable and parameters required by the index operation
let ic = IndexOperation::new(opt, ix, o, n, rid);
let mut ic = IndexOperation::new(opt, ix, o, n, rid);
// Index operation dispatching
match &ix.index {
@ -67,37 +67,158 @@ impl<'a> Document<'a> {
// Carry on
Ok(())
}
}
/// Extract from the given document, the values required by the index and put then in an array.
/// Eg. IF the index is composed of the columns `name` and `instrument`
/// Given this doc: { "id": 1, "instrument":"piano", "name":"Tobie" }
/// It will return: ["Tobie", "piano"]
async fn build_opt_array(
/// Extract from the given document, the values required by the index and put then in an array.
/// Eg. IF the index is composed of the columns `name` and `instrument`
/// Given this doc: { "id": 1, "instrument":"piano", "name":"Tobie" }
/// It will return: ["Tobie", "piano"]
async fn build_opt_values(
ctx: &Context<'_>,
opt: &Options,
txn: &Transaction,
ix: &DefineIndexStatement,
doc: &CursorDoc<'_>,
) -> Result<Option<Array>, Error> {
) -> Result<Option<Vec<Value>>, Error> {
if !doc.doc.is_some() {
return Ok(None);
}
let mut o = Array::with_capacity(ix.cols.len());
let mut o = Vec::with_capacity(ix.cols.len());
for i in ix.cols.iter() {
let v = i.compute(ctx, opt, txn, Some(doc)).await?;
o.push(v);
}
Ok(Some(o))
}
/// Extract from the given document, the values required by the index and put then in an array.
/// Eg. IF the index is composed of the columns `name` and `instrument`
/// Given this doc: { "id": 1, "instrument":"piano", "name":"Tobie" }
/// It will return: ["Tobie", "piano"]
struct Indexable(Vec<(Value, bool)>);
impl Indexable {
fn new(vals: Vec<Value>, ix: &DefineIndexStatement) -> Self {
let mut source = Vec::with_capacity(vals.len());
for (v, i) in vals.into_iter().zip(ix.cols.0.iter()) {
let f = matches!(i.0.last(), Some(&Part::Flatten));
source.push((v, f));
}
Self(source)
}
}
impl IntoIterator for Indexable {
type Item = Array;
type IntoIter = Combinator;
fn into_iter(self) -> Self::IntoIter {
Combinator::new(self.0)
}
}
struct Combinator {
iterators: Vec<Box<dyn ValuesIterator>>,
has_next: bool,
}
impl Combinator {
fn new(source: Vec<(Value, bool)>) -> Self {
let mut iterators: Vec<Box<dyn ValuesIterator>> = Vec::new();
// We create an iterator for each idiom
for (v, f) in source {
if !f {
// Iterator for not flattened values
if let Value::Array(v) = v {
iterators.push(Box::new(MultiValuesIterator {
vals: v.0,
done: false,
current: 0,
}));
continue;
}
}
iterators.push(Box::new(SingleValueIterator(v)));
}
Self {
iterators,
has_next: true,
}
}
}
impl Iterator for Combinator {
type Item = Array;
fn next(&mut self) -> Option<Self::Item> {
if !self.has_next {
return None;
}
let mut o = Vec::with_capacity(self.iterators.len());
// Create the combination and advance to the next
self.has_next = false;
for i in &mut self.iterators {
o.push(i.current().clone());
if !self.has_next {
// We advance only one iterator per iteration
if i.next() {
self.has_next = true;
}
}
}
let o = Array::from(o);
Some(o)
}
}
trait ValuesIterator: Send {
fn next(&mut self) -> bool;
fn current(&self) -> &Value;
}
struct MultiValuesIterator {
vals: Vec<Value>,
done: bool,
current: usize,
}
impl ValuesIterator for MultiValuesIterator {
fn next(&mut self) -> bool {
if self.done {
return false;
}
if self.current == self.vals.len() - 1 {
self.done = true;
return false;
}
self.current += 1;
true
}
fn current(&self) -> &Value {
self.vals.get(self.current).unwrap_or(&Value::Null)
}
}
struct SingleValueIterator(Value);
impl ValuesIterator for SingleValueIterator {
fn next(&mut self) -> bool {
false
}
fn current(&self) -> &Value {
&self.0
}
}
struct IndexOperation<'a> {
opt: &'a Options,
ix: &'a DefineIndexStatement,
/// The old value (if existing)
o: Option<Array>,
/// The new value (if existing)
n: Option<Array>,
/// The old values (if existing)
o: Option<Vec<Value>>,
/// The new values (if existing)
n: Option<Vec<Value>>,
rid: &'a Thing,
}
@ -105,8 +226,8 @@ impl<'a> IndexOperation<'a> {
fn new(
opt: &'a Options,
ix: &'a DefineIndexStatement,
o: Option<Array>,
n: Option<Array>,
o: Option<Vec<Value>>,
n: Option<Vec<Value>>,
rid: &'a Thing,
) -> Self {
Self {
@ -118,63 +239,68 @@ impl<'a> IndexOperation<'a> {
}
}
fn get_non_unique_index_key(&self, v: &Array) -> key::index::Index {
fn get_non_unique_index_key(&self, v: &'a Array) -> key::index::Index {
key::index::Index::new(
self.opt.ns(),
self.opt.db(),
&self.ix.what,
&self.ix.name,
v.to_owned(),
Some(self.rid.id.to_owned()),
v,
Some(&self.rid.id),
)
}
async fn index_non_unique(&self, run: &mut kvs::Transaction) -> Result<(), Error> {
async fn index_non_unique(&mut self, run: &mut kvs::Transaction) -> Result<(), Error> {
// Delete the old index data
if let Some(o) = &self.o {
let key = self.get_non_unique_index_key(o);
if let Some(o) = self.o.take() {
let i = Indexable::new(o, self.ix);
for o in i {
let key = self.get_non_unique_index_key(&o);
let _ = run.delc(key, Some(self.rid)).await; // Ignore this error
}
}
// Create the new index data
if let Some(n) = &self.n {
let key = self.get_non_unique_index_key(n);
if let Some(n) = self.n.take() {
let i = Indexable::new(n, self.ix);
for n in i {
let key = self.get_non_unique_index_key(&n);
if run.putc(key, self.rid, None).await.is_err() {
return self.err_index_exists(n);
}
}
}
Ok(())
}
fn get_unique_index_key(&self, v: &Array) -> key::index::Index {
key::index::Index::new(
self.opt.ns(),
self.opt.db(),
&self.ix.what,
&self.ix.name,
v.to_owned(),
None,
)
fn get_unique_index_key(&self, v: &'a Array) -> key::index::Index {
key::index::Index::new(self.opt.ns(), self.opt.db(), &self.ix.what, &self.ix.name, v, None)
}
async fn index_unique(&self, run: &mut kvs::Transaction) -> Result<(), Error> {
async fn index_unique(&mut self, run: &mut kvs::Transaction) -> Result<(), Error> {
// Delete the old index data
if let Some(o) = &self.o {
let key = self.get_unique_index_key(o);
if let Some(o) = self.o.take() {
let i = Indexable::new(o, self.ix);
for o in i {
let key = self.get_unique_index_key(&o);
let _ = run.delc(key, Some(self.rid)).await; // Ignore this error
}
}
// Create the new index data
if let Some(n) = &self.n {
if let Some(n) = self.n.take() {
let i = Indexable::new(n, self.ix);
for n in i {
if !n.is_all_none_or_null() {
let key = self.get_unique_index_key(n);
let key = self.get_unique_index_key(&n);
if run.putc(key, self.rid, None).await.is_err() {
return self.err_index_exists(n);
}
}
}
}
Ok(())
}
fn err_index_exists(&self, n: &Array) -> Result<(), Error> {
fn err_index_exists(&self, n: Array) -> Result<(), Error> {
Err(Error::IndexExists {
thing: self.rid.to_string(),
index: self.ix.name.to_string(),

View file

@ -7,7 +7,7 @@ use crate::idx::ft::terms::{TermId, Terms};
use crate::kvs::Transaction;
use crate::sql::statements::DefineAnalyzerStatement;
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
use crate::sql::{Array, Value};
use crate::sql::Value;
use filter::Filter;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
@ -64,7 +64,7 @@ impl Analyzer {
&self,
terms: &mut Terms,
tx: &mut Transaction,
field_content: &Array,
field_content: &[Value],
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>), Error> {
let mut dl = 0;
// Let's first collect all the inputs, and collect the tokens.
@ -101,13 +101,13 @@ impl Analyzer {
&self,
terms: &mut Terms,
tx: &mut Transaction,
field_content: &Array,
content: &[Value],
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>, Vec<(TermId, OffsetRecords)>), Error> {
let mut dl = 0;
// Let's first collect all the inputs, and collect the tokens.
// We need to store them because everything after is zero-copy
let mut inputs = Vec::with_capacity(field_content.len());
self.analyze_content(field_content, &mut inputs)?;
let mut inputs = Vec::with_capacity(content.len());
self.analyze_content(content, &mut inputs)?;
// We then collect every unique terms and count the frequency and extract the offsets
let mut tfos: HashMap<&str, Vec<Offset>> = HashMap::new();
for (i, tks) in inputs.iter().enumerate() {
@ -135,8 +135,8 @@ impl Analyzer {
Ok((dl, tfid, osid))
}
fn analyze_content(&self, field_content: &Array, tks: &mut Vec<Tokens>) -> Result<(), Error> {
for v in &field_content.0 {
fn analyze_content(&self, content: &[Value], tks: &mut Vec<Tokens>) -> Result<(), Error> {
for v in content {
self.analyze_value(v, tks)?;
}
Ok(())
@ -152,6 +152,11 @@ impl Analyzer {
self.analyze_value(v, tks)?;
}
}
Value::Object(o) => {
for v in o.0.values() {
self.analyze_value(v, tks)?;
}
}
_ => {}
};
Ok(())

View file

@ -42,6 +42,11 @@ impl Highlighter {
Self::extract(v, vals);
}
}
Value::Object(a) => {
for (_, v) in a.0.into_iter() {
Self::extract(v, vals);
}
}
_ => {}
}
}

View file

@ -23,7 +23,7 @@ use crate::idx::{btree, IndexKeyBase, SerdeState};
use crate::kvs::{Key, Transaction};
use crate::sql::scoring::Scoring;
use crate::sql::statements::DefineAnalyzerStatement;
use crate::sql::{Array, Idiom, Object, Thing, Value};
use crate::sql::{Idiom, Object, Thing, Value};
use roaring::treemap::IntoIter;
use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize};
@ -195,7 +195,7 @@ impl FtIndex {
&mut self,
tx: &mut Transaction,
rid: &Thing,
field_content: &Array,
content: &[Value],
) -> Result<(), Error> {
// Resolve the doc_id
let resolved = self.doc_ids.write().await.resolve_doc_id(tx, rid.into()).await?;
@ -206,12 +206,12 @@ impl FtIndex {
let (doc_length, terms_and_frequencies, offsets) = if self.highlighting {
let (dl, tf, ofs) = self
.analyzer
.extract_terms_with_frequencies_with_offsets(&mut t, tx, field_content)
.extract_terms_with_frequencies_with_offsets(&mut t, tx, content)
.await?;
(dl, tf, Some(ofs))
} else {
let (dl, tf) =
self.analyzer.extract_terms_with_frequencies(&mut t, tx, field_content).await?;
self.analyzer.extract_terms_with_frequencies(&mut t, tx, content).await?;
(dl, tf, None)
};
@ -456,7 +456,7 @@ mod tests {
use crate::sql::scoring::Scoring;
use crate::sql::statements::define::analyzer;
use crate::sql::statements::DefineAnalyzerStatement;
use crate::sql::{Array, Thing};
use crate::sql::{Thing, Value};
use std::collections::HashMap;
use std::sync::Arc;
use test_log::test;
@ -537,7 +537,7 @@ mod tests {
// Add one document
let (mut tx, mut fti) =
tx_fti(&ds, BTreeStoreType::Write, &az, btree_order, false).await;
fti.index_document(&mut tx, &doc1, &Array::from(vec!["hello the world"]))
fti.index_document(&mut tx, &doc1, &vec![Value::from("hello the world")])
.await
.unwrap();
finish(tx, fti).await;
@ -547,8 +547,8 @@ mod tests {
// Add two documents
let (mut tx, mut fti) =
tx_fti(&ds, BTreeStoreType::Write, &az, btree_order, false).await;
fti.index_document(&mut tx, &doc2, &Array::from(vec!["a yellow hello"])).await.unwrap();
fti.index_document(&mut tx, &doc3, &Array::from(vec!["foo bar"])).await.unwrap();
fti.index_document(&mut tx, &doc2, &vec![Value::from("a yellow hello")]).await.unwrap();
fti.index_document(&mut tx, &doc3, &vec![Value::from("foo bar")]).await.unwrap();
finish(tx, fti).await;
}
@ -585,7 +585,7 @@ mod tests {
// Reindex one document
let (mut tx, mut fti) =
tx_fti(&ds, BTreeStoreType::Write, &az, btree_order, false).await;
fti.index_document(&mut tx, &doc3, &Array::from(vec!["nobar foo"])).await.unwrap();
fti.index_document(&mut tx, &doc3, &vec![Value::from("nobar foo")]).await.unwrap();
finish(tx, fti).await;
let (mut tx, fti) = tx_fti(&ds, BTreeStoreType::Read, &az, btree_order, false).await;
@ -643,28 +643,28 @@ mod tests {
fti.index_document(
&mut tx,
&doc1,
&Array::from(vec!["the quick brown fox jumped over the lazy dog"]),
&vec![Value::from("the quick brown fox jumped over the lazy dog")],
)
.await
.unwrap();
fti.index_document(
&mut tx,
&doc2,
&Array::from(vec!["the fast fox jumped over the lazy dog"]),
&vec![Value::from("the fast fox jumped over the lazy dog")],
)
.await
.unwrap();
fti.index_document(
&mut tx,
&doc3,
&Array::from(vec!["the dog sat there and did nothing"]),
&vec![Value::from("the dog sat there and did nothing")],
)
.await
.unwrap();
fti.index_document(
&mut tx,
&doc4,
&Array::from(vec!["the other animals sat there watching"]),
&vec![Value::from("the other animals sat there watching")],
)
.await
.unwrap();

View file

@ -136,7 +136,7 @@ impl QueryExecutor {
return Ok(Some(ThingIterator::NonUniqueEqual(NonUniqueEqualThingIterator::new(
opt,
io.ix(),
io.value(),
io.array(),
)?)));
}
Ok(None)
@ -150,7 +150,7 @@ impl QueryExecutor {
return Ok(Some(ThingIterator::UniqueEqual(UniqueEqualThingIterator::new(
opt,
io.ix(),
io.value(),
io.array(),
)?)));
}
Ok(None)

View file

@ -6,7 +6,7 @@ use crate::idx::ft::{FtIndex, HitsIterator};
use crate::key;
use crate::kvs::Key;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Array, Thing, Value};
use crate::sql::{Array, Thing};
pub(crate) enum ThingIterator {
NonUniqueEqual(NonUniqueEqualThingIterator),
@ -37,11 +37,10 @@ impl NonUniqueEqualThingIterator {
pub(super) fn new(
opt: &Options,
ix: &DefineIndexStatement,
v: &Value,
v: &Array,
) -> Result<NonUniqueEqualThingIterator, Error> {
let v = Array::from(v.clone());
let (beg, end) =
key::index::Index::range_all_ids(opt.ns(), opt.db(), &ix.what, &ix.name, &v);
key::index::Index::range_all_ids(opt.ns(), opt.db(), &ix.what, &ix.name, v);
Ok(Self {
beg,
end,
@ -70,9 +69,8 @@ pub(crate) struct UniqueEqualThingIterator {
}
impl UniqueEqualThingIterator {
pub(super) fn new(opt: &Options, ix: &DefineIndexStatement, v: &Value) -> Result<Self, Error> {
let v = Array::from(v.clone());
let key = key::index::Index::new(opt.ns(), opt.db(), &ix.what, &ix.name, v, None).into();
pub(super) fn new(opt: &Options, ix: &DefineIndexStatement, a: &Array) -> Result<Self, Error> {
let key = key::index::Index::new(opt.ns(), opt.db(), &ix.what, &ix.name, a, None).into();
Ok(Self {
key: Some(key),
})

View file

@ -3,7 +3,7 @@ use crate::idx::ft::MatchRef;
use crate::idx::planner::tree::Node;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::with::With;
use crate::sql::Object;
use crate::sql::{Array, Object};
use crate::sql::{Expression, Idiom, Operator, Value};
use std::collections::HashMap;
use std::hash::Hash;
@ -127,7 +127,7 @@ pub(crate) struct IndexOption(Arc<Inner>);
pub(super) struct Inner {
ix: DefineIndexStatement,
id: Idiom,
v: Value,
a: Array,
qs: Option<String>,
op: Operator,
mr: Option<MatchRef>,
@ -138,7 +138,7 @@ impl IndexOption {
ix: DefineIndexStatement,
id: Idiom,
op: Operator,
v: Value,
a: Array,
qs: Option<String>,
mr: Option<MatchRef>,
) -> Self {
@ -146,7 +146,7 @@ impl IndexOption {
ix,
id,
op,
v,
a,
qs,
mr,
}))
@ -160,8 +160,8 @@ impl IndexOption {
&self.0.op
}
pub(super) fn value(&self) -> &Value {
&self.0.v
pub(super) fn array(&self) -> &Array {
&self.0.a
}
pub(super) fn qs(&self) -> Option<&String> {
@ -177,10 +177,15 @@ impl IndexOption {
}
pub(crate) fn explain(&self) -> Value {
let v = if self.0.a.len() == 1 {
self.0.a[0].clone()
} else {
Value::Array(self.0.a.clone())
};
Value::Object(Object::from(HashMap::from([
("index", Value::from(self.ix().name.0.to_owned())),
("operator", Value::from(self.op().to_string())),
("value", self.value().clone()),
("value", v),
])))
}
}
@ -189,7 +194,7 @@ impl IndexOption {
mod tests {
use crate::idx::planner::plan::IndexOption;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Idiom, Operator, Value};
use crate::sql::{Array, Idiom, Operator};
use std::collections::HashSet;
#[test]
@ -199,7 +204,7 @@ mod tests {
DefineIndexStatement::default(),
Idiom::from("a.b".to_string()),
Operator::Equal,
Value::from("test"),
Array::from(vec!["test"]),
None,
None,
);
@ -208,7 +213,7 @@ mod tests {
DefineIndexStatement::default(),
Idiom::from("a.b".to_string()),
Operator::Equal,
Value::from("test"),
Array::from(vec!["test"]),
None,
None,
);

View file

@ -4,7 +4,7 @@ use crate::err::Error;
use crate::idx::planner::plan::IndexOption;
use crate::sql::index::Index;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Cond, Expression, Idiom, Operator, Subquery, Table, Value};
use crate::sql::{Array, Cond, Expression, Idiom, Operator, Subquery, Table, Value};
use async_recursion::async_recursion;
use std::collections::HashMap;
use std::sync::Arc;
@ -156,7 +156,14 @@ impl<'a> TreeBuilder<'a> {
}
};
if found {
let io = IndexOption::new(ix.clone(), id.clone(), op.to_owned(), v.clone(), qs, mr);
let io = IndexOption::new(
ix.clone(),
id.clone(),
op.to_owned(),
Array::from(v.clone()),
qs,
mr,
);
self.index_map.0.insert(e.clone(), io.clone());
return Some(io);
}

View file

@ -16,6 +16,7 @@ use crate::sql::array::Array;
use crate::sql::id::Id;
use derive::Key;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::ops::Range;
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Key)]
@ -61,11 +62,11 @@ struct PrefixIds<'a> {
_d: u8,
pub ix: &'a str,
_e: u8,
pub fd: Array,
pub fd: Cow<'a, Array>,
}
impl<'a> PrefixIds<'a> {
fn new(ns: &'a str, db: &'a str, tb: &'a str, ix: &'a str, fd: &Array) -> Self {
fn new(ns: &'a str, db: &'a str, tb: &'a str, ix: &'a str, fd: &'a Array) -> Self {
Self {
__: b'/',
_a: b'*',
@ -77,7 +78,7 @@ impl<'a> PrefixIds<'a> {
_d: b'+',
ix,
_e: b'*',
fd: fd.to_owned(),
fd: Cow::Borrowed(fd),
}
}
}
@ -94,8 +95,8 @@ pub struct Index<'a> {
_d: u8,
pub ix: &'a str,
_e: u8,
pub fd: Array,
pub id: Option<Id>,
pub fd: Cow<'a, Array>,
pub id: Option<Cow<'a, Id>>,
}
impl<'a> Index<'a> {
@ -104,8 +105,8 @@ impl<'a> Index<'a> {
db: &'a str,
tb: &'a str,
ix: &'a str,
fd: Array,
id: Option<Id>,
fd: &'a Array,
id: Option<&'a Id>,
) -> Self {
Self {
__: b'/',
@ -118,8 +119,8 @@ impl<'a> Index<'a> {
_d: b'+',
ix,
_e: b'*',
fd,
id,
fd: Cow::Borrowed(fd),
id: id.map(Cow::Borrowed),
}
}
@ -146,14 +147,9 @@ mod tests {
fn key() {
use super::*;
#[rustfmt::skip]
let val = Index::new(
"testns",
"testdb",
"testtb",
"testix",
vec!["testfd1", "testfd2"].into(),
Some("testid".into()),
);
let fd = vec!["testfd1", "testfd2"].into();
let id = "testid".into();
let val = Index::new("testns", "testdb", "testtb", "testix", &fd, Some(&id));
let enc = Index::encode(&val).unwrap();
assert_eq!(
enc,

View file

@ -42,6 +42,7 @@ pub fn ident(i: &str) -> IResult<&str, ()> {
map(char(';'), |_| ()), // SET k = a;
map(char(','), |_| ()), // [a, b]
map(char('.'), |_| ()), // a.k
map(char('…'), |_| ()), // a…
map(char('['), |_| ()), // a[0]
map(eof, |_| ()), // SET k = a
)))(i)

View file

@ -5,13 +5,14 @@ use crate::err::Error;
use crate::sql::common::commas;
use crate::sql::error::IResult;
use crate::sql::fmt::{fmt_separated_by, Fmt};
use crate::sql::part::Next;
use crate::sql::part::{all, field, first, graph, index, last, part, start, Part};
use crate::sql::part::{flatten, Next};
use crate::sql::paths::{ID, IN, META, OUT};
use crate::sql::value::Value;
use md5::Digest;
use md5::Md5;
use nom::branch::alt;
use nom::combinator::opt;
use nom::multi::separated_list1;
use nom::multi::{many0, many1};
use serde::{Deserialize, Serialize};
@ -181,6 +182,11 @@ impl Display for Idiom {
pub fn local(i: &str) -> IResult<&str, Idiom> {
let (i, p) = first(i)?;
let (i, mut v) = many0(alt((all, index, field)))(i)?;
// Flatten is only allowed at the end
let (i, flat) = opt(flatten)(i)?;
if let Some(p) = flat {
v.push(p);
}
v.insert(0, p);
Ok((i, Idiom::from(v)))
}
@ -375,11 +381,11 @@ mod tests {
#[test]
fn idiom_start_param_local_field() {
let sql = "$test.temporary[0].embedded";
let sql = "$test.temporary[0].embedded";
let res = idiom(sql);
assert!(res.is_ok());
let out = res.unwrap().1;
assert_eq!("$test.temporary[0].embedded", format!("{}", out));
assert_eq!("$test.temporary[0].embedded", format!("{}", out));
assert_eq!(
out,
Idiom(vec![
@ -387,6 +393,7 @@ mod tests {
Part::from("temporary"),
Part::Index(Number::Int(0)),
Part::from("embedded"),
Part::Flatten,
])
);
}

View file

@ -22,6 +22,7 @@ use std::str;
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Hash)]
pub enum Part {
All,
Flatten,
Last,
First,
Field(Ident),
@ -112,6 +113,7 @@ impl fmt::Display for Part {
Part::First => f.write_str("[0]"),
Part::Start(v) => write!(f, "{v}"),
Part::Field(v) => write!(f, ".{v}"),
Part::Flatten => f.write_str(""),
Part::Index(v) => write!(f, "[{v}]"),
Part::Where(v) => write!(f, "[WHERE {v}]"),
Part::Graph(v) => write!(f, "{v}"),
@ -139,7 +141,7 @@ impl<'a> Next<'a> for &'a [Part] {
// ------------------------------
pub fn part(i: &str) -> IResult<&str, Part> {
alt((all, last, index, field, value, graph, filter))(i)
alt((all, flatten, last, index, field, value, graph, filter))(i)
}
pub fn first(i: &str) -> IResult<&str, Part> {
@ -180,6 +182,12 @@ pub fn index(i: &str) -> IResult<&str, Part> {
Ok((i, Part::Index(v)))
}
pub fn flatten(i: &str) -> IResult<&str, Part> {
let (i, _) = alt((tag(""), tag("...")))(i)?;
let (i, _) = ending(i)?;
Ok((i, Part::Flatten))
}
pub fn field(i: &str) -> IResult<&str, Part> {
let (i, _) = char('.')(i)?;
let (i, v) = ident::ident(i)?;
@ -244,6 +252,26 @@ mod tests {
assert_eq!(out, Part::Last);
}
#[test]
fn part_flatten() {
let sql = "...";
let res = part(sql);
assert!(res.is_ok());
let out = res.unwrap().1;
assert_eq!("", format!("{}", out));
assert_eq!(out, Part::Flatten);
}
#[test]
fn part_flatten_ellipsis() {
let sql = "";
let res = part(sql);
assert!(res.is_ok());
let out = res.unwrap().1;
assert_eq!("", format!("{}", out));
assert_eq!(out, Part::Flatten);
}
#[test]
fn part_number() {
let sql = "[0]";

View file

@ -103,7 +103,7 @@ impl Value {
// Current value at path is an array
Value::Array(v) => match p {
// Current path is an `*` part
Part::All => {
Part::All | Part::Flatten => {
let path = path.next();
let futs = v.iter().map(|v| v.get(ctx, opt, txn, doc, path));
try_join_all_buffered(futs).await.map(Into::into)
@ -222,8 +222,14 @@ impl Value {
},
}
}
v => {
if matches!(p, Part::Flatten) {
v.get(ctx, opt, txn, None, path.next()).await
} else {
// Ignore everything else
_ => Ok(Value::None),
Ok(Value::None)
}
}
},
// No more parts so get the value
None => Ok(self.clone()),

View file

@ -26,11 +26,6 @@ impl Value {
},
// Current path part is an array
Value::Array(v) => match p {
Part::All => v
.iter()
.enumerate()
.flat_map(|(i, v)| v._walk(path.next(), prev.clone().push(Part::from(i))))
.collect::<Vec<_>>(),
Part::First => match v.first() {
Some(v) => v._walk(path.next(), prev.push(p.clone())),
None => vec![],

View file

@ -103,7 +103,7 @@ async fn create_with_id() -> Result<(), Error> {
}
#[tokio::test]
async fn create_on_non_values_with_unique_index() -> Result<(), Error> {
async fn create_on_none_values_with_unique_index() -> Result<(), Error> {
let sql = "
DEFINE INDEX national_id_idx ON foo FIELDS national_id UNIQUE;
CREATE foo SET name = 'John Doe';
@ -121,6 +121,119 @@ async fn create_on_non_values_with_unique_index() -> Result<(), Error> {
Ok(())
}
#[tokio::test]
async fn create_with_unique_index_with_two_flattened_fields() -> Result<(), Error> {
let sql = "
DEFINE INDEX test ON user FIELDS account, tags, emails... UNIQUE;
CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com'];
CREATE user:2 SET account = 'Apple', tags = ['two', 'three'], emails = ['a@example.com', 'b@example.com'];
CREATE user:3 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com'];
CREATE user:4 SET account = 'Apple', tags = ['two', 'three'], emails = ['a@example.com', 'b@example.com'];
";
let dbs = Datastore::new("memory").await?;
let ses = Session::owner().with_ns("test").with_db("test");
let res = &mut dbs.execute(sql, &ses, None).await?;
assert_eq!(res.len(), 5);
//
for _ in 0..3 {
let _ = res.remove(0).result?;
}
//
let tmp = res.remove(0).result;
if let Err(e) = tmp {
assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', ['one', 'two'], ['a@example.com', 'b@example.com']], with record `user:3`");
} else {
panic!("An error was expected.")
}
//
let tmp = res.remove(0).result;
if let Err(e) = tmp {
assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', ['two', 'three'], ['a@example.com', 'b@example.com']], with record `user:4`");
} else {
panic!("An error was expected.")
}
Ok(())
}
#[tokio::test]
async fn create_with_unique_index_with_one_flattened_field() -> Result<(), Error> {
let sql = "
DEFINE INDEX test ON user FIELDS account, tags, emails... UNIQUE;
CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com'];
CREATE user:2 SET account = 'Apple', tags = ['two', 'three'], emails = ['a@example.com', 'b@example.com'];
";
let dbs = Datastore::new("memory").await?;
let ses = Session::owner().with_ns("test").with_db("test");
let res = &mut dbs.execute(sql, &ses, None).await?;
assert_eq!(res.len(), 3);
//
for _ in 0..2 {
let _ = res.remove(0).result?;
}
//
let tmp = res.remove(0).result;
if let Err(e) = tmp {
assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', 'two', ['a@example.com', 'b@example.com']], with record `user:2`");
} else {
panic!("An error was expected.")
}
Ok(())
}
#[tokio::test]
async fn create_with_unique_index_on_one_field_with_flattened_sub_values() -> Result<(), Error> {
let sql = "
DEFINE INDEX test ON user FIELDS account, tags, emails.*.value UNIQUE;
CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = [ { value:'a@example.com'} , { value:'b@example.com' } ];
CREATE user:2 SET account = 'Apple', tags = ['two', 'three'], emails = [ { value:'a@example.com'} , { value:'b@example.com' } ];
";
let dbs = Datastore::new("memory").await?;
let ses = Session::owner().with_ns("test").with_db("test");
let res = &mut dbs.execute(sql, &ses, None).await?;
assert_eq!(res.len(), 3);
//
for _ in 0..2 {
let _ = res.remove(0).result?;
}
//
let tmp = res.remove(0).result;
if let Err(e) = tmp {
assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', 'two', ['a@example.com', 'b@example.com']], with record `user:2`");
} else {
panic!("An error was expected.")
}
Ok(())
}
#[tokio::test]
async fn create_with_unique_index_on_two_fields() -> Result<(), Error> {
let sql = "
DEFINE INDEX test ON user FIELDS account, tags, emails UNIQUE;
CREATE user:1 SET account = 'Apple', tags = ['one', 'two'], emails = ['a@example.com', 'b@example.com'];
CREATE user:2 SET account = 'Apple', tags = ['two', 'one'], emails = ['b@example.com', 'c@example.com'];
";
let dbs = Datastore::new("memory").await?;
let ses = Session::owner().with_ns("test").with_db("test");
let res = &mut dbs.execute(sql, &ses, None).await?;
assert_eq!(res.len(), 3);
//
for _ in 0..2 {
let _ = res.remove(0).result?;
}
let tmp = res.remove(0).result;
//
if let Err(e) = tmp {
assert_eq!(e.to_string(), "Database index `test` already contains ['Apple', 'two', 'b@example.com'], with record `user:2`");
} else {
panic!("An error was expected.")
}
Ok(())
}
//
// Permissions
//

View file

@ -938,7 +938,6 @@ async fn define_statement_index_multiple_unique_existing() -> Result<(), Error>
}
#[tokio::test]
#[ignore]
async fn define_statement_index_single_unique_embedded_multiple() -> Result<(), Error> {
let sql = "
DEFINE INDEX test ON user FIELDS tags UNIQUE;
@ -974,16 +973,19 @@ async fn define_statement_index_single_unique_embedded_multiple() -> Result<(),
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result;
assert!(matches!(
tmp.err(),
Some(e) if e.to_string() == "Database index `test` already contains `user:2`"
));
if let Err(e) = tmp {
assert_eq!(
e.to_string(),
"Database index `test` already contains 'two', with record `user:2`"
);
} else {
panic!("An error was expected.")
}
//
Ok(())
}
#[tokio::test]
#[ignore]
async fn define_statement_index_multiple_unique_embedded_multiple() -> Result<(), Error> {
let sql = "
DEFINE INDEX test ON user FIELDS account, tags UNIQUE;
@ -1021,20 +1023,28 @@ async fn define_statement_index_multiple_unique_embedded_multiple() -> Result<()
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::parse("[{ id: user:1, account: 'tesla', tags: ['one', 'two'] }]");
let val = Value::parse("[{ id: user:2, account: 'tesla', tags: ['one', 'two'] }]");
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result;
assert!(matches!(
tmp.err(),
Some(e) if e.to_string() == "Database index `test` already contains `user:3`"
));
if let Err(e) = tmp {
assert_eq!(
e.to_string(),
"Database index `test` already contains ['apple', 'two'], with record `user:3`"
);
} else {
panic!("An error was expected.")
}
//
let tmp = res.remove(0).result;
assert!(matches!(
tmp.err(),
Some(e) if e.to_string() == "Database index `test` already contains `user:4`"
));
if let Err(e) = tmp {
assert_eq!(
e.to_string(),
"Database index `test` already contains ['tesla', 'two'], with record `user:4`"
);
} else {
panic!("An error was expected.")
}
//
Ok(())
}

View file

@ -171,6 +171,76 @@ async fn select_where_matches_using_index_and_arrays_with_parallel() -> Result<(
select_where_matches_using_index_and_arrays(true).await
}
async fn select_where_matches_using_index_and_objects(parallel: bool) -> Result<(), Error> {
let p = if parallel {
"PARALLEL"
} else {
""
};
let sql = format!(
r"
CREATE blog:1 SET content = {{ 'title':'Hello World!', 'content':'Be Bop', 'tags': ['Foo', 'Bãr'] }};
DEFINE ANALYZER simple TOKENIZERS blank,class;
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25 HIGHLIGHTS;
SELECT id FROM blog WHERE content @1@ 'Hello Bãr' {p} EXPLAIN;
SELECT id, search::highlight('<em>', '</em>', 1) AS content FROM blog WHERE content @1@ 'Hello Bãr' {p};
"
);
let dbs = Datastore::new("memory").await?;
let ses = Session::owner().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None).await?;
assert_eq!(res.len(), 5);
//
let _ = res.remove(0).result?;
let _ = res.remove(0).result?;
let _ = res.remove(0).result?;
//
let tmp = res.remove(0).result?;
let val = Value::parse(
"[
{
detail: {
plan: {
index: 'blog_content',
operator: '@1@',
value: 'Hello Bãr'
},
table: 'blog',
},
operation: 'Iterate Index'
}
]",
);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::parse(
"[
{
id: blog:1,
content: [
'Be Bop',
'Foo',
'<em>Bãr</em>',
'<em>Hello</em> World!'
]
}
]",
);
assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
Ok(())
}
#[tokio::test]
async fn select_where_matches_using_index_and_objects_non_parallel() -> Result<(), Error> {
select_where_matches_using_index_and_objects(false).await
}
#[tokio::test]
async fn select_where_matches_using_index_and_objects_with_parallel() -> Result<(), Error> {
select_where_matches_using_index_and_objects(true).await
}
#[tokio::test]
async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
let sql = r"