Feature - Search highlighting (#2132)

This commit is contained in:
Emmanuel Keller 2023-06-21 19:31:15 +01:00 committed by GitHub
parent 000a594ad5
commit 8e64750a60
44 changed files with 1229 additions and 347 deletions

View file

@ -58,8 +58,8 @@ async-trait = "0.1.68"
async-recursion = "1.0.4"
base64_lib = { version = "0.21.1", package = "base64" }
bcrypt = "0.14.0"
bung = "0.1.0"
bincode = "1.3.3"
bung = "0.1.0"
channel = { version = "1.8.0", package = "async-channel" }
chrono = { version = "0.4.24", features = ["serde"] }
derive = { version = "0.8.0", package = "surrealdb-derive" }

View file

@ -152,7 +152,9 @@ impl<'a> Context<'a> {
self.deadline.map(|v| v.saturating_duration_since(Instant::now()))
}
pub fn clone_transaction(&self) -> Result<Transaction, Error> {
/// Returns the current transaction, if any.
/// Otherwise it fails with an Error::NoTx error.
pub fn try_clone_transaction(&self) -> Result<Transaction, Error> {
match &self.transaction {
None => Err(Error::NoTx),
Some(txn) => Ok(txn.clone()),

View file

@ -23,7 +23,7 @@ impl Iterable {
) -> Result<(), Error> {
if ctx.is_ok() {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
match self {
Iterable::Value(v) => {
// Pass the value through

View file

@ -64,7 +64,7 @@ impl Iterable {
ite: &mut Iterator,
) -> Result<(), Error> {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check that the table exists
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
// Fetch the data from the store
@ -92,7 +92,7 @@ impl Iterable {
ite: &mut Iterator,
) -> Result<(), Error> {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check that the table exists
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
// Fetch the data from the store
@ -123,7 +123,7 @@ impl Iterable {
ite: &mut Iterator,
) -> Result<(), Error> {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check that the table exists
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
// Fetch the data from the store
@ -152,7 +152,7 @@ impl Iterable {
ite: &mut Iterator,
) -> Result<(), Error> {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check that the table exists
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v, opt.strict).await?;
// Prepare the start and end keys
@ -220,7 +220,7 @@ impl Iterable {
ite: &mut Iterator,
) -> Result<(), Error> {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check that the table exists
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
// Prepare the range start key
@ -387,13 +387,13 @@ impl Iterable {
None => {
let min = beg.clone();
let max = end.clone();
ctx.clone_transaction()?.lock().await.scan(min..max, 1000).await?
ctx.try_clone_transaction()?.lock().await.scan(min..max, 1000).await?
}
Some(ref mut beg) => {
beg.push(0x00);
let min = beg.clone();
let max = end.clone();
ctx.clone_transaction()?.lock().await.scan(min..max, 1000).await?
ctx.try_clone_transaction()?.lock().await.scan(min..max, 1000).await?
}
};
// If there are key-value entries then fetch them
@ -418,7 +418,7 @@ impl Iterable {
let gra: crate::key::graph::Graph = (&k).into();
// Fetch the data from the store
let key = thing::new(opt.ns(), opt.db(), gra.ft, &gra.fk);
let val = ctx.clone_transaction()?.lock().await.get(key).await?;
let val = ctx.try_clone_transaction()?.lock().await.get(key).await?;
let rid = Thing::from((gra.ft, gra.fk));
let mut ctx = Context::new(ctx);
ctx.add_thing(&rid);
@ -446,7 +446,7 @@ impl Iterable {
plan: Plan,
ite: &mut Iterator,
) -> Result<(), Error> {
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check that the table exists
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &table.0, opt.strict).await?;
let mut iterator = plan.new_iterator(opt, &txn).await?;

View file

@ -17,7 +17,7 @@ impl<'a> Document<'a> {
// Should we run permissions checks?
if opt.perms && opt.auth.perms() {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Get the table
let tb = self.tb(opt, &txn).await?;
// Get the permission clause

View file

@ -13,7 +13,7 @@ impl<'a> Document<'a> {
_stm: &Statement<'_>,
) -> Result<(), Error> {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Get the table
let tb = self.tb(opt, &txn).await?;
// This table is schemafull

View file

@ -18,7 +18,7 @@ impl<'a> Document<'a> {
_stm: &Statement<'_>,
) -> Result<(), Error> {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check if the table is a view
if self.tb(opt, &txn).await?.drop {
return Ok(());

View file

@ -24,7 +24,7 @@ impl<'a> Document<'a> {
// Don't run permissions
let opt = &opt.perms(false);
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Loop through all event statements
for ev in self.ev(opt, &txn).await?.iter() {
// Get the event action

View file

@ -22,7 +22,7 @@ impl<'a> Document<'a> {
// Get the user applied input
let inp = self.initial.changed(self.current.as_ref());
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Loop through all field statements
for fd in self.fd(opt, &txn).await?.iter() {
// Loop over each field in document

View file

@ -28,7 +28,7 @@ impl<'a> Document<'a> {
return Ok(());
}
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check if the table is a view
if self.tb(opt, &txn).await?.drop {
return Ok(());
@ -49,7 +49,7 @@ impl<'a> Document<'a> {
let mut run = txn.lock().await;
// Store all the variables and parameters required by the index operation
let mut ic = IndexOperation::new(opt, ix, o, n, rid);
let ic = IndexOperation::new(opt, ix, o, n, rid);
// Index operation dispatching
match &ix.index {
@ -60,12 +60,7 @@ impl<'a> Document<'a> {
sc,
hl,
order,
} => match sc {
Scoring::Bm {
..
} => ic.index_best_matching_search(&mut run, az, *order, *hl).await?,
Scoring::Vs => ic.index_vector_search(az, *hl).await?,
},
} => ic.index_full_text(&mut run, az, *order, sc, *hl).await?,
};
}
}
@ -182,16 +177,17 @@ impl<'a> IndexOperation<'a> {
})
}
async fn index_best_matching_search(
async fn index_full_text(
&self,
run: &mut kvs::Transaction,
az: &Ident,
order: u32,
_hl: bool,
scoring: &Scoring,
hl: bool,
) -> Result<(), Error> {
let ikb = IndexKeyBase::new(self.opt, self.ix);
let az = run.get_az(self.opt.ns(), self.opt.db(), az.as_str()).await?;
let mut ft = FtIndex::new(run, az, ikb, order).await?;
let mut ft = FtIndex::new(run, az, ikb, order, scoring, hl).await?;
if let Some(n) = &self.n {
// TODO: Apply the analyzer
ft.index_document(run, self.rid, n).await
@ -199,10 +195,4 @@ impl<'a> IndexOperation<'a> {
ft.remove_document(run, self.rid).await
}
}
async fn index_vector_search(&mut self, _az: &Ident, _hl: bool) -> Result<(), Error> {
Err(Error::FeatureNotYetImplemented {
feature: "VectorSearch indexing",
})
}
}

View file

@ -19,7 +19,7 @@ impl<'a> Document<'a> {
return Ok(());
}
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Get the record id
let id = self.id.as_ref().unwrap();
// Loop through all index statements

View file

@ -82,7 +82,7 @@ impl<'a> Document<'a> {
// Should we run permissions checks?
if opt.perms && opt.auth.perms() {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Loop through all field statements
for fd in self.fd(opt, &txn).await?.iter() {
// Loop over each field in document

View file

@ -24,7 +24,7 @@ impl<'a> Document<'a> {
return Ok(());
}
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check if the table is a view
if self.tb(opt, &txn).await?.drop {
return Ok(());

View file

@ -16,7 +16,7 @@ impl<'a> Document<'a> {
return Ok(());
}
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check if the table is a view
if self.tb(opt, &txn).await?.drop {
return Ok(());

View file

@ -56,7 +56,7 @@ impl<'a> Document<'a> {
Action::Update
};
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Loop through all foreign table statements
for ft in self.ft(opt, &txn).await?.iter() {
// Get the table definition

View file

@ -20,6 +20,7 @@ pub mod operate;
pub mod parse;
pub mod rand;
pub mod script;
pub mod search;
pub mod session;
pub mod sleep;
pub mod string;
@ -30,6 +31,7 @@ pub mod util;
/// Attempts to run any function
pub async fn run(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Value, Error> {
if name.eq("sleep")
|| name.starts_with("search")
|| name.starts_with("http")
|| name.starts_with("crypto::argon2")
|| name.starts_with("crypto::bcrypt")
@ -300,6 +302,9 @@ pub async fn asynchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Re
"http::patch" => http::patch(ctx).await,
"http::delete" => http::delete(ctx).await,
//
"search::highlight" => search::highlight(ctx).await,
"search::offsets" => search::offsets(ctx).await,
//
"sleep" => sleep::sleep(ctx).await,
)
}

View file

@ -169,7 +169,7 @@ pub(crate) async fn matches(ctx: &Context<'_>, e: &Expression) -> Result<Value,
if let Some(thg) = ctx.thing() {
if let Some(exe) = ctx.get_query_executor(&thg.tb) {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Check the matches
return exe.matches(&txn, thg, e).await;
}

View file

@ -17,6 +17,7 @@ mod math;
mod meta;
mod parse;
mod rand;
mod search;
mod session;
mod string;
mod time;
@ -42,6 +43,7 @@ impl_module_def!(
"parse" => (parse::Package),
"rand" => (rand::Package),
"array" => (array::Package),
"search" => (search::Package),
"session" => (session::Package),
"sleep" => fut Async,
"string" => (string::Package),

View file

@ -0,0 +1,12 @@
use super::fut;
use crate::fnc::script::modules::impl_module_def;
use js::prelude::Async;
pub struct Package;
impl_module_def!(
Package,
"search",
"highlight" => fut Async,
"offsets" => fut Async
);

lib/src/fnc/search.rs Normal file
View file

@ -0,0 +1,28 @@
use crate::ctx::Context;
use crate::err::Error;
use crate::sql::Value;
pub async fn highlight(
ctx: &Context<'_>,
(prefix, suffix, match_ref): (Value, Value, Value),
) -> Result<Value, Error> {
if let Some(doc) = ctx.doc() {
if let Some(thg) = ctx.thing() {
if let Some(exe) = ctx.get_query_executor(&thg.tb) {
let txn = ctx.try_clone_transaction()?;
return exe.highlight(txn, thg, prefix, suffix, match_ref.clone(), doc).await;
}
}
}
Ok(Value::None)
}
pub async fn offsets(ctx: &Context<'_>, (match_ref,): (Value,)) -> Result<Value, Error> {
if let Some(thg) = ctx.thing() {
if let Some(exe) = ctx.get_query_executor(&thg.tb) {
let txn = ctx.try_clone_transaction()?;
return exe.offsets(txn, thg, match_ref.clone()).await;
}
}
Ok(Value::None)
}
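Taken together, these two functions expose the highlighting feature to queries. A hypothetical usage sketch (the table and field names are invented; the numeric match reference ties the function call to the corresponding @{ref}@ form of the MATCHES operator, as described in the query executor changes below):

SELECT search::highlight('<b>', '</b>', 1) AS title,
       search::offsets(1) AS title_offsets
FROM book
WHERE title @1@ 'hello';

search::highlight returns the matched field value with the prefix and suffix wrapped around each matched term, while search::offsets returns the raw start and end positions of the matches.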

View file

@ -179,7 +179,7 @@ pub(super) enum Term {
#[cfg(test)]
mod tests {
use crate::idx::ft::analyzer::tests::test_analyser;
use crate::idx::ft::analyzer::tests::test_analyzer;
#[test]
fn test_arabic_stemmer() {
@ -189,17 +189,17 @@ mod tests {
"كلاب", "تحب", "الجر", "في", "حديق", "لكن", "كلب", "صغير", "يفضل", "نوم", "في", "سرير",
"بدل", "من", "الجر",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(arabic);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ar);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ara);",
input,
&output,
@ -233,17 +233,17 @@ mod tests {
"løb",
".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(danish);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(dan);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(da);",
input,
&output,
@ -257,17 +257,17 @@ mod tests {
"hond", "houd", "ervan", "om", "in", "het", "park", "te", "renn", ",", "mar", "mijn",
"klein", "hond", "slaapt", "liever", "in", "zijn", "mand", "dan", "te", "renn", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(dutch);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(nl);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(nld);",
input,
&output,
@ -281,17 +281,17 @@ mod tests {
"teacher", "are", "often", "teach", ",", "but", "my", "favorit", "teacher", "prefer",
"read", "in", "her", "spare", "time", "rather", "than", "teach", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(english);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(eng);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(en);",
input,
&output,
@ -305,17 +305,17 @@ mod tests {
"le", "chien", "adorent", "cour", "dan", "le", "parc", ",", "mais", "mon", "pet",
"chien", "aim", "plutôt", "se", "blott", "sur", "le", "canap", "que", "de", "cour",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(french);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(fr);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(fra);",
input,
&output,
@ -330,17 +330,17 @@ mod tests {
"hund", "zieht", "es", "vor", ",", "auf", "dem", "sofa", "zu", "schlaf", ",", "statt",
"zu", "lauf", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(german);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(de);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(deu);",
input,
&output,
@ -375,17 +375,17 @@ mod tests {
"τρεχ",
".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(greek);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ell);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(el);",
input,
&output,
@ -399,17 +399,17 @@ mod tests {
"a", "kutya", "szeret", "futn", "a", "par", ",", "de", "az", "én", "kics", "kutya",
"inkább", "alsz", "a", "kosar", ",", "mints", "fu", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(hungarian);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(hu);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(hun);",
input,
&output,
@ -423,17 +423,17 @@ mod tests {
"i", "can", "aman", "corr", "nel", "parc", ",", "ma", "il", "mio", "piccol", "can",
"prefer", "dorm", "nel", "suo", "cest", "piuttost", "che", "corr", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(italian);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(it);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ita);",
input,
&output,
@ -467,17 +467,17 @@ mod tests {
"løp",
".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(norwegian);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(no);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(nor);",
input,
&output,
@ -491,17 +491,17 @@ mod tests {
"os", "", "ador", "corr", "no", "parqu", ",", "mas", "o", "meu", "pequen", "",
"prefer", "dorm", "na", "sua", "cam", "em", "vez", "de", "corr", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(portuguese);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(pt);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(por);",
input,
&output,
@ -515,17 +515,17 @@ mod tests {
"câin", "ador", "", "alerg", "în", "parc", ",", "dar", "cățel", "meu", "prefer",
"", "doarm", "în", "coș", "lui", "decât", "", "alerg", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(romanian);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ro);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ron);",
input,
&output,
@ -557,17 +557,17 @@ mod tests {
"бега",
".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(russian);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ru);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(rus);",
input,
&output,
@ -581,17 +581,17 @@ mod tests {
"los", "perr", "aman", "corr", "en", "el", "parqu", ",", "per", "mi", "pequeñ", "perr",
"prefier", "dorm", "en", "su", "cam", "en", "lug", "de", "corr", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(spanish);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(es);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(spa);",
input,
&output,
@ -625,17 +625,17 @@ mod tests {
"spring",
".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(swedish);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(sv);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(swe);",
input,
&output,
@ -672,17 +672,17 @@ mod tests {
"லை",
".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tamil);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ta);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tam);",
input,
&output,
@ -696,17 +696,17 @@ mod tests {
"köpek", "park", "koşma", "sever", ",", "am", "be", "küçük", "köpek", "koşmak",
"yatak", "uyuma", "tercih", "eder", ".",
];
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(turkish);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tr);",
input,
&output,
);
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tur);",
input,
&output,
@ -715,7 +715,7 @@ mod tests {
#[test]
fn test_ngram() {
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS lowercase,ngram(2,3);",
"Ālea iacta est",
&["āl", "āle", "le", "lea", "ia", "iac", "ac", "act", "ct", "cta", "es", "est"],
@ -724,7 +724,7 @@ mod tests {
#[test]
fn test_edgengram() {
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS lowercase,edgengram(2,3);",
"Ālea iacta est",
&["āl", "āle", "ia", "iac", "es", "est"],

View file

@ -1,12 +1,13 @@
use crate::err::Error;
use crate::idx::ft::analyzer::tokenizer::{Tokenizer, Tokens};
use crate::idx::ft::doclength::DocLength;
use crate::idx::ft::offsets::{Offset, OffsetRecords};
use crate::idx::ft::postings::TermFrequency;
use crate::idx::ft::terms::{TermId, Terms};
use crate::kvs::Transaction;
use crate::sql::statements::DefineAnalyzerStatement;
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
use crate::sql::Array;
use crate::sql::{Array, Value};
use filter::Filter;
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
@ -40,46 +41,47 @@ impl Analyzer {
t: &Terms,
tx: &mut Transaction,
query_string: String,
) -> Result<Vec<Option<TermId>>, Error> {
let tokens = self.analyse(query_string);
) -> Result<(Vec<TermId>, bool), Error> {
let tokens = self.analyze(query_string);
// We first collect every unique term
// as the query can contain duplicates
let mut terms = HashSet::new();
for token in tokens.list() {
terms.insert(token);
}
let mut missing = false;
// Now we can extract the term ids
let mut res = Vec::with_capacity(terms.len());
for term in terms {
let term_id = t.get_term_id(tx, tokens.get_token_string(term)).await?;
res.push(term_id);
if let Some(term_id) = t.get_term_id(tx, tokens.get_token_string(term)).await? {
res.push(term_id);
} else {
missing = true;
}
}
Ok(res)
Ok((res, missing))
}
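// For example: if the query string is "hello world" but "world" was never indexed,
// `missing` is returned as true; the caller (FtIndex::search below) evaluates MATCHES
// as an AND over all terms, so it can return an empty hit set without scanning.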
/// This method is used for indexing.
/// It will create new term ids for terms that do not already exist.
pub(super) async fn extract_terms_with_frequencies(
&self,
t: &mut Terms,
terms: &mut Terms,
tx: &mut Transaction,
field_content: &Array,
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>), Error> {
let mut doc_length = 0;
let mut dl = 0;
// Let's first collect all the inputs, and collect the tokens.
// We need to store them because everything after is zero-copy
let mut inputs = Vec::with_capacity(field_content.0.len());
for v in &field_content.0 {
let input = v.to_owned().convert_to_string()?;
let tks = self.analyse(input);
inputs.push(tks);
}
let mut inputs = vec![];
self.analyze_content(field_content, &mut inputs)?;
// We then collect every unique term and count its frequency
let mut terms = HashMap::new();
for tokens in &inputs {
for token in tokens.list() {
doc_length += 1;
match terms.entry(tokens.get_token_string(token)) {
let mut tf: HashMap<&str, TermFrequency> = HashMap::new();
for tks in &inputs {
for tk in tks.list() {
dl += 1;
let s = tks.get_token_string(tk);
match tf.entry(s) {
Entry::Vacant(e) => {
e.insert(1);
}
@ -89,15 +91,76 @@ impl Analyzer {
}
}
}
// Now we can extract the term ids
let mut res = Vec::with_capacity(terms.len());
for (term, freq) in terms {
res.push((t.resolve_term_id(tx, term).await?, freq));
// Now we can resolve the term ids
let mut tfid = Vec::with_capacity(tf.len());
for (t, f) in tf {
tfid.push((terms.resolve_term_id(tx, t).await?, f));
}
Ok((doc_length, res))
Ok((dl, tfid))
}
fn analyse(&self, input: String) -> Tokens {
/// This method is used for indexing.
/// It will create new term ids for terms that do not already exist.
pub(super) async fn extract_terms_with_frequencies_with_offsets(
&self,
terms: &mut Terms,
tx: &mut Transaction,
field_content: &Array,
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>, Vec<(TermId, OffsetRecords)>), Error> {
let mut dl = 0;
// Let's first collect all the inputs, and collect the tokens.
// We need to store them because everything after is zero-copy
let mut inputs = Vec::with_capacity(field_content.len());
self.analyze_content(field_content, &mut inputs)?;
// We then collect every unique term, count its frequency, and extract the offsets
let mut tfos: HashMap<&str, Vec<Offset>> = HashMap::new();
for (i, tks) in inputs.iter().enumerate() {
for tk in tks.list() {
dl += 1;
let s = tks.get_token_string(tk);
let o = tk.new_offset(i as u32);
match tfos.entry(s) {
Entry::Vacant(e) => {
e.insert(vec![o]);
}
Entry::Occupied(mut e) => e.get_mut().push(o),
}
}
}
// Now we can resolve the term ids
let mut tfid = Vec::with_capacity(tfos.len());
let mut osid = Vec::with_capacity(tfos.len());
for (t, o) in tfos {
let id = terms.resolve_term_id(tx, t).await?;
tfid.push((id, o.len() as TermFrequency));
osid.push((id, OffsetRecords(o)));
}
Ok((dl, tfid, osid))
}
fn analyze_content(&self, field_content: &Array, tks: &mut Vec<Tokens>) -> Result<(), Error> {
for v in &field_content.0 {
self.analyze_value(v, tks);
}
Ok(())
}
fn analyze_value(&self, val: &Value, tks: &mut Vec<Tokens>) {
match val {
Value::Strand(s) => tks.push(self.analyze(s.0.clone())),
Value::Number(n) => tks.push(self.analyze(n.to_string())),
Value::Bool(b) => tks.push(self.analyze(b.to_string())),
Value::Array(a) => {
for v in &a.0 {
self.analyze_value(v, tks);
}
}
_ => {}
}
}
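// For example (illustrative values): analyzing the field content
//   ["SurrealDB", ["full-text", 7], true]
// produces one token stream per scalar value: "SurrealDB", "full-text", "7" and "true".
// Objects and other value types are skipped.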
fn analyze(&self, input: String) -> Tokens {
if let Some(t) = &self.t {
if !input.is_empty() {
let t = Tokenizer::tokenize(t, input);
@ -113,11 +176,11 @@ mod tests {
use super::Analyzer;
use crate::sql::statements::define::analyzer;
pub(super) fn test_analyser(def: &str, input: &str, expected: &[&str]) {
pub(super) fn test_analyzer(def: &str, input: &str, expected: &[&str]) {
let (_, az) = analyzer(def).unwrap();
let a: Analyzer = az.into();
let tokens = a.analyse(input.to_string());
let tokens = a.analyze(input.to_string());
let mut res = vec![];
for t in tokens.list() {
res.push(tokens.get_token_string(t));

View file

@ -1,4 +1,5 @@
use crate::idx::ft::analyzer::filter::{Filter, FilterResult, Term};
use crate::idx::ft::offsets::{Offset, Position};
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
pub(super) struct Tokens {
@ -35,18 +36,19 @@ impl Tokens {
match fr {
FilterResult::Term(t) => match t {
Term::Unchanged => tks.push(tk),
Term::NewTerm(s) => tks.push(Token::String(s)),
Term::NewTerm(s) => tks.push(tk.new_token(s)),
},
FilterResult::Terms(ts) => {
let mut tk = Some(tk);
let mut already_pushed = false;
for t in ts {
match t {
Term::Unchanged => {
if let Some(tk) = tk.take() {
tks.push(tk)
if !already_pushed {
tks.push(tk.clone());
already_pushed = true;
}
}
Term::NewTerm(s) => tks.push(Token::String(s)),
Term::NewTerm(s) => tks.push(tk.new_token(s)),
}
}
}
@ -66,22 +68,36 @@ impl Tokens {
#[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord, Hash)]
pub(super) enum Token {
Ref(usize, usize),
String(String),
Ref(Position, Position),
String(Position, Position, String),
}
impl Token {
fn new_token(&self, t: String) -> Self {
match self {
Token::Ref(s, e) => Token::String(*s, *e, t),
Token::String(s, e, _) => Token::String(*s, *e, t),
}
}
pub(super) fn new_offset(&self, i: u32) -> Offset {
match self {
Token::Ref(s, e) => Offset::new(i, *s, *e),
Token::String(s, e, _) => Offset::new(i, *s, *e),
}
}
fn is_empty(&self) -> bool {
match self {
Token::Ref(start, end) => start == end,
Token::String(s) => s.is_empty(),
Token::String(_, _, s) => s.is_empty(),
}
}
pub(super) fn get_str<'a>(&'a self, i: &'a str) -> &str {
match self {
Token::Ref(s, e) => &i[*s..*e],
Token::String(s) => s,
Token::Ref(s, e) => &i[(*s as usize)..(*e as usize)],
Token::String(_, _, s) => s,
}
}
}
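// Illustration: with the blank tokenizer, "Hello World" produces Token::Ref(0, 5) and
// Token::Ref(6, 11) (byte positions). A filter that rewrites a term, such as lowercase,
// yields Token::String(0, 5, "hello") and Token::String(6, 11, "world"): the original
// positions are preserved, so the offsets recorded for highlighting still point into
// the source text.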
@ -128,10 +144,10 @@ impl Tokenizer {
// If the character is not valid for indexing (space, control...)
// Then we increase the last position to the next character
if !is_valid {
last_pos += c.len_utf8();
last_pos += c.len_utf8() as Position;
}
}
current_pos += c.len_utf8();
current_pos += c.len_utf8() as Position;
}
if current_pos != last_pos {
t.push(Token::Ref(last_pos, current_pos));
@ -229,11 +245,11 @@ impl Splitter {
#[cfg(test)]
mod tests {
use crate::idx::ft::analyzer::tests::test_analyser;
use crate::idx::ft::analyzer::tests::test_analyzer;
#[test]
fn test_tokenize_blank_class() {
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS lowercase",
"Abc12345xYZ DL1809 item123456 978-3-16-148410-0 1HGCM82633A123456",
&[
@ -245,7 +261,7 @@ mod tests {
#[test]
fn test_tokenize_source_code() {
test_analyser(
test_analyzer(
"DEFINE ANALYZER test TOKENIZERS blank,class,camel,punct FILTERS lowercase",
r#"struct MyRectangle {
// specified by corners

View file

@ -0,0 +1,141 @@
use crate::err::Error;
use crate::idx::ft::offsets::{Offset, Position};
use crate::sql::{Array, Idiom, Object, Value};
use std::collections::btree_map::Entry as BEntry;
use std::collections::hash_map::Entry as HEntry;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::default::Default;
pub(super) struct Highlighter {
prefix: Vec<char>,
suffix: Vec<char>,
fields: Vec<(Idiom, Value)>,
offseter: Offseter,
}
impl Highlighter {
pub(super) fn new(prefix: Value, suffix: Value, idiom: &Idiom, doc: &Value) -> Self {
let prefix = prefix.to_raw_string().chars().collect();
let suffix = suffix.to_raw_string().chars().collect();
// Extract the fields we want to highlight
let fields = doc.walk(idiom);
Self {
fields,
prefix,
suffix,
offseter: Offseter::default(),
}
}
pub(super) fn highlight(&mut self, os: Vec<Offset>) {
self.offseter.highlight(os);
}
fn extract(val: Value, vals: &mut Vec<Option<String>>) {
match val {
Value::Strand(s) => vals.push(Some(s.0)),
Value::Number(n) => vals.push(Some(n.to_string())),
Value::Bool(b) => vals.push(Some(b.to_string())),
Value::Array(a) => {
for v in a.0 {
Self::extract(v, vals);
}
}
_ => vals.push(None),
}
}
}
impl TryFrom<Highlighter> for Value {
type Error = Error;
fn try_from(hl: Highlighter) -> Result<Self, Error> {
if hl.fields.is_empty() {
return Ok(Self::None);
}
let mut vals = vec![];
for (_, f) in hl.fields {
Highlighter::extract(f, &mut vals);
}
let mut res = Vec::with_capacity(vals.len());
for (idx, val) in vals.into_iter().enumerate() {
let idx = idx as u32;
if let Some(v) = val {
if let Some(m) = hl.offseter.offsets.get(&idx) {
let mut v: Vec<char> = v.chars().collect();
let mut d = 0;
for (s, e) in m {
let p = (*s as usize) + d;
v.splice(p..p, hl.prefix.clone());
d += hl.prefix.len();
let p = (*e as usize) + d;
v.splice(p..p, hl.suffix.clone());
d += hl.suffix.len();
}
let s: String = v.iter().collect();
res.push(Value::from(s));
} else {
res.push(Value::from(v));
}
}
}
Ok(match res.len() {
0 => Value::None,
1 => res.remove(0),
_ => Value::from(res),
})
}
}
#[derive(Default)]
pub(super) struct Offseter {
offsets: HashMap<u32, BTreeMap<Position, Position>>,
}
impl Offseter {
pub(super) fn highlight(&mut self, os: Vec<Offset>) {
for o in os {
match self.offsets.entry(o.index) {
HEntry::Occupied(mut e) => match e.get_mut().entry(o.start) {
BEntry::Vacant(e) => {
e.insert(o.end);
}
BEntry::Occupied(mut e) => {
if o.end.gt(e.get()) {
e.insert(o.end);
}
}
},
HEntry::Vacant(e) => {
e.insert(BTreeMap::from([(o.start, o.end)]));
}
}
}
}
}
impl TryFrom<Offseter> for Value {
type Error = Error;
fn try_from(or: Offseter) -> Result<Self, Error> {
if or.offsets.is_empty() {
return Ok(Self::None);
}
let mut res = BTreeMap::default();
for (idx, offsets) in or.offsets {
let mut r = Vec::with_capacity(offsets.len());
for (s, e) in offsets {
let mut o = BTreeMap::default();
o.insert("s".to_string(), Value::from(s));
o.insert("e".to_string(), Value::from(e));
r.push(Value::Object(Object::from(o)));
}
res.insert(idx.to_string(), Value::Array(Array::from(r)));
}
Ok(match res.len() {
0 => Value::None,
_ => Value::from(Object::from(res)),
})
}
}
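The prefix/suffix insertion above works by applying the offsets in ascending start order while a running shift accounts for the characters already inserted. A minimal self-contained sketch of that logic (ASCII input assumed so byte and character offsets coincide; the function and names here are illustrative, not part of the crate):

fn highlight(input: &str, offsets: &[(usize, usize)], prefix: &str, suffix: &str) -> String {
    let mut v: Vec<char> = input.chars().collect();
    let prefix: Vec<char> = prefix.chars().collect();
    let suffix: Vec<char> = suffix.chars().collect();
    // Running shift: number of characters already inserted before the position
    // we are about to patch. Requires offsets sorted by start position, which
    // the BTreeMap used by Offseter guarantees.
    let mut d = 0;
    for &(s, e) in offsets {
        let p = s + d;
        v.splice(p..p, prefix.iter().cloned());
        d += prefix.len();
        let p = e + d;
        v.splice(p..p, suffix.iter().cloned());
        d += suffix.len();
    }
    v.into_iter().collect()
}

fn main() {
    let out = highlight("hello the world", &[(0, 5), (10, 15)], "<em>", "</em>");
    assert_eq!(out, "<em>hello</em> the <em>world</em>");
}

The Offseter path returns the raw positions instead, as an object keyed by the field index, for example { "0": [{ s: 0, e: 5 }, { s: 10, e: 15 }] }.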

View file

@ -1,6 +1,8 @@
pub(crate) mod analyzer;
pub(crate) mod docids;
mod doclength;
mod highlighter;
mod offsets;
mod postings;
mod scorer;
mod termdocs;
@ -10,26 +12,32 @@ use crate::err::Error;
use crate::idx::ft::analyzer::Analyzer;
use crate::idx::ft::docids::DocIds;
use crate::idx::ft::doclength::DocLengths;
use crate::idx::ft::highlighter::{Highlighter, Offseter};
use crate::idx::ft::offsets::Offsets;
use crate::idx::ft::postings::Postings;
use crate::idx::ft::scorer::{BM25Scorer, Score};
use crate::idx::ft::termdocs::TermDocs;
use crate::idx::ft::terms::{TermId, Terms};
use crate::idx::{btree, IndexKeyBase, SerdeState};
use crate::kvs::{Key, Transaction};
use crate::sql::scoring::Scoring;
use crate::sql::statements::DefineAnalyzerStatement;
use crate::sql::{Array, Object, Thing, Value};
use crate::sql::{Array, Idiom, Object, Thing, Value};
use roaring::treemap::IntoIter;
use roaring::RoaringTreemap;
use serde::{Deserialize, Serialize};
use std::ops::BitAnd;
pub(crate) type MatchRef = u8;
pub(crate) struct FtIndex {
analyzer: Analyzer,
state_key: Key,
index_key_base: IndexKeyBase,
state: State,
bm25: Bm25Params,
btree_default_order: u32,
bm25: Option<Bm25Params>,
order: u32,
highlighting: bool,
}
#[derive(Clone)]
@ -78,7 +86,9 @@ impl FtIndex {
tx: &mut Transaction,
az: DefineAnalyzerStatement,
index_key_base: IndexKeyBase,
btree_default_order: u32,
order: u32,
scoring: &Scoring,
hl: bool,
) -> Result<Self, Error> {
let state_key: Key = index_key_base.new_bs_key();
let state: State = if let Some(val) = tx.get(state_key.clone()).await? {
@ -86,22 +96,34 @@ impl FtIndex {
} else {
State::default()
};
let mut bm25 = None;
if let Scoring::Bm {
k1,
b,
} = scoring
{
bm25 = Some(Bm25Params {
k1: *k1,
b: *b,
});
}
Ok(Self {
state,
state_key,
index_key_base,
bm25: Bm25Params::default(),
btree_default_order,
bm25,
order,
highlighting: hl,
analyzer: az.into(),
})
}
async fn doc_ids(&self, tx: &mut Transaction) -> Result<DocIds, Error> {
DocIds::new(tx, self.index_key_base.clone(), self.btree_default_order).await
DocIds::new(tx, self.index_key_base.clone(), self.order).await
}
async fn terms(&self, tx: &mut Transaction) -> Result<Terms, Error> {
Terms::new(tx, self.index_key_base.clone(), self.btree_default_order).await
Terms::new(tx, self.index_key_base.clone(), self.order).await
}
fn term_docs(&self) -> TermDocs {
@ -109,11 +131,15 @@ impl FtIndex {
}
async fn doc_lengths(&self, tx: &mut Transaction) -> Result<DocLengths, Error> {
DocLengths::new(tx, self.index_key_base.clone(), self.btree_default_order).await
DocLengths::new(tx, self.index_key_base.clone(), self.order).await
}
async fn postings(&self, tx: &mut Transaction) -> Result<Postings, Error> {
Postings::new(tx, self.index_key_base.clone(), self.btree_default_order).await
Postings::new(tx, self.index_key_base.clone(), self.order).await
}
fn offsets(&self) -> Offsets {
Offsets::new(self.index_key_base.clone())
}
pub(crate) async fn remove_document(
@ -140,7 +166,7 @@ impl FtIndex {
let mut p = self.postings(tx).await?;
let mut t = self.terms(tx).await?;
let td = self.term_docs();
for term_id in term_list {
for term_id in &term_list {
p.remove_posting(tx, term_id, doc_id).await?;
// if the term is not present in any document in the index, we can remove it
let doc_count = td.remove_doc(tx, term_id, doc_id).await?;
@ -148,6 +174,14 @@ impl FtIndex {
t.remove_term_id(tx, term_id).await?;
}
}
// Remove the offsets if any
if self.highlighting {
let o = self.offsets();
for term_id in term_list {
// TODO?: Removal can be done with a prefix on doc_id
o.remove_offsets(tx, doc_id, term_id).await?;
}
}
t.finish(tx).await?;
p.finish(tx).await?;
}
@ -168,10 +202,19 @@ impl FtIndex {
let resolved = d.resolve_doc_id(tx, rid.into()).await?;
let doc_id = *resolved.doc_id();
// Extract the doc_lengths, terms en frequencies
// Extract the doc_lengths, terms and frequencies (and offsets)
let mut t = self.terms(tx).await?;
let (doc_length, terms_and_frequencies) =
self.analyzer.extract_terms_with_frequencies(&mut t, tx, field_content).await?;
let (doc_length, terms_and_frequencies, offsets) = if self.highlighting {
let (dl, tf, ofs) = self
.analyzer
.extract_terms_with_frequencies_with_offsets(&mut t, tx, field_content)
.await?;
(dl, tf, Some(ofs))
} else {
let (dl, tf) =
self.analyzer.extract_terms_with_frequencies(&mut t, tx, field_content).await?;
(dl, tf, None)
};
// Set the doc length
let mut l = self.doc_lengths(tx).await?;
@ -204,7 +247,7 @@ impl FtIndex {
}
// Remove any remaining postings
if let Some(old_term_ids) = old_term_ids {
if let Some(old_term_ids) = &old_term_ids {
for old_term_id in old_term_ids {
p.remove_posting(tx, old_term_id, doc_id).await?;
let doc_count = term_docs.remove_doc(tx, old_term_id, doc_id).await?;
@ -215,6 +258,24 @@ impl FtIndex {
}
}
if self.highlighting {
let o = self.offsets();
// Set the offsets, if any
if let Some(ofs) = offsets {
if !ofs.is_empty() {
for (tid, or) in ofs {
o.set_offsets(tx, doc_id, tid, or).await?;
}
}
}
// In case of an update, we remove the offsets for the terms that no longer exist
if let Some(old_term_ids) = old_term_ids {
for old_term_id in old_term_ids {
o.remove_offsets(tx, doc_id, old_term_id).await?;
}
}
}
// Stores the term list for this doc_id
tx.set(term_ids_key, terms_ids.try_to_val()?).await?;
@ -233,52 +294,63 @@ impl FtIndex {
Ok(())
}
pub(super) async fn extract_terms(
&self,
tx: &mut Transaction,
query_string: String,
) -> Result<Vec<TermId>, Error> {
let t = self.terms(tx).await?;
let (terms, _) = self.analyzer.extract_terms(&t, tx, query_string).await?;
Ok(terms)
}
pub(super) async fn search(
&self,
tx: &mut Transaction,
query_string: String,
) -> Result<Option<HitsIterator>, Error> {
) -> Result<(Vec<TermId>, Option<HitsIterator>), Error> {
let t = self.terms(tx).await?;
let td = self.term_docs();
let terms = self.analyzer.extract_terms(&t, tx, query_string).await?;
let (terms, missing) = self.analyzer.extract_terms(&t, tx, query_string).await?;
if missing {
// If any term does not exist, then because we are doing an AND query,
// we can return an empty result set
return Ok((terms, None));
}
let mut hits: Option<RoaringTreemap> = None;
let mut terms_docs = Vec::with_capacity(terms.len());
for term_id in terms {
if let Some(term_id) = term_id {
if let Some(term_docs) = td.get_docs(tx, term_id).await? {
if let Some(h) = hits {
hits = Some(h.bitand(&term_docs));
} else {
hits = Some(term_docs.clone());
}
terms_docs.push((term_id, term_docs));
continue;
for term_id in &terms {
if let Some(term_docs) = td.get_docs(tx, *term_id).await? {
if let Some(h) = hits {
hits = Some(h.bitand(&term_docs));
} else {
hits = Some(term_docs.clone());
}
terms_docs.push((*term_id, term_docs));
}
return Ok(None);
}
if let Some(hits) = hits {
if !hits.is_empty() {
let postings = self.postings(tx).await?;
let doc_lengths = self.doc_lengths(tx).await?;
// TODO: Scoring should be optional
let scorer = BM25Scorer::new(
doc_lengths,
self.state.total_docs_lengths,
self.state.doc_count,
self.bm25.clone(),
);
let mut scorer = None;
if let Some(bm25) = &self.bm25 {
scorer = Some(BM25Scorer::new(
doc_lengths,
self.state.total_docs_lengths,
self.state.doc_count,
bm25.clone(),
));
}
let doc_ids = self.doc_ids(tx).await?;
return Ok(Some(HitsIterator::new(
doc_ids,
postings,
hits,
terms_docs,
Some(scorer),
)));
return Ok((
terms,
Some(HitsIterator::new(doc_ids, postings, hits, terms_docs, scorer)),
));
}
}
Ok(None)
Ok((terms, None))
}
pub(super) async fn match_id_value(
@ -303,6 +375,55 @@ impl FtIndex {
Ok(false)
}
#[allow(clippy::too_many_arguments)]
pub(super) async fn highlight(
&self,
tx: &mut Transaction,
thg: &Thing,
terms: &Vec<TermId>,
prefix: Value,
suffix: Value,
idiom: &Idiom,
doc: &Value,
) -> Result<Value, Error> {
let doc_key: Key = thg.into();
let doc_ids = self.doc_ids(tx).await?;
if let Some(doc_id) = doc_ids.get_doc_id(tx, doc_key).await? {
let o = self.offsets();
let mut hl = Highlighter::new(prefix, suffix, idiom, doc);
for term_id in terms {
let o = o.get_offsets(tx, doc_id, *term_id).await?;
if let Some(o) = o {
hl.highlight(o.0);
}
}
return hl.try_into();
}
Ok(Value::None)
}
pub(super) async fn extract_offsets(
&self,
tx: &mut Transaction,
thg: &Thing,
terms: &Vec<TermId>,
) -> Result<Value, Error> {
let doc_key: Key = thg.into();
let doc_ids = self.doc_ids(tx).await?;
if let Some(doc_id) = doc_ids.get_doc_id(tx, doc_key).await? {
let o = self.offsets();
let mut or = Offseter::default();
for term_id in terms {
let o = o.get_offsets(tx, doc_id, *term_id).await?;
if let Some(o) = o {
or.highlight(o.0);
}
}
return or.try_into();
}
Ok(Value::None)
}
pub(crate) async fn statistics(&self, tx: &mut Transaction) -> Result<Statistics, Error> {
// TODO do parallel execution
Ok(Statistics {
@ -372,6 +493,7 @@ mod tests {
use crate::idx::ft::{FtIndex, HitsIterator, Score};
use crate::idx::IndexKeyBase;
use crate::kvs::{Datastore, Transaction};
use crate::sql::scoring::Scoring;
use crate::sql::statements::define::analyzer;
use crate::sql::{Array, Thing};
use std::collections::HashMap;
@ -410,10 +532,16 @@ mod tests {
{
// Add one document
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
.await
.unwrap();
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
false,
)
.await
.unwrap();
fti.index_document(&mut tx, &doc1, &Array::from(vec!["hello the world"]))
.await
.unwrap();
@ -423,10 +551,16 @@ mod tests {
{
// Add two documents
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
.await
.unwrap();
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
false,
)
.await
.unwrap();
fti.index_document(&mut tx, &doc2, &Array::from(vec!["a yellow hello"])).await.unwrap();
fti.index_document(&mut tx, &doc3, &Array::from(vec!["foo bar"])).await.unwrap();
tx.commit().await.unwrap();
@ -434,10 +568,16 @@ mod tests {
{
let mut tx = ds.transaction(true, false).await.unwrap();
let fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
.await
.unwrap();
let fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
false,
)
.await
.unwrap();
// Check the statistics
let statistics = fti.statistics(&mut tx).await.unwrap();
@ -447,72 +587,85 @@ mod tests {
assert_eq!(statistics.doc_lengths.keys_count, 3);
// Search & score
let i = fti.search(&mut tx, "hello".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "hello".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
let i = fti.search(&mut tx, "world".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "world".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.4859746))]).await;
let i = fti.search(&mut tx, "yellow".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "yellow".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc2, Some(0.4859746))]).await;
let i = fti.search(&mut tx, "foo".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "foo".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;
let i = fti.search(&mut tx, "bar".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "bar".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;
let i = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
assert!(i.is_none());
}
{
// Reindex one document
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
.await
.unwrap();
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
false,
)
.await
.unwrap();
fti.index_document(&mut tx, &doc3, &Array::from(vec!["nobar foo"])).await.unwrap();
tx.commit().await.unwrap();
// We can still find 'foo'
let mut tx = ds.transaction(false, false).await.unwrap();
let i = fti.search(&mut tx, "foo".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "foo".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;
// We can't anymore find 'bar'
let i = fti.search(&mut tx, "bar".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "bar".to_string()).await.unwrap();
assert!(i.is_none());
// We can now find 'nobar'
let i = fti.search(&mut tx, "nobar".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "nobar".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;
}
{
// Remove documents
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
.await
.unwrap();
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
false,
)
.await
.unwrap();
fti.remove_document(&mut tx, &doc1).await.unwrap();
fti.remove_document(&mut tx, &doc2).await.unwrap();
fti.remove_document(&mut tx, &doc3).await.unwrap();
tx.commit().await.unwrap();
let mut tx = ds.transaction(false, false).await.unwrap();
let i = fti.search(&mut tx, "hello".to_string()).await.unwrap();
assert!(i.is_none());
let (v, h) = fti.search(&mut tx, "hello".to_string()).await.unwrap();
assert!(v.is_empty());
assert!(h.is_none());
let i = fti.search(&mut tx, "foo".to_string()).await.unwrap();
assert!(i.is_none());
let (v, h) = fti.search(&mut tx, "foo".to_string()).await.unwrap();
assert!(v.is_empty());
assert!(h.is_none());
}
}
#[test(tokio::test)]
async fn test_ft_index_bm_25() {
async fn test_ft_index_bm_25(hl: bool) {
// The function `extract_sorted_terms_with_frequencies` is non-deterministic.
// The inner structures (BTrees) are built with the same terms and frequencies,
// but the insertion order is different, ending up in different BTree structures.
@ -529,10 +682,16 @@ mod tests {
let default_btree_order = 5;
{
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
.await
.unwrap();
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
hl,
)
.await
.unwrap();
fti.index_document(
&mut tx,
&doc1,
@ -566,10 +725,16 @@ mod tests {
{
let mut tx = ds.transaction(true, false).await.unwrap();
let fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
.await
.unwrap();
let fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
hl,
)
.await
.unwrap();
let statistics = fti.statistics(&mut tx).await.unwrap();
assert_eq!(statistics.terms.keys_count, 17);
@ -577,7 +742,7 @@ mod tests {
assert_eq!(statistics.doc_ids.keys_count, 4);
assert_eq!(statistics.doc_lengths.keys_count, 4);
let i = fti.search(&mut tx, "the".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "the".to_string()).await.unwrap();
check_hits(
i,
&mut tx,
@ -590,7 +755,7 @@ mod tests {
)
.await;
let i = fti.search(&mut tx, "dog".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "dog".to_string()).await.unwrap();
check_hits(
i,
&mut tx,
@ -598,27 +763,37 @@ mod tests {
)
.await;
let i = fti.search(&mut tx, "fox".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "fox".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
let i = fti.search(&mut tx, "over".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "over".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
let i = fti.search(&mut tx, "lazy".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "lazy".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
let i = fti.search(&mut tx, "jumped".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "jumped".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
let i = fti.search(&mut tx, "nothing".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "nothing".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.87105393))]).await;
let i = fti.search(&mut tx, "animals".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "animals".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc4, Some(0.92279965))]).await;
let i = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
assert!(i.is_none());
}
}
}
#[test(tokio::test)]
async fn test_ft_index_bm_25_without_highlighting() {
test_ft_index_bm_25(false).await;
}
#[test(tokio::test)]
async fn test_ft_index_bm_25_with_highlighting() {
test_ft_index_bm_25(true).await;
}
}

lib/src/idx/ft/offsets.rs Normal file
View file

@ -0,0 +1,156 @@
use crate::err::Error;
use crate::idx::ft::docids::DocId;
use crate::idx::ft::terms::TermId;
use crate::idx::IndexKeyBase;
use crate::kvs::{Transaction, Val};
pub(super) type Position = u32;
pub(super) struct Offsets {
index_key_base: IndexKeyBase,
}
impl Offsets {
pub(super) fn new(index_key_base: IndexKeyBase) -> Self {
Self {
index_key_base,
}
}
pub(super) async fn set_offsets(
&self,
tx: &mut Transaction,
doc_id: DocId,
term_id: TermId,
offsets: OffsetRecords,
) -> Result<(), Error> {
let key = self.index_key_base.new_bo_key(doc_id, term_id);
let val: Val = offsets.try_into()?;
tx.set(key, val).await?;
Ok(())
}
pub(super) async fn get_offsets(
&self,
tx: &mut Transaction,
doc_id: DocId,
term_id: TermId,
) -> Result<Option<OffsetRecords>, Error> {
let key = self.index_key_base.new_bo_key(doc_id, term_id);
if let Some(val) = tx.get(key).await? {
let offsets = val.try_into()?;
Ok(Some(offsets))
} else {
Ok(None)
}
}
pub(super) async fn remove_offsets(
&self,
tx: &mut Transaction,
doc_id: DocId,
term_id: TermId,
) -> Result<(), Error> {
let key = self.index_key_base.new_bo_key(doc_id, term_id);
tx.del(key).await
}
}
#[derive(Clone, Debug, PartialEq)]
pub(super) struct Offset {
pub(super) index: u32,
pub(super) start: Position,
pub(super) end: Position,
}
impl Offset {
pub(super) fn new(index: u32, start: Position, end: Position) -> Self {
Self {
index,
start,
end,
}
}
}
#[derive(Clone, Debug, PartialEq)]
pub(super) struct OffsetRecords(pub(super) Vec<Offset>);
impl TryFrom<OffsetRecords> for Val {
type Error = Error;
fn try_from(offsets: OffsetRecords) -> Result<Self, Self::Error> {
// We build a single vector containing every value (index, start and end).
let mut decompressed = Vec::new();
// We first push the number of offsets,
// so we can rebuild the OffsetRecords on deserialization.
decompressed.push(offsets.0.len() as u32);
// We want the value to be more or less sorted so the RLE compression
// will be more effective
// Indexes are likely to be very small
for o in &offsets.0 {
decompressed.push(o.index);
}
// `starts` and `offsets` are likely to be ascending
for o in &offsets.0 {
decompressed.push(o.start);
decompressed.push(o.end);
}
Ok(bincode::serialize(&decompressed)?)
}
}
impl TryFrom<Val> for OffsetRecords {
type Error = Error;
fn try_from(val: Val) -> Result<Self, Self::Error> {
if val.is_empty() {
return Ok(Self(vec![]));
}
let decompressed: Vec<u32> = bincode::deserialize(&val)?;
let mut iter = decompressed.iter();
let s = *iter.next().ok_or(Error::CorruptedIndex)?;
let mut indexes = Vec::with_capacity(s as usize);
for _ in 0..s {
let index = *iter.next().ok_or(Error::CorruptedIndex)?;
indexes.push(index);
}
let mut res = Vec::with_capacity(s as usize);
for index in indexes {
let start = *iter.next().ok_or(Error::CorruptedIndex)?;
let end = *iter.next().ok_or(Error::CorruptedIndex)?;
res.push(Offset::new(index, start, end));
}
Ok(OffsetRecords(res))
}
}
#[cfg(test)]
mod tests {
use crate::idx::ft::offsets::{Offset, OffsetRecords};
use crate::kvs::Val;
#[test]
fn test_offset_records() {
let o = OffsetRecords(vec![
Offset {
index: 0,
start: 1,
end: 2,
},
Offset {
index: 0,
start: 11,
end: 22,
},
Offset {
index: 1,
start: 3,
end: 4,
},
]);
let v: Val = o.clone().try_into().unwrap();
let o2 = v.try_into().unwrap();
assert_eq!(o, o2)
}
}
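For reference, this is the flat layout the two TryFrom implementations above agree on for the offsets used in this test, before the bincode pass (a sketch of the intermediate representation, not crate API):

fn main() {
    // Offsets: (index, start, end) = (0, 1, 2), (0, 11, 22), (1, 3, 4).
    // The encoder writes the record count first, then every index, then the
    // (start, end) pairs in the same order, so the u32 vector handed to bincode is:
    let decompressed: Vec<u32> = vec![
        3,        // number of offsets
        0, 0, 1,  // the indexes, grouped together
        1, 2,     // (start, end) of the first offset
        11, 22,   // (start, end) of the second offset
        3, 4,     // (start, end) of the third offset
    ];
    assert_eq!(decompressed.len(), 1 + 3 + 3 * 2);
}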

View file

@ -18,7 +18,7 @@ impl Postings {
pub(super) async fn new(
tx: &mut Transaction,
index_key_base: IndexKeyBase,
default_btree_order: u32,
order: u32,
) -> Result<Self, Error> {
let keys = PostingsKeyProvider {
index_key_base: index_key_base.clone(),
@ -27,7 +27,7 @@ impl Postings {
let state: btree::State = if let Some(val) = tx.get(state_key.clone()).await? {
btree::State::try_from_val(val)?
} else {
btree::State::new(default_btree_order)
btree::State::new(order)
};
Ok(Self {
state_key,

View file

@ -14,6 +14,7 @@ use crate::key::bf::Bf;
use crate::key::bi::Bi;
use crate::key::bk::Bk;
use crate::key::bl::Bl;
use crate::key::bo::Bo;
use crate::key::bp::Bp;
use crate::key::bs::Bs;
use crate::key::bt::Bt;
@ -105,6 +106,18 @@ impl IndexKeyBase {
.into()
}
fn new_bo_key(&self, doc_id: DocId, term_id: TermId) -> Key {
Bo::new(
self.inner.ns.as_str(),
self.inner.db.as_str(),
self.inner.tb.as_str(),
self.inner.ix.as_str(),
doc_id,
term_id,
)
.into()
}
fn new_bp_key(&self, node_id: Option<NodeId>) -> Key {
Bp::new(
self.inner.ns.as_str(),

View file

@ -1,11 +1,13 @@
use crate::dbs::{Options, Transaction};
use crate::err::Error;
use crate::idx::ft::FtIndex;
use crate::idx::ft::terms::TermId;
use crate::idx::ft::{FtIndex, MatchRef};
use crate::idx::planner::plan::IndexOption;
use crate::idx::planner::tree::IndexMap;
use crate::idx::IndexKeyBase;
use crate::sql::index::Index;
use crate::sql::{Expression, Table, Thing, Value};
use std::collections::HashMap;
use crate::sql::{Expression, Idiom, Table, Thing, Value};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
#[derive(Clone)]
@ -15,9 +17,16 @@ pub(crate) struct QueryExecutor {
struct Inner {
table: String,
index_map: IndexMap,
index: HashMap<Expression, HashSet<IndexOption>>,
pre_match: Option<Expression>,
ft_map: HashMap<String, FtIndex>,
terms: HashMap<MatchRef, IndexFieldTerms>,
}
struct IndexFieldTerms {
ix: String,
id: Idiom,
t: Vec<TermId>,
}
impl QueryExecutor {
@ -30,29 +39,45 @@ impl QueryExecutor {
) -> Result<Self, Error> {
let mut run = txn.lock().await;
let mut ft_map = HashMap::new();
for ios in index_map.values() {
for ios in index_map.index.values() {
for io in ios {
if let Index::Search {
az,
order,
..
sc,
hl,
} = &io.ix.index
{
if !ft_map.contains_key(&io.ix.name.0) {
let ikb = IndexKeyBase::new(opt, &io.ix);
let az = run.get_az(opt.ns(), opt.db(), az.as_str()).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order, sc, *hl).await?;
ft_map.insert(io.ix.name.0.clone(), ft);
}
}
}
}
let mut terms = HashMap::with_capacity(index_map.terms.len());
for (mr, ifv) in index_map.terms {
if let Some(ft) = ft_map.get(&ifv.ix) {
let term_ids = ft.extract_terms(&mut run, ifv.val.clone()).await?;
terms.insert(
mr,
IndexFieldTerms {
ix: ifv.ix,
id: ifv.id,
t: term_ids,
},
);
}
}
Ok(Self {
inner: Arc::new(Inner {
table: table.0.clone(),
index_map,
index: index_map.index,
pre_match,
ft_map,
terms,
}),
})
}
@ -74,8 +99,9 @@ impl QueryExecutor {
// Otherwise, we look for the first possible index option and evaluate the expression
// Does the record id match this executor's table?
if thg.tb.eq(&self.inner.table) {
if let Some(ios) = self.inner.index_map.get(exp) {
if let Some(ios) = self.inner.index.get(exp) {
for io in ios {
if let Some(fti) = self.inner.ft_map.get(&io.ix.name.0) {
let mut run = txn.lock().await;
@ -94,4 +120,51 @@ impl QueryExecutor {
value: exp.to_string(),
})
}
pub(crate) async fn highlight(
&self,
txn: Transaction,
thg: &Thing,
prefix: Value,
suffix: Value,
match_ref: Value,
doc: &Value,
) -> Result<Value, Error> {
let mut tx = txn.lock().await;
// We have to make the connection between the match ref from the highlight function...
if let Value::Number(n) = match_ref {
let m = n.as_int() as u8;
// ... and from the match operator (@{matchref}@)
if let Some(ift) = self.inner.terms.get(&m) {
// Check that we have an index
if let Some(ft) = self.inner.ft_map.get(&ift.ix) {
// All good, we can do the highlight
return ft.highlight(&mut tx, thg, &ift.t, prefix, suffix, &ift.id, doc).await;
}
}
}
Ok(Value::None)
}
pub(crate) async fn offsets(
&self,
txn: Transaction,
thg: &Thing,
match_ref: Value,
) -> Result<Value, Error> {
let mut tx = txn.lock().await;
// We have to make the connection between the match ref from the offsets function...
if let Value::Number(n) = match_ref {
let m = n.as_int() as u8;
// ... and from the match operator (@{matchref}@)
if let Some(ift) = self.inner.terms.get(&m) {
// Check that we have an index
if let Some(ft) = self.inner.ft_map.get(&ift.ix) {
// All good, we can extract the offsets
return ft.extract_offsets(&mut tx, thg, &ift.t).await;
}
}
}
Ok(Value::None)
}
}
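
For orientation, here is a minimal sketch (not part of this diff) of how the two functions above are driven from SurrealQL, mirroring the integration tests added later in this commit; the analyzer, index and record names are borrowed from those tests:

// Sketch only: the `1` in `@1@` is the match reference that
// `search::highlight(.., .., 1)` and `search::offsets(1)` use to locate
// this executor's full-text index and the extracted term ids.
let sql = r"
    DEFINE ANALYZER simple TOKENIZERS blank,class;
    DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
    CREATE blog:1 SET title = 'Hello World!';
    SELECT id, search::highlight('<em>', '</em>', 1) AS title, search::offsets(1) AS offsets
    FROM blog WHERE title @1@ 'Hello';
";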

View file

@ -14,6 +14,7 @@ use std::collections::HashMap;
pub(crate) struct QueryPlanner<'a> {
opt: &'a Options,
cond: &'a Option<Cond>,
/// There is one executor per table
executors: HashMap<String, QueryExecutor>,
}
@ -32,7 +33,7 @@ impl<'a> QueryPlanner<'a> {
opt: &Options,
t: Table,
) -> Result<Iterable, Error> {
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
let res = Tree::build(self.opt, &txn, &t, self.cond).await?;
if let Some((node, im)) = res {
if let Some(plan) = AllAndStrategy::build(&node)? {

View file

@ -1,8 +1,9 @@
use crate::dbs::{Options, Transaction};
use crate::err::Error;
use crate::idx::ft::{FtIndex, HitsIterator};
use crate::idx::ft::terms::TermId;
use crate::idx::ft::{FtIndex, HitsIterator, MatchRef};
use crate::idx::planner::executor::QueryExecutor;
use crate::idx::planner::tree::{IndexMap, Node};
use crate::idx::planner::tree::IndexMap;
use crate::idx::IndexKeyBase;
use crate::key;
use crate::kvs::Key;
@ -79,7 +80,7 @@ pub(super) struct IndexOption {
}
impl IndexOption {
fn new(ix: DefineIndexStatement, op: Operator, v: Value, ep: Expression) -> Self {
pub(super) fn new(ix: DefineIndexStatement, op: Operator, v: Value, ep: Expression) -> Self {
Self {
ix,
op,
@ -98,28 +99,6 @@ impl IndexOption {
QueryExecutor::new(opt, txn, t, i, Some(self.ep.clone())).await
}
pub(super) fn found(
ix: &DefineIndexStatement,
op: &Operator,
v: &Node,
ep: &Expression,
) -> Option<Self> {
if let Some(v) = v.is_scalar() {
if match ix.index {
Index::Idx => Operator::Equal.eq(op),
Index::Uniq => Operator::Equal.eq(op),
Index::Search {
..
} => {
matches!(op, Operator::Matches(_))
}
} {
return Some(IndexOption::new(ix.clone(), op.to_owned(), v.clone(), ep.clone()));
}
}
None
}
async fn new_iterator(
&self,
opt: &Options,
@ -144,8 +123,8 @@ impl IndexOption {
sc,
order,
} => match self.op {
Operator::Matches(_) => Ok(Box::new(
MatchesThingIterator::new(opt, txn, &self.ix, az, *hl, sc, *order, &self.v)
Operator::Matches(mr) => Ok(Box::new(
MatchesThingIterator::new(opt, txn, &self.ix, az, *hl, sc, *order, mr, &self.v)
.await?,
)),
_ => Err(Error::BypassQueryPlanner),
@ -221,6 +200,7 @@ impl ThingIterator for UniqueEqualThingIterator {
}
struct MatchesThingIterator {
_terms: Option<(MatchRef, Vec<TermId>)>,
hits: Option<HitsIterator>,
}
@ -231,9 +211,10 @@ impl MatchesThingIterator {
txn: &Transaction,
ix: &DefineIndexStatement,
az: &Ident,
_hl: bool,
hl: bool,
sc: &Scoring,
order: u32,
mr: Option<MatchRef>,
v: &Value,
) -> Result<Self, Error> {
let ikb = IndexKeyBase::new(opt, ix);
@ -244,10 +225,11 @@ impl MatchesThingIterator {
{
let query_string = v.clone().convert_to_string()?;
let az = run.get_az(opt.ns(), opt.db(), az.as_str()).await?;
let fti = FtIndex::new(&mut run, az, ikb, order).await?;
let hits = fti.search(&mut run, query_string).await?;
let fti = FtIndex::new(&mut run, az, ikb, order, sc, hl).await?;
let (terms, hits) = fti.search(&mut run, query_string).await?;
Ok(Self {
hits,
_terms: mr.map(|mr| (mr, terms)),
})
} else {
Err(Error::FeatureNotYetImplemented {

View file

@ -1,6 +1,8 @@
use crate::dbs::{Options, Transaction};
use crate::err::Error;
use crate::idx::ft::MatchRef;
use crate::idx::planner::plan::IndexOption;
use crate::sql::index::Index;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Cond, Expression, Idiom, Operator, Subquery, Table, Value};
use async_recursion::async_recursion;
@ -8,9 +10,19 @@ use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
pub(super) struct Tree {}
#[derive(Default)]
pub(super) struct IndexMap {
pub(super) index: HashMap<Expression, HashSet<IndexOption>>,
pub(super) terms: HashMap<MatchRef, IndexFieldValue>,
}
pub(super) type IndexMap = HashMap<Expression, HashSet<IndexOption>>;
pub(super) struct IndexFieldValue {
pub(super) ix: String,
pub(super) id: Idiom,
pub(super) val: String,
}
pub(super) struct Tree {}
impl Tree {
pub(super) async fn build<'a>(
@ -80,7 +92,7 @@ impl<'a> TreeBuilder<'a> {
async fn eval_idiom(&mut self, i: &Idiom) -> Result<Node, Error> {
Ok(if let Some(ix) = self.find_index(i).await? {
Node::IndexedField(ix)
Node::IndexedField(i.to_owned(), ix)
} else {
Node::NonIndexedField
})
@ -101,18 +113,11 @@ impl<'a> TreeBuilder<'a> {
let left = self.eval_value(l).await?;
let right = self.eval_value(r).await?;
let mut index_option = None;
if let Some(ix) = left.is_indexed_field() {
if let Some(io) = IndexOption::found(ix, o, &right, e) {
index_option = Some(io.clone());
self.add_index(e, io);
}
}
if let Some(ix) = right.is_indexed_field() {
if let Some(io) = IndexOption::found(ix, o, &left, e) {
index_option = Some(io.clone());
self.add_index(e, io);
}
}
if let Some((id, ix)) = left.is_indexed_field() {
index_option = self.lookup_index_option(ix, o, id, &right, e);
} else if let Some((id, ix)) = right.is_indexed_field() {
index_option = self.lookup_index_option(ix, o, id, &left, e);
};
Ok(Node::Expression {
index_option,
left: Box::new(left),
@ -123,8 +128,48 @@ impl<'a> TreeBuilder<'a> {
}
}
fn lookup_index_option(
&mut self,
ix: &DefineIndexStatement,
op: &Operator,
id: &Idiom,
v: &Node,
ep: &Expression,
) -> Option<IndexOption> {
if let Some(v) = v.is_scalar() {
if match &ix.index {
Index::Idx => Operator::Equal.eq(op),
Index::Uniq => Operator::Equal.eq(op),
Index::Search {
..
} => {
if let Operator::Matches(mr) = op {
if let Some(mr) = mr {
self.index_map.terms.insert(
*mr,
IndexFieldValue {
ix: ix.name.0.to_owned(),
id: id.to_owned(),
val: v.to_raw_string(),
},
);
}
true
} else {
false
}
}
} {
let io = IndexOption::new(ix.clone(), op.to_owned(), v.clone(), ep.clone());
self.add_index(ep, io.clone());
return Some(io);
}
}
None
}
fn add_index(&mut self, e: &Expression, io: IndexOption) {
match self.index_map.entry(e.clone()) {
match self.index_map.index.entry(e.clone()) {
Entry::Occupied(mut e) => {
e.get_mut().insert(io);
}
@ -150,7 +195,7 @@ pub(super) enum Node {
right: Box<Node>,
operator: Operator,
},
IndexedField(DefineIndexStatement),
IndexedField(Idiom, DefineIndexStatement),
NonIndexedField,
Scalar(Value),
Unsupported,
@ -165,9 +210,9 @@ impl Node {
}
}
pub(super) fn is_indexed_field(&self) -> Option<&DefineIndexStatement> {
if let Node::IndexedField(ix) = self {
Some(ix)
pub(super) fn is_indexed_field(&self) -> Option<(&Idiom, &DefineIndexStatement)> {
if let Node::IndexedField(id, ix) = self {
Some((id, ix))
} else {
None
}
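
A hedged sketch (not part of this diff) of the entry that lookup_index_option records for a match expression; IndexMap and IndexFieldValue are pub(super), so this only compiles inside the planner module, and the field idiom is passed in rather than parsed, purely for illustration:

use crate::idx::planner::tree::{IndexFieldValue, IndexMap};
use crate::sql::Idiom;

// For `title @1@ 'Hello'` planned against the `blog_title` search index,
// the builder keeps an entry keyed by the match reference `1`.
fn record_match_ref(map: &mut IndexMap, title: Idiom) {
    map.terms.insert(
        1, // the match reference taken from Operator::Matches(Some(1))
        IndexFieldValue {
            ix: "blog_title".to_owned(),
            id: title,
            val: "Hello".to_owned(),
        },
    );
}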

lib/src/key/bo.rs Normal file
View file

@ -0,0 +1,69 @@
use crate::idx::ft::docids::DocId;
use crate::idx::ft::terms::TermId;
use derive::Key;
use serde::{Deserialize, Serialize};
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Key)]
pub struct Bo<'a> {
__: u8,
_a: u8,
pub ns: &'a str,
_b: u8,
pub db: &'a str,
_c: u8,
pub tb: &'a str,
_d: u8,
_e: u8,
_f: u8,
pub ix: &'a str,
_g: u8,
pub doc_id: DocId,
pub term_id: TermId,
}
impl<'a> Bo<'a> {
pub fn new(
ns: &'a str,
db: &'a str,
tb: &'a str,
ix: &'a str,
doc_id: DocId,
term_id: TermId,
) -> Self {
Self {
__: b'/',
_a: b'*',
ns,
_b: b'*',
db,
_c: b'*',
tb,
_d: b'!',
_e: b'b',
_f: b'o',
ix,
_g: b'*',
doc_id,
term_id,
}
}
}
#[cfg(test)]
mod tests {
#[test]
fn key() {
use super::*;
#[rustfmt::skip]
let val = Bo::new(
"test",
"test",
"test",
"test",
1,2
);
let enc = Bo::encode(&val).unwrap();
let dec = Bo::decode(&enc).unwrap();
assert_eq!(val, dec);
}
}
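
The _d/_e/_f marker bytes spell `!bo` after the table, which files these entries alongside the other full-text index keys listed in the key module below. A short hedged sketch (names are illustrative) of building the key for one document/term pair:

// Sketch only: the key under which the offsets of term 2 inside document 1
// of the `blog_title` index would be stored.
let key = Bo::new("test_ns", "test_db", "blog", "blog_title", 1, 2);
let enc = Bo::encode(&key).unwrap();
assert_eq!(Bo::decode(&enc).unwrap(), key);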

View file

@ -57,6 +57,7 @@ pub mod bf; // Stores Term/Doc frequency
pub mod bi; // Stores doc keys for doc_ids
pub mod bk; // Stores the term list for doc_ids
pub mod bl; // Stores BTree nodes for doc lengths
pub mod bo; // Stores the offsets
pub mod bp; // Stores BTree nodes for postings
pub mod bs; // Stores FullText index states
pub mod bt; // Stores BTree nodes for terms

View file

@ -148,7 +148,7 @@ impl Function {
// Get the function definition
let val = {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Get the function definition
@ -255,6 +255,7 @@ pub(crate) fn function_names(i: &str) -> IResult<&str, &str> {
preceded(tag("meta::"), function_meta),
preceded(tag("parse::"), function_parse),
preceded(tag("rand::"), function_rand),
preceded(tag("search::"), function_search),
preceded(tag("session::"), function_session),
preceded(tag("string::"), function_string),
preceded(tag("time::"), function_time),
@ -453,6 +454,10 @@ fn function_rand(i: &str) -> IResult<&str, &str> {
))(i)
}
fn function_search(i: &str) -> IResult<&str, &str> {
alt((tag("highlight"), tag("offsets")))(i)
}
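
A small hedged check (not part of this diff) of what this parser accepts once the `search::` prefix has been consumed by the dispatch table above:

// Sketch only: `alt` tries `highlight` first, then `offsets`;
// any other name is rejected and the statement fails to parse.
assert!(matches!(function_search("highlight("), Ok(("(", "highlight"))));
assert!(matches!(function_search("offsets("), Ok(("(", "offsets"))));
assert!(function_search("fuzzy(").is_err());
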
fn function_session(i: &str) -> IResult<&str, &str> {
alt((
tag("db"),

View file

@ -1,3 +1,4 @@
use crate::idx::ft::MatchRef;
use crate::sql::comment::mightbespace;
use crate::sql::comment::shouldbespace;
use crate::sql::error::IResult;
@ -38,11 +39,11 @@ pub enum Operator {
AllEqual, // *=
AnyEqual, // ?=
//
Like, // ~
NotLike, // !~
AllLike, // *~
AnyLike, // ?~
Matches(Option<u8>), // @{ref}@
Like, // ~
NotLike, // !~
AllLike, // *~
AnyLike, // ?~
Matches(Option<MatchRef>), // @{ref}@
//
LessThan, // <
LessThanOrEqual, // <=
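
For reference, a hedged sketch of how the two written forms of the operator map onto this variant, assuming MatchRef is the u8 alias used by the query executor earlier in this commit:

// Sketch only: `@@` carries no match reference, `@1@` carries reference 1.
let plain = Operator::Matches(None);       // ... WHERE title @@ 'Hello'
let with_ref = Operator::Matches(Some(1)); // ... WHERE title @1@ 'Hello'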

View file

@ -60,7 +60,7 @@ impl Param {
// The param has not been set locally
None => {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Get the param definition

View file

@ -30,7 +30,7 @@ impl AnalyzeStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Read the index
@ -42,10 +42,11 @@ impl AnalyzeStatement {
Index::Search {
az,
order,
..
sc,
hl,
} => {
let az = run.get_az(opt.ns(), opt.db(), az.as_str()).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order, sc, *hl).await?;
ft.statistics(&mut run).await?
}
_ => {

View file

@ -135,7 +135,7 @@ impl DefineNamespaceStatement {
// Process the statement
let key = crate::key::ns::new(&self.name);
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
run.set(key, self).await?;
@ -182,7 +182,7 @@ impl DefineDatabaseStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -234,7 +234,7 @@ impl DefineFunctionStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -312,7 +312,7 @@ impl DefineAnalyzerStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -385,7 +385,7 @@ impl DefineLoginStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -401,7 +401,7 @@ impl DefineLoginStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -504,7 +504,7 @@ impl DefineTokenStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -520,7 +520,7 @@ impl DefineTokenStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -537,7 +537,7 @@ impl DefineTokenStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -618,7 +618,7 @@ impl DefineScopeStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -733,7 +733,7 @@ impl DefineParamStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -793,7 +793,7 @@ impl DefineTableStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -972,7 +972,7 @@ impl DefineEventStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -1053,7 +1053,7 @@ impl DefineFieldStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -1210,7 +1210,7 @@ impl DefineIndexStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

View file

@ -35,7 +35,7 @@ impl InfoStatement {
// Create the result set
let mut res = Object::default();
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@ -53,7 +53,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set
@ -85,7 +85,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set
@ -141,7 +141,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set
@ -161,7 +161,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set

View file

@ -26,7 +26,7 @@ impl KillStatement {
// Allowed to run?
opt.check(Level::No)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Fetch the live query key

View file

@ -40,7 +40,7 @@ impl LiveStatement {
// Allowed to run?
opt.check(Level::No)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the live query table

View file

@ -111,7 +111,7 @@ impl RemoveNamespaceStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -163,7 +163,7 @@ impl RemoveDatabaseStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -215,7 +215,7 @@ impl RemoveFunctionStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -270,7 +270,7 @@ impl RemoveAnalyzerStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -323,7 +323,7 @@ impl RemoveLoginStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -338,7 +338,7 @@ impl RemoveLoginStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -398,7 +398,7 @@ impl RemoveTokenStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -413,7 +413,7 @@ impl RemoveTokenStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -428,7 +428,7 @@ impl RemoveTokenStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -485,7 +485,7 @@ impl RemoveScopeStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -537,7 +537,7 @@ impl RemoveParamStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -587,7 +587,7 @@ impl RemoveTableStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -640,7 +640,7 @@ impl RemoveEventStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -699,7 +699,7 @@ impl RemoveFieldStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@ -759,7 +759,7 @@ impl RemoveIndexStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

View file

@ -10,8 +10,8 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
let sql = r"
CREATE blog:1 SET title = 'Hello World!';
DEFINE ANALYZER simple TOKENIZERS blank,class;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75);
SELECT id,title FROM blog WHERE title @@ 'Hello' EXPLAIN;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id, search::highlight('<em>', '</em>', 1) AS title FROM blog WHERE title @1@ 'Hello' EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
@ -26,7 +26,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
"[
{
id: blog:1,
title: 'Hello World!'
title: '<em>Hello</em> World!'
},
{
explain:
@ -35,7 +35,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
detail: {
plan: {
index: 'blog_title',
operator: '@@',
operator: '@1@',
value: 'Hello'
},
table: 'blog',
@ -56,8 +56,8 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
CREATE blog:1 SET title = 'Hello World!';
CREATE blog:2 SET title = 'Foo Bar!';
DEFINE ANALYZER simple TOKENIZERS blank,class FILTERS lowercase;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75);
SELECT id,title FROM blog WHERE (title @@ 'hello' AND id>0) OR (title @@ 'world' AND id<99) EXPLAIN;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id,search::highlight('<em>', '</em>', 1) AS title FROM blog WHERE (title @0@ 'hello' AND id>0) OR (title @1@ 'world' AND id<99) EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
@ -73,7 +73,7 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
"[
{
id: blog:1,
title: 'Hello World!'
title: 'Hello <em>World</em>!'
},
{
explain:
@ -91,3 +91,104 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
assert_eq!(tmp, val);
Ok(())
}
#[tokio::test]
async fn select_where_matches_using_index_and_arrays() -> Result<(), Error> {
let sql = r"
CREATE blog:1 SET content = ['Hello World!', 'Be Bop', 'Foo Bar'];
DEFINE ANALYZER simple TOKENIZERS blank,class;
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id, search::highlight('<em>', '</em>', 1) AS content FROM blog WHERE content @1@ 'Hello Bar' EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None, false).await?;
assert_eq!(res.len(), 4);
//
let _ = res.remove(0).result?;
let _ = res.remove(0).result?;
let _ = res.remove(0).result?;
let tmp = res.remove(0).result?;
let val = Value::parse(
"[
{
id: blog:1,
content: [
'<em>Hello</em> World!',
'Be Bop',
'Foo <em>Bar</em>'
]
},
{
explain:
[
{
detail: {
plan: {
index: 'blog_content',
operator: '@1@',
value: 'Hello Bar'
},
table: 'blog',
},
operation: 'Iterate Index'
}
]
}
]",
);
assert_eq!(tmp, val);
Ok(())
}
#[tokio::test]
async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
let sql = r"
CREATE blog:1 SET title = 'Blog title!', content = ['Hello World!', 'Be Bop', 'Foo Bar'];
DEFINE ANALYZER simple TOKENIZERS blank,class;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id, search::offsets(0) AS title, search::offsets(1) AS content FROM blog WHERE title @0@ 'title' AND content @1@ 'Hello Bar' EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None, false).await?;
assert_eq!(res.len(), 5);
//
for _ in 0..4 {
let _ = res.remove(0).result?;
}
let tmp = res.remove(0).result?;
let val = Value::parse(
"[
{
id: blog:1,
title: {
0: [{s:5, e:10}],
},
content: {
0: [{s:0, e:5}],
2: [{s:4, e:7}]
}
},
{
explain:
[
{
detail: {
plan: {
index: 'blog_content',
operator: '@1@',
value: 'Hello Bar'
},
table: 'blog',
},
operation: 'Iterate Index'
}
]
}
]",
);
assert_eq!(tmp, val);
Ok(())
}
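
In the offsets object above, each key is the position of the matched element inside the field's array value, and s/e are the character start and end of the matched term within that element. A hedged sketch of reading one span back out:

// Sketch only: content[2] is 'Foo Bar' and the result reports {s:4, e:7} for it.
let element = "Foo Bar";
let (s, e) = (4usize, 7usize);
assert_eq!(&element[s..e], "Bar");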