Feature - Search highlighting (#2132)
parent 000a594ad5, commit 8e64750a60
44 changed files with 1229 additions and 347 deletions
|
@@ -58,8 +58,8 @@ async-trait = "0.1.68"
 async-recursion = "1.0.4"
 base64_lib = { version = "0.21.1", package = "base64" }
 bcrypt = "0.14.0"
-bung = "0.1.0"
 bincode = "1.3.3"
+bung = "0.1.0"
 channel = { version = "1.8.0", package = "async-channel" }
 chrono = { version = "0.4.24", features = ["serde"] }
 derive = { version = "0.8.0", package = "surrealdb-derive" }
|
|
|
@@ -152,7 +152,9 @@ impl<'a> Context<'a> {
 		self.deadline.map(|v| v.saturating_duration_since(Instant::now()))
 	}
 
-	pub fn clone_transaction(&self) -> Result<Transaction, Error> {
+	/// Returns a transaction if any.
+	/// Otherwise it fails by returning an Error::NoTx error.
+	pub fn try_clone_transaction(&self) -> Result<Transaction, Error> {
 		match &self.transaction {
 			None => Err(Error::NoTx),
 			Some(txn) => Ok(txn.clone()),
|
|
|
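The rename from clone_transaction to try_clone_transaction makes the missing-transaction case explicit at every call site, which is why each Iterable, Document and function below now propagates the error with the ? operator. A minimal standalone sketch of the pattern (simplified types, not the actual SurrealDB definitions):

use std::sync::{Arc, Mutex};

#[derive(Debug)]
enum Error {
	// Returned when the context has no transaction attached
	NoTx,
}

#[derive(Clone)]
struct Transaction(Arc<Mutex<Vec<String>>>);

struct Context {
	transaction: Option<Transaction>,
}

impl Context {
	/// Returns a transaction if any, otherwise fails with Error::NoTx.
	fn try_clone_transaction(&self) -> Result<Transaction, Error> {
		match &self.transaction {
			None => Err(Error::NoTx),
			Some(txn) => Ok(txn.clone()),
		}
	}
}

fn main() {
	let ctx = Context {
		transaction: None,
	};
	// Callers now have to handle the NoTx case explicitly
	assert!(matches!(ctx.try_clone_transaction(), Err(Error::NoTx)));
}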
@ -23,7 +23,7 @@ impl Iterable {
|
|||
) -> Result<(), Error> {
|
||||
if ctx.is_ok() {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
match self {
|
||||
Iterable::Value(v) => {
|
||||
// Pass the value through
|
||||
|
|
|
@ -64,7 +64,7 @@ impl Iterable {
|
|||
ite: &mut Iterator,
|
||||
) -> Result<(), Error> {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check that the table exists
|
||||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
|
||||
// Fetch the data from the store
|
||||
|
@ -92,7 +92,7 @@ impl Iterable {
|
|||
ite: &mut Iterator,
|
||||
) -> Result<(), Error> {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check that the table exists
|
||||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
|
||||
// Fetch the data from the store
|
||||
|
@ -123,7 +123,7 @@ impl Iterable {
|
|||
ite: &mut Iterator,
|
||||
) -> Result<(), Error> {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check that the table exists
|
||||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
|
||||
// Fetch the data from the store
|
||||
|
@ -152,7 +152,7 @@ impl Iterable {
|
|||
ite: &mut Iterator,
|
||||
) -> Result<(), Error> {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check that the table exists
|
||||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v, opt.strict).await?;
|
||||
// Prepare the start and end keys
|
||||
|
@ -220,7 +220,7 @@ impl Iterable {
|
|||
ite: &mut Iterator,
|
||||
) -> Result<(), Error> {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check that the table exists
|
||||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &v.tb, opt.strict).await?;
|
||||
// Prepare the range start key
|
||||
|
@ -387,13 +387,13 @@ impl Iterable {
|
|||
None => {
|
||||
let min = beg.clone();
|
||||
let max = end.clone();
|
||||
ctx.clone_transaction()?.lock().await.scan(min..max, 1000).await?
|
||||
ctx.try_clone_transaction()?.lock().await.scan(min..max, 1000).await?
|
||||
}
|
||||
Some(ref mut beg) => {
|
||||
beg.push(0x00);
|
||||
let min = beg.clone();
|
||||
let max = end.clone();
|
||||
ctx.clone_transaction()?.lock().await.scan(min..max, 1000).await?
|
||||
ctx.try_clone_transaction()?.lock().await.scan(min..max, 1000).await?
|
||||
}
|
||||
};
|
||||
// If there are key-value entries then fetch them
|
||||
|
@ -418,7 +418,7 @@ impl Iterable {
|
|||
let gra: crate::key::graph::Graph = (&k).into();
|
||||
// Fetch the data from the store
|
||||
let key = thing::new(opt.ns(), opt.db(), gra.ft, &gra.fk);
|
||||
let val = ctx.clone_transaction()?.lock().await.get(key).await?;
|
||||
let val = ctx.try_clone_transaction()?.lock().await.get(key).await?;
|
||||
let rid = Thing::from((gra.ft, gra.fk));
|
||||
let mut ctx = Context::new(ctx);
|
||||
ctx.add_thing(&rid);
|
||||
|
@ -446,7 +446,7 @@ impl Iterable {
|
|||
plan: Plan,
|
||||
ite: &mut Iterator,
|
||||
) -> Result<(), Error> {
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check that the table exists
|
||||
txn.lock().await.check_ns_db_tb(opt.ns(), opt.db(), &table.0, opt.strict).await?;
|
||||
let mut iterator = plan.new_iterator(opt, &txn).await?;
|
||||
|
|
|
@ -17,7 +17,7 @@ impl<'a> Document<'a> {
|
|||
// Should we run permissions checks?
|
||||
if opt.perms && opt.auth.perms() {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Get the table
|
||||
let tb = self.tb(opt, &txn).await?;
|
||||
// Get the permission clause
|
||||
|
|
|
@ -13,7 +13,7 @@ impl<'a> Document<'a> {
|
|||
_stm: &Statement<'_>,
|
||||
) -> Result<(), Error> {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Get the table
|
||||
let tb = self.tb(opt, &txn).await?;
|
||||
// This table is schemafull
|
||||
|
|
|
@ -18,7 +18,7 @@ impl<'a> Document<'a> {
|
|||
_stm: &Statement<'_>,
|
||||
) -> Result<(), Error> {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check if the table is a view
|
||||
if self.tb(opt, &txn).await?.drop {
|
||||
return Ok(());
|
||||
|
|
|
@ -24,7 +24,7 @@ impl<'a> Document<'a> {
|
|||
// Don't run permissions
|
||||
let opt = &opt.perms(false);
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Loop through all event statements
|
||||
for ev in self.ev(opt, &txn).await?.iter() {
|
||||
// Get the event action
|
||||
|
|
|
@ -22,7 +22,7 @@ impl<'a> Document<'a> {
|
|||
// Get the user applied input
|
||||
let inp = self.initial.changed(self.current.as_ref());
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Loop through all field statements
|
||||
for fd in self.fd(opt, &txn).await?.iter() {
|
||||
// Loop over each field in document
|
||||
|
|
|
@ -28,7 +28,7 @@ impl<'a> Document<'a> {
|
|||
return Ok(());
|
||||
}
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check if the table is a view
|
||||
if self.tb(opt, &txn).await?.drop {
|
||||
return Ok(());
|
||||
|
@ -49,7 +49,7 @@ impl<'a> Document<'a> {
|
|||
let mut run = txn.lock().await;
|
||||
|
||||
// Store all the variables and parameters required by the index operation
|
||||
let mut ic = IndexOperation::new(opt, ix, o, n, rid);
|
||||
let ic = IndexOperation::new(opt, ix, o, n, rid);
|
||||
|
||||
// Index operation dispatching
|
||||
match &ix.index {
|
||||
|
@ -60,12 +60,7 @@ impl<'a> Document<'a> {
|
|||
sc,
|
||||
hl,
|
||||
order,
|
||||
} => match sc {
|
||||
Scoring::Bm {
|
||||
..
|
||||
} => ic.index_best_matching_search(&mut run, az, *order, *hl).await?,
|
||||
Scoring::Vs => ic.index_vector_search(az, *hl).await?,
|
||||
},
|
||||
} => ic.index_full_text(&mut run, az, *order, sc, *hl).await?,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
@ -182,16 +177,17 @@ impl<'a> IndexOperation<'a> {
|
|||
})
|
||||
}
|
||||
|
||||
async fn index_best_matching_search(
|
||||
async fn index_full_text(
|
||||
&self,
|
||||
run: &mut kvs::Transaction,
|
||||
az: &Ident,
|
||||
order: u32,
|
||||
_hl: bool,
|
||||
scoring: &Scoring,
|
||||
hl: bool,
|
||||
) -> Result<(), Error> {
|
||||
let ikb = IndexKeyBase::new(self.opt, self.ix);
|
||||
let az = run.get_az(self.opt.ns(), self.opt.db(), az.as_str()).await?;
|
||||
let mut ft = FtIndex::new(run, az, ikb, order).await?;
|
||||
let mut ft = FtIndex::new(run, az, ikb, order, scoring, hl).await?;
|
||||
if let Some(n) = &self.n {
|
||||
// TODO: Apply the analyzer
|
||||
ft.index_document(run, self.rid, n).await
|
||||
|
@ -199,10 +195,4 @@ impl<'a> IndexOperation<'a> {
|
|||
ft.remove_document(run, self.rid).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn index_vector_search(&mut self, _az: &Ident, _hl: bool) -> Result<(), Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "VectorSearch indexing",
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ impl<'a> Document<'a> {
|
|||
return Ok(());
|
||||
}
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Get the record id
|
||||
let id = self.id.as_ref().unwrap();
|
||||
// Loop through all index statements
|
||||
|
|
|
@ -82,7 +82,7 @@ impl<'a> Document<'a> {
|
|||
// Should we run permissions checks?
|
||||
if opt.perms && opt.auth.perms() {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Loop through all field statements
|
||||
for fd in self.fd(opt, &txn).await?.iter() {
|
||||
// Loop over each field in document
|
||||
|
|
|
@ -24,7 +24,7 @@ impl<'a> Document<'a> {
|
|||
return Ok(());
|
||||
}
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check if the table is a view
|
||||
if self.tb(opt, &txn).await?.drop {
|
||||
return Ok(());
|
||||
|
|
|
@ -16,7 +16,7 @@ impl<'a> Document<'a> {
|
|||
return Ok(());
|
||||
}
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check if the table is a view
|
||||
if self.tb(opt, &txn).await?.drop {
|
||||
return Ok(());
|
||||
|
|
|
@ -56,7 +56,7 @@ impl<'a> Document<'a> {
|
|||
Action::Update
|
||||
};
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Loop through all foreign table statements
|
||||
for ft in self.ft(opt, &txn).await?.iter() {
|
||||
// Get the table definition
|
||||
|
|
|
@ -20,6 +20,7 @@ pub mod operate;
|
|||
pub mod parse;
|
||||
pub mod rand;
|
||||
pub mod script;
|
||||
pub mod search;
|
||||
pub mod session;
|
||||
pub mod sleep;
|
||||
pub mod string;
|
||||
|
@ -30,6 +31,7 @@ pub mod util;
|
|||
/// Attempts to run any function
|
||||
pub async fn run(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Value, Error> {
|
||||
if name.eq("sleep")
|
||||
|| name.starts_with("search")
|
||||
|| name.starts_with("http")
|
||||
|| name.starts_with("crypto::argon2")
|
||||
|| name.starts_with("crypto::bcrypt")
|
||||
|
@ -300,6 +302,9 @@ pub async fn asynchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Re
|
|||
"http::patch" => http::patch(ctx).await,
|
||||
"http::delete" => http::delete(ctx).await,
|
||||
//
|
||||
"search::highlight" => search::highlight(ctx).await,
|
||||
"search::offsets" => search::offsets(ctx).await,
|
||||
//
|
||||
"sleep" => sleep::sleep(ctx).await,
|
||||
)
|
||||
}
|
||||
|
|
|
@ -169,7 +169,7 @@ pub(crate) async fn matches(ctx: &Context<'_>, e: &Expression) -> Result<Value,
|
|||
if let Some(thg) = ctx.thing() {
|
||||
if let Some(exe) = ctx.get_query_executor(&thg.tb) {
|
||||
// Clone transaction
|
||||
let txn = ctx.clone_transaction()?;
|
||||
let txn = ctx.try_clone_transaction()?;
|
||||
// Check the matches
|
||||
return exe.matches(&txn, thg, e).await;
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@ mod math;
|
|||
mod meta;
|
||||
mod parse;
|
||||
mod rand;
|
||||
mod search;
|
||||
mod session;
|
||||
mod string;
|
||||
mod time;
|
||||
|
@ -42,6 +43,7 @@ impl_module_def!(
|
|||
"parse" => (parse::Package),
|
||||
"rand" => (rand::Package),
|
||||
"array" => (array::Package),
|
||||
"search" => (search::Package),
|
||||
"session" => (session::Package),
|
||||
"sleep" => fut Async,
|
||||
"string" => (string::Package),
|
||||
|
|
12
lib/src/fnc/script/modules/surrealdb/functions/search.rs
Normal file
|
@@ -0,0 +1,12 @@
use super::fut;
use crate::fnc::script::modules::impl_module_def;
use js::prelude::Async;

pub struct Package;

impl_module_def!(
	Package,
	"search",
	"highlight" => fut Async,
	"offsets" => fut Async
);
|
28
lib/src/fnc/search.rs
Normal file
|
@@ -0,0 +1,28 @@
use crate::ctx::Context;
use crate::err::Error;
use crate::sql::Value;

pub async fn highlight(
	ctx: &Context<'_>,
	(prefix, suffix, match_ref): (Value, Value, Value),
) -> Result<Value, Error> {
	if let Some(doc) = ctx.doc() {
		if let Some(thg) = ctx.thing() {
			if let Some(exe) = ctx.get_query_executor(&thg.tb) {
				let txn = ctx.try_clone_transaction()?;
				return exe.highlight(txn, thg, prefix, suffix, match_ref.clone(), doc).await;
			}
		}
	}
	Ok(Value::None)
}

pub async fn offsets(ctx: &Context<'_>, (match_ref,): (Value,)) -> Result<Value, Error> {
	if let Some(thg) = ctx.thing() {
		if let Some(exe) = ctx.get_query_executor(&thg.tb) {
			let txn = ctx.try_clone_transaction()?;
			return exe.offsets(txn, thg, match_ref.clone()).await;
		}
	}
	Ok(Value::None)
}
|
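Both functions only do something when the current context carries a document, a record id and a query executor for the matched table, i.e. inside a SELECT that used the MATCHES operator against a search index defined with highlighting. A hedged end-to-end sketch through the embedded Rust API (the analyzer, index and table names, and the exact DEFINE INDEX clause, are illustrative assumptions rather than something taken from this diff):

use surrealdb::dbs::Session;
use surrealdb::kvs::Datastore;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
	let ds = Datastore::new("memory").await?;
	let ses = Session::for_kv().with_ns("test").with_db("test");
	let sql = "
		DEFINE ANALYZER simple TOKENIZERS blank,class FILTERS lowercase;
		DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
		CREATE blog:1 SET content = 'Hello World!';
		SELECT search::highlight('<em>', '</em>', 1) AS content,
			search::offsets(1) AS offsets
		FROM blog WHERE content @1@ 'hello';
	";
	for res in ds.execute(sql, &ses, None, false).await? {
		println!("{:?}", res.result?);
	}
	Ok(())
}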
|
@ -179,7 +179,7 @@ pub(super) enum Term {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::idx::ft::analyzer::tests::test_analyser;
|
||||
use crate::idx::ft::analyzer::tests::test_analyzer;
|
||||
|
||||
#[test]
|
||||
fn test_arabic_stemmer() {
|
||||
|
@ -189,17 +189,17 @@ mod tests {
|
|||
"كلاب", "تحب", "الجر", "في", "حديق", "لكن", "كلب", "صغير", "يفضل", "نوم", "في", "سرير",
|
||||
"بدل", "من", "الجر",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(arabic);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ar);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ara);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -233,17 +233,17 @@ mod tests {
|
|||
"løb",
|
||||
".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(danish);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(dan);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(da);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -257,17 +257,17 @@ mod tests {
|
|||
"hond", "houd", "ervan", "om", "in", "het", "park", "te", "renn", ",", "mar", "mijn",
|
||||
"klein", "hond", "slaapt", "liever", "in", "zijn", "mand", "dan", "te", "renn", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(dutch);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(nl);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(nld);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -281,17 +281,17 @@ mod tests {
|
|||
"teacher", "are", "often", "teach", ",", "but", "my", "favorit", "teacher", "prefer",
|
||||
"read", "in", "her", "spare", "time", "rather", "than", "teach", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(english);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(eng);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(en);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -305,17 +305,17 @@ mod tests {
|
|||
"le", "chien", "adorent", "cour", "dan", "le", "parc", ",", "mais", "mon", "pet",
|
||||
"chien", "aim", "plutôt", "se", "blott", "sur", "le", "canap", "que", "de", "cour",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(french);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(fr);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(fra);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -330,17 +330,17 @@ mod tests {
|
|||
"hund", "zieht", "es", "vor", ",", "auf", "dem", "sofa", "zu", "schlaf", ",", "statt",
|
||||
"zu", "lauf", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(german);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(de);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(deu);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -375,17 +375,17 @@ mod tests {
|
|||
"τρεχ",
|
||||
".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(greek);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ell);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(el);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -399,17 +399,17 @@ mod tests {
|
|||
"a", "kutya", "szeret", "futn", "a", "par", ",", "de", "az", "én", "kics", "kutya",
|
||||
"inkább", "alsz", "a", "kosar", ",", "mints", "fu", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(hungarian);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(hu);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(hun);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -423,17 +423,17 @@ mod tests {
|
|||
"i", "can", "aman", "corr", "nel", "parc", ",", "ma", "il", "mio", "piccol", "can",
|
||||
"prefer", "dorm", "nel", "suo", "cest", "piuttost", "che", "corr", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(italian);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(it);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ita);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -467,17 +467,17 @@ mod tests {
|
|||
"løp",
|
||||
".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(norwegian);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(no);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(nor);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -491,17 +491,17 @@ mod tests {
|
|||
"os", "cã", "ador", "corr", "no", "parqu", ",", "mas", "o", "meu", "pequen", "cã",
|
||||
"prefer", "dorm", "na", "sua", "cam", "em", "vez", "de", "corr", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(portuguese);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(pt);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(por);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -515,17 +515,17 @@ mod tests {
|
|||
"câin", "ador", "să", "alerg", "în", "parc", ",", "dar", "cățel", "meu", "prefer",
|
||||
"să", "doarm", "în", "coș", "lui", "decât", "să", "alerg", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(romanian);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ro);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ron);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -557,17 +557,17 @@ mod tests {
|
|||
"бега",
|
||||
".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(russian);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ru);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(rus);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -581,17 +581,17 @@ mod tests {
|
|||
"los", "perr", "aman", "corr", "en", "el", "parqu", ",", "per", "mi", "pequeñ", "perr",
|
||||
"prefier", "dorm", "en", "su", "cam", "en", "lug", "de", "corr", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(spanish);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(es);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(spa);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -625,17 +625,17 @@ mod tests {
|
|||
"spring",
|
||||
".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(swedish);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(sv);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(swe);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -672,17 +672,17 @@ mod tests {
|
|||
"லை",
|
||||
".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tamil);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(ta);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tam);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -696,17 +696,17 @@ mod tests {
|
|||
"köpek", "park", "koşma", "sever", ",", "am", "be", "küçük", "köpek", "koşmak",
|
||||
"yatak", "uyuma", "tercih", "eder", ".",
|
||||
];
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(turkish);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tr);",
|
||||
input,
|
||||
&output,
|
||||
);
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS snowball(tur);",
|
||||
input,
|
||||
&output,
|
||||
|
@ -715,7 +715,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_ngram() {
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS lowercase,ngram(2,3);",
|
||||
"Ālea iacta est",
|
||||
&["āl", "āle", "le", "lea", "ia", "iac", "ac", "act", "ct", "cta", "es", "est"],
|
||||
|
@ -724,7 +724,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_edgengram() {
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS lowercase,edgengram(2,3);",
|
||||
"Ālea iacta est",
|
||||
&["āl", "āle", "ia", "iac", "es", "est"],
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::ft::analyzer::tokenizer::{Tokenizer, Tokens};
|
||||
use crate::idx::ft::doclength::DocLength;
|
||||
use crate::idx::ft::offsets::{Offset, OffsetRecords};
|
||||
use crate::idx::ft::postings::TermFrequency;
|
||||
use crate::idx::ft::terms::{TermId, Terms};
|
||||
use crate::kvs::Transaction;
|
||||
use crate::sql::statements::DefineAnalyzerStatement;
|
||||
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
|
||||
use crate::sql::Array;
|
||||
use crate::sql::{Array, Value};
|
||||
use filter::Filter;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
@ -40,46 +41,47 @@ impl Analyzer {
|
|||
t: &Terms,
|
||||
tx: &mut Transaction,
|
||||
query_string: String,
|
||||
) -> Result<Vec<Option<TermId>>, Error> {
|
||||
let tokens = self.analyse(query_string);
|
||||
) -> Result<(Vec<TermId>, bool), Error> {
|
||||
let tokens = self.analyze(query_string);
|
||||
// We first collect every unique term
// as it can contain duplicates
|
||||
let mut terms = HashSet::new();
|
||||
for token in tokens.list() {
|
||||
terms.insert(token);
|
||||
}
|
||||
let mut missing = false;
|
||||
// Now we can extract the term ids
|
||||
let mut res = Vec::with_capacity(terms.len());
|
||||
for term in terms {
|
||||
let term_id = t.get_term_id(tx, tokens.get_token_string(term)).await?;
|
||||
if let Some(term_id) = t.get_term_id(tx, tokens.get_token_string(term)).await? {
|
||||
res.push(term_id);
|
||||
} else {
|
||||
missing = true;
|
||||
}
|
||||
Ok(res)
|
||||
}
|
||||
Ok((res, missing))
|
||||
}
|
||||
|
||||
/// This method is used for indexing.
|
||||
/// It will create new term ids for terms that do not already exist.
|
||||
pub(super) async fn extract_terms_with_frequencies(
|
||||
&self,
|
||||
t: &mut Terms,
|
||||
terms: &mut Terms,
|
||||
tx: &mut Transaction,
|
||||
field_content: &Array,
|
||||
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>), Error> {
|
||||
let mut doc_length = 0;
|
||||
let mut dl = 0;
|
||||
// Let's first collect all the inputs, and collect the tokens.
|
||||
// We need to store them because everything after is zero-copy
|
||||
let mut inputs = Vec::with_capacity(field_content.0.len());
|
||||
for v in &field_content.0 {
|
||||
let input = v.to_owned().convert_to_string()?;
|
||||
let tks = self.analyse(input);
|
||||
inputs.push(tks);
|
||||
}
|
||||
let mut inputs = vec![];
|
||||
self.analyze_content(field_content, &mut inputs)?;
|
||||
// We then collect every unique term and count its frequency
|
||||
let mut terms = HashMap::new();
|
||||
for tokens in &inputs {
|
||||
for token in tokens.list() {
|
||||
doc_length += 1;
|
||||
match terms.entry(tokens.get_token_string(token)) {
|
||||
let mut tf: HashMap<&str, TermFrequency> = HashMap::new();
|
||||
for tks in &inputs {
|
||||
for tk in tks.list() {
|
||||
dl += 1;
|
||||
let s = tks.get_token_string(tk);
|
||||
match tf.entry(s) {
|
||||
Entry::Vacant(e) => {
|
||||
e.insert(1);
|
||||
}
|
||||
|
@ -89,15 +91,76 @@ impl Analyzer {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Now we can extract the term ids
|
||||
let mut res = Vec::with_capacity(terms.len());
|
||||
for (term, freq) in terms {
|
||||
res.push((t.resolve_term_id(tx, term).await?, freq));
|
||||
// Now we can resolve the term ids
|
||||
let mut tfid = Vec::with_capacity(tf.len());
|
||||
for (t, f) in tf {
|
||||
tfid.push((terms.resolve_term_id(tx, t).await?, f));
|
||||
}
|
||||
Ok((doc_length, res))
|
||||
Ok((dl, tfid))
|
||||
}
|
||||
|
||||
fn analyse(&self, input: String) -> Tokens {
|
||||
/// This method is used for indexing.
|
||||
/// It will create new term ids for terms that do not already exist.
|
||||
pub(super) async fn extract_terms_with_frequencies_with_offsets(
|
||||
&self,
|
||||
terms: &mut Terms,
|
||||
tx: &mut Transaction,
|
||||
field_content: &Array,
|
||||
) -> Result<(DocLength, Vec<(TermId, TermFrequency)>, Vec<(TermId, OffsetRecords)>), Error> {
|
||||
let mut dl = 0;
|
||||
// Let's first collect all the inputs, and collect the tokens.
|
||||
// We need to store them because everything after is zero-copy
|
||||
let mut inputs = Vec::with_capacity(field_content.len());
|
||||
self.analyze_content(field_content, &mut inputs)?;
|
||||
// We then collect every unique term, count its frequency, and extract the offsets
|
||||
let mut tfos: HashMap<&str, Vec<Offset>> = HashMap::new();
|
||||
for (i, tks) in inputs.iter().enumerate() {
|
||||
for tk in tks.list() {
|
||||
dl += 1;
|
||||
let s = tks.get_token_string(tk);
|
||||
let o = tk.new_offset(i as u32);
|
||||
match tfos.entry(s) {
|
||||
Entry::Vacant(e) => {
|
||||
e.insert(vec![o]);
|
||||
}
|
||||
Entry::Occupied(mut e) => e.get_mut().push(o),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now we can resolve the term ids
|
||||
let mut tfid = Vec::with_capacity(tfos.len());
|
||||
let mut osid = Vec::with_capacity(tfos.len());
|
||||
for (t, o) in tfos {
|
||||
let id = terms.resolve_term_id(tx, t).await?;
|
||||
tfid.push((id, o.len() as TermFrequency));
|
||||
osid.push((id, OffsetRecords(o)));
|
||||
}
|
||||
Ok((dl, tfid, osid))
|
||||
}
|
||||
|
||||
fn analyze_content(&self, field_content: &Array, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||
for v in &field_content.0 {
|
||||
self.analyze_value(v, tks);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn analyze_value(&self, val: &Value, tks: &mut Vec<Tokens>) {
|
||||
match val {
|
||||
Value::Strand(s) => tks.push(self.analyze(s.0.clone())),
|
||||
Value::Number(n) => tks.push(self.analyze(n.to_string())),
|
||||
Value::Bool(b) => tks.push(self.analyze(b.to_string())),
|
||||
Value::Array(a) => {
|
||||
for v in &a.0 {
|
||||
self.analyze_value(v, tks);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
fn analyze(&self, input: String) -> Tokens {
|
||||
if let Some(t) = &self.t {
|
||||
if !input.is_empty() {
|
||||
let t = Tokenizer::tokenize(t, input);
|
||||
|
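For indexing, both extraction methods reduce to the same aggregation: analyze every value of the field into tokens, then fold the tokens into a map keyed by the term string, counting occurrences (the term frequency) and, when highlighting is enabled, remembering where each occurrence sits. A minimal standalone sketch of that aggregation, with a naive whitespace tokenizer standing in for the configurable analyzer:

use std::collections::HashMap;

// One occurrence of a term: (input index, start byte, end byte), akin to an Offset record
type Occurrence = (u32, usize, usize);

fn extract(inputs: &[&str]) -> (u64, HashMap<String, (u64, Vec<Occurrence>)>) {
	// Total number of tokens, i.e. the document length later used by BM25
	let mut doc_length = 0u64;
	let mut terms: HashMap<String, (u64, Vec<Occurrence>)> = HashMap::new();
	for (i, input) in inputs.iter().enumerate() {
		let mut from = 0;
		for token in input.split_whitespace() {
			// Locate the token in the original input to record its offsets
			let s = from + input[from..].find(token).unwrap_or(0);
			let e = s + token.len();
			from = e;
			doc_length += 1;
			let entry = terms.entry(token.to_lowercase()).or_insert((0, Vec::new()));
			entry.0 += 1;
			entry.1.push((i as u32, s, e));
		}
	}
	(doc_length, terms)
}

fn main() {
	let (dl, terms) = extract(&["Hello the world", "a yellow hello"]);
	assert_eq!(dl, 6);
	let (freq, occurrences) = &terms["hello"];
	// "hello" appears once in each input value
	assert_eq!((*freq, occurrences.len()), (2, 2));
	println!("{dl} tokens, {} unique terms", terms.len());
}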
@ -113,11 +176,11 @@ mod tests {
|
|||
use super::Analyzer;
|
||||
use crate::sql::statements::define::analyzer;
|
||||
|
||||
pub(super) fn test_analyser(def: &str, input: &str, expected: &[&str]) {
|
||||
pub(super) fn test_analyzer(def: &str, input: &str, expected: &[&str]) {
|
||||
let (_, az) = analyzer(def).unwrap();
|
||||
let a: Analyzer = az.into();
|
||||
|
||||
let tokens = a.analyse(input.to_string());
|
||||
let tokens = a.analyze(input.to_string());
|
||||
let mut res = vec![];
|
||||
for t in tokens.list() {
|
||||
res.push(tokens.get_token_string(t));
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use crate::idx::ft::analyzer::filter::{Filter, FilterResult, Term};
|
||||
use crate::idx::ft::offsets::{Offset, Position};
|
||||
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
|
||||
|
||||
pub(super) struct Tokens {
|
||||
|
@ -35,18 +36,19 @@ impl Tokens {
|
|||
match fr {
|
||||
FilterResult::Term(t) => match t {
|
||||
Term::Unchanged => tks.push(tk),
|
||||
Term::NewTerm(s) => tks.push(Token::String(s)),
|
||||
Term::NewTerm(s) => tks.push(tk.new_token(s)),
|
||||
},
|
||||
FilterResult::Terms(ts) => {
|
||||
let mut tk = Some(tk);
|
||||
let mut already_pushed = false;
|
||||
for t in ts {
|
||||
match t {
|
||||
Term::Unchanged => {
|
||||
if let Some(tk) = tk.take() {
|
||||
tks.push(tk)
|
||||
if !already_pushed {
|
||||
tks.push(tk.clone());
|
||||
already_pushed = true;
|
||||
}
|
||||
}
|
||||
Term::NewTerm(s) => tks.push(Token::String(s)),
|
||||
Term::NewTerm(s) => tks.push(tk.new_token(s)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -66,22 +68,36 @@ impl Tokens {
|
|||
|
||||
#[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord, Hash)]
|
||||
pub(super) enum Token {
|
||||
Ref(usize, usize),
|
||||
String(String),
|
||||
Ref(Position, Position),
|
||||
String(Position, Position, String),
|
||||
}
|
||||
|
||||
impl Token {
|
||||
fn new_token(&self, t: String) -> Self {
|
||||
match self {
|
||||
Token::Ref(s, e) => Token::String(*s, *e, t),
|
||||
Token::String(s, e, _) => Token::String(*s, *e, t),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn new_offset(&self, i: u32) -> Offset {
|
||||
match self {
|
||||
Token::Ref(s, e) => Offset::new(i, *s, *e),
|
||||
Token::String(s, e, _) => Offset::new(i, *s, *e),
|
||||
}
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Token::Ref(start, end) => start == end,
|
||||
Token::String(s) => s.is_empty(),
|
||||
Token::String(_, _, s) => s.is_empty(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn get_str<'a>(&'a self, i: &'a str) -> &str {
|
||||
match self {
|
||||
Token::Ref(s, e) => &i[*s..*e],
|
||||
Token::String(s) => s,
|
||||
Token::Ref(s, e) => &i[(*s as usize)..(*e as usize)],
|
||||
Token::String(_, _, s) => s,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -128,10 +144,10 @@ impl Tokenizer {
|
|||
// If the character is not valid for indexing (space, control...)
|
||||
// Then we increase the last position to the next character
|
||||
if !is_valid {
|
||||
last_pos += c.len_utf8();
|
||||
last_pos += c.len_utf8() as Position;
|
||||
}
|
||||
}
|
||||
current_pos += c.len_utf8();
|
||||
current_pos += c.len_utf8() as Position;
|
||||
}
|
||||
if current_pos != last_pos {
|
||||
t.push(Token::Ref(last_pos, current_pos));
|
||||
|
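Token now carries the start and end of the matched text as numeric Position values (byte offsets), and the tokenizer advances its counters by char::len_utf8 so those offsets remain valid slice boundaries even for multi-byte characters. A small illustration of that bookkeeping with a simplified whitespace-only splitter (the real tokenizer also splits on character classes and punctuation):

type Position = u32;

fn byte_ranges(input: &str) -> Vec<(Position, Position)> {
	let mut out = Vec::new();
	let mut last_pos: Position = 0;
	let mut current_pos: Position = 0;
	for c in input.chars() {
		if c.is_whitespace() {
			if current_pos != last_pos {
				out.push((last_pos, current_pos));
			}
			// Skip over the separator itself
			last_pos = current_pos + c.len_utf8() as Position;
		}
		current_pos += c.len_utf8() as Position;
	}
	if current_pos != last_pos {
		out.push((last_pos, current_pos));
	}
	out
}

fn main() {
	// 'Ā' takes two bytes in UTF-8, so the second token starts at byte 6, not at character index 5
	let input = "Ālea iacta est";
	let ranges = byte_ranges(input);
	assert_eq!(ranges, vec![(0, 5), (6, 11), (12, 15)]);
	for (s, e) in &ranges {
		println!("{}", &input[*s as usize..*e as usize]);
	}
}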
@ -229,11 +245,11 @@ impl Splitter {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::idx::ft::analyzer::tests::test_analyser;
|
||||
use crate::idx::ft::analyzer::tests::test_analyzer;
|
||||
|
||||
#[test]
|
||||
fn test_tokenize_blank_class() {
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class FILTERS lowercase",
|
||||
"Abc12345xYZ DL1809 item123456 978-3-16-148410-0 1HGCM82633A123456",
|
||||
&[
|
||||
|
@ -245,7 +261,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_tokenize_source_code() {
|
||||
test_analyser(
|
||||
test_analyzer(
|
||||
"DEFINE ANALYZER test TOKENIZERS blank,class,camel,punct FILTERS lowercase",
|
||||
r#"struct MyRectangle {
|
||||
// specified by corners
|
||||
|
|
141
lib/src/idx/ft/highlighter.rs
Normal file
|
@ -0,0 +1,141 @@
|
|||
use crate::err::Error;
|
||||
use crate::idx::ft::offsets::{Offset, Position};
|
||||
use crate::sql::{Array, Idiom, Object, Value};
|
||||
use std::collections::btree_map::Entry as BEntry;
|
||||
use std::collections::hash_map::Entry as HEntry;
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::HashMap;
|
||||
use std::default::Default;
|
||||
|
||||
pub(super) struct Highlighter {
|
||||
prefix: Vec<char>,
|
||||
suffix: Vec<char>,
|
||||
fields: Vec<(Idiom, Value)>,
|
||||
offseter: Offseter,
|
||||
}
|
||||
|
||||
impl Highlighter {
|
||||
pub(super) fn new(prefix: Value, suffix: Value, idiom: &Idiom, doc: &Value) -> Self {
|
||||
let prefix = prefix.to_raw_string().chars().collect();
|
||||
let suffix = suffix.to_raw_string().chars().collect();
|
||||
// Extract the fields we want to highlight
|
||||
let fields = doc.walk(idiom);
|
||||
Self {
|
||||
fields,
|
||||
prefix,
|
||||
suffix,
|
||||
offseter: Offseter::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn highlight(&mut self, os: Vec<Offset>) {
|
||||
self.offseter.highlight(os);
|
||||
}
|
||||
|
||||
fn extract(val: Value, vals: &mut Vec<Option<String>>) {
|
||||
match val {
|
||||
Value::Strand(s) => vals.push(Some(s.0)),
|
||||
Value::Number(n) => vals.push(Some(n.to_string())),
|
||||
Value::Bool(b) => vals.push(Some(b.to_string())),
|
||||
Value::Array(a) => {
|
||||
for v in a.0 {
|
||||
Self::extract(v, vals);
|
||||
}
|
||||
}
|
||||
_ => vals.push(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Highlighter> for Value {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(hl: Highlighter) -> Result<Self, Error> {
|
||||
if hl.fields.is_empty() {
|
||||
return Ok(Self::None);
|
||||
}
|
||||
let mut vals = vec![];
|
||||
for (_, f) in hl.fields {
|
||||
Highlighter::extract(f, &mut vals);
|
||||
}
|
||||
let mut res = Vec::with_capacity(vals.len());
|
||||
for (idx, val) in vals.into_iter().enumerate() {
|
||||
let idx = idx as u32;
|
||||
if let Some(v) = val {
|
||||
if let Some(m) = hl.offseter.offsets.get(&idx) {
|
||||
let mut v: Vec<char> = v.chars().collect();
|
||||
let mut d = 0;
|
||||
for (s, e) in m {
|
||||
let p = (*s as usize) + d;
|
||||
v.splice(p..p, hl.prefix.clone());
|
||||
d += hl.prefix.len();
|
||||
let p = (*e as usize) + d;
|
||||
v.splice(p..p, hl.suffix.clone());
|
||||
d += hl.suffix.len();
|
||||
}
|
||||
let s: String = v.iter().collect();
|
||||
res.push(Value::from(s));
|
||||
} else {
|
||||
res.push(Value::from(v));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(match res.len() {
|
||||
0 => Value::None,
|
||||
1 => res.remove(0),
|
||||
_ => Value::from(res),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(super) struct Offseter {
|
||||
offsets: HashMap<u32, BTreeMap<Position, Position>>,
|
||||
}
|
||||
|
||||
impl Offseter {
|
||||
pub(super) fn highlight(&mut self, os: Vec<Offset>) {
|
||||
for o in os {
|
||||
match self.offsets.entry(o.index) {
|
||||
HEntry::Occupied(mut e) => match e.get_mut().entry(o.start) {
|
||||
BEntry::Vacant(e) => {
|
||||
e.insert(o.end);
|
||||
}
|
||||
BEntry::Occupied(mut e) => {
|
||||
if o.end.gt(e.get()) {
|
||||
e.insert(o.end);
|
||||
}
|
||||
}
|
||||
},
|
||||
HEntry::Vacant(e) => {
|
||||
e.insert(BTreeMap::from([(o.start, o.end)]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Offseter> for Value {
|
||||
type Error = Error;
|
||||
|
||||
fn try_from(or: Offseter) -> Result<Self, Error> {
|
||||
if or.offsets.is_empty() {
|
||||
return Ok(Self::None);
|
||||
}
|
||||
let mut res = BTreeMap::default();
|
||||
for (idx, offsets) in or.offsets {
|
||||
let mut r = Vec::with_capacity(offsets.len());
|
||||
for (s, e) in offsets {
|
||||
let mut o = BTreeMap::default();
|
||||
o.insert("s".to_string(), Value::from(s));
|
||||
o.insert("e".to_string(), Value::from(e));
|
||||
r.push(Value::Object(Object::from(o)));
|
||||
}
|
||||
res.insert(idx.to_string(), Value::Array(Array::from(r)));
|
||||
}
|
||||
Ok(match res.len() {
|
||||
0 => Value::None,
|
||||
_ => Value::from(Object::from(res)),
|
||||
})
|
||||
}
|
||||
}
|
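The splice loop in TryFrom<Highlighter> is where the highlighting actually happens: for each recorded (start, end) range it inserts the prefix before the match and the suffix after it, shifting later insertion points by the number of characters already added, which is why the ranges must be visited in ascending order (the BTreeMap guarantees that). A self-contained sketch of that logic, assuming the offsets are character positions:

fn highlight(text: &str, ranges: &[(usize, usize)], prefix: &str, suffix: &str) -> String {
	// Work on a char vector so the insert positions stay valid for multi-byte text
	let mut v: Vec<char> = text.chars().collect();
	let prefix: Vec<char> = prefix.chars().collect();
	let suffix: Vec<char> = suffix.chars().collect();
	// Number of characters inserted so far
	let mut d = 0;
	for &(s, e) in ranges {
		let p = s + d;
		v.splice(p..p, prefix.iter().copied());
		d += prefix.len();
		let p = e + d;
		v.splice(p..p, suffix.iter().copied());
		d += suffix.len();
	}
	v.into_iter().collect()
}

fn main() {
	// Highlight the term occupying characters 0..5
	let out = highlight("Hello World!", &[(0, 5)], "<em>", "</em>");
	assert_eq!(out, "<em>Hello</em> World!");
	println!("{out}");
}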
|
@ -1,6 +1,8 @@
|
|||
pub(crate) mod analyzer;
|
||||
pub(crate) mod docids;
|
||||
mod doclength;
|
||||
mod highlighter;
|
||||
mod offsets;
|
||||
mod postings;
|
||||
mod scorer;
|
||||
mod termdocs;
|
||||
|
@ -10,26 +12,32 @@ use crate::err::Error;
|
|||
use crate::idx::ft::analyzer::Analyzer;
|
||||
use crate::idx::ft::docids::DocIds;
|
||||
use crate::idx::ft::doclength::DocLengths;
|
||||
use crate::idx::ft::highlighter::{Highlighter, Offseter};
|
||||
use crate::idx::ft::offsets::Offsets;
|
||||
use crate::idx::ft::postings::Postings;
|
||||
use crate::idx::ft::scorer::{BM25Scorer, Score};
|
||||
use crate::idx::ft::termdocs::TermDocs;
|
||||
use crate::idx::ft::terms::{TermId, Terms};
|
||||
use crate::idx::{btree, IndexKeyBase, SerdeState};
|
||||
use crate::kvs::{Key, Transaction};
|
||||
use crate::sql::scoring::Scoring;
|
||||
use crate::sql::statements::DefineAnalyzerStatement;
|
||||
use crate::sql::{Array, Object, Thing, Value};
|
||||
use crate::sql::{Array, Idiom, Object, Thing, Value};
|
||||
use roaring::treemap::IntoIter;
|
||||
use roaring::RoaringTreemap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::ops::BitAnd;
|
||||
|
||||
pub(crate) type MatchRef = u8;
|
||||
|
||||
pub(crate) struct FtIndex {
|
||||
analyzer: Analyzer,
|
||||
state_key: Key,
|
||||
index_key_base: IndexKeyBase,
|
||||
state: State,
|
||||
bm25: Bm25Params,
|
||||
btree_default_order: u32,
|
||||
bm25: Option<Bm25Params>,
|
||||
order: u32,
|
||||
highlighting: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -78,7 +86,9 @@ impl FtIndex {
|
|||
tx: &mut Transaction,
|
||||
az: DefineAnalyzerStatement,
|
||||
index_key_base: IndexKeyBase,
|
||||
btree_default_order: u32,
|
||||
order: u32,
|
||||
scoring: &Scoring,
|
||||
hl: bool,
|
||||
) -> Result<Self, Error> {
|
||||
let state_key: Key = index_key_base.new_bs_key();
|
||||
let state: State = if let Some(val) = tx.get(state_key.clone()).await? {
|
||||
|
@ -86,22 +96,34 @@ impl FtIndex {
|
|||
} else {
|
||||
State::default()
|
||||
};
|
||||
let mut bm25 = None;
|
||||
if let Scoring::Bm {
|
||||
k1,
|
||||
b,
|
||||
} = scoring
|
||||
{
|
||||
bm25 = Some(Bm25Params {
|
||||
k1: *k1,
|
||||
b: *b,
|
||||
});
|
||||
}
|
||||
Ok(Self {
|
||||
state,
|
||||
state_key,
|
||||
index_key_base,
|
||||
bm25: Bm25Params::default(),
|
||||
btree_default_order,
|
||||
bm25,
|
||||
order,
|
||||
highlighting: hl,
|
||||
analyzer: az.into(),
|
||||
})
|
||||
}
|
||||
|
||||
async fn doc_ids(&self, tx: &mut Transaction) -> Result<DocIds, Error> {
|
||||
DocIds::new(tx, self.index_key_base.clone(), self.btree_default_order).await
|
||||
DocIds::new(tx, self.index_key_base.clone(), self.order).await
|
||||
}
|
||||
|
||||
async fn terms(&self, tx: &mut Transaction) -> Result<Terms, Error> {
|
||||
Terms::new(tx, self.index_key_base.clone(), self.btree_default_order).await
|
||||
Terms::new(tx, self.index_key_base.clone(), self.order).await
|
||||
}
|
||||
|
||||
fn term_docs(&self) -> TermDocs {
|
||||
|
@ -109,11 +131,15 @@ impl FtIndex {
|
|||
}
|
||||
|
||||
async fn doc_lengths(&self, tx: &mut Transaction) -> Result<DocLengths, Error> {
|
||||
DocLengths::new(tx, self.index_key_base.clone(), self.btree_default_order).await
|
||||
DocLengths::new(tx, self.index_key_base.clone(), self.order).await
|
||||
}
|
||||
|
||||
async fn postings(&self, tx: &mut Transaction) -> Result<Postings, Error> {
|
||||
Postings::new(tx, self.index_key_base.clone(), self.btree_default_order).await
|
||||
Postings::new(tx, self.index_key_base.clone(), self.order).await
|
||||
}
|
||||
|
||||
fn offsets(&self) -> Offsets {
|
||||
Offsets::new(self.index_key_base.clone())
|
||||
}
|
||||
|
||||
pub(crate) async fn remove_document(
|
||||
|
@ -140,7 +166,7 @@ impl FtIndex {
|
|||
let mut p = self.postings(tx).await?;
|
||||
let mut t = self.terms(tx).await?;
|
||||
let td = self.term_docs();
|
||||
for term_id in term_list {
|
||||
for term_id in &term_list {
|
||||
p.remove_posting(tx, term_id, doc_id).await?;
|
||||
// if the term is not present in any document in the index, we can remove it
|
||||
let doc_count = td.remove_doc(tx, term_id, doc_id).await?;
|
||||
|
@ -148,6 +174,14 @@ impl FtIndex {
|
|||
t.remove_term_id(tx, term_id).await?;
|
||||
}
|
||||
}
|
||||
// Remove the offsets if any
|
||||
if self.highlighting {
|
||||
let o = self.offsets();
|
||||
for term_id in term_list {
|
||||
// TODO?: Removal can be done with a prefix on doc_id
|
||||
o.remove_offsets(tx, doc_id, term_id).await?;
|
||||
}
|
||||
}
|
||||
t.finish(tx).await?;
|
||||
p.finish(tx).await?;
|
||||
}
|
||||
|
@ -168,10 +202,19 @@ impl FtIndex {
|
|||
let resolved = d.resolve_doc_id(tx, rid.into()).await?;
|
||||
let doc_id = *resolved.doc_id();
|
||||
|
||||
// Extract the doc_lengths, terms en frequencies
|
||||
// Extract the doc_lengths, terms and frequencies (and offsets)
|
||||
let mut t = self.terms(tx).await?;
|
||||
let (doc_length, terms_and_frequencies) =
|
||||
let (doc_length, terms_and_frequencies, offsets) = if self.highlighting {
|
||||
let (dl, tf, ofs) = self
|
||||
.analyzer
|
||||
.extract_terms_with_frequencies_with_offsets(&mut t, tx, field_content)
|
||||
.await?;
|
||||
(dl, tf, Some(ofs))
|
||||
} else {
|
||||
let (dl, tf) =
|
||||
self.analyzer.extract_terms_with_frequencies(&mut t, tx, field_content).await?;
|
||||
(dl, tf, None)
|
||||
};
|
||||
|
||||
// Set the doc length
|
||||
let mut l = self.doc_lengths(tx).await?;
|
||||
|
@ -204,7 +247,7 @@ impl FtIndex {
|
|||
}
|
||||
|
||||
// Remove any remaining postings
|
||||
if let Some(old_term_ids) = old_term_ids {
|
||||
if let Some(old_term_ids) = &old_term_ids {
|
||||
for old_term_id in old_term_ids {
|
||||
p.remove_posting(tx, old_term_id, doc_id).await?;
|
||||
let doc_count = term_docs.remove_doc(tx, old_term_id, doc_id).await?;
|
||||
|
@ -215,6 +258,24 @@ impl FtIndex {
|
|||
}
|
||||
}
|
||||
|
||||
if self.highlighting {
|
||||
let o = self.offsets();
|
||||
// Set the offset if any
|
||||
if let Some(ofs) = offsets {
|
||||
if !ofs.is_empty() {
|
||||
for (tid, or) in ofs {
|
||||
o.set_offsets(tx, doc_id, tid, or).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
// In case of an update, we remove the offsets of the terms that no longer exist
|
||||
if let Some(old_term_ids) = old_term_ids {
|
||||
for old_term_id in old_term_ids {
|
||||
o.remove_offsets(tx, doc_id, old_term_id).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Stores the term list for this doc_id
|
||||
tx.set(term_ids_key, terms_ids.try_to_val()?).await?;
|
||||
|
||||
|
@ -233,52 +294,63 @@ impl FtIndex {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub(super) async fn extract_terms(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
query_string: String,
|
||||
) -> Result<Vec<TermId>, Error> {
|
||||
let t = self.terms(tx).await?;
|
||||
let (terms, _) = self.analyzer.extract_terms(&t, tx, query_string).await?;
|
||||
Ok(terms)
|
||||
}
|
||||
|
||||
pub(super) async fn search(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
query_string: String,
|
||||
) -> Result<Option<HitsIterator>, Error> {
|
||||
) -> Result<(Vec<TermId>, Option<HitsIterator>), Error> {
|
||||
let t = self.terms(tx).await?;
|
||||
let td = self.term_docs();
|
||||
let terms = self.analyzer.extract_terms(&t, tx, query_string).await?;
|
||||
let (terms, missing) = self.analyzer.extract_terms(&t, tx, query_string).await?;
|
||||
if missing {
|
||||
// If any term does not exist, as we are doing an AND query,
|
||||
// we can return an empty results set
|
||||
return Ok((terms, None));
|
||||
}
|
||||
let mut hits: Option<RoaringTreemap> = None;
|
||||
let mut terms_docs = Vec::with_capacity(terms.len());
|
||||
for term_id in terms {
|
||||
if let Some(term_id) = term_id {
|
||||
if let Some(term_docs) = td.get_docs(tx, term_id).await? {
|
||||
for term_id in &terms {
|
||||
if let Some(term_docs) = td.get_docs(tx, *term_id).await? {
|
||||
if let Some(h) = hits {
|
||||
hits = Some(h.bitand(&term_docs));
|
||||
} else {
|
||||
hits = Some(term_docs.clone());
|
||||
}
|
||||
terms_docs.push((term_id, term_docs));
|
||||
continue;
|
||||
terms_docs.push((*term_id, term_docs));
|
||||
}
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
if let Some(hits) = hits {
|
||||
if !hits.is_empty() {
|
||||
let postings = self.postings(tx).await?;
|
||||
let doc_lengths = self.doc_lengths(tx).await?;
|
||||
// TODO: Scoring should be optional
|
||||
let scorer = BM25Scorer::new(
|
||||
|
||||
let mut scorer = None;
|
||||
if let Some(bm25) = &self.bm25 {
|
||||
scorer = Some(BM25Scorer::new(
|
||||
doc_lengths,
|
||||
self.state.total_docs_lengths,
|
||||
self.state.doc_count,
|
||||
self.bm25.clone(),
|
||||
);
|
||||
bm25.clone(),
|
||||
));
|
||||
}
|
||||
let doc_ids = self.doc_ids(tx).await?;
|
||||
return Ok(Some(HitsIterator::new(
|
||||
doc_ids,
|
||||
postings,
|
||||
hits,
|
||||
terms_docs,
|
||||
Some(scorer),
|
||||
)));
|
||||
return Ok((
|
||||
terms,
|
||||
Some(HitsIterator::new(doc_ids, postings, hits, terms_docs, scorer)),
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
Ok((terms, None))
|
||||
}
|
||||
|
||||
pub(super) async fn match_id_value(
|
||||
|
@ -303,6 +375,55 @@ impl FtIndex {
|
|||
Ok(false)
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(super) async fn highlight(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
thg: &Thing,
|
||||
terms: &Vec<TermId>,
|
||||
prefix: Value,
|
||||
suffix: Value,
|
||||
idiom: &Idiom,
|
||||
doc: &Value,
|
||||
) -> Result<Value, Error> {
|
||||
let doc_key: Key = thg.into();
|
||||
let doc_ids = self.doc_ids(tx).await?;
|
||||
if let Some(doc_id) = doc_ids.get_doc_id(tx, doc_key).await? {
|
||||
let o = self.offsets();
|
||||
let mut hl = Highlighter::new(prefix, suffix, idiom, doc);
|
||||
for term_id in terms {
|
||||
let o = o.get_offsets(tx, doc_id, *term_id).await?;
|
||||
if let Some(o) = o {
|
||||
hl.highlight(o.0);
|
||||
}
|
||||
}
|
||||
return hl.try_into();
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
||||
pub(super) async fn extract_offsets(
|
||||
&self,
|
||||
tx: &mut Transaction,
|
||||
thg: &Thing,
|
||||
terms: &Vec<TermId>,
|
||||
) -> Result<Value, Error> {
|
||||
let doc_key: Key = thg.into();
|
||||
let doc_ids = self.doc_ids(tx).await?;
|
||||
if let Some(doc_id) = doc_ids.get_doc_id(tx, doc_key).await? {
|
||||
let o = self.offsets();
|
||||
let mut or = Offseter::default();
|
||||
for term_id in terms {
|
||||
let o = o.get_offsets(tx, doc_id, *term_id).await?;
|
||||
if let Some(o) = o {
|
||||
or.highlight(o.0);
|
||||
}
|
||||
}
|
||||
return or.try_into();
|
||||
}
|
||||
Ok(Value::None)
|
||||
}
|
||||
|
||||
pub(crate) async fn statistics(&self, tx: &mut Transaction) -> Result<Statistics, Error> {
|
||||
// TODO do parallel execution
|
||||
Ok(Statistics {
|
||||
|
@ -372,6 +493,7 @@ mod tests {
|
|||
use crate::idx::ft::{FtIndex, HitsIterator, Score};
|
||||
use crate::idx::IndexKeyBase;
|
||||
use crate::kvs::{Datastore, Transaction};
|
||||
use crate::sql::scoring::Scoring;
|
||||
use crate::sql::statements::define::analyzer;
|
||||
use crate::sql::{Array, Thing};
|
||||
use std::collections::HashMap;
|
||||
|
@ -410,8 +532,14 @@ mod tests {
|
|||
{
|
||||
// Add one document
|
||||
let mut tx = ds.transaction(true, false).await.unwrap();
|
||||
let mut fti =
|
||||
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
|
||||
let mut fti = FtIndex::new(
|
||||
&mut tx,
|
||||
az.clone(),
|
||||
IndexKeyBase::default(),
|
||||
default_btree_order,
|
||||
&Scoring::default(),
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
fti.index_document(&mut tx, &doc1, &Array::from(vec!["hello the world"]))
|
||||
|
@ -423,8 +551,14 @@ mod tests {
|
|||
{
|
||||
// Add two documents
|
||||
let mut tx = ds.transaction(true, false).await.unwrap();
|
||||
let mut fti =
|
||||
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
|
||||
let mut fti = FtIndex::new(
|
||||
&mut tx,
|
||||
az.clone(),
|
||||
IndexKeyBase::default(),
|
||||
default_btree_order,
|
||||
&Scoring::default(),
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
fti.index_document(&mut tx, &doc2, &Array::from(vec!["a yellow hello"])).await.unwrap();
|
||||
|
@ -434,8 +568,14 @@ mod tests {
|
|||
|
||||
{
|
||||
let mut tx = ds.transaction(true, false).await.unwrap();
|
||||
let fti =
|
||||
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
|
||||
let fti = FtIndex::new(
|
||||
&mut tx,
|
||||
az.clone(),
|
||||
IndexKeyBase::default(),
|
||||
default_btree_order,
|
||||
&Scoring::default(),
|
||||
false,
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
|
@ -447,30 +587,36 @@ mod tests {
|
|||
assert_eq!(statistics.doc_lengths.keys_count, 3);
|
||||
|
||||
// Search & score
|
||||
let i = fti.search(&mut tx, "hello".to_string()).await.unwrap();
|
||||
let (_, i) = fti.search(&mut tx, "hello".to_string()).await.unwrap();
|
||||
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;
|
||||
|
||||
let i = fti.search(&mut tx, "world".to_string()).await.unwrap();
|
||||
let (_, i) = fti.search(&mut tx, "world".to_string()).await.unwrap();
|
||||
check_hits(i, &mut tx, vec![(&doc1, Some(0.4859746))]).await;
|
||||
|
||||
let i = fti.search(&mut tx, "yellow".to_string()).await.unwrap();
|
||||
let (_, i) = fti.search(&mut tx, "yellow".to_string()).await.unwrap();
|
||||
check_hits(i, &mut tx, vec![(&doc2, Some(0.4859746))]).await;
|
||||
|
||||
let i = fti.search(&mut tx, "foo".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "foo".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;

let i = fti.search(&mut tx, "bar".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "bar".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;

let i = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
assert!(i.is_none());
}

{
// Reindex one document
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
false,
)
.await
.unwrap();
fti.index_document(&mut tx, &doc3, &Array::from(vec!["nobar foo"])).await.unwrap();

@@ -478,23 +624,29 @@ mod tests {

// We can still find 'foo'
let mut tx = ds.transaction(false, false).await.unwrap();
let i = fti.search(&mut tx, "foo".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "foo".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;

// We can't find 'bar' anymore
let i = fti.search(&mut tx, "bar".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "bar".to_string()).await.unwrap();
assert!(i.is_none());

// We can now find 'nobar'
let i = fti.search(&mut tx, "nobar".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "nobar".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.56902087))]).await;
}

{
// Remove documents
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
false,
)
.await
.unwrap();
fti.remove_document(&mut tx, &doc1).await.unwrap();

@@ -503,16 +655,17 @@ mod tests {
tx.commit().await.unwrap();

let mut tx = ds.transaction(false, false).await.unwrap();
let i = fti.search(&mut tx, "hello".to_string()).await.unwrap();
assert!(i.is_none());
let (v, h) = fti.search(&mut tx, "hello".to_string()).await.unwrap();
assert!(v.is_empty());
assert!(h.is_none());

let i = fti.search(&mut tx, "foo".to_string()).await.unwrap();
assert!(i.is_none());
let (v, h) = fti.search(&mut tx, "foo".to_string()).await.unwrap();
assert!(v.is_empty());
assert!(h.is_none());
}
}

#[test(tokio::test)]
async fn test_ft_index_bm_25() {
async fn test_ft_index_bm_25(hl: bool) {
// The function `extract_sorted_terms_with_frequencies` is non-deterministic.
// The inner structures (BTrees) are built with the same terms and frequencies,
// but the insertion order is different, ending up in different BTree structures.

@@ -529,8 +682,14 @@ mod tests {
let default_btree_order = 5;
{
let mut tx = ds.transaction(true, false).await.unwrap();
let mut fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
let mut fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
hl,
)
.await
.unwrap();
fti.index_document(

@@ -566,8 +725,14 @@ mod tests {

{
let mut tx = ds.transaction(true, false).await.unwrap();
let fti =
FtIndex::new(&mut tx, az.clone(), IndexKeyBase::default(), default_btree_order)
let fti = FtIndex::new(
&mut tx,
az.clone(),
IndexKeyBase::default(),
default_btree_order,
&Scoring::default(),
hl,
)
.await
.unwrap();

@@ -577,7 +742,7 @@ mod tests {
assert_eq!(statistics.doc_ids.keys_count, 4);
assert_eq!(statistics.doc_lengths.keys_count, 4);

let i = fti.search(&mut tx, "the".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "the".to_string()).await.unwrap();
check_hits(
i,
&mut tx,

@@ -590,7 +755,7 @@ mod tests {
)
.await;

let i = fti.search(&mut tx, "dog".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "dog".to_string()).await.unwrap();
check_hits(
i,
&mut tx,

@@ -598,27 +763,37 @@ mod tests {
)
.await;

let i = fti.search(&mut tx, "fox".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "fox".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;

let i = fti.search(&mut tx, "over".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "over".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;

let i = fti.search(&mut tx, "lazy".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "lazy".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;

let i = fti.search(&mut tx, "jumped".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "jumped".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc1, Some(0.0)), (&doc2, Some(0.0))]).await;

let i = fti.search(&mut tx, "nothing".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "nothing".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc3, Some(0.87105393))]).await;

let i = fti.search(&mut tx, "animals".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "animals".to_string()).await.unwrap();
check_hits(i, &mut tx, vec![(&doc4, Some(0.92279965))]).await;

let i = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
let (_, i) = fti.search(&mut tx, "dummy".to_string()).await.unwrap();
assert!(i.is_none());
}
}
}

#[test(tokio::test)]
async fn test_ft_index_bm_25_without_highlighting() {
test_ft_index_bm_25(false).await;
}

#[test(tokio::test)]
async fn test_ft_index_bm_25_with_highlighting() {
test_ft_index_bm_25(true).await;
}
}
156 lib/src/idx/ft/offsets.rs Normal file

@@ -0,0 +1,156 @@
use crate::err::Error;
use crate::idx::ft::docids::DocId;
use crate::idx::ft::terms::TermId;
use crate::idx::IndexKeyBase;
use crate::kvs::{Transaction, Val};

pub(super) type Position = u32;

pub(super) struct Offsets {
index_key_base: IndexKeyBase,
}

impl Offsets {
pub(super) fn new(index_key_base: IndexKeyBase) -> Self {
Self {
index_key_base,
}
}

pub(super) async fn set_offsets(
&self,
tx: &mut Transaction,
doc_id: DocId,
term_id: TermId,
offsets: OffsetRecords,
) -> Result<(), Error> {
let key = self.index_key_base.new_bo_key(doc_id, term_id);
let val: Val = offsets.try_into()?;
tx.set(key, val).await?;
Ok(())
}

pub(super) async fn get_offsets(
&self,
tx: &mut Transaction,
doc_id: DocId,
term_id: TermId,
) -> Result<Option<OffsetRecords>, Error> {
let key = self.index_key_base.new_bo_key(doc_id, term_id);
if let Some(val) = tx.get(key).await? {
let offsets = val.try_into()?;
Ok(Some(offsets))
} else {
Ok(None)
}
}

pub(super) async fn remove_offsets(
&self,
tx: &mut Transaction,
doc_id: DocId,
term_id: TermId,
) -> Result<(), Error> {
let key = self.index_key_base.new_bo_key(doc_id, term_id);
tx.del(key).await
}
}

#[derive(Clone, Debug, PartialEq)]
pub(super) struct Offset {
pub(super) index: u32,
pub(super) start: Position,
pub(super) end: Position,
}

impl Offset {
pub(super) fn new(index: u32, start: Position, end: Position) -> Self {
Self {
index,
start,
end,
}
}
}

#[derive(Clone, Debug, PartialEq)]
pub(super) struct OffsetRecords(pub(super) Vec<Offset>);

impl TryFrom<OffsetRecords> for Val {
type Error = Error;

fn try_from(offsets: OffsetRecords) -> Result<Self, Self::Error> {
// We build a single vector containing every value (index, start and end).
let mut decompressed = Vec::new();
// First we push the number of offsets,
// so we can rebuild the OffsetRecords on deserialization.
decompressed.push(offsets.0.len() as u32);
// We want the values to be more or less sorted so that RLE-style compression
// is more effective.
// Indexes are likely to be very small.
for o in &offsets.0 {
decompressed.push(o.index);
}
// `start` and `end` values are likely to be ascending
for o in &offsets.0 {
decompressed.push(o.start);
decompressed.push(o.end);
}
Ok(bincode::serialize(&decompressed)?)
}
}

impl TryFrom<Val> for OffsetRecords {
type Error = Error;

fn try_from(val: Val) -> Result<Self, Self::Error> {
if val.is_empty() {
return Ok(Self(vec![]));
}
let decompressed: Vec<u32> = bincode::deserialize(&val)?;
let mut iter = decompressed.iter();
let s = *iter.next().ok_or(Error::CorruptedIndex)?;
let mut indexes = Vec::with_capacity(s as usize);
for _ in 0..s {
let index = *iter.next().ok_or(Error::CorruptedIndex)?;
indexes.push(index);
}
let mut res = Vec::with_capacity(s as usize);
for index in indexes {
let start = *iter.next().ok_or(Error::CorruptedIndex)?;
let end = *iter.next().ok_or(Error::CorruptedIndex)?;
res.push(Offset::new(index, start, end));
}
Ok(OffsetRecords(res))
}
}

#[cfg(test)]
mod tests {
use crate::idx::ft::offsets::{Offset, OffsetRecords};
use crate::kvs::Val;

#[test]
fn test_offset_records() {
let o = OffsetRecords(vec![
Offset {
index: 0,
start: 1,
end: 2,
},
Offset {
index: 0,
start: 11,
end: 22,
},
Offset {
index: 1,
start: 3,
end: 4,
},
]);
let v: Val = o.clone().try_into().unwrap();
let o2 = v.try_into().unwrap();
assert_eq!(o, o2)
}
}
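
A minimal illustration (not part of the commit) of the flattened `Vec<u32>` layout that the two `TryFrom` implementations above round-trip through, using the same three offsets as `test_offset_records`. The only assumption is the `bincode` crate already used by the module.

// Layout: [count, index..., (start, end)...], serialized with bincode.
fn main() {
    let flattened: Vec<u32> = vec![
        3,       // number of offsets
        0, 0, 1, // the `index` of each offset, grouped so similar values sit together
        1, 2,    // (start, end) of the first offset
        11, 22,  // (start, end) of the second offset
        3, 4,    // (start, end) of the third offset
    ];
    let val: Vec<u8> = bincode::serialize(&flattened).unwrap();
    let back: Vec<u32> = bincode::deserialize(&val).unwrap();
    assert_eq!(flattened, back);
}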
@@ -18,7 +18,7 @@ impl Postings {
pub(super) async fn new(
tx: &mut Transaction,
index_key_base: IndexKeyBase,
default_btree_order: u32,
order: u32,
) -> Result<Self, Error> {
let keys = PostingsKeyProvider {
index_key_base: index_key_base.clone(),

@@ -27,7 +27,7 @@ impl Postings {
let state: btree::State = if let Some(val) = tx.get(state_key.clone()).await? {
btree::State::try_from_val(val)?
} else {
btree::State::new(default_btree_order)
btree::State::new(order)
};
Ok(Self {
state_key,
@@ -14,6 +14,7 @@ use crate::key::bf::Bf;
use crate::key::bi::Bi;
use crate::key::bk::Bk;
use crate::key::bl::Bl;
use crate::key::bo::Bo;
use crate::key::bp::Bp;
use crate::key::bs::Bs;
use crate::key::bt::Bt;

@@ -105,6 +106,18 @@ impl IndexKeyBase {
.into()
}

fn new_bo_key(&self, doc_id: DocId, term_id: TermId) -> Key {
Bo::new(
self.inner.ns.as_str(),
self.inner.db.as_str(),
self.inner.tb.as_str(),
self.inner.ix.as_str(),
doc_id,
term_id,
)
.into()
}

fn new_bp_key(&self, node_id: Option<NodeId>) -> Key {
Bp::new(
self.inner.ns.as_str(),
@@ -1,11 +1,13 @@
use crate::dbs::{Options, Transaction};
use crate::err::Error;
use crate::idx::ft::FtIndex;
use crate::idx::ft::terms::TermId;
use crate::idx::ft::{FtIndex, MatchRef};
use crate::idx::planner::plan::IndexOption;
use crate::idx::planner::tree::IndexMap;
use crate::idx::IndexKeyBase;
use crate::sql::index::Index;
use crate::sql::{Expression, Table, Thing, Value};
use std::collections::HashMap;
use crate::sql::{Expression, Idiom, Table, Thing, Value};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

#[derive(Clone)]

@@ -15,9 +17,16 @@ pub(crate) struct QueryExecutor {

struct Inner {
table: String,
index_map: IndexMap,
index: HashMap<Expression, HashSet<IndexOption>>,
pre_match: Option<Expression>,
ft_map: HashMap<String, FtIndex>,
terms: HashMap<MatchRef, IndexFieldTerms>,
}

struct IndexFieldTerms {
ix: String,
id: Idiom,
t: Vec<TermId>,
}

impl QueryExecutor {

@@ -30,29 +39,45 @@ impl QueryExecutor {
) -> Result<Self, Error> {
let mut run = txn.lock().await;
let mut ft_map = HashMap::new();
for ios in index_map.values() {
for ios in index_map.index.values() {
for io in ios {
if let Index::Search {
az,
order,
..
sc,
hl,
} = &io.ix.index
{
if !ft_map.contains_key(&io.ix.name.0) {
let ikb = IndexKeyBase::new(opt, &io.ix);
let az = run.get_az(opt.ns(), opt.db(), az.as_str()).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order, sc, *hl).await?;
ft_map.insert(io.ix.name.0.clone(), ft);
}
}
}
}
let mut terms = HashMap::with_capacity(index_map.terms.len());
for (mr, ifv) in index_map.terms {
if let Some(ft) = ft_map.get(&ifv.ix) {
let term_ids = ft.extract_terms(&mut run, ifv.val.clone()).await?;
terms.insert(
mr,
IndexFieldTerms {
ix: ifv.ix,
id: ifv.id,
t: term_ids,
},
);
}
}
Ok(Self {
inner: Arc::new(Inner {
table: table.0.clone(),
index_map,
index: index_map.index,
pre_match,
ft_map,
terms,
}),
})
}

@@ -74,8 +99,9 @@ impl QueryExecutor {

// Otherwise, we look for the first possible index options, and evaluate the expression
// Does the record id match this executor's table?
if thg.tb.eq(&self.inner.table) {
if let Some(ios) = self.inner.index_map.get(exp) {
if let Some(ios) = self.inner.index.get(exp) {
for io in ios {
if let Some(fti) = self.inner.ft_map.get(&io.ix.name.0) {
let mut run = txn.lock().await;

@@ -94,4 +120,51 @@ impl QueryExecutor {
value: exp.to_string(),
})
}

pub(crate) async fn highlight(
&self,
txn: Transaction,
thg: &Thing,
prefix: Value,
suffix: Value,
match_ref: Value,
doc: &Value,
) -> Result<Value, Error> {
let mut tx = txn.lock().await;
// We have to make the connection between the match ref from the highlight function...
if let Value::Number(n) = match_ref {
let m = n.as_int() as u8;
// ... and from the match operator (@{matchref}@)
if let Some(ift) = self.inner.terms.get(&m) {
// Check that we have an index
if let Some(ft) = self.inner.ft_map.get(&ift.ix) {
// All good, we can do the highlight
return ft.highlight(&mut tx, thg, &ift.t, prefix, suffix, &ift.id, doc).await;
}
}
}
Ok(Value::None)
}

pub(crate) async fn offsets(
&self,
txn: Transaction,
thg: &Thing,
match_ref: Value,
) -> Result<Value, Error> {
let mut tx = txn.lock().await;
// We have to make the connection between the match ref from the offsets function...
if let Value::Number(n) = match_ref {
let m = n.as_int() as u8;
// ... and from the match operator (@{matchref}@)
if let Some(ift) = self.inner.terms.get(&m) {
// Check that we have an index
if let Some(ft) = self.inner.ft_map.get(&ift.ix) {
// All good, we can extract the offsets
return ft.extract_offsets(&mut tx, thg, &ift.t).await;
}
}
}
Ok(Value::None)
}
}
@@ -14,6 +14,7 @@ use std::collections::HashMap;
pub(crate) struct QueryPlanner<'a> {
opt: &'a Options,
cond: &'a Option<Cond>,
/// There is one executor per table
executors: HashMap<String, QueryExecutor>,
}

@@ -32,7 +33,7 @@ impl<'a> QueryPlanner<'a> {
opt: &Options,
t: Table,
) -> Result<Iterable, Error> {
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
let res = Tree::build(self.opt, &txn, &t, self.cond).await?;
if let Some((node, im)) = res {
if let Some(plan) = AllAndStrategy::build(&node)? {
@@ -1,8 +1,9 @@
use crate::dbs::{Options, Transaction};
use crate::err::Error;
use crate::idx::ft::{FtIndex, HitsIterator};
use crate::idx::ft::terms::TermId;
use crate::idx::ft::{FtIndex, HitsIterator, MatchRef};
use crate::idx::planner::executor::QueryExecutor;
use crate::idx::planner::tree::{IndexMap, Node};
use crate::idx::planner::tree::IndexMap;
use crate::idx::IndexKeyBase;
use crate::key;
use crate::kvs::Key;

@@ -79,7 +80,7 @@ pub(super) struct IndexOption {
}

impl IndexOption {
fn new(ix: DefineIndexStatement, op: Operator, v: Value, ep: Expression) -> Self {
pub(super) fn new(ix: DefineIndexStatement, op: Operator, v: Value, ep: Expression) -> Self {
Self {
ix,
op,

@@ -98,28 +99,6 @@ impl IndexOption {
QueryExecutor::new(opt, txn, t, i, Some(self.ep.clone())).await
}

pub(super) fn found(
ix: &DefineIndexStatement,
op: &Operator,
v: &Node,
ep: &Expression,
) -> Option<Self> {
if let Some(v) = v.is_scalar() {
if match ix.index {
Index::Idx => Operator::Equal.eq(op),
Index::Uniq => Operator::Equal.eq(op),
Index::Search {
..
} => {
matches!(op, Operator::Matches(_))
}
} {
return Some(IndexOption::new(ix.clone(), op.to_owned(), v.clone(), ep.clone()));
}
}
None
}

async fn new_iterator(
&self,
opt: &Options,

@@ -144,8 +123,8 @@ impl IndexOption {
sc,
order,
} => match self.op {
Operator::Matches(_) => Ok(Box::new(
MatchesThingIterator::new(opt, txn, &self.ix, az, *hl, sc, *order, &self.v)
Operator::Matches(mr) => Ok(Box::new(
MatchesThingIterator::new(opt, txn, &self.ix, az, *hl, sc, *order, mr, &self.v)
.await?,
)),
_ => Err(Error::BypassQueryPlanner),

@@ -221,6 +200,7 @@ impl ThingIterator for UniqueEqualThingIterator {
}

struct MatchesThingIterator {
_terms: Option<(MatchRef, Vec<TermId>)>,
hits: Option<HitsIterator>,
}

@@ -231,9 +211,10 @@ impl MatchesThingIterator {
txn: &Transaction,
ix: &DefineIndexStatement,
az: &Ident,
_hl: bool,
hl: bool,
sc: &Scoring,
order: u32,
mr: Option<MatchRef>,
v: &Value,
) -> Result<Self, Error> {
let ikb = IndexKeyBase::new(opt, ix);

@@ -244,10 +225,11 @@ impl MatchesThingIterator {
{
let query_string = v.clone().convert_to_string()?;
let az = run.get_az(opt.ns(), opt.db(), az.as_str()).await?;
let fti = FtIndex::new(&mut run, az, ikb, order).await?;
let hits = fti.search(&mut run, query_string).await?;
let fti = FtIndex::new(&mut run, az, ikb, order, sc, hl).await?;
let (terms, hits) = fti.search(&mut run, query_string).await?;
Ok(Self {
hits,
_terms: mr.map(|mr| (mr, terms)),
})
} else {
Err(Error::FeatureNotYetImplemented {
@@ -1,6 +1,8 @@
use crate::dbs::{Options, Transaction};
use crate::err::Error;
use crate::idx::ft::MatchRef;
use crate::idx::planner::plan::IndexOption;
use crate::sql::index::Index;
use crate::sql::statements::DefineIndexStatement;
use crate::sql::{Cond, Expression, Idiom, Operator, Subquery, Table, Value};
use async_recursion::async_recursion;

@@ -8,9 +10,19 @@ use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;

pub(super) struct Tree {}
#[derive(Default)]
pub(super) struct IndexMap {
pub(super) index: HashMap<Expression, HashSet<IndexOption>>,
pub(super) terms: HashMap<MatchRef, IndexFieldValue>,
}

pub(super) type IndexMap = HashMap<Expression, HashSet<IndexOption>>;
pub(super) struct IndexFieldValue {
pub(super) ix: String,
pub(super) id: Idiom,
pub(super) val: String,
}

pub(super) struct Tree {}

impl Tree {
pub(super) async fn build<'a>(

@@ -80,7 +92,7 @@ impl<'a> TreeBuilder<'a> {

async fn eval_idiom(&mut self, i: &Idiom) -> Result<Node, Error> {
Ok(if let Some(ix) = self.find_index(i).await? {
Node::IndexedField(ix)
Node::IndexedField(i.to_owned(), ix)
} else {
Node::NonIndexedField
})

@@ -101,18 +113,11 @@ impl<'a> TreeBuilder<'a> {
let left = self.eval_value(l).await?;
let right = self.eval_value(r).await?;
let mut index_option = None;
if let Some(ix) = left.is_indexed_field() {
if let Some(io) = IndexOption::found(ix, o, &right, e) {
index_option = Some(io.clone());
self.add_index(e, io);
}
}
if let Some(ix) = right.is_indexed_field() {
if let Some(io) = IndexOption::found(ix, o, &left, e) {
index_option = Some(io.clone());
self.add_index(e, io);
}
}
if let Some((id, ix)) = left.is_indexed_field() {
index_option = self.lookup_index_option(ix, o, id, &right, e);
} else if let Some((id, ix)) = right.is_indexed_field() {
index_option = self.lookup_index_option(ix, o, id, &left, e);
};
Ok(Node::Expression {
index_option,
left: Box::new(left),

@@ -123,8 +128,48 @@ impl<'a> TreeBuilder<'a> {
}
}

fn lookup_index_option(
&mut self,
ix: &DefineIndexStatement,
op: &Operator,
id: &Idiom,
v: &Node,
ep: &Expression,
) -> Option<IndexOption> {
if let Some(v) = v.is_scalar() {
if match &ix.index {
Index::Idx => Operator::Equal.eq(op),
Index::Uniq => Operator::Equal.eq(op),
Index::Search {
..
} => {
if let Operator::Matches(mr) = op {
if let Some(mr) = mr {
self.index_map.terms.insert(
*mr,
IndexFieldValue {
ix: ix.name.0.to_owned(),
id: id.to_owned(),
val: v.to_raw_string(),
},
);
}
true
} else {
false
}
}
} {
let io = IndexOption::new(ix.clone(), op.to_owned(), v.clone(), ep.clone());
self.add_index(ep, io.clone());
return Some(io);
}
}
None
}

fn add_index(&mut self, e: &Expression, io: IndexOption) {
match self.index_map.entry(e.clone()) {
match self.index_map.index.entry(e.clone()) {
Entry::Occupied(mut e) => {
e.get_mut().insert(io);
}

@@ -150,7 +195,7 @@ pub(super) enum Node {
right: Box<Node>,
operator: Operator,
},
IndexedField(DefineIndexStatement),
IndexedField(Idiom, DefineIndexStatement),
NonIndexedField,
Scalar(Value),
Unsupported,

@@ -165,9 +210,9 @@ impl Node {
}
}

pub(super) fn is_indexed_field(&self) -> Option<&DefineIndexStatement> {
if let Node::IndexedField(ix) = self {
Some(ix)
pub(super) fn is_indexed_field(&self) -> Option<(&Idiom, &DefineIndexStatement)> {
if let Node::IndexedField(id, ix) = self {
Some((id, ix))
} else {
None
}
69 lib/src/key/bo.rs Normal file

@@ -0,0 +1,69 @@
use crate::idx::ft::docids::DocId;
use crate::idx::ft::terms::TermId;
use derive::Key;
use serde::{Deserialize, Serialize};

#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize, Key)]
pub struct Bo<'a> {
__: u8,
_a: u8,
pub ns: &'a str,
_b: u8,
pub db: &'a str,
_c: u8,
pub tb: &'a str,
_d: u8,
_e: u8,
_f: u8,
pub ix: &'a str,
_g: u8,
pub doc_id: DocId,
pub term_id: TermId,
}

impl<'a> Bo<'a> {
pub fn new(
ns: &'a str,
db: &'a str,
tb: &'a str,
ix: &'a str,
doc_id: DocId,
term_id: TermId,
) -> Self {
Self {
__: b'/',
_a: b'*',
ns,
_b: b'*',
db,
_c: b'*',
tb,
_d: b'!',
_e: b'b',
_f: b'o',
ix,
_g: b'*',
doc_id,
term_id,
}
}
}

#[cfg(test)]
mod tests {
#[test]
fn key() {
use super::*;
#[rustfmt::skip]
let val = Bo::new(
"test",
"test",
"test",
"test",
1,2
);
let enc = Bo::encode(&val).unwrap();
let dec = Bo::decode(&enc).unwrap();
assert_eq!(val, dec);
}
}
@@ -57,6 +57,7 @@ pub mod bf; // Stores Term/Doc frequency
pub mod bi; // Stores doc keys for doc_ids
pub mod bk; // Stores the term list for doc_ids
pub mod bl; // Stores BTree nodes for doc lengths
pub mod bo; // Stores the offsets
pub mod bp; // Stores BTree nodes for postings
pub mod bs; // Stores FullText index states
pub mod bt; // Stores BTree nodes for terms
@@ -148,7 +148,7 @@ impl Function {
// Get the function definition
let val = {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Get the function definition

@@ -255,6 +255,7 @@ pub(crate) fn function_names(i: &str) -> IResult<&str, &str> {
preceded(tag("meta::"), function_meta),
preceded(tag("parse::"), function_parse),
preceded(tag("rand::"), function_rand),
preceded(tag("search::"), function_search),
preceded(tag("session::"), function_session),
preceded(tag("string::"), function_string),
preceded(tag("time::"), function_time),

@@ -453,6 +454,10 @@ fn function_rand(i: &str) -> IResult<&str, &str> {
))(i)
}

fn function_search(i: &str) -> IResult<&str, &str> {
alt((tag("highlight"), tag("offsets")))(i)
}

fn function_session(i: &str) -> IResult<&str, &str> {
alt((
tag("db"),
@@ -1,3 +1,4 @@
use crate::idx::ft::MatchRef;
use crate::sql::comment::mightbespace;
use crate::sql::comment::shouldbespace;
use crate::sql::error::IResult;

@@ -42,7 +43,7 @@ pub enum Operator {
NotLike, // !~
AllLike, // *~
AnyLike, // ?~
Matches(Option<u8>), // @{ref}@
Matches(Option<MatchRef>), // @{ref}@
//
LessThan, // <
LessThanOrEqual, // <=
@@ -60,7 +60,7 @@ impl Param {
// The param has not been set locally
None => {
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Get the param definition
@@ -30,7 +30,7 @@ impl AnalyzeStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Read the index

@@ -42,10 +42,11 @@ impl AnalyzeStatement {
Index::Search {
az,
order,
..
sc,
hl,
} => {
let az = run.get_az(opt.ns(), opt.db(), az.as_str()).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order).await?;
let ft = FtIndex::new(&mut run, az, ikb, *order, sc, *hl).await?;
ft.statistics(&mut run).await?
}
_ => {
@@ -135,7 +135,7 @@ impl DefineNamespaceStatement {
// Process the statement
let key = crate::key::ns::new(&self.name);
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
run.set(key, self).await?;

@@ -182,7 +182,7 @@ impl DefineDatabaseStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -234,7 +234,7 @@ impl DefineFunctionStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -312,7 +312,7 @@ impl DefineAnalyzerStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -385,7 +385,7 @@ impl DefineLoginStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -401,7 +401,7 @@ impl DefineLoginStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -504,7 +504,7 @@ impl DefineTokenStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -520,7 +520,7 @@ impl DefineTokenStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -537,7 +537,7 @@ impl DefineTokenStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -618,7 +618,7 @@ impl DefineScopeStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -733,7 +733,7 @@ impl DefineParamStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -793,7 +793,7 @@ impl DefineTableStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -972,7 +972,7 @@ impl DefineEventStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -1053,7 +1053,7 @@ impl DefineFieldStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -1210,7 +1210,7 @@ impl DefineIndexStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement
@@ -35,7 +35,7 @@ impl InfoStatement {
// Create the result set
let mut res = Object::default();
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the statement

@@ -53,7 +53,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set

@@ -85,7 +85,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set

@@ -141,7 +141,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set

@@ -161,7 +161,7 @@ impl InfoStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Create the result set
@@ -26,7 +26,7 @@ impl KillStatement {
// Allowed to run?
opt.check(Level::No)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Fetch the live query key

@@ -40,7 +40,7 @@ impl LiveStatement {
// Allowed to run?
opt.check(Level::No)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Process the live query table
@@ -111,7 +111,7 @@ impl RemoveNamespaceStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -163,7 +163,7 @@ impl RemoveDatabaseStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -215,7 +215,7 @@ impl RemoveFunctionStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -270,7 +270,7 @@ impl RemoveAnalyzerStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -323,7 +323,7 @@ impl RemoveLoginStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -338,7 +338,7 @@ impl RemoveLoginStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -398,7 +398,7 @@ impl RemoveTokenStatement {
// Allowed to run?
opt.check(Level::Kv)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -413,7 +413,7 @@ impl RemoveTokenStatement {
// Allowed to run?
opt.check(Level::Ns)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -428,7 +428,7 @@ impl RemoveTokenStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -485,7 +485,7 @@ impl RemoveScopeStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -537,7 +537,7 @@ impl RemoveParamStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -587,7 +587,7 @@ impl RemoveTableStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -640,7 +640,7 @@ impl RemoveEventStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -699,7 +699,7 @@ impl RemoveFieldStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition

@@ -759,7 +759,7 @@ impl RemoveIndexStatement {
// Allowed to run?
opt.check(Level::Db)?;
// Clone transaction
let txn = ctx.clone_transaction()?;
let txn = ctx.try_clone_transaction()?;
// Claim transaction
let mut run = txn.lock().await;
// Delete the definition
@@ -10,8 +10,8 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
let sql = r"
CREATE blog:1 SET title = 'Hello World!';
DEFINE ANALYZER simple TOKENIZERS blank,class;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75);
SELECT id,title FROM blog WHERE title @@ 'Hello' EXPLAIN;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id, search::highlight('<em>', '</em>', 1) AS title FROM blog WHERE title @1@ 'Hello' EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");

@@ -26,7 +26,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
"[
{
id: blog:1,
title: 'Hello World!'
title: '<em>Hello</em> World!'
},
{
explain:

@@ -35,7 +35,7 @@ async fn select_where_matches_using_index() -> Result<(), Error> {
detail: {
plan: {
index: 'blog_title',
operator: '@@',
operator: '@1@',
value: 'Hello'
},
table: 'blog',

@@ -56,8 +56,8 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
CREATE blog:1 SET title = 'Hello World!';
CREATE blog:2 SET title = 'Foo Bar!';
DEFINE ANALYZER simple TOKENIZERS blank,class FILTERS lowercase;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75);
SELECT id,title FROM blog WHERE (title @@ 'hello' AND id>0) OR (title @@ 'world' AND id<99) EXPLAIN;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id,search::highlight('<em>', '</em>', 1) AS title FROM blog WHERE (title @0@ 'hello' AND id>0) OR (title @1@ 'world' AND id<99) EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");

@@ -73,7 +73,7 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
"[
{
id: blog:1,
title: 'Hello World!'
title: 'Hello <em>World</em>!'
},
{
explain:

@@ -91,3 +91,104 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
assert_eq!(tmp, val);
Ok(())
}

#[tokio::test]
async fn select_where_matches_using_index_and_arrays() -> Result<(), Error> {
let sql = r"
CREATE blog:1 SET content = ['Hello World!', 'Be Bop', 'Foo Bar'];
DEFINE ANALYZER simple TOKENIZERS blank,class;
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id, search::highlight('<em>', '</em>', 1) AS content FROM blog WHERE content @1@ 'Hello Bar' EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None, false).await?;
assert_eq!(res.len(), 4);
//
let _ = res.remove(0).result?;
let _ = res.remove(0).result?;
let _ = res.remove(0).result?;
let tmp = res.remove(0).result?;
let val = Value::parse(
"[
{
id: blog:1,
content: [
'<em>Hello</em> World!',
'Be Bop',
'Foo <em>Bar</em>'
]
},
{
explain:
[
{
detail: {
plan: {
index: 'blog_content',
operator: '@1@',
value: 'Hello Bar'
},
table: 'blog',
},
operation: 'Iterate Index'
}
]
}
]",
);
assert_eq!(tmp, val);
Ok(())
}

#[tokio::test]
async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
let sql = r"
CREATE blog:1 SET title = 'Blog title!', content = ['Hello World!', 'Be Bop', 'Foo Bar'];
DEFINE ANALYZER simple TOKENIZERS blank,class;
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
SELECT id, search::offsets(0) AS title, search::offsets(1) AS content FROM blog WHERE title @0@ 'title' AND content @1@ 'Hello Bar' EXPLAIN;
";
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None, false).await?;
assert_eq!(res.len(), 5);
//
for _ in 0..4 {
let _ = res.remove(0).result?;
}
let tmp = res.remove(0).result?;
let val = Value::parse(
"[
{
id: blog:1,
title: {
0: [{s:5, e:10}],
},
content: {
0: [{s:0, e:5}],
2: [{s:4, e:7}]
}
},
{
explain:
[
{
detail: {
plan: {
index: 'blog_content',
operator: '@1@',
value: 'Hello Bar'
},
table: 'blog',
},
operation: 'Iterate Index'
}
]
}
]",
);
assert_eq!(tmp, val);
Ok(())
}
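
The {s, e} values in the expected offsets above appear to be plain character positions into the indexed strings (for these ASCII examples, byte and character positions coincide). A standalone check, as an illustration only and not part of the commit:

fn main() {
    let title = "Blog title!";
    assert_eq!(&title[5..10], "title");    // -> title: { 0: [{s:5, e:10}] }
    let content_0 = "Hello World!";
    assert_eq!(&content_0[0..5], "Hello"); // -> content: { 0: [{s:0, e:5}], ... }
    let content_2 = "Foo Bar";
    assert_eq!(&content_2[4..7], "Bar");   // -> content: { ..., 2: [{s:4, e:7}] }
}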