Fixes highlighting bug (out or range for slice length...) (#2169)
This commit is contained in:
parent
be3e2637b9
commit
f688eb4ef5
9 changed files with 164 additions and 99 deletions
16
Cargo.lock
generated
16
Cargo.lock
generated
|
@ -3847,9 +3847,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rquickjs"
|
name = "rquickjs"
|
||||||
version = "0.3.0"
|
version = "0.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9a9b746ee3a4f6dc8b8f9b369d13c043a2f21fb645253aeac3f1404b12cff7d9"
|
checksum = "6db7788c2818f4546daabe9ae2d1ee2f4db61ab1998d4b483494c4193cc38dab"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"rquickjs-core",
|
"rquickjs-core",
|
||||||
"rquickjs-macro",
|
"rquickjs-macro",
|
||||||
|
@ -3857,9 +3857,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rquickjs-core"
|
name = "rquickjs-core"
|
||||||
version = "0.3.0"
|
version = "0.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "9f4f48d37b8e455a76544505915dcd7a82b7a3a1898cf2b4a0aba2d7bd66dcc4"
|
checksum = "b12cf8646fe0af5bcff2822ccd162990f0679a1f9287c7257f4f4193a9d31ea9"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-lock",
|
"async-lock",
|
||||||
"relative-path",
|
"relative-path",
|
||||||
|
@ -3868,9 +3868,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rquickjs-macro"
|
name = "rquickjs-macro"
|
||||||
version = "0.3.0"
|
version = "0.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "6ecbbe8ec4319fe42d3ec165dc7fa473580c2fa09747a39aa93f81ab422792a8"
|
checksum = "80564583a91b0ae6b2d6b9b3d0f8ffd69a4b17202cc63a12df78dfa8983885fc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"darling",
|
"darling",
|
||||||
"fnv",
|
"fnv",
|
||||||
|
@ -3886,9 +3886,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rquickjs-sys"
|
name = "rquickjs-sys"
|
||||||
version = "0.3.0"
|
version = "0.3.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3f65ccee59bf4eb61361e7d516fea4a77273b96fe17811fc7da2558369914f55"
|
checksum = "b747058afd4d988d056e4972ec8516a5a86fdfc103c1c1485bfee8966a0743ae"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bindgen 0.65.1",
|
"bindgen 0.65.1",
|
||||||
"cc",
|
"cc",
|
||||||
|
|
|
@ -76,7 +76,7 @@ fuzzy-matcher = "0.3.7"
|
||||||
geo = { version = "0.25.0", features = ["use-serde"] }
|
geo = { version = "0.25.0", features = ["use-serde"] }
|
||||||
indexmap = { version = "1.9.3", features = ["serde"] }
|
indexmap = { version = "1.9.3", features = ["serde"] }
|
||||||
indxdb = { version = "0.3.0", optional = true }
|
indxdb = { version = "0.3.0", optional = true }
|
||||||
js = { version = "0.3.0" , package = "rquickjs", features = ["array-buffer", "bindgen", "classes", "futures", "loader", "macro", "parallel", "properties","rust-alloc"], optional = true }
|
js = { version = "0.3.1" , package = "rquickjs", features = ["array-buffer", "bindgen", "classes", "futures", "loader", "macro", "parallel", "properties","rust-alloc"], optional = true }
|
||||||
jsonwebtoken = "8.3.0"
|
jsonwebtoken = "8.3.0"
|
||||||
lexicmp = "0.1.0"
|
lexicmp = "0.1.0"
|
||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
|
|
|
@ -469,9 +469,13 @@ pub enum Error {
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Represents an error when analyzing a value
|
/// Represents an error when analyzing a value
|
||||||
#[error("A string can't be analyzed: {0}")]
|
#[error("A value can't be analyzed: {0}")]
|
||||||
AnalyzerError(String),
|
AnalyzerError(String),
|
||||||
|
|
||||||
|
/// Represents an error when trying to highlight a value
|
||||||
|
#[error("A value can't be highlighted: {0}")]
|
||||||
|
HighlightError(String),
|
||||||
|
|
||||||
/// Represents an underlying error with Bincode serializing / deserializing
|
/// Represents an underlying error with Bincode serializing / deserializing
|
||||||
#[error("Bincode error: {0}")]
|
#[error("Bincode error: {0}")]
|
||||||
Bincode(#[from] BincodeError),
|
Bincode(#[from] BincodeError),
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use crate::err::Error;
|
||||||
use crate::idx::ft::analyzer::tokenizer::Tokens;
|
use crate::idx::ft::analyzer::tokenizer::Tokens;
|
||||||
use crate::sql::filter::Filter as SqlFilter;
|
use crate::sql::filter::Filter as SqlFilter;
|
||||||
use crate::sql::language::Language;
|
use crate::sql::language::Language;
|
||||||
|
@ -60,13 +61,13 @@ impl Filter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn apply_filters(mut t: Tokens, f: &Option<Vec<Filter>>) -> Tokens {
|
pub(super) fn apply_filters(mut t: Tokens, f: &Option<Vec<Filter>>) -> Result<Tokens, Error> {
|
||||||
if let Some(f) = f {
|
if let Some(f) = f {
|
||||||
for f in f {
|
for f in f {
|
||||||
t = t.filter(f);
|
t = t.filter(f)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
t
|
Ok(t)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn apply_filter(&self, c: &str) -> FilterResult {
|
pub(super) fn apply_filter(&self, c: &str) -> FilterResult {
|
||||||
|
|
|
@ -42,7 +42,7 @@ impl Analyzer {
|
||||||
tx: &mut Transaction,
|
tx: &mut Transaction,
|
||||||
query_string: String,
|
query_string: String,
|
||||||
) -> Result<(Vec<TermId>, bool), Error> {
|
) -> Result<(Vec<TermId>, bool), Error> {
|
||||||
let tokens = self.analyze(query_string);
|
let tokens = self.analyze(query_string)?;
|
||||||
// We first collect every unique terms
|
// We first collect every unique terms
|
||||||
// as it can contains duplicates
|
// as it can contains duplicates
|
||||||
let mut terms = HashSet::new();
|
let mut terms = HashSet::new();
|
||||||
|
@ -53,7 +53,7 @@ impl Analyzer {
|
||||||
// Now we can extract the term ids
|
// Now we can extract the term ids
|
||||||
let mut res = Vec::with_capacity(terms.len());
|
let mut res = Vec::with_capacity(terms.len());
|
||||||
for term in terms {
|
for term in terms {
|
||||||
if let Some(term_id) = t.get_term_id(tx, tokens.get_token_string(term)).await? {
|
if let Some(term_id) = t.get_term_id(tx, tokens.get_token_string(term)?).await? {
|
||||||
res.push(term_id);
|
res.push(term_id);
|
||||||
} else {
|
} else {
|
||||||
missing = false;
|
missing = false;
|
||||||
|
@ -80,7 +80,7 @@ impl Analyzer {
|
||||||
for tks in &inputs {
|
for tks in &inputs {
|
||||||
for tk in tks.list() {
|
for tk in tks.list() {
|
||||||
dl += 1;
|
dl += 1;
|
||||||
let s = tks.get_token_string(tk);
|
let s = tks.get_token_string(tk)?;
|
||||||
match tf.entry(s) {
|
match tf.entry(s) {
|
||||||
Entry::Vacant(e) => {
|
Entry::Vacant(e) => {
|
||||||
e.insert(1);
|
e.insert(1);
|
||||||
|
@ -117,7 +117,7 @@ impl Analyzer {
|
||||||
for (i, tks) in inputs.iter().enumerate() {
|
for (i, tks) in inputs.iter().enumerate() {
|
||||||
for tk in tks.list() {
|
for tk in tks.list() {
|
||||||
dl += 1;
|
dl += 1;
|
||||||
let s = tks.get_token_string(tk);
|
let s = tks.get_token_string(tk)?;
|
||||||
let o = tk.new_offset(i as u32);
|
let o = tk.new_offset(i as u32);
|
||||||
match tfos.entry(s) {
|
match tfos.entry(s) {
|
||||||
Entry::Vacant(e) => {
|
Entry::Vacant(e) => {
|
||||||
|
@ -141,33 +141,34 @@ impl Analyzer {
|
||||||
|
|
||||||
fn analyze_content(&self, field_content: &Array, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
fn analyze_content(&self, field_content: &Array, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||||
for v in &field_content.0 {
|
for v in &field_content.0 {
|
||||||
self.analyze_value(v, tks);
|
self.analyze_value(v, tks)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn analyze_value(&self, val: &Value, tks: &mut Vec<Tokens>) {
|
fn analyze_value(&self, val: &Value, tks: &mut Vec<Tokens>) -> Result<(), Error> {
|
||||||
match val {
|
match val {
|
||||||
Value::Strand(s) => tks.push(self.analyze(s.0.clone())),
|
Value::Strand(s) => tks.push(self.analyze(s.0.clone())?),
|
||||||
Value::Number(n) => tks.push(self.analyze(n.to_string())),
|
Value::Number(n) => tks.push(self.analyze(n.to_string())?),
|
||||||
Value::Bool(b) => tks.push(self.analyze(b.to_string())),
|
Value::Bool(b) => tks.push(self.analyze(b.to_string())?),
|
||||||
Value::Array(a) => {
|
Value::Array(a) => {
|
||||||
for v in &a.0 {
|
for v in &a.0 {
|
||||||
self.analyze_value(v, tks);
|
self.analyze_value(v, tks)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
};
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn analyze(&self, input: String) -> Tokens {
|
fn analyze(&self, input: String) -> Result<Tokens, Error> {
|
||||||
if let Some(t) = &self.t {
|
if let Some(t) = &self.t {
|
||||||
if !input.is_empty() {
|
if !input.is_empty() {
|
||||||
let t = Tokenizer::tokenize(t, input);
|
let t = Tokenizer::tokenize(t, input);
|
||||||
return Filter::apply_filters(t, &self.f);
|
return Filter::apply_filters(t, &self.f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Tokens::new(input)
|
Ok(Tokens::new(input))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,10 +181,10 @@ mod tests {
|
||||||
let (_, az) = analyzer(def).unwrap();
|
let (_, az) = analyzer(def).unwrap();
|
||||||
let a: Analyzer = az.into();
|
let a: Analyzer = az.into();
|
||||||
|
|
||||||
let tokens = a.analyze(input.to_string());
|
let tokens = a.analyze(input.to_string()).unwrap();
|
||||||
let mut res = vec![];
|
let mut res = vec![];
|
||||||
for t in tokens.list() {
|
for t in tokens.list() {
|
||||||
res.push(tokens.get_token_string(t));
|
res.push(tokens.get_token_string(t).unwrap());
|
||||||
}
|
}
|
||||||
assert_eq!(&res, expected);
|
assert_eq!(&res, expected);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use crate::err::Error;
|
||||||
use crate::idx::ft::analyzer::filter::{Filter, FilterResult, Term};
|
use crate::idx::ft::analyzer::filter::{Filter, FilterResult, Term};
|
||||||
use crate::idx::ft::offsets::{Offset, Position};
|
use crate::idx::ft::offsets::{Offset, Position};
|
||||||
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
|
use crate::sql::tokenizer::Tokenizer as SqlTokenizer;
|
||||||
|
@ -17,18 +18,18 @@ impl Tokens {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn get_token_string<'a>(&'a self, t: &'a Token) -> &str {
|
pub(super) fn get_token_string<'a>(&'a self, t: &'a Token) -> Result<&str, Error> {
|
||||||
t.get_str(&self.i)
|
t.get_str(&self.i)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn filter(self, f: &Filter) -> Tokens {
|
pub(super) fn filter(self, f: &Filter) -> Result<Tokens, Error> {
|
||||||
let mut tks = Vec::new();
|
let mut tks = Vec::new();
|
||||||
let mut res = vec![];
|
let mut res = vec![];
|
||||||
for t in self.t {
|
for t in self.t {
|
||||||
if t.is_empty() {
|
if t.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let c = t.get_str(&self.i);
|
let c = t.get_str(&self.i)?;
|
||||||
let r = f.apply_filter(c);
|
let r = f.apply_filter(c);
|
||||||
res.push((t, r));
|
res.push((t, r));
|
||||||
}
|
}
|
||||||
|
@ -55,10 +56,10 @@ impl Tokens {
|
||||||
FilterResult::Ignore => {}
|
FilterResult::Ignore => {}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
Tokens {
|
Ok(Tokens {
|
||||||
i: self.i,
|
i: self.i,
|
||||||
t: tks,
|
t: tks,
|
||||||
}
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn list(&self) -> &Vec<Token> {
|
pub(super) fn list(&self) -> &Vec<Token> {
|
||||||
|
@ -68,36 +69,86 @@ impl Tokens {
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord, Hash)]
|
#[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord, Hash)]
|
||||||
pub(super) enum Token {
|
pub(super) enum Token {
|
||||||
Ref(Position, Position),
|
Ref {
|
||||||
String(Position, Position, String),
|
chars: (Position, Position),
|
||||||
|
bytes: (Position, Position),
|
||||||
|
},
|
||||||
|
String {
|
||||||
|
chars: (Position, Position),
|
||||||
|
bytes: (Position, Position),
|
||||||
|
term: String,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl Token {
|
||||||
fn new_token(&self, t: String) -> Self {
|
fn new_token(&self, term: String) -> Self {
|
||||||
match self {
|
match self {
|
||||||
Token::Ref(s, e) => Token::String(*s, *e, t),
|
Token::Ref {
|
||||||
Token::String(s, e, _) => Token::String(*s, *e, t),
|
chars,
|
||||||
|
bytes,
|
||||||
|
} => Token::String {
|
||||||
|
chars: *chars,
|
||||||
|
bytes: *bytes,
|
||||||
|
term,
|
||||||
|
},
|
||||||
|
Token::String {
|
||||||
|
chars,
|
||||||
|
bytes,
|
||||||
|
..
|
||||||
|
} => Token::String {
|
||||||
|
chars: *chars,
|
||||||
|
bytes: *bytes,
|
||||||
|
term,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn new_offset(&self, i: u32) -> Offset {
|
pub(super) fn new_offset(&self, i: u32) -> Offset {
|
||||||
match self {
|
match self {
|
||||||
Token::Ref(s, e) => Offset::new(i, *s, *e),
|
Token::Ref {
|
||||||
Token::String(s, e, _) => Offset::new(i, *s, *e),
|
chars,
|
||||||
|
..
|
||||||
|
} => Offset::new(i, chars.0, chars.1),
|
||||||
|
Token::String {
|
||||||
|
chars,
|
||||||
|
..
|
||||||
|
} => Offset::new(i, chars.0, chars.1),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_empty(&self) -> bool {
|
fn is_empty(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Token::Ref(start, end) => start == end,
|
Token::Ref {
|
||||||
Token::String(_, _, s) => s.is_empty(),
|
chars,
|
||||||
|
..
|
||||||
|
} => chars.0 == chars.1,
|
||||||
|
Token::String {
|
||||||
|
term,
|
||||||
|
..
|
||||||
|
} => term.is_empty(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn get_str<'a>(&'a self, i: &'a str) -> &str {
|
pub(super) fn get_str<'a>(&'a self, i: &'a str) -> Result<&str, Error> {
|
||||||
match self {
|
match self {
|
||||||
Token::Ref(s, e) => &i[(*s as usize)..(*e as usize)],
|
Token::Ref {
|
||||||
Token::String(_, _, s) => s,
|
bytes,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
let s = bytes.0 as usize;
|
||||||
|
let e = bytes.1 as usize;
|
||||||
|
let l = i.len();
|
||||||
|
if s >= l || e > l {
|
||||||
|
return Err(Error::AnalyzerError(format!(
|
||||||
|
"Unable to extract the token. The offset position ({s},{e}) is out of range ({l})."
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
Ok(&i[(bytes.0 as usize)..(bytes.1 as usize)])
|
||||||
|
}
|
||||||
|
Token::String {
|
||||||
|
term,
|
||||||
|
..
|
||||||
|
} => Ok(term),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -129,28 +180,40 @@ impl Tokenizer {
|
||||||
|
|
||||||
pub(super) fn tokenize(t: &[SqlTokenizer], i: String) -> Tokens {
|
pub(super) fn tokenize(t: &[SqlTokenizer], i: String) -> Tokens {
|
||||||
let mut w = Tokenizer::new(t);
|
let mut w = Tokenizer::new(t);
|
||||||
let mut last_pos = 0;
|
let mut last_char_pos = 0;
|
||||||
let mut current_pos = 0;
|
let mut last_byte_pos = 0;
|
||||||
|
let mut current_char_pos = 0;
|
||||||
|
let mut current_byte_pos = 0;
|
||||||
let mut t = Vec::new();
|
let mut t = Vec::new();
|
||||||
for c in i.chars() {
|
for c in i.chars() {
|
||||||
|
let char_len = c.len_utf8() as Position;
|
||||||
let is_valid = Self::is_valid(c);
|
let is_valid = Self::is_valid(c);
|
||||||
let should_split = w.should_split(c);
|
let should_split = w.should_split(c);
|
||||||
if should_split || !is_valid {
|
if should_split || !is_valid {
|
||||||
// The last pos may be more advanced due to the is_valid process
|
// The last pos may be more advanced due to the is_valid process
|
||||||
if last_pos < current_pos {
|
if last_char_pos < current_char_pos {
|
||||||
t.push(Token::Ref(last_pos, current_pos));
|
t.push(Token::Ref {
|
||||||
|
chars: (last_char_pos, current_char_pos),
|
||||||
|
bytes: (last_byte_pos, current_byte_pos),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
last_pos = current_pos;
|
last_char_pos = current_char_pos;
|
||||||
|
last_byte_pos = current_byte_pos;
|
||||||
// If the character is not valid for indexing (space, control...)
|
// If the character is not valid for indexing (space, control...)
|
||||||
// Then we increase the last position to the next character
|
// Then we increase the last position to the next character
|
||||||
if !is_valid {
|
if !is_valid {
|
||||||
last_pos += c.len_utf8() as Position;
|
last_char_pos += 1;
|
||||||
|
last_byte_pos += char_len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
current_pos += c.len_utf8() as Position;
|
current_char_pos += 1;
|
||||||
|
current_byte_pos += char_len;
|
||||||
}
|
}
|
||||||
if current_pos != last_pos {
|
if current_char_pos != last_char_pos {
|
||||||
t.push(Token::Ref(last_pos, current_pos));
|
t.push(Token::Ref {
|
||||||
|
chars: (last_char_pos, current_char_pos),
|
||||||
|
bytes: (last_byte_pos, current_byte_pos),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
Tokens {
|
Tokens {
|
||||||
i,
|
i,
|
||||||
|
|
|
@ -32,17 +32,17 @@ impl Highlighter {
|
||||||
self.offseter.highlight(os);
|
self.offseter.highlight(os);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract(val: Value, vals: &mut Vec<Option<String>>) {
|
fn extract(val: Value, vals: &mut Vec<String>) {
|
||||||
match val {
|
match val {
|
||||||
Value::Strand(s) => vals.push(Some(s.0)),
|
Value::Strand(s) => vals.push(s.0),
|
||||||
Value::Number(n) => vals.push(Some(n.to_string())),
|
Value::Number(n) => vals.push(n.to_string()),
|
||||||
Value::Bool(b) => vals.push(Some(b.to_string())),
|
Value::Bool(b) => vals.push(b.to_string()),
|
||||||
Value::Array(a) => {
|
Value::Array(a) => {
|
||||||
for v in a.0 {
|
for v in a.0 {
|
||||||
Self::extract(v, vals);
|
Self::extract(v, vals);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => vals.push(None),
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -60,24 +60,35 @@ impl TryFrom<Highlighter> for Value {
|
||||||
}
|
}
|
||||||
let mut res = Vec::with_capacity(vals.len());
|
let mut res = Vec::with_capacity(vals.len());
|
||||||
for (idx, val) in vals.into_iter().enumerate() {
|
for (idx, val) in vals.into_iter().enumerate() {
|
||||||
let idx = idx as u32;
|
if let Some(m) = hl.offseter.offsets.get(&(idx as u32)) {
|
||||||
if let Some(v) = val {
|
let mut v: Vec<char> = val.chars().collect();
|
||||||
if let Some(m) = hl.offseter.offsets.get(&idx) {
|
let mut l = v.len();
|
||||||
let mut v: Vec<char> = v.chars().collect();
|
|
||||||
let mut d = 0;
|
let mut d = 0;
|
||||||
for (s, e) in m {
|
|
||||||
let p = (*s as usize) + d;
|
// We use a closure to append the prefix and the suffix
|
||||||
v.splice(p..p, hl.prefix.clone());
|
let mut append = |s: u32, ix: &Vec<char>| -> Result<(), Error> {
|
||||||
d += hl.prefix.len();
|
let p = (s as usize) + d;
|
||||||
let p = (*e as usize) + d;
|
if p > l {
|
||||||
v.splice(p..p, hl.suffix.clone());
|
return Err(Error::HighlightError(format!(
|
||||||
d += hl.suffix.len();
|
"position overflow: {s} - len: {l}"
|
||||||
|
)));
|
||||||
}
|
}
|
||||||
|
v.splice(p..p, ix.clone());
|
||||||
|
let xl = ix.len();
|
||||||
|
d += xl;
|
||||||
|
l += xl;
|
||||||
|
Ok(())
|
||||||
|
};
|
||||||
|
|
||||||
|
for (s, e) in m {
|
||||||
|
append(*s, &hl.prefix)?;
|
||||||
|
append(*e, &hl.suffix)?;
|
||||||
|
}
|
||||||
|
|
||||||
let s: String = v.iter().collect();
|
let s: String = v.iter().collect();
|
||||||
res.push(Value::from(s));
|
res.push(Value::from(s));
|
||||||
} else {
|
} else {
|
||||||
res.push(Value::from(v));
|
res.push(Value::from(val));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(match res.len() {
|
Ok(match res.len() {
|
||||||
|
|
|
@ -132,23 +132,8 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_offset_records() {
|
fn test_offset_records() {
|
||||||
let o = OffsetRecords(vec![
|
let o =
|
||||||
Offset {
|
OffsetRecords(vec![Offset::new(0, 1, 2), Offset::new(0, 11, 22), Offset::new(1, 3, 4)]);
|
||||||
index: 0,
|
|
||||||
start: 1,
|
|
||||||
end: 2,
|
|
||||||
},
|
|
||||||
Offset {
|
|
||||||
index: 0,
|
|
||||||
start: 11,
|
|
||||||
end: 22,
|
|
||||||
},
|
|
||||||
Offset {
|
|
||||||
index: 1,
|
|
||||||
start: 3,
|
|
||||||
end: 4,
|
|
||||||
},
|
|
||||||
]);
|
|
||||||
let v: Val = o.clone().try_into().unwrap();
|
let v: Val = o.clone().try_into().unwrap();
|
||||||
let o2 = v.try_into().unwrap();
|
let o2 = v.try_into().unwrap();
|
||||||
assert_eq!(o, o2)
|
assert_eq!(o, o2)
|
||||||
|
|
|
@ -95,10 +95,10 @@ async fn select_where_matches_without_using_index_iterator() -> Result<(), Error
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn select_where_matches_using_index_and_arrays() -> Result<(), Error> {
|
async fn select_where_matches_using_index_and_arrays() -> Result<(), Error> {
|
||||||
let sql = r"
|
let sql = r"
|
||||||
CREATE blog:1 SET content = ['Hello World!', 'Be Bop', 'Foo Bar'];
|
CREATE blog:1 SET content = ['Hello World!', 'Be Bop', 'Foo Bãr'];
|
||||||
DEFINE ANALYZER simple TOKENIZERS blank,class;
|
DEFINE ANALYZER simple TOKENIZERS blank,class;
|
||||||
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
|
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
|
||||||
SELECT id, search::highlight('<em>', '</em>', 1) AS content FROM blog WHERE content @1@ 'Hello Bar' EXPLAIN;
|
SELECT id, search::highlight('<em>', '</em>', 1) AS content FROM blog WHERE content @1@ 'Hello Bãr' EXPLAIN;
|
||||||
";
|
";
|
||||||
let dbs = Datastore::new("memory").await?;
|
let dbs = Datastore::new("memory").await?;
|
||||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
let ses = Session::for_kv().with_ns("test").with_db("test");
|
||||||
|
@ -116,7 +116,7 @@ async fn select_where_matches_using_index_and_arrays() -> Result<(), Error> {
|
||||||
content: [
|
content: [
|
||||||
'<em>Hello</em> World!',
|
'<em>Hello</em> World!',
|
||||||
'Be Bop',
|
'Be Bop',
|
||||||
'Foo <em>Bar</em>'
|
'Foo <em>Bãr</em>'
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -127,7 +127,7 @@ async fn select_where_matches_using_index_and_arrays() -> Result<(), Error> {
|
||||||
plan: {
|
plan: {
|
||||||
index: 'blog_content',
|
index: 'blog_content',
|
||||||
operator: '@1@',
|
operator: '@1@',
|
||||||
value: 'Hello Bar'
|
value: 'Hello Bãr'
|
||||||
},
|
},
|
||||||
table: 'blog',
|
table: 'blog',
|
||||||
},
|
},
|
||||||
|
@ -144,11 +144,11 @@ async fn select_where_matches_using_index_and_arrays() -> Result<(), Error> {
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
|
async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
|
||||||
let sql = r"
|
let sql = r"
|
||||||
CREATE blog:1 SET title = 'Blog title!', content = ['Hello World!', 'Be Bop', 'Foo Bar'];
|
CREATE blog:1 SET title = 'Blog title!', content = ['Hello World!', 'Be Bop', 'Foo Bãr'];
|
||||||
DEFINE ANALYZER simple TOKENIZERS blank,class;
|
DEFINE ANALYZER simple TOKENIZERS blank,class;
|
||||||
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
|
DEFINE INDEX blog_title ON blog FIELDS title SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
|
||||||
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
|
DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25(1.2,0.75) HIGHLIGHTS;
|
||||||
SELECT id, search::offsets(0) AS title, search::offsets(1) AS content FROM blog WHERE title @0@ 'title' AND content @1@ 'Hello Bar' EXPLAIN;
|
SELECT id, search::offsets(0) AS title, search::offsets(1) AS content FROM blog WHERE title @0@ 'title' AND content @1@ 'Hello Bãr' EXPLAIN;
|
||||||
";
|
";
|
||||||
let dbs = Datastore::new("memory").await?;
|
let dbs = Datastore::new("memory").await?;
|
||||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
let ses = Session::for_kv().with_ns("test").with_db("test");
|
||||||
|
@ -179,7 +179,7 @@ async fn select_where_matches_using_index_offsets() -> Result<(), Error> {
|
||||||
plan: {
|
plan: {
|
||||||
index: 'blog_content',
|
index: 'blog_content',
|
||||||
operator: '@1@',
|
operator: '@1@',
|
||||||
value: 'Hello Bar'
|
value: 'Hello Bãr'
|
||||||
},
|
},
|
||||||
table: 'blog',
|
table: 'blog',
|
||||||
},
|
},
|
||||||
|
|
Loading…
Reference in a new issue