search::highlight option to highlight only the searched characters (edgengram, ngram) (#3719)

2024-03-19 17:47:50 +00:00 · 2024-03-19 17:47:50 +00:00 · 45c296a270
commit 45c296a270
parent 00bc9db496
10 changed files with 300 additions and 125 deletions
--- a/core/src/fnc/args.rs
+++ b/core/src/fnc/args.rs
@ -1,6 +1,7 @@
 use crate::err::Error;
 use crate::sql::value::Value;
 use crate::sql::{Array, Bytes, Datetime, Duration, Kind, Number, Object, Regex, Strand, Thing};
+use std::vec::IntoIter;

 /// Implemented by types that are commonly used, in a certain way, as arguments.
 pub trait FromArg: Sized {
@ -186,6 +187,7 @@ impl_tuple!(0,);
 impl_tuple!(1, A);
 impl_tuple!(2, A, B);
 impl_tuple!(3, A, B, C);
+impl_tuple!(4, A, B, C, D);

 // Some functions take a single, optional argument, or no arguments at all.
 impl<A: FromArg> FromArgs for (Option<A>,) {
@ -240,6 +242,34 @@ impl<A: FromArg, B: FromArg> FromArgs for (A, Option<B>) {
 	}
 }

+/// Pulls the next required argument off `args` and converts it to `T`.
+///
+/// `err` builds the error returned when the iterator is already exhausted.
+/// A failed conversion is reported as `Error::InvalidArguments`, naming the
+/// function (`name`) and the argument position (`pos`).
+#[inline]
+fn get_arg<T: FromArg, E: Fn() -> Error>(
+	name: &str,
+	pos: usize,
+	args: &mut IntoIter<Value>,
+	err: E,
+) -> Result<T, Error> {
+	// A missing argument is an arity problem, so defer to the caller's error
+	let value = args.next().ok_or_else(err)?;
+	match T::from_arg(value) {
+		Ok(arg) => Ok(arg),
+		Err(e) => Err(Error::InvalidArguments {
+			name: name.to_owned(),
+			message: format!("Argument {pos} was the wrong type. {e}"),
+		}),
+	}
+}
+
+/// Pulls the next argument off `args`, if there is one, and converts it to `T`.
+///
+/// Returns `Ok(None)` when the iterator is exhausted. A failed conversion is
+/// reported as `Error::InvalidArguments`, naming the function (`name`) and the
+/// argument position (`pos`).
+#[inline]
+fn get_opt_arg<T: FromArg>(
+	name: &str,
+	pos: usize,
+	args: &mut IntoIter<Value>,
+) -> Result<Option<T>, Error> {
+	args.next()
+		.map(|v| {
+			T::from_arg(v).map_err(|e| Error::InvalidArguments {
+				name: name.to_owned(),
+				message: format!("Argument {pos} was the wrong type. {e}"),
+			})
+		})
+		// Option<Result<T, _>> -> Result<Option<T>, _>
+		.transpose()
+}
+
 // Some functions take 2 or 3 arguments, so the third argument is optional.
 impl<A: FromArg, B: FromArg, C: FromArg> FromArgs for (A, B, Option<C>) {
 	fn from_args(name: &str, args: Vec<Value>) -> Result<Self, Error> {
@ -249,24 +279,11 @@ impl<A: FromArg, B: FromArg, C: FromArg> FromArgs for (A, B, Option<C>) {
 		};
 		// Process the function arguments
 		let mut args = args.into_iter();
-		// Process the first function argument
-		let a = A::from_arg(args.next().ok_or_else(err)?).map_err(|e| Error::InvalidArguments {
-			name: name.to_owned(),
-			message: format!("Argument 1 was the wrong type. {e}"),
-		})?;
-		// Process the second function argument
-		let b = B::from_arg(args.next().ok_or_else(err)?).map_err(|e| Error::InvalidArguments {
-			name: name.to_owned(),
-			message: format!("Argument 2 was the wrong type. {e}"),
-		})?;
-		// Process the third function argument
-		let c = match args.next() {
-			Some(c) => Some(C::from_arg(c).map_err(|e| Error::InvalidArguments {
-				name: name.to_owned(),
-				message: format!("Argument 3 was the wrong type. {e}"),
-			})?),
-			None => None,
-		};
+
+		let a: A = get_arg(name, 1, &mut args, err)?;
+		let b: B = get_arg(name, 2, &mut args, err)?;
+		let c: Option<C> = get_opt_arg(name, 3, &mut args)?;
+
 		// Process additional function arguments
 		if args.next().is_some() {
 			// Too many arguments
@ -276,6 +293,30 @@ impl<A: FromArg, B: FromArg, C: FromArg> FromArgs for (A, B, Option<C>) {
 	}
 }

+// Some functions take 3 or 4 arguments, so the fourth argument is optional.
+impl<A: FromArg, B: FromArg, C: FromArg, D: FromArg> FromArgs for (A, B, C, Option<D>) {
+	/// Converts `args` into three required values followed by an optional fourth.
+	///
+	/// Supplying fewer than three or more than four arguments yields the same
+	/// "Expected 3 or 4 arguments." error.
+	fn from_args(name: &str, args: Vec<Value>) -> Result<Self, Error> {
+		// Arity error shared by the "too few" and "too many" cases
+		let arity_err = || Error::InvalidArguments {
+			name: name.to_owned(),
+			message: String::from("Expected 3 or 4 arguments."),
+		};
+		// Process the function arguments
+		let mut rest = args.into_iter();
+
+		let a: A = get_arg(name, 1, &mut rest, arity_err)?;
+		let b: B = get_arg(name, 2, &mut rest, arity_err)?;
+		let c: C = get_arg(name, 3, &mut rest, arity_err)?;
+		let d: Option<D> = get_opt_arg(name, 4, &mut rest)?;
+
+		// Anything left over means too many arguments were supplied
+		match rest.next() {
+			Some(_) => Err(arity_err()),
+			None => Ok((a, b, c, d)),
+		}
+	}
+}
+
 // Some functions take 0, 1, or 2 arguments, so both arguments are optional.
 // It is safe to assume that, if the first argument is None, the second argument will also be None.
 impl<A: FromArg, B: FromArg> FromArgs for (Option<A>, Option<B>) {
@ -286,22 +327,10 @@ impl<A: FromArg, B: FromArg> FromArgs for (Option<A>, Option<B>) {
 		};
 		// Process the function arguments
 		let mut args = args.into_iter();
-		// Process the first function argument
-		let a = match args.next() {
-			Some(a) => Some(A::from_arg(a).map_err(|e| Error::InvalidArguments {
-				name: name.to_owned(),
-				message: format!("Argument 1 was the wrong type. {e}"),
-			})?),
-			None => None,
-		};
-		// Process the second function argument
-		let b = match args.next() {
-			Some(b) => Some(B::from_arg(b).map_err(|e| Error::InvalidArguments {
-				name: name.to_owned(),
-				message: format!("Argument 2 was the wrong type. {e}"),
-			})?),
-			None => None,
-		};
+
+		let a: Option<A> = get_opt_arg(name, 1, &mut args)?;
+		let b: Option<B> = get_opt_arg(name, 2, &mut args)?;
+
 		// Process additional function arguments
 		if args.next().is_some() {
 			// Too many arguments
@ -320,22 +349,10 @@ impl<A: FromArg, B: FromArg> FromArgs for (Option<(A, B)>,) {
 		};
 		// Process the function arguments
 		let mut args = args.into_iter();
-		// Process the first function argument
-		let a = match args.next() {
-			Some(a) => Some(A::from_arg(a).map_err(|e| Error::InvalidArguments {
-				name: name.to_owned(),
-				message: format!("Argument 1 was the wrong type. {e}"),
-			})?),
-			None => None,
-		};
-		// Process the second function argument
-		let b = match args.next() {
-			Some(b) => Some(B::from_arg(b).map_err(|e| Error::InvalidArguments {
-				name: name.to_owned(),
-				message: format!("Argument 2 was the wrong type. {e}"),
-			})?),
-			None => None,
-		};
+
+		let a: Option<A> = get_opt_arg(name, 1, &mut args)?;
+		let b: Option<B> = get_opt_arg(name, 2, &mut args)?;
+
 		// Process additional function arguments
 		if a.is_some() != b.is_some() || args.next().is_some() {
 			// One argument, or too many arguments
@ -355,27 +372,11 @@ impl<A: FromArg, B: FromArg, C: FromArg> FromArgs for (A, Option<B>, Option<C>)
 		};
 		// Process the function arguments
 		let mut args = args.into_iter();
-		// Process the first function argument
-		let a = A::from_arg(args.next().ok_or_else(err)?).map_err(|e| Error::InvalidArguments {
-			name: name.to_owned(),
-			message: format!("Argument 1 was the wrong type. {e}"),
-		})?;
-		// Process the second function argument
-		let b = match args.next() {
-			Some(b) => Some(B::from_arg(b).map_err(|e| Error::InvalidArguments {
-				name: name.to_owned(),
-				message: format!("Argument 2 was the wrong type. {e}"),
-			})?),
-			None => None,
-		};
-		// Process the third function argument
-		let c = match args.next() {
-			Some(c) => Some(C::from_arg(c).map_err(|e| Error::InvalidArguments {
-				name: name.to_owned(),
-				message: format!("Argument 3 was the wrong type. {e}"),
-			})?),
-			None => None,
-		};
+
+		let a: A = get_arg(name, 1, &mut args, err)?;
+		let b: Option<B> = get_opt_arg(name, 2, &mut args)?;
+		let c: Option<C> = get_opt_arg(name, 3, &mut args)?;
+
 		// Process additional function arguments
 		if args.next().is_some() {
 			// Too many arguments
--- a/core/src/fnc/search.rs
+++ b/core/src/fnc/search.rs
@ -51,10 +51,11 @@ pub async fn score(

 pub async fn highlight(
 	(ctx, txn, doc): (&Context<'_>, Option<&Transaction>, Option<&CursorDoc<'_>>),
-	(prefix, suffix, match_ref): (Value, Value, Value),
+	(prefix, suffix, match_ref, partial): (Value, Value, Value, Option<Value>),
 ) -> Result<Value, Error> {
 	if let Some((txn, exe, doc, thg)) = get_execution_context(ctx, txn, doc) {
-		exe.highlight(txn, thg, prefix, suffix, &match_ref, doc.doc.as_ref()).await
+		let partial = partial.map(|p| p.convert_to_bool()).unwrap_or(Ok(false))?;
+		exe.highlight(txn, thg, prefix, suffix, match_ref, partial, doc.doc.as_ref()).await
 	} else {
 		Ok(Value::None)
 	}
@ -62,10 +63,11 @@ pub async fn highlight(

 pub async fn offsets(
 	(ctx, txn, doc): (&Context<'_>, Option<&Transaction>, Option<&CursorDoc<'_>>),
-	(match_ref,): (Value,),
+	(match_ref, partial): (Value, Option<Value>),
 ) -> Result<Value, Error> {
 	if let Some((txn, exe, _, thg)) = get_execution_context(ctx, txn, doc) {
-		exe.offsets(txn, thg, &match_ref).await
+		let partial = partial.map(|p| p.convert_to_bool()).unwrap_or(Ok(false))?;
+		exe.offsets(txn, thg, match_ref, partial).await
 	} else {
 		Ok(Value::None)
 	}
--- a/core/src/idx/ft/analyzer/mod.rs
+++ b/core/src/idx/ft/analyzer/mod.rs
@ -41,7 +41,7 @@ impl Analyzer {
 		txn: &Transaction,
 		t: &Terms,
 		query_string: String,
-	) -> Result<Vec<Option<TermId>>, Error> {
+	) -> Result<Vec<Option<(TermId, u32)>>, Error> {
 		let tokens = self.generate_tokens(ctx, opt, txn, query_string).await?;
 		// We first collect every unique terms
 		// as it can contains duplicates
@ -54,7 +54,7 @@ impl Analyzer {
 		let mut tx = txn.lock().await;
 		for term in terms {
 			let opt_term_id = t.get_term_id(&mut tx, tokens.get_token_string(term)?).await?;
-			res.push(opt_term_id);
+			res.push(opt_term_id.map(|tid| (tid, term.get_char_len())));
 		}
 		Ok(res)
 	}
@ -233,6 +233,7 @@ mod tests {
 	use super::Analyzer;
 	use crate::ctx::Context;
 	use crate::dbs::{Options, Transaction};
+	use crate::idx::ft::analyzer::tokenizer::{Token, Tokens};
 	use crate::kvs::{Datastore, LockType, TransactionType};
 	use crate::{
 		sql::{statements::DefineStatement, Statement},
@ -241,7 +242,7 @@ mod tests {
 	use futures::lock::Mutex;
 	use std::sync::Arc;

-	pub(super) async fn test_analyzer(def: &str, input: &str, expected: &[&str]) {
+	async fn get_analyzer_tokens(def: &str, input: &str) -> Tokens {
 		let ds = Datastore::new("memory").await.unwrap();
 		let tx = ds.transaction(TransactionType::Read, LockType::Optimistic).await.unwrap();
 		let txn: Transaction = Arc::new(Mutex::new(tx));
@ -251,11 +252,15 @@ mod tests {
 			panic!()
 		};
 		let a: Analyzer = az.into();
-
 		let tokens = a
 			.generate_tokens(&Context::default(), &Options::default(), &txn, input.to_string())
 			.await
 			.unwrap();
+		tokens
+	}
+
+	pub(super) async fn test_analyzer(def: &str, input: &str, expected: &[&str]) {
+		let tokens = get_analyzer_tokens(def, input).await;
 		let mut res = vec![];
 		for t in tokens.list() {
 			res.push(tokens.get_token_string(t).unwrap());
--- a/core/src/idx/ft/analyzer/tokenizer.rs
+++ b/core/src/idx/ft/analyzer/tokenizer.rs
@ -84,26 +84,31 @@ impl TryFrom<Tokens> for Value {
+// A token and its location in the input.
 #[derive(Clone, Debug, PartialOrd, PartialEq, Eq, Ord, Hash)]
 pub(super) enum Token {
+	// Token without a term string of its own
 	Ref {
-		chars: (Position, Position),
+		// Character positions, handed to `Offset::new` as (start, gen_start, end)
+		chars: (Position, Position, Position),
+		// Byte positions: (start, end)
 		bytes: (Position, Position),
+		// Length of the token, in characters
+		len: u32,
 	},
+	// Token carrying its own `term` string
 	String {
-		chars: (Position, Position),
+		// Character positions, handed to `Offset::new` as (start, gen_start, end)
+		chars: (Position, Position, Position),
+		// Byte positions: (start, end)
 		bytes: (Position, Position),
 		term: String,
+		// Number of characters in `term`
+		len: u32,
 	},
 }

 impl Token {
 	fn new_token(&self, term: String) -> Self {
+		let len = term.chars().count() as u32;
 		match self {
 			Token::Ref {
 				chars,
 				bytes,
+				..
 			} => Token::String {
 				chars: *chars,
 				bytes: *bytes,
 				term,
+				len,
 			},
 			Token::String {
 				chars,
@ -113,6 +118,7 @@ impl Token {
 				chars: *chars,
 				bytes: *bytes,
 				term,
+				len,
 			},
 		}
 	}
@ -122,11 +128,11 @@ impl Token {
 			Token::Ref {
 				chars,
 				..
-			} => Offset::new(i, chars.0, chars.1),
+			} => Offset::new(i, chars.0, chars.1, chars.2),
 			Token::String {
 				chars,
 				..
-			} => Offset::new(i, chars.0, chars.1),
+			} => Offset::new(i, chars.0, chars.1, chars.2),
 		}
 	}

@ -135,7 +141,7 @@ impl Token {
 			Token::Ref {
 				chars,
 				..
-			} => chars.0 == chars.1,
+			} => chars.0 == chars.2,
 			Token::String {
 				term,
 				..
@ -143,6 +149,19 @@ impl Token {
 		}
 	}

+	/// Returns the length of this token, in characters.
+	pub(super) fn get_char_len(&self) -> u32 {
+		// Both variants store the length in a field of the same name
+		match self {
+			Token::Ref { len, .. } | Token::String { len, .. } => *len,
+		}
+	}
+
 	pub(super) fn get_str<'a>(&'a self, i: &'a str) -> Result<&str, Error> {
 		match self {
 			Token::Ref {
@ -207,8 +226,9 @@ impl Tokenizer {
 				// The last pos may be more advanced due to the is_valid process
 				if last_char_pos < current_char_pos {
 					t.push(Token::Ref {
-						chars: (last_char_pos, current_char_pos),
+						chars: (last_char_pos, last_char_pos, current_char_pos),
 						bytes: (last_byte_pos, current_byte_pos),
+						len: current_char_pos - last_char_pos,
 					});
 				}
 				last_char_pos = current_char_pos;
@ -225,8 +245,9 @@ impl Tokenizer {
 		}
 		if current_char_pos != last_char_pos {
 			t.push(Token::Ref {
-				chars: (last_char_pos, current_char_pos),
+				chars: (last_char_pos, last_char_pos, current_char_pos),
 				bytes: (last_byte_pos, current_byte_pos),
+				len: current_char_pos - last_char_pos,
 			});
 		}
 		Tokens {
--- a/core/src/idx/ft/highlighter.rs
+++ b/core/src/idx/ft/highlighter.rs
@ -15,7 +15,13 @@ pub(super) struct Highlighter {
 }

 impl Highlighter {
-	pub(super) fn new(prefix: Value, suffix: Value, idiom: &Idiom, doc: &Value) -> Self {
+	pub(super) fn new(
+		prefix: Value,
+		suffix: Value,
+		partial: bool,
+		idiom: &Idiom,
+		doc: &Value,
+	) -> Self {
 		let prefix = prefix.to_raw_string().chars().collect();
 		let suffix = suffix.to_raw_string().chars().collect();
 		// Extract the fields we want to highlight
@ -24,12 +30,12 @@ impl Highlighter {
 			fields,
 			prefix,
 			suffix,
-			offseter: Offseter::default(),
+			offseter: Offseter::new(partial),
 		}
 	}

-	pub(super) fn highlight(&mut self, os: Vec<Offset>) {
-		self.offseter.highlight(os);
+	pub(super) fn highlight(&mut self, term_len: u32, os: Vec<Offset>) {
+		self.offseter.highlight(term_len, os);
 	}

 	fn extract(val: Value, vals: &mut Vec<String>) {
@ -104,27 +110,41 @@ impl TryFrom<Highlighter> for Value {
 	}
 }

-#[derive(Default)]
+// Accumulates the spans passed to `highlight`, grouped by `Offset::index`.
 pub(super) struct Offseter {
+	// When true, a span starts at `gen_start` and covers at most `term_len` characters
+	// instead of the whole original term
+	partial: bool,
+	// For each index, the recorded spans as start position -> end position
 	offsets: HashMap<u32, BTreeMap<Position, Position>>,
 }

 impl Offseter {
-	pub(super) fn highlight(&mut self, os: Vec<Offset>) {
+	/// Creates an `Offseter` with no recorded spans.
+	///
+	/// `partial` selects whether `highlight` records only the searched
+	/// characters or the whole original term.
+	pub(super) fn new(partial: bool) -> Self {
+		let offsets = HashMap::new();
+		Self {
+			partial,
+			offsets,
+		}
+	}
+
+	/// Records the spans described by `os`.
+	///
+	/// In partial mode each span starts at the offset's `gen_start` and covers
+	/// at most `term_len` characters, never running past the end of the
+	/// original term. Otherwise the whole original term (`start..end`) is
+	/// recorded. When two spans of the same index share a start position,
+	/// the longer one is kept.
+	pub(super) fn highlight(&mut self, term_len: u32, os: Vec<Offset>) {
 		for o in os {
+			let (start, end) = if self.partial {
+				let start = o.gen_start.min(o.end);
+				let end = (start + term_len).min(o.end);
+				(start, end)
+			} else {
+				(o.start, o.end)
+			};
 			match self.offsets.entry(o.index) {
-				HEntry::Occupied(mut e) => match e.get_mut().entry(o.start) {
+				HEntry::Occupied(mut e) => match e.get_mut().entry(start) {
 					BEntry::Vacant(e) => {
-						e.insert(o.end);
+						e.insert(end);
 					}
 					BEntry::Occupied(mut e) => {
-						if o.end.gt(e.get()) {
-							e.insert(o.end);
+						// Compare the span that would be stored, not the end of the
+						// original term: in partial mode `o.end` can exceed `end`, and
+						// a shorter span would then overwrite a longer one.
+						if end.gt(e.get()) {
+							e.insert(end);
 						}
 					}
 				},
 				HEntry::Vacant(e) => {
-					e.insert(BTreeMap::from([(o.start, o.end)]));
+					e.insert(BTreeMap::from([(start, end)]));
 				}
 			}
 		}
--- a/core/src/idx/ft/mod.rs
+++ b/core/src/idx/ft/mod.rs
@ -332,7 +332,7 @@ impl FtIndex {
 		opt: &Options,
 		txn: &Transaction,
 		query_string: String,
-	) -> Result<Vec<Option<TermId>>, Error> {
+	) -> Result<Vec<Option<(TermId, u32)>>, Error> {
 		let t = self.terms.read().await;
 		let terms = self.analyzer.extract_terms(ctx, opt, txn, &t, query_string).await?;
 		Ok(terms)
@ -341,11 +341,11 @@ impl FtIndex {
 	pub(super) async fn get_terms_docs(
 		&self,
 		tx: &mut kvs::Transaction,
-		terms: &Vec<Option<TermId>>,
+		terms: &Vec<Option<(TermId, u32)>>,
 	) -> Result<Vec<Option<(TermId, RoaringTreemap)>>, Error> {
 		let mut terms_docs = Vec::with_capacity(terms.len());
-		for opt_term_id in terms {
-			if let Some(term_id) = opt_term_id {
+		for opt_term in terms {
+			if let Some((term_id, _)) = opt_term {
 				let docs = self.term_docs.get_docs(tx, *term_id).await?;
 				if let Some(docs) = docs {
 					terms_docs.push(Some((*term_id, docs)));
@ -402,19 +402,20 @@ impl FtIndex {
 		&self,
 		tx: &mut kvs::Transaction,
 		thg: &Thing,
-		terms: &[Option<TermId>],
+		terms: &[Option<(TermId, u32)>],
 		prefix: Value,
 		suffix: Value,
+		partial: bool,
 		idiom: &Idiom,
 		doc: &Value,
 	) -> Result<Value, Error> {
 		let doc_key: Key = thg.into();
 		if let Some(doc_id) = self.doc_ids.read().await.get_doc_id(tx, doc_key).await? {
-			let mut hl = Highlighter::new(prefix, suffix, idiom, doc);
-			for term_id in terms.iter().flatten() {
+			let mut hl = Highlighter::new(prefix, suffix, partial, idiom, doc);
+			for (term_id, term_len) in terms.iter().flatten() {
 				let o = self.offsets.get_offsets(tx, doc_id, *term_id).await?;
 				if let Some(o) = o {
-					hl.highlight(o.0);
+					hl.highlight(*term_len, o.0);
 				}
 			}
 			return hl.try_into();
@ -426,15 +427,16 @@ impl FtIndex {
 		&self,
 		tx: &mut kvs::Transaction,
 		thg: &Thing,
-		terms: &[Option<TermId>],
+		terms: &[Option<(TermId, u32)>],
+		partial: bool,
 	) -> Result<Value, Error> {
 		let doc_key: Key = thg.into();
 		if let Some(doc_id) = self.doc_ids.read().await.get_doc_id(tx, doc_key).await? {
-			let mut or = Offseter::default();
-			for term_id in terms.iter().flatten() {
+			let mut or = Offseter::new(partial);
+			for (term_id, term_len) in terms.iter().flatten() {
 				let o = self.offsets.get_offsets(tx, doc_id, *term_id).await?;
 				if let Some(o) = o {
-					or.highlight(o.0);
+					or.highlight(*term_len, o.0);
 				}
 			}
 			return or.try_into();
--- a/core/src/idx/ft/offsets.rs
+++ b/core/src/idx/ft/offsets.rs
@ -59,15 +59,20 @@ impl Offsets {
+// Location of one term occurrence: the span of the original term and the
+// start of the generated term.
 #[derive(Clone, Debug, PartialEq)]
 pub(super) struct Offset {
+	// Grouping key of the span; `Offseter` collects spans per index
 	pub(super) index: u32,
+	// Start position of the original term
 	pub(super) start: Position,
+	// Start position of the generated term
+	pub(super) gen_start: Position,
+	// End position of the original term
 	pub(super) end: Position,
 }

 impl Offset {
-	pub(super) fn new(index: u32, start: Position, end: Position) -> Self {
+	pub(super) fn new(index: u32, start: Position, gen_start: Position, end: Position) -> Self {
 		Self {
 			index,
 			start,
+			gen_start,
 			end,
 		}
 	}
@ -94,6 +99,7 @@ impl TryFrom<OffsetRecords> for Val {
 		// `starts` and `offsets` are likely to be ascending
 		for o in &offsets.0 {
 			decompressed.push(o.start);
+			decompressed.push(o.gen_start);
 			decompressed.push(o.end);
 		}
 		Ok(bincode::serialize(&decompressed)?)
@ -109,17 +115,25 @@ impl TryFrom<Val> for OffsetRecords {
 		}
 		let decompressed: Vec<u32> = bincode::deserialize(&val)?;
 		let mut iter = decompressed.iter();
-		let s = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(1)"))?;
-		let mut indexes = Vec::with_capacity(s as usize);
-		for _ in 0..s {
+		let n_offsets = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(1)"))?;
+		// Up to v1.4 the Offset contained only two fields: start and end.
+		// We check the number of integers: if there are only 3 per offset (index, start, end), this is the old format.
+		let without_gen_start = n_offsets * 3 + 1 == (decompressed.len() as u32);
+		let mut indexes = Vec::with_capacity(n_offsets as usize);
+		for _ in 0..n_offsets {
 			let index = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(2)"))?;
 			indexes.push(index);
 		}
-		let mut res = Vec::with_capacity(s as usize);
+		let mut res = Vec::with_capacity(n_offsets as usize);
 		for index in indexes {
 			let start = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(3)"))?;
-			let end = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(4)"))?;
-			res.push(Offset::new(index, start, end));
+			let gen_start = if without_gen_start {
+				start
+			} else {
+				*iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(4)"))?
+			};
+			let end = *iter.next().ok_or(Error::CorruptedIndex("OffsetRecords::try_from(5)"))?;
+			res.push(Offset::new(index, start, gen_start, end));
 		}
 		Ok(OffsetRecords(res))
 	}
@ -132,10 +146,28 @@ mod tests {

 	#[test]
 	fn test_offset_records() {
-		let o =
-			OffsetRecords(vec![Offset::new(0, 1, 2), Offset::new(0, 11, 22), Offset::new(1, 3, 4)]);
+		let o = OffsetRecords(vec![
+			Offset::new(0, 1, 2, 3),
+			Offset::new(0, 11, 13, 22),
+			Offset::new(1, 1, 3, 4),
+		]);
 		let v: Val = o.clone().try_into().unwrap();
 		let o2 = v.try_into().unwrap();
 		assert_eq!(o, o2)
 	}
+
+	/// Records serialized without `gen_start` must still load, with
+	/// `gen_start` set to `start`.
+	#[test]
+	fn test_migrate_v1_offset_records() {
+		// Legacy layout: the count, one index per offset, then (start, end) pairs
+		let legacy = vec![3u32, 0, 0, 1, 1, 3, 11, 22, 1, 4];
+		let val = bincode::serialize(&legacy).unwrap();
+		let records: OffsetRecords = val.try_into().unwrap();
+		let expected = OffsetRecords(vec![
+			Offset::new(0, 1, 1, 3),
+			Offset::new(0, 11, 11, 22),
+			Offset::new(1, 1, 1, 4),
+		]);
+		assert_eq!(records, expected)
+	}
 }
--- a/core/src/idx/planner/executor.rs
+++ b/core/src/idx/planner/executor.rs
@ -433,16 +433,18 @@ impl QueryExecutor {
 		None
 	}

+	#[allow(clippy::too_many_arguments)]
 	pub(crate) async fn highlight(
 		&self,
 		txn: &Transaction,
 		thg: &Thing,
 		prefix: Value,
 		suffix: Value,
-		match_ref: &Value,
+		match_ref: Value,
+		partial: bool,
 		doc: &Value,
 	) -> Result<Value, Error> {
-		if let Some((e, ft)) = self.get_ft_entry_and_index(match_ref) {
+		if let Some((e, ft)) = self.get_ft_entry_and_index(&match_ref) {
 			let mut run = txn.lock().await;
 			return ft
 				.highlight(
@ -451,6 +453,7 @@ impl QueryExecutor {
 					&e.0.terms,
 					prefix,
 					suffix,
+					partial,
 					e.0.index_option.id_ref(),
 					doc,
 				)
@ -463,11 +466,12 @@ impl QueryExecutor {
 		&self,
 		txn: &Transaction,
 		thg: &Thing,
-		match_ref: &Value,
+		match_ref: Value,
+		partial: bool,
 	) -> Result<Value, Error> {
-		if let Some((e, ft)) = self.get_ft_entry_and_index(match_ref) {
+		if let Some((e, ft)) = self.get_ft_entry_and_index(&match_ref) {
 			let mut run = txn.lock().await;
-			return ft.extract_offsets(&mut run, thg, &e.0.terms).await;
+			return ft.extract_offsets(&mut run, thg, &e.0.terms, partial).await;
 		}
 		Ok(Value::None)
 	}
@ -504,7 +508,7 @@ struct FtEntry(Arc<Inner>);
 struct Inner {
 	index_option: IndexOption,
 	doc_ids: Arc<RwLock<DocIds>>,
-	terms: Vec<Option<TermId>>,
+	terms: Vec<Option<(TermId, u32)>>,
 	terms_docs: TermsDocs,
 	scorer: Option<BM25Scorer>,
 }
--- a/lib/tests/matches.rs
+++ b/lib/tests/matches.rs
@ -190,6 +190,92 @@ async fn select_where_matches_using_index_and_arrays_with_parallel() -> Result<(
 	select_where_matches_using_index_and_arrays(true).await
 }

+// Checks the optional `partial` argument of `search::highlight` and
+// `search::offsets` against an edgengram analyzer: when omitted or `false`
+// the whole matched token is reported, when `true` only the searched characters.
+#[tokio::test]
+async fn select_where_matches_partial_highlight() -> Result<(), Error> {
+	let sql = r"
+		CREATE blog:1 SET content = 'Hello World!';
+		DEFINE ANALYZER simple TOKENIZERS blank,class FILTERS lowercase,edgengram(2,100);
+		DEFINE INDEX blog_content ON blog FIELDS content SEARCH ANALYZER simple BM25 HIGHLIGHTS;
+		SELECT id, search::highlight('<em>', '</em>', 1) AS content FROM blog WHERE content @1@ 'he';
+		SELECT id, search::highlight('<em>', '</em>', 1, false) AS content FROM blog WHERE content @1@ 'he';
+		SELECT id, search::highlight('<em>', '</em>', 1, true) AS content FROM blog WHERE content @1@ 'he';
+		SELECT id, search::offsets(1) AS content FROM blog WHERE content @1@ 'he';
+		SELECT id, search::offsets(1, false) AS content FROM blog WHERE content @1@ 'he';
+		SELECT id, search::offsets(1, true) AS content FROM blog WHERE content @1@ 'he';
+	";
+	let dbs = new_ds().await?;
+	let ses = Session::owner().with_ns("test").with_db("test");
+	let res = &mut dbs.execute(&sql, &ses, None).await?;
+	assert_eq!(res.len(), 9);
+	// The CREATE and the two DEFINE statements only need to succeed
+	for _ in 0..3 {
+		let _ = res.remove(0).result?;
+	}
+	// search::highlight without `partial`, then with `false`: the whole token is wrapped
+	for i in 0..2 {
+		let tmp = res.remove(0).result?;
+		let val = Value::parse(
+			"[
+			{
+				id: blog:1,
+				content: '<em>Hello</em> World!'
+			}
+		]",
+		);
+		assert_eq!(format!("{:#}", tmp), format!("{:#}", val), "{i}");
+	}
+	// search::highlight with `true`: only the two searched characters are wrapped
+	let tmp = res.remove(0).result?;
+	let val = Value::parse(
+		"[
+			{
+				id: blog:1,
+				content: '<em>He</em>llo World!'
+			}
+		]",
+	);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
+	// search::offsets without `partial`, then with `false`: the span covers the whole token
+	for i in 0..2 {
+		let tmp = res.remove(0).result?;
+		let val = Value::parse(
+			"[
+					{
+						content: {
+							0: [
+								{
+									e: 5,
+									s: 0
+								}
+							]
+						},
+						id: blog:1
+					}
+				]",
+		);
+		assert_eq!(format!("{:#}", tmp), format!("{:#}", val), "{i}");
+	}
+	// search::offsets with `true`: the span covers only the two searched characters
+	let tmp = res.remove(0).result?;
+	let val = Value::parse(
+		"[
+					{
+						content: {
+							0: [
+								{
+									e: 2,
+									s: 0
+								}
+							]
+						},
+						id: blog:1
+					}
+				]",
+	);
+	assert_eq!(format!("{:#}", tmp), format!("{:#}", val));
+	Ok(())
+}
+
 async fn select_where_matches_using_index_and_objects(parallel: bool) -> Result<(), Error> {
 	let p = if parallel {
 		"PARALLEL"
--- a/tests/database_upgrade.rs
+++ b/tests/database_upgrade.rs
@ -160,8 +160,10 @@ mod database_upgrade {
 	];

 	// Set of QUERY and RESULT to check for Full Text Search
-	const CHECK_FTS: [Check; 1] =
-		[("SELECT name FROM account WHERE name @@ 'Tobie'", Expected::One("{\"name\":\"Tobie\"}"))];
+	const CHECK_FTS: [Check; 1] = [(
+		"SELECT search::highlight('<em>','</em>', 1) AS name FROM account WHERE name @1@ 'Tobie'",
+		Expected::One("{\"name\":\"<em>Tobie</em>\"}"),
+	)];

 	// Set of DATA for VectorSearch and  Knn Operator checking
 	const DATA_MTREE: [&str; 4] = [