[Feat] Async indexing: appending queue is persistent (#4622)
This commit is contained in:
parent
a5abc66e06
commit
e5bf40ae01
5 changed files with 159 additions and 32 deletions
|
@ -73,7 +73,7 @@ impl Document {
|
|||
) -> Result<(), Error> {
|
||||
#[cfg(not(target_arch = "wasm32"))]
|
||||
let (o, n) = if let Some(ib) = ctx.get_index_builder() {
|
||||
match ib.consume(ix, o, n, rid).await? {
|
||||
match ib.consume(ctx, ix, o, n, rid).await? {
|
||||
// The index builder consumed the value, which means it is currently building the index asynchronously,
|
||||
// we don't index the document and let the index builder do it later.
|
||||
ConsumeResult::Enqueued => return Ok(()),
|
||||
|
|
|
@ -134,6 +134,8 @@ pub enum Category {
|
|||
IndexHnswThings,
|
||||
/// crate::key::index::hv /*{ns}*{db}*{tb}+{ix}!hv{vec}
|
||||
IndexHnswVec,
|
||||
/// crate::key::index::ia /*{ns}*{db}*{tb}+{ix}!ia{id}
|
||||
IndexAppendings,
|
||||
/// crate::key::index /*{ns}*{db}*{tb}+{ix}*{fd}{id}
|
||||
Index,
|
||||
///
|
||||
|
@ -209,6 +211,7 @@ impl Display for Category {
|
|||
Self::IndexHnswDocIds => "IndexHnswDocIds",
|
||||
Self::IndexHnswThings => "IndexHnswThings",
|
||||
Self::IndexHnswVec => "IndexHnswVec",
|
||||
Self::IndexAppendings => "IndexAppendings",
|
||||
Self::Index => "Index",
|
||||
Self::ChangeFeed => "ChangeFeed",
|
||||
Self::Thing => "Thing",
|
||||
|
|
62
core/src/key/index/ia.rs
Normal file
62
core/src/key/index/ia.rs
Normal file
|
@ -0,0 +1,62 @@
|
|||
//! Store appended records for concurrent index building
|
||||
use derive::Key;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Debug;
|
||||
|
||||
/// Key under which one appended record is stored while an index is being
/// built concurrently.
///
/// The fields follow the layout `/*{ns}*{db}*{tb}+{ix}!ia{i}`: the private
/// `u8` fields carry the separator and tag bytes that `Ia::new` fills in, and
/// the public fields carry the caller-supplied components. The unit test in
/// this module shows the encoded bytes following this field order, so the
/// fields should not be rearranged.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Key)]
|
||||
#[non_exhaustive]
|
||||
pub struct Ia<'a> {
|
||||
/// Leading byte of the key; `Ia::new` sets it to `/`.
__: u8,
|
||||
/// Separator before the namespace; `Ia::new` sets it to `*`.
_a: u8,
|
||||
/// Namespace name.
pub ns: &'a str,
|
||||
/// Separator before the database; `Ia::new` sets it to `*`.
_b: u8,
|
||||
/// Database name.
pub db: &'a str,
|
||||
/// Separator before the table; `Ia::new` sets it to `*`.
_c: u8,
|
||||
/// Table name.
pub tb: &'a str,
|
||||
/// Separator before the index; `Ia::new` sets it to `+`.
_d: u8,
|
||||
/// Index name.
pub ix: &'a str,
|
||||
/// First byte of the `!ia` tag; `Ia::new` sets it to `!`.
_e: u8,
|
||||
/// Second byte of the `!ia` tag; `Ia::new` sets it to `i`.
_f: u8,
|
||||
/// Third byte of the `!ia` tag; `Ia::new` sets it to `a`.
_g: u8,
|
||||
/// Sequence number of the appended record; the unit test shows it encoded
/// as four big-endian bytes.
pub i: u32,
|
||||
}
|
||||
|
||||
impl<'a> Ia<'a> {
|
||||
pub fn new(ns: &'a str, db: &'a str, tb: &'a str, ix: &'a str, i: u32) -> Self {
|
||||
Self {
|
||||
__: b'/',
|
||||
_a: b'*',
|
||||
ns,
|
||||
_b: b'*',
|
||||
db,
|
||||
_c: b'*',
|
||||
tb,
|
||||
_d: b'+',
|
||||
ix,
|
||||
_e: b'!',
|
||||
_f: b'i',
|
||||
_g: b'a',
|
||||
i,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {

    /// Checks the exact encoded form of an `Ia` key and that decoding the
    /// bytes gives back the original key.
    #[test]
    fn key() {
        use super::*;

        // Encode a key with a known sequence number.
        let original = Ia::new("testns", "testdb", "testtb", "testix", 1);
        let bytes = Ia::encode(&original).unwrap();
        assert_eq!(
            bytes,
            b"/*testns\0*testdb\0*testtb\0+testix\0!ia\x00\x00\x00\x01",
            "{}",
            String::from_utf8_lossy(&bytes)
        );

        // Decoding must be the exact inverse of encoding.
        let decoded = Ia::decode(&bytes).unwrap();
        assert_eq!(original, decoded);
    }
}
|
|
@ -17,6 +17,7 @@ pub mod hi;
|
|||
pub mod hl;
|
||||
pub mod hs;
|
||||
pub mod hv;
|
||||
pub mod ia;
|
||||
pub mod vm;
|
||||
|
||||
use crate::key::category::Categorise;
|
||||
|
|
|
@ -4,6 +4,7 @@ use crate::dbs::Options;
|
|||
use crate::doc::{CursorDoc, Document};
|
||||
use crate::err::Error;
|
||||
use crate::idx::index::IndexOperation;
|
||||
use crate::key::index::ia::Ia;
|
||||
use crate::key::thing;
|
||||
use crate::kvs::ds::TransactionFactory;
|
||||
use crate::kvs::LockType::Optimistic;
|
||||
|
@ -12,8 +13,11 @@ use crate::sql::statements::DefineIndexStatement;
|
|||
use crate::sql::{Id, Object, Thing, Value};
|
||||
use dashmap::mapref::entry::Entry;
|
||||
use dashmap::DashMap;
|
||||
use derive::Store;
|
||||
use reblessive::TreeStack;
|
||||
use std::collections::VecDeque;
|
||||
use revision::revisioned;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::ops::Range;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::task;
|
||||
|
@ -117,6 +121,7 @@ impl IndexBuilder {
|
|||
|
||||
pub(crate) async fn consume(
|
||||
&self,
|
||||
ctx: &Context,
|
||||
ix: &DefineIndexStatement,
|
||||
old_values: Option<Vec<Value>>,
|
||||
new_values: Option<Vec<Value>>,
|
||||
|
@ -124,7 +129,7 @@ impl IndexBuilder {
|
|||
) -> Result<ConsumeResult, Error> {
|
||||
if let Some(r) = self.indexes.get(ix) {
|
||||
let (b, _) = r.value();
|
||||
return Ok(b.maybe_consume(old_values, new_values, rid).await);
|
||||
return b.maybe_consume(ctx, old_values, new_values, rid).await;
|
||||
}
|
||||
Ok(ConsumeResult::Ignored(old_values, new_values))
|
||||
}
|
||||
|
@ -138,12 +143,50 @@ impl IndexBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
/// One index update recorded while the index is still being built.
///
/// `Building::maybe_consume` stores a value of this type under an `Ia` key,
/// and the build loop later reads it back and hands its fields to
/// `IndexOperation::new`.
#[revisioned(revision = 1)]
|
||||
#[derive(Serialize, Deserialize, Store, Debug)]
|
||||
#[non_exhaustive]
|
||||
struct Appending {
|
||||
/// Old values passed through from the caller of `maybe_consume`, if any.
old_values: Option<Vec<Value>>,
|
||||
/// New values passed through from the caller of `maybe_consume`, if any.
new_values: Option<Vec<Value>>,
|
||||
/// Id part of the affected record (`rid.id`); the build loop combines it
/// with the table name to rebuild the full `Thing`.
id: Id,
|
||||
}
|
||||
|
||||
/// Sequence numbers delimiting the appended records that still have to be
/// indexed.
///
/// The pending records are those numbered `to_index..next`; the queue is
/// empty when both counters are equal.
#[derive(Default)]
struct QueueSequences {
    /// The index of the next appending to be indexed
    to_index: u32,
    /// The index of the next appending to be added
    next: u32,
}

impl QueueSequences {
    /// Returns `true` when no appended record is waiting to be indexed.
    fn is_empty(&self) -> bool {
        self.to_index == self.next
    }

    /// Reserves and returns the sequence number for a new appended record.
    ///
    /// NOTE(review): `next` is a `u32` incremented without a check, so it
    /// overflows after `u32::MAX` reservations without an intervening
    /// `clear`. Handling that would need a fallible signature.
    fn add_update(&mut self) -> u32 {
        let i = self.next;
        self.next += 1;
        i
    }

    /// Resets both counters to zero, leaving the queue empty.
    fn clear(&mut self) {
        self.to_index = 0;
        self.next = 0;
    }

    /// Records that every appended record numbered below `i` has been indexed.
    fn set_to_index(&mut self, i: u32) {
        self.to_index = i;
    }

    /// Returns the next range of sequence numbers to index: at most `page`
    /// entries starting at `to_index`, never reaching past `next`.
    fn next_indexing_batch(&self, page: u32) -> Range<u32> {
        let s = self.to_index;
        // Saturate so that a start near `u32::MAX` cannot overflow before the
        // result is clamped to `next`.
        let e = s.saturating_add(page).min(self.next);
        s..e
    }
}
|
||||
|
||||
struct Building {
|
||||
ctx: Context,
|
||||
opt: Options,
|
||||
|
@ -152,7 +195,7 @@ struct Building {
|
|||
tb: String,
|
||||
status: Arc<Mutex<BuildingStatus>>,
|
||||
// Should be stored on a temporary table
|
||||
appended: Arc<Mutex<VecDeque<Appending>>>,
|
||||
queue: Arc<Mutex<QueueSequences>>,
|
||||
}
|
||||
|
||||
impl Building {
|
||||
|
@ -169,7 +212,7 @@ impl Building {
|
|||
tb: ix.what.to_string(),
|
||||
ix,
|
||||
status: Arc::new(Mutex::new(BuildingStatus::Started)),
|
||||
appended: Default::default(),
|
||||
queue: Default::default(),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -183,25 +226,34 @@ impl Building {
|
|||
|
||||
// Decides whether an update must be queued for the asynchronous index build
// or handed back to the caller.
//
// With the queue locked, the values are returned as `ConsumeResult::Ignored`
// when the queue is empty and the build status reports the index as built.
// Otherwise the update is wrapped in an `Appending`, stored under the next
// `Ia` sequence key through the transaction of `ctx`, and
// `ConsumeResult::Enqueued` is returned.
async fn maybe_consume(
|
||||
&self,
|
||||
ctx: &Context,
|
||||
old_values: Option<Vec<Value>>,
|
||||
new_values: Option<Vec<Value>>,
|
||||
rid: &Thing,
|
||||
) -> ConsumeResult {
|
||||
let mut a = self.appended.lock().await;
|
||||
) -> Result<ConsumeResult, Error> {
|
||||
let mut queue = self.queue.lock().await;
|
||||
// Now that the queue is locked, we have the possibility to assess if the asynchronous build is done.
|
||||
if a.is_empty() {
|
||||
if queue.is_empty() {
|
||||
// If the appending queue is empty and the index is built...
|
||||
if self.status.lock().await.is_built() {
|
||||
// ... we return the values back, so the document can be updated the usual way
|
||||
return ConsumeResult::Ignored(old_values, new_values);
|
||||
return Ok(ConsumeResult::Ignored(old_values, new_values));
|
||||
}
|
||||
}
|
||||
a.push_back(Appending {
|
||||
// Only the id part of the record is kept; the table name is held by `self`.
let a = Appending {
|
||||
old_values,
|
||||
new_values,
|
||||
id: rid.id.clone(),
|
||||
});
|
||||
ConsumeResult::Enqueued
|
||||
};
|
||||
// Reserve the next sequence number and build the storage key from it.
let ia = self.new_ia_key(queue.add_update())?;
|
||||
// Persist the appending using the transaction of the supplied context.
ctx.tx().set(ia, a, None).await?;
|
||||
Ok(ConsumeResult::Enqueued)
|
||||
}
|
||||
|
||||
fn new_ia_key(&self, i: u32) -> Result<Ia, Error> {
|
||||
let ns = self.opt.ns()?;
|
||||
let db = self.opt.db()?;
|
||||
Ok(Ia::new(ns, db, &self.ix.what, &self.ix.name, i))
|
||||
}
|
||||
|
||||
async fn new_read_tx(&self) -> Result<Transaction, Error> {
|
||||
|
@ -259,35 +311,44 @@ impl Building {
|
|||
// Second iteration, we index/remove any records that have been added or removed since the initial indexing
|
||||
self.set_status(BuildingStatus::UpdatesIndexing(0)).await;
|
||||
loop {
|
||||
let mut batch = self.appended.lock().await;
|
||||
if batch.is_empty() {
|
||||
let mut queue = self.queue.lock().await;
|
||||
if queue.is_empty() {
|
||||
// If the batch is empty, we are done.
|
||||
// Due to the lock on self.queue, we know that no external process can add an item to the queue.
|
||||
self.set_status(BuildingStatus::Built).await;
|
||||
// This is here to be sure the lock on the queue is not released early
|
||||
batch.clear();
|
||||
queue.clear();
|
||||
break;
|
||||
}
|
||||
let fetch = (*NORMAL_FETCH_SIZE as usize).min(batch.len());
|
||||
let drain = batch.drain(0..fetch);
|
||||
let range = queue.next_indexing_batch(*NORMAL_FETCH_SIZE);
|
||||
if range.is_empty() {
|
||||
continue;
|
||||
}
|
||||
// Create a new context with a write transaction
|
||||
let ctx = self.new_write_tx_ctx().await?;
|
||||
|
||||
for a in drain {
|
||||
let rid = Thing::from((self.tb.clone(), a.id));
|
||||
let mut io = IndexOperation::new(
|
||||
&ctx,
|
||||
&self.opt,
|
||||
&self.ix,
|
||||
a.old_values,
|
||||
a.new_values,
|
||||
&rid,
|
||||
);
|
||||
stack.enter(|stk| io.compute(stk)).finish().await?;
|
||||
count += 1;
|
||||
self.set_status(BuildingStatus::UpdatesIndexing(count)).await;
|
||||
let tx = ctx.tx();
|
||||
let next_to_index = range.end;
|
||||
for i in range {
|
||||
let ia = self.new_ia_key(i)?;
|
||||
if let Some(v) = tx.get(ia.clone(), None).await? {
|
||||
tx.del(ia).await?;
|
||||
let a: Appending = v.into();
|
||||
let rid = Thing::from((self.tb.clone(), a.id));
|
||||
let mut io = IndexOperation::new(
|
||||
&ctx,
|
||||
&self.opt,
|
||||
&self.ix,
|
||||
a.old_values,
|
||||
a.new_values,
|
||||
&rid,
|
||||
);
|
||||
stack.enter(|stk| io.compute(stk)).finish().await?;
|
||||
count += 1;
|
||||
self.set_status(BuildingStatus::UpdatesIndexing(count)).await;
|
||||
}
|
||||
}
|
||||
ctx.tx().commit().await?;
|
||||
tx.commit().await?;
|
||||
queue.set_to_index(next_to_index);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue