2023-10-26 21:33:06 +00:00
|
|
|
use criterion::measurement::WallTime;
|
|
|
|
use criterion::{criterion_group, criterion_main, BenchmarkGroup, Criterion, Throughput};
|
|
|
|
use futures::executor::block_on;
|
2024-05-24 13:45:21 +00:00
|
|
|
use rand::rngs::StdRng;
|
|
|
|
use rand::{Rng, SeedableRng};
|
2024-04-30 18:09:54 +00:00
|
|
|
use reblessive::TreeStack;
|
2024-05-24 13:45:21 +00:00
|
|
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
|
|
use std::sync::Arc;
|
2023-10-26 21:33:06 +00:00
|
|
|
use surrealdb::kvs::Datastore;
|
|
|
|
use surrealdb::kvs::LockType::Optimistic;
|
|
|
|
use surrealdb::kvs::TransactionType::{Read, Write};
|
2024-05-28 10:43:45 +00:00
|
|
|
use surrealdb_core::ctx::Context;
|
2024-05-24 13:45:21 +00:00
|
|
|
use surrealdb_core::idx::planner::checker::MTreeConditionChecker;
|
|
|
|
use surrealdb_core::idx::trees::mtree::MTreeIndex;
|
|
|
|
use surrealdb_core::idx::IndexKeyBase;
|
|
|
|
use surrealdb_core::kvs::{Transaction, TransactionType};
|
|
|
|
use surrealdb_core::sql::index::{Distance, MTreeParams, VectorType};
|
|
|
|
use surrealdb_core::sql::{Id, Number, Thing, Value};
|
|
|
|
use tokio::runtime::{Builder, Runtime};
|
|
|
|
use tokio::task;
|
2023-10-26 21:33:06 +00:00
|
|
|
|
|
|
|
fn bench_index_mtree_dim_3(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 250, 2500, 3, 100);
|
2023-12-13 13:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree_dim_3_full_cache(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 250, 2500, 3, 0);
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree_dim_50(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 100, 1000, 50, 100);
|
2023-12-13 13:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree_dim_50_full_cache(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 100, 1000, 50, 0);
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree_dim_300(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 50, 500, 300, 100);
|
2023-12-13 13:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree_dim_300_full_cache(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 50, 500, 300, 0);
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree_dim_2048(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 10, 100, 2048, 100);
|
2023-12-13 13:37:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree_dim_2048_full_cache(c: &mut Criterion) {
|
2024-07-20 08:27:14 +00:00
|
|
|
bench_index_mtree(c, 10, 100, 2048, 0);
|
2024-05-24 13:45:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
async fn mtree_index(
|
|
|
|
ds: &Datastore,
|
2024-07-17 22:44:05 +00:00
|
|
|
tx: &Transaction,
|
2024-05-24 13:45:21 +00:00
|
|
|
dimension: usize,
|
|
|
|
cache_size: usize,
|
|
|
|
tt: TransactionType,
|
|
|
|
) -> MTreeIndex {
|
|
|
|
let p = MTreeParams::new(
|
|
|
|
dimension as u16,
|
|
|
|
Distance::Euclidean,
|
|
|
|
VectorType::F64,
|
|
|
|
40,
|
|
|
|
100,
|
|
|
|
cache_size as u32,
|
|
|
|
cache_size as u32,
|
|
|
|
);
|
|
|
|
MTreeIndex::new(ds.index_store(), tx, IndexKeyBase::default(), &p, tt).await.unwrap()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn runtime() -> Runtime {
|
|
|
|
Builder::new_multi_thread().worker_threads(4).enable_all().build().unwrap()
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn bench_index_mtree(
|
|
|
|
c: &mut Criterion,
|
|
|
|
debug_samples_len: usize,
|
|
|
|
release_samples_len: usize,
|
|
|
|
vector_dimension: usize,
|
2023-12-13 13:37:24 +00:00
|
|
|
cache_size: usize,
|
2023-10-26 21:33:06 +00:00
|
|
|
) {
|
|
|
|
let samples_len = if cfg!(debug_assertions) {
|
|
|
|
debug_samples_len // Debug is slow
|
|
|
|
} else {
|
|
|
|
release_samples_len // Release is fast
|
|
|
|
};
|
|
|
|
|
|
|
|
// Both benchmark groups are sharing the same datastore
|
|
|
|
let ds = block_on(Datastore::new("memory")).unwrap();
|
|
|
|
|
|
|
|
// Indexing benchmark group
|
|
|
|
{
|
2024-05-24 13:45:21 +00:00
|
|
|
let mut group = get_group(c, "index_mtree_insert", samples_len);
|
2023-12-13 13:37:24 +00:00
|
|
|
let id = format!("len_{}_dim_{}_cache_{}", samples_len, vector_dimension, cache_size);
|
2023-10-26 21:33:06 +00:00
|
|
|
group.bench_function(id, |b| {
|
2024-05-24 13:45:21 +00:00
|
|
|
b.to_async(runtime())
|
2023-12-13 13:37:24 +00:00
|
|
|
.iter(|| insert_objects(&ds, samples_len, vector_dimension, cache_size));
|
2023-10-26 21:33:06 +00:00
|
|
|
});
|
|
|
|
group.finish();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Knn lookup benchmark group
|
|
|
|
{
|
2024-05-24 13:45:21 +00:00
|
|
|
let mut group = get_group(c, "index_mtree_lookup", samples_len);
|
2023-10-26 21:33:06 +00:00
|
|
|
for knn in [1, 10] {
|
2023-12-13 13:37:24 +00:00
|
|
|
let id = format!(
|
|
|
|
"knn_{}_len_{}_dim_{}_cache_{}",
|
|
|
|
knn, samples_len, vector_dimension, cache_size
|
|
|
|
);
|
2023-10-26 21:33:06 +00:00
|
|
|
group.bench_function(id, |b| {
|
2024-05-24 13:45:21 +00:00
|
|
|
b.to_async(runtime()).iter(|| {
|
|
|
|
knn_lookup_objects(&ds, samples_len, vector_dimension, cache_size, knn)
|
2023-12-13 13:37:24 +00:00
|
|
|
});
|
2023-10-26 21:33:06 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
group.finish();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn get_group<'a>(
|
|
|
|
c: &'a mut Criterion,
|
|
|
|
group_name: &str,
|
|
|
|
samples_len: usize,
|
|
|
|
) -> BenchmarkGroup<'a, WallTime> {
|
|
|
|
let mut group = c.benchmark_group(group_name);
|
|
|
|
group.throughput(Throughput::Elements(samples_len as u64));
|
|
|
|
group.sample_size(10);
|
|
|
|
group
|
|
|
|
}
|
2024-05-24 13:45:21 +00:00
|
|
|
fn random_object(rng: &mut StdRng, vector_size: usize) -> Vec<Number> {
|
2023-10-26 21:33:06 +00:00
|
|
|
let mut vec = Vec::with_capacity(vector_size);
|
|
|
|
for _ in 0..vector_size {
|
2024-05-24 13:45:21 +00:00
|
|
|
vec.push(rng.gen_range(-1.0..=1.0).into());
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
2024-05-24 13:45:21 +00:00
|
|
|
vec
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
|
|
|
|
2023-12-13 13:37:24 +00:00
|
|
|
async fn insert_objects(
|
|
|
|
ds: &Datastore,
|
|
|
|
samples_size: usize,
|
|
|
|
vector_size: usize,
|
|
|
|
cache_size: usize,
|
|
|
|
) {
|
2024-07-17 22:44:05 +00:00
|
|
|
let tx = ds.transaction(Write, Optimistic).await.unwrap();
|
|
|
|
let mut mt = mtree_index(ds, &tx, vector_size, cache_size, Write).await;
|
2024-04-30 18:09:54 +00:00
|
|
|
let mut stack = TreeStack::new();
|
2024-05-24 13:45:21 +00:00
|
|
|
let mut rng = StdRng::from_entropy();
|
2024-04-30 18:09:54 +00:00
|
|
|
stack
|
|
|
|
.enter(|stk| async {
|
|
|
|
for i in 0..samples_size {
|
2024-05-24 13:45:21 +00:00
|
|
|
let vector: Vec<Number> = random_object(&mut rng, vector_size);
|
2024-04-30 18:09:54 +00:00
|
|
|
// Insert the sample
|
2024-05-24 13:45:21 +00:00
|
|
|
let rid = Thing::from(("test", Id::from(i as i64)));
|
2024-07-17 22:44:05 +00:00
|
|
|
mt.index_document(stk, &tx, &rid, &vec![Value::from(vector)]).await.unwrap();
|
2024-04-30 18:09:54 +00:00
|
|
|
}
|
|
|
|
})
|
|
|
|
.finish()
|
|
|
|
.await;
|
2024-07-17 22:44:05 +00:00
|
|
|
mt.finish(&tx).await.unwrap();
|
2023-10-26 21:33:06 +00:00
|
|
|
tx.commit().await.unwrap();
|
|
|
|
}
|
|
|
|
|
2023-12-13 13:37:24 +00:00
|
|
|
async fn knn_lookup_objects(
|
|
|
|
ds: &Datastore,
|
|
|
|
samples_size: usize,
|
|
|
|
vector_size: usize,
|
|
|
|
cache_size: usize,
|
2024-05-24 13:45:21 +00:00
|
|
|
knn: usize,
|
2023-12-13 13:37:24 +00:00
|
|
|
) {
|
2024-07-17 22:44:05 +00:00
|
|
|
let txn = ds.transaction(Read, Optimistic).await.unwrap();
|
|
|
|
let mt = Arc::new(mtree_index(ds, &txn, vector_size, cache_size, Read).await);
|
|
|
|
let ctx = Arc::new(Context::from(txn));
|
2024-05-24 13:45:21 +00:00
|
|
|
|
|
|
|
let counter = Arc::new(AtomicUsize::new(0));
|
|
|
|
|
|
|
|
let mut consumers = Vec::with_capacity(4);
|
|
|
|
for _ in 0..4 {
|
2024-05-28 10:43:45 +00:00
|
|
|
let (ctx, mt, counter) = (ctx.clone(), mt.clone(), counter.clone());
|
2024-05-24 13:45:21 +00:00
|
|
|
let c = task::spawn(async move {
|
|
|
|
let mut rng = StdRng::from_entropy();
|
|
|
|
while counter.fetch_add(1, Ordering::Relaxed) < samples_size {
|
|
|
|
let object = random_object(&mut rng, vector_size);
|
2024-05-28 10:43:45 +00:00
|
|
|
knn_lookup_object(mt.as_ref(), &ctx, object, knn).await;
|
2024-05-24 13:45:21 +00:00
|
|
|
}
|
|
|
|
});
|
|
|
|
consumers.push(c);
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
2024-05-24 13:45:21 +00:00
|
|
|
for c in consumers {
|
|
|
|
c.await.unwrap();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-28 10:43:45 +00:00
|
|
|
async fn knn_lookup_object(mt: &MTreeIndex, ctx: &Context<'_>, object: Vec<Number>, knn: usize) {
|
2024-05-24 13:45:21 +00:00
|
|
|
let mut stack = TreeStack::new();
|
|
|
|
stack
|
|
|
|
.enter(|stk| async {
|
2024-07-05 09:34:43 +00:00
|
|
|
let chk = MTreeConditionChecker::new(ctx);
|
2024-05-28 10:43:45 +00:00
|
|
|
let r = mt.knn_search(stk, ctx, &object, knn, chk).await.unwrap();
|
2024-05-24 13:45:21 +00:00
|
|
|
assert_eq!(r.len(), knn);
|
|
|
|
})
|
|
|
|
.finish()
|
|
|
|
.await;
|
2023-10-26 21:33:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
criterion_group!(
|
|
|
|
benches,
|
|
|
|
bench_index_mtree_dim_3,
|
2023-12-13 13:37:24 +00:00
|
|
|
bench_index_mtree_dim_3_full_cache,
|
2023-10-26 21:33:06 +00:00
|
|
|
bench_index_mtree_dim_50,
|
2023-12-13 13:37:24 +00:00
|
|
|
bench_index_mtree_dim_50_full_cache,
|
2023-10-26 21:33:06 +00:00
|
|
|
bench_index_mtree_dim_300,
|
2023-12-13 13:37:24 +00:00
|
|
|
bench_index_mtree_dim_300_full_cache,
|
2023-10-26 21:33:06 +00:00
|
|
|
bench_index_mtree_dim_2048,
|
2023-12-13 13:37:24 +00:00
|
|
|
bench_index_mtree_dim_2048_full_cache
|
2023-10-26 21:33:06 +00:00
|
|
|
);
|
|
|
|
criterion_main!(benches);
|