From c415d37a72de213aa0352e54df1e59f1bfb5b79b Mon Sep 17 00:00:00 2001 From: Emmanuel Keller Date: Sat, 20 Jul 2024 09:27:14 +0100 Subject: [PATCH] MTree/HNSW tests/benches work (#4387) --- core/src/ctx/context.rs | 3 +-- core/src/idx/trees/hnsw/mod.rs | 12 ++++++------ core/src/idx/trees/mtree.rs | 21 +++++++++++++-------- core/src/idx/trees/vector.rs | 16 ++++++++-------- lib/benches/hash_trie_btree.rs | 13 ++++++------- lib/benches/index_mtree.rs | 16 ++++++++-------- 6 files changed, 42 insertions(+), 39 deletions(-) diff --git a/core/src/ctx/context.rs b/core/src/ctx/context.rs index d0dabf76..bda3d6b9 100644 --- a/core/src/ctx/context.rs +++ b/core/src/ctx/context.rs @@ -255,8 +255,7 @@ impl<'a> Context<'a> { pub(crate) fn tx(&self) -> Arc { self.transaction - .as_ref() - .map(Arc::clone) + .clone() .unwrap_or_else(|| unreachable!("The context was not associated with a transaction")) } diff --git a/core/src/idx/trees/hnsw/mod.rs b/core/src/idx/trees/hnsw/mod.rs index f02f4d9d..e02060d5 100644 --- a/core/src/idx/trees/hnsw/mod.rs +++ b/core/src/idx/trees/hnsw/mod.rs @@ -678,9 +678,9 @@ mod tests { let p = new_params(20, VectorType::F32, Distance::Euclidean, 8, 100, false, false); test_recall( "hnsw-random-9000-20-euclidean.gz", - 3000, + 1000, "hnsw-random-5000-20-euclidean.gz", - 500, + 300, p, &[(10, 0.98), (40, 1.0)], ) @@ -692,9 +692,9 @@ mod tests { let p = new_params(20, VectorType::F32, Distance::Euclidean, 8, 100, false, true); test_recall( "hnsw-random-9000-20-euclidean.gz", - 3000, + 750, "hnsw-random-5000-20-euclidean.gz", - 500, + 200, p, &[(10, 0.98), (40, 1.0)], ) @@ -706,9 +706,9 @@ mod tests { let p = new_params(20, VectorType::F32, Distance::Euclidean, 8, 100, true, true); test_recall( "hnsw-random-9000-20-euclidean.gz", - 1000, + 500, "hnsw-random-5000-20-euclidean.gz", - 200, + 100, p, &[(10, 0.98), (40, 1.0)], ) diff --git a/core/src/idx/trees/mtree.rs b/core/src/idx/trees/mtree.rs index da94b986..183ff5a0 100644 --- a/core/src/idx/trees/mtree.rs +++ b/core/src/idx/trees/mtree.rs @@ -1886,7 +1886,7 @@ mod tests { stk, &[40], vt, - TestCollection::new(true, 1000, vt, 10, &Distance::Euclidean), + TestCollection::new(true, 500, vt, 5, &Distance::Euclidean), false, true, false, @@ -1910,7 +1910,7 @@ mod tests { stk, &[40], vt, - TestCollection::new(true, 1000, vt, 10, &Distance::Euclidean), + TestCollection::new(true, 500, vt, 5, &Distance::Euclidean), false, true, false, @@ -1934,7 +1934,7 @@ mod tests { stk, &[40], vt, - TestCollection::new(true, 1000, vt, 10, &Distance::Euclidean), + TestCollection::new(true, 500, vt, 5, &Distance::Euclidean), false, true, false, @@ -1961,13 +1961,18 @@ mod tests { VectorType::I32, VectorType::I16, ] { - for i in 0..30 { - // 10, 40 + for collection_size in [0, 1, 5, 10, 15, 20, 30, 40] { test_mtree_collection( stk, - &[3, 40], + &[3, 10, 40], vt, - TestCollection::new(false, i, vt, 1, &Distance::Euclidean), + TestCollection::new( + false, + collection_size, + vt, + 1, + &Distance::Euclidean, + ), true, true, true, @@ -2017,7 +2022,7 @@ mod tests { stk, &[40], vt, - TestCollection::new(false, 1000, vt, 10, &Distance::Euclidean), + TestCollection::new(false, 500, vt, 5, &Distance::Euclidean), false, true, false, diff --git a/core/src/idx/trees/vector.rs b/core/src/idx/trees/vector.rs index d06ffa18..625c44c6 100644 --- a/core/src/idx/trees/vector.rs +++ b/core/src/idx/trees/vector.rs @@ -579,41 +579,41 @@ mod tests { #[test] fn test_distance_chebyshev() { - test_distance_collection(Distance::Chebyshev, 2000, 1536); + test_distance_collection(Distance::Chebyshev, 100, 1536); test_distance(Distance::Chebyshev, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 1.0); } #[test] fn test_distance_cosine() { - test_distance_collection(Distance::Cosine, 2000, 1536); + test_distance_collection(Distance::Cosine, 100, 1536); test_distance(Distance::Cosine, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 0.007416666029069652); } #[test] fn test_distance_euclidean() { - test_distance_collection(Distance::Euclidean, 2000, 1536); + test_distance_collection(Distance::Euclidean, 100, 1536); test_distance(Distance::Euclidean, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 1.7320508075688772); } #[test] fn test_distance_hamming() { - test_distance_collection(Distance::Hamming, 2000, 1536); + test_distance_collection(Distance::Hamming, 100, 1536); test_distance(Distance::Hamming, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 3.0); } #[test] fn test_distance_jaccard() { - test_distance_collection(Distance::Jaccard, 1000, 768); + test_distance_collection(Distance::Jaccard, 100, 768); test_distance(Distance::Jaccard, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 0.5); } #[test] fn test_distance_manhattan() { - test_distance_collection(Distance::Manhattan, 2000, 1536); + test_distance_collection(Distance::Manhattan, 100, 1536); test_distance(Distance::Manhattan, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 3.0); } #[test] fn test_distance_minkowski() { - test_distance_collection(Distance::Minkowski(3.into()), 2000, 1536); + test_distance_collection(Distance::Minkowski(3.into()), 100, 1536); test_distance( Distance::Minkowski(3.into()), &[1.0, 2.0, 3.0], @@ -624,7 +624,7 @@ mod tests { #[test] fn test_distance_pearson() { - test_distance_collection(Distance::Pearson, 2000, 1536); + test_distance_collection(Distance::Pearson, 100, 1536); test_distance(Distance::Pearson, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 1.0); } } diff --git a/lib/benches/hash_trie_btree.rs b/lib/benches/hash_trie_btree.rs index 8ea63e52..5c33e723 100644 --- a/lib/benches/hash_trie_btree.rs +++ b/lib/benches/hash_trie_btree.rs @@ -4,7 +4,6 @@ use radix_trie::{Trie, TrieCommon, TrieKey}; use std::collections::{BTreeMap, HashMap}; use std::hash::Hash; use std::time::Duration; -// use surrealdb::key::table::ix; use surrealdb::sql::{value, Array, Id, Thing}; // Common use case: VectorSearch @@ -23,13 +22,13 @@ fn bench_hash_trie_btree_large_vector(c: &mut Criterion) { g.finish(); } -// TODO: @emmanuel-keller this is disabled because `ix` is now private -/*fn bench_hash_trie_btree_ix_key(c: &mut Criterion) { +fn bench_hash_trie_btree_ix_key(c: &mut Criterion) { const N: usize = 100_000; let mut samples = Vec::with_capacity(N); for i in 0..N { - let key = ix::new("test", "test", "test", &format!("test{i}")).encode().unwrap(); - samples.push((key, i)); + let mut key = b"/*test\0*test\0*test\0!ixtest".to_vec(); + key.append(&mut i.to_be_bytes().to_vec()); + samples.push((key.to_vec(), i)); } let mut g = new_group(c, "bench_hash_trie_btree_ix_key", N); @@ -37,7 +36,7 @@ fn bench_hash_trie_btree_large_vector(c: &mut Criterion) { bench_trie(&mut g, &samples); bench_btree(&mut g, &samples); g.finish(); -}*/ +} fn bench_hash_trie_btree_small_string(c: &mut Criterion) { const N: usize = 100_000; @@ -193,7 +192,7 @@ fn bench_btree_get(samples: &[(K, V)], map: &BTreeMap) { criterion_group!( benches, bench_hash_trie_btree_large_vector, - // bench_hash_trie_btree_ix_key, + bench_hash_trie_btree_ix_key, bench_hash_trie_btree_small_string, bench_hash_trie_btree_thing, bench_hash_trie_btree_value diff --git a/lib/benches/index_mtree.rs b/lib/benches/index_mtree.rs index 7dc9d0e3..354159e9 100644 --- a/lib/benches/index_mtree.rs +++ b/lib/benches/index_mtree.rs @@ -20,35 +20,35 @@ use tokio::runtime::{Builder, Runtime}; use tokio::task; fn bench_index_mtree_dim_3(c: &mut Criterion) { - bench_index_mtree(c, 250, 25_000, 3, 100); + bench_index_mtree(c, 250, 2500, 3, 100); } fn bench_index_mtree_dim_3_full_cache(c: &mut Criterion) { - bench_index_mtree(c, 250, 25_000, 3, 0); + bench_index_mtree(c, 250, 2500, 3, 0); } fn bench_index_mtree_dim_50(c: &mut Criterion) { - bench_index_mtree(c, 100, 10_000, 50, 100); + bench_index_mtree(c, 100, 1000, 50, 100); } fn bench_index_mtree_dim_50_full_cache(c: &mut Criterion) { - bench_index_mtree(c, 100, 10_000, 50, 0); + bench_index_mtree(c, 100, 1000, 50, 0); } fn bench_index_mtree_dim_300(c: &mut Criterion) { - bench_index_mtree(c, 50, 5_000, 300, 100); + bench_index_mtree(c, 50, 500, 300, 100); } fn bench_index_mtree_dim_300_full_cache(c: &mut Criterion) { - bench_index_mtree(c, 50, 5_000, 300, 0); + bench_index_mtree(c, 50, 500, 300, 0); } fn bench_index_mtree_dim_2048(c: &mut Criterion) { - bench_index_mtree(c, 10, 1_000, 2048, 100); + bench_index_mtree(c, 10, 100, 2048, 100); } fn bench_index_mtree_dim_2048_full_cache(c: &mut Criterion) { - bench_index_mtree(c, 10, 1_000, 2048, 0); + bench_index_mtree(c, 10, 100, 2048, 0); } async fn mtree_index(