MTree/HNSW tests/benches work (#4387)

This commit is contained in:
Emmanuel Keller 2024-07-20 09:27:14 +01:00 committed by GitHub
parent 70c682987c
commit c415d37a72
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 42 additions and 39 deletions

View file

@ -255,8 +255,7 @@ impl<'a> Context<'a> {
/// Returns a shared handle to the transaction associated with this context.
///
/// Only the `Arc` handle is cloned; the returned value points at the same
/// `Transaction` that the context holds.
///
/// # Panics
///
/// Hits `unreachable!` when the context has no transaction attached, which
/// this code treats as a broken invariant rather than a recoverable error.
pub(crate) fn tx(&self) -> Arc<Transaction> {
    // One clone of the stored optional handle is sufficient: chaining
    // `.as_ref().map(Arc::clone)` with a further `.clone()` would create and
    // immediately drop an extra handle.
    self.transaction
        .clone()
        .unwrap_or_else(|| unreachable!("The context was not associated with a transaction"))
}

View file

@ -678,9 +678,9 @@ mod tests {
let p = new_params(20, VectorType::F32, Distance::Euclidean, 8, 100, false, false);
test_recall(
"hnsw-random-9000-20-euclidean.gz",
3000,
1000,
"hnsw-random-5000-20-euclidean.gz",
500,
300,
p,
&[(10, 0.98), (40, 1.0)],
)
@ -692,9 +692,9 @@ mod tests {
let p = new_params(20, VectorType::F32, Distance::Euclidean, 8, 100, false, true);
test_recall(
"hnsw-random-9000-20-euclidean.gz",
3000,
750,
"hnsw-random-5000-20-euclidean.gz",
500,
200,
p,
&[(10, 0.98), (40, 1.0)],
)
@ -706,9 +706,9 @@ mod tests {
let p = new_params(20, VectorType::F32, Distance::Euclidean, 8, 100, true, true);
test_recall(
"hnsw-random-9000-20-euclidean.gz",
1000,
500,
"hnsw-random-5000-20-euclidean.gz",
200,
100,
p,
&[(10, 0.98), (40, 1.0)],
)

View file

@ -1886,7 +1886,7 @@ mod tests {
stk,
&[40],
vt,
TestCollection::new(true, 1000, vt, 10, &Distance::Euclidean),
TestCollection::new(true, 500, vt, 5, &Distance::Euclidean),
false,
true,
false,
@ -1910,7 +1910,7 @@ mod tests {
stk,
&[40],
vt,
TestCollection::new(true, 1000, vt, 10, &Distance::Euclidean),
TestCollection::new(true, 500, vt, 5, &Distance::Euclidean),
false,
true,
false,
@ -1934,7 +1934,7 @@ mod tests {
stk,
&[40],
vt,
TestCollection::new(true, 1000, vt, 10, &Distance::Euclidean),
TestCollection::new(true, 500, vt, 5, &Distance::Euclidean),
false,
true,
false,
@ -1961,13 +1961,18 @@ mod tests {
VectorType::I32,
VectorType::I16,
] {
for i in 0..30 {
// 10, 40
for collection_size in [0, 1, 5, 10, 15, 20, 30, 40] {
test_mtree_collection(
stk,
&[3, 40],
&[3, 10, 40],
vt,
TestCollection::new(false, i, vt, 1, &Distance::Euclidean),
TestCollection::new(
false,
collection_size,
vt,
1,
&Distance::Euclidean,
),
true,
true,
true,
@ -2017,7 +2022,7 @@ mod tests {
stk,
&[40],
vt,
TestCollection::new(false, 1000, vt, 10, &Distance::Euclidean),
TestCollection::new(false, 500, vt, 5, &Distance::Euclidean),
false,
true,
false,

View file

@ -579,41 +579,41 @@ mod tests {
// Chebyshev distance test: exercises the `test_distance_collection` helper and
// then passes a fixed vector pair to `test_distance` with 1.0 as the final
// argument (presumably the expected distance; the largest per-coordinate gap
// between the two vectors is 1).
#[test]
fn test_distance_chebyshev() {
// NOTE(review): the 2000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
// The meaning of the two numeric arguments is not visible here; presumably
// collection size and vector dimension - verify against the helper.
test_distance_collection(Distance::Chebyshev, 2000, 1536);
test_distance_collection(Distance::Chebyshev, 100, 1536);
test_distance(Distance::Chebyshev, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 1.0);
}
// Cosine distance test: exercises the `test_distance_collection` helper and
// then passes a fixed vector pair to `test_distance`. The final argument
// (about 0.0074) equals 1 minus the cosine similarity of the two vectors, so
// it is presumably the expected distance - confirm against `test_distance`.
#[test]
fn test_distance_cosine() {
// NOTE(review): the 2000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
test_distance_collection(Distance::Cosine, 2000, 1536);
test_distance_collection(Distance::Cosine, 100, 1536);
test_distance(Distance::Cosine, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 0.007416666029069652);
}
// Euclidean distance test: exercises the `test_distance_collection` helper and
// then passes a fixed vector pair to `test_distance`. The final argument is
// sqrt(3), the straight-line distance between the two vectors, so it is
// presumably the expected result - confirm against `test_distance`.
#[test]
fn test_distance_euclidean() {
// NOTE(review): the 2000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
test_distance_collection(Distance::Euclidean, 2000, 1536);
test_distance_collection(Distance::Euclidean, 100, 1536);
test_distance(Distance::Euclidean, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 1.7320508075688772);
}
// Hamming distance test: exercises the `test_distance_collection` helper and
// then passes a fixed vector pair to `test_distance`. All three coordinates
// differ between the vectors, matching the final argument of 3.0 (presumably
// the expected distance - confirm against `test_distance`).
#[test]
fn test_distance_hamming() {
// NOTE(review): the 2000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
test_distance_collection(Distance::Hamming, 2000, 1536);
test_distance_collection(Distance::Hamming, 100, 1536);
test_distance(Distance::Hamming, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 3.0);
}
// Jaccard distance test: exercises the `test_distance_collection` helper and
// then passes a fixed vector pair to `test_distance`. The vectors share two of
// four distinct values, matching the final argument of 0.5 (presumably the
// expected distance - confirm against `test_distance`).
#[test]
fn test_distance_jaccard() {
// NOTE(review): the 1000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
// Unlike the sibling tests visible here, this one passes 768 rather than 1536.
test_distance_collection(Distance::Jaccard, 1000, 768);
test_distance_collection(Distance::Jaccard, 100, 768);
test_distance(Distance::Jaccard, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 0.5);
}
// Manhattan distance test: exercises the `test_distance_collection` helper and
// then passes a fixed vector pair to `test_distance`. The per-coordinate gaps
// sum to 3, matching the final argument of 3.0 (presumably the expected
// distance - confirm against `test_distance`).
#[test]
fn test_distance_manhattan() {
// NOTE(review): the 2000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
test_distance_collection(Distance::Manhattan, 2000, 1536);
test_distance_collection(Distance::Manhattan, 100, 1536);
test_distance(Distance::Manhattan, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 3.0);
}
#[test]
fn test_distance_minkowski() {
test_distance_collection(Distance::Minkowski(3.into()), 2000, 1536);
test_distance_collection(Distance::Minkowski(3.into()), 100, 1536);
test_distance(
Distance::Minkowski(3.into()),
&[1.0, 2.0, 3.0],
@ -624,7 +624,7 @@ mod tests {
// Pearson test: exercises the `test_distance_collection` helper and then
// passes a fixed vector pair to `test_distance` with 1.0 as the final
// argument. The second vector is the first shifted by one, so their
// correlation coefficient is 1; presumably that is the expected value -
// confirm against `test_distance`.
#[test]
fn test_distance_pearson() {
// NOTE(review): the 2000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
test_distance_collection(Distance::Pearson, 2000, 1536);
test_distance_collection(Distance::Pearson, 100, 1536);
test_distance(Distance::Pearson, &[1.0, 2.0, 3.0], &[2.0, 3.0, 4.0], 1.0);
}
}

View file

@ -4,7 +4,6 @@ use radix_trie::{Trie, TrieCommon, TrieKey};
use std::collections::{BTreeMap, HashMap};
use std::hash::Hash;
use std::time::Duration;
// use surrealdb::key::table::ix;
use surrealdb::sql::{value, Array, Id, Thing};
// Common use case: VectorSearch
@ -23,13 +22,13 @@ fn bench_hash_trie_btree_large_vector(c: &mut Criterion) {
g.finish();
}
// TODO: @emmanuel-keller this is disabled because `ix` is now private
/*fn bench_hash_trie_btree_ix_key(c: &mut Criterion) {
fn bench_hash_trie_btree_ix_key(c: &mut Criterion) {
const N: usize = 100_000;
let mut samples = Vec::with_capacity(N);
for i in 0..N {
let key = ix::new("test", "test", "test", &format!("test{i}")).encode().unwrap();
samples.push((key, i));
let mut key = b"/*test\0*test\0*test\0!ixtest".to_vec();
key.append(&mut i.to_be_bytes().to_vec());
samples.push((key.to_vec(), i));
}
let mut g = new_group(c, "bench_hash_trie_btree_ix_key", N);
@ -37,7 +36,7 @@ fn bench_hash_trie_btree_large_vector(c: &mut Criterion) {
bench_trie(&mut g, &samples);
bench_btree(&mut g, &samples);
g.finish();
}*/
}
fn bench_hash_trie_btree_small_string(c: &mut Criterion) {
const N: usize = 100_000;
@ -193,7 +192,7 @@ fn bench_btree_get<K: Ord, V>(samples: &[(K, V)], map: &BTreeMap<K, V>) {
criterion_group!(
benches,
bench_hash_trie_btree_large_vector,
// bench_hash_trie_btree_ix_key,
bench_hash_trie_btree_ix_key,
bench_hash_trie_btree_small_string,
bench_hash_trie_btree_thing,
bench_hash_trie_btree_value

View file

@ -20,35 +20,35 @@ use tokio::runtime::{Builder, Runtime};
use tokio::task;
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view). The fourth argument, 3, matches `dim_3` in the name and
// is presumably the vector dimension; the trailing 100 is presumably a cache
// setting - confirm both against `bench_index_mtree`.
fn bench_index_mtree_dim_3(c: &mut Criterion) {
// NOTE(review): the 25_000 and 2500 calls look like the removed/added sides of
// a diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 250, 25_000, 3, 100);
bench_index_mtree(c, 250, 2500, 3, 100);
}
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view) with the same leading arguments as the `dim_3` variant
// but 0 as the last argument; given the `full_cache` suffix, 0 presumably
// selects a full cache - confirm against `bench_index_mtree`.
fn bench_index_mtree_dim_3_full_cache(c: &mut Criterion) {
// NOTE(review): the 25_000 and 2500 calls look like the removed/added sides of
// a diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 250, 25_000, 3, 0);
bench_index_mtree(c, 250, 2500, 3, 0);
}
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view). The fourth argument, 50, matches `dim_50` in the name
// and is presumably the vector dimension; the trailing 100 is presumably a
// cache setting - confirm both against `bench_index_mtree`.
fn bench_index_mtree_dim_50(c: &mut Criterion) {
// NOTE(review): the 10_000 and 1000 calls look like the removed/added sides of
// a diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 100, 10_000, 50, 100);
bench_index_mtree(c, 100, 1000, 50, 100);
}
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view) with the same leading arguments as the `dim_50` variant
// but 0 as the last argument; given the `full_cache` suffix, 0 presumably
// selects a full cache - confirm against `bench_index_mtree`.
fn bench_index_mtree_dim_50_full_cache(c: &mut Criterion) {
// NOTE(review): the 10_000 and 1000 calls look like the removed/added sides of
// a diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 100, 10_000, 50, 0);
bench_index_mtree(c, 100, 1000, 50, 0);
}
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view). The fourth argument, 300, matches `dim_300` in the name
// and is presumably the vector dimension; the trailing 100 is presumably a
// cache setting - confirm both against `bench_index_mtree`.
fn bench_index_mtree_dim_300(c: &mut Criterion) {
// NOTE(review): the 5_000 and 500 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 50, 5_000, 300, 100);
bench_index_mtree(c, 50, 500, 300, 100);
}
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view) with the same leading arguments as the `dim_300` variant
// but 0 as the last argument; given the `full_cache` suffix, 0 presumably
// selects a full cache - confirm against `bench_index_mtree`.
fn bench_index_mtree_dim_300_full_cache(c: &mut Criterion) {
// NOTE(review): the 5_000 and 500 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 50, 5_000, 300, 0);
bench_index_mtree(c, 50, 500, 300, 0);
}
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view). The fourth argument, 2048, matches `dim_2048` in the
// name and is presumably the vector dimension; the trailing 100 is presumably
// a cache setting - confirm both against `bench_index_mtree`.
fn bench_index_mtree_dim_2048(c: &mut Criterion) {
// NOTE(review): the 1_000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 10, 1_000, 2048, 100);
bench_index_mtree(c, 10, 100, 2048, 100);
}
// Criterion benchmark entry that forwards to `bench_index_mtree` (defined
// outside this view) with the same leading arguments as the `dim_2048` variant
// but 0 as the last argument; given the `full_cache` suffix, 0 presumably
// selects a full cache - confirm against `bench_index_mtree`.
fn bench_index_mtree_dim_2048_full_cache(c: &mut Criterion) {
// NOTE(review): the 1_000 and 100 calls look like the removed/added sides of a
// diff hunk shown together - confirm which one the file really contains.
bench_index_mtree(c, 10, 1_000, 2048, 0);
bench_index_mtree(c, 10, 100, 2048, 0);
}
async fn mtree_index(