Implements additional function for vectors (#2266)

This commit is contained in:
Emmanuel Keller 2023-07-16 14:04:22 +01:00 committed by GitHub
parent a2ef2d83df
commit 98a482e471
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 890 additions and 245 deletions

View file

@ -276,6 +276,9 @@
"uuid"
"rand::uuid::v4("
"rand::uuid::v7("
"search::score("
"search::highlight("
"search::offsets("
"session"
"session::"
"session::db("
@ -343,17 +346,25 @@
"type::string("
"type::table("
"type::thing("
"vector::dotproduct(",
"vector::magnitude(",
"vector::distance::chebyshev(",
"vector::distance::euclidean(",
"vector::distance::hamming(",
"vector::distance::mahalanobis(",
"vector::distance::manhattan(",
"vector::distance::minkowski(",
"vector::similarity::cosine(",
"vector::similarity::jaccard(",
"vector::similarity::pearson(",
"vector::similarity::spearman(",
"vector::add("
"vector::angle("
"vector::cross("
"vector::divide("
"vector::dot("
"vector::magnitude("
"vector::multiply("
"vector::normalize("
"vector::project("
"vector::subtract("
"vector::distance::chebyshev("
"vector::distance::euclidean("
"vector::distance::hamming("
"vector::distance::mahalanobis("
"vector::distance::manhattan("
"vector::distance::minkowski("
"vector::similarity::cosine("
"vector::similarity::jaccard("
"vector::similarity::pearson("
"vector::similarity::spearman("
# TODO: Add Javascript keywords

View file

@ -276,6 +276,9 @@
"uuid"
"rand::uuid::v4("
"rand::uuid::v7("
"search::score("
"search::highlight("
"search::offsets("
"session"
"session::"
"session::db("
@ -340,17 +343,25 @@
"type::string("
"type::table("
"type::thing("
"vector::dotproduct(",
"vector::magnitude(",
"vector::distance::chebyshev(",
"vector::distance::euclidean(",
"vector::distance::hamming(",
"vector::distance::mahalanobis(",
"vector::distance::manhattan(",
"vector::distance::minkowski(",
"vector::similarity::cosine(",
"vector::similarity::jaccard(",
"vector::similarity::pearson(",
"vector::similarity::spearman(",
"vector::add("
"vector::angle("
"vector::cross("
"vector::divide("
"vector::dot("
"vector::magnitude("
"vector::multiply("
"vector::normalize("
"vector::project("
"vector::subtract("
"vector::distance::chebyshev("
"vector::distance::euclidean("
"vector::distance::hamming("
"vector::distance::mahalanobis("
"vector::distance::manhattan("
"vector::distance::minkowski("
"vector::similarity::cosine("
"vector::similarity::jaccard("
"vector::similarity::pearson("
"vector::similarity::spearman("
# TODO: Add Javascript keywords

View file

@ -289,8 +289,16 @@ pub fn synchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Va
"type::table" => r#type::table,
"type::thing" => r#type::thing,
//
"vector::dotproduct" => vector::dotproduct,
"vector::add" => vector::add,
"vector::angle" => vector::angle,
"vector::cross" => vector::cross,
"vector::dot" => vector::dot,
"vector::divide" => vector::divide,
"vector::magnitude" => vector::magnitude,
"vector::multiply" => vector::multiply,
"vector::normalize" => vector::normalize,
"vector::project" => vector::project,
"vector::subtract" => vector::subtract,
"vector::distance::chebyshev" => vector::distance::chebyshev,
"vector::distance::euclidean" => vector::distance::euclidean,
"vector::distance::hamming" => vector::distance::hamming,

View file

@ -3,13 +3,22 @@ use crate::fnc::script::modules::impl_module_def;
mod distance;
mod similarity;
pub struct Package;
impl_module_def!(
Package,
"vector",
"distance" => (distance::Package),
"dotproduct" => run,
"similarity" => (similarity::Package),
"add" => run,
"angle" => run,
"cross" => run,
"divide" => run,
"dot" => run,
"magnitude" => run,
"similarity" => (similarity::Package)
"multiply" => run,
"normalize" => run,
"project" => run,
"subtract" => run
);

View file

@ -1,4 +1,5 @@
use super::variance::Variance;
use crate::fnc::util::math::mean::Mean;
use crate::fnc::util::math::variance::variance;
use crate::sql::number::Number;
pub trait Deviation {
@ -8,6 +9,10 @@ pub trait Deviation {
impl Deviation for Vec<Number> {
fn deviation(self, sample: bool) -> f64 {
self.variance(sample).sqrt()
deviation(&self, self.mean(), sample)
}
}
pub(super) fn deviation(v: &[Number], mean: f64, sample: bool) -> f64 {
variance(v, mean, sample).sqrt()
}

View file

@ -1,15 +0,0 @@
use crate::sql::Number;
pub trait DotProduct {
    /// Computes the dot product of `self` and `other`.
    ///
    /// Yields `None` when the two vectors do not have the same length.
    fn dotproduct(&self, other: &Self) -> Option<Number>;
}

impl DotProduct for Vec<Number> {
    fn dotproduct(&self, other: &Self) -> Option<Number> {
        if self.len() == other.len() {
            // Multiply matching components and accumulate the products.
            let total: Number = self.iter().zip(other.iter()).map(|(lhs, rhs)| lhs * rhs).sum();
            Some(total)
        } else {
            None
        }
    }
}

View file

@ -1,21 +0,0 @@
use crate::sql::Number;
pub trait EuclideanDistance {
    /// Computes the Euclidean distance (L2 norm of the difference) between
    /// `self` and `other`.
    ///
    /// Yields `None` when the two vectors do not have the same length.
    fn euclidean_distance(&self, other: &Self) -> Option<Number>;
}

impl EuclideanDistance for Vec<Number> {
    fn euclidean_distance(&self, other: &Self) -> Option<Number> {
        if self.len() == other.len() {
            // Sum the squared component-wise differences, then take the root.
            let sum_of_squares: Number = self
                .iter()
                .zip(other.iter())
                .map(|(lhs, rhs)| (lhs - rhs).pow(Number::Int(2)))
                .sum();
            Some(sum_of_squares.sqrt())
        } else {
            None
        }
    }
}

View file

@ -1,12 +0,0 @@
use crate::sql::Number;
pub trait Magnitude {
    /// Computes the magnitude of the vector: the square root of the sum of
    /// its squared components.
    fn magnitude(&self) -> Number;
}

impl Magnitude for Vec<Number> {
    fn magnitude(&self) -> Number {
        let sum_of_squares: Number =
            self.iter().map(|component| component.clone().pow(Number::Int(2))).sum();
        sum_of_squares.sqrt()
    }
}

View file

@ -4,10 +4,7 @@
pub mod bottom;
pub mod deviation;
pub mod dotproduct;
pub mod euclideandistance;
pub mod interquartile;
pub mod magnitude;
pub mod mean;
pub mod median;
pub mod midhinge;
@ -19,3 +16,4 @@ pub mod spread;
pub mod top;
pub mod trimean;
pub mod variance;
pub mod vector;

View file

@ -9,15 +9,18 @@ pub trait Variance {
impl Variance for Vec<Number> {
fn variance(self, sample: bool) -> f64 {
match self.len() {
variance(&self, self.mean(), sample)
}
}
pub(super) fn variance(v: &[Number], mean: f64, sample: bool) -> f64 {
match v.len() {
0 => f64::NAN,
1 => 0.0,
len => {
let mean = self.mean();
let len = (len - sample as usize) as f64;
let out = self.iter().map(|x| (x.to_float() - mean).powi(2)).sum::<f64>() / len;
let out = v.iter().map(|x| (x.to_float() - mean).powi(2)).sum::<f64>() / len;
out
}
}
}
}

View file

@ -0,0 +1,289 @@
use crate::err::Error;
use crate::fnc::util::math::deviation::deviation;
use crate::fnc::util::math::mean::Mean;
use crate::sql::Number;
use std::collections::HashSet;
pub trait Add {
/// Addition of two vectors
fn add(&self, other: &Self) -> Result<Vec<Number>, Error>;
}
fn check_same_dimension(fnc: &str, a: &Vec<Number>, b: &Vec<Number>) -> Result<(), Error> {
if a.len() != b.len() {
Err(Error::InvalidArguments {
name: String::from(fnc),
message: String::from("The two vectors must be of the same dimension."),
})
} else {
Ok(())
}
}
impl Add for Vec<Number> {
fn add(&self, other: &Self) -> Result<Vec<Number>, Error> {
check_same_dimension("vector::add", self, other)?;
Ok(self.iter().zip(other.iter()).map(|(a, b)| a + b).collect())
}
}
pub trait Angle {
    /// Computes the angle, in radians, between two vectors.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components. The result is NaN when the cosine is
    /// undefined (a NaN component or a zero magnitude product).
    fn angle(&self, other: &Self) -> Result<Number, Error>;
}

impl Angle for Vec<Number> {
    fn angle(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::angle", self, other)?;
        let dp = dot(self, other);
        let m = self.magnitude() * other.magnitude();
        // Rounding in the dot product and magnitudes can push the cosine of
        // (anti)parallel vectors marginally outside [-1, 1], which would make
        // `acos` return NaN. Clamp it back into the valid domain; a NaN
        // cosine stays NaN because `f64::clamp` propagates NaN.
        let cosine = vector_div(&dp, &m).to_float().clamp(-1.0, 1.0);
        Ok(cosine.acos().into())
    }
}
pub trait CosineSimilarity {
    /// Computes the cosine similarity of two vectors: their dot product
    /// divided by the product of their magnitudes.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn cosine_similarity(&self, other: &Self) -> Result<Number, Error>;
}

impl CosineSimilarity for Vec<Number> {
    fn cosine_similarity(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::similarity::cosine", self, other)?;
        let numerator = dot(self, other);
        let denominator = self.magnitude() * other.magnitude();
        Ok(numerator / denominator)
    }
}
pub trait Divide {
    /// Element-wise division of `self` by `other`.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn divide(&self, other: &Self) -> Result<Vec<Number>, Error>;
}

/// Divides `a` by `b`, yielding `Number::NAN` instead of dividing when
/// either operand is NaN or the divisor is zero.
fn vector_div(a: &Number, b: &Number) -> Number {
    let undefined = a.is_nan() || b.is_nan() || b.is_zero();
    if !undefined {
        a / b
    } else {
        Number::NAN
    }
}

impl Divide for Vec<Number> {
    fn divide(&self, other: &Self) -> Result<Vec<Number>, Error> {
        check_same_dimension("vector::divide", self, other)?;
        let quotients: Vec<Number> = self
            .iter()
            .zip(other.iter())
            .map(|(numerator, denominator)| vector_div(numerator, denominator))
            .collect();
        Ok(quotients)
    }
}
pub trait HammingDistance {
    /// Counts the positions at which the two vectors hold different values.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn hamming_distance(&self, other: &Self) -> Result<Number, Error>;
}

impl HammingDistance for Vec<Number> {
    fn hamming_distance(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::distance::hamming", self, other)?;
        let mismatches = self.iter().zip(other.iter()).filter(|&(lhs, rhs)| lhs != rhs).count();
        Ok(mismatches.into())
    }
}
pub trait JaccardSimilarity {
    /// Computes the Jaccard similarity of two vectors treated as sets:
    /// the number of distinct shared values divided by the number of
    /// distinct values overall.
    ///
    /// The vectors are not required to have the same length.
    fn jaccard_similarity(&self, other: &Self) -> Result<Number, Error>;
}

impl JaccardSimilarity for Vec<Number> {
    fn jaccard_similarity(&self, other: &Self) -> Result<Number, Error> {
        // Collapse duplicates so each distinct value is counted once.
        let lhs: HashSet<&Number> = self.iter().collect();
        let rhs: HashSet<&Number> = other.iter().collect();
        let shared = lhs.intersection(&rhs).count() as f64;
        let total = lhs.union(&rhs).count() as f64;
        let ratio = shared / total;
        Ok(ratio.into())
    }
}
pub trait PearsonSimilarity {
    /// Computes the Pearson correlation coefficient of two vectors: their
    /// covariance divided by the product of their standard deviations.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn pearson_similarity(&self, other: &Self) -> Result<Number, Error>;
}

impl PearsonSimilarity for Vec<Number> {
    fn pearson_similarity(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::similarity::pearson", self, other)?;
        let mean_a = self.mean();
        let mean_b = other.mean();
        // Sum of the products of each pair's deviations from its own mean.
        let sum_of_products: f64 = self
            .iter()
            .zip(other.iter())
            .map(|(a, b)| (a.to_float() - mean_a) * (b.to_float() - mean_b))
            .sum();
        let covariance = sum_of_products / self.len() as f64;
        // Both deviations are requested with `sample = false`.
        let deviation_a = deviation(self, mean_a, false);
        let deviation_b = deviation(other, mean_b, false);
        let coefficient = covariance / (deviation_a * deviation_b);
        Ok(coefficient.into())
    }
}
pub trait ManhattanDistance {
    /// Computes the Manhattan distance: the sum of the absolute
    /// component-wise differences of the two vectors.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn manhattan_distance(&self, other: &Self) -> Result<Number, Error>;
}

impl ManhattanDistance for Vec<Number> {
    fn manhattan_distance(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::distance::manhattan", self, other)?;
        let total: Number =
            self.iter().zip(other.iter()).map(|(lhs, rhs)| (lhs - rhs).abs()).sum();
        Ok(total)
    }
}
pub trait MinkowskiDistance {
    /// Computes the Minkowski distance of the given `order` between the two
    /// vectors: the `order`-th root of the sum of the absolute
    /// component-wise differences raised to `order`.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error>;
}

impl MinkowskiDistance for Vec<Number> {
    fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error> {
        check_same_dimension("vector::distance::minkowski", self, other)?;
        let p = order.to_float();
        let sum_of_powers: f64 = self
            .iter()
            .zip(other.iter())
            .map(|(lhs, rhs)| (lhs.to_float() - rhs.to_float()).abs().powf(p))
            .sum();
        let distance = sum_of_powers.powf(1.0 / p);
        Ok(distance.into())
    }
}
pub trait Multiply {
    /// Element-wise multiplication of two vectors.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn multiply(&self, other: &Self) -> Result<Vec<Number>, Error>;
}

impl Multiply for Vec<Number> {
    fn multiply(&self, other: &Self) -> Result<Vec<Number>, Error> {
        check_same_dimension("vector::multiply", self, other)?;
        let products: Vec<Number> =
            self.iter().zip(other.iter()).map(|(lhs, rhs)| lhs * rhs).collect();
        Ok(products)
    }
}
pub trait Project {
    /// Projects `self` onto `other`: scales `other` by the dot product of
    /// the two vectors divided by the squared magnitude of `other`.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn project(&self, other: &Self) -> Result<Vec<Number>, Error>;
}

impl Project for Vec<Number> {
    fn project(&self, other: &Self) -> Result<Vec<Number>, Error> {
        check_same_dimension("vector::project", self, other)?;
        let dot_product = dot(self, other);
        let squared_length: Number = magnitude_squared(other).into();
        // `vector_div` yields NaN for a NaN operand or a zero divisor.
        let scale = vector_div(&dot_product, &squared_length);
        let projection: Vec<Number> = other.iter().map(|component| &scale * component).collect();
        Ok(projection)
    }
}
pub trait ChebyshevDistance {
    /// Computes the Chebyshev distance: the largest absolute component-wise
    /// difference between the two vectors.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components. Two empty vectors are at distance 0.
    fn chebyshev_distance(&self, other: &Self) -> Result<Number, Error>;
}

impl ChebyshevDistance for Vec<Number> {
    fn chebyshev_distance(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::distance::chebyshev", self, other)?;
        Ok(self
            .iter()
            .zip(other.iter())
            .map(|(a, b)| (a.to_float() - b.to_float()).abs())
            // Absolute differences are never negative, so 0.0 is the neutral
            // seed for the maximum. Seeding with `f64::MIN` leaked that huge
            // negative value out as the "distance" whenever no difference
            // replaced it (empty vectors, or every difference being NaN,
            // which `f64::max` skips).
            .fold(0.0_f64, f64::max)
            .into())
    }
}
pub trait Subtract {
    /// Element-wise subtraction of `other` from `self`.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn subtract(&self, other: &Self) -> Result<Vec<Number>, Error>;
}

impl Subtract for Vec<Number> {
    fn subtract(&self, other: &Self) -> Result<Vec<Number>, Error> {
        check_same_dimension("vector::subtract", self, other)?;
        let differences: Vec<Number> =
            self.iter().zip(other.iter()).map(|(lhs, rhs)| lhs - rhs).collect();
        Ok(differences)
    }
}
pub trait CrossProduct {
/// Cross product of two vectors
fn cross(&self, other: &Self) -> Result<Vec<Number>, Error>;
}
impl CrossProduct for Vec<Number> {
fn cross(&self, other: &Self) -> Result<Vec<Number>, Error> {
if self.len() != 3 || other.len() != 3 {
return Err(Error::InvalidArguments {
name: "vector::cross".to_string(),
message: String::from("Both vectors must have a dimension of 3."),
});
}
let a0 = &self[0];
let a1 = &self[1];
let a2 = &self[2];
let b0 = &other[0];
let b1 = &other[1];
let b2 = &other[2];
let v = vec![a1 * b2 - a2 * b1, a2 * b0 - a0 * b2, a0 * b1 - a1 * b0];
Ok(v)
}
}
pub trait DotProduct {
    /// Computes the dot product of two vectors.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn dot(&self, other: &Self) -> Result<Number, Error>;
}

impl DotProduct for Vec<Number> {
    fn dot(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::dot", self, other).map(|_| dot(self, other))
    }
}

/// Sums the products of the paired components of `a` and `b`.
///
/// No length check is performed here: pairing stops at the shorter slice,
/// so callers validate the dimensions beforehand.
fn dot(a: &[Number], b: &[Number]) -> Number {
    a.iter().zip(b).map(|(lhs, rhs)| lhs * rhs).sum()
}
pub trait EuclideanDistance {
    /// Computes the Euclidean distance (L2 norm of the difference) between
    /// the two vectors.
    ///
    /// Returns `Error::InvalidArguments` when the two vectors do not have
    /// the same number of components.
    fn euclidean_distance(&self, other: &Self) -> Result<Number, Error>;
}

impl EuclideanDistance for Vec<Number> {
    fn euclidean_distance(&self, other: &Self) -> Result<Number, Error> {
        check_same_dimension("vector::distance::euclidean", self, other)?;
        let sum_of_squares: f64 = self
            .iter()
            .zip(other.iter())
            .map(|(lhs, rhs)| (lhs - rhs).to_float().powi(2))
            .sum();
        let distance = sum_of_squares.sqrt();
        Ok(distance.into())
    }
}
/// Sums the squares of the components of `v`, computed in `f64`.
fn magnitude_squared(v: &[Number]) -> f64 {
    let squares = v.iter().map(|component| component.to_float().powi(2));
    squares.sum()
}

pub trait Magnitude {
    /// Computes the magnitude of the vector: the square root of the sum of
    /// its squared components.
    fn magnitude(&self) -> Number;
}

impl Magnitude for Vec<Number> {
    fn magnitude(&self) -> Number {
        let squared = magnitude_squared(self);
        squared.sqrt().into()
    }
}
pub trait Normalize {
    /// Normalizes the vector by dividing every component by the vector's
    /// magnitude.
    fn normalize(&self) -> Vec<Number>;
}

impl Normalize for Vec<Number> {
    fn normalize(&self) -> Vec<Number> {
        let length = self.magnitude();
        let mut unit = Vec::with_capacity(self.len());
        for component in self {
            // `vector_div` yields NaN for a NaN operand or a zero magnitude.
            unit.push(vector_div(component, &length));
        }
        unit
    }
}

View file

@ -1,48 +1,67 @@
use crate::err::Error;
use crate::fnc::util::math::dotproduct::DotProduct;
use crate::fnc::util::math::magnitude::Magnitude;
use crate::fnc::util::math::vector::{
Add, Angle, CrossProduct, Divide, DotProduct, Magnitude, Multiply, Normalize, Project, Subtract,
};
use crate::sql::{Number, Value};
pub fn dotproduct((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
match a.dotproduct(&b) {
None => Err(Error::InvalidArguments {
name: String::from("vector::dotproduct"),
message: String::from("The two vectors must be of the same length."),
}),
Some(dot) => Ok(dot.into()),
}
pub fn add((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.add(&b)?.into())
}
pub fn angle((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.angle(&b)?.into())
}
pub fn divide((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.divide(&b)?.into())
}
pub fn cross((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.cross(&b)?.into())
}
pub fn dot((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.dot(&b)?.into())
}
pub fn magnitude((a,): (Vec<Number>,)) -> Result<Value, Error> {
Ok(a.magnitude().into())
}
pub fn multiply((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.multiply(&b)?.into())
}
pub fn normalize((a,): (Vec<Number>,)) -> Result<Value, Error> {
Ok(a.normalize().into())
}
pub fn project((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.project(&b)?.into())
}
pub fn subtract((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.subtract(&b)?.into())
}
pub mod distance {
use crate::err::Error;
use crate::fnc::util::math::euclideandistance::EuclideanDistance;
use crate::fnc::util::math::vector::{
ChebyshevDistance, EuclideanDistance, HammingDistance, ManhattanDistance, MinkowskiDistance,
};
use crate::sql::{Number, Value};
pub fn chebyshev((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Err(Error::FeatureNotYetImplemented {
feature: "vector::distance::chebyshev() function",
})
pub fn chebyshev((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.chebyshev_distance(&b)?.into())
}
pub fn euclidean((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
match a.euclidean_distance(&b) {
None => Err(Error::InvalidArguments {
name: String::from("vector::distance::euclidean"),
message: String::from("The two vectors must be of the same length."),
}),
Some(distance) => Ok(distance.into()),
}
Ok(a.euclidean_distance(&b)?.into())
}
pub fn hamming((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Err(Error::FeatureNotYetImplemented {
feature: "vector::distance::hamming() function",
})
pub fn hamming((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.hamming_distance(&b)?.into())
}
pub fn mahalanobis((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
@ -51,46 +70,31 @@ pub mod distance {
})
}
pub fn manhattan((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Err(Error::FeatureNotYetImplemented {
feature: "vector::distance::manhattan() function",
})
pub fn manhattan((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.manhattan_distance(&b)?.into())
}
pub fn minkowski((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Err(Error::FeatureNotYetImplemented {
feature: "vector::distance::minkowski() function",
})
pub fn minkowski((a, b, o): (Vec<Number>, Vec<Number>, Number)) -> Result<Value, Error> {
Ok(a.minkowski_distance(&b, o)?.into())
}
}
pub mod similarity {
use crate::err::Error;
use crate::fnc::util::math::dotproduct::DotProduct;
use crate::fnc::util::math::magnitude::Magnitude;
use crate::fnc::util::math::vector::{CosineSimilarity, JaccardSimilarity, PearsonSimilarity};
use crate::sql::{Number, Value};
pub fn cosine((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
match a.dotproduct(&b) {
None => Err(Error::InvalidArguments {
name: String::from("vector::similarity::cosine"),
message: String::from("The two vectors must be of the same length."),
}),
Some(dot) => Ok((dot / (a.magnitude() * b.magnitude())).into()),
}
Ok(a.cosine_similarity(&b)?.into())
}
pub fn jaccard((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Err(Error::FeatureNotYetImplemented {
feature: "vector::similarity::jaccard() function",
})
pub fn jaccard((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.jaccard_similarity(&b)?.into())
}
pub fn pearson((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Err(Error::FeatureNotYetImplemented {
feature: "vector::similarity::pearson() function",
})
pub fn pearson((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
Ok(a.pearson_similarity(&b)?.into())
}
pub fn spearman((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {

View file

@ -555,8 +555,16 @@ fn function_type(i: &str) -> IResult<&str, &str> {
fn function_vector(i: &str) -> IResult<&str, &str> {
alt((
tag("dotproduct"),
tag("add"),
tag("angle"),
tag("divide"),
tag("cross"),
tag("dot"),
tag("magnitude"),
tag("multiply"),
tag("normalize"),
tag("project"),
tag("subtract"),
preceded(
tag("distance::"),
alt((

View file

@ -234,6 +234,14 @@ impl Number {
}
}
pub fn is_zero(&self) -> bool {
match self {
Number::Int(v) => v == &0,
Number::Float(v) => v == &0.0,
Number::Decimal(v) => v == &Decimal::ZERO,
}
}
pub fn is_zero_or_positive(&self) -> bool {
match self {
Number::Int(v) => v >= &0,
@ -334,6 +342,10 @@ impl Number {
}
}
pub fn acos(self) -> Self {
self.to_float().acos().into()
}
pub fn ceil(self) -> Self {
match self {
Number::Int(v) => v.into(),
@ -637,7 +649,7 @@ impl Sort for Vec<Number> {
}
}
pub fn number(i: &str) -> IResult<&str, Number> {
fn not_nan(i: &str) -> IResult<&str, Number> {
let (i, v) = recognize_float(i)?;
let (i, suffix) = suffix(i)?;
let (i, _) = ending(i)?;
@ -649,6 +661,10 @@ pub fn number(i: &str) -> IResult<&str, Number> {
Ok((i, number))
}
pub fn number(i: &str) -> IResult<&str, Number> {
alt((map(tag("NaN"), |_| Number::NAN), not_nan))(i)
}
#[derive(Debug)]
enum Suffix {
None,
@ -691,6 +707,15 @@ mod tests {
assert!(!decimal_is_integer(&Decimal::HALF_PI));
}
#[test]
fn number_nan() {
let sql = "NaN";
let res = number(sql);
assert!(res.is_ok());
let out = res.unwrap().1;
assert_eq!("NaN", format!("{}", out));
}
#[test]
fn number_int() {
let sql = "123";

View file

@ -13,16 +13,19 @@ async fn test_queries(sql: &str, desired_responses: &[&str]) -> Result<(), Error
let v = r?;
if let Some(desired_response) = desired_responses.get(i) {
let desired_value = Value::parse(*desired_response);
// If both values are NaN, they are equal from a test PoV
if !desired_value.is_nan() || !v.is_nan() {
assert_eq!(
v,
desired_value,
"Recieved responce did not match \
"Received response did not match \
expected.
Query responce #{},
Desired responce: {desired_value},
Query response #{},
Desired response: {desired_value},
Actual response: {v}",
i + 1
);
}
} else {
panic!("Response index {i} out of bounds of desired responses.");
}
@ -30,6 +33,31 @@ async fn test_queries(sql: &str, desired_responses: &[&str]) -> Result<(), Error
Ok(())
}
async fn check_test_is_error(sql: &str, expected_errors: &[&str]) -> Result<(), Error> {
let db = Datastore::new("memory").await?;
let session = Session::for_kv().with_ns("test").with_db("test");
let response = db.execute(sql, &session, None).await?;
if response.len() != expected_errors.len() {
panic!(
"Wrong number of responses {} - expected {}.",
response.len(),
expected_errors.len()
);
}
for (i, r) in response.into_iter().map(|r| r.result).enumerate() {
if let Some(expected_error) = expected_errors.get(i) {
if let Err(e) = r {
assert_eq!(e.to_string().as_str(), *expected_error)
} else {
panic!("Response index {i} is not an error.");
}
} else {
panic!("Response index {i} out of bounds of expected responses.");
}
}
Ok(())
}
// --------------------------------------------------
// array
// --------------------------------------------------
@ -4640,123 +4668,417 @@ async fn function_type_thing() -> Result<(), Error> {
}
#[tokio::test]
async fn function_vector_distance_euclidean() -> Result<(), Error> {
let sql = r#"
RETURN vector::distance::euclidean([1, 2, 3], [1, 2, 3]);
RETURN vector::distance::euclidean([1, 2, 3], [-1, -2, -3]);
RETURN vector::distance::euclidean([1, 2, 3], [4, 5]);
RETURN vector::distance::euclidean([1, 2], [4, 5, 5]);
"#;
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None).await?;
assert_eq!(res.len(), 4);
//
let tmp = res.remove(0).result?;
let val = Value::from(0);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::from(7.483314773547883);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result;
assert!(tmp.is_err());
//
let tmp = res.remove(0).result;
assert!(tmp.is_err());
async fn function_vector_add() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::add([1, 2, 3], [1, 2, 3]);
RETURN vector::add([1, 2, 3], [-1, -2, -3]);
"#,
&["[2, 4, 6]", "[0, 0, 0]"],
)
.await?;
check_test_is_error(
r#"
RETURN vector::add([1, 2, 3], [4, 5]);
RETURN vector::add([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::add(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::add(). The two vectors must be of the same dimension."
],
)
.await?;
Ok(())
}
#[tokio::test]
async fn function_vector_dotproduct() -> Result<(), Error> {
let sql = r#"
RETURN vector::dotproduct([1, 2, 3], [1, 2, 3]);
RETURN vector::dotproduct([1, 2, 3], [-1, -2, -3]);
RETURN vector::dotproduct([1, 2, 3], [4, 5]);
RETURN vector::dotproduct([1, 2], [4, 5, 5]);
"#;
async fn function_vector_angle() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::angle([1,0,0], [0,1,0]);
RETURN vector::angle([5, 10, 15], [10, 5, 20]);
RETURN vector::angle([-3, 2, 5], [4, -1, 2]);
RETURN vector::angle([NaN, 2, 3], [-1, -2, NaN]);
"#,
&["1.5707963267948966", "0.36774908225917935", "1.7128722906354115", "NaN"],
)
.await?;
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None).await?;
assert_eq!(res.len(), 4);
//
let tmp = res.remove(0).result?;
let val = Value::from(14);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::from(-14);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result;
assert!(tmp.is_err());
//
let tmp = res.remove(0).result;
assert!(tmp.is_err());
check_test_is_error(
r#"
RETURN vector::angle([1, 2, 3], [4, 5]);
RETURN vector::angle([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::angle(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::angle(). The two vectors must be of the same dimension."
],
).await?;
Ok(())
}
#[tokio::test]
async fn function_vector_cross() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::cross([1, 2, 3], [4, 5, 6]);
RETURN vector::cross([1, 2, 3], [-4, -5, -6]);
RETURN vector::cross([1, NaN, 3], [NaN, -5, -6]);
"#,
&["[-3, 6, -3]", "[3, -6, 3]", "[NaN, NaN, NaN]"],
)
.await?;
check_test_is_error(
r#"
RETURN vector::cross([1, 2, 3], [4, 5]);
RETURN vector::cross([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::cross(). Both vectors must have a dimension of 3.",
"Incorrect arguments for function vector::cross(). Both vectors must have a dimension of 3."
],
)
.await?;
Ok(())
}
#[tokio::test]
async fn function_vector_dot() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::dot([1, 2, 3], [1, 2, 3]);
RETURN vector::dot([1, 2, 3], [-1, -2, -3]);
"#,
&["14", "-14"],
)
.await?;
check_test_is_error(
r#"
RETURN vector::dot([1, 2, 3], [4, 5]);
RETURN vector::dot([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::dot(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::dot(). The two vectors must be of the same dimension."
],
).await?;
Ok(())
}
#[tokio::test]
async fn function_vector_magnitude() -> Result<(), Error> {
let sql = r#"
test_queries(
r#"
RETURN vector::magnitude([]);
RETURN vector::magnitude([1]);
RETURN vector::magnitude([5]);
RETURN vector::magnitude([1,2,3,3,3,4,5]);
"#;
"#,
&["0", "1", "5", "8.54400374531753"],
)
.await
}
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None).await?;
assert_eq!(res.len(), 4);
//
let tmp = res.remove(0).result?;
let val = Value::from(0);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::from(1);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::from(5);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::from(8.54400374531753);
assert_eq!(tmp, val);
#[tokio::test]
async fn function_vector_normalize() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::normalize([]);
RETURN vector::normalize([1]);
RETURN vector::normalize([5]);
RETURN vector::normalize([4,3]);
"#,
&["[]", "[1]", "[1]", "[0.8,0.6]"],
)
.await
}
#[tokio::test]
async fn function_vector_multiply() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::multiply([1, 2, 3], [1, 2, 3]);
RETURN vector::multiply([1, 2, 3], [-1, -2, -3]);
"#,
&["[1, 4, 9]", "[-1, -4, -9]"],
)
.await?;
check_test_is_error(
r#"
RETURN vector::multiply([1, 2, 3], [4, 5]);
RETURN vector::multiply([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::multiply(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::multiply(). The two vectors must be of the same dimension."
],
)
.await?;
Ok(())
}
#[tokio::test]
async fn function_vector_project() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::project([1, 2, 3], [4, 5, 6]);
RETURN vector::project([1, -2, 3], [-4, 5, 6]);
RETURN vector::project([NaN, -2, 3], [-4, NaN, NaN]);
"#,
&[
"[1.6623376623376624, 2.077922077922078, 2.4935064935064934]",
"[-0.2077922077922078, 0.25974025974025977, 0.3116883116883117]",
"[NaN, NaN, NaN]",
],
)
.await?;
check_test_is_error(
r#"
RETURN vector::project([1, 2, 3], [4, 5]);
RETURN vector::project([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::project(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::project(). The two vectors must be of the same dimension."
],
)
.await?;
Ok(())
}
#[tokio::test]
async fn function_vector_divide() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::divide([10, NaN, 20, 30, 0], [0, 1, 2, 0, 4]);
RETURN vector::divide([10, -20, 30, 0], [0, -1, 2, -3]);
"#,
&["[NaN, NaN, 10, NaN, 0]", "[NaN, 20, 15, 0]"],
)
.await?;
check_test_is_error(
r#"
RETURN vector::divide([1, 2, 3], [4, 5]);
RETURN vector::divide([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::divide(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::divide(). The two vectors must be of the same dimension."
],
)
.await?;
Ok(())
}
#[tokio::test]
async fn function_vector_subtract() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::subtract([1, 2, 3], [1, 2, 3]);
RETURN vector::subtract([1, 2, 3], [-1, -2, -3]);
"#,
&["[0, 0, 0]", "[2, 4, 6]"],
)
.await?;
check_test_is_error(
r#"
RETURN vector::subtract([1, 2, 3], [4, 5]);
RETURN vector::subtract([1, 2], [4, 5, 5]);
"#,
&[
"Incorrect arguments for function vector::subtract(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::subtract(). The two vectors must be of the same dimension."
],
)
.await?;
Ok(())
}
#[tokio::test]
async fn function_vector_similarity_cosine() -> Result<(), Error> {
let sql = r#"
test_queries(
r#"
RETURN vector::similarity::cosine([1, 2, 3], [1, 2, 3]);
RETURN vector::similarity::cosine([1, 2, 3], [-1, -2, -3]);
RETURN vector::similarity::cosine([1, 2, 3], [4, 5]);
RETURN vector::similarity::cosine([1, 2], [4, 5, 5]);
"#;
RETURN vector::similarity::cosine([NaN, 1, 2, 3], [NaN, 1, 2, 3]);
RETURN vector::similarity::cosine([10, 50, 200], [400, 100, 20]);
"#,
&["1.0", "-1.0", "NaN", "0.15258215962441316"],
)
.await?;
let dbs = Datastore::new("memory").await?;
let ses = Session::for_kv().with_ns("test").with_db("test");
let res = &mut dbs.execute(&sql, &ses, None).await?;
assert_eq!(res.len(), 4);
//
let tmp = res.remove(0).result?;
let val = Value::from(1.0);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result?;
let val = Value::from(-1.0);
assert_eq!(tmp, val);
//
let tmp = res.remove(0).result;
assert!(tmp.is_err());
//
let tmp = res.remove(0).result;
assert!(tmp.is_err());
check_test_is_error(
r"RETURN vector::similarity::cosine([1, 2, 3], [4, 5]);
RETURN vector::similarity::cosine([1, 2], [4, 5, 5]);",
&[
"Incorrect arguments for function vector::similarity::cosine(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::similarity::cosine(). The two vectors must be of the same dimension."
]).await?;
Ok(())
}
#[tokio::test]
async fn function_vector_similarity_jaccard() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::similarity::jaccard([1, 2, 3], [3, 2, 1]);
RETURN vector::similarity::jaccard([1, 2, 3], [-3, -2, -1]);
RETURN vector::similarity::jaccard([1, -2, 3, -4], [4, 3, 2, 1]);
RETURN vector::similarity::jaccard([NaN, 1, 2, 3], [NaN, 2, 3, 4]);
RETURN vector::similarity::jaccard([0,1,2,5,6], [0,2,3,4,5,7,9]);
"#,
&["1.0", "0", "0.3333333333333333", "0.6", "0.3333333333333333"],
)
.await?;
Ok(())
}
#[tokio::test]
async fn function_vector_similarity_pearson() -> Result<(), Error> {
test_queries(
r#"
RETURN vector::similarity::pearson([1, 2, 3, 4, 5], [1, 2.5, 3.5, 4.2, 5.1]);
RETURN vector::similarity::pearson([NaN, 1, 2, 3, 4, 5], [NaN, 1, 2.5, 3.5, 4.2, 5.1]);
RETURN vector::similarity::pearson([1,2,3], [1,5,7]);
"#,
&["0.9894065340659606", "NaN", "0.9819805060619659"],
)
.await?;
check_test_is_error(
r"RETURN vector::similarity::pearson([1, 2, 3], [4, 5]);
RETURN vector::similarity::pearson([1, 2], [4, 5, 5]);",
&[
"Incorrect arguments for function vector::similarity::pearson(). The two vectors must be of the same dimension.",
"Incorrect arguments for function vector::similarity::pearson(). The two vectors must be of the same dimension."
]).await?;
Ok(())
}
/// Runs `vector::distance::euclidean` on equal-length vectors (identical,
/// `NaN`-containing, negated, and larger-valued inputs) via `test_queries`,
/// then passes two queries with mismatched vector lengths to
/// `check_test_is_error`.
#[tokio::test]
async fn function_vector_distance_euclidean() -> Result<(), Error> {
    // Equal-length inputs with their expected literals.
    let sql = r#"
RETURN vector::distance::euclidean([1, 2, 3], [1, 2, 3]);
RETURN vector::distance::euclidean([NaN, 2, 3], [-1, NaN, -3]);
RETURN vector::distance::euclidean([1, 2, 3], [-1, -2, -3]);
RETURN vector::distance::euclidean([10, 50, 200], [400, 100, 20]);
RETURN vector::distance::euclidean([10, 20, 15, 10, 5], [12, 24, 18, 8, 7]);
"#;
    let expected = ["0", "NaN", "7.483314773547883", "432.43496620879307", "6.082762530298219"];
    test_queries(sql, &expected).await?;

    // Length mismatch in either direction is listed with the same message.
    let bad_sql = r"RETURN vector::distance::euclidean([1, 2, 3], [4, 5]);
RETURN vector::distance::euclidean([1, 2], [4, 5, 5]);";
    let mismatch = "Incorrect arguments for function vector::distance::euclidean(). The two vectors must be of the same dimension.";
    check_test_is_error(bad_sql, &[mismatch, mismatch]).await?;
    Ok(())
}
/// Runs `vector::distance::manhattan` on equal-length vectors (integer,
/// negative, fractional, and `NaN`-containing inputs) via `test_queries`,
/// then passes two queries with mismatched vector lengths to
/// `check_test_is_error`.
#[tokio::test]
async fn function_vector_distance_manhattan() -> Result<(), Error> {
    // Equal-length inputs with their expected literals.
    let sql = r#"
RETURN vector::distance::manhattan([1, 2, 3], [4, 5, 6]);
RETURN vector::distance::manhattan([1, 2, 3], [-4, -5, -6]);
RETURN vector::distance::manhattan([1.1, 2, 3.3], [4, 5.5, 6.6]);
RETURN vector::distance::manhattan([NaN, 1, 2, 3], [NaN, 4, 5, 6]);
RETURN vector::distance::manhattan([10, 20, 15, 10, 5], [12, 24, 18, 8, 7]);
"#;
    let expected = ["9", "21", "9.7", "NaN", "13"];
    test_queries(sql, &expected).await?;

    // Length mismatch in either direction is listed with the same message.
    let bad_sql = r"RETURN vector::distance::manhattan([1, 2, 3], [4, 5]);
RETURN vector::distance::manhattan([1, 2], [4, 5, 5]);";
    let mismatch = "Incorrect arguments for function vector::distance::manhattan(). The two vectors must be of the same dimension.";
    check_test_is_error(bad_sql, &[mismatch, mismatch]).await?;
    Ok(())
}
/// Runs `vector::distance::hamming` on equal-length vectors (integer,
/// negative, fractional, `NaN`-containing, and 0/1 inputs) via
/// `test_queries`, then passes two queries with mismatched vector lengths to
/// `check_test_is_error`.
#[tokio::test]
async fn function_vector_distance_hamming() -> Result<(), Error> {
    // Equal-length inputs with their expected literals.
    let sql = r#"
RETURN vector::distance::hamming([1, 2, 2], [1, 2, 3]);
RETURN vector::distance::hamming([-1, -2, -3], [-2, -2, -2]);
RETURN vector::distance::hamming([1.1, 2.2, -3.3], [1.1, 2, -3.3]);
RETURN vector::distance::hamming([NaN, 1, 2, 3], [NaN, 1, 2, 3]);
RETURN vector::distance::hamming([0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0]);
"#;
    let expected = ["1", "2", "1", "0", "2"];
    test_queries(sql, &expected).await?;

    // Length mismatch in either direction is listed with the same message.
    let bad_sql = r"RETURN vector::distance::hamming([1, 2, 3], [4, 5]);
RETURN vector::distance::hamming([1, 2], [4, 5, 5]);";
    let mismatch = "Incorrect arguments for function vector::distance::hamming(). The two vectors must be of the same dimension.";
    check_test_is_error(bad_sql, &[mismatch, mismatch]).await?;
    Ok(())
}
/// Runs `vector::distance::minkowski` on equal-length vectors with a third
/// argument of 3, 1, or 2 via `test_queries`, then passes two queries with
/// mismatched vector lengths to `check_test_is_error`.
#[tokio::test]
async fn function_vector_distance_minkowski() -> Result<(), Error> {
    // Equal-length inputs with their expected literals.
    let sql = r#"
RETURN vector::distance::minkowski([1, 2, 3], [4, 5, 6], 3);
RETURN vector::distance::minkowski([-1, -2, -3], [-4, -5, -6], 3);
RETURN vector::distance::minkowski([1.1, 2.2, 3], [4, 5.5, 6.6], 3);
RETURN vector::distance::minkowski([NaN, 1, 2, 3], [NaN, 4, 5, 6], 3);
RETURN vector::distance::minkowski([10, 20, 15, 10, 5], [12, 24, 18, 8, 7], 1);
RETURN vector::distance::minkowski([10, 20, 15, 10, 5], [12, 24, 18, 8, 7], 2);
"#;
    let expected = [
        "4.3267487109222245",
        "4.3267487109222245",
        "4.747193170917638",
        "NaN",
        "13.0",
        "6.082762530298219",
    ];
    test_queries(sql, &expected).await?;

    // Length mismatch in either direction is listed with the same message.
    let bad_sql = r"RETURN vector::distance::minkowski([1, 2, 3], [4, 5], 3);
RETURN vector::distance::minkowski([1, 2], [4, 5, 5], 3);";
    let mismatch = "Incorrect arguments for function vector::distance::minkowski(). The two vectors must be of the same dimension.";
    check_test_is_error(bad_sql, &[mismatch, mismatch]).await?;
    Ok(())
}
/// Runs `vector::distance::chebyshev` on equal-length vectors (integer,
/// negative, fractional, and `NaN`-containing inputs) via `test_queries`,
/// then passes two queries with mismatched vector lengths to
/// `check_test_is_error`.
#[tokio::test]
async fn function_vector_distance_chebyshev() -> Result<(), Error> {
    // Equal-length inputs with their expected literals.
    let sql = r#"
RETURN vector::distance::chebyshev([1, 2, 3], [4, 5, 6]);
RETURN vector::distance::chebyshev([-1, -2, -3], [-4, -5, -6]);
RETURN vector::distance::chebyshev([1.1, 2.2, 3], [4, 5.5, 6.6]);
RETURN vector::distance::chebyshev([NaN, 1, 2, 3], [NaN, 4, 5, 6]);
RETURN vector::distance::chebyshev([2, 4, 5, 3, 8, 2], [3, 1, 5, -3, 7, 2]);
"#;
    let expected = ["3.0", "3.0", "3.5999999999999996", "3.0", "6.0"];
    test_queries(sql, &expected).await?;

    // Length mismatch in either direction is listed with the same message.
    let bad_sql = r"RETURN vector::distance::chebyshev([1, 2, 3], [4, 5]);
RETURN vector::distance::chebyshev([1, 2], [4, 5, 5]);";
    let mismatch = "Incorrect arguments for function vector::distance::chebyshev(). The two vectors must be of the same dimension.";
    check_test_is_error(bad_sql, &[mismatch, mismatch]).await?;
    Ok(())
}