Implements additional function for vectors (#2266)
This commit is contained in:
parent
a2ef2d83df
commit
98a482e471
15 changed files with 890 additions and 245 deletions
|
@ -276,6 +276,9 @@
|
|||
"uuid"
|
||||
"rand::uuid::v4("
|
||||
"rand::uuid::v7("
|
||||
"search::score("
|
||||
"search::highlight("
|
||||
"search::offsets("
|
||||
"session"
|
||||
"session::"
|
||||
"session::db("
|
||||
|
@ -343,17 +346,25 @@
|
|||
"type::string("
|
||||
"type::table("
|
||||
"type::thing("
|
||||
"vector::dotproduct(",
|
||||
"vector::magnitude(",
|
||||
"vector::distance::chebyshev(",
|
||||
"vector::distance::euclidean(",
|
||||
"vector::distance::hamming(",
|
||||
"vector::distance::mahalanobis(",
|
||||
"vector::distance::manhattan(",
|
||||
"vector::distance::minkowski(",
|
||||
"vector::similarity::cosine(",
|
||||
"vector::similarity::jaccard(",
|
||||
"vector::similarity::pearson(",
|
||||
"vector::similarity::spearman(",
|
||||
"vector::add("
|
||||
"vector::angle("
|
||||
"vector::cross("
|
||||
"vector::divide("
|
||||
"vector::dot("
|
||||
"vector::magnitude("
|
||||
"vector::multiply("
|
||||
"vector::normalize("
|
||||
"vector::project("
|
||||
"vector::subtract("
|
||||
"vector::distance::chebyshev("
|
||||
"vector::distance::euclidean("
|
||||
"vector::distance::hamming("
|
||||
"vector::distance::mahalanobis("
|
||||
"vector::distance::manhattan("
|
||||
"vector::distance::minkowski("
|
||||
"vector::similarity::cosine("
|
||||
"vector::similarity::jaccard("
|
||||
"vector::similarity::pearson("
|
||||
"vector::similarity::spearman("
|
||||
# TODO: Add Javascript keywords
|
||||
|
||||
|
|
|
@ -276,6 +276,9 @@
|
|||
"uuid"
|
||||
"rand::uuid::v4("
|
||||
"rand::uuid::v7("
|
||||
"search::score("
|
||||
"search::highlight("
|
||||
"search::offsets("
|
||||
"session"
|
||||
"session::"
|
||||
"session::db("
|
||||
|
@ -340,17 +343,25 @@
|
|||
"type::string("
|
||||
"type::table("
|
||||
"type::thing("
|
||||
"vector::dotproduct(",
|
||||
"vector::magnitude(",
|
||||
"vector::distance::chebyshev(",
|
||||
"vector::distance::euclidean(",
|
||||
"vector::distance::hamming(",
|
||||
"vector::distance::mahalanobis(",
|
||||
"vector::distance::manhattan(",
|
||||
"vector::distance::minkowski(",
|
||||
"vector::similarity::cosine(",
|
||||
"vector::similarity::jaccard(",
|
||||
"vector::similarity::pearson(",
|
||||
"vector::similarity::spearman(",
|
||||
"vector::add("
|
||||
"vector::angle("
|
||||
"vector::cross("
|
||||
"vector::divide("
|
||||
"vector::dot("
|
||||
"vector::magnitude("
|
||||
"vector::multiply("
|
||||
"vector::normalize("
|
||||
"vector::project("
|
||||
"vector::subtract("
|
||||
"vector::distance::chebyshev("
|
||||
"vector::distance::euclidean("
|
||||
"vector::distance::hamming("
|
||||
"vector::distance::mahalanobis("
|
||||
"vector::distance::manhattan("
|
||||
"vector::distance::minkowski("
|
||||
"vector::similarity::cosine("
|
||||
"vector::similarity::jaccard("
|
||||
"vector::similarity::pearson("
|
||||
"vector::similarity::spearman("
|
||||
# TODO: Add Javascript keywords
|
||||
|
||||
|
|
|
@ -289,8 +289,16 @@ pub fn synchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Va
|
|||
"type::table" => r#type::table,
|
||||
"type::thing" => r#type::thing,
|
||||
//
|
||||
"vector::dotproduct" => vector::dotproduct,
|
||||
"vector::add" => vector::add,
|
||||
"vector::angle" => vector::angle,
|
||||
"vector::cross" => vector::cross,
|
||||
"vector::dot" => vector::dot,
|
||||
"vector::divide" => vector::divide,
|
||||
"vector::magnitude" => vector::magnitude,
|
||||
"vector::multiply" => vector::multiply,
|
||||
"vector::normalize" => vector::normalize,
|
||||
"vector::project" => vector::project,
|
||||
"vector::subtract" => vector::subtract,
|
||||
"vector::distance::chebyshev" => vector::distance::chebyshev,
|
||||
"vector::distance::euclidean" => vector::distance::euclidean,
|
||||
"vector::distance::hamming" => vector::distance::hamming,
|
||||
|
|
|
@ -3,13 +3,22 @@ use crate::fnc::script::modules::impl_module_def;
|
|||
|
||||
mod distance;
|
||||
mod similarity;
|
||||
|
||||
pub struct Package;
|
||||
|
||||
impl_module_def!(
|
||||
Package,
|
||||
"vector",
|
||||
"distance" => (distance::Package),
|
||||
"dotproduct" => run,
|
||||
"similarity" => (similarity::Package),
|
||||
"add" => run,
|
||||
"angle" => run,
|
||||
"cross" => run,
|
||||
"divide" => run,
|
||||
"dot" => run,
|
||||
"magnitude" => run,
|
||||
"similarity" => (similarity::Package)
|
||||
"multiply" => run,
|
||||
"normalize" => run,
|
||||
"project" => run,
|
||||
"subtract" => run
|
||||
);
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
use super::variance::Variance;
|
||||
use crate::fnc::util::math::mean::Mean;
|
||||
use crate::fnc::util::math::variance::variance;
|
||||
use crate::sql::number::Number;
|
||||
|
||||
pub trait Deviation {
|
||||
|
@ -8,6 +9,10 @@ pub trait Deviation {
|
|||
|
||||
impl Deviation for Vec<Number> {
|
||||
fn deviation(self, sample: bool) -> f64 {
|
||||
self.variance(sample).sqrt()
|
||||
deviation(&self, self.mean(), sample)
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn deviation(v: &[Number], mean: f64, sample: bool) -> f64 {
|
||||
variance(v, mean, sample).sqrt()
|
||||
}
|
||||
|
|
|
@ -1,15 +0,0 @@
|
|||
use crate::sql::Number;
|
||||
|
||||
pub trait DotProduct {
|
||||
/// Dot Product of two vectors
|
||||
fn dotproduct(&self, other: &Self) -> Option<Number>;
|
||||
}
|
||||
|
||||
impl DotProduct for Vec<Number> {
|
||||
fn dotproduct(&self, other: &Self) -> Option<Number> {
|
||||
if self.len() != other.len() {
|
||||
return None;
|
||||
}
|
||||
Some(self.iter().zip(other.iter()).map(|(a, b)| a * b).sum())
|
||||
}
|
||||
}
|
|
@ -1,21 +0,0 @@
|
|||
use crate::sql::Number;
|
||||
|
||||
pub trait EuclideanDistance {
|
||||
/// Euclidean Distance between two vectors (L2 Norm)
|
||||
fn euclidean_distance(&self, other: &Self) -> Option<Number>;
|
||||
}
|
||||
|
||||
impl EuclideanDistance for Vec<Number> {
|
||||
fn euclidean_distance(&self, other: &Self) -> Option<Number> {
|
||||
if self.len() != other.len() {
|
||||
return None;
|
||||
}
|
||||
Some(
|
||||
self.iter()
|
||||
.zip(other.iter())
|
||||
.map(|(a, b)| (a - b).pow(Number::Int(2)))
|
||||
.sum::<Number>()
|
||||
.sqrt(),
|
||||
)
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
use crate::sql::Number;
|
||||
|
||||
pub trait Magnitude {
|
||||
/// Calculate the magnitude of a vector
|
||||
fn magnitude(&self) -> Number;
|
||||
}
|
||||
|
||||
impl Magnitude for Vec<Number> {
|
||||
fn magnitude(&self) -> Number {
|
||||
self.iter().map(|a| a.clone().pow(Number::Int(2))).sum::<Number>().sqrt()
|
||||
}
|
||||
}
|
|
@ -4,10 +4,7 @@
|
|||
|
||||
pub mod bottom;
|
||||
pub mod deviation;
|
||||
pub mod dotproduct;
|
||||
pub mod euclideandistance;
|
||||
pub mod interquartile;
|
||||
pub mod magnitude;
|
||||
pub mod mean;
|
||||
pub mod median;
|
||||
pub mod midhinge;
|
||||
|
@ -19,3 +16,4 @@ pub mod spread;
|
|||
pub mod top;
|
||||
pub mod trimean;
|
||||
pub mod variance;
|
||||
pub mod vector;
|
||||
|
|
|
@ -9,15 +9,18 @@ pub trait Variance {
|
|||
|
||||
impl Variance for Vec<Number> {
|
||||
fn variance(self, sample: bool) -> f64 {
|
||||
match self.len() {
|
||||
variance(&self, self.mean(), sample)
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn variance(v: &[Number], mean: f64, sample: bool) -> f64 {
|
||||
match v.len() {
|
||||
0 => f64::NAN,
|
||||
1 => 0.0,
|
||||
len => {
|
||||
let mean = self.mean();
|
||||
let len = (len - sample as usize) as f64;
|
||||
let out = self.iter().map(|x| (x.to_float() - mean).powi(2)).sum::<f64>() / len;
|
||||
let out = v.iter().map(|x| (x.to_float() - mean).powi(2)).sum::<f64>() / len;
|
||||
out
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
289
lib/src/fnc/util/math/vector.rs
Normal file
289
lib/src/fnc/util/math/vector.rs
Normal file
|
@ -0,0 +1,289 @@
|
|||
use crate::err::Error;
|
||||
use crate::fnc::util::math::deviation::deviation;
|
||||
use crate::fnc::util::math::mean::Mean;
|
||||
use crate::sql::Number;
|
||||
use std::collections::HashSet;
|
||||
|
||||
pub trait Add {
|
||||
/// Addition of two vectors
|
||||
fn add(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||
}
|
||||
|
||||
fn check_same_dimension(fnc: &str, a: &Vec<Number>, b: &Vec<Number>) -> Result<(), Error> {
|
||||
if a.len() != b.len() {
|
||||
Err(Error::InvalidArguments {
|
||||
name: String::from(fnc),
|
||||
message: String::from("The two vectors must be of the same dimension."),
|
||||
})
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Add for Vec<Number> {
|
||||
fn add(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||
check_same_dimension("vector::add", self, other)?;
|
||||
Ok(self.iter().zip(other.iter()).map(|(a, b)| a + b).collect())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Angle {
|
||||
/// Compute the angle between two vectors
|
||||
fn angle(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl Angle for Vec<Number> {
|
||||
fn angle(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::angle", self, other)?;
|
||||
let dp = dot(self, other);
|
||||
let m = self.magnitude() * other.magnitude();
|
||||
let d = vector_div(&dp, &m);
|
||||
Ok(d.acos())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CosineSimilarity {
|
||||
fn cosine_similarity(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl CosineSimilarity for Vec<Number> {
|
||||
fn cosine_similarity(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::similarity::cosine", self, other)?;
|
||||
let d = dot(self, other);
|
||||
Ok(d / (self.magnitude() * other.magnitude()))
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Divide {
|
||||
/// Division of two vectors
|
||||
fn divide(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||
}
|
||||
|
||||
fn vector_div(a: &Number, b: &Number) -> Number {
|
||||
if a.is_nan() || b.is_nan() || b.is_zero() {
|
||||
Number::NAN
|
||||
} else {
|
||||
a / b
|
||||
}
|
||||
}
|
||||
|
||||
impl Divide for Vec<Number> {
|
||||
fn divide(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||
check_same_dimension("vector::divide", self, other)?;
|
||||
Ok(self.iter().zip(other.iter()).map(|(a, b)| vector_div(a, b)).collect())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait HammingDistance {
|
||||
fn hamming_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl HammingDistance for Vec<Number> {
|
||||
fn hamming_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::distance::hamming", self, other)?;
|
||||
Ok(self.iter().zip(other.iter()).filter(|&(a, b)| a != b).count().into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait JaccardSimilarity {
|
||||
fn jaccard_similarity(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl JaccardSimilarity for Vec<Number> {
|
||||
fn jaccard_similarity(&self, other: &Self) -> Result<Number, Error> {
|
||||
let set_a: HashSet<_> = HashSet::from_iter(self.iter());
|
||||
let set_b: HashSet<_> = HashSet::from_iter(other.iter());
|
||||
let intersection_size = set_a.intersection(&set_b).count() as f64;
|
||||
let union_size = set_a.union(&set_b).count() as f64;
|
||||
Ok((intersection_size / union_size).into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait PearsonSimilarity {
|
||||
fn pearson_similarity(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl PearsonSimilarity for Vec<Number> {
|
||||
fn pearson_similarity(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::similarity::pearson", self, other)?;
|
||||
let m1 = self.mean();
|
||||
let m2 = other.mean();
|
||||
let covar: f64 = self
|
||||
.iter()
|
||||
.zip(other.iter())
|
||||
.map(|(x, y)| (x.to_float() - m1) * (y.to_float() - m2))
|
||||
.sum();
|
||||
let covar = covar / self.len() as f64;
|
||||
let std_dev1 = deviation(self, m1, false);
|
||||
let std_dev2 = deviation(other, m2, false);
|
||||
Ok((covar / (std_dev1 * std_dev2)).into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ManhattanDistance {
|
||||
fn manhattan_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl ManhattanDistance for Vec<Number> {
|
||||
fn manhattan_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::distance::manhattan", self, other)?;
|
||||
Ok(self.iter().zip(other.iter()).map(|(a, b)| (a - b).abs()).sum())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait MinkowskiDistance {
|
||||
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl MinkowskiDistance for Vec<Number> {
|
||||
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::distance::minkowski", self, other)?;
|
||||
let p = order.to_float();
|
||||
let dist: f64 = self
|
||||
.iter()
|
||||
.zip(other.iter())
|
||||
.map(|(a, b)| (a.to_float() - b.to_float()).abs().powf(p))
|
||||
.sum();
|
||||
Ok(dist.powf(1.0 / p).into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Multiply {
|
||||
/// Multiplication of two vectors
|
||||
fn multiply(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||
}
|
||||
|
||||
impl Multiply for Vec<Number> {
|
||||
fn multiply(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||
check_same_dimension("vector::multiply", self, other)?;
|
||||
Ok(self.iter().zip(other.iter()).map(|(a, b)| a * b).collect())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Project {
|
||||
/// Projection of two vectors
|
||||
fn project(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||
}
|
||||
|
||||
impl Project for Vec<Number> {
|
||||
fn project(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||
check_same_dimension("vector::project", self, other)?;
|
||||
let d = dot(self, other);
|
||||
let m = magnitude_squared(other).into();
|
||||
let s = vector_div(&d, &m);
|
||||
Ok(other.iter().map(|x| &s * x).collect())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ChebyshevDistance {
|
||||
fn chebyshev_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl ChebyshevDistance for Vec<Number> {
|
||||
fn chebyshev_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::distance::chebyshev", self, other)?;
|
||||
Ok(self
|
||||
.iter()
|
||||
.zip(other.iter())
|
||||
.map(|(a, b)| (a.to_float() - b.to_float()).abs())
|
||||
.fold(f64::MIN, f64::max)
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Subtract {
|
||||
/// Subtraction of two vectors
|
||||
fn subtract(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||
}
|
||||
|
||||
impl Subtract for Vec<Number> {
|
||||
fn subtract(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||
check_same_dimension("vector::subtract", self, other)?;
|
||||
Ok(self.iter().zip(other.iter()).map(|(a, b)| a - b).collect())
|
||||
}
|
||||
}
|
||||
|
||||
pub trait CrossProduct {
|
||||
/// Cross product of two vectors
|
||||
fn cross(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||
}
|
||||
|
||||
impl CrossProduct for Vec<Number> {
|
||||
fn cross(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||
if self.len() != 3 || other.len() != 3 {
|
||||
return Err(Error::InvalidArguments {
|
||||
name: "vector::cross".to_string(),
|
||||
message: String::from("Both vectors must have a dimension of 3."),
|
||||
});
|
||||
}
|
||||
let a0 = &self[0];
|
||||
let a1 = &self[1];
|
||||
let a2 = &self[2];
|
||||
let b0 = &other[0];
|
||||
let b1 = &other[1];
|
||||
let b2 = &other[2];
|
||||
let v = vec![a1 * b2 - a2 * b1, a2 * b0 - a0 * b2, a0 * b1 - a1 * b0];
|
||||
Ok(v)
|
||||
}
|
||||
}
|
||||
|
||||
pub trait DotProduct {
|
||||
/// Dot Product of two vectors
|
||||
fn dot(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl DotProduct for Vec<Number> {
|
||||
fn dot(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::dot", self, other)?;
|
||||
Ok(dot(self, other))
|
||||
}
|
||||
}
|
||||
|
||||
/// Sums the pairwise products of the components of `a` and `b`.
///
/// No length check is done here: `zip` stops at the shorter slice, so
/// callers are expected to validate dimensions first.
fn dot(a: &[Number], b: &[Number]) -> Number {
    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
}
|
||||
|
||||
pub trait EuclideanDistance {
|
||||
/// Euclidean Distance between two vectors (L2 Norm)
|
||||
fn euclidean_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||
}
|
||||
|
||||
impl EuclideanDistance for Vec<Number> {
|
||||
fn euclidean_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||
check_same_dimension("vector::distance::euclidean", self, other)?;
|
||||
Ok(self
|
||||
.iter()
|
||||
.zip(other.iter())
|
||||
.map(|(a, b)| (a - b).to_float().powi(2))
|
||||
.sum::<f64>()
|
||||
.sqrt()
|
||||
.into())
|
||||
}
|
||||
}
|
||||
|
||||
fn magnitude_squared(v: &[Number]) -> f64 {
|
||||
v.iter().map(|a| a.to_float().powi(2)).sum::<f64>()
|
||||
}
|
||||
|
||||
pub trait Magnitude {
|
||||
/// Calculate the magnitude of a vector
|
||||
fn magnitude(&self) -> Number;
|
||||
}
|
||||
|
||||
impl Magnitude for Vec<Number> {
|
||||
fn magnitude(&self) -> Number {
|
||||
magnitude_squared(self).sqrt().into()
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Normalize {
|
||||
/// Normalize a vector
|
||||
fn normalize(&self) -> Vec<Number>;
|
||||
}
|
||||
|
||||
impl Normalize for Vec<Number> {
|
||||
fn normalize(&self) -> Vec<Number> {
|
||||
let m = self.magnitude();
|
||||
self.iter().map(|a| vector_div(a, &m)).collect()
|
||||
}
|
||||
}
|
|
@ -1,48 +1,67 @@
|
|||
use crate::err::Error;
|
||||
use crate::fnc::util::math::dotproduct::DotProduct;
|
||||
use crate::fnc::util::math::magnitude::Magnitude;
|
||||
use crate::fnc::util::math::vector::{
|
||||
Add, Angle, CrossProduct, Divide, DotProduct, Magnitude, Multiply, Normalize, Project, Subtract,
|
||||
};
|
||||
use crate::sql::{Number, Value};
|
||||
|
||||
pub fn dotproduct((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
match a.dotproduct(&b) {
|
||||
None => Err(Error::InvalidArguments {
|
||||
name: String::from("vector::dotproduct"),
|
||||
message: String::from("The two vectors must be of the same length."),
|
||||
}),
|
||||
Some(dot) => Ok(dot.into()),
|
||||
}
|
||||
pub fn add((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Ok(a.add(&b)?.into())
|
||||
}
|
||||
|
||||
/// Computes the angle between vectors `a` and `b` and wraps it in a
/// [`Value`]. Any error from the underlying computation is propagated.
pub fn angle((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
    let theta: Number = a.angle(&b)?;
    Ok(theta.into())
}
|
||||
|
||||
/// Divides vector `a` by vector `b` component-wise and wraps the result in
/// a [`Value`]. Any error from the underlying computation is propagated.
pub fn divide((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
    let quotient: Vec<Number> = a.divide(&b)?;
    Ok(quotient.into())
}
|
||||
|
||||
/// Computes the cross product of vectors `a` and `b` and wraps the result
/// in a [`Value`]. Any error from the underlying computation is propagated.
pub fn cross((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
    let product: Vec<Number> = a.cross(&b)?;
    Ok(product.into())
}
|
||||
|
||||
/// Computes the dot product of vectors `a` and `b` and wraps the result in
/// a [`Value`]. Any error from the underlying computation is propagated.
pub fn dot((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
    let product: Number = a.dot(&b)?;
    Ok(product.into())
}
|
||||
|
||||
/// Computes the magnitude of vector `a` and wraps it in a [`Value`].
pub fn magnitude((a,): (Vec<Number>,)) -> Result<Value, Error> {
    let length: Number = a.magnitude();
    Ok(length.into())
}
|
||||
|
||||
/// Multiplies vectors `a` and `b` component-wise and wraps the result in a
/// [`Value`]. Any error from the underlying computation is propagated.
pub fn multiply((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
    let product: Vec<Number> = a.multiply(&b)?;
    Ok(product.into())
}
|
||||
|
||||
/// Normalizes vector `a` and wraps the resulting vector in a [`Value`].
pub fn normalize((a,): (Vec<Number>,)) -> Result<Value, Error> {
    let unit: Vec<Number> = a.normalize();
    Ok(unit.into())
}
|
||||
|
||||
/// Projects vector `a` onto vector `b` and wraps the result in a [`Value`].
/// Any error from the underlying computation is propagated.
pub fn project((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
    let projection: Vec<Number> = a.project(&b)?;
    Ok(projection.into())
}
|
||||
|
||||
/// Subtracts vector `b` from vector `a` component-wise and wraps the result
/// in a [`Value`]. Any error from the underlying computation is propagated.
pub fn subtract((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
    let difference: Vec<Number> = a.subtract(&b)?;
    Ok(difference.into())
}
|
||||
|
||||
pub mod distance {
|
||||
|
||||
use crate::err::Error;
|
||||
use crate::fnc::util::math::euclideandistance::EuclideanDistance;
|
||||
use crate::fnc::util::math::vector::{
|
||||
ChebyshevDistance, EuclideanDistance, HammingDistance, ManhattanDistance, MinkowskiDistance,
|
||||
};
|
||||
use crate::sql::{Number, Value};
|
||||
|
||||
pub fn chebyshev((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "vector::distance::chebyshev() function",
|
||||
})
|
||||
pub fn chebyshev((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Ok(a.chebyshev_distance(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn euclidean((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
match a.euclidean_distance(&b) {
|
||||
None => Err(Error::InvalidArguments {
|
||||
name: String::from("vector::distance::euclidean"),
|
||||
message: String::from("The two vectors must be of the same length."),
|
||||
}),
|
||||
Some(distance) => Ok(distance.into()),
|
||||
}
|
||||
Ok(a.euclidean_distance(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn hamming((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "vector::distance::hamming() function",
|
||||
})
|
||||
pub fn hamming((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Ok(a.hamming_distance(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn mahalanobis((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
|
@ -51,46 +70,31 @@ pub mod distance {
|
|||
})
|
||||
}
|
||||
|
||||
pub fn manhattan((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "vector::distance::manhattan() function",
|
||||
})
|
||||
pub fn manhattan((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Ok(a.manhattan_distance(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn minkowski((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "vector::distance::minkowski() function",
|
||||
})
|
||||
pub fn minkowski((a, b, o): (Vec<Number>, Vec<Number>, Number)) -> Result<Value, Error> {
|
||||
Ok(a.minkowski_distance(&b, o)?.into())
|
||||
}
|
||||
}
|
||||
|
||||
pub mod similarity {
|
||||
|
||||
use crate::err::Error;
|
||||
use crate::fnc::util::math::dotproduct::DotProduct;
|
||||
use crate::fnc::util::math::magnitude::Magnitude;
|
||||
use crate::fnc::util::math::vector::{CosineSimilarity, JaccardSimilarity, PearsonSimilarity};
|
||||
use crate::sql::{Number, Value};
|
||||
|
||||
pub fn cosine((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
match a.dotproduct(&b) {
|
||||
None => Err(Error::InvalidArguments {
|
||||
name: String::from("vector::similarity::cosine"),
|
||||
message: String::from("The two vectors must be of the same length."),
|
||||
}),
|
||||
Some(dot) => Ok((dot / (a.magnitude() * b.magnitude())).into()),
|
||||
}
|
||||
Ok(a.cosine_similarity(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn jaccard((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "vector::similarity::jaccard() function",
|
||||
})
|
||||
pub fn jaccard((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Ok(a.jaccard_similarity(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn pearson((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Err(Error::FeatureNotYetImplemented {
|
||||
feature: "vector::similarity::pearson() function",
|
||||
})
|
||||
pub fn pearson((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
Ok(a.pearson_similarity(&b)?.into())
|
||||
}
|
||||
|
||||
pub fn spearman((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||
|
|
|
@ -555,8 +555,16 @@ fn function_type(i: &str) -> IResult<&str, &str> {
|
|||
|
||||
fn function_vector(i: &str) -> IResult<&str, &str> {
|
||||
alt((
|
||||
tag("dotproduct"),
|
||||
tag("add"),
|
||||
tag("angle"),
|
||||
tag("divide"),
|
||||
tag("cross"),
|
||||
tag("dot"),
|
||||
tag("magnitude"),
|
||||
tag("multiply"),
|
||||
tag("normalize"),
|
||||
tag("project"),
|
||||
tag("subtract"),
|
||||
preceded(
|
||||
tag("distance::"),
|
||||
alt((
|
||||
|
|
|
@ -234,6 +234,14 @@ impl Number {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn is_zero(&self) -> bool {
|
||||
match self {
|
||||
Number::Int(v) => v == &0,
|
||||
Number::Float(v) => v == &0.0,
|
||||
Number::Decimal(v) => v == &Decimal::ZERO,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_zero_or_positive(&self) -> bool {
|
||||
match self {
|
||||
Number::Int(v) => v >= &0,
|
||||
|
@ -334,6 +342,10 @@ impl Number {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns the arccosine of this number, computed on its float value.
pub fn acos(self) -> Self {
    let radians = self.to_float().acos();
    radians.into()
}
|
||||
|
||||
pub fn ceil(self) -> Self {
|
||||
match self {
|
||||
Number::Int(v) => v.into(),
|
||||
|
@ -637,7 +649,7 @@ impl Sort for Vec<Number> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn number(i: &str) -> IResult<&str, Number> {
|
||||
fn not_nan(i: &str) -> IResult<&str, Number> {
|
||||
let (i, v) = recognize_float(i)?;
|
||||
let (i, suffix) = suffix(i)?;
|
||||
let (i, _) = ending(i)?;
|
||||
|
@ -649,6 +661,10 @@ pub fn number(i: &str) -> IResult<&str, Number> {
|
|||
Ok((i, number))
|
||||
}
|
||||
|
||||
pub fn number(i: &str) -> IResult<&str, Number> {
|
||||
alt((map(tag("NaN"), |_| Number::NAN), not_nan))(i)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Suffix {
|
||||
None,
|
||||
|
@ -691,6 +707,15 @@ mod tests {
|
|||
assert!(!decimal_is_integer(&Decimal::HALF_PI));
|
||||
}
|
||||
|
||||
/// The literal `NaN` must parse and display as `NaN` again.
#[test]
fn number_nan() {
    let res = number("NaN");
    assert!(res.is_ok());
    let (_, out) = res.unwrap();
    assert_eq!("NaN", out.to_string());
}
|
||||
|
||||
#[test]
|
||||
fn number_int() {
|
||||
let sql = "123";
|
||||
|
|
|
@ -13,16 +13,19 @@ async fn test_queries(sql: &str, desired_responses: &[&str]) -> Result<(), Error
|
|||
let v = r?;
|
||||
if let Some(desired_response) = desired_responses.get(i) {
|
||||
let desired_value = Value::parse(*desired_response);
|
||||
// If both values are NaN, they are equal from a test PoV
|
||||
if !desired_value.is_nan() || !v.is_nan() {
|
||||
assert_eq!(
|
||||
v,
|
||||
desired_value,
|
||||
"Recieved responce did not match \
|
||||
"Received response did not match \
|
||||
expected.
|
||||
Query responce #{},
|
||||
Desired responce: {desired_value},
|
||||
Query response #{},
|
||||
Desired response: {desired_value},
|
||||
Actual response: {v}",
|
||||
i + 1
|
||||
);
|
||||
}
|
||||
} else {
|
||||
panic!("Response index {i} out of bounds of desired responses.");
|
||||
}
|
||||
|
@ -30,6 +33,31 @@ async fn test_queries(sql: &str, desired_responses: &[&str]) -> Result<(), Error
|
|||
Ok(())
|
||||
}
|
||||
|
||||
async fn check_test_is_error(sql: &str, expected_errors: &[&str]) -> Result<(), Error> {
|
||||
let db = Datastore::new("memory").await?;
|
||||
let session = Session::for_kv().with_ns("test").with_db("test");
|
||||
let response = db.execute(sql, &session, None).await?;
|
||||
if response.len() != expected_errors.len() {
|
||||
panic!(
|
||||
"Wrong number of responses {} - expected {}.",
|
||||
response.len(),
|
||||
expected_errors.len()
|
||||
);
|
||||
}
|
||||
for (i, r) in response.into_iter().map(|r| r.result).enumerate() {
|
||||
if let Some(expected_error) = expected_errors.get(i) {
|
||||
if let Err(e) = r {
|
||||
assert_eq!(e.to_string().as_str(), *expected_error)
|
||||
} else {
|
||||
panic!("Response index {i} is not an error.");
|
||||
}
|
||||
} else {
|
||||
panic!("Response index {i} out of bounds of expected responses.");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// --------------------------------------------------
|
||||
// array
|
||||
// --------------------------------------------------
|
||||
|
@ -4640,123 +4668,417 @@ async fn function_type_thing() -> Result<(), Error> {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_distance_euclidean() -> Result<(), Error> {
|
||||
let sql = r#"
|
||||
RETURN vector::distance::euclidean([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::distance::euclidean([1, 2, 3], [-1, -2, -3]);
|
||||
RETURN vector::distance::euclidean([1, 2, 3], [4, 5]);
|
||||
RETURN vector::distance::euclidean([1, 2], [4, 5, 5]);
|
||||
"#;
|
||||
|
||||
let dbs = Datastore::new("memory").await?;
|
||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 4);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(0);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(7.483314773547883);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result;
|
||||
assert!(tmp.is_err());
|
||||
//
|
||||
let tmp = res.remove(0).result;
|
||||
assert!(tmp.is_err());
|
||||
async fn function_vector_add() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::add([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::add([1, 2, 3], [-1, -2, -3]);
|
||||
"#,
|
||||
&["[2, 4, 6]", "[0, 0, 0]"],
|
||||
)
|
||||
.await?;
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::add([1, 2, 3], [4, 5]);
|
||||
RETURN vector::add([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::add(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::add(). The two vectors must be of the same dimension."
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_dotproduct() -> Result<(), Error> {
|
||||
let sql = r#"
|
||||
RETURN vector::dotproduct([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::dotproduct([1, 2, 3], [-1, -2, -3]);
|
||||
RETURN vector::dotproduct([1, 2, 3], [4, 5]);
|
||||
RETURN vector::dotproduct([1, 2], [4, 5, 5]);
|
||||
"#;
|
||||
async fn function_vector_angle() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::angle([1,0,0], [0,1,0]);
|
||||
RETURN vector::angle([5, 10, 15], [10, 5, 20]);
|
||||
RETURN vector::angle([-3, 2, 5], [4, -1, 2]);
|
||||
RETURN vector::angle([NaN, 2, 3], [-1, -2, NaN]);
|
||||
"#,
|
||||
&["1.5707963267948966", "0.36774908225917935", "1.7128722906354115", "NaN"],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let dbs = Datastore::new("memory").await?;
|
||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 4);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(14);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(-14);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result;
|
||||
assert!(tmp.is_err());
|
||||
//
|
||||
let tmp = res.remove(0).result;
|
||||
assert!(tmp.is_err());
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::angle([1, 2, 3], [4, 5]);
|
||||
RETURN vector::angle([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::angle(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::angle(). The two vectors must be of the same dimension."
|
||||
],
|
||||
).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_cross() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::cross([1, 2, 3], [4, 5, 6]);
|
||||
RETURN vector::cross([1, 2, 3], [-4, -5, -6]);
|
||||
RETURN vector::cross([1, NaN, 3], [NaN, -5, -6]);
|
||||
"#,
|
||||
&["[-3, 6, -3]", "[3, -6, 3]", "[NaN, NaN, NaN]"],
|
||||
)
|
||||
.await?;
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::cross([1, 2, 3], [4, 5]);
|
||||
RETURN vector::cross([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::cross(). Both vectors must have a dimension of 3.",
|
||||
"Incorrect arguments for function vector::cross(). Both vectors must have a dimension of 3."
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_dot() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::dot([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::dot([1, 2, 3], [-1, -2, -3]);
|
||||
"#,
|
||||
&["14", "-14"],
|
||||
)
|
||||
.await?;
|
||||
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::dot([1, 2, 3], [4, 5]);
|
||||
RETURN vector::dot([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::dot(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::dot(). The two vectors must be of the same dimension."
|
||||
],
|
||||
).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_magnitude() -> Result<(), Error> {
|
||||
let sql = r#"
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::magnitude([]);
|
||||
RETURN vector::magnitude([1]);
|
||||
RETURN vector::magnitude([5]);
|
||||
RETURN vector::magnitude([1,2,3,3,3,4,5]);
|
||||
"#;
|
||||
"#,
|
||||
&["0", "1", "5", "8.54400374531753"],
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
let dbs = Datastore::new("memory").await?;
|
||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 4);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(0);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(1);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(5);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(8.54400374531753);
|
||||
assert_eq!(tmp, val);
|
||||
#[tokio::test]
|
||||
async fn function_vector_normalize() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::normalize([]);
|
||||
RETURN vector::normalize([1]);
|
||||
RETURN vector::normalize([5]);
|
||||
RETURN vector::normalize([4,3]);
|
||||
"#,
|
||||
&["[]", "[1]", "[1]", "[0.8,0.6]"],
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_multiply() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::multiply([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::multiply([1, 2, 3], [-1, -2, -3]);
|
||||
"#,
|
||||
&["[1, 4, 9]", "[-1, -4, -9]"],
|
||||
)
|
||||
.await?;
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::multiply([1, 2, 3], [4, 5]);
|
||||
RETURN vector::multiply([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::multiply(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::multiply(). The two vectors must be of the same dimension."
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_project() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::project([1, 2, 3], [4, 5, 6]);
|
||||
RETURN vector::project([1, -2, 3], [-4, 5, 6]);
|
||||
RETURN vector::project([NaN, -2, 3], [-4, NaN, NaN]);
|
||||
"#,
|
||||
&[
|
||||
"[1.6623376623376624, 2.077922077922078, 2.4935064935064934]",
|
||||
"[-0.2077922077922078, 0.25974025974025977, 0.3116883116883117]",
|
||||
"[NaN, NaN, NaN]",
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::project([1, 2, 3], [4, 5]);
|
||||
RETURN vector::project([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::project(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::project(). The two vectors must be of the same dimension."
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_divide() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::divide([10, NaN, 20, 30, 0], [0, 1, 2, 0, 4]);
|
||||
RETURN vector::divide([10, -20, 30, 0], [0, -1, 2, -3]);
|
||||
"#,
|
||||
&["[NaN, NaN, 10, NaN, 0]", "[NaN, 20, 15, 0]"],
|
||||
)
|
||||
.await?;
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::divide([1, 2, 3], [4, 5]);
|
||||
RETURN vector::divide([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::divide(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::divide(). The two vectors must be of the same dimension."
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_subtract() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::subtract([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::subtract([1, 2, 3], [-1, -2, -3]);
|
||||
"#,
|
||||
&["[0, 0, 0]", "[2, 4, 6]"],
|
||||
)
|
||||
.await?;
|
||||
check_test_is_error(
|
||||
r#"
|
||||
RETURN vector::subtract([1, 2, 3], [4, 5]);
|
||||
RETURN vector::subtract([1, 2], [4, 5, 5]);
|
||||
"#,
|
||||
&[
|
||||
"Incorrect arguments for function vector::subtract(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::subtract(). The two vectors must be of the same dimension."
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_similarity_cosine() -> Result<(), Error> {
|
||||
let sql = r#"
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::similarity::cosine([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::similarity::cosine([1, 2, 3], [-1, -2, -3]);
|
||||
RETURN vector::similarity::cosine([1, 2, 3], [4, 5]);
|
||||
RETURN vector::similarity::cosine([1, 2], [4, 5, 5]);
|
||||
"#;
|
||||
RETURN vector::similarity::cosine([NaN, 1, 2, 3], [NaN, 1, 2, 3]);
|
||||
RETURN vector::similarity::cosine([10, 50, 200], [400, 100, 20]);
|
||||
"#,
|
||||
&["1.0", "-1.0", "NaN", "0.15258215962441316"],
|
||||
)
|
||||
.await?;
|
||||
|
||||
let dbs = Datastore::new("memory").await?;
|
||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
||||
assert_eq!(res.len(), 4);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(1.0);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result?;
|
||||
let val = Value::from(-1.0);
|
||||
assert_eq!(tmp, val);
|
||||
//
|
||||
let tmp = res.remove(0).result;
|
||||
assert!(tmp.is_err());
|
||||
//
|
||||
let tmp = res.remove(0).result;
|
||||
assert!(tmp.is_err());
|
||||
check_test_is_error(
|
||||
r"RETURN vector::similarity::cosine([1, 2, 3], [4, 5]);
|
||||
RETURN vector::similarity::cosine([1, 2], [4, 5, 5]);",
|
||||
&[
|
||||
"Incorrect arguments for function vector::similarity::cosine(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::similarity::cosine(). The two vectors must be of the same dimension."
|
||||
]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_similarity_jaccard() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::similarity::jaccard([1, 2, 3], [3, 2, 1]);
|
||||
RETURN vector::similarity::jaccard([1, 2, 3], [-3, -2, -1]);
|
||||
RETURN vector::similarity::jaccard([1, -2, 3, -4], [4, 3, 2, 1]);
|
||||
RETURN vector::similarity::jaccard([NaN, 1, 2, 3], [NaN, 2, 3, 4]);
|
||||
RETURN vector::similarity::jaccard([0,1,2,5,6], [0,2,3,4,5,7,9]);
|
||||
"#,
|
||||
&["1.0", "0", "0.3333333333333333", "0.6", "0.3333333333333333"],
|
||||
)
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_similarity_pearson() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::similarity::pearson([1, 2, 3, 4, 5], [1, 2.5, 3.5, 4.2, 5.1]);
|
||||
RETURN vector::similarity::pearson([NaN, 1, 2, 3, 4, 5], [NaN, 1, 2.5, 3.5, 4.2, 5.1]);
|
||||
RETURN vector::similarity::pearson([1,2,3], [1,5,7]);
|
||||
"#,
|
||||
&["0.9894065340659606", "NaN", "0.9819805060619659"],
|
||||
)
|
||||
.await?;
|
||||
|
||||
check_test_is_error(
|
||||
r"RETURN vector::similarity::pearson([1, 2, 3], [4, 5]);
|
||||
RETURN vector::similarity::pearson([1, 2], [4, 5, 5]);",
|
||||
&[
|
||||
"Incorrect arguments for function vector::similarity::pearson(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::similarity::pearson(). The two vectors must be of the same dimension."
|
||||
]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_distance_euclidean() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::distance::euclidean([1, 2, 3], [1, 2, 3]);
|
||||
RETURN vector::distance::euclidean([NaN, 2, 3], [-1, NaN, -3]);
|
||||
RETURN vector::distance::euclidean([1, 2, 3], [-1, -2, -3]);
|
||||
RETURN vector::distance::euclidean([10, 50, 200], [400, 100, 20]);
|
||||
RETURN vector::distance::euclidean([10, 20, 15, 10, 5], [12, 24, 18, 8, 7]);
|
||||
"#,
|
||||
&["0", "NaN", "7.483314773547883", "432.43496620879307", "6.082762530298219"],
|
||||
)
|
||||
.await?;
|
||||
check_test_is_error(
|
||||
r"RETURN vector::distance::euclidean([1, 2, 3], [4, 5]);
|
||||
RETURN vector::distance::euclidean([1, 2], [4, 5, 5]);",
|
||||
&[
|
||||
"Incorrect arguments for function vector::distance::euclidean(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::distance::euclidean(). The two vectors must be of the same dimension."
|
||||
]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_distance_manhattan() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::distance::manhattan([1, 2, 3], [4, 5, 6]);
|
||||
RETURN vector::distance::manhattan([1, 2, 3], [-4, -5, -6]);
|
||||
RETURN vector::distance::manhattan([1.1, 2, 3.3], [4, 5.5, 6.6]);
|
||||
RETURN vector::distance::manhattan([NaN, 1, 2, 3], [NaN, 4, 5, 6]);
|
||||
RETURN vector::distance::manhattan([10, 20, 15, 10, 5], [12, 24, 18, 8, 7]);
|
||||
"#,
|
||||
&["9", "21", "9.7", "NaN", "13"],
|
||||
)
|
||||
.await?;
|
||||
|
||||
check_test_is_error(
|
||||
r"RETURN vector::distance::manhattan([1, 2, 3], [4, 5]);
|
||||
RETURN vector::distance::manhattan([1, 2], [4, 5, 5]);",
|
||||
&[
|
||||
"Incorrect arguments for function vector::distance::manhattan(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::distance::manhattan(). The two vectors must be of the same dimension."
|
||||
]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_distance_hamming() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::distance::hamming([1, 2, 2], [1, 2, 3]);
|
||||
RETURN vector::distance::hamming([-1, -2, -3], [-2, -2, -2]);
|
||||
RETURN vector::distance::hamming([1.1, 2.2, -3.3], [1.1, 2, -3.3]);
|
||||
RETURN vector::distance::hamming([NaN, 1, 2, 3], [NaN, 1, 2, 3]);
|
||||
RETURN vector::distance::hamming([0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0]);
|
||||
"#,
|
||||
&["1", "2", "1", "0", "2"],
|
||||
)
|
||||
.await?;
|
||||
|
||||
check_test_is_error(
|
||||
r"RETURN vector::distance::hamming([1, 2, 3], [4, 5]);
|
||||
RETURN vector::distance::hamming([1, 2], [4, 5, 5]);",
|
||||
&[
|
||||
"Incorrect arguments for function vector::distance::hamming(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::distance::hamming(). The two vectors must be of the same dimension."
|
||||
]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_distance_minkowski() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::distance::minkowski([1, 2, 3], [4, 5, 6], 3);
|
||||
RETURN vector::distance::minkowski([-1, -2, -3], [-4, -5, -6], 3);
|
||||
RETURN vector::distance::minkowski([1.1, 2.2, 3], [4, 5.5, 6.6], 3);
|
||||
RETURN vector::distance::minkowski([NaN, 1, 2, 3], [NaN, 4, 5, 6], 3);
|
||||
RETURN vector::distance::minkowski([10, 20, 15, 10, 5], [12, 24, 18, 8, 7], 1);
|
||||
RETURN vector::distance::minkowski([10, 20, 15, 10, 5], [12, 24, 18, 8, 7], 2);
|
||||
"#,
|
||||
&[
|
||||
"4.3267487109222245",
|
||||
"4.3267487109222245",
|
||||
"4.747193170917638",
|
||||
"NaN",
|
||||
"13.0",
|
||||
"6.082762530298219",
|
||||
],
|
||||
)
|
||||
.await?;
|
||||
|
||||
check_test_is_error(
|
||||
r"RETURN vector::distance::minkowski([1, 2, 3], [4, 5], 3);
|
||||
RETURN vector::distance::minkowski([1, 2], [4, 5, 5], 3);",
|
||||
&[
|
||||
"Incorrect arguments for function vector::distance::minkowski(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::distance::minkowski(). The two vectors must be of the same dimension."
|
||||
]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn function_vector_distance_chebyshev() -> Result<(), Error> {
|
||||
test_queries(
|
||||
r#"
|
||||
RETURN vector::distance::chebyshev([1, 2, 3], [4, 5, 6]);
|
||||
RETURN vector::distance::chebyshev([-1, -2, -3], [-4, -5, -6]);
|
||||
RETURN vector::distance::chebyshev([1.1, 2.2, 3], [4, 5.5, 6.6]);
|
||||
RETURN vector::distance::chebyshev([NaN, 1, 2, 3], [NaN, 4, 5, 6]);
|
||||
RETURN vector::distance::chebyshev([2, 4, 5, 3, 8, 2], [3, 1, 5, -3, 7, 2]);
|
||||
"#,
|
||||
&["3.0", "3.0", "3.5999999999999996", "3.0", "6.0"],
|
||||
)
|
||||
.await?;
|
||||
|
||||
check_test_is_error(
|
||||
r"RETURN vector::distance::chebyshev([1, 2, 3], [4, 5]);
|
||||
RETURN vector::distance::chebyshev([1, 2], [4, 5, 5]);",
|
||||
&[
|
||||
"Incorrect arguments for function vector::distance::chebyshev(). The two vectors must be of the same dimension.",
|
||||
"Incorrect arguments for function vector::distance::chebyshev(). The two vectors must be of the same dimension."
|
||||
]).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue