Implements additional function for vectors (#2266)
This commit is contained in:
parent
a2ef2d83df
commit
98a482e471
15 changed files with 890 additions and 245 deletions
|
@ -276,6 +276,9 @@
|
||||||
"uuid"
|
"uuid"
|
||||||
"rand::uuid::v4("
|
"rand::uuid::v4("
|
||||||
"rand::uuid::v7("
|
"rand::uuid::v7("
|
||||||
|
"search::score("
|
||||||
|
"search::highlight("
|
||||||
|
"search::offsets("
|
||||||
"session"
|
"session"
|
||||||
"session::"
|
"session::"
|
||||||
"session::db("
|
"session::db("
|
||||||
|
@ -343,17 +346,25 @@
|
||||||
"type::string("
|
"type::string("
|
||||||
"type::table("
|
"type::table("
|
||||||
"type::thing("
|
"type::thing("
|
||||||
"vector::dotproduct(",
|
"vector::add("
|
||||||
"vector::magnitude(",
|
"vector::angle("
|
||||||
"vector::distance::chebyshev(",
|
"vector::cross("
|
||||||
"vector::distance::euclidean(",
|
"vector::divide("
|
||||||
"vector::distance::hamming(",
|
"vector::dot("
|
||||||
"vector::distance::mahalanobis(",
|
"vector::magnitude("
|
||||||
"vector::distance::manhattan(",
|
"vector::multiply("
|
||||||
"vector::distance::minkowski(",
|
"vector::normalize("
|
||||||
"vector::similarity::cosine(",
|
"vector::project("
|
||||||
"vector::similarity::jaccard(",
|
"vector::subtract("
|
||||||
"vector::similarity::pearson(",
|
"vector::distance::chebyshev("
|
||||||
"vector::similarity::spearman(",
|
"vector::distance::euclidean("
|
||||||
|
"vector::distance::hamming("
|
||||||
|
"vector::distance::mahalanobis("
|
||||||
|
"vector::distance::manhattan("
|
||||||
|
"vector::distance::minkowski("
|
||||||
|
"vector::similarity::cosine("
|
||||||
|
"vector::similarity::jaccard("
|
||||||
|
"vector::similarity::pearson("
|
||||||
|
"vector::similarity::spearman("
|
||||||
# TODO: Add Javascript keywords
|
# TODO: Add Javascript keywords
|
||||||
|
|
||||||
|
|
|
@ -276,6 +276,9 @@
|
||||||
"uuid"
|
"uuid"
|
||||||
"rand::uuid::v4("
|
"rand::uuid::v4("
|
||||||
"rand::uuid::v7("
|
"rand::uuid::v7("
|
||||||
|
"search::score("
|
||||||
|
"search::highlight("
|
||||||
|
"search::offsets("
|
||||||
"session"
|
"session"
|
||||||
"session::"
|
"session::"
|
||||||
"session::db("
|
"session::db("
|
||||||
|
@ -340,17 +343,25 @@
|
||||||
"type::string("
|
"type::string("
|
||||||
"type::table("
|
"type::table("
|
||||||
"type::thing("
|
"type::thing("
|
||||||
"vector::dotproduct(",
|
"vector::add("
|
||||||
"vector::magnitude(",
|
"vector::angle("
|
||||||
"vector::distance::chebyshev(",
|
"vector::cross("
|
||||||
"vector::distance::euclidean(",
|
"vector::divide("
|
||||||
"vector::distance::hamming(",
|
"vector::dot("
|
||||||
"vector::distance::mahalanobis(",
|
"vector::magnitude("
|
||||||
"vector::distance::manhattan(",
|
"vector::multiply("
|
||||||
"vector::distance::minkowski(",
|
"vector::normalize("
|
||||||
"vector::similarity::cosine(",
|
"vector::project("
|
||||||
"vector::similarity::jaccard(",
|
"vector::subtract("
|
||||||
"vector::similarity::pearson(",
|
"vector::distance::chebyshev("
|
||||||
"vector::similarity::spearman(",
|
"vector::distance::euclidean("
|
||||||
|
"vector::distance::hamming("
|
||||||
|
"vector::distance::mahalanobis("
|
||||||
|
"vector::distance::manhattan("
|
||||||
|
"vector::distance::minkowski("
|
||||||
|
"vector::similarity::cosine("
|
||||||
|
"vector::similarity::jaccard("
|
||||||
|
"vector::similarity::pearson("
|
||||||
|
"vector::similarity::spearman("
|
||||||
# TODO: Add Javascript keywords
|
# TODO: Add Javascript keywords
|
||||||
|
|
||||||
|
|
|
@ -289,8 +289,16 @@ pub fn synchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Va
|
||||||
"type::table" => r#type::table,
|
"type::table" => r#type::table,
|
||||||
"type::thing" => r#type::thing,
|
"type::thing" => r#type::thing,
|
||||||
//
|
//
|
||||||
"vector::dotproduct" => vector::dotproduct,
|
"vector::add" => vector::add,
|
||||||
|
"vector::angle" => vector::angle,
|
||||||
|
"vector::cross" => vector::cross,
|
||||||
|
"vector::dot" => vector::dot,
|
||||||
|
"vector::divide" => vector::divide,
|
||||||
"vector::magnitude" => vector::magnitude,
|
"vector::magnitude" => vector::magnitude,
|
||||||
|
"vector::multiply" => vector::multiply,
|
||||||
|
"vector::normalize" => vector::normalize,
|
||||||
|
"vector::project" => vector::project,
|
||||||
|
"vector::subtract" => vector::subtract,
|
||||||
"vector::distance::chebyshev" => vector::distance::chebyshev,
|
"vector::distance::chebyshev" => vector::distance::chebyshev,
|
||||||
"vector::distance::euclidean" => vector::distance::euclidean,
|
"vector::distance::euclidean" => vector::distance::euclidean,
|
||||||
"vector::distance::hamming" => vector::distance::hamming,
|
"vector::distance::hamming" => vector::distance::hamming,
|
||||||
|
|
|
@ -3,13 +3,22 @@ use crate::fnc::script::modules::impl_module_def;
|
||||||
|
|
||||||
mod distance;
|
mod distance;
|
||||||
mod similarity;
|
mod similarity;
|
||||||
|
|
||||||
pub struct Package;
|
pub struct Package;
|
||||||
|
|
||||||
impl_module_def!(
|
impl_module_def!(
|
||||||
Package,
|
Package,
|
||||||
"vector",
|
"vector",
|
||||||
"distance" => (distance::Package),
|
"distance" => (distance::Package),
|
||||||
"dotproduct" => run,
|
"similarity" => (similarity::Package),
|
||||||
|
"add" => run,
|
||||||
|
"angle" => run,
|
||||||
|
"cross" => run,
|
||||||
|
"divide" => run,
|
||||||
|
"dot" => run,
|
||||||
"magnitude" => run,
|
"magnitude" => run,
|
||||||
"similarity" => (similarity::Package)
|
"multiply" => run,
|
||||||
|
"normalize" => run,
|
||||||
|
"project" => run,
|
||||||
|
"subtract" => run
|
||||||
);
|
);
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use super::variance::Variance;
|
use crate::fnc::util::math::mean::Mean;
|
||||||
|
use crate::fnc::util::math::variance::variance;
|
||||||
use crate::sql::number::Number;
|
use crate::sql::number::Number;
|
||||||
|
|
||||||
pub trait Deviation {
|
pub trait Deviation {
|
||||||
|
@ -8,6 +9,10 @@ pub trait Deviation {
|
||||||
|
|
||||||
impl Deviation for Vec<Number> {
|
impl Deviation for Vec<Number> {
|
||||||
fn deviation(self, sample: bool) -> f64 {
|
fn deviation(self, sample: bool) -> f64 {
|
||||||
self.variance(sample).sqrt()
|
deviation(&self, self.mean(), sample)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(super) fn deviation(v: &[Number], mean: f64, sample: bool) -> f64 {
|
||||||
|
variance(v, mean, sample).sqrt()
|
||||||
|
}
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
use crate::sql::Number;
|
|
||||||
|
|
||||||
pub trait DotProduct {
|
|
||||||
/// Dot Product of two vectors
|
|
||||||
fn dotproduct(&self, other: &Self) -> Option<Number>;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl DotProduct for Vec<Number> {
|
|
||||||
fn dotproduct(&self, other: &Self) -> Option<Number> {
|
|
||||||
if self.len() != other.len() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Some(self.iter().zip(other.iter()).map(|(a, b)| a * b).sum())
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,21 +0,0 @@
|
||||||
use crate::sql::Number;
|
|
||||||
|
|
||||||
pub trait EuclideanDistance {
|
|
||||||
/// Euclidean Distance between two vectors (L2 Norm)
|
|
||||||
fn euclidean_distance(&self, other: &Self) -> Option<Number>;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl EuclideanDistance for Vec<Number> {
|
|
||||||
fn euclidean_distance(&self, other: &Self) -> Option<Number> {
|
|
||||||
if self.len() != other.len() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
Some(
|
|
||||||
self.iter()
|
|
||||||
.zip(other.iter())
|
|
||||||
.map(|(a, b)| (a - b).pow(Number::Int(2)))
|
|
||||||
.sum::<Number>()
|
|
||||||
.sqrt(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,12 +0,0 @@
|
||||||
use crate::sql::Number;
|
|
||||||
|
|
||||||
pub trait Magnitude {
|
|
||||||
/// Calculate the magnitude of a vector
|
|
||||||
fn magnitude(&self) -> Number;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Magnitude for Vec<Number> {
|
|
||||||
fn magnitude(&self) -> Number {
|
|
||||||
self.iter().map(|a| a.clone().pow(Number::Int(2))).sum::<Number>().sqrt()
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -4,10 +4,7 @@
|
||||||
|
|
||||||
pub mod bottom;
|
pub mod bottom;
|
||||||
pub mod deviation;
|
pub mod deviation;
|
||||||
pub mod dotproduct;
|
|
||||||
pub mod euclideandistance;
|
|
||||||
pub mod interquartile;
|
pub mod interquartile;
|
||||||
pub mod magnitude;
|
|
||||||
pub mod mean;
|
pub mod mean;
|
||||||
pub mod median;
|
pub mod median;
|
||||||
pub mod midhinge;
|
pub mod midhinge;
|
||||||
|
@ -19,3 +16,4 @@ pub mod spread;
|
||||||
pub mod top;
|
pub mod top;
|
||||||
pub mod trimean;
|
pub mod trimean;
|
||||||
pub mod variance;
|
pub mod variance;
|
||||||
|
pub mod vector;
|
||||||
|
|
|
@ -9,15 +9,18 @@ pub trait Variance {
|
||||||
|
|
||||||
impl Variance for Vec<Number> {
|
impl Variance for Vec<Number> {
|
||||||
fn variance(self, sample: bool) -> f64 {
|
fn variance(self, sample: bool) -> f64 {
|
||||||
match self.len() {
|
variance(&self, self.mean(), sample)
|
||||||
0 => f64::NAN,
|
}
|
||||||
1 => 0.0,
|
}
|
||||||
len => {
|
|
||||||
let mean = self.mean();
|
pub(super) fn variance(v: &[Number], mean: f64, sample: bool) -> f64 {
|
||||||
let len = (len - sample as usize) as f64;
|
match v.len() {
|
||||||
let out = self.iter().map(|x| (x.to_float() - mean).powi(2)).sum::<f64>() / len;
|
0 => f64::NAN,
|
||||||
out
|
1 => 0.0,
|
||||||
}
|
len => {
|
||||||
|
let len = (len - sample as usize) as f64;
|
||||||
|
let out = v.iter().map(|x| (x.to_float() - mean).powi(2)).sum::<f64>() / len;
|
||||||
|
out
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
289
lib/src/fnc/util/math/vector.rs
Normal file
289
lib/src/fnc/util/math/vector.rs
Normal file
|
@ -0,0 +1,289 @@
|
||||||
|
use crate::err::Error;
|
||||||
|
use crate::fnc::util::math::deviation::deviation;
|
||||||
|
use crate::fnc::util::math::mean::Mean;
|
||||||
|
use crate::sql::Number;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
pub trait Add {
|
||||||
|
/// Addition of two vectors
|
||||||
|
fn add(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_same_dimension(fnc: &str, a: &Vec<Number>, b: &Vec<Number>) -> Result<(), Error> {
|
||||||
|
if a.len() != b.len() {
|
||||||
|
Err(Error::InvalidArguments {
|
||||||
|
name: String::from(fnc),
|
||||||
|
message: String::from("The two vectors must be of the same dimension."),
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Add for Vec<Number> {
|
||||||
|
fn add(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||||
|
check_same_dimension("vector::add", self, other)?;
|
||||||
|
Ok(self.iter().zip(other.iter()).map(|(a, b)| a + b).collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Angle {
|
||||||
|
/// Compute the angle between two vectors
|
||||||
|
fn angle(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Angle for Vec<Number> {
|
||||||
|
fn angle(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::angle", self, other)?;
|
||||||
|
let dp = dot(self, other);
|
||||||
|
let m = self.magnitude() * other.magnitude();
|
||||||
|
let d = vector_div(&dp, &m);
|
||||||
|
Ok(d.acos())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait CosineSimilarity {
|
||||||
|
fn cosine_similarity(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CosineSimilarity for Vec<Number> {
|
||||||
|
fn cosine_similarity(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::similarity::cosine", self, other)?;
|
||||||
|
let d = dot(self, other);
|
||||||
|
Ok(d / (self.magnitude() * other.magnitude()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Divide {
|
||||||
|
/// Division of two vectors
|
||||||
|
fn divide(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vector_div(a: &Number, b: &Number) -> Number {
|
||||||
|
if a.is_nan() || b.is_nan() || b.is_zero() {
|
||||||
|
Number::NAN
|
||||||
|
} else {
|
||||||
|
a / b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Divide for Vec<Number> {
|
||||||
|
fn divide(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||||
|
check_same_dimension("vector::divide", self, other)?;
|
||||||
|
Ok(self.iter().zip(other.iter()).map(|(a, b)| vector_div(a, b)).collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait HammingDistance {
|
||||||
|
fn hamming_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl HammingDistance for Vec<Number> {
|
||||||
|
fn hamming_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::distance::hamming", self, other)?;
|
||||||
|
Ok(self.iter().zip(other.iter()).filter(|&(a, b)| a != b).count().into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait JaccardSimilarity {
|
||||||
|
fn jaccard_similarity(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl JaccardSimilarity for Vec<Number> {
|
||||||
|
fn jaccard_similarity(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
let set_a: HashSet<_> = HashSet::from_iter(self.iter());
|
||||||
|
let set_b: HashSet<_> = HashSet::from_iter(other.iter());
|
||||||
|
let intersection_size = set_a.intersection(&set_b).count() as f64;
|
||||||
|
let union_size = set_a.union(&set_b).count() as f64;
|
||||||
|
Ok((intersection_size / union_size).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait PearsonSimilarity {
|
||||||
|
fn pearson_similarity(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PearsonSimilarity for Vec<Number> {
|
||||||
|
fn pearson_similarity(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::similarity::pearson", self, other)?;
|
||||||
|
let m1 = self.mean();
|
||||||
|
let m2 = other.mean();
|
||||||
|
let covar: f64 = self
|
||||||
|
.iter()
|
||||||
|
.zip(other.iter())
|
||||||
|
.map(|(x, y)| (x.to_float() - m1) * (y.to_float() - m2))
|
||||||
|
.sum();
|
||||||
|
let covar = covar / self.len() as f64;
|
||||||
|
let std_dev1 = deviation(self, m1, false);
|
||||||
|
let std_dev2 = deviation(other, m2, false);
|
||||||
|
Ok((covar / (std_dev1 * std_dev2)).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait ManhattanDistance {
|
||||||
|
fn manhattan_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ManhattanDistance for Vec<Number> {
|
||||||
|
fn manhattan_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::distance::manhattan", self, other)?;
|
||||||
|
Ok(self.iter().zip(other.iter()).map(|(a, b)| (a - b).abs()).sum())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait MinkowskiDistance {
|
||||||
|
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MinkowskiDistance for Vec<Number> {
|
||||||
|
fn minkowski_distance(&self, other: &Self, order: Number) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::distance::minkowski", self, other)?;
|
||||||
|
let p = order.to_float();
|
||||||
|
let dist: f64 = self
|
||||||
|
.iter()
|
||||||
|
.zip(other.iter())
|
||||||
|
.map(|(a, b)| (a.to_float() - b.to_float()).abs().powf(p))
|
||||||
|
.sum();
|
||||||
|
Ok(dist.powf(1.0 / p).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Multiply {
|
||||||
|
/// Multiplication of two vectors
|
||||||
|
fn multiply(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Multiply for Vec<Number> {
|
||||||
|
fn multiply(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||||
|
check_same_dimension("vector::multiply", self, other)?;
|
||||||
|
Ok(self.iter().zip(other.iter()).map(|(a, b)| a * b).collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Project {
|
||||||
|
/// Projection of two vectors
|
||||||
|
fn project(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Project for Vec<Number> {
|
||||||
|
fn project(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||||
|
check_same_dimension("vector::project", self, other)?;
|
||||||
|
let d = dot(self, other);
|
||||||
|
let m = magnitude_squared(other).into();
|
||||||
|
let s = vector_div(&d, &m);
|
||||||
|
Ok(other.iter().map(|x| &s * x).collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait ChebyshevDistance {
|
||||||
|
fn chebyshev_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ChebyshevDistance for Vec<Number> {
|
||||||
|
fn chebyshev_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::distance::chebyshev", self, other)?;
|
||||||
|
Ok(self
|
||||||
|
.iter()
|
||||||
|
.zip(other.iter())
|
||||||
|
.map(|(a, b)| (a.to_float() - b.to_float()).abs())
|
||||||
|
.fold(f64::MIN, f64::max)
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Subtract {
|
||||||
|
/// Subtraction of two vectors
|
||||||
|
fn subtract(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Subtract for Vec<Number> {
|
||||||
|
fn subtract(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||||
|
check_same_dimension("vector::subtract", self, other)?;
|
||||||
|
Ok(self.iter().zip(other.iter()).map(|(a, b)| a - b).collect())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait CrossProduct {
|
||||||
|
/// Cross product of two vectors
|
||||||
|
fn cross(&self, other: &Self) -> Result<Vec<Number>, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CrossProduct for Vec<Number> {
|
||||||
|
fn cross(&self, other: &Self) -> Result<Vec<Number>, Error> {
|
||||||
|
if self.len() != 3 || other.len() != 3 {
|
||||||
|
return Err(Error::InvalidArguments {
|
||||||
|
name: "vector::cross".to_string(),
|
||||||
|
message: String::from("Both vectors must have a dimension of 3."),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
let a0 = &self[0];
|
||||||
|
let a1 = &self[1];
|
||||||
|
let a2 = &self[2];
|
||||||
|
let b0 = &other[0];
|
||||||
|
let b1 = &other[1];
|
||||||
|
let b2 = &other[2];
|
||||||
|
let v = vec![a1 * b2 - a2 * b1, a2 * b0 - a0 * b2, a0 * b1 - a1 * b0];
|
||||||
|
Ok(v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait DotProduct {
|
||||||
|
/// Dot Product of two vectors
|
||||||
|
fn dot(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DotProduct for Vec<Number> {
|
||||||
|
fn dot(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::dot", self, other)?;
|
||||||
|
Ok(dot(self, other))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn dot(a: &[Number], b: &[Number]) -> Number {
|
||||||
|
a.iter().zip(b.iter()).map(|(a, b)| a * b).sum()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait EuclideanDistance {
|
||||||
|
/// Euclidean Distance between two vectors (L2 Norm)
|
||||||
|
fn euclidean_distance(&self, other: &Self) -> Result<Number, Error>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EuclideanDistance for Vec<Number> {
|
||||||
|
fn euclidean_distance(&self, other: &Self) -> Result<Number, Error> {
|
||||||
|
check_same_dimension("vector::distance::euclidean", self, other)?;
|
||||||
|
Ok(self
|
||||||
|
.iter()
|
||||||
|
.zip(other.iter())
|
||||||
|
.map(|(a, b)| (a - b).to_float().powi(2))
|
||||||
|
.sum::<f64>()
|
||||||
|
.sqrt()
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn magnitude_squared(v: &[Number]) -> f64 {
|
||||||
|
v.iter().map(|a| a.to_float().powi(2)).sum::<f64>()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Magnitude {
|
||||||
|
/// Calculate the magnitude of a vector
|
||||||
|
fn magnitude(&self) -> Number;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Magnitude for Vec<Number> {
|
||||||
|
fn magnitude(&self) -> Number {
|
||||||
|
magnitude_squared(self).sqrt().into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Normalize {
|
||||||
|
/// Normalize a vector
|
||||||
|
fn normalize(&self) -> Vec<Number>;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Normalize for Vec<Number> {
|
||||||
|
fn normalize(&self) -> Vec<Number> {
|
||||||
|
let m = self.magnitude();
|
||||||
|
self.iter().map(|a| vector_div(a, &m)).collect()
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,48 +1,67 @@
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::fnc::util::math::dotproduct::DotProduct;
|
use crate::fnc::util::math::vector::{
|
||||||
use crate::fnc::util::math::magnitude::Magnitude;
|
Add, Angle, CrossProduct, Divide, DotProduct, Magnitude, Multiply, Normalize, Project, Subtract,
|
||||||
|
};
|
||||||
use crate::sql::{Number, Value};
|
use crate::sql::{Number, Value};
|
||||||
|
|
||||||
pub fn dotproduct((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn add((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
match a.dotproduct(&b) {
|
Ok(a.add(&b)?.into())
|
||||||
None => Err(Error::InvalidArguments {
|
}
|
||||||
name: String::from("vector::dotproduct"),
|
|
||||||
message: String::from("The two vectors must be of the same length."),
|
pub fn angle((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
}),
|
Ok(a.angle(&b)?.into())
|
||||||
Some(dot) => Ok(dot.into()),
|
}
|
||||||
}
|
|
||||||
|
pub fn divide((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
Ok(a.divide(&b)?.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn cross((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
Ok(a.cross(&b)?.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn dot((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
Ok(a.dot(&b)?.into())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn magnitude((a,): (Vec<Number>,)) -> Result<Value, Error> {
|
pub fn magnitude((a,): (Vec<Number>,)) -> Result<Value, Error> {
|
||||||
Ok(a.magnitude().into())
|
Ok(a.magnitude().into())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn multiply((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
Ok(a.multiply(&b)?.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn normalize((a,): (Vec<Number>,)) -> Result<Value, Error> {
|
||||||
|
Ok(a.normalize().into())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn project((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
Ok(a.project(&b)?.into())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn subtract((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
Ok(a.subtract(&b)?.into())
|
||||||
|
}
|
||||||
|
|
||||||
pub mod distance {
|
pub mod distance {
|
||||||
|
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::fnc::util::math::euclideandistance::EuclideanDistance;
|
use crate::fnc::util::math::vector::{
|
||||||
|
ChebyshevDistance, EuclideanDistance, HammingDistance, ManhattanDistance, MinkowskiDistance,
|
||||||
|
};
|
||||||
use crate::sql::{Number, Value};
|
use crate::sql::{Number, Value};
|
||||||
|
|
||||||
pub fn chebyshev((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn chebyshev((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
Err(Error::FeatureNotYetImplemented {
|
Ok(a.chebyshev_distance(&b)?.into())
|
||||||
feature: "vector::distance::chebyshev() function",
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn euclidean((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn euclidean((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
match a.euclidean_distance(&b) {
|
Ok(a.euclidean_distance(&b)?.into())
|
||||||
None => Err(Error::InvalidArguments {
|
|
||||||
name: String::from("vector::distance::euclidean"),
|
|
||||||
message: String::from("The two vectors must be of the same length."),
|
|
||||||
}),
|
|
||||||
Some(distance) => Ok(distance.into()),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn hamming((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn hamming((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
Err(Error::FeatureNotYetImplemented {
|
Ok(a.hamming_distance(&b)?.into())
|
||||||
feature: "vector::distance::hamming() function",
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn mahalanobis((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn mahalanobis((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
@ -51,46 +70,31 @@ pub mod distance {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn manhattan((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn manhattan((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
Err(Error::FeatureNotYetImplemented {
|
Ok(a.manhattan_distance(&b)?.into())
|
||||||
feature: "vector::distance::manhattan() function",
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn minkowski((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn minkowski((a, b, o): (Vec<Number>, Vec<Number>, Number)) -> Result<Value, Error> {
|
||||||
Err(Error::FeatureNotYetImplemented {
|
Ok(a.minkowski_distance(&b, o)?.into())
|
||||||
feature: "vector::distance::minkowski() function",
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod similarity {
|
pub mod similarity {
|
||||||
|
|
||||||
use crate::err::Error;
|
use crate::err::Error;
|
||||||
use crate::fnc::util::math::dotproduct::DotProduct;
|
use crate::fnc::util::math::vector::{CosineSimilarity, JaccardSimilarity, PearsonSimilarity};
|
||||||
use crate::fnc::util::math::magnitude::Magnitude;
|
|
||||||
use crate::sql::{Number, Value};
|
use crate::sql::{Number, Value};
|
||||||
|
|
||||||
pub fn cosine((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn cosine((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
match a.dotproduct(&b) {
|
Ok(a.cosine_similarity(&b)?.into())
|
||||||
None => Err(Error::InvalidArguments {
|
|
||||||
name: String::from("vector::similarity::cosine"),
|
|
||||||
message: String::from("The two vectors must be of the same length."),
|
|
||||||
}),
|
|
||||||
Some(dot) => Ok((dot / (a.magnitude() * b.magnitude())).into()),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn jaccard((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn jaccard((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
Err(Error::FeatureNotYetImplemented {
|
Ok(a.jaccard_similarity(&b)?.into())
|
||||||
feature: "vector::similarity::jaccard() function",
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn pearson((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn pearson((a, b): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
Err(Error::FeatureNotYetImplemented {
|
Ok(a.pearson_similarity(&b)?.into())
|
||||||
feature: "vector::similarity::pearson() function",
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn spearman((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
pub fn spearman((_, _): (Vec<Number>, Vec<Number>)) -> Result<Value, Error> {
|
||||||
|
|
|
@ -555,8 +555,16 @@ fn function_type(i: &str) -> IResult<&str, &str> {
|
||||||
|
|
||||||
fn function_vector(i: &str) -> IResult<&str, &str> {
|
fn function_vector(i: &str) -> IResult<&str, &str> {
|
||||||
alt((
|
alt((
|
||||||
tag("dotproduct"),
|
tag("add"),
|
||||||
|
tag("angle"),
|
||||||
|
tag("divide"),
|
||||||
|
tag("cross"),
|
||||||
|
tag("dot"),
|
||||||
tag("magnitude"),
|
tag("magnitude"),
|
||||||
|
tag("multiply"),
|
||||||
|
tag("normalize"),
|
||||||
|
tag("project"),
|
||||||
|
tag("subtract"),
|
||||||
preceded(
|
preceded(
|
||||||
tag("distance::"),
|
tag("distance::"),
|
||||||
alt((
|
alt((
|
||||||
|
|
|
@ -234,6 +234,14 @@ impl Number {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_zero(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
Number::Int(v) => v == &0,
|
||||||
|
Number::Float(v) => v == &0.0,
|
||||||
|
Number::Decimal(v) => v == &Decimal::ZERO,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_zero_or_positive(&self) -> bool {
|
pub fn is_zero_or_positive(&self) -> bool {
|
||||||
match self {
|
match self {
|
||||||
Number::Int(v) => v >= &0,
|
Number::Int(v) => v >= &0,
|
||||||
|
@ -334,6 +342,10 @@ impl Number {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn acos(self) -> Self {
|
||||||
|
self.to_float().acos().into()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn ceil(self) -> Self {
|
pub fn ceil(self) -> Self {
|
||||||
match self {
|
match self {
|
||||||
Number::Int(v) => v.into(),
|
Number::Int(v) => v.into(),
|
||||||
|
@ -637,7 +649,7 @@ impl Sort for Vec<Number> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn number(i: &str) -> IResult<&str, Number> {
|
fn not_nan(i: &str) -> IResult<&str, Number> {
|
||||||
let (i, v) = recognize_float(i)?;
|
let (i, v) = recognize_float(i)?;
|
||||||
let (i, suffix) = suffix(i)?;
|
let (i, suffix) = suffix(i)?;
|
||||||
let (i, _) = ending(i)?;
|
let (i, _) = ending(i)?;
|
||||||
|
@ -649,6 +661,10 @@ pub fn number(i: &str) -> IResult<&str, Number> {
|
||||||
Ok((i, number))
|
Ok((i, number))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn number(i: &str) -> IResult<&str, Number> {
|
||||||
|
alt((map(tag("NaN"), |_| Number::NAN), not_nan))(i)
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum Suffix {
|
enum Suffix {
|
||||||
None,
|
None,
|
||||||
|
@ -691,6 +707,15 @@ mod tests {
|
||||||
assert!(!decimal_is_integer(&Decimal::HALF_PI));
|
assert!(!decimal_is_integer(&Decimal::HALF_PI));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn number_nan() {
|
||||||
|
let sql = "NaN";
|
||||||
|
let res = number(sql);
|
||||||
|
assert!(res.is_ok());
|
||||||
|
let out = res.unwrap().1;
|
||||||
|
assert_eq!("NaN", format!("{}", out));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn number_int() {
|
fn number_int() {
|
||||||
let sql = "123";
|
let sql = "123";
|
||||||
|
|
|
@ -13,16 +13,19 @@ async fn test_queries(sql: &str, desired_responses: &[&str]) -> Result<(), Error
|
||||||
let v = r?;
|
let v = r?;
|
||||||
if let Some(desired_response) = desired_responses.get(i) {
|
if let Some(desired_response) = desired_responses.get(i) {
|
||||||
let desired_value = Value::parse(*desired_response);
|
let desired_value = Value::parse(*desired_response);
|
||||||
assert_eq!(
|
// If both values are NaN, they are equal from a test PoV
|
||||||
v,
|
if !desired_value.is_nan() || !v.is_nan() {
|
||||||
desired_value,
|
assert_eq!(
|
||||||
"Recieved responce did not match \
|
v,
|
||||||
|
desired_value,
|
||||||
|
"Received response did not match \
|
||||||
expected.
|
expected.
|
||||||
Query responce #{},
|
Query response #{},
|
||||||
Desired responce: {desired_value},
|
Desired response: {desired_value},
|
||||||
Actual response: {v}",
|
Actual response: {v}",
|
||||||
i + 1
|
i + 1
|
||||||
);
|
);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
panic!("Response index {i} out of bounds of desired responses.");
|
panic!("Response index {i} out of bounds of desired responses.");
|
||||||
}
|
}
|
||||||
|
@ -30,6 +33,31 @@ async fn test_queries(sql: &str, desired_responses: &[&str]) -> Result<(), Error
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async fn check_test_is_error(sql: &str, expected_errors: &[&str]) -> Result<(), Error> {
|
||||||
|
let db = Datastore::new("memory").await?;
|
||||||
|
let session = Session::for_kv().with_ns("test").with_db("test");
|
||||||
|
let response = db.execute(sql, &session, None).await?;
|
||||||
|
if response.len() != expected_errors.len() {
|
||||||
|
panic!(
|
||||||
|
"Wrong number of responses {} - expected {}.",
|
||||||
|
response.len(),
|
||||||
|
expected_errors.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
for (i, r) in response.into_iter().map(|r| r.result).enumerate() {
|
||||||
|
if let Some(expected_error) = expected_errors.get(i) {
|
||||||
|
if let Err(e) = r {
|
||||||
|
assert_eq!(e.to_string().as_str(), *expected_error)
|
||||||
|
} else {
|
||||||
|
panic!("Response index {i} is not an error.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
panic!("Response index {i} out of bounds of expected responses.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------
|
// --------------------------------------------------
|
||||||
// array
|
// array
|
||||||
// --------------------------------------------------
|
// --------------------------------------------------
|
||||||
|
@ -4640,123 +4668,417 @@ async fn function_type_thing() -> Result<(), Error> {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn function_vector_distance_euclidean() -> Result<(), Error> {
|
async fn function_vector_add() -> Result<(), Error> {
|
||||||
let sql = r#"
|
test_queries(
|
||||||
RETURN vector::distance::euclidean([1, 2, 3], [1, 2, 3]);
|
r#"
|
||||||
RETURN vector::distance::euclidean([1, 2, 3], [-1, -2, -3]);
|
RETURN vector::add([1, 2, 3], [1, 2, 3]);
|
||||||
RETURN vector::distance::euclidean([1, 2, 3], [4, 5]);
|
RETURN vector::add([1, 2, 3], [-1, -2, -3]);
|
||||||
RETURN vector::distance::euclidean([1, 2], [4, 5, 5]);
|
"#,
|
||||||
"#;
|
&["[2, 4, 6]", "[0, 0, 0]"],
|
||||||
|
)
|
||||||
let dbs = Datastore::new("memory").await?;
|
.await?;
|
||||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
check_test_is_error(
|
||||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
r#"
|
||||||
assert_eq!(res.len(), 4);
|
RETURN vector::add([1, 2, 3], [4, 5]);
|
||||||
//
|
RETURN vector::add([1, 2], [4, 5, 5]);
|
||||||
let tmp = res.remove(0).result?;
|
"#,
|
||||||
let val = Value::from(0);
|
&[
|
||||||
assert_eq!(tmp, val);
|
"Incorrect arguments for function vector::add(). The two vectors must be of the same dimension.",
|
||||||
//
|
"Incorrect arguments for function vector::add(). The two vectors must be of the same dimension."
|
||||||
let tmp = res.remove(0).result?;
|
],
|
||||||
let val = Value::from(7.483314773547883);
|
)
|
||||||
assert_eq!(tmp, val);
|
.await?;
|
||||||
//
|
|
||||||
let tmp = res.remove(0).result;
|
|
||||||
assert!(tmp.is_err());
|
|
||||||
//
|
|
||||||
let tmp = res.remove(0).result;
|
|
||||||
assert!(tmp.is_err());
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn function_vector_dotproduct() -> Result<(), Error> {
|
async fn function_vector_angle() -> Result<(), Error> {
|
||||||
let sql = r#"
|
test_queries(
|
||||||
RETURN vector::dotproduct([1, 2, 3], [1, 2, 3]);
|
r#"
|
||||||
RETURN vector::dotproduct([1, 2, 3], [-1, -2, -3]);
|
RETURN vector::angle([1,0,0], [0,1,0]);
|
||||||
RETURN vector::dotproduct([1, 2, 3], [4, 5]);
|
RETURN vector::angle([5, 10, 15], [10, 5, 20]);
|
||||||
RETURN vector::dotproduct([1, 2], [4, 5, 5]);
|
RETURN vector::angle([-3, 2, 5], [4, -1, 2]);
|
||||||
"#;
|
RETURN vector::angle([NaN, 2, 3], [-1, -2, NaN]);
|
||||||
|
"#,
|
||||||
|
&["1.5707963267948966", "0.36774908225917935", "1.7128722906354115", "NaN"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
let dbs = Datastore::new("memory").await?;
|
check_test_is_error(
|
||||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
r#"
|
||||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
RETURN vector::angle([1, 2, 3], [4, 5]);
|
||||||
assert_eq!(res.len(), 4);
|
RETURN vector::angle([1, 2], [4, 5, 5]);
|
||||||
//
|
"#,
|
||||||
let tmp = res.remove(0).result?;
|
&[
|
||||||
let val = Value::from(14);
|
"Incorrect arguments for function vector::angle(). The two vectors must be of the same dimension.",
|
||||||
assert_eq!(tmp, val);
|
"Incorrect arguments for function vector::angle(). The two vectors must be of the same dimension."
|
||||||
//
|
],
|
||||||
let tmp = res.remove(0).result?;
|
).await?;
|
||||||
let val = Value::from(-14);
|
Ok(())
|
||||||
assert_eq!(tmp, val);
|
}
|
||||||
//
|
|
||||||
let tmp = res.remove(0).result;
|
#[tokio::test]
|
||||||
assert!(tmp.is_err());
|
async fn function_vector_cross() -> Result<(), Error> {
|
||||||
//
|
test_queries(
|
||||||
let tmp = res.remove(0).result;
|
r#"
|
||||||
assert!(tmp.is_err());
|
RETURN vector::cross([1, 2, 3], [4, 5, 6]);
|
||||||
|
RETURN vector::cross([1, 2, 3], [-4, -5, -6]);
|
||||||
|
RETURN vector::cross([1, NaN, 3], [NaN, -5, -6]);
|
||||||
|
"#,
|
||||||
|
&["[-3, 6, -3]", "[3, -6, 3]", "[NaN, NaN, NaN]"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
check_test_is_error(
|
||||||
|
r#"
|
||||||
|
RETURN vector::cross([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::cross([1, 2], [4, 5, 5]);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::cross(). Both vectors must have a dimension of 3.",
|
||||||
|
"Incorrect arguments for function vector::cross(). Both vectors must have a dimension of 3."
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_dot() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::dot([1, 2, 3], [1, 2, 3]);
|
||||||
|
RETURN vector::dot([1, 2, 3], [-1, -2, -3]);
|
||||||
|
"#,
|
||||||
|
&["14", "-14"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
check_test_is_error(
|
||||||
|
r#"
|
||||||
|
RETURN vector::dot([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::dot([1, 2], [4, 5, 5]);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::dot(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::dot(). The two vectors must be of the same dimension."
|
||||||
|
],
|
||||||
|
).await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn function_vector_magnitude() -> Result<(), Error> {
|
async fn function_vector_magnitude() -> Result<(), Error> {
|
||||||
let sql = r#"
|
test_queries(
|
||||||
|
r#"
|
||||||
RETURN vector::magnitude([]);
|
RETURN vector::magnitude([]);
|
||||||
RETURN vector::magnitude([1]);
|
RETURN vector::magnitude([1]);
|
||||||
RETURN vector::magnitude([5]);
|
RETURN vector::magnitude([5]);
|
||||||
RETURN vector::magnitude([1,2,3,3,3,4,5]);
|
RETURN vector::magnitude([1,2,3,3,3,4,5]);
|
||||||
"#;
|
"#,
|
||||||
|
&["0", "1", "5", "8.54400374531753"],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
}
|
||||||
|
|
||||||
let dbs = Datastore::new("memory").await?;
|
#[tokio::test]
|
||||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
async fn function_vector_normalize() -> Result<(), Error> {
|
||||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
test_queries(
|
||||||
assert_eq!(res.len(), 4);
|
r#"
|
||||||
//
|
RETURN vector::normalize([]);
|
||||||
let tmp = res.remove(0).result?;
|
RETURN vector::normalize([1]);
|
||||||
let val = Value::from(0);
|
RETURN vector::normalize([5]);
|
||||||
assert_eq!(tmp, val);
|
RETURN vector::normalize([4,3]);
|
||||||
//
|
"#,
|
||||||
let tmp = res.remove(0).result?;
|
&["[]", "[1]", "[1]", "[0.8,0.6]"],
|
||||||
let val = Value::from(1);
|
)
|
||||||
assert_eq!(tmp, val);
|
.await
|
||||||
//
|
}
|
||||||
let tmp = res.remove(0).result?;
|
|
||||||
let val = Value::from(5);
|
#[tokio::test]
|
||||||
assert_eq!(tmp, val);
|
async fn function_vector_multiply() -> Result<(), Error> {
|
||||||
//
|
test_queries(
|
||||||
let tmp = res.remove(0).result?;
|
r#"
|
||||||
let val = Value::from(8.54400374531753);
|
RETURN vector::multiply([1, 2, 3], [1, 2, 3]);
|
||||||
assert_eq!(tmp, val);
|
RETURN vector::multiply([1, 2, 3], [-1, -2, -3]);
|
||||||
|
"#,
|
||||||
|
&["[1, 4, 9]", "[-1, -4, -9]"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
check_test_is_error(
|
||||||
|
r#"
|
||||||
|
RETURN vector::multiply([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::multiply([1, 2], [4, 5, 5]);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::multiply(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::multiply(). The two vectors must be of the same dimension."
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_project() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::project([1, 2, 3], [4, 5, 6]);
|
||||||
|
RETURN vector::project([1, -2, 3], [-4, 5, 6]);
|
||||||
|
RETURN vector::project([NaN, -2, 3], [-4, NaN, NaN]);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"[1.6623376623376624, 2.077922077922078, 2.4935064935064934]",
|
||||||
|
"[-0.2077922077922078, 0.25974025974025977, 0.3116883116883117]",
|
||||||
|
"[NaN, NaN, NaN]",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
check_test_is_error(
|
||||||
|
r#"
|
||||||
|
RETURN vector::project([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::project([1, 2], [4, 5, 5]);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::project(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::project(). The two vectors must be of the same dimension."
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_divide() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::divide([10, NaN, 20, 30, 0], [0, 1, 2, 0, 4]);
|
||||||
|
RETURN vector::divide([10, -20, 30, 0], [0, -1, 2, -3]);
|
||||||
|
"#,
|
||||||
|
&["[NaN, NaN, 10, NaN, 0]", "[NaN, 20, 15, 0]"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
check_test_is_error(
|
||||||
|
r#"
|
||||||
|
RETURN vector::divide([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::divide([1, 2], [4, 5, 5]);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::divide(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::divide(). The two vectors must be of the same dimension."
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_subtract() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::subtract([1, 2, 3], [1, 2, 3]);
|
||||||
|
RETURN vector::subtract([1, 2, 3], [-1, -2, -3]);
|
||||||
|
"#,
|
||||||
|
&["[0, 0, 0]", "[2, 4, 6]"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
check_test_is_error(
|
||||||
|
r#"
|
||||||
|
RETURN vector::subtract([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::subtract([1, 2], [4, 5, 5]);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::subtract(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::subtract(). The two vectors must be of the same dimension."
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test]
|
#[tokio::test]
|
||||||
async fn function_vector_similarity_cosine() -> Result<(), Error> {
|
async fn function_vector_similarity_cosine() -> Result<(), Error> {
|
||||||
let sql = r#"
|
test_queries(
|
||||||
|
r#"
|
||||||
RETURN vector::similarity::cosine([1, 2, 3], [1, 2, 3]);
|
RETURN vector::similarity::cosine([1, 2, 3], [1, 2, 3]);
|
||||||
RETURN vector::similarity::cosine([1, 2, 3], [-1, -2, -3]);
|
RETURN vector::similarity::cosine([1, 2, 3], [-1, -2, -3]);
|
||||||
RETURN vector::similarity::cosine([1, 2, 3], [4, 5]);
|
RETURN vector::similarity::cosine([NaN, 1, 2, 3], [NaN, 1, 2, 3]);
|
||||||
RETURN vector::similarity::cosine([1, 2], [4, 5, 5]);
|
RETURN vector::similarity::cosine([10, 50, 200], [400, 100, 20]);
|
||||||
"#;
|
"#,
|
||||||
|
&["1.0", "-1.0", "NaN", "0.15258215962441316"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
let dbs = Datastore::new("memory").await?;
|
check_test_is_error(
|
||||||
let ses = Session::for_kv().with_ns("test").with_db("test");
|
r"RETURN vector::similarity::cosine([1, 2, 3], [4, 5]);
|
||||||
let res = &mut dbs.execute(&sql, &ses, None).await?;
|
RETURN vector::similarity::cosine([1, 2], [4, 5, 5]);",
|
||||||
assert_eq!(res.len(), 4);
|
&[
|
||||||
//
|
"Incorrect arguments for function vector::similarity::cosine(). The two vectors must be of the same dimension.",
|
||||||
let tmp = res.remove(0).result?;
|
"Incorrect arguments for function vector::similarity::cosine(). The two vectors must be of the same dimension."
|
||||||
let val = Value::from(1.0);
|
]).await?;
|
||||||
assert_eq!(tmp, val);
|
Ok(())
|
||||||
//
|
}
|
||||||
let tmp = res.remove(0).result?;
|
|
||||||
let val = Value::from(-1.0);
|
#[tokio::test]
|
||||||
assert_eq!(tmp, val);
|
async fn function_vector_similarity_jaccard() -> Result<(), Error> {
|
||||||
//
|
test_queries(
|
||||||
let tmp = res.remove(0).result;
|
r#"
|
||||||
assert!(tmp.is_err());
|
RETURN vector::similarity::jaccard([1, 2, 3], [3, 2, 1]);
|
||||||
//
|
RETURN vector::similarity::jaccard([1, 2, 3], [-3, -2, -1]);
|
||||||
let tmp = res.remove(0).result;
|
RETURN vector::similarity::jaccard([1, -2, 3, -4], [4, 3, 2, 1]);
|
||||||
assert!(tmp.is_err());
|
RETURN vector::similarity::jaccard([NaN, 1, 2, 3], [NaN, 2, 3, 4]);
|
||||||
|
RETURN vector::similarity::jaccard([0,1,2,5,6], [0,2,3,4,5,7,9]);
|
||||||
|
"#,
|
||||||
|
&["1.0", "0", "0.3333333333333333", "0.6", "0.3333333333333333"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_similarity_pearson() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::similarity::pearson([1, 2, 3, 4, 5], [1, 2.5, 3.5, 4.2, 5.1]);
|
||||||
|
RETURN vector::similarity::pearson([NaN, 1, 2, 3, 4, 5], [NaN, 1, 2.5, 3.5, 4.2, 5.1]);
|
||||||
|
RETURN vector::similarity::pearson([1,2,3], [1,5,7]);
|
||||||
|
"#,
|
||||||
|
&["0.9894065340659606", "NaN", "0.9819805060619659"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
check_test_is_error(
|
||||||
|
r"RETURN vector::similarity::pearson([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::similarity::pearson([1, 2], [4, 5, 5]);",
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::similarity::pearson(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::similarity::pearson(). The two vectors must be of the same dimension."
|
||||||
|
]).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_distance_euclidean() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::distance::euclidean([1, 2, 3], [1, 2, 3]);
|
||||||
|
RETURN vector::distance::euclidean([NaN, 2, 3], [-1, NaN, -3]);
|
||||||
|
RETURN vector::distance::euclidean([1, 2, 3], [-1, -2, -3]);
|
||||||
|
RETURN vector::distance::euclidean([10, 50, 200], [400, 100, 20]);
|
||||||
|
RETURN vector::distance::euclidean([10, 20, 15, 10, 5], [12, 24, 18, 8, 7]);
|
||||||
|
"#,
|
||||||
|
&["0", "NaN", "7.483314773547883", "432.43496620879307", "6.082762530298219"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
check_test_is_error(
|
||||||
|
r"RETURN vector::distance::euclidean([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::distance::euclidean([1, 2], [4, 5, 5]);",
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::distance::euclidean(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::distance::euclidean(). The two vectors must be of the same dimension."
|
||||||
|
]).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_distance_manhattan() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::distance::manhattan([1, 2, 3], [4, 5, 6]);
|
||||||
|
RETURN vector::distance::manhattan([1, 2, 3], [-4, -5, -6]);
|
||||||
|
RETURN vector::distance::manhattan([1.1, 2, 3.3], [4, 5.5, 6.6]);
|
||||||
|
RETURN vector::distance::manhattan([NaN, 1, 2, 3], [NaN, 4, 5, 6]);
|
||||||
|
RETURN vector::distance::manhattan([10, 20, 15, 10, 5], [12, 24, 18, 8, 7]);
|
||||||
|
"#,
|
||||||
|
&["9", "21", "9.7", "NaN", "13"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
check_test_is_error(
|
||||||
|
r"RETURN vector::distance::manhattan([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::distance::manhattan([1, 2], [4, 5, 5]);",
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::distance::manhattan(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::distance::manhattan(). The two vectors must be of the same dimension."
|
||||||
|
]).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_distance_hamming() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::distance::hamming([1, 2, 2], [1, 2, 3]);
|
||||||
|
RETURN vector::distance::hamming([-1, -2, -3], [-2, -2, -2]);
|
||||||
|
RETURN vector::distance::hamming([1.1, 2.2, -3.3], [1.1, 2, -3.3]);
|
||||||
|
RETURN vector::distance::hamming([NaN, 1, 2, 3], [NaN, 1, 2, 3]);
|
||||||
|
RETURN vector::distance::hamming([0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0]);
|
||||||
|
"#,
|
||||||
|
&["1", "2", "1", "0", "2"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
check_test_is_error(
|
||||||
|
r"RETURN vector::distance::hamming([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::distance::hamming([1, 2], [4, 5, 5]);",
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::distance::hamming(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::distance::hamming(). The two vectors must be of the same dimension."
|
||||||
|
]).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_distance_minkowski() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::distance::minkowski([1, 2, 3], [4, 5, 6], 3);
|
||||||
|
RETURN vector::distance::minkowski([-1, -2, -3], [-4, -5, -6], 3);
|
||||||
|
RETURN vector::distance::minkowski([1.1, 2.2, 3], [4, 5.5, 6.6], 3);
|
||||||
|
RETURN vector::distance::minkowski([NaN, 1, 2, 3], [NaN, 4, 5, 6], 3);
|
||||||
|
RETURN vector::distance::minkowski([10, 20, 15, 10, 5], [12, 24, 18, 8, 7], 1);
|
||||||
|
RETURN vector::distance::minkowski([10, 20, 15, 10, 5], [12, 24, 18, 8, 7], 2);
|
||||||
|
"#,
|
||||||
|
&[
|
||||||
|
"4.3267487109222245",
|
||||||
|
"4.3267487109222245",
|
||||||
|
"4.747193170917638",
|
||||||
|
"NaN",
|
||||||
|
"13.0",
|
||||||
|
"6.082762530298219",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
check_test_is_error(
|
||||||
|
r"RETURN vector::distance::minkowski([1, 2, 3], [4, 5], 3);
|
||||||
|
RETURN vector::distance::minkowski([1, 2], [4, 5, 5], 3);",
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::distance::minkowski(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::distance::minkowski(). The two vectors must be of the same dimension."
|
||||||
|
]).await?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn function_vector_distance_chebyshev() -> Result<(), Error> {
|
||||||
|
test_queries(
|
||||||
|
r#"
|
||||||
|
RETURN vector::distance::chebyshev([1, 2, 3], [4, 5, 6]);
|
||||||
|
RETURN vector::distance::chebyshev([-1, -2, -3], [-4, -5, -6]);
|
||||||
|
RETURN vector::distance::chebyshev([1.1, 2.2, 3], [4, 5.5, 6.6]);
|
||||||
|
RETURN vector::distance::chebyshev([NaN, 1, 2, 3], [NaN, 4, 5, 6]);
|
||||||
|
RETURN vector::distance::chebyshev([2, 4, 5, 3, 8, 2], [3, 1, 5, -3, 7, 2]);
|
||||||
|
"#,
|
||||||
|
&["3.0", "3.0", "3.5999999999999996", "3.0", "6.0"],
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
check_test_is_error(
|
||||||
|
r"RETURN vector::distance::chebyshev([1, 2, 3], [4, 5]);
|
||||||
|
RETURN vector::distance::chebyshev([1, 2], [4, 5, 5]);",
|
||||||
|
&[
|
||||||
|
"Incorrect arguments for function vector::distance::chebyshev(). The two vectors must be of the same dimension.",
|
||||||
|
"Incorrect arguments for function vector::distance::chebyshev(). The two vectors must be of the same dimension."
|
||||||
|
]).await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue