Implement math functions that were marked as todo (#1218)
This commit is contained in:
parent
2015227dc3
commit
d5a378b1be
17 changed files with 217 additions and 55 deletions
|
@ -12,7 +12,7 @@ use crate::fnc::util::math::spread::Spread;
|
|||
use crate::fnc::util::math::top::Top;
|
||||
use crate::fnc::util::math::trimean::Trimean;
|
||||
use crate::fnc::util::math::variance::Variance;
|
||||
use crate::sql::number::Number;
|
||||
use crate::sql::number::{Number, Sort};
|
||||
use crate::sql::value::Value;
|
||||
|
||||
pub fn abs((arg,): (Number,)) -> Result<Value, Error> {
|
||||
|
@ -47,7 +47,7 @@ pub fn floor((arg,): (Number,)) -> Result<Value, Error> {
|
|||
|
||||
pub fn interquartile((array,): (Value,)) -> Result<Value, Error> {
|
||||
Ok(match array {
|
||||
Value::Array(v) => v.as_numbers().interquartile().into(),
|
||||
Value::Array(v) => v.as_numbers().sorted().interquartile().into(),
|
||||
_ => Value::None,
|
||||
})
|
||||
}
|
||||
|
@ -76,7 +76,7 @@ pub fn median((array,): (Value,)) -> Result<Value, Error> {
|
|||
Ok(match array {
|
||||
Value::Array(v) => match v.is_empty() {
|
||||
true => Value::None,
|
||||
false => v.as_numbers().median().into(),
|
||||
false => v.as_numbers().sorted().median().into(),
|
||||
},
|
||||
_ => Value::None,
|
||||
})
|
||||
|
@ -84,7 +84,7 @@ pub fn median((array,): (Value,)) -> Result<Value, Error> {
|
|||
|
||||
pub fn midhinge((array,): (Value,)) -> Result<Value, Error> {
|
||||
Ok(match array {
|
||||
Value::Array(v) => v.as_numbers().midhinge().into(),
|
||||
Value::Array(v) => v.as_numbers().sorted().midhinge().into(),
|
||||
_ => Value::None,
|
||||
})
|
||||
}
|
||||
|
@ -108,14 +108,14 @@ pub fn mode((array,): (Value,)) -> Result<Value, Error> {
|
|||
|
||||
pub fn nearestrank((array, n): (Value, Number)) -> Result<Value, Error> {
|
||||
Ok(match array {
|
||||
Value::Array(v) => v.as_numbers().nearestrank(n).into(),
|
||||
Value::Array(v) => v.as_numbers().sorted().nearestrank(n).into(),
|
||||
_ => Value::None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn percentile((array, n): (Value, Number)) -> Result<Value, Error> {
|
||||
Ok(match array {
|
||||
Value::Array(v) => v.as_numbers().percentile(n).into(),
|
||||
Value::Array(v) => v.as_numbers().sorted().percentile(n).into(),
|
||||
_ => Value::None,
|
||||
})
|
||||
}
|
||||
|
@ -147,7 +147,7 @@ pub fn sqrt((arg,): (Number,)) -> Result<Value, Error> {
|
|||
|
||||
pub fn stddev((array,): (Value,)) -> Result<Value, Error> {
|
||||
Ok(match array {
|
||||
Value::Array(v) => v.as_numbers().deviation().into(),
|
||||
Value::Array(v) => v.as_numbers().deviation(true).into(),
|
||||
_ => Value::None,
|
||||
})
|
||||
}
|
||||
|
@ -168,14 +168,14 @@ pub fn top((array, c): (Value, i64)) -> Result<Value, Error> {
|
|||
|
||||
pub fn trimean((array,): (Value,)) -> Result<Value, Error> {
|
||||
Ok(match array {
|
||||
Value::Array(v) => v.as_numbers().trimean().into(),
|
||||
Value::Array(v) => v.as_numbers().sorted().trimean().into(),
|
||||
_ => Value::None,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn variance((array,): (Value,)) -> Result<Value, Error> {
|
||||
Ok(match array {
|
||||
Value::Array(v) => v.as_numbers().variance().into(),
|
||||
Value::Array(v) => v.as_numbers().variance(true).into(),
|
||||
_ => Value::None,
|
||||
})
|
||||
}
|
||||
|
|
|
@ -1,11 +1,26 @@
|
|||
use crate::sql::number::Number;
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
pub trait Bottom {
|
||||
fn bottom(self, _c: i64) -> Number;
|
||||
/// Find the lowest `k` records from the collection in arbitrary order
|
||||
/// O(n*log(k)) time complexity
|
||||
fn bottom(self, k: i64) -> Vec<Number>;
|
||||
}
|
||||
|
||||
impl Bottom for Vec<Number> {
|
||||
fn bottom(self, _c: i64) -> Number {
|
||||
todo!()
|
||||
/// Returns the `k` smallest numbers of the collection, in arbitrary order.
///
/// A non-positive `k` yields an empty vector; a `k` at least as large as
/// the collection yields every element.
fn bottom(self, k: i64) -> Vec<Number> {
    // Clamp negative counts to zero: a bare `as usize` cast would wrap a
    // negative `k` into a huge limit and hand back the whole collection.
    let k = k.max(0) as usize;
    // Max-heap holding the current candidates
    let mut heap = BinaryHeap::new();
    // Iterate and keep only the bottom numbers
    for (i, v) in self.into_iter().enumerate() {
        heap.push(v);
        // Once more than `k` values are held, evict the greatest one
        if i >= k {
            heap.pop();
        }
    }
    // Return the numbers as a vector
    heap.into_iter().collect()
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
use super::variance::Variance;
|
||||
use crate::sql::number::Number;
|
||||
|
||||
pub trait Deviation {
|
||||
fn deviation(self) -> Number;
|
||||
/// Standard Deviation - sample (n - 1 divisor) when `sample` is true, otherwise population
|
||||
fn deviation(self, sample: bool) -> Number;
|
||||
}
|
||||
|
||||
impl Deviation for Vec<Number> {
|
||||
fn deviation(self) -> Number {
|
||||
todo!()
|
||||
/// Square root of the variance computed with the same `sample` flag.
fn deviation(self, sample: bool) -> Number {
    let variance = self.variance(sample);
    variance.sqrt()
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
use crate::sql::number::Number;
|
||||
use super::percentile::Percentile;
|
||||
use crate::sql::number::{Number, Sorted};
|
||||
|
||||
pub trait Interquartile {
|
||||
/// Interquartile Range - the difference between the upper and lower quartiles
|
||||
/// Q_3 - Q_1 [ or P_75 - P_25 ]
|
||||
fn interquartile(self) -> Number;
|
||||
}
|
||||
|
||||
impl Interquartile for Vec<Number> {
|
||||
impl Interquartile for Sorted<&Vec<Number>> {
|
||||
fn interquartile(self) -> Number {
|
||||
todo!()
|
||||
self.percentile(Number::from(75)) - self.percentile(Number::from(25))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
use crate::sql::number::Number;
|
||||
use crate::sql::number::{Number, Sorted};
|
||||
|
||||
pub trait Median {
|
||||
fn median(&mut self) -> Number;
|
||||
fn median(self) -> Number;
|
||||
}
|
||||
|
||||
impl Median for Vec<Number> {
|
||||
fn median(&mut self) -> Number {
|
||||
self.sort();
|
||||
self.remove(self.len() / 2)
|
||||
impl Median for Sorted<&Vec<Number>> {
|
||||
fn median(self) -> Number {
|
||||
self.0.get(self.0.len() / 2).unwrap_or(&Number::NAN).clone()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,13 @@
|
|||
use crate::sql::number::Number;
|
||||
use super::percentile::Percentile;
|
||||
use crate::sql::number::{Number, Sorted};
|
||||
|
||||
pub trait Midhinge {
|
||||
fn midhinge(self) -> Number;
|
||||
/// Tukey Midhinge - the average of the 1st and 3rd Quartiles
|
||||
fn midhinge(&self) -> Number;
|
||||
}
|
||||
|
||||
impl Midhinge for Vec<Number> {
|
||||
fn midhinge(self) -> Number {
|
||||
todo!()
|
||||
impl Midhinge for Sorted<&Vec<Number>> {
|
||||
fn midhinge(&self) -> Number {
|
||||
(self.percentile(Number::from(75)) + self.percentile(Number::from(25))) / Number::from(2)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,21 @@
|
|||
use crate::sql::number::Number;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
pub trait Mode {
|
||||
/// Find the most frequent number in collection
|
||||
/// O(n*log(w)) time complexity s.t. w = distinct count
|
||||
fn mode(self) -> Number;
|
||||
}
|
||||
|
||||
impl Mode for Vec<Number> {
|
||||
fn mode(self) -> Number {
|
||||
todo!()
|
||||
// Iterate over all numbers, and get their frequency
|
||||
let frequencies = self.into_iter().fold(BTreeMap::new(), |mut freqs, value| {
|
||||
let entry = freqs.entry(value).or_insert_with(|| 0);
|
||||
*entry += 1;
|
||||
freqs
|
||||
});
|
||||
// Get the maximum number by frequency
|
||||
frequencies.into_iter().max_by_key(|(_, n)| *n).map(|(v, _)| v).unwrap_or(Number::NAN)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,30 @@
|
|||
use crate::sql::number::Number;
|
||||
use crate::sql::number::{Number, Sorted};
|
||||
|
||||
pub trait Nearestrank {
|
||||
fn nearestrank(self, _: Number) -> Number;
|
||||
/// Pull the closest extant record from the dataset at the %-th percentile
|
||||
fn nearestrank(self, perc: Number) -> Number;
|
||||
}
|
||||
|
||||
impl Nearestrank for Vec<Number> {
|
||||
fn nearestrank(self, _: Number) -> Number {
|
||||
todo!()
|
||||
impl Nearestrank for Sorted<&Vec<Number>> {
|
||||
fn nearestrank(self, perc: Number) -> Number {
|
||||
// If an empty set, then return NaN
|
||||
if self.0.is_empty() {
|
||||
return Number::NAN;
|
||||
}
|
||||
// If an invalid percentile, then return NaN
|
||||
if (perc <= Number::from(0)) | (perc > Number::from(100)) {
|
||||
return Number::NAN;
|
||||
}
|
||||
// If 100%, then get the last value in the set
|
||||
if perc == Number::from(100) {
|
||||
return self.0.get(self.0.len()).unwrap_or(&Number::NAN).clone();
|
||||
}
|
||||
// Get the index of the specified percentile
|
||||
let n_percent_idx = Number::from(self.0.len()) * perc / Number::from(100);
|
||||
// Return the closest extant record for the index
|
||||
match n_percent_idx.as_float().ceil() as usize {
|
||||
0 => self.0.get(0).unwrap_or(&Number::NAN).clone(),
|
||||
idx => self.0.get(idx - 1).unwrap_or(&Number::NAN).clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,34 @@
|
|||
use crate::sql::number::Number;
|
||||
use crate::sql::number::{Number, Sorted};
|
||||
|
||||
pub trait Percentile {
|
||||
fn percentile(self, _: Number) -> Number;
|
||||
/// Gets the N percentile, averaging neighboring records if non-exact
|
||||
fn percentile(&self, perc: Number) -> Number;
|
||||
}
|
||||
|
||||
impl Percentile for Vec<Number> {
|
||||
fn percentile(self, _: Number) -> Number {
|
||||
todo!()
|
||||
impl Percentile for Sorted<&Vec<Number>> {
|
||||
fn percentile(&self, perc: Number) -> Number {
|
||||
// If an empty set, then return NaN
|
||||
if self.0.is_empty() {
|
||||
return Number::NAN;
|
||||
}
|
||||
// If an invalid percentile, then return NaN
|
||||
if (perc <= Number::from(0)) | (perc > Number::from(100)) {
|
||||
return Number::NAN;
|
||||
}
|
||||
// Get the index of the specified percentile
|
||||
let n_percent_idx = Number::from(self.0.len()) * perc / Number::from(100);
|
||||
// Calculate the N percentile for the index
|
||||
if n_percent_idx.to_float().fract().abs() < 1e-10 {
|
||||
let idx = n_percent_idx.as_usize();
|
||||
let val = self.0.get(idx - 1).unwrap_or(&Number::NAN).clone();
|
||||
val
|
||||
} else if n_percent_idx > Number::from(1) {
|
||||
let idx = n_percent_idx.as_usize();
|
||||
let val = self.0.get(idx - 1).unwrap_or(&Number::NAN);
|
||||
let val = val + self.0.get(idx).unwrap_or(&Number::NAN);
|
||||
val / Number::from(2)
|
||||
} else {
|
||||
Number::NAN
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,19 @@
|
|||
use crate::sql::number::Number;
|
||||
use super::percentile::Percentile;
|
||||
use crate::sql::number::{Number, Sorted};
|
||||
|
||||
pub trait Quartile {
|
||||
fn quartile(self) -> Number;
|
||||
/// Divides the set of numbers into Q_0 (min), Q_1, Q_2, Q_3, and Q_4 (max)
|
||||
fn quartile(self) -> (Number, Number, Number, Number, Number);
|
||||
}
|
||||
|
||||
impl Quartile for Vec<Number> {
|
||||
fn quartile(self) -> Number {
|
||||
todo!()
|
||||
impl Quartile for Sorted<&Vec<Number>> {
|
||||
fn quartile(self) -> (Number, Number, Number, Number, Number) {
|
||||
(
|
||||
self.percentile(Number::from(0)),
|
||||
self.percentile(Number::from(25)),
|
||||
self.percentile(Number::from(50)),
|
||||
self.percentile(Number::from(75)),
|
||||
self.percentile(Number::from(100)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,25 @@
|
|||
use crate::sql::number::Number;
|
||||
|
||||
pub trait Spread {
|
||||
/// Gets the extent to which a distribution is stretched
|
||||
/// O(n) time complex
|
||||
fn spread(self) -> Number;
|
||||
}
|
||||
|
||||
impl Spread for Vec<Number> {
|
||||
fn spread(self) -> Number {
|
||||
todo!()
|
||||
// Get the initial number
|
||||
let init = self.get(0);
|
||||
// Get the minimum and the maximum
|
||||
let min_max = self.iter().fold((init, init), |(mut min, mut max), val| {
|
||||
min = std::cmp::min(min, Some(val));
|
||||
max = std::cmp::max(max, Some(val));
|
||||
(min, max)
|
||||
});
|
||||
// Return the maximum - minimum or NaN
|
||||
match min_max {
|
||||
(Some(min), Some(max)) => max - min,
|
||||
_ => Number::NAN,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,27 @@
|
|||
use crate::sql::number::Number;
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
|
||||
pub trait Top {
|
||||
fn top(self, _c: i64) -> Number;
|
||||
/// Find the greatest `k` records from the collection in arbitrary order
|
||||
/// O(n*log(k)) time complexity
|
||||
fn top(self, k: i64) -> Vec<Number>;
|
||||
}
|
||||
|
||||
impl Top for Vec<Number> {
|
||||
fn top(self, _c: i64) -> Number {
|
||||
todo!()
|
||||
fn top(self, k: i64) -> Vec<Number> {
|
||||
// Convert to usize
|
||||
let k = k as usize;
|
||||
// Create a heap to store the numbers
|
||||
let mut heap = BinaryHeap::new();
|
||||
// Iterate and store the top numbers
|
||||
for (i, v) in self.into_iter().enumerate() {
|
||||
heap.push(Reverse(v));
|
||||
if i >= k {
|
||||
heap.pop();
|
||||
}
|
||||
}
|
||||
// Return the numbers as a vector
|
||||
heap.into_iter().map(|x| x.0).collect()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
use crate::sql::number::Number;
|
||||
use super::{median::Median, midhinge::Midhinge};
|
||||
use crate::sql::number::{Number, Sorted};
|
||||
|
||||
pub trait Trimean {
|
||||
/// Bowley's Trimean - the Average of the median and the MidHinge
|
||||
/// ( 2 * Q_2 + Q_1 + Q_3 ) / 4 == ( Q_2 + ( Q_1 + Q_3 ) / 2 ) / 2
|
||||
fn trimean(self) -> Number;
|
||||
}
|
||||
|
||||
impl Trimean for Vec<Number> {
|
||||
impl Trimean for Sorted<&Vec<Number>> {
|
||||
fn trimean(self) -> Number {
|
||||
todo!()
|
||||
(self.midhinge() + self.median()) / Number::from(2)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,18 @@
|
|||
use super::mean::Mean;
|
||||
use crate::sql::number::Number;
|
||||
|
||||
pub trait Variance {
|
||||
fn variance(self) -> Number;
|
||||
/// Variance of Data - sample (n - 1 divisor) when `sample` is true, otherwise population
|
||||
/// O(n) time complex
|
||||
fn variance(self, sample: bool) -> Number;
|
||||
}
|
||||
|
||||
impl Variance for Vec<Number> {
|
||||
fn variance(self) -> Number {
|
||||
todo!()
|
||||
fn variance(self, sample: bool) -> Number {
|
||||
let mean = self.mean();
|
||||
let len = Number::from(self.len() - sample as usize);
|
||||
let out = self.iter().map(|x| (x - &mean) * (x - &mean)).sum::<Number>() / len;
|
||||
|
||||
out
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,6 +46,12 @@ impl From<Vec<&str>> for Array {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Number>> for Array {
|
||||
fn from(v: Vec<Number>) -> Self {
|
||||
Array(v.into_iter().map(Value::from).collect())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Operation>> for Array {
|
||||
fn from(v: Vec<Operation>) -> Self {
|
||||
Array(v.into_iter().map(Value::from).collect())
|
||||
|
|
|
@ -151,6 +151,12 @@ impl Serialize for Number {
|
|||
}
|
||||
|
||||
impl Number {
|
||||
// -----------------------------------
|
||||
// Constants
|
||||
// -----------------------------------
|
||||
|
||||
pub const NAN: Number = Number::Float(f64::NAN);
|
||||
|
||||
// -----------------------------------
|
||||
// Simple number detection
|
||||
// -----------------------------------
|
||||
|
@ -179,6 +185,14 @@ impl Number {
|
|||
// Simple conversion of number
|
||||
// -----------------------------------
|
||||
|
||||
pub fn as_usize(self) -> usize {
|
||||
match self {
|
||||
Number::Int(v) => v as usize,
|
||||
Number::Float(v) => v as usize,
|
||||
Number::Decimal(v) => v.to_usize().unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_int(self) -> i64 {
|
||||
match self {
|
||||
Number::Int(v) => v,
|
||||
|
@ -498,6 +512,21 @@ impl<'a> Product<&'a Self> for Number {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct Sorted<T>(pub T);
|
||||
|
||||
pub trait Sort {
|
||||
fn sorted(&mut self) -> Sorted<&Self>
|
||||
where
|
||||
Self: Sized;
|
||||
}
|
||||
|
||||
impl Sort for Vec<Number> {
|
||||
fn sorted(&mut self) -> Sorted<&Vec<Number>> {
|
||||
self.sort();
|
||||
Sorted(self)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn number(i: &str) -> IResult<&str, Number> {
|
||||
alt((map(integer, Number::from), map(decimal, Number::from)))(i)
|
||||
}
|
||||
|
|
|
@ -398,6 +398,12 @@ impl From<Vec<Value>> for Value {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Number>> for Value {
|
||||
fn from(v: Vec<Number>) -> Self {
|
||||
Value::Array(Array::from(v))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<Operation>> for Value {
|
||||
fn from(v: Vec<Operation>) -> Self {
|
||||
Value::Array(Array::from(v))
|
||||
|
|
Loading…
Reference in a new issue