Implement math functions that were marked as todo ()

This commit is contained in:
CelebrateVC 2022-09-25 05:55:07 -04:00 committed by GitHub
parent 2015227dc3
commit d5a378b1be
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 217 additions and 55 deletions

View file

@ -12,7 +12,7 @@ use crate::fnc::util::math::spread::Spread;
use crate::fnc::util::math::top::Top;
use crate::fnc::util::math::trimean::Trimean;
use crate::fnc::util::math::variance::Variance;
use crate::sql::number::Number;
use crate::sql::number::{Number, Sort};
use crate::sql::value::Value;
pub fn abs((arg,): (Number,)) -> Result<Value, Error> {
@ -47,7 +47,7 @@ pub fn floor((arg,): (Number,)) -> Result<Value, Error> {
pub fn interquartile((array,): (Value,)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => v.as_numbers().interquartile().into(),
Value::Array(v) => v.as_numbers().sorted().interquartile().into(),
_ => Value::None,
})
}
@ -76,7 +76,7 @@ pub fn median((array,): (Value,)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => match v.is_empty() {
true => Value::None,
false => v.as_numbers().median().into(),
false => v.as_numbers().sorted().median().into(),
},
_ => Value::None,
})
@ -84,7 +84,7 @@ pub fn median((array,): (Value,)) -> Result<Value, Error> {
pub fn midhinge((array,): (Value,)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => v.as_numbers().midhinge().into(),
Value::Array(v) => v.as_numbers().sorted().midhinge().into(),
_ => Value::None,
})
}
@ -108,14 +108,14 @@ pub fn mode((array,): (Value,)) -> Result<Value, Error> {
pub fn nearestrank((array, n): (Value, Number)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => v.as_numbers().nearestrank(n).into(),
Value::Array(v) => v.as_numbers().sorted().nearestrank(n).into(),
_ => Value::None,
})
}
pub fn percentile((array, n): (Value, Number)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => v.as_numbers().percentile(n).into(),
Value::Array(v) => v.as_numbers().sorted().percentile(n).into(),
_ => Value::None,
})
}
@ -147,7 +147,7 @@ pub fn sqrt((arg,): (Number,)) -> Result<Value, Error> {
pub fn stddev((array,): (Value,)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => v.as_numbers().deviation().into(),
Value::Array(v) => v.as_numbers().deviation(true).into(),
_ => Value::None,
})
}
@ -168,14 +168,14 @@ pub fn top((array, c): (Value, i64)) -> Result<Value, Error> {
pub fn trimean((array,): (Value,)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => v.as_numbers().trimean().into(),
Value::Array(v) => v.as_numbers().sorted().trimean().into(),
_ => Value::None,
})
}
pub fn variance((array,): (Value,)) -> Result<Value, Error> {
Ok(match array {
Value::Array(v) => v.as_numbers().variance().into(),
Value::Array(v) => v.as_numbers().variance(true).into(),
_ => Value::None,
})
}

View file

@ -1,11 +1,26 @@
use crate::sql::number::Number;
use std::collections::BinaryHeap;
pub trait Bottom {
fn bottom(self, _c: i64) -> Number;
/// Find the lowest `k` records from the collection in arbitrary order
/// O(n*log(k)) time complexity
fn bottom(self, k: i64) -> Vec<Number>;
}
impl Bottom for Vec<Number> {
fn bottom(self, _c: i64) -> Number {
todo!()
/// Collects the `k` smallest numbers of the collection, in arbitrary order.
///
/// A max-heap is capped at `k` entries while iterating: whenever a further
/// value is pushed, the largest retained value is evicted.
/// A zero or negative `k` yields an empty vector.
fn bottom(self, k: i64) -> Vec<Number> {
    // A bare `as usize` cast would wrap a negative `k` into a huge limit
    // and hand back the whole collection, so reject it up front
    if k <= 0 {
        return Vec::new();
    }
    // Convert to usize
    let k = k as usize;
    // Create a heap to store the numbers
    let mut heap = BinaryHeap::new();
    // Iterate and keep only the bottom numbers
    for v in self {
        heap.push(v);
        // Drop the current maximum once more than `k` values are held
        if heap.len() > k {
            heap.pop();
        }
    }
    // Return the numbers as a vector
    heap.into_iter().collect()
}
}

View file

@ -1,11 +1,13 @@
use super::variance::Variance;
use crate::sql::number::Number;
pub trait Deviation {
fn deviation(self) -> Number;
/// Standard deviation - the square root of the variance (sample variance when `sample` is true, population variance otherwise)
fn deviation(self, sample: bool) -> Number;
}
impl Deviation for Vec<Number> {
fn deviation(self) -> Number {
todo!()
/// Standard deviation of the collection: the square root of `variance(sample)`.
fn deviation(self, sample: bool) -> Number {
// `sample` is forwarded unchanged to `Variance::variance`
self.variance(sample).sqrt()
}
}

View file

@ -1,11 +1,14 @@
use crate::sql::number::Number;
use super::percentile::Percentile;
use crate::sql::number::{Number, Sorted};
pub trait Interquartile {
/// Interquartile Range - the difference between the upper and lower quartiles
/// Q_3 - Q_1 [ or P_75 - P_25 ]
fn interquartile(self) -> Number;
}
impl Interquartile for Vec<Number> {
impl Interquartile for Sorted<&Vec<Number>> {
fn interquartile(self) -> Number {
todo!()
self.percentile(Number::from(75)) - self.percentile(Number::from(25))
}
}

View file

@ -1,12 +1,11 @@
use crate::sql::number::Number;
use crate::sql::number::{Number, Sorted};
pub trait Median {
fn median(&mut self) -> Number;
fn median(self) -> Number;
}
impl Median for Vec<Number> {
fn median(&mut self) -> Number {
self.sort();
self.remove(self.len() / 2)
impl Median for Sorted<&Vec<Number>> {
/// Picks the element at index `len / 2` of the sorted set, or NaN when the set is empty.
fn median(self) -> Number {
    let numbers = self.0;
    let middle = numbers.len() / 2;
    match numbers.get(middle) {
        Some(value) => value.clone(),
        None => Number::NAN,
    }
}
}

View file

@ -1,11 +1,13 @@
use crate::sql::number::Number;
use super::percentile::Percentile;
use crate::sql::number::{Number, Sorted};
pub trait Midhinge {
fn midhinge(self) -> Number;
/// Tukey Midhinge - the average of the 1st and 3rd Quartiles
fn midhinge(&self) -> Number;
}
impl Midhinge for Vec<Number> {
fn midhinge(self) -> Number {
todo!()
impl Midhinge for Sorted<&Vec<Number>> {
/// Tukey midhinge: the mean of the 75th and 25th percentiles.
fn midhinge(&self) -> Number {
    let upper = self.percentile(Number::from(75));
    let lower = self.percentile(Number::from(25));
    (upper + lower) / Number::from(2)
}
}

View file

@ -1,11 +1,21 @@
use crate::sql::number::Number;
use std::collections::BTreeMap;
pub trait Mode {
/// Find the most frequent number in collection
/// O(n*log(w)) time complexity s.t. w = distinct count
fn mode(self) -> Number;
}
impl Mode for Vec<Number> {
fn mode(self) -> Number {
todo!()
// Iterate over all numbers, and get their frequency
let frequencies = self.into_iter().fold(BTreeMap::new(), |mut freqs, value| {
let entry = freqs.entry(value).or_insert_with(|| 0);
*entry += 1;
freqs
});
// Get the maximum number by frequency
frequencies.into_iter().max_by_key(|(_, n)| *n).map(|(v, _)| v).unwrap_or(Number::NAN)
}
}

View file

@ -1,11 +1,30 @@
use crate::sql::number::Number;
use crate::sql::number::{Number, Sorted};
pub trait Nearestrank {
fn nearestrank(self, _: Number) -> Number;
/// Pull the closest extant record from the dataset at the %-th percentile
fn nearestrank(self, perc: Number) -> Number;
}
impl Nearestrank for Vec<Number> {
fn nearestrank(self, _: Number) -> Number {
todo!()
impl Nearestrank for Sorted<&Vec<Number>> {
/// Returns the record at the nearest rank for the `perc`-th percentile.
///
/// Yields NaN for an empty set or when `perc` lies outside `(0, 100]`.
fn nearestrank(self, perc: Number) -> Number {
    // If an empty set, then return NaN
    if self.0.is_empty() {
        return Number::NAN;
    }
    // If an invalid percentile, then return NaN
    if perc <= Number::from(0) || perc > Number::from(100) {
        return Number::NAN;
    }
    // If 100%, then get the last value in the set. Indexing with `len()`
    // is always out of bounds, so take the final element instead.
    if perc == Number::from(100) {
        return self.0.last().cloned().unwrap_or(Number::NAN);
    }
    // Get the (one-based) rank of the specified percentile
    let n_percent_idx = Number::from(self.0.len()) * perc / Number::from(100);
    // Return the closest extant record; a rank of 0 maps to the first record
    let idx = (n_percent_idx.as_float().ceil() as usize).saturating_sub(1);
    self.0.get(idx).cloned().unwrap_or(Number::NAN)
}
}

View file

@ -1,11 +1,34 @@
use crate::sql::number::Number;
use crate::sql::number::{Number, Sorted};
pub trait Percentile {
fn percentile(self, _: Number) -> Number;
/// Gets the N percentile, averaging neighboring records if non-exact
fn percentile(&self, perc: Number) -> Number;
}
impl Percentile for Vec<Number> {
fn percentile(self, _: Number) -> Number {
todo!()
impl Percentile for Sorted<&Vec<Number>> {
/// Gets the `perc`-th percentile of the sorted set.
///
/// When the computed rank is (within 1e-10 of) a whole number, the record at
/// that rank is returned; otherwise, for ranks above 1, the two neighbouring
/// records are averaged. Yields NaN for an empty set, for `perc` outside
/// `(0, 100]`, and when the rank falls below the first record.
fn percentile(&self, perc: Number) -> Number {
    // If an empty set, then return NaN
    if self.0.is_empty() {
        return Number::NAN;
    }
    // If an invalid percentile, then return NaN
    if perc <= Number::from(0) || perc > Number::from(100) {
        return Number::NAN;
    }
    // Get the (one-based) rank of the specified percentile
    let n_percent_idx = Number::from(self.0.len()) * perc / Number::from(100);
    // Calculate the N percentile for the rank
    if n_percent_idx.to_float().fract().abs() < 1e-10 {
        // A tiny positive percentile gives a rank that truncates to 0;
        // `idx - 1` would underflow there, so answer NaN instead
        match n_percent_idx.as_usize().checked_sub(1) {
            Some(idx) => self.0.get(idx).cloned().unwrap_or(Number::NAN),
            None => Number::NAN,
        }
    } else if n_percent_idx > Number::from(1) {
        // The rank is above 1 here, so `idx - 1` cannot underflow
        let idx = n_percent_idx.as_usize();
        let val = self.0.get(idx - 1).unwrap_or(&Number::NAN);
        let val = val + self.0.get(idx).unwrap_or(&Number::NAN);
        val / Number::from(2)
    } else {
        Number::NAN
    }
}
}

View file

@ -1,11 +1,19 @@
use crate::sql::number::Number;
use super::percentile::Percentile;
use crate::sql::number::{Number, Sorted};
pub trait Quartile {
fn quartile(self) -> Number;
/// Divides the set of numbers into Q_0 (min), Q_1, Q_2, Q_3, and Q_4 (max)
fn quartile(self) -> (Number, Number, Number, Number, Number);
}
impl Quartile for Vec<Number> {
fn quartile(self) -> Number {
todo!()
impl Quartile for Sorted<&Vec<Number>> {
/// Divides the sorted set into Q_0 (min), Q_1, Q_2, Q_3, and Q_4 (max).
///
/// Each entry is NaN when the set is empty.
fn quartile(self) -> (Number, Number, Number, Number, Number) {
    (
        // `percentile` treats 0 as an invalid percentile and answers NaN,
        // so the minimum is read directly from the front of the sorted set
        self.0.first().cloned().unwrap_or(Number::NAN),
        self.percentile(Number::from(25)),
        self.percentile(Number::from(50)),
        self.percentile(Number::from(75)),
        self.percentile(Number::from(100)),
    )
}
}

View file

@ -1,11 +1,25 @@
use crate::sql::number::Number;
pub trait Spread {
/// Gets the extent to which a distribution is stretched
/// O(n) time complexity
fn spread(self) -> Number;
}
impl Spread for Vec<Number> {
fn spread(self) -> Number {
todo!()
// Get the initial number
let init = self.get(0);
// Get the minimum and the maximum
let min_max = self.iter().fold((init, init), |(mut min, mut max), val| {
min = std::cmp::min(min, Some(val));
max = std::cmp::max(max, Some(val));
(min, max)
});
// Return the maximum - minimum or NaN
match min_max {
(Some(min), Some(max)) => max - min,
_ => Number::NAN,
}
}
}

View file

@ -1,11 +1,27 @@
use crate::sql::number::Number;
use std::cmp::Reverse;
use std::collections::BinaryHeap;
pub trait Top {
fn top(self, _c: i64) -> Number;
/// Find the greatest `k` records from the collection in arbitrary order
/// O(n*log(k)) time complexity
fn top(self, k: i64) -> Vec<Number>;
}
impl Top for Vec<Number> {
fn top(self, _c: i64) -> Number {
todo!()
/// Collects the `k` greatest numbers of the collection, in arbitrary order.
///
/// Values are stored as `Reverse` in a heap capped at `k` entries, so the
/// smallest retained value is evicted whenever a further value is pushed.
/// A zero or negative `k` yields an empty vector.
fn top(self, k: i64) -> Vec<Number> {
    // A bare `as usize` cast would wrap a negative `k` into a huge limit
    // and hand back the whole collection, so reject it up front
    if k <= 0 {
        return Vec::new();
    }
    // Convert to usize
    let k = k as usize;
    // Create a heap to store the numbers
    let mut heap = BinaryHeap::new();
    // Iterate and keep only the top numbers
    for v in self {
        heap.push(Reverse(v));
        // Drop the current minimum once more than `k` values are held
        if heap.len() > k {
            heap.pop();
        }
    }
    // Return the numbers as a vector
    heap.into_iter().map(|x| x.0).collect()
}
}

View file

@ -1,11 +1,14 @@
use crate::sql::number::Number;
use super::{median::Median, midhinge::Midhinge};
use crate::sql::number::{Number, Sorted};
pub trait Trimean {
/// Bowley's Trimean - the Average of the median and the MidHinge
/// ( 2 * Q_2 + Q_1 + Q_3 ) / 4 == ( Q_2 + ( Q_1 + Q_3 ) / 2 ) / 2
fn trimean(self) -> Number;
}
impl Trimean for Vec<Number> {
impl Trimean for Sorted<&Vec<Number>> {
fn trimean(self) -> Number {
todo!()
(self.midhinge() + self.median()) / Number::from(2)
}
}

View file

@ -1,11 +1,18 @@
use super::mean::Mean;
use crate::sql::number::Number;
pub trait Variance {
fn variance(self) -> Number;
/// Variance of the data - sample variance (n - 1 divisor) when `sample` is true, population variance (n divisor) otherwise
/// O(n) time complexity
fn variance(self, sample: bool) -> Number;
}
impl Variance for Vec<Number> {
fn variance(self) -> Number {
todo!()
/// Variance of the collection.
///
/// Divides the summed squared deviations from the mean by `n - 1` when
/// `sample` is true, and by `n` otherwise. Returns NaN when that divisor
/// would not be positive (an empty collection, or a single value with
/// `sample` set).
fn variance(self, sample: bool) -> Number {
    // `len() - 1` underflows on an empty collection, so validate the divisor first
    let len = match self.len().checked_sub(sample as usize) {
        Some(n) if n > 0 => Number::from(n),
        _ => return Number::NAN,
    };
    let mean = self.mean();
    self.iter().map(|x| (x - &mean) * (x - &mean)).sum::<Number>() / len
}
}

View file

@ -46,6 +46,12 @@ impl From<Vec<&str>> for Array {
}
}
impl From<Vec<Number>> for Array {
fn from(v: Vec<Number>) -> Self {
Array(v.into_iter().map(Value::from).collect())
}
}
impl From<Vec<Operation>> for Array {
fn from(v: Vec<Operation>) -> Self {
Array(v.into_iter().map(Value::from).collect())

View file

@ -151,6 +151,12 @@ impl Serialize for Number {
}
impl Number {
// -----------------------------------
// Constants
// -----------------------------------
pub const NAN: Number = Number::Float(f64::NAN);
// -----------------------------------
// Simple number detection
// -----------------------------------
@ -179,6 +185,14 @@ impl Number {
// Simple conversion of number
// -----------------------------------
/// Converts the number into a `usize`.
///
/// Negative values convert to 0 in every variant.
pub fn as_usize(self) -> usize {
    match self {
        // Clamp first: a bare cast would wrap a negative integer into a huge value,
        // unlike the other two arms which already give 0 for negatives
        Number::Int(v) => v.max(0) as usize,
        // Float-to-integer `as` casts saturate, so negatives and NaN become 0
        Number::Float(v) => v as usize,
        // Falls back to the default (0) when the decimal has no `usize` form
        Number::Decimal(v) => v.to_usize().unwrap_or_default(),
    }
}
pub fn as_int(self) -> i64 {
match self {
Number::Int(v) => v,
@ -498,6 +512,21 @@ impl<'a> Product<&'a Self> for Number {
}
}
/// Newtype marking its contents as sorted. `Sort::sorted` sorts before wrapping;
/// the field is public, so the type itself does not enforce the ordering.
pub struct Sorted<T>(pub T);
/// Sorting entry point that returns the receiver wrapped in `Sorted`.
pub trait Sort {
/// Returns a `Sorted` wrapper borrowing `self`; takes `&mut self`, so an
/// implementation may reorder the receiver in place.
fn sorted(&mut self) -> Sorted<&Self>
where
Self: Sized;
}
impl Sort for Vec<Number> {
/// Sorts the vector in place, then hands back a shared borrow wrapped in `Sorted`.
fn sorted(&mut self) -> Sorted<&Vec<Number>> {
// In-place sort: the caller's vector stays sorted after this call
self.sort();
Sorted(self)
}
}
pub fn number(i: &str) -> IResult<&str, Number> {
alt((map(integer, Number::from), map(decimal, Number::from)))(i)
}

View file

@ -398,6 +398,12 @@ impl From<Vec<Value>> for Value {
}
}
impl From<Vec<Number>> for Value {
    /// Converts the numbers into an `Array` and wraps it as `Value::Array`.
    fn from(v: Vec<Number>) -> Self {
        let array = Array::from(v);
        Self::Array(array)
    }
}
impl From<Vec<Operation>> for Value {
fn from(v: Vec<Operation>) -> Self {
Value::Array(Array::from(v))