Implement GROUP BY clauses

This commit is contained in:
Tobie Morgan Hitchcock 2022-03-25 18:43:22 +00:00
parent c2c25f68cf
commit 87840e3e05
13 changed files with 340 additions and 20 deletions

View file

@ -7,6 +7,8 @@ use crate::dbs::Statement;
use crate::dbs::Transaction; use crate::dbs::Transaction;
use crate::doc::Document; use crate::doc::Document;
use crate::err::Error; use crate::err::Error;
use crate::sql::array::Array;
use crate::sql::field::Field;
use crate::sql::id::Id; use crate::sql::id::Id;
use crate::sql::part::Part; use crate::sql::part::Part;
use crate::sql::statements::create::CreateStatement; use crate::sql::statements::create::CreateStatement;
@ -19,6 +21,7 @@ use crate::sql::table::Table;
use crate::sql::thing::Thing; use crate::sql::thing::Thing;
use crate::sql::value::Value; use crate::sql::value::Value;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::BTreeMap;
use std::mem; use std::mem;
use std::sync::Arc; use std::sync::Arc;
@ -155,7 +158,8 @@ impl Iterator {
txn: &Transaction, txn: &Transaction,
) -> Result<(), Error> { ) -> Result<(), Error> {
if let Some(splits) = self.stm.split() { if let Some(splits) = self.stm.split() {
for split in &splits.0 { // Loop over each split clause
for split in splits.iter() {
// Get the query result // Get the query result
let res = mem::take(&mut self.results); let res = mem::take(&mut self.results);
// Loop over each value // Loop over each value
@ -192,12 +196,86 @@ impl Iterator {
#[inline] #[inline]
async fn output_group( async fn output_group(
&mut self, &mut self,
_ctx: &Runtime, ctx: &Runtime,
_opt: &Options, opt: &Options,
_txn: &Transaction, txn: &Transaction,
) -> Result<(), Error> { ) -> Result<(), Error> {
if self.stm.group().is_some() { if let Some(fields) = self.stm.expr() {
// Ignore if let Some(groups) = self.stm.group() {
// Create the new grouped collection
let mut grp: BTreeMap<Array, Array> = BTreeMap::new();
// Get the query result
let res = mem::take(&mut self.results);
// Loop over each value
for obj in res {
// Create a new column set
let mut arr = Array::with_capacity(groups.len());
// Loop over each group clause
for group in groups.iter() {
// Get the value at the path
let val = obj.pick(&group.group);
// Set the value at the path
arr.value.push(val);
}
// Add to grouped collection
match grp.get_mut(&arr) {
Some(v) => v.value.push(obj),
None => {
grp.insert(arr, Array::from(obj));
}
}
}
// Loop over each grouped collection
for (_, vals) in grp {
// Create a new value
let mut obj = Value::base();
// Save the collected values
let vals = Value::from(vals);
// Loop over each group clause
for field in fields.other() {
// Process it if it is a normal field
if let Field::Alone(v) = field {
match v {
Value::Function(f) if f.is_aggregate() => {
let x = vals
.all(ctx, opt, txn)
.await?
.get(ctx, opt, txn, v.to_idiom().as_ref())
.await?;
let x = f.aggregate(x).compute(ctx, opt, txn, None).await?;
obj.set(ctx, opt, txn, v.to_idiom().as_ref(), x).await?;
}
_ => {
let x = vals.first(ctx, opt, txn).await?;
let x = v.compute(ctx, opt, txn, Some(&x)).await?;
obj.set(ctx, opt, txn, v.to_idiom().as_ref(), x).await?;
}
}
}
// Process it if it is a aliased field
if let Field::Alias(v, i) = field {
match v {
Value::Function(f) if f.is_aggregate() => {
let x = vals
.all(ctx, opt, txn)
.await?
.get(ctx, opt, txn, i)
.await?;
let x = f.aggregate(x).compute(ctx, opt, txn, None).await?;
obj.set(ctx, opt, txn, i, x).await?;
}
_ => {
let x = vals.first(ctx, opt, txn).await?;
let x = v.compute(ctx, opt, txn, Some(&x)).await?;
obj.set(ctx, opt, txn, i, x).await?;
}
}
}
}
// Add the object to the results
self.results.push(obj);
}
}
} }
Ok(()) Ok(())
} }
@ -210,8 +288,11 @@ impl Iterator {
_txn: &Transaction, _txn: &Transaction,
) -> Result<(), Error> { ) -> Result<(), Error> {
if let Some(orders) = self.stm.order() { if let Some(orders) = self.stm.order() {
// Sort the full result set
self.results.sort_by(|a, b| { self.results.sort_by(|a, b| {
for order in &orders.0 { // Loop over each order clause
for order in orders.iter() {
// Reverse the ordering if DESC
let o = match order.direction { let o = match order.direction {
true => a.compare(b, &order.order), true => a.compare(b, &order.order),
false => b.compare(a, &order.order), false => b.compare(a, &order.order),

View file

@ -1,5 +1,6 @@
use crate::sql::cond::Cond; use crate::sql::cond::Cond;
use crate::sql::fetch::Fetchs; use crate::sql::fetch::Fetchs;
use crate::sql::field::Fields;
use crate::sql::group::Groups; use crate::sql::group::Groups;
use crate::sql::limit::Limit; use crate::sql::limit::Limit;
use crate::sql::order::Orders; use crate::sql::order::Orders;
@ -83,6 +84,13 @@ impl fmt::Display for Statement {
} }
impl Statement { impl Statement {
// Returns any query fields if specified
pub fn expr(self: &Statement) -> Option<&Fields> {
match self {
Statement::Select(v) => Some(&v.expr),
_ => None,
}
}
// Returns any SPLIT clause if specified // Returns any SPLIT clause if specified
pub fn conds(self: &Statement) -> Option<&Cond> { pub fn conds(self: &Statement) -> Option<&Cond> {
match self { match self {

View file

@ -67,14 +67,40 @@ impl<'a> Document<'a> {
for v in stm.expr.other() { for v in stm.expr.other() {
match v { match v {
Field::All => (), Field::All => (),
Field::Alone(v) => { Field::Alone(v) => match v {
Value::Function(f) if stm.group.is_some() && f.is_aggregate() => {
let x = match f.args().len() {
0 => f.compute(ctx, opt, txn, Some(&self.current)).await?,
_ => {
f.args()[0]
.compute(ctx, opt, txn, Some(&self.current))
.await?
}
};
out.set(ctx, opt, txn, v.to_idiom().as_ref(), x).await?;
}
_ => {
let x = v.compute(ctx, opt, txn, Some(&self.current)).await?; let x = v.compute(ctx, opt, txn, Some(&self.current)).await?;
out.set(ctx, opt, txn, v.to_idiom().as_ref(), x).await?; out.set(ctx, opt, txn, v.to_idiom().as_ref(), x).await?;
} }
Field::Alias(v, i) => { },
Field::Alias(v, i) => match v {
Value::Function(f) if stm.group.is_some() && f.is_aggregate() => {
let x = match f.args().len() {
0 => f.compute(ctx, opt, txn, Some(&self.current)).await?,
_ => {
f.args()[0]
.compute(ctx, opt, txn, Some(&self.current))
.await?
}
};
out.set(ctx, opt, txn, i, x).await?;
}
_ => {
let x = v.compute(ctx, opt, txn, Some(&self.current)).await?; let x = v.compute(ctx, opt, txn, Some(&self.current)).await?;
out.set(ctx, opt, txn, i, x).await?; out.set(ctx, opt, txn, i, x).await?;
} }
},
} }
} }
Ok(out) Ok(out)

View file

@ -17,11 +17,19 @@ use serde::{Deserialize, Serialize};
use std::fmt; use std::fmt;
use std::ops; use std::ops;
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Deserialize)] #[derive(Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd, Deserialize)]
pub struct Array { pub struct Array {
pub value: Vec<Value>, pub value: Vec<Value>,
} }
impl From<Value> for Array {
fn from(v: Value) -> Self {
Array {
value: vec![v],
}
}
}
impl From<Vec<Value>> for Array { impl From<Vec<Value>> for Array {
fn from(v: Vec<Value>) -> Self { fn from(v: Vec<Value>) -> Self {
Array { Array {
@ -71,6 +79,18 @@ impl From<Vec<Operation>> for Array {
} }
impl Array { impl Array {
pub fn new() -> Self {
Array {
value: Vec::default(),
}
}
pub fn with_capacity(len: usize) -> Self {
Array {
value: Vec::with_capacity(len),
}
}
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.value.len() self.value.len()
} }

View file

@ -8,6 +8,7 @@ use nom::bytes::complete::tag_no_case;
use nom::multi::separated_list1; use nom::multi::separated_list1;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::fmt; use std::fmt;
use std::ops::Deref;
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] #[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct Fields(pub Vec<Field>); pub struct Fields(pub Vec<Field>);
@ -32,6 +33,21 @@ impl Fields {
} }
} }
impl Deref for Fields {
type Target = Vec<Field>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl IntoIterator for Fields {
type Item = Field;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl fmt::Display for Fields { impl fmt::Display for Fields {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0.iter().map(|ref v| format!("{}", v)).collect::<Vec<_>>().join(", ")) write!(f, "{}", self.0.iter().map(|ref v| format!("{}", v)).collect::<Vec<_>>().join(", "))

View file

@ -31,6 +31,76 @@ impl PartialOrd for Function {
} }
} }
impl Function {
// Get function arguments if applicable
pub fn args(&self) -> &[Value] {
match self {
Function::Normal(_, a) => a,
_ => &[],
}
}
// Convert this function to an aggregate
pub fn aggregate(&self, val: Value) -> Function {
match self {
Function::Normal(n, a) => {
let mut a = a.to_owned();
match a.len() {
0 => a.insert(0, val),
_ => {
a.remove(0);
a.insert(0, val);
}
}
Function::Normal(n.to_owned(), a)
}
_ => unreachable!(),
}
}
// Check if this function is a rolling function
pub fn is_rolling(&self) -> bool {
match self {
Function::Normal(f, _) if f == "array::concat" => true,
Function::Normal(f, _) if f == "array::distinct" => true,
Function::Normal(f, _) if f == "array::union" => true,
Function::Normal(f, _) if f == "count" => true,
Function::Normal(f, _) if f == "math::max" => true,
Function::Normal(f, _) if f == "math::mean" => true,
Function::Normal(f, _) if f == "math::min" => true,
Function::Normal(f, _) if f == "math::stddev" => true,
Function::Normal(f, _) if f == "math::sum" => true,
Function::Normal(f, _) if f == "math::variance" => true,
_ => false,
}
}
// Check if this function is a grouping function
pub fn is_aggregate(&self) -> bool {
match self {
Function::Normal(f, _) if f == "array::concat" => true,
Function::Normal(f, _) if f == "array::distinct" => true,
Function::Normal(f, _) if f == "array::union" => true,
Function::Normal(f, _) if f == "count" => true,
Function::Normal(f, _) if f == "math::bottom" => true,
Function::Normal(f, _) if f == "math::interquartile" => true,
Function::Normal(f, _) if f == "math::max" => true,
Function::Normal(f, _) if f == "math::mean" => true,
Function::Normal(f, _) if f == "math::median" => true,
Function::Normal(f, _) if f == "math::midhinge" => true,
Function::Normal(f, _) if f == "math::min" => true,
Function::Normal(f, _) if f == "math::mode" => true,
Function::Normal(f, _) if f == "math::nearestrank" => true,
Function::Normal(f, _) if f == "math::percentile" => true,
Function::Normal(f, _) if f == "math::sample" => true,
Function::Normal(f, _) if f == "math::spread" => true,
Function::Normal(f, _) if f == "math::stddev" => true,
Function::Normal(f, _) if f == "math::sum" => true,
Function::Normal(f, _) if f == "math::top" => true,
Function::Normal(f, _) if f == "math::trimean" => true,
Function::Normal(f, _) if f == "math::variance" => true,
_ => false,
}
}
}
impl Function { impl Function {
pub async fn compute( pub async fn compute(
&self, &self,

View file

@ -8,10 +8,32 @@ use nom::multi::separated_list1;
use nom::sequence::tuple; use nom::sequence::tuple;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::fmt; use std::fmt;
use std::ops::Deref;
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] #[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct Groups(pub Vec<Group>); pub struct Groups(pub Vec<Group>);
impl Groups {
pub fn len(&self) -> usize {
self.0.len()
}
}
impl Deref for Groups {
type Target = Vec<Group>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl IntoIterator for Groups {
type Item = Group;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl fmt::Display for Groups { impl fmt::Display for Groups {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!( write!(

View file

@ -21,7 +21,7 @@ use std::collections::BTreeMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt; use std::fmt;
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Deserialize)] #[derive(Clone, Debug, Default, Eq, Ord, PartialEq, PartialOrd, Deserialize)]
pub struct Object { pub struct Object {
pub value: BTreeMap<String, Value>, pub value: BTreeMap<String, Value>,
} }

View file

@ -9,10 +9,32 @@ use nom::multi::separated_list1;
use nom::sequence::tuple; use nom::sequence::tuple;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::fmt; use std::fmt;
use std::ops::Deref;
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] #[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct Orders(pub Vec<Order>); pub struct Orders(pub Vec<Order>);
impl Orders {
pub fn len(&self) -> usize {
self.0.len()
}
}
impl Deref for Orders {
type Target = Vec<Order>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl IntoIterator for Orders {
type Item = Order;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl fmt::Display for Orders { impl fmt::Display for Orders {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!( write!(

View file

@ -8,10 +8,32 @@ use nom::multi::separated_list1;
use nom::sequence::tuple; use nom::sequence::tuple;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::fmt; use std::fmt;
use std::ops::Deref;
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] #[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Serialize, Deserialize)]
pub struct Splits(pub Vec<Split>); pub struct Splits(pub Vec<Split>);
impl Splits {
pub fn len(&self) -> usize {
self.0.len()
}
}
impl Deref for Splits {
type Target = Vec<Split>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl IntoIterator for Splits {
type Item = Split;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}
impl fmt::Display for Splits { impl fmt::Display for Splits {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!( write!(

17
lib/src/sql/value/all.rs Normal file
View file

@ -0,0 +1,17 @@
use crate::dbs::Options;
use crate::dbs::Runtime;
use crate::dbs::Transaction;
use crate::err::Error;
use crate::sql::part::Part;
use crate::sql::value::Value;
impl Value {
pub async fn all(
&self,
ctx: &Runtime,
opt: &Options,
txn: &Transaction,
) -> Result<Self, Error> {
self.get(ctx, opt, txn, &[Part::All]).await
}
}

View file

@ -1,5 +1,6 @@
pub use self::value::*; pub use self::value::*;
mod all;
mod array; mod array;
mod clear; mod clear;
mod compare; mod compare;

View file

@ -36,10 +36,12 @@ use nom::combinator::map;
use nom::multi::separated_list1; use nom::multi::separated_list1;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::cmp::Ordering;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt; use std::fmt;
use std::ops; use std::ops;
use std::ops::Deref;
use std::str::FromStr; use std::str::FromStr;
static MATCHER: Lazy<SkimMatcherV2> = Lazy::new(|| SkimMatcherV2::default().ignore_case()); static MATCHER: Lazy<SkimMatcherV2> = Lazy::new(|| SkimMatcherV2::default().ignore_case());
@ -47,9 +49,10 @@ static MATCHER: Lazy<SkimMatcherV2> = Lazy::new(|| SkimMatcherV2::default().igno
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct Values(pub Vec<Value>); pub struct Values(pub Vec<Value>);
impl fmt::Display for Values { impl Deref for Values {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { type Target = Vec<Value>;
write!(f, "{}", self.0.iter().map(|ref v| format!("{}", v)).collect::<Vec<_>>().join(", ")) fn deref(&self) -> &Self::Target {
&self.0
} }
} }
@ -61,6 +64,12 @@ impl IntoIterator for Values {
} }
} }
impl fmt::Display for Values {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0.iter().map(|ref v| format!("{}", v)).collect::<Vec<_>>().join(", "))
}
}
pub fn values(i: &str) -> IResult<&str, Values> { pub fn values(i: &str) -> IResult<&str, Values> {
let (i, v) = separated_list1(commas, value)(i)?; let (i, v) = separated_list1(commas, value)(i)?;
Ok((i, Values(v))) Ok((i, Values(v)))
@ -104,6 +113,12 @@ pub enum Value {
impl Eq for Value {} impl Eq for Value {}
impl Ord for Value {
fn cmp(&self, other: &Self) -> Ordering {
self.partial_cmp(other).unwrap_or(Ordering::Equal)
}
}
impl Default for Value { impl Default for Value {
fn default() -> Value { fn default() -> Value {
Value::None Value::None