From 4c8d9dbb638af51410702a3124caeda490b1e16a Mon Sep 17 00:00:00 2001
From: Tobie Morgan Hitchcock <tobie@abcum.com>
Date: Wed, 27 Apr 2022 16:21:51 +0100
Subject: [PATCH] implement COLLATE and NUMERIC ordering in ORDER BY clauses

---
 Cargo.lock                   | 16 +++++++++++
 lib/Cargo.toml               |  1 +
 lib/src/dbs/iterator.rs      |  4 +--
 lib/src/sql/value/compare.rs | 51 ++++++++++++++++++++++--------------
 lib/src/sql/value/value.rs   | 31 ++++++++++++++++++++++
 5 files changed, 81 insertions(+), 22 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 391e2f36..ddba264c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -32,6 +32,12 @@ dependencies = [
  "alloc-no-stdlib",
 ]
 
+[[package]]
+name = "any_ascii"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70033777eb8b5124a81a1889416543dddef2de240019b674c81285a2635a7e1e"
+
 [[package]]
 name = "anyhow"
 version = "1.0.56"
@@ -1123,6 +1129,15 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
 
+[[package]]
+name = "lexical-sort"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c09e4591611e231daf4d4c685a66cb0410cc1e502027a20ae55f2bb9e997207a"
+dependencies = [
+ "any_ascii",
+]
+
 [[package]]
 name = "libc"
 version = "0.2.121"
@@ -2242,6 +2257,7 @@ dependencies = [
  "fuzzy-matcher",
  "geo",
  "indxdb",
+ "lexical-sort",
  "log",
  "md-5",
  "nanoid",
diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index 3eb09d88..4c864146 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -26,6 +26,7 @@ futures = "0.3.21"
 fuzzy-matcher = "0.3.7"
 geo = { version = "0.19.0", features = ["use-serde"] }
 indxdb = { version = "0.2.0", optional = true }
+lexical-sort = "0.3.1"
 log = "0.4.16"
 md-5 = "0.10.1"
 nanoid = "0.4.0"
diff --git a/lib/src/dbs/iterator.rs b/lib/src/dbs/iterator.rs
index 5f6916ec..d11f06c1 100644
--- a/lib/src/dbs/iterator.rs
+++ b/lib/src/dbs/iterator.rs
@@ -296,8 +296,8 @@ impl Iterator {
 							a.partial_cmp(&b)
 						}
 						false => match order.direction {
-							true => a.compare(b, &order.order),
-							false => b.compare(a, &order.order),
+							true => a.compare(b, &order.order, order.collate, order.numeric),
+							false => b.compare(a, &order.order, order.collate, order.numeric),
 						},
 					};
 					//
diff --git a/lib/src/sql/value/compare.rs b/lib/src/sql/value/compare.rs
index 4d9f8d8b..0dec7568 100644
--- a/lib/src/sql/value/compare.rs
+++ b/lib/src/sql/value/compare.rs
@@ -4,14 +4,20 @@ use crate::sql::value::Value;
 use std::cmp::Ordering;
 
 impl Value {
-	pub fn compare(&self, other: &Self, path: &[Part]) -> Option<Ordering> {
+	pub fn compare(
+		&self,
+		other: &Self,
+		path: &[Part],
+		collate: bool,
+		numeric: bool,
+	) -> Option<Ordering> {
 		match path.first() {
 			// Get the current path part
 			Some(p) => match (self, other) {
 				// Current path part is an object
 				(Value::Object(a), Value::Object(b)) => match p {
 					Part::Field(f) => match (a.value.get(&f.name), b.value.get(&f.name)) {
-						(Some(a), Some(b)) => a.compare(b, path.next()),
+						(Some(a), Some(b)) => a.compare(b, path.next(), collate, numeric),
 						(Some(_), None) => Some(Ordering::Greater),
 						(None, Some(_)) => Some(Ordering::Less),
 						(_, _) => Some(Ordering::Equal),
@@ -22,7 +28,7 @@ impl Value {
 				(Value::Array(a), Value::Array(b)) => match p {
 					Part::All => {
 						for (a, b) in a.value.iter().zip(b.value.iter()) {
-							match a.compare(b, path.next()) {
+							match a.compare(b, path.next(), collate, numeric) {
 								Some(Ordering::Equal) => continue,
 								None => continue,
 								o => return o,
@@ -35,20 +41,20 @@ impl Value {
 						}
 					}
 					Part::First => match (a.value.first(), b.value.first()) {
-						(Some(a), Some(b)) => a.compare(b, path.next()),
+						(Some(a), Some(b)) => a.compare(b, path.next(), collate, numeric),
 						(Some(_), None) => Some(Ordering::Greater),
 						(None, Some(_)) => Some(Ordering::Less),
 						(_, _) => Some(Ordering::Equal),
 					},
 					Part::Last => match (a.value.first(), b.value.first()) {
-						(Some(a), Some(b)) => a.compare(b, path.next()),
+						(Some(a), Some(b)) => a.compare(b, path.next(), collate, numeric),
 						(Some(_), None) => Some(Ordering::Greater),
 						(None, Some(_)) => Some(Ordering::Less),
 						(_, _) => Some(Ordering::Equal),
 					},
 					Part::Index(i) => {
 						match (a.value.get(i.to_usize()), b.value.get(i.to_usize())) {
-							(Some(a), Some(b)) => a.compare(b, path.next()),
+							(Some(a), Some(b)) => a.compare(b, path.next(), collate, numeric),
 							(Some(_), None) => Some(Ordering::Greater),
 							(None, Some(_)) => Some(Ordering::Less),
 							(_, _) => Some(Ordering::Equal),
@@ -56,7 +62,7 @@ impl Value {
 					}
 					_ => {
 						for (a, b) in a.value.iter().zip(b.value.iter()) {
-							match a.compare(b, path) {
+							match a.compare(b, path, collate, numeric) {
 								Some(Ordering::Equal) => continue,
 								None => continue,
 								o => return o,
@@ -70,10 +76,15 @@ impl Value {
 					}
 				},
 				// Ignore everything else
-				(a, b) => a.compare(b, path.next()),
+				(a, b) => a.compare(b, path.next(), collate, numeric),
 			},
 			// No more parts so get the value
-			None => self.partial_cmp(other),
+			None => match (collate, numeric) {
+				(true, true) => self.natural_lexical_cmp(other),
+				(true, false) => self.lexical_cmp(other),
+				(false, true) => self.natural_cmp(other),
+				_ => self.partial_cmp(other),
+			},
 		}
 	}
 }
@@ -90,7 +101,7 @@ mod tests {
 		let idi = Idiom::default();
 		let one = Value::parse("{ test: { other: null, something: 456 } }");
 		let two = Value::parse("{ test: { other: null, something: 123 } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Greater));
 	}
 
@@ -99,7 +110,7 @@ mod tests {
 		let idi = Idiom::parse("test.something");
 		let one = Value::parse("{ test: { other: null, something: 456 } }");
 		let two = Value::parse("{ test: { other: null, something: 123 } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Greater));
 	}
 
@@ -108,7 +119,7 @@ mod tests {
 		let idi = Idiom::parse("test.something");
 		let one = Value::parse("{ test: { other: null } }");
 		let two = Value::parse("{ test: { other: null, something: 123 } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Less));
 	}
 
@@ -117,7 +128,7 @@ mod tests {
 		let idi = Idiom::parse("test.something");
 		let one = Value::parse("{ test: { other: null, something: 456 } }");
 		let two = Value::parse("{ test: { other: null } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Greater));
 	}
 
@@ -126,7 +137,7 @@ mod tests {
 		let idi = Idiom::parse("test.something.*");
 		let one = Value::parse("{ test: { other: null, something: [4, 5, 6] } }");
 		let two = Value::parse("{ test: { other: null, something: [1, 2, 3] } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Greater));
 	}
 
@@ -135,7 +146,7 @@ mod tests {
 		let idi = Idiom::parse("test.something.*");
 		let one = Value::parse("{ test: { other: null, something: [1, 2, 3, 4, 5, 6] } }");
 		let two = Value::parse("{ test: { other: null, something: [1, 2, 3] } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Greater));
 	}
 
@@ -144,7 +155,7 @@ mod tests {
 		let idi = Idiom::parse("test.something.*");
 		let one = Value::parse("{ test: { other: null, something: [1, 2, 3] } }");
 		let two = Value::parse("{ test: { other: null, something: [1, 2, 3, 4, 5, 6] } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Less));
 	}
 
@@ -153,7 +164,7 @@ mod tests {
 		let idi = Idiom::parse("test.something.*");
 		let one = Value::parse("{ test: { other: null, something: null } }");
 		let two = Value::parse("{ test: { other: null, something: [1, 2, 3] } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Less));
 	}
 
@@ -162,7 +173,7 @@ mod tests {
 		let idi = Idiom::parse("test.something.*");
 		let one = Value::parse("{ test: { other: null, something: [4, 5, 6] } }");
 		let two = Value::parse("{ test: { other: null, something: null } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Greater));
 	}
 
@@ -171,7 +182,7 @@ mod tests {
 		let idi = Idiom::parse("test.something.*");
 		let one = Value::parse("{ test: { other: null, something: [1, null, 3] } }");
 		let two = Value::parse("{ test: { other: null, something: [1, 2, 3] } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Less));
 	}
 
@@ -180,7 +191,7 @@ mod tests {
 		let idi = Idiom::parse("test.something.*");
 		let one = Value::parse("{ test: { other: null, something: [1, 2, 3] } }");
 		let two = Value::parse("{ test: { other: null, something: [1, null, 3] } }");
-		let res = one.compare(&two, &idi);
+		let res = one.compare(&two, &idi, false, false);
 		assert_eq!(res, Some(Ordering::Greater));
 	}
 }
diff --git a/lib/src/sql/value/value.rs b/lib/src/sql/value/value.rs
index ff86aca6..eeaec141 100644
--- a/lib/src/sql/value/value.rs
+++ b/lib/src/sql/value/value.rs
@@ -913,6 +913,37 @@ impl Value {
 			_ => false,
 		}
 	}
+
+	// -----------------------------------
+	// Sorting operations
+	// -----------------------------------
+
+	/// Comparison used by `Value::compare` when only `COLLATE` ordering is requested.
+	pub fn lexical_cmp(&self, other: &Value) -> Option<Ordering> {
+		if let (Value::Strand(lhs), Value::Strand(rhs)) = (self, other) {
+			return Some(lexical_sort::lexical_cmp(&lhs.value, &rhs.value));
+		}
+		// Only strand pairs are collated; anything else keeps the default ordering
+		self.partial_cmp(other)
+	}
+
+	/// Comparison used by `Value::compare` when only `NUMERIC` ordering is requested.
+	pub fn natural_cmp(&self, other: &Value) -> Option<Ordering> {
+		if let (Value::Strand(lhs), Value::Strand(rhs)) = (self, other) {
+			return Some(lexical_sort::natural_cmp(&lhs.value, &rhs.value));
+		}
+		// Only strand pairs get natural ordering; anything else keeps the default
+		self.partial_cmp(other)
+	}
+
+	/// Comparison used by `Value::compare` when both `COLLATE` and `NUMERIC` are requested.
+	pub fn natural_lexical_cmp(&self, other: &Value) -> Option<Ordering> {
+		if let (Value::Strand(lhs), Value::Strand(rhs)) = (self, other) {
+			return Some(lexical_sort::natural_lexical_cmp(&lhs.value, &rhs.value));
+		}
+		// Only strand pairs are handled specially; anything else keeps the default
+		self.partial_cmp(other)
+	}
 }
 
 impl fmt::Display for Value {