From ba8dd7bd12a62a0db508f6862fdfb1eaaccd276c Mon Sep 17 00:00:00 2001 From: Tobie Morgan Hitchcock Date: Tue, 15 Mar 2022 12:36:41 +0000 Subject: [PATCH] Use external storekey library for key encoding --- Cargo.lock | 12 + lib/Cargo.toml | 1 + lib/src/err/mod.rs | 4 +- lib/src/key/bytes/decode.rs | 535 ---------------------------- lib/src/key/bytes/encode.rs | 675 ------------------------------------ lib/src/key/bytes/mod.rs | 42 --- lib/src/key/database.rs | 2 +- lib/src/key/db.rs | 2 +- lib/src/key/dl.rs | 2 +- lib/src/key/dt.rs | 2 +- lib/src/key/ev.rs | 2 +- lib/src/key/fd.rs | 2 +- lib/src/key/ft.rs | 2 +- lib/src/key/index.rs | 2 +- lib/src/key/ix.rs | 2 +- lib/src/key/key.rs | 2 +- lib/src/key/lv.rs | 2 +- lib/src/key/mod.rs | 1 - lib/src/key/namespace.rs | 2 +- lib/src/key/nl.rs | 2 +- lib/src/key/ns.rs | 2 +- lib/src/key/nt.rs | 2 +- lib/src/key/point.rs | 2 +- lib/src/key/sc.rs | 2 +- lib/src/key/st.rs | 2 +- lib/src/key/table.rs | 2 +- lib/src/key/tb.rs | 2 +- lib/src/key/thing.rs | 2 +- 28 files changed, 36 insertions(+), 1276 deletions(-) delete mode 100644 lib/src/key/bytes/decode.rs delete mode 100644 lib/src/key/bytes/encode.rs delete mode 100644 lib/src/key/bytes/mod.rs diff --git a/Cargo.lock b/Cargo.lock index f4a97378..0c10fdc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2173,6 +2173,17 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "storekey" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eb2d79b9cdf125f28f0d57d28b17cb7871f43fff9e34db594ecf0fc8dcc5e65" +dependencies = [ + "byteorder", + "serde", + "thiserror", +] + [[package]] name = "strsim" version = "0.10.0" @@ -2238,6 +2249,7 @@ dependencies = [ "sha-1 0.10.0", "sha2", "slug", + "storekey", "surrealdb-derive", "thiserror", "tikv-client", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 
58e4e490..7cadd8cf 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -38,6 +38,7 @@ regex = "1.5.4" msgpack = { version = "1.0.0", package = "rmp-serde" } scrypt = "0.9.0" serde = { version = "1.0.136", features = ["derive", "rc"] } +storekey = { version = "0.1.0" } sha-1 = "0.10.0" sha2 = "0.10.2" slug = "0.1.4" diff --git a/lib/src/err/mod.rs b/lib/src/err/mod.rs index 0f6d0267..b29a8839 100644 --- a/lib/src/err/mod.rs +++ b/lib/src/err/mod.rs @@ -1,9 +1,9 @@ -use crate::key::bytes::decode::Error as DecodeError; -use crate::key::bytes::encode::Error as EncodeError; use crate::sql::thing::Thing; use crate::sql::value::Value; use msgpack::encode::Error as SerdeError; use std::time::Duration; +use storekey::decode::Error as DecodeError; +use storekey::encode::Error as EncodeError; use thiserror::Error; #[cfg(feature = "kv-tikv")] diff --git a/lib/src/key/bytes/decode.rs b/lib/src/key/bytes/decode.rs deleted file mode 100644 index 014e80b5..00000000 --- a/lib/src/key/bytes/decode.rs +++ /dev/null @@ -1,535 +0,0 @@ -use byteorder::{ReadBytesExt, BE}; -use serde; -use serde::de::{Deserialize, Visitor}; -use std; -use std::fmt; -use std::io::{self, Read}; -use std::str; -use std::{i16, i32, i64, i8}; -use thiserror::Error; - -/// A decoder for deserializing bytes from an order preserving format to a value. -#[derive(Debug)] -pub struct Deserializer { - reader: R, -} - -/// Errors that may be occur when deserializing. 
-#[derive(Error, Debug)] -pub enum Error { - #[error("Couldn't setup connection to underlying datastore")] - DeserializeAnyUnsupported, - #[error("Couldn't setup connection to underlying datastore")] - UnexpectedEof, - #[error("Couldn't setup connection to underlying datastore")] - InvalidUtf8, - #[error("Couldn't setup connection to underlying datastore")] - Io(#[from] io::Error), - #[error("Couldn't setup connection to underlying datastore")] - Message(String), -} - -impl serde::de::Error for Error { - fn custom(msg: T) -> Self { - Error::Message(msg.to_string()) - } -} - -/// Shorthand for `Result`. -pub type Result = std::result::Result; - -/// Deserialize data from the given slice of bytes. -pub fn deserialize(bytes: &[u8]) -> Result -where - T: for<'de> Deserialize<'de>, -{ - deserialize_from(bytes) -} - -/// Deserialize data from the given byte reader. -pub fn deserialize_from(reader: R) -> Result -where - R: io::BufRead, - T: for<'de> Deserialize<'de>, -{ - let mut deserializer = Deserializer::new(reader); - T::deserialize(&mut deserializer) -} - -impl Deserializer { - /// Creates a new ordered bytes encoder whose output will be written to the provided writer. - pub fn new(reader: R) -> Deserializer { - Deserializer { - reader, - } - } - - /// Deserialize a `u64` that has been serialized using the `serialize_var_u64` method. - pub fn deserialize_var_u64(&mut self) -> Result { - let header = self.reader.read_u8()?; - let n = header >> 4; - let (mut val, _) = ((header & 0x0F) as u64).overflowing_shl(n as u32 * 8); - for i in 1..n + 1 { - let byte = self.reader.read_u8()?; - val += (byte as u64) << ((n - i) * 8); - } - Ok(val) - } - - /// Deserialize an `i64` that has been serialized using the `serialize_var_i64` method. 
- pub fn deserialize_var_i64(&mut self) -> Result { - let header = self.reader.read_u8()?; - let mask = ((header ^ 0x80) as i8 >> 7) as u8; - let n = ((header >> 3) ^ mask) & 0x0F; - let (mut val, _) = (((header ^ mask) & 0x07) as u64).overflowing_shl(n as u32 * 8); - for i in 1..n + 1 { - let byte = self.reader.read_u8()?; - val += ((byte ^ mask) as u64) << ((n - i) * 8); - } - let final_mask = (((mask as i64) << 63) >> 63) as u64; - val ^= final_mask; - Ok(val as i64) - } -} - -impl<'de, 'a, R> serde::de::Deserializer<'de> for &'a mut Deserializer -where - R: io::BufRead, -{ - type Error = Error; - - fn deserialize_any(self, _visitor: V) -> Result - where - V: Visitor<'de>, - { - Err(Error::DeserializeAnyUnsupported) - } - - fn deserialize_bool(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let b = !matches!(self.reader.read_u8()?, 0); - visitor.visit_bool(b) - } - - fn deserialize_i8(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let i = self.reader.read_i8()?; - visitor.visit_i8(i ^ i8::MIN) - } - - fn deserialize_i16(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let i = self.reader.read_i16::()?; - visitor.visit_i16(i ^ i16::MIN) - } - - fn deserialize_i32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let i = self.reader.read_i32::()?; - visitor.visit_i32(i ^ i32::MIN) - } - - fn deserialize_i64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let i = self.reader.read_i64::()?; - visitor.visit_i64(i ^ i64::MIN) - } - - fn deserialize_u8(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let u = self.reader.read_u8()?; - visitor.visit_u8(u) - } - - fn deserialize_u16(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let u = self.reader.read_u16::()?; - visitor.visit_u16(u) - } - - fn deserialize_u32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let u = self.reader.read_u32::()?; - visitor.visit_u32(u) - } - - fn deserialize_u64(self, visitor: V) -> Result - 
where - V: Visitor<'de>, - { - let u = self.reader.read_u64::()?; - visitor.visit_u64(u) - } - - fn deserialize_f32(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let val = self.reader.read_i32::()?; - let t = ((val ^ i32::MIN) >> 31) | i32::MIN; - let f: f32 = f32::from_bits((val ^ t) as u32); - visitor.visit_f32(f) - } - - fn deserialize_f64(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let val = self.reader.read_i64::()?; - let t = ((val ^ i64::MIN) >> 63) | i64::MIN; - let f: f64 = f64::from_bits((val ^ t) as u64); - visitor.visit_f64(f) - } - - fn deserialize_char(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let mut string = String::new(); - let mut buffer: Vec = vec![]; - match self.reader.read_until(0u8, &mut buffer) { - Ok(_) => match str::from_utf8(&buffer) { - Ok(mut s) => { - const EOF: char = '\u{0}'; - const EOF_STR: &str = "\u{0}"; - if s.len() >= EOF.len_utf8() { - let eof_start = s.len() - EOF.len_utf8(); - if &s[eof_start..] == EOF_STR { - s = &s[..eof_start]; - } - } - string.push_str(s) - } - Err(_) => panic!("1"), - }, - Err(_) => panic!("2"), - } - visitor.visit_string(string) - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let mut string = String::new(); - let mut buffer: Vec = vec![]; - match self.reader.read_until(0u8, &mut buffer) { - Ok(_) => match str::from_utf8(&buffer) { - Ok(mut s) => { - const EOF: char = '\u{0}'; - const EOF_STR: &str = "\u{0}"; - if s.len() >= EOF.len_utf8() { - let eof_start = s.len() - EOF.len_utf8(); - if &s[eof_start..] 
== EOF_STR { - s = &s[..eof_start]; - } - } - string.push_str(s) - } - Err(_) => panic!("1"), - }, - Err(_) => panic!("2"), - } - visitor.visit_string(string) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_str(visitor) - } - - fn deserialize_bytes(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - let mut bytes = vec![]; - for byte in (&mut self.reader).bytes() { - bytes.push(byte?); - } - visitor.visit_byte_buf(bytes) - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - self.deserialize_bytes(visitor) - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - match self.reader.read_u8()? { - 0 => visitor.visit_none(), - 1 => visitor.visit_some(&mut *self), - b => { - let msg = format!("expected `0` or `1` for option tag - found {}", b); - Err(Error::Message(msg)) - } - } - } - - fn deserialize_unit(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_unit() - } - - fn deserialize_unit_struct(self, _name: &'static str, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_unit() - } - - fn deserialize_newtype_struct(self, _name: &'static str, visitor: V) -> Result - where - V: Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - struct Access<'a, R> - where - R: 'a + io::BufRead, - { - deserializer: &'a mut Deserializer, - } - - impl<'de, 'a, R> serde::de::SeqAccess<'de> for Access<'a, R> - where - R: io::BufRead, - { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result> - where - T: serde::de::DeserializeSeed<'de>, - { - match serde::de::DeserializeSeed::deserialize(seed, &mut *self.deserializer) { - Ok(v) => Ok(Some(v)), - Err(Error::Io(ref err)) if err.kind() == io::ErrorKind::UnexpectedEof => { - Ok(None) - } - Err(err) => Err(err), - } - } - } - - visitor.visit_seq(Access 
{ - deserializer: self, - }) - } - - fn deserialize_tuple(self, len: usize, visitor: V) -> Result - where - V: Visitor<'de>, - { - struct Access<'a, R> - where - R: 'a + io::BufRead, - { - deserializer: &'a mut Deserializer, - len: usize, - } - - impl<'de, 'a, R> serde::de::SeqAccess<'de> for Access<'a, R> - where - R: io::BufRead, - { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result> - where - T: serde::de::DeserializeSeed<'de>, - { - if self.len == 0 { - return Ok(None); - } - self.len -= 1; - let value = serde::de::DeserializeSeed::deserialize(seed, &mut *self.deserializer)?; - Ok(Some(value)) - } - - fn size_hint(&self) -> Option { - Some(self.len) - } - } - - visitor.visit_seq(Access { - deserializer: self, - len, - }) - } - - fn deserialize_tuple_struct( - self, - _name: &'static str, - len: usize, - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - self.deserialize_tuple(len, visitor) - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: Visitor<'de>, - { - struct Access<'a, R> - where - R: 'a + io::BufRead, - { - deserializer: &'a mut Deserializer, - } - - impl<'de, 'a, R> serde::de::MapAccess<'de> for Access<'a, R> - where - R: io::BufRead, - { - type Error = Error; - - fn next_key_seed(&mut self, seed: T) -> Result> - where - T: serde::de::DeserializeSeed<'de>, - { - match serde::de::DeserializeSeed::deserialize(seed, &mut *self.deserializer) { - Ok(v) => Ok(Some(v)), - Err(Error::Io(ref err)) if err.kind() == io::ErrorKind::UnexpectedEof => { - Ok(None) - } - Err(err) => Err(err), - } - } - - fn next_value_seed(&mut self, seed: T) -> Result - where - T: serde::de::DeserializeSeed<'de>, - { - serde::de::DeserializeSeed::deserialize(seed, &mut *self.deserializer) - } - } - - visitor.visit_map(Access { - deserializer: self, - }) - } - - fn deserialize_struct( - self, - _name: &'static str, - fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - 
self.deserialize_tuple(fields.len(), visitor) - } - - fn deserialize_enum( - self, - _name: &'static str, - _fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: Visitor<'de>, - { - impl<'de, 'a, R> serde::de::EnumAccess<'de> for &'a mut Deserializer - where - R: io::BufRead, - { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant)> - where - V: serde::de::DeserializeSeed<'de>, - { - let idx: u32 = serde::de::Deserialize::deserialize(&mut *self)?; - let val: Result<_> = - seed.deserialize(serde::de::IntoDeserializer::into_deserializer(idx)); - Ok((val?, self)) - } - } - - impl<'de, 'a, R> serde::de::VariantAccess<'de> for &'a mut Deserializer - where - R: io::BufRead, - { - type Error = Error; - - fn unit_variant(self) -> Result<()> { - Ok(()) - } - - fn newtype_variant_seed(self, seed: T) -> Result - where - T: serde::de::DeserializeSeed<'de>, - { - serde::de::DeserializeSeed::deserialize(seed, self) - } - - fn tuple_variant(self, len: usize, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - serde::de::Deserializer::deserialize_tuple(self, len, visitor) - } - - fn struct_variant( - self, - fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: serde::de::Visitor<'de>, - { - serde::de::Deserializer::deserialize_tuple(self, fields.len(), visitor) - } - } - - visitor.visit_enum(self) - } - - fn deserialize_ignored_any(self, _visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - Err(Error::DeserializeAnyUnsupported) - } - - fn deserialize_identifier(self, _visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - Err(Error::DeserializeAnyUnsupported) - } -} diff --git a/lib/src/key/bytes/encode.rs b/lib/src/key/bytes/encode.rs deleted file mode 100644 index 36b4cadf..00000000 --- a/lib/src/key/bytes/encode.rs +++ /dev/null @@ -1,675 +0,0 @@ -use byteorder::{WriteBytesExt, BE}; -use serde::{self, Serialize}; -use std::fmt; -use 
std::io::{self, Write}; -use std::{self, i16, i32, i64, i8}; -use thiserror::Error; - -/// A serializer for a byte format that preserves lexicographic sort order. -/// -/// The byte format is designed with a few goals: -/// -/// * Order must be preserved -/// * Serialized representations should be as compact as possible -/// * Type information is *not* serialized with values -/// -/// #### Supported Data Types -/// -/// ##### Unsigned Integers -/// -/// `u8`, `u16`, `u32`, and `u64` are serialized into 1, 2, 4, and 8 bytes of output, respectively. -/// Order is preserved by encoding the bytes in big-endian (most-significant bytes first) format. -/// `usize` is always serialized as if it were `u64`. -/// -/// The `Serializer` also supports variable-length serialization of unsigned integers via the -/// `serialize_var_u64` method. Smaller magnitude values (closer to 0) will encode into fewer -/// bytes. -/// -/// ##### Signed Integers -/// -/// `i8`, `i16`, `i32`, and `i64` are encoded into 1, 2, 4, and 8 bytes of output, respectively. -/// Order is preserved by taking the bitwise complement of the value, and encoding the resulting -/// bytes in big-endian format. `isize` is always serialized as if it were `i64`. -/// -/// The `Serializer` also supports variable-length serialization of signed integers via the -/// `serialize_var_i64` method. Smaller magnitude values (closer to 0) will encode into fewer -/// bytes. -/// -/// ##### Floating Point Numbers -/// -/// `f32` and `f64` are serialized into 4 and 8 bytes of output, respectively. Order is preserved -/// by encoding the value, or the bitwise complement of the value if negative, into bytes in -/// big-endian format. `NAN` values will sort after all other values. In general, it is unwise to -/// use IEEE 754 floating point values in keys, because rounding errors are pervasive. It is -/// typically hard or impossible to use an approximate 'epsilon' approach when using keys for -/// lookup. 
-/// -/// ##### Characters -/// -/// Characters are serialized into between 1 and 4 bytes of output. The resulting length is -/// equivalent to the result of `char::len_utf8`. -/// -/// ##### Booleans -/// -/// Booleans are serialized into a single byte of output. `false` values will sort before `true` -/// values. -/// -/// ##### Options -/// -/// An optional wrapper type adds a 1 byte overhead to the wrapped data type. `None` values will -/// sort before `Some` values. -/// -/// ##### Structs, Tuples and Fixed-Size Arrays -/// -/// Structs and tuples are serialized by serializing their consituent fields in order with no -/// prefix, suffix, or padding bytes. -/// -/// ##### Enums -/// -/// Enums are encoded with a `u32` variant index tag, plus the consituent fields in the case of an -/// enum-struct. -/// -/// ##### Sequences, Strings and Maps -/// -/// Sequences are ordered from the most significant to the least. Strings are serialized into their -/// natural UTF8 representation. -/// -/// The ordering of sequential elements follows the `Ord` implementation of `slice`, that is, from -/// left to write when viewing a `Vec` printed via the `{:?}` formatter. -/// -/// The caveat with these types is that their length must be known before deserialization. This is -/// because the length is *not* serialized prior to the elements in order to preserve ordering and -/// there is no trivial way to tokenise between sequential elements that 1. does not corrupt -/// ordering and 2. may not confuse tokenisation with following elements of a different type during -/// tuple or struct deserialization. Thus, when deserializing sequences, strings and maps, the -/// process will only be considered complete once the inner `reader` produces an EOF character. -#[derive(Debug)] -pub struct Serializer -where - W: Write, -{ - writer: W, -} - -/// Errors that might occur while serializing. 
-#[derive(Error, Debug)] -pub enum Error { - #[error("Couldn't setup connection to underlying datastore")] - Message(String), - #[error("Couldn't setup connection to underlying datastore")] - Io(#[from] io::Error), -} - -impl serde::ser::Error for Error { - fn custom(msg: T) -> Self { - Error::Message(msg.to_string()) - } -} - -/// Shorthand for `Result`. -pub type Result = std::result::Result; - -/// Serialize data into a vector of `u8` bytes. -pub fn serialize(v: &T) -> Result> -where - T: Serialize, -{ - let mut bytes = vec![]; - { - let mut buffered = io::BufWriter::new(&mut bytes); - serialize_into(&mut buffered, v)?; - } - Ok(bytes) -} - -/// Serialize data into the given vector of `u8` bytes. -pub fn serialize_into(writer: W, value: &T) -> Result<()> -where - W: Write, - T: Serialize, -{ - let mut serializer = Serializer::new(writer); - value.serialize(&mut serializer) -} - -impl Serializer -where - W: Write, -{ - /// Creates a new ordered bytes encoder whose output will be written to the provided writer. - pub fn new(writer: W) -> Serializer { - Serializer { - writer, - } - } - - /// Encode a `u64` into a variable number of bytes. - /// - /// The variable-length encoding scheme uses between 1 and 9 bytes depending on the value. - /// Smaller magnitude (closer to 0) `u64`s will encode to fewer bytes. - /// - /// ##### Encoding - /// - /// The encoding uses the first 4 bits to store the number of trailing bytes, between 0 and 8. - /// Subsequent bits are the input value in big-endian format with leading 0 bytes removed. - /// - /// ##### Encoded Size - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - ///
range | size (bytes)
[0, 2^4) | 1
[2^4, 2^12) | 2
[2^12, 2^20) | 3
[2^20, 2^28) | 4
[2^28, 2^36) | 5
[2^36, 2^44) | 6
[2^44, 2^52) | 7
[2^52, 2^60) | 8
[2^60, 2^64) | 9
- pub fn serialize_var_u64(&mut self, val: u64) -> Result<()> { - if val < 1 << 4 { - self.writer.write_u8(val as u8) - } else if val < 1 << 12 { - self.writer.write_u16::((val as u16) | 1 << 12) - } else if val < 1 << 20 { - self.writer.write_u8(((val >> 16) as u8) | 2 << 4)?; - self.writer.write_u16::(val as u16) - } else if val < 1 << 28 { - self.writer.write_u32::((val as u32) | 3 << 28) - } else if val < 1 << 36 { - self.writer.write_u8(((val >> 32) as u8) | 4 << 4)?; - self.writer.write_u32::(val as u32) - } else if val < 1 << 44 { - self.writer.write_u16::(((val >> 32) as u16) | 5 << 12)?; - self.writer.write_u32::(val as u32) - } else if val < 1 << 52 { - self.writer.write_u8(((val >> 48) as u8) | 6 << 4)?; - self.writer.write_u16::((val >> 32) as u16)?; - self.writer.write_u32::(val as u32) - } else if val < 1 << 60 { - self.writer.write_u64::((val as u64) | 7 << 60) - } else { - self.writer.write_u8(8 << 4)?; - self.writer.write_u64::(val) - } - .map_err(From::from) - } - - /// Encode an `i64` into a variable number of bytes. - /// - /// The variable-length encoding scheme uses between 1 and 9 bytes depending on the value. - /// Smaller magnitude (closer to 0) `i64`s will encode to fewer bytes. - /// - /// ##### Encoding - /// - /// The encoding uses the first bit to encode the sign: `0` for negative values and `1` for - /// positive values. The following 4 bits store the number of trailing bytes, between 0 and 8. - /// Subsequent bits are the absolute value of the input value in big-endian format with leading - /// 0 bytes removed. If the original value was negative, than 1 is subtracted from the absolute - /// value before encoding. Finally, if the value is negative, all bits except the sign bit are - /// flipped (1s become 0s and 0s become 1s). 
- /// - /// ##### Encoded Size - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - ///
negative range | positive range | size (bytes)
[-2^3, 0) | [0, 2^3) | 1
[-2^11, -2^3) | [2^3, 2^11) | 2
[-2^19, -2^11) | [2^11, 2^19) | 3
[-2^27, -2^19) | [2^19, 2^27) | 4
[-2^35, -2^27) | [2^27, 2^35) | 5
[-2^43, -2^35) | [2^35, 2^43) | 6
[-2^51, -2^43) | [2^43, 2^51) | 7
[-2^59, -2^51) | [2^51, 2^59) | 8
[-2^63, -2^59) | [2^59, 2^63) | 9
- pub fn serialize_var_i64(&mut self, v: i64) -> Result<()> { - // The mask is 0 for positive input and u64::MAX for negative input - let mask = (v >> 63) as u64; - let val = v.abs() as u64 - (1 & mask); - if val < 1 << 3 { - let masked = (val | (0x10 << 3)) ^ mask; - self.writer.write_u8(masked as u8) - } else if val < 1 << 11 { - let masked = (val | (0x11 << 11)) ^ mask; - self.writer.write_u16::(masked as u16) - } else if val < 1 << 19 { - let masked = (val | (0x12 << 19)) ^ mask; - self.writer.write_u8((masked >> 16) as u8)?; - self.writer.write_u16::(masked as u16) - } else if val < 1 << 27 { - let masked = (val | (0x13 << 27)) ^ mask; - self.writer.write_u32::(masked as u32) - } else if val < 1 << 35 { - let masked = (val | (0x14 << 35)) ^ mask; - self.writer.write_u8((masked >> 32) as u8)?; - self.writer.write_u32::(masked as u32) - } else if val < 1 << 43 { - let masked = (val | (0x15 << 43)) ^ mask; - self.writer.write_u16::((masked >> 32) as u16)?; - self.writer.write_u32::(masked as u32) - } else if val < 1 << 51 { - let masked = (val | (0x16 << 51)) ^ mask; - self.writer.write_u8((masked >> 48) as u8)?; - self.writer.write_u16::((masked >> 32) as u16)?; - self.writer.write_u32::(masked as u32) - } else if val < 1 << 59 { - let masked = (val | (0x17 << 59)) ^ mask; - self.writer.write_u64::(masked as u64) - } else { - self.writer.write_u8((0x18 << 3) ^ mask as u8)?; - self.writer.write_u64::(val ^ mask) - } - .map_err(From::from) - } -} - -impl<'a, W> serde::Serializer for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - type SerializeSeq = Self; - type SerializeTuple = Self; - type SerializeTupleStruct = Self; - type SerializeTupleVariant = Self; - type SerializeMap = Self; - type SerializeStruct = Self; - type SerializeStructVariant = Self; - - fn is_human_readable(&self) -> bool { - false - } - - fn serialize_bool(self, v: bool) -> Result<()> { - let b = if v { - 1 - } else { - 0 - }; - self.writer.write_u8(b)?; - 
Ok(()) - } - - fn serialize_i8(self, v: i8) -> Result<()> { - self.writer.write_i8(v ^ i8::MIN)?; - Ok(()) - } - - fn serialize_i16(self, v: i16) -> Result<()> { - self.writer.write_i16::(v ^ i16::MIN)?; - Ok(()) - } - - fn serialize_i32(self, v: i32) -> Result<()> { - self.writer.write_i32::(v ^ i32::MIN)?; - Ok(()) - } - - fn serialize_i64(self, v: i64) -> Result<()> { - self.writer.write_i64::(v ^ i64::MIN)?; - Ok(()) - } - - fn serialize_u8(self, v: u8) -> Result<()> { - self.writer.write_u8(v)?; - Ok(()) - } - - fn serialize_u16(self, v: u16) -> Result<()> { - self.writer.write_u16::(v)?; - Ok(()) - } - - fn serialize_u32(self, v: u32) -> Result<()> { - self.writer.write_u32::(v)?; - Ok(()) - } - - fn serialize_u64(self, v: u64) -> Result<()> { - self.writer.write_u64::(v)?; - Ok(()) - } - - fn serialize_f32(self, v: f32) -> Result<()> { - let val = v.to_bits() as i32; - let t = (val >> 31) | i32::MIN; - self.writer.write_i32::(val ^ t)?; - Ok(()) - } - - fn serialize_f64(self, v: f64) -> Result<()> { - let val = v.to_bits() as i64; - let t = (val >> 63) | i64::MIN; - self.writer.write_i64::(val ^ t)?; - Ok(()) - } - - fn serialize_char(self, v: char) -> Result<()> { - self.serialize_str(&v.to_string())?; - Ok(()) - } - - fn serialize_str(self, v: &str) -> Result<()> { - self.writer.write_all(v.as_bytes())?; - self.writer.write_u8(0)?; - Ok(()) - } - - fn serialize_bytes(self, v: &[u8]) -> Result<()> { - self.writer.write_all(v)?; - Ok(()) - } - - fn serialize_none(self) -> Result<()> { - self.writer.write_u8(0)?; - Ok(()) - } - - fn serialize_some(self, v: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - self.writer.write_u8(1)?; - v.serialize(self) - } - - fn serialize_unit(self) -> Result<()> { - self.writer.write_all(&[])?; - Ok(()) - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result<()> { - self.serialize_unit() - } - - fn serialize_unit_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - ) 
-> Result<()> { - self.serialize_u32(variant_index) - } - - fn serialize_newtype_struct(self, _name: &'static str, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(self) - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - value: &T, - ) -> Result<()> - where - T: ?Sized + Serialize, - { - self.writer.write_u32::(variant_index)?; - value.serialize(self) - } - - fn serialize_seq(self, _len: Option) -> Result { - Ok(self) - } - - fn serialize_tuple(self, _len: usize) -> Result { - Ok(self) - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(self) - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - self.writer.write_u32::(variant_index)?; - Ok(self) - } - - fn serialize_map(self, _len: Option) -> Result { - Ok(self) - } - - fn serialize_struct(self, _name: &'static str, _len: usize) -> Result { - Ok(self) - } - - fn serialize_struct_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - self.writer.write_u32::(variant_index)?; - Ok(self) - } -} - -// Compound Implementations. 
- -impl<'a, W> serde::ser::SerializeSeq for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(&mut **self) - } - - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W> serde::ser::SerializeTuple for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(&mut **self) - } - - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W> serde::ser::SerializeTupleStruct for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(&mut **self) - } - - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W> serde::ser::SerializeTupleVariant for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(&mut **self) - } - - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W> serde::ser::SerializeMap for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - - fn serialize_key(&mut self, key: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - key.serialize(&mut **self) - } - - fn serialize_value(&mut self, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(&mut **self) - } - - fn end(self) -> Result<()> { - Ok(()) - } -} - -impl<'a, W> serde::ser::SerializeStruct for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(&mut **self) - } - - fn end(self) -> Result<()> { - Ok(()) - } -} - 
-impl<'a, W> serde::ser::SerializeStructVariant for &'a mut Serializer -where - W: Write, -{ - type Ok = (); - type Error = Error; - - fn serialize_field(&mut self, _key: &'static str, value: &T) -> Result<()> - where - T: ?Sized + Serialize, - { - value.serialize(&mut **self) - } - - fn end(self) -> Result<()> { - Ok(()) - } -} diff --git a/lib/src/key/bytes/mod.rs b/lib/src/key/bytes/mod.rs deleted file mode 100644 index 77227e28..00000000 --- a/lib/src/key/bytes/mod.rs +++ /dev/null @@ -1,42 +0,0 @@ -//! Binary encoding for Rust values which preserves lexicographic sort order. Order-preserving -//! encoding is useful for creating keys for sorted key-value stores with byte string typed keys, -//! such as [leveldb](https://github.com/google/leveldb) and -//! [sled](https://github.com/spacejam/sled). -//! -//! `bytekey` is *not* a self-describing format. In other words, Type information is *not* -//! serialized alongside values, and thus the type of serialized data must be known in order to -//! perform deserialization. -//! -//! #### Supported Data Types -//! -//! `bytekey` currently supports all Rust primitives, strings, options, structs, enums, vecs, and -//! tuples. See **Serializer** for details on the serialization format. -//! -//! #### Type Evolution -//! -//! In general, the exact type of a serialized value must be known in order to correctly -//! deserialize it. For structs and enums, the type is effectively frozen once any values of the -//! type have been serialized: changes to the struct or enum will cause deserialization of already -//! serialized values to fail or return incorrect values. The only exception is adding new variants -//! to the end of an existing enum. Enum variants may *not* change type, be removed, or be -//! reordered. All changes to structs, including adding, removing, reordering, or changing the type -//! of a field are forbidden. -//! -//! These restrictions lead to a few best-practices when using `bytekey` serialization: -//! 
-//! * Don't use `bytekey` unless you need lexicographic ordering of serialized values! A more -//! general encoding library such as [Cap'n Proto](https://github.com/dwrensha/capnproto-rust) or -//! [bincode](https://github.com/TyOverby/binary-encode) will serve you better if this feature is -//! not necessary. -//! * If you persist serialized values for longer than the life of a process (i.e. you write the -//! serialized values to a file or a database), consider using an enum as a top-level wrapper -//! type. This will allow you to seamlessly add a new variant when you need to change the key -//! format in a backwards-compatible manner (the different key types will sort seperately). If -//! your enum has less than 16 variants, then the overhead is just a single byte in serialized -//! output. - -pub mod decode; -pub mod encode; - -pub use self::decode::{deserialize, Deserializer}; -pub use self::encode::{serialize, Serializer}; diff --git a/lib/src/key/database.rs b/lib/src/key/database.rs index f2a25a07..e2b8c08e 100644 --- a/lib/src/key/database.rs +++ b/lib/src/key/database.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Database { diff --git a/lib/src/key/db.rs b/lib/src/key/db.rs index fd19ea5b..049be8d4 100644 --- a/lib/src/key/db.rs +++ b/lib/src/key/db.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Db { diff --git a/lib/src/key/dl.rs b/lib/src/key/dl.rs index 8e2da4c9..d732a39a 100644 --- a/lib/src/key/dl.rs +++ b/lib/src/key/dl.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, 
Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Dl { diff --git a/lib/src/key/dt.rs b/lib/src/key/dt.rs index 0cb03f95..e858065d 100644 --- a/lib/src/key/dt.rs +++ b/lib/src/key/dt.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Dt { diff --git a/lib/src/key/ev.rs b/lib/src/key/ev.rs index 18c32bc7..50063cf5 100644 --- a/lib/src/key/ev.rs +++ b/lib/src/key/ev.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Ev { diff --git a/lib/src/key/fd.rs b/lib/src/key/fd.rs index 929864ba..895f7e6c 100644 --- a/lib/src/key/fd.rs +++ b/lib/src/key/fd.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Fd { diff --git a/lib/src/key/ft.rs b/lib/src/key/ft.rs index b5d01035..7ea37a85 100644 --- a/lib/src/key/ft.rs +++ b/lib/src/key/ft.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Ft { diff --git a/lib/src/key/index.rs b/lib/src/key/index.rs index 88a6c5f2..ac02df7a 100644 --- a/lib/src/key/index.rs +++ b/lib/src/key/index.rs @@ -1,7 +1,7 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use crate::sql::value::Value; use serde::{Deserialize, Serialize}; 
+use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Index { diff --git a/lib/src/key/ix.rs b/lib/src/key/ix.rs index bbe23df3..c9052712 100644 --- a/lib/src/key/ix.rs +++ b/lib/src/key/ix.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Ix { diff --git a/lib/src/key/key.rs b/lib/src/key/key.rs index 03cb163e..d9d66842 100644 --- a/lib/src/key/key.rs +++ b/lib/src/key/key.rs @@ -1,7 +1,7 @@ use super::*; use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; // Ignore specifies an ignored field pub const IGNORE: &str = "\x00"; diff --git a/lib/src/key/lv.rs b/lib/src/key/lv.rs index a3951bc8..5556cbf0 100644 --- a/lib/src/key/lv.rs +++ b/lib/src/key/lv.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Lv { diff --git a/lib/src/key/mod.rs b/lib/src/key/mod.rs index ecfb1a53..5fe49f75 100644 --- a/lib/src/key/mod.rs +++ b/lib/src/key/mod.rs @@ -1,6 +1,5 @@ pub use self::key::*; -pub mod bytes; pub mod database; pub mod db; pub mod dl; diff --git a/lib/src/key/namespace.rs b/lib/src/key/namespace.rs index d77acc52..ac34d167 100644 --- a/lib/src/key/namespace.rs +++ b/lib/src/key/namespace.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Namespace { diff --git a/lib/src/key/nl.rs 
b/lib/src/key/nl.rs index bba52631..3c4d46c9 100644 --- a/lib/src/key/nl.rs +++ b/lib/src/key/nl.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Nl { diff --git a/lib/src/key/ns.rs b/lib/src/key/ns.rs index bada44ed..8eb284f4 100644 --- a/lib/src/key/ns.rs +++ b/lib/src/key/ns.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Ns { diff --git a/lib/src/key/nt.rs b/lib/src/key/nt.rs index f92642a7..c4a1b501 100644 --- a/lib/src/key/nt.rs +++ b/lib/src/key/nt.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Nt { diff --git a/lib/src/key/point.rs b/lib/src/key/point.rs index e5cde9a4..850d7eb2 100644 --- a/lib/src/key/point.rs +++ b/lib/src/key/point.rs @@ -1,7 +1,7 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use crate::sql::value::Value; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Point { diff --git a/lib/src/key/sc.rs b/lib/src/key/sc.rs index 7b3aabbe..7a177e5a 100644 --- a/lib/src/key/sc.rs +++ b/lib/src/key/sc.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Sc { diff --git a/lib/src/key/st.rs 
b/lib/src/key/st.rs index c7890779..5b49ef80 100644 --- a/lib/src/key/st.rs +++ b/lib/src/key/st.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct St { diff --git a/lib/src/key/table.rs b/lib/src/key/table.rs index bf4231ce..57be2c91 100644 --- a/lib/src/key/table.rs +++ b/lib/src/key/table.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Table { diff --git a/lib/src/key/tb.rs b/lib/src/key/tb.rs index 9cac5a0c..7b39f546 100644 --- a/lib/src/key/tb.rs +++ b/lib/src/key/tb.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Tb { diff --git a/lib/src/key/thing.rs b/lib/src/key/thing.rs index 1c443d55..c16fe1bf 100644 --- a/lib/src/key/thing.rs +++ b/lib/src/key/thing.rs @@ -1,6 +1,6 @@ use crate::err::Error; -use crate::key::bytes::{deserialize, serialize}; use serde::{Deserialize, Serialize}; +use storekey::{deserialize, serialize}; #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Serialize, Deserialize)] pub struct Thing {