From 04831b183198c2f20f48d7e9d76718dd5bd526ee Mon Sep 17 00:00:00 2001 From: Rushmore Mushambi Date: Thu, 1 Sep 2022 19:46:36 +0200 Subject: [PATCH] Make validation and parser functions more robust (#89) Closes #87 --- Cargo.lock | 17 +++++ lib/Cargo.toml | 2 + lib/src/fnc/is.rs | 140 +++++++++++++++++++++++++++++++++++----- lib/src/fnc/mod.rs | 2 +- lib/src/fnc/parse.rs | 73 +++++++++------------ lib/src/sql/function.rs | 2 +- 6 files changed, 176 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ac6a973..cb2d45db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,15 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a93b8a41dbe230ad5087cc721f8d41611de654542180586b315d9f4cf6b72bef" +dependencies = [ + "psl-types", +] + [[package]] name = "adler" version = "1.0.2" @@ -2727,6 +2736,12 @@ dependencies = [ "syn", ] +[[package]] +name = "psl-types" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" + [[package]] name = "quick-error" version = "1.2.3" @@ -3641,6 +3656,7 @@ dependencies = [ name = "surrealdb" version = "1.0.0-beta.7" dependencies = [ + "addr", "argon2", "async-channel", "async-executor", @@ -3668,6 +3684,7 @@ dependencies = [ "rocksdb", "rquickjs", "scrypt", + "semver 1.0.13", "serde", "sha-1 0.10.0", "sha2 0.10.2", diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 78c3d701..e0dd9ddc 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -25,6 +25,7 @@ scripting = ["dep:js", "dep:executor"] http = ["dep:surf"] [dependencies] +addr = { version = "0.15.6", default-features = false, features = ["std"] } argon2 = "0.4.1" async-recursion = "1.0.0" bigdecimal = { version = "0.3.0", features = ["serde", "string-only"] } @@ -53,6 +54,7 @@ rand = "0.8.5" regex = "1.6.0" rocksdb = { version = "0.19.0", optional = true } scrypt = "0.10.0" +semver = { version = "1.0.13", default-features = false } serde = { version = "1.0.144", features = ["derive"] } sha-1 = "0.10.0" sha2 = "0.10.2" diff --git a/lib/src/fnc/is.rs b/lib/src/fnc/is.rs index 290fb238..e5bcd1e2 100644 --- a/lib/src/fnc/is.rs +++ b/lib/src/fnc/is.rs @@ -3,62 +3,172 @@ use crate::err::Error; use crate::sql::value::Value; use once_cell::sync::Lazy; use regex::Regex; +use semver::Version; use std::char; +use uuid::Uuid; -#[rustfmt::skip] static UUID_RE: Lazy = Lazy::new(|| Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$").unwrap()); -#[rustfmt::skip] static USER_RE: Lazy = Lazy::new(|| Regex::new(r"^(?i)[a-z0-9.!#$%&'*+/=?^_`{|}~-]+\z").unwrap()); -#[rustfmt::skip] static HOST_RE: Lazy = Lazy::new(|| Regex::new(r"(?i)^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*$").unwrap()); -#[rustfmt::skip] static DOMAIN_RE: Lazy = Lazy::new(|| Regex::new(r"^([a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62}){1}(\.[a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62})*[\._]?$",).unwrap()); -#[rustfmt::skip] static SEMVER_RE: Lazy = Lazy::new(|| Regex::new("^v?(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)(-(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(\\.(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\\+[0-9a-zA-Z-]+(\\.[0-9a-zA-Z-]+)*)?$").unwrap()); #[rustfmt::skip] static LATITUDE_RE: Lazy = Lazy::new(|| Regex::new("^[-+]?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?)$").unwrap()); #[rustfmt::skip] static LONGITUDE_RE: Lazy = Lazy::new(|| Regex::new("^[-+]?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?)$").unwrap()); +#[inline] pub fn alphanum(_: &Context, mut args: Vec) -> Result { Ok(args.remove(0).as_string().chars().all(char::is_alphanumeric).into()) } +#[inline] pub fn alpha(_: &Context, mut args: Vec) -> Result { Ok(args.remove(0).as_string().chars().all(char::is_alphabetic).into()) } +#[inline] pub fn ascii(_: &Context, mut args: Vec) -> Result { Ok(args.remove(0).as_string().is_ascii().into()) } +#[inline] pub fn domain(_: &Context, mut args: Vec) -> Result { - Ok(DOMAIN_RE.is_match(args.remove(0).as_string().as_str()).into()) + Ok(addr::parse_domain_name(args.remove(0).as_string().as_str()).is_ok().into()) } +#[inline] pub fn email(_: &Context, mut args: Vec) -> Result { - Ok(args - .remove(0) - .as_string() - .rsplit_once('@') - .map(|(user, host)| USER_RE.is_match(user) && HOST_RE.is_match(host)) - .unwrap_or(false) - .into()) + Ok(addr::parse_email_address(args.remove(0).as_string().as_str()).is_ok().into()) } +#[inline] pub fn hexadecimal(_: &Context, mut args: Vec) -> Result { Ok(args.remove(0).as_string().chars().all(|x| char::is_ascii_hexdigit(&x)).into()) } +#[inline] pub fn latitude(_: &Context, mut args: Vec) -> Result { Ok(LATITUDE_RE.is_match(args.remove(0).as_string().as_str()).into()) } +#[inline] pub fn longitude(_: &Context, mut args: Vec) -> Result { Ok(LONGITUDE_RE.is_match(args.remove(0).as_string().as_str()).into()) } +#[inline] pub fn numeric(_: &Context, mut args: Vec) -> Result { Ok(args.remove(0).as_string().chars().all(char::is_numeric).into()) } +#[inline] pub fn semver(_: &Context, mut args: Vec) -> Result { - Ok(SEMVER_RE.is_match(args.remove(0).as_string().as_str()).into()) + Ok(Version::parse(args.remove(0).as_string().as_str()).is_ok().into()) } +#[inline] pub fn uuid(_: &Context, mut args: Vec) -> Result { - Ok(UUID_RE.is_match(args.remove(0).as_string().as_str()).into()) + Ok(Uuid::parse_str(args.remove(0).as_string().as_str()).is_ok().into()) +} + +#[cfg(test)] +mod tests { + use crate::sql::value::Value; + + #[test] + fn alphanum() { + let value = super::alphanum(&Default::default(), vec!["abc123".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::alphanum(&Default::default(), vec!["y%*".into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn alpha() { + let value = super::alpha(&Default::default(), vec!["abc".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::alpha(&Default::default(), vec!["1234".into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn ascii() { + let value = super::ascii(&Default::default(), vec!["abc".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::ascii(&Default::default(), vec!["中国".into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn domain() { + let value = super::domain(&Default::default(), vec!["食狮.中国".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::domain(&Default::default(), vec!["example-.com".into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn email() { + let input = vec!["user@[fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123]".into()]; + let value = super::email(&Default::default(), input).unwrap(); + assert_eq!(value, Value::True); + + let input = vec!["john..doe@example.com".into()]; + let value = super::email(&Default::default(), input).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn hexadecimal() { + let value = super::hexadecimal(&Default::default(), vec!["00FF00".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::hexadecimal(&Default::default(), vec!["SurrealDB".into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn latitude() { + let value = super::latitude(&Default::default(), vec!["-0.118092".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::latitude(&Default::default(), vec![12345.into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn longitude() { + let value = super::longitude(&Default::default(), vec!["51.509865".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::longitude(&Default::default(), vec![12345.into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn numeric() { + let value = super::numeric(&Default::default(), vec![12345.into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::numeric(&Default::default(), vec!["abcde".into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn semver() { + let value = super::semver(&Default::default(), vec!["1.0.0".into()]).unwrap(); + assert_eq!(value, Value::True); + + let value = super::semver(&Default::default(), vec!["1.0".into()]).unwrap(); + assert_eq!(value, Value::False); + } + + #[test] + fn uuid() { + let input = vec!["123e4567-e89b-12d3-a456-426614174000".into()]; + let value = super::uuid(&Default::default(), input).unwrap(); + assert_eq!(value, Value::True); + + let input = vec!["foo-bar".into()]; + let value = super::uuid(&Default::default(), input).unwrap(); + assert_eq!(value, Value::False); + } } diff --git a/lib/src/fnc/mod.rs b/lib/src/fnc/mod.rs index 54ffdb76..79972645 100644 --- a/lib/src/fnc/mod.rs +++ b/lib/src/fnc/mod.rs @@ -108,7 +108,7 @@ pub fn synchronous(ctx: &Context<'_>, name: &str, args: Vec) -> Result args::check(ctx, name, args, Args::One, math::trimean), "math::variance" => args::check(ctx, name, args, Args::One, math::variance), // - "parse::email::domain" => args::check(ctx, name, args, Args::One, parse::email::domain), + "parse::email::host" => args::check(ctx, name, args, Args::One, parse::email::host), "parse::email::user" => args::check(ctx, name, args, Args::One, parse::email::user), "parse::url::domain" => args::check(ctx, name, args, Args::One, parse::url::domain), "parse::url::fragment" => args::check(ctx, name, args, Args::One, parse::url::fragment), diff --git a/lib/src/fnc/parse.rs b/lib/src/fnc/parse.rs index 1e7f13aa..3ba021f6 100644 --- a/lib/src/fnc/parse.rs +++ b/lib/src/fnc/parse.rs @@ -3,60 +3,47 @@ pub mod email { use crate::ctx::Context; use crate::err::Error; use crate::sql::value::Value; - use once_cell::sync::Lazy; - use regex::Regex; + use addr::email::Host; - #[rustfmt::skip] static USER_RE: Lazy = Lazy::new(|| Regex::new(r"^(?i)[a-z0-9.!#$%&'*+/=?^_`{|}~-]+\z").unwrap()); - #[rustfmt::skip] static HOST_RE: Lazy = Lazy::new(|| Regex::new(r"(?i)^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*$",).unwrap()); - - pub fn domain(_: &Context, mut args: Vec) -> Result { + pub fn host(_: &Context, mut args: Vec) -> Result { // Convert to a String let val = args.remove(0).as_string(); - // Check if value is empty - if val.is_empty() { - return Ok(Value::None); + // Parse the email address + match addr::parse_email_address(&val) { + // Return the host part + Ok(v) => match v.host() { + Host::Domain(name) => Ok(name.as_str().into()), + Host::IpAddr(ip_addr) => Ok(ip_addr.to_string().into()), + }, + Err(_) => Ok(Value::None), } - // Ensure the value contains @ - if !val.contains('@') { - return Ok(Value::None); - } - // Reverse split the value by @ - let parts: Vec<&str> = val.rsplitn(2, '@').collect(); - // Check the first part matches - if !USER_RE.is_match(parts[1]) { - return Ok(Value::None); - } - // Check the second part matches - if !HOST_RE.is_match(parts[0]) { - return Ok(Value::None); - } - // Return the domain - Ok(parts[0].into()) } pub fn user(_: &Context, mut args: Vec) -> Result { - // Convert to a String let val = args.remove(0).as_string(); - // Check if value is empty - if val.is_empty() { - return Ok(Value::None); + // Parse the email address + match addr::parse_email_address(&val) { + // Return the user part + Ok(v) => Ok(v.user().into()), + Err(_) => Ok(Value::None), } - // Ensure the value contains @ - if !val.contains('@') { - return Ok(Value::None); + } + + #[cfg(test)] + mod tests { + #[test] + fn host() { + let input = vec!["john.doe@example.com".into()]; + let value = super::host(&Default::default(), input).unwrap(); + assert_eq!(value, "example.com".into()); } - // Reverse split the value by @ - let parts: Vec<&str> = val.rsplitn(2, '@').collect(); - // Check the first part matches - if !USER_RE.is_match(parts[1]) { - return Ok(Value::None); + + #[test] + fn user() { + let input = vec!["john.doe@example.com".into()]; + let value = super::user(&Default::default(), input).unwrap(); + assert_eq!(value, "john.doe".into()); } - // Check the second part matches - if !HOST_RE.is_match(parts[0]) { - return Ok(Value::None); - } - // Return the domain - Ok(parts[1].into()) } } diff --git a/lib/src/sql/function.rs b/lib/src/sql/function.rs index 2f6ada45..1fa593cf 100644 --- a/lib/src/sql/function.rs +++ b/lib/src/sql/function.rs @@ -360,7 +360,7 @@ fn function_math(i: &str) -> IResult<&str, &str> { fn function_parse(i: &str) -> IResult<&str, &str> { alt(( - tag("parse::email::domain"), + tag("parse::email::host"), tag("parse::email::user"), tag("parse::url::domain"), tag("parse::url::fragment"),