Make validation and parser functions more robust (#89)

Closes #87
This commit is contained in:
Rushmore Mushambi 2022-09-01 19:46:36 +02:00 committed by GitHub
parent 9dabae1887
commit 04831b1831
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 176 additions and 60 deletions

17
Cargo.lock generated
View file

@ -2,6 +2,15 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "addr"
version = "0.15.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a93b8a41dbe230ad5087cc721f8d41611de654542180586b315d9f4cf6b72bef"
dependencies = [
"psl-types",
]
[[package]]
name = "adler"
version = "1.0.2"
@ -2727,6 +2736,12 @@ dependencies = [
"syn",
]
[[package]]
name = "psl-types"
version = "2.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac"
[[package]]
name = "quick-error"
version = "1.2.3"
@ -3641,6 +3656,7 @@ dependencies = [
name = "surrealdb"
version = "1.0.0-beta.7"
dependencies = [
"addr",
"argon2",
"async-channel",
"async-executor",
@ -3668,6 +3684,7 @@ dependencies = [
"rocksdb",
"rquickjs",
"scrypt",
"semver 1.0.13",
"serde",
"sha-1 0.10.0",
"sha2 0.10.2",

View file

@ -25,6 +25,7 @@ scripting = ["dep:js", "dep:executor"]
http = ["dep:surf"]
[dependencies]
addr = { version = "0.15.6", default-features = false, features = ["std"] }
argon2 = "0.4.1"
async-recursion = "1.0.0"
bigdecimal = { version = "0.3.0", features = ["serde", "string-only"] }
@ -53,6 +54,7 @@ rand = "0.8.5"
regex = "1.6.0"
rocksdb = { version = "0.19.0", optional = true }
scrypt = "0.10.0"
semver = { version = "1.0.13", default-features = false }
serde = { version = "1.0.144", features = ["derive"] }
sha-1 = "0.10.0"
sha2 = "0.10.2"

View file

@ -3,62 +3,172 @@ use crate::err::Error;
use crate::sql::value::Value;
use once_cell::sync::Lazy;
use regex::Regex;
use semver::Version;
use std::char;
use uuid::Uuid;
// Hyphenated UUID text: lowercase hexadecimal groups of 8-4-4-4-12 characters.
#[rustfmt::skip] static UUID_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$").unwrap());
// Local part of an email address: one or more letters, digits or the listed
// punctuation characters, matched case-insensitively.
#[rustfmt::skip] static USER_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(?i)[a-z0-9.!#$%&'*+/=?^_`{|}~-]+\z").unwrap());
// Host name: dot-separated labels of at most 63 characters that start and end
// with a letter or digit and may contain hyphens in between (case-insensitive).
#[rustfmt::skip] static HOST_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*$").unwrap());
// Domain name: dot-separated labels that start with a letter, digit or
// underscore followed by up to 62 letters, digits, underscores or hyphens,
// with an optional trailing `.` or `_`.
#[rustfmt::skip] static DOMAIN_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^([a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62}){1}(\.[a-zA-Z0-9_]{1}[a-zA-Z0-9_-]{0,62})*[\._]?$",).unwrap());
// Semantic version: optional leading `v`, three dot-separated numeric parts
// without leading zeros, then optional `-pre.release` identifiers and optional
// `+build.metadata`.
#[rustfmt::skip] static SEMVER_RE: Lazy<Regex> = Lazy::new(|| Regex::new("^v?(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)\\.(?:0|[1-9]\\d*)(-(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(\\.(0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\\+[0-9a-zA-Z-]+(\\.[0-9a-zA-Z-]+)*)?$").unwrap());
// Latitude: optional sign, then either 0-89 with an optional decimal fraction
// or exactly 90 with an optional all-zero fraction.
#[rustfmt::skip] static LATITUDE_RE: Lazy<Regex> = Lazy::new(|| Regex::new("^[-+]?([1-8]?\\d(\\.\\d+)?|90(\\.0+)?)$").unwrap());
// Longitude: optional sign, then either 0-179 with an optional decimal
// fraction or exactly 180 with an optional all-zero fraction. Longitudes span
// -180..=180, so this must not reuse the 90-degree bound used for latitudes.
#[rustfmt::skip] static LONGITUDE_RE: Lazy<Regex> = Lazy::new(|| Regex::new("^[-+]?(180(\\.0+)?|(1[0-7]\\d|[1-9]?\\d)(\\.\\d+)?)$").unwrap());
/// Reports whether every character of the first argument is alphanumeric.
///
/// The argument is taken out of `args` and converted with `as_string`; each
/// character is then tested with `char::is_alphanumeric`. An empty string
/// yields a true value because there is no character to reject.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn alphanum(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    let text = args.remove(0).as_string();
    let only_alphanumeric = text.chars().all(|c| c.is_alphanumeric());
    Ok(only_alphanumeric.into())
}
/// Reports whether every character of the first argument is alphabetic.
///
/// The argument is taken out of `args` and converted with `as_string`; each
/// character is then tested with `char::is_alphabetic`. An empty string
/// yields a true value because there is no character to reject.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn alpha(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    let text = args.remove(0).as_string();
    let only_alphabetic = text.chars().all(|c| c.is_alphabetic());
    Ok(only_alphabetic.into())
}
/// Reports whether the first argument, converted with `as_string`, consists
/// solely of ASCII characters.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn ascii(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    let text = args.remove(0).as_string();
    let only_ascii = text.is_ascii();
    Ok(only_ascii.into())
}
/// Reports whether the first argument is a valid domain name.
///
/// The argument is converted with `as_string` and handed to
/// `addr::parse_domain_name`; the result is true exactly when that parser
/// accepts the text. Delegating to the parser (rather than an ASCII-only
/// pattern) lets internationalised names such as `食狮.中国` validate while
/// malformed labels such as `example-.com` are rejected.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn domain(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    Ok(addr::parse_domain_name(args.remove(0).as_string().as_str()).is_ok().into())
}
/// Reports whether the first argument is a valid email address.
///
/// The argument is converted with `as_string` and handed to
/// `addr::parse_email_address`; the result is true exactly when that parser
/// accepts the text. This covers hosts written as IP literals
/// (`user@[fd79:...]`) and rejects local parts such as `john..doe`.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn email(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    Ok(addr::parse_email_address(args.remove(0).as_string().as_str()).is_ok().into())
}
/// Reports whether every character of the first argument is an ASCII
/// hexadecimal digit (`0-9`, `a-f`, `A-F`).
///
/// The argument is converted with `as_string` first. An empty string yields a
/// true value because there is no character to reject.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn hexadecimal(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    let text = args.remove(0).as_string();
    let only_hex_digits = text.chars().all(|c| c.is_ascii_hexdigit());
    Ok(only_hex_digits.into())
}
/// Reports whether the first argument, converted with `as_string`, matches
/// the `LATITUDE_RE` pattern.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn latitude(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    let text = args.remove(0).as_string();
    let matches_pattern = LATITUDE_RE.is_match(text.as_str());
    Ok(matches_pattern.into())
}
/// Reports whether the first argument, converted with `as_string`, matches
/// the `LONGITUDE_RE` pattern.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn longitude(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    let text = args.remove(0).as_string();
    let matches_pattern = LONGITUDE_RE.is_match(text.as_str());
    Ok(matches_pattern.into())
}
/// Reports whether every character of the first argument is numeric.
///
/// The argument is converted with `as_string` and each character is tested
/// with `char::is_numeric`, which follows the Unicode number categories and is
/// therefore not limited to the ASCII digits `0-9`. An empty string yields a
/// true value because there is no character to reject.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn numeric(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    let text = args.remove(0).as_string();
    let only_numeric = text.chars().all(|c| c.is_numeric());
    Ok(only_numeric.into())
}
/// Reports whether the first argument is a valid semantic version.
///
/// The argument is converted with `as_string` and handed to
/// `semver::Version::parse`; the result is true exactly when parsing
/// succeeds, so `1.0.0` validates while an incomplete version such as `1.0`
/// does not.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn semver(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    Ok(Version::parse(args.remove(0).as_string().as_str()).is_ok().into())
}
/// Reports whether the first argument is a valid UUID.
///
/// The argument is converted with `as_string` and handed to
/// `Uuid::parse_str`; the result is true exactly when parsing succeeds.
// NOTE(review): `Uuid::parse_str` may accept more textual forms than the
// lowercase hyphenated layout alone - confirm this is the intended contract.
///
/// # Panics
///
/// Panics if `args` is empty.
#[inline]
pub fn uuid(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
    Ok(Uuid::parse_str(args.remove(0).as_string().as_str()).is_ok().into())
}
#[cfg(test)]
mod tests {
    use crate::sql::value::Value;

    // Each test feeds one input that must validate and one that must not.

    #[test]
    fn alphanum() {
        let accepted = super::alphanum(&Default::default(), vec!["abc123".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        let rejected = super::alphanum(&Default::default(), vec!["y%*".into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn alpha() {
        let accepted = super::alpha(&Default::default(), vec!["abc".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        let rejected = super::alpha(&Default::default(), vec!["1234".into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn ascii() {
        let accepted = super::ascii(&Default::default(), vec!["abc".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        let rejected = super::ascii(&Default::default(), vec!["中国".into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn domain() {
        // Internationalised domain names must be accepted.
        let accepted = super::domain(&Default::default(), vec!["食狮.中国".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        // A label may not end with a hyphen.
        let rejected = super::domain(&Default::default(), vec!["example-.com".into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn email() {
        // The host may be an IPv6 literal in square brackets.
        let ipv6_host = vec!["user@[fd79:cdcb:38cc:9dd:f686:e06d:32f3:c123]".into()];
        let accepted = super::email(&Default::default(), ipv6_host).unwrap();
        assert_eq!(accepted, Value::True);

        // Consecutive dots in the local part are invalid.
        let double_dot = vec!["john..doe@example.com".into()];
        let rejected = super::email(&Default::default(), double_dot).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn hexadecimal() {
        let accepted = super::hexadecimal(&Default::default(), vec!["00FF00".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        let rejected = super::hexadecimal(&Default::default(), vec!["SurrealDB".into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn latitude() {
        let accepted = super::latitude(&Default::default(), vec!["-0.118092".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        let rejected = super::latitude(&Default::default(), vec![12345.into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn longitude() {
        let accepted = super::longitude(&Default::default(), vec!["51.509865".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        let rejected = super::longitude(&Default::default(), vec![12345.into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn numeric() {
        let accepted = super::numeric(&Default::default(), vec![12345.into()]).unwrap();
        assert_eq!(accepted, Value::True);

        let rejected = super::numeric(&Default::default(), vec!["abcde".into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn semver() {
        let accepted = super::semver(&Default::default(), vec!["1.0.0".into()]).unwrap();
        assert_eq!(accepted, Value::True);

        // A version needs all three numeric components.
        let rejected = super::semver(&Default::default(), vec!["1.0".into()]).unwrap();
        assert_eq!(rejected, Value::False);
    }

    #[test]
    fn uuid() {
        let well_formed = vec!["123e4567-e89b-12d3-a456-426614174000".into()];
        let accepted = super::uuid(&Default::default(), well_formed).unwrap();
        assert_eq!(accepted, Value::True);

        let malformed = vec!["foo-bar".into()];
        let rejected = super::uuid(&Default::default(), malformed).unwrap();
        assert_eq!(rejected, Value::False);
    }
}

View file

@ -108,7 +108,7 @@ pub fn synchronous(ctx: &Context<'_>, name: &str, args: Vec<Value>) -> Result<Va
"math::trimean" => args::check(ctx, name, args, Args::One, math::trimean),
"math::variance" => args::check(ctx, name, args, Args::One, math::variance),
//
"parse::email::domain" => args::check(ctx, name, args, Args::One, parse::email::domain),
"parse::email::host" => args::check(ctx, name, args, Args::One, parse::email::host),
"parse::email::user" => args::check(ctx, name, args, Args::One, parse::email::user),
"parse::url::domain" => args::check(ctx, name, args, Args::One, parse::url::domain),
"parse::url::fragment" => args::check(ctx, name, args, Args::One, parse::url::fragment),

View file

@ -3,60 +3,47 @@ pub mod email {
use crate::ctx::Context;
use crate::err::Error;
use crate::sql::value::Value;
use once_cell::sync::Lazy;
use regex::Regex;
use addr::email::Host;
#[rustfmt::skip] static USER_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(?i)[a-z0-9.!#$%&'*+/=?^_`{|}~-]+\z").unwrap());
#[rustfmt::skip] static HOST_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)^[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)*$",).unwrap());
pub fn domain(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
pub fn host(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
// Convert to a String
let val = args.remove(0).as_string();
// Check if value is empty
if val.is_empty() {
return Ok(Value::None);
// Parse the email address
match addr::parse_email_address(&val) {
// Return the host part
Ok(v) => match v.host() {
Host::Domain(name) => Ok(name.as_str().into()),
Host::IpAddr(ip_addr) => Ok(ip_addr.to_string().into()),
},
Err(_) => Ok(Value::None),
}
// Ensure the value contains @
if !val.contains('@') {
return Ok(Value::None);
}
// Reverse split the value by @
let parts: Vec<&str> = val.rsplitn(2, '@').collect();
// Check the first part matches
if !USER_RE.is_match(parts[1]) {
return Ok(Value::None);
}
// Check the second part matches
if !HOST_RE.is_match(parts[0]) {
return Ok(Value::None);
}
// Return the domain
Ok(parts[0].into())
}
pub fn user(_: &Context, mut args: Vec<Value>) -> Result<Value, Error> {
// Convert to a String
let val = args.remove(0).as_string();
// Check if value is empty
if val.is_empty() {
return Ok(Value::None);
// Parse the email address
match addr::parse_email_address(&val) {
// Return the user part
Ok(v) => Ok(v.user().into()),
Err(_) => Ok(Value::None),
}
// Ensure the value contains @
if !val.contains('@') {
return Ok(Value::None);
}
// Reverse split the value by @
let parts: Vec<&str> = val.rsplitn(2, '@').collect();
// Check the first part matches
if !USER_RE.is_match(parts[1]) {
return Ok(Value::None);
#[cfg(test)]
mod tests {
#[test]
fn host() {
let input = vec!["john.doe@example.com".into()];
let value = super::host(&Default::default(), input).unwrap();
assert_eq!(value, "example.com".into());
}
// Check the second part matches
if !HOST_RE.is_match(parts[0]) {
return Ok(Value::None);
#[test]
fn user() {
let input = vec!["john.doe@example.com".into()];
let value = super::user(&Default::default(), input).unwrap();
assert_eq!(value, "john.doe".into());
}
// Return the domain
Ok(parts[1].into())
}
}

View file

@ -360,7 +360,7 @@ fn function_math(i: &str) -> IResult<&str, &str> {
fn function_parse(i: &str) -> IResult<&str, &str> {
alt((
tag("parse::email::domain"),
tag("parse::email::host"),
tag("parse::email::user"),
tag("parse::url::domain"),
tag("parse::url::fragment"),