2023-11-20 18:36:21 +00:00
|
|
|
use once_cell::sync::Lazy;
|
2023-12-13 13:37:24 +00:00
|
|
|
use quick_cache::sync::Cache;
|
|
|
|
use quick_cache::GuardResult;
|
2023-08-17 18:03:46 +00:00
|
|
|
use revision::revisioned;
|
2023-04-23 11:55:18 +00:00
|
|
|
use serde::{
|
|
|
|
de::{self, Visitor},
|
|
|
|
Deserialize, Deserializer, Serialize, Serializer,
|
|
|
|
};
|
|
|
|
use std::cmp::Ordering;
|
|
|
|
use std::fmt::Debug;
|
|
|
|
use std::fmt::{self, Display, Formatter};
|
|
|
|
use std::hash::{Hash, Hasher};
|
|
|
|
use std::str::FromStr;
|
2023-11-20 18:36:21 +00:00
|
|
|
use std::{env, str};
|
2020-06-29 15:36:01 +00:00
|
|
|
|
2023-03-30 10:41:44 +00:00
|
|
|
/// Name under which a `Regex` is written as a serde newtype struct; the same
/// name is passed to the deserializer when reading one back.
pub(crate) const TOKEN: &str = "$surrealdb::private::sql::Regex";
|
|
|
|
|
2023-04-23 11:55:18 +00:00
|
|
|
#[derive(Clone)]
|
2023-08-17 18:03:46 +00:00
|
|
|
#[revisioned(revision = 1)]
|
2024-04-02 20:12:08 +00:00
|
|
|
#[non_exhaustive]
|
2023-08-17 18:03:46 +00:00
|
|
|
/// Newtype wrapper around a compiled [`regex::Regex`].
///
/// Equality, ordering and hashing for this type are all based on the pattern
/// text (`as_str`) of the wrapped regex, and it is displayed between `/`
/// delimiters.
pub struct Regex(pub regex::Regex);
|
2020-06-29 15:36:01 +00:00
|
|
|
|
2023-04-23 11:55:18 +00:00
|
|
|
impl Regex {
|
|
|
|
// Deref would expose `regex::Regex::as_str` which wouldn't have the '/' delimiters.
|
|
|
|
pub fn regex(&self) -> ®ex::Regex {
|
|
|
|
&self.0
|
2020-06-29 15:36:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-11-20 18:36:21 +00:00
|
|
|
fn regex_new(str: &str) -> Result<regex::Regex, regex::Error> {
|
2023-12-13 13:37:24 +00:00
|
|
|
static REGEX_CACHE: Lazy<Cache<String, regex::Regex>> = Lazy::new(|| {
|
2023-11-20 18:36:21 +00:00
|
|
|
let cache_size: usize = env::var("SURREAL_REGEX_CACHE_SIZE")
|
|
|
|
.map_or(1000, |v| v.parse().unwrap_or(1000))
|
|
|
|
.max(10); // The minimum cache size is 10
|
2023-12-13 13:37:24 +00:00
|
|
|
Cache::new(cache_size)
|
2023-11-20 18:36:21 +00:00
|
|
|
});
|
2023-12-13 13:37:24 +00:00
|
|
|
match REGEX_CACHE.get_value_or_guard(str, None) {
|
|
|
|
GuardResult::Value(v) => Ok(v),
|
|
|
|
GuardResult::Guard(g) => {
|
|
|
|
let re = regex::Regex::new(str)?;
|
|
|
|
g.insert(re.clone()).ok();
|
|
|
|
Ok(re)
|
|
|
|
}
|
|
|
|
GuardResult::Timeout => {
|
|
|
|
warn!("Regex cache timeout");
|
|
|
|
regex::Regex::new(str)
|
|
|
|
}
|
2023-11-20 18:36:21 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-23 11:55:18 +00:00
|
|
|
impl FromStr for Regex {
|
|
|
|
type Err = <regex::Regex as FromStr>::Err;
|
|
|
|
|
|
|
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
2023-05-09 17:48:14 +00:00
|
|
|
if s.contains('\0') {
|
|
|
|
Err(regex::Error::Syntax("regex contained NUL byte".to_owned()))
|
|
|
|
} else {
|
2023-11-20 18:36:21 +00:00
|
|
|
regex_new(&s.replace("\\/", "/")).map(Self)
|
2023-05-09 17:48:14 +00:00
|
|
|
}
|
2021-03-29 15:43:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-23 11:55:18 +00:00
|
|
|
impl PartialEq for Regex {
|
|
|
|
fn eq(&self, other: &Self) -> bool {
|
2024-03-01 12:51:32 +00:00
|
|
|
let str_left = self.0.as_str();
|
|
|
|
let str_right = other.0.as_str();
|
|
|
|
str_left == str_right
|
2021-03-29 15:43:37 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-23 11:55:18 +00:00
|
|
|
// `PartialEq` compares the pattern strings, which is a total equivalence, so `Eq` holds.
impl Eq for Regex {}
|
|
|
|
|
|
|
|
impl Ord for Regex {
|
|
|
|
fn cmp(&self, other: &Self) -> Ordering {
|
|
|
|
self.0.as_str().cmp(other.0.as_str())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl PartialOrd for Regex {
|
|
|
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
|
|
|
Some(self.cmp(other))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Hash for Regex {
|
|
|
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
|
|
self.0.as_str().hash(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Debug for Regex {
|
|
|
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
|
|
|
Display::fmt(self, f)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Display for Regex {
|
|
|
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
|
|
|
write!(f, "/{}/", &self.0)
|
2020-06-29 15:36:01 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-30 10:41:44 +00:00
|
|
|
impl Serialize for Regex {
|
|
|
|
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
|
|
|
where
|
2023-04-23 11:55:18 +00:00
|
|
|
S: Serializer,
|
2023-03-30 10:41:44 +00:00
|
|
|
{
|
2023-04-29 15:58:22 +00:00
|
|
|
serializer.serialize_newtype_struct(TOKEN, self.0.as_str())
|
2023-03-30 10:41:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-23 11:55:18 +00:00
|
|
|
impl<'de> Deserialize<'de> for Regex {
|
|
|
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
|
|
|
where
|
|
|
|
D: Deserializer<'de>,
|
|
|
|
{
|
2023-06-13 21:13:10 +00:00
|
|
|
struct RegexNewtypeVisitor;
|
2023-04-23 11:55:18 +00:00
|
|
|
|
2023-06-13 21:13:10 +00:00
|
|
|
impl<'de> Visitor<'de> for RegexNewtypeVisitor {
|
2023-04-23 11:55:18 +00:00
|
|
|
type Value = Regex;
|
|
|
|
|
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
2023-06-13 21:13:10 +00:00
|
|
|
formatter.write_str("a regex newtype")
|
2023-04-23 11:55:18 +00:00
|
|
|
}
|
|
|
|
|
2023-06-13 21:13:10 +00:00
|
|
|
fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
|
2023-04-23 11:55:18 +00:00
|
|
|
where
|
2023-06-13 21:13:10 +00:00
|
|
|
D: Deserializer<'de>,
|
2023-04-23 11:55:18 +00:00
|
|
|
{
|
2023-06-13 21:13:10 +00:00
|
|
|
struct RegexVisitor;
|
|
|
|
|
|
|
|
impl<'de> Visitor<'de> for RegexVisitor {
|
|
|
|
type Value = Regex;
|
|
|
|
|
|
|
|
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
formatter.write_str("a regex str")
|
|
|
|
}
|
|
|
|
|
|
|
|
fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
|
|
|
|
where
|
|
|
|
E: de::Error,
|
|
|
|
{
|
|
|
|
Regex::from_str(value).map_err(|_| de::Error::custom("invalid regex"))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
deserializer.deserialize_str(RegexVisitor)
|
2023-04-23 11:55:18 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-13 21:13:10 +00:00
|
|
|
deserializer.deserialize_newtype_struct(TOKEN, RegexNewtypeVisitor)
|
2023-04-23 11:55:18 +00:00
|
|
|
}
|
|
|
|
}
|