From df07bb32f593ecd7d8c2f26091dab026bad56b19 Mon Sep 17 00:00:00 2001
From: Finn Bear
Date: Tue, 25 Apr 2023 14:58:53 -0700
Subject: [PATCH] Bugfix - make string::slice properly handle UTF-8 and reallocate less. (#1854)

---
 lib/src/fnc/string.rs | 64 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 52 insertions(+), 12 deletions(-)

diff --git a/lib/src/fnc/string.rs b/lib/src/fnc/string.rs
index 900bedd4..52c7aba9 100644
--- a/lib/src/fnc/string.rs
+++ b/lib/src/fnc/string.rs
@@ -52,21 +52,33 @@ pub fn reverse((string,): (String,)) -> Result<Value, Error> {
 }
 
 pub fn slice((val, beg, lim): (String, Option<isize>, Option<isize>)) -> Result<Value, Error> {
-	let val = match beg {
-		Some(v) if v < 0 => {
-			val.chars().skip(val.len().saturating_sub(v.unsigned_abs())).collect::<String>()
+	// Only count the chars if we need to and only do it once.
+	let mut char_count = usize::MAX;
+	let mut count_chars = || {
+		if char_count == usize::MAX {
+			char_count = val.chars().count();
 		}
-		Some(v) => val.chars().skip(v as usize).collect::<String>(),
-		None => val,
+		char_count
 	};
-	let val = match lim {
-		Some(v) if v < 0 => {
-			val.chars().take(val.len().saturating_sub(v.unsigned_abs())).collect::<String>()
-		}
-		Some(v) => val.chars().take(v as usize).collect::<String>(),
-		None => val,
+
+	let skip = match beg {
+		Some(v) if v < 0 => count_chars().saturating_sub(v.unsigned_abs()),
+		Some(v) => v as usize,
+		None => 0,
 	};
-	Ok(val.into())
+
+	let take = match lim {
+		Some(v) if v < 0 => count_chars().saturating_sub(skip).saturating_sub(v.unsigned_abs()),
+		Some(v) => v as usize,
+		None => usize::MAX,
+	};
+
+	Ok(if skip > 0 || take < usize::MAX {
+		val.chars().skip(skip).take(take).collect::<String>()
+	} else {
+		val
+	}
+	.into())
 }
 
 pub fn slug((string,): (String,)) -> Result<Value, Error> {
@@ -92,3 +104,31 @@ pub fn uppercase((string,): (String,)) -> Result<Value, Error> {
 pub fn words((string,): (String,)) -> Result<Value, Error> {
 	Ok(string.split_whitespace().collect::<Vec<&str>>().into())
 }
+
+#[cfg(test)]
+mod tests {
+	use super::slice;
+	use crate::sql::Value;
+
+	#[test]
+	fn string_slice() {
+		fn test(initial: &str, beg: Option<isize>, end: Option<isize>, expected: &str) {
+			assert_eq!(slice((initial.to_owned(), beg, end)).unwrap(), Value::from(expected));
+		}
+
+		let string = "abcdefg";
+		test(string, None, None, string);
+		test(string, Some(2), None, &string[2..]);
+		test(string, Some(2), Some(3), &string[2..5]);
+		test(string, Some(2), Some(-1), "cdef");
+		test(string, Some(-2), None, "fg");
+		test(string, Some(-4), Some(2), "de");
+		test(string, Some(-4), Some(-1), "def");
+
+		let string = "你好世界";
+		test(string, None, None, string);
+		test(string, Some(1), None, "好世界");
+		test(string, Some(-1), None, "界");
+		test(string, Some(-2), Some(1), "世");
+	}
+}