Improve performance and security of simple JSON parser

This commit is contained in:
Finn Bear 2023-04-17 07:39:37 -07:00 committed by GitHub
parent ed20048358
commit c9a9336fdc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 124 additions and 36 deletions

View file

@ -220,6 +220,7 @@ impl Datastore {
_ => unreachable!(),
};
#[allow(unreachable_code)]
Ok(Transaction {
inner,
cache: super::cache::Cache::default(),

View file

@ -393,7 +393,7 @@ impl Uniq<Array> for Array {
pub fn array(i: &str) -> IResult<&str, Array> {
let (i, _) = char('[')(i)?;
let (i, _) = mightbespace(i)?;
let (i, v) = separated_list0(commas, item)(i)?;
let (i, v) = separated_list0(commas, value)(i)?;
let (i, _) = mightbespace(i)?;
let (i, _) = opt(char(','))(i)?;
let (i, _) = mightbespace(i)?;
@ -401,11 +401,6 @@ pub fn array(i: &str) -> IResult<&str, Array> {
Ok((i, Array(v)))
}
/// Parses a single array element by handing the input straight to `value`.
fn item(i: &str) -> IResult<&str, Value> {
    value(i)
}
#[cfg(test)]
mod tests {

View file

@ -189,7 +189,14 @@ impl Serialize for Object {
pub fn object(i: &str) -> IResult<&str, Object> {
let (i, _) = char('{')(i)?;
let (i, _) = mightbespace(i)?;
let (i, v) = separated_list0(commas, item)(i)?;
let (i, v) = separated_list0(commas, |i| {
let (i, k) = key(i)?;
let (i, _) = mightbespace(i)?;
let (i, _) = char(':')(i)?;
let (i, _) = mightbespace(i)?;
let (i, v) = value(i)?;
Ok((i, (String::from(k), v)))
})(i)?;
let (i, _) = mightbespace(i)?;
let (i, _) = opt(char(','))(i)?;
let (i, _) = mightbespace(i)?;
@ -197,16 +204,7 @@ pub fn object(i: &str) -> IResult<&str, Object> {
Ok((i, Object(v.into_iter().collect())))
}
/// Parses one object entry: a key, a `:` separator, then a value.
///
/// The key is copied into an owned `String` and returned with the value.
fn item(i: &str) -> IResult<&str, (String, Value)> {
    // The entry name comes first
    let (rest, name) = key(i)?;
    // A colon separates name and value, with `mightbespace` on either side
    let (rest, _) = mightbespace(rest)?;
    let (rest, _) = char(':')(rest)?;
    let (rest, _) = mightbespace(rest)?;
    // The value itself
    let (rest, val) = value(rest)?;
    Ok((rest, (String::from(name), val)))
}
fn key(i: &str) -> IResult<&str, &str> {
pub fn key(i: &str) -> IResult<&str, &str> {
alt((key_none, key_single, key_double))(i)
}

View file

@ -20,7 +20,13 @@ pub fn thing(input: &str) -> Result<Thing, Error> {
parse_impl(input, super::thing::thing)
}
/// Parses a SurrealQL [`Value`]
/// Parses a SurrealQL [`Value`].
///
/// Hands `input` to `parse_impl` together with the `super::value::value`
/// parser, and returns the parsed [`Value`] or an [`Error`].
// The tracing span is named "parser"; arguments are skipped and only the
// input length is recorded as a field.
#[instrument(name = "parser", skip_all, fields(length = input.len()))]
pub fn value(input: &str) -> Result<Value, Error> {
parse_impl(input, super::value::value)
}
/// Parses JSON into an inert SurrealQL [`Value`]
#[instrument(name = "parser", skip_all, fields(length = input.len()))]
pub fn json(input: &str) -> Result<Value, Error> {
parse_impl(input, super::value::json)
@ -105,6 +111,8 @@ fn locate<'a>(input: &str, tried: &'a str) -> (&'a str, usize, usize) {
mod tests {
use super::*;
use serde::Serialize;
use std::{collections::HashMap, time::Instant};
#[test]
fn no_ending() {
@ -174,4 +182,58 @@ mod tests {
let dec: Query = Query::from(enc);
assert_eq!(tmp, dec);
}
#[test]
#[cfg_attr(debug_assertions, ignore)]
fn json_benchmark() {
    // Ignored whenever debug assertions are enabled. To run it with
    // optimizations, from the top level of the repository:
    // cargo test sql::parser::tests::json_benchmark --package surrealdb --lib --release -- --nocapture --exact

    // Number of passes; every pass parses both the compact and the pretty document.
    const ITERATIONS: u32 = 32;

    // Recursive sample record mixing scalars, an optional box, a vector and a map.
    #[derive(Clone, Serialize)]
    struct Data {
        boolean: bool,
        integer: i32,
        decimal: f32,
        string: String,
        inner: Option<Box<Self>>,
        inners: Vec<Self>,
        inner_map: HashMap<String, Self>,
    }

    // A childless record reused for every nested position.
    let leaf = Data {
        boolean: true,
        integer: -1,
        decimal: 0.5,
        string: String::from("foo"),
        inner: None,
        inners: Vec::new(),
        inner_map: HashMap::new(),
    };
    let leaves = vec![leaf.clone(); 10];

    // Key each copy by its position rendered as a string.
    let keyed: HashMap<String, Data> = leaves
        .iter()
        .cloned()
        .enumerate()
        .map(|(idx, item)| (idx.to_string(), item))
        .collect();

    let root = Data {
        boolean: false,
        integer: 42,
        decimal: 9000.0,
        string: String::from("SurrealDB"),
        inner: Some(Box::new(leaf)),
        inners: leaves,
        inner_map: keyed,
    };

    // The same document in compact and in pretty-printed form.
    let compact = serde_json::to_string(&root).unwrap();
    let pretty = serde_json::to_string_pretty(&root).unwrap();

    // Mean wall-clock seconds per parse call for the given deserializer.
    let seconds_per_call = |parse: fn(&str) -> Value| {
        let started = Instant::now();
        for _ in 0..ITERATIONS {
            std::hint::black_box(parse(std::hint::black_box(&compact)));
            std::hint::black_box(parse(std::hint::black_box(&pretty)));
        }
        let calls = 2 * ITERATIONS;
        started.elapsed().as_secs_f32() / calls as f32
    };

    println!("sql::json took {:.10}s/iter", seconds_per_call(|s| crate::sql::json(s).unwrap()));
}
}

View file

@ -7,6 +7,7 @@ use crate::err::Error;
use crate::sql::array::{array, Array};
use crate::sql::block::{block, Block};
use crate::sql::bytes::Bytes;
use crate::sql::comment::mightbespace;
use crate::sql::common::commas;
use crate::sql::constant::{constant, Constant};
use crate::sql::datetime::{datetime, Datetime};
@ -23,7 +24,7 @@ use crate::sql::idiom::{self, Idiom};
use crate::sql::kind::Kind;
use crate::sql::model::{model, Model};
use crate::sql::number::{number, Number};
use crate::sql::object::{object, Object};
use crate::sql::object::{key, object, Object};
use crate::sql::operation::Operation;
use crate::sql::param::{param, Param};
use crate::sql::part::Part;
@ -44,7 +45,9 @@ use fuzzy_matcher::FuzzyMatcher;
use geo::Point;
use nom::branch::alt;
use nom::bytes::complete::tag_no_case;
use nom::combinator::map;
use nom::character::complete::char;
use nom::combinator::{map, opt};
use nom::multi::separated_list0;
use nom::multi::separated_list1;
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
@ -1676,12 +1679,41 @@ pub fn what(i: &str) -> IResult<&str, Value> {
/// Used to parse any simple JSON-like value
pub fn json(i: &str) -> IResult<&str, Value> {
// Use a specific parser for JSON objects
pub fn object(i: &str) -> IResult<&str, Object> {
    // Opening brace
    let (rest, _) = char('{')(i)?;
    let (rest, _) = mightbespace(rest)?;
    // Zero or more `key: value` entries separated by `commas`; each value
    // is parsed with `json` rather than the general value parser
    let (rest, entries) = separated_list0(commas, |input| {
        let (input, name) = key(input)?;
        let (input, _) = mightbespace(input)?;
        let (input, _) = char(':')(input)?;
        let (input, _) = mightbespace(input)?;
        let (input, val) = json(input)?;
        Ok((input, (String::from(name), val)))
    })(rest)?;
    // An optional trailing comma is accepted before the closing brace
    let (rest, _) = mightbespace(rest)?;
    let (rest, _) = opt(char(','))(rest)?;
    let (rest, _) = mightbespace(rest)?;
    let (rest, _) = char('}')(rest)?;
    Ok((rest, Object(entries.into_iter().collect())))
}
// Use a specific parser for JSON arrays
pub fn array(i: &str) -> IResult<&str, Array> {
    // Opening bracket
    let (rest, _) = char('[')(i)?;
    let (rest, _) = mightbespace(rest)?;
    // Zero or more elements separated by `commas`, each parsed with `json`
    let (rest, items) = separated_list0(commas, json)(rest)?;
    // An optional trailing comma is accepted before the closing bracket
    let (rest, _) = mightbespace(rest)?;
    let (rest, _) = opt(char(','))(rest)?;
    let (rest, _) = mightbespace(rest)?;
    let (rest, _) = char(']')(rest)?;
    Ok((rest, Array(items)))
}
// Parse any simple JSON-like value
alt((
map(tag_no_case("NULL"), |_| Value::Null),
map(tag_no_case("true"), |_| Value::True),
map(tag_no_case("false"), |_| Value::False),
map(tag_no_case("null".as_bytes()), |_| Value::Null),
map(tag_no_case("true".as_bytes()), |_| Value::True),
map(tag_no_case("false".as_bytes()), |_| Value::False),
map(datetime, Value::from),
map(duration, Value::from),
map(geometry, Value::from),
map(unique, Value::from),
map(number, Value::from),

View file

@ -1,5 +1,5 @@
use surrealdb::sql::json;
use surrealdb::sql::thing;
use surrealdb::sql::value;
use surrealdb::sql::Thing;
use surrealdb::sql::Value;
@ -9,7 +9,7 @@ pub trait Parse<T> {
impl Parse<Value> for Value {
fn parse(val: &str) -> Value {
json(val).unwrap()
value(val).unwrap()
}
}

View file

@ -179,7 +179,7 @@ async fn create_all(
// Convert the HTTP request body
let data = bytes_to_utf8(&body)?;
// Parse the request body as JSON
match surrealdb::sql::json(data) {
match surrealdb::sql::value(data) {
Ok(data) => {
// Specify the request statement
let sql = "CREATE type::table($table) CONTENT $data";
@ -260,7 +260,7 @@ async fn select_one(
// Specify the request statement
let sql = "SELECT * FROM type::thing($table, $id)";
// Parse the Record ID as a SurrealQL value
let rid = match surrealdb::sql::json(&id) {
let rid = match surrealdb::sql::value(&id) {
Ok(id) => id,
Err(_) => Value::from(id),
};
@ -301,12 +301,12 @@ async fn create_one(
// Convert the HTTP request body
let data = bytes_to_utf8(&body)?;
// Parse the Record ID as a SurrealQL value
let rid = match surrealdb::sql::json(&id) {
let rid = match surrealdb::sql::value(&id) {
Ok(id) => id,
Err(_) => Value::from(id),
};
// Parse the request body as JSON
match surrealdb::sql::json(data) {
match surrealdb::sql::value(data) {
Ok(data) => {
// Specify the request statement
let sql = "CREATE type::thing($table, $id) CONTENT $data";
@ -352,12 +352,12 @@ async fn update_one(
// Convert the HTTP request body
let data = bytes_to_utf8(&body)?;
// Parse the Record ID as a SurrealQL value
let rid = match surrealdb::sql::json(&id) {
let rid = match surrealdb::sql::value(&id) {
Ok(id) => id,
Err(_) => Value::from(id),
};
// Parse the request body as JSON
match surrealdb::sql::json(data) {
match surrealdb::sql::value(data) {
Ok(data) => {
// Specify the request statement
let sql = "UPDATE type::thing($table, $id) CONTENT $data";
@ -403,12 +403,12 @@ async fn modify_one(
// Convert the HTTP request body
let data = bytes_to_utf8(&body)?;
// Parse the Record ID as a SurrealQL value
let rid = match surrealdb::sql::json(&id) {
let rid = match surrealdb::sql::value(&id) {
Ok(id) => id,
Err(_) => Value::from(id),
};
// Parse the request body as JSON
match surrealdb::sql::json(data) {
match surrealdb::sql::value(data) {
Ok(data) => {
// Specify the request statement
let sql = "UPDATE type::thing($table, $id) MERGE $data";
@ -453,7 +453,7 @@ async fn delete_one(
// Specify the request statement
let sql = "DELETE type::thing($table, $id) RETURN BEFORE";
// Parse the Record ID as a SurrealQL value
let rid = match surrealdb::sql::json(&id) {
let rid = match surrealdb::sql::value(&id) {
Ok(id) => id,
Err(_) => Value::from(id),
};

View file

@ -193,7 +193,7 @@ impl Rpc {
// This won't panic due to the check above
let val = m.to_str().unwrap();
// Parse the SurrealQL object
match surrealdb::sql::json(val) {
match surrealdb::sql::value(val) {
// The SurrealQL message parsed ok
Ok(v) => v,
// The SurrealQL message failed to parse