parent
16bf727870
commit
ef7955c293
1 changed file with 11 additions and 4 deletions
|
@ -8,7 +8,7 @@ use nom::bytes::complete::is_not;
|
||||||
use nom::bytes::complete::take_while_m_n;
|
use nom::bytes::complete::take_while_m_n;
|
||||||
use nom::character::complete::char;
|
use nom::character::complete::char;
|
||||||
use nom::combinator::value;
|
use nom::combinator::value;
|
||||||
use nom::Err::Error;
|
use nom::Err::Failure;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use std::fmt::{self, Display, Formatter};
|
use std::fmt::{self, Display, Formatter};
|
||||||
use std::ops;
|
use std::ops;
|
||||||
|
@ -21,6 +21,8 @@ const SINGLE_ESC: &str = r#"\'"#;
|
||||||
const DOUBLE: char = '"';
|
const DOUBLE: char = '"';
|
||||||
const DOUBLE_ESC: &str = r#"\""#;
|
const DOUBLE_ESC: &str = r#"\""#;
|
||||||
|
|
||||||
|
// Inclusive lower and upper bounds of the Unicode surrogate code point
// range (U+D800 through U+DFFF).
const SURROGATES: [u32; 2] = [0xD800, 0xDFFF];
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Deserialize, Hash)]
|
#[derive(Clone, Debug, Default, Eq, PartialEq, PartialOrd, Deserialize, Hash)]
|
||||||
pub struct Strand(pub String);
|
pub struct Strand(pub String);
|
||||||
|
|
||||||
|
@ -164,14 +166,19 @@ fn strand_unicode(i: &str) -> IResult<&str, char> {
|
||||||
// We can convert this to u32 as we only have 6 chars
|
// We can convert this to u32 as we only have 6 chars
|
||||||
let v = match u32::from_str_radix(v, 16) {
|
let v = match u32::from_str_radix(v, 16) {
|
||||||
// We found an invalid unicode sequence
|
// We found an invalid unicode sequence
|
||||||
Err(_) => return Err(Error(Parser(i))),
|
Err(_) => return Err(Failure(Parser(i))),
|
||||||
// The unicode sequence was valid
|
// The unicode sequence was valid
|
||||||
Ok(v) => v,
|
Ok(v) => match v {
|
||||||
|
// This is a surrogate, so convert to a space
|
||||||
|
v if v >= SURROGATES[0] && v <= SURROGATES[1] => 32,
|
||||||
|
// This is a valid UTF-8 / UTF-16 character
|
||||||
|
_ => v,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
// We can convert this to char as we know it is valid
|
// We can convert this to char as we know it is valid
|
||||||
let v = match std::char::from_u32(v) {
|
let v = match std::char::from_u32(v) {
|
||||||
// We found an invalid unicode sequence
|
// We found an invalid unicode sequence
|
||||||
None => return Err(Error(Parser(i))),
|
None => return Err(Failure(Parser(i))),
|
||||||
// The unicode sequence was valid
|
// The unicode sequence was valid
|
||||||
Some(v) => v,
|
Some(v) => v,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue