2023-05-10 02:08:09 +00:00
|
|
|
use crate::sql::comment::shouldbespace;
|
|
|
|
use crate::sql::common::commas;
|
|
|
|
use crate::sql::error::IResult;
|
|
|
|
use nom::branch::alt;
|
|
|
|
use nom::bytes::complete::tag_no_case;
|
|
|
|
use nom::combinator::map;
|
|
|
|
use nom::multi::separated_list1;
|
2023-08-17 18:03:46 +00:00
|
|
|
use revision::revisioned;
|
2023-05-10 02:08:09 +00:00
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use std::fmt;
|
|
|
|
use std::fmt::Display;
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, Hash)]
|
2023-08-17 18:03:46 +00:00
|
|
|
#[revisioned(revision = 1)]
|
2023-05-10 02:08:09 +00:00
|
|
|
pub enum Tokenizer {
|
2023-06-19 18:41:13 +00:00
|
|
|
Blank,
|
|
|
|
Camel,
|
|
|
|
Class,
|
|
|
|
Punct,
|
2023-05-10 02:08:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Display for Tokenizer {
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
|
|
f.write_str(match self {
|
2023-06-19 18:41:13 +00:00
|
|
|
Self::Blank => "BLANK",
|
|
|
|
Self::Camel => "CAMEL",
|
|
|
|
Self::Class => "CLASS",
|
|
|
|
Self::Punct => "PUNCT",
|
2023-05-10 02:08:09 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn tokenizer(i: &str) -> IResult<&str, Tokenizer> {
|
|
|
|
let (i, t) = alt((
|
2023-06-19 18:41:13 +00:00
|
|
|
map(tag_no_case("BLANK"), |_| Tokenizer::Blank),
|
|
|
|
map(tag_no_case("CAMEL"), |_| Tokenizer::Camel),
|
|
|
|
map(tag_no_case("CLASS"), |_| Tokenizer::Class),
|
|
|
|
map(tag_no_case("PUNCT"), |_| Tokenizer::Punct),
|
2023-05-10 02:08:09 +00:00
|
|
|
))(i)?;
|
|
|
|
Ok((i, t))
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(super) fn tokenizers(i: &str) -> IResult<&str, Vec<Tokenizer>> {
|
|
|
|
let (i, _) = tag_no_case("TOKENIZERS")(i)?;
|
|
|
|
let (i, _) = shouldbespace(i)?;
|
|
|
|
let (i, t) = separated_list1(commas, tokenizer)(i)?;
|
|
|
|
Ok((i, t))
|
|
|
|
}
|