MVP-ish parser. AST and error handling need to be improved further

This commit is contained in:
Ermia Behzadifar 2024-11-17 03:33:54 +01:00
parent 3932bbff43
commit 05b7c70bc6
3 changed files with 78 additions and 57 deletions

View file

@ -7,11 +7,6 @@ pub struct Component {
pub enum Element { pub enum Element {
Html(HtmlTag), Html(HtmlTag),
Block(CodeBlock), Block(CodeBlock),
}
#[derive(Debug, Clone)]
pub enum HtmlContent {
Element(Element),
Text(String) Text(String)
} }
@ -19,19 +14,13 @@ pub enum HtmlContent {
pub struct HtmlTag { pub struct HtmlTag {
pub tag: String, pub tag: String,
pub attributes: Vec<Attribute>, pub attributes: Vec<Attribute>,
pub content: Vec<HtmlContent>, pub content: Vec<Element>,
}
#[derive(Debug, Clone)]
pub enum TextOrCode {
Text(String),
Code(CodeBlock),
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Attribute { pub struct Attribute {
pub name: TextOrCode, pub name: Element,
pub value: Option<TextOrCode>, pub value: Option<Element>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]

View file

@ -1,17 +1,19 @@
use nom::{ use nom::{
branch::alt, bytes::complete::{is_not, tag, take_while1}, character::complete::{char, multispace0, space0, space1}, combinator::{opt, peek}, error::{Error, ErrorKind}, multi::{many0, many1, separated_list0}, sequence::{delimited, pair, preceded, terminated, tuple}, Err, IResult, InputLength branch::alt, bytes::complete::{is_not, tag, take_while1}, character::complete::{char, multispace0, one_of, space0, space1}, combinator::{opt, peek}, error::{Error, ErrorKind}, multi::{many0, many1, separated_list0}, sequence::{delimited, pair, preceded, terminated, tuple}, Err, IResult, InputLength
}; };
use cogs_ast::{Attribute, CodeBlock, Component, Element, HtmlContent, HtmlTag, TextOrCode}; use cogs_ast::{Attribute, CodeBlock, Component, Element, HtmlTag};
pub fn parse_cog(input: &str) -> IResult<&str, Component> { pub fn parse_cog(input: &str) -> IResult<&str, Component> {
let (input, elements) = parse_consecutive_elements(input)?; let (input, elements) = parse_consecutive_proper_elements(input)?;
Ok((input, Component { elements })) Ok((input, Component { elements }))
} }
pub fn parse_consecutive_elements(input: &str) -> IResult<&str, Vec<Element>> { pub fn parse_consecutive_proper_elements(input: &str) -> IResult<&str, Vec<Element>> {
let (input, _) = multispace0(input)?; let (input, _) = multispace0(input)?;
many0(parse_element)(input) let res = many0(parse_proper_element)(input);
// dbg!(&res);
res
} }
fn parse_element(input: &str) -> IResult<&str, Element> { fn parse_element(input: &str) -> IResult<&str, Element> {
@ -19,6 +21,13 @@ fn parse_element(input: &str) -> IResult<&str, Element> {
alt((parse_html, parse_code_block))(input) alt((parse_html, parse_code_block))(input)
} }
fn parse_proper_element(input: &str) -> IResult<&str, Element> {
// dbg!(&input);
let res = alt((parse_element, parse_text))(input);
// dbg!(&res);
res
}
fn is_valid_tag_name_char(c: char) -> bool { fn is_valid_tag_name_char(c: char) -> bool {
c.is_alphanumeric() || c == '-' c.is_alphanumeric() || c == '-'
@ -33,10 +42,13 @@ fn parse_tag_name(input: &str) -> IResult<&str, &str> {
} }
fn parse_attribute(input: &str) -> IResult<&str, Attribute> { fn parse_attribute(input: &str) -> IResult<&str, Attribute> {
// Shouldn't be needed with take_while1, re-add if fail
/*
if input.starts_with('>') || input.is_empty() { if input.starts_with('>') || input.is_empty() {
// Return a recoverable error so `separated_list0` stops parsing cleanly // Return a recoverable error so `separated_list0` stops parsing cleanly
return Err(Err::Error(Error::new(input, nom::error::ErrorKind::Eof))); return Err(Err::Error(Error::new(input, nom::error::ErrorKind::Eof)));
} }
*/
let (input, key) = take_while1(is_valid_attr_char)(input)?; let (input, key) = take_while1(is_valid_attr_char)(input)?;
let (input, value) = opt(preceded( let (input, value) = opt(preceded(
@ -47,13 +59,13 @@ fn parse_attribute(input: &str) -> IResult<&str, Attribute> {
let mut resulting_value = None; let mut resulting_value = None;
if value.is_some_and(|x| !x.is_empty()) { if value.is_some_and(|x| !x.is_empty()) {
resulting_value = Some(TextOrCode::Text(value.unwrap().to_string())); resulting_value = Some(Element::Text(value.unwrap().to_string()));
} }
Ok(( Ok((
input, input,
Attribute { Attribute {
name: TextOrCode::Text(key.to_string()), name: Element::Text(key.to_string()),
value: resulting_value, value: resulting_value,
}, },
)) ))
@ -68,7 +80,7 @@ fn parse_attributes(input: &str) -> IResult<&str, Vec<Attribute>> {
let (input, _) = space0(input)?; // dump any trailing spaces let (input, _) = space0(input)?; // dump any trailing spaces
dbg!(&attrs); // dbg!(&attrs);
Ok(( Ok((
input, input,
@ -78,10 +90,10 @@ fn parse_attributes(input: &str) -> IResult<&str, Vec<Attribute>> {
fn parse_inside_html_opening_tag(input: &str) -> IResult<&str, HtmlTag> { fn parse_inside_html_opening_tag(input: &str) -> IResult<&str, HtmlTag> {
let (input, tag) = parse_tag_name(input)?; let (input, tag) = parse_tag_name(input)?;
dbg!(&tag); // dbg!(&tag);
let (input, _) = space0(input)?; let (input, _) = space0(input)?;
let (input, attributes) = parse_attributes(input)?; let (input, attributes) = parse_attributes(input)?;
dbg!(&attributes); // dbg!(&attributes);
Ok(( Ok((
input, input,
@ -96,7 +108,7 @@ fn parse_inside_html_opening_tag(input: &str) -> IResult<&str, HtmlTag> {
fn parse_html_opening_tag(input: &str) -> IResult<&str, HtmlTag> { fn parse_html_opening_tag(input: &str) -> IResult<&str, HtmlTag> {
let (input, tag) = delimited(char('<'), parse_inside_html_opening_tag, char('>'))(input)?; let (input, tag) = delimited(char('<'), parse_inside_html_opening_tag, char('>'))(input)?;
dbg!(&tag); // dbg!(&tag);
Ok(( Ok((
input, input,
@ -114,55 +126,42 @@ fn parse_html_closing_tag(input: &str) -> IResult<&str, &str> {
} }
fn parse_text(input: &str) -> IResult<&str, &str> { fn parse_text(input: &str) -> IResult<&str, Element> {
dbg!(&input); // dbg!(&input);
let mut index = 0; let mut index = 0;
while index < input.len() { while index < input.len() {
let current_slice = &input[index..]; let current_slice = &input[index..];
if peek(parse_consecutive_elements)(current_slice).is_ok() && !peek(parse_consecutive_elements)(current_slice).unwrap().1.is_empty() { if peek(parse_element)(current_slice).is_ok() {
dbg!(&current_slice); // dbg!(&current_slice);
break; break;
} }
if peek(parse_html_closing_tag)(current_slice).is_ok() { if peek(parse_html_opening_tag)(current_slice).is_ok() ||
dbg!(&current_slice); peek(parse_html_closing_tag)(current_slice).is_ok() ||
peek::<_, _, Error<&str>, _>(char('}'))(current_slice).is_ok(){
// dbg!(&current_slice);
break; // Stop if any of these parsers match break; // Stop if any of these parsers match
} }
index += 1; // Increment to check the next character index += 1; // Increment to check the next character
} }
dbg!(&input[0..index]);
Ok((&input[index..], &input[0..index])) if input[0..index].is_empty() {
// dbg!(&input[0..index]);
return Err(Err::Error(Error::new(input, ErrorKind::Eof)));
} }
fn parse_single_html_content(input: &str) -> IResult<&str, HtmlContent> { // dbg!(&input[0..index]);
let (input, content) = alt((
|input| parse_element(input).map(|(next, res)| (next, HtmlContent::Element(res))),
|input| parse_text(input).map(|(next, res)| (next, HtmlContent::Text(res.to_string()))),
))(input)?;
match content.clone() { Ok((&input[index..], Element::Text(input[0..index].to_string())))
HtmlContent::Text(text) => {
if text.is_empty() {
return Err(Err::Error(Error::new(input, ErrorKind::NonEmpty)));
}
},
HtmlContent::Element(_) => {}
} }
Ok(( fn parse_html_contents(input: &str) -> IResult<&str, Vec<Element>> {
input, let (input, out) = parse_consecutive_proper_elements(input)?;
content
))
}
fn parse_html_contents(input: &str) -> IResult<&str, Vec<HtmlContent>> { // dbg!(&out);
let (input, out) = many0(parse_single_html_content)(input)?;
dbg!(&out);
Ok(( Ok((
input, input,
@ -171,6 +170,7 @@ fn parse_html_contents(input: &str) -> IResult<&str, Vec<HtmlContent>> {
} }
fn parse_html(input: &str) -> IResult<&str, Element> { fn parse_html(input: &str) -> IResult<&str, Element> {
let (input, _) = multispace0(input)?; // remove spaces when debugging is complete
let (input, mut htag) = parse_html_opening_tag(input)?; let (input, mut htag) = parse_html_opening_tag(input)?;
let (input, content) = parse_html_contents(input)?; // parse_consecutive_elements(input)?; let (input, content) = parse_html_contents(input)?; // parse_consecutive_elements(input)?;
htag.content = content; htag.content = content;
@ -180,16 +180,44 @@ fn parse_html(input: &str) -> IResult<&str, Element> {
return Err(Err::Failure(Error::new(input, ErrorKind::Fail))); // Is there a way to give a custom error message? return Err(Err::Failure(Error::new(input, ErrorKind::Fail))); // Is there a way to give a custom error message?
} }
Ok(( Ok((
input, input,
Element::Html(htag) Element::Html(htag)
)) ))
} }
/*
fn parse_code_until_interrupted(input: &str) -> IResult<&str, Element> {
let (input, code) = pair(is_not("{};"), one_of("{};"))(input)?;
Ok((
input,
Element::Text(format!("{}{}", code.0, code.1))
))
}
*/
fn parse_inside_code_block(input: &str) -> IResult<&str, Vec<Element>> {
println!("Attempting inside code block {input}");
let (input, elems) = parse_consecutive_proper_elements(input)?;
dbg!(&elems);
Ok((
input,
elems
))
}
fn parse_code_block(input: &str) -> IResult<&str, Element> { fn parse_code_block(input: &str) -> IResult<&str, Element> {
if input.chars().nth(0) == Some('{') {
println!("Attempting code block on {input}");
}
let (input, content) = delimited( let (input, content) = delimited(
char('{'), char('{'),
parse_consecutive_elements, // get here eventually, currently code blocks do not work at all. parse_inside_code_block, // get here eventually, currently code blocks do not work at all.
char('}'), char('}'),
)(input)?; )(input)?;

View file

@ -5,5 +5,9 @@ fn main() {
<body> <body>
<h1>Yo.</h1> <h1>Yo.</h1>
<a src=\"https://www.youtube.com/watch?v=dQw4w9WgXcQ\">Click this</a> <a src=\"https://www.youtube.com/watch?v=dQw4w9WgXcQ\">Click this</a>
{
println!(\"test\");
<p>More Html</p>
}
</body>")); </body>"));
} }