Working HTML, no working code blocks yet. AST changes
This commit is contained in:
parent
d1c99982c7
commit
3932bbff43
3 changed files with 200 additions and 63 deletions
|
@ -3,27 +3,39 @@ pub struct Component {
|
||||||
pub elements: Vec<Element>,
|
pub elements: Vec<Element>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum Element {
|
pub enum Element {
|
||||||
Html(HtmlTag),
|
Html(HtmlTag),
|
||||||
Block(CodeBlock),
|
Block(CodeBlock),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum HtmlContent {
|
||||||
|
Element(Element),
|
||||||
|
Text(String)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
pub struct HtmlTag {
|
pub struct HtmlTag {
|
||||||
pub tag: String,
|
pub tag: String,
|
||||||
pub attributes: Vec<Attribute>,
|
pub attributes: Vec<Attribute>,
|
||||||
|
pub content: Vec<HtmlContent>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum TextOrCode {
|
||||||
|
Text(String),
|
||||||
|
Code(CodeBlock),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Attribute {
|
||||||
|
pub name: TextOrCode,
|
||||||
|
pub value: Option<TextOrCode>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct CodeBlock {
|
||||||
|
// pub is_async: bool,
|
||||||
pub content: Vec<Element>,
|
pub content: Vec<Element>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Attribute {
|
|
||||||
pub name: String,
|
|
||||||
pub value: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct CodeBlock {
|
|
||||||
pub is_async: bool,
|
|
||||||
pub content: String,
|
|
||||||
}
|
|
||||||
|
|
|
@ -1,82 +1,203 @@
|
||||||
use nom::{
|
use nom::{
|
||||||
IResult,
|
branch::alt, bytes::complete::{is_not, tag, take_while1}, character::complete::{char, multispace0, space0, space1}, combinator::{opt, peek}, error::{Error, ErrorKind}, multi::{many0, many1, separated_list0}, sequence::{delimited, pair, preceded, terminated, tuple}, Err, IResult, InputLength
|
||||||
character::{
|
|
||||||
complete::{char, multispace0},
|
|
||||||
streaming::none_of
|
|
||||||
},
|
|
||||||
combinator::{opt, value},
|
|
||||||
branch::alt,
|
|
||||||
multi::{many0, many1},
|
|
||||||
sequence::{delimited, pair, preceded, terminated},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
use cogs_ast::{Component, Element, HtmlTag, Attribute, CodeBlock};
|
use cogs_ast::{Attribute, CodeBlock, Component, Element, HtmlContent, HtmlTag, TextOrCode};
|
||||||
|
|
||||||
pub fn parse_cog(input: &str) -> IResult<&str, Component> {
|
pub fn parse_cog(input: &str) -> IResult<&str, Component> {
|
||||||
let (input, elements) = many0(parse_element)(input)?;
|
let (input, elements) = parse_consecutive_elements(input)?;
|
||||||
Ok((input, Component { elements }))
|
Ok((input, Component { elements }))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_element(input: &str) -> IResult<&str, Element> {
|
pub fn parse_consecutive_elements(input: &str) -> IResult<&str, Vec<Element>> {
|
||||||
alt((parse_html_tag, parse_code_block))(input)
|
let (input, _) = multispace0(input)?;
|
||||||
|
many0(parse_element)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_html_tag(input: &str) -> IResult<&str, Element> {
|
fn parse_element(input: &str) -> IResult<&str, Element> {
|
||||||
let (input, (tag, (attributes, content))) = delimited(
|
let (input, _) = multispace0(input)?;
|
||||||
char('<'),
|
alt((parse_html, parse_code_block))(input)
|
||||||
pair(
|
}
|
||||||
terminated(many1(none_of(" >/\n")), many0(multispace0)),
|
|
||||||
pair(
|
|
||||||
opt(delimited(char(' '), many1(parse_attribute), char('>'))),
|
|
||||||
many0(parse_element),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
char('>'),
|
|
||||||
)(input)?;
|
|
||||||
|
|
||||||
println!("got result {tag:?}");
|
|
||||||
|
|
||||||
Ok((
|
fn is_valid_tag_name_char(c: char) -> bool {
|
||||||
input,
|
c.is_alphanumeric() || c == '-'
|
||||||
Element::Html(HtmlTag {
|
}
|
||||||
tag: tag.iter().collect(),
|
|
||||||
attributes: attributes.unwrap(),
|
fn is_valid_attr_char(c: char) -> bool {
|
||||||
content,
|
c.is_alphanumeric() || c == '-' || c == '_'
|
||||||
}),
|
}
|
||||||
))
|
|
||||||
|
fn parse_tag_name(input: &str) -> IResult<&str, &str> {
|
||||||
|
take_while1(is_valid_tag_name_char)(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_attribute(input: &str) -> IResult<&str, Attribute> {
|
fn parse_attribute(input: &str) -> IResult<&str, Attribute> {
|
||||||
println!("Attempting to parse attribute");
|
if input.starts_with('>') || input.is_empty() {
|
||||||
let (input, (name, value)) = pair(
|
// Return a recoverable error so `separated_list0` stops parsing cleanly
|
||||||
many1(none_of("= ")),
|
return Err(Err::Error(Error::new(input, nom::error::ErrorKind::Eof)));
|
||||||
preceded(char('='), delimited(char('"'), many0(none_of("\"")), char('"'))),
|
}
|
||||||
)(input)?;
|
|
||||||
|
let (input, key) = take_while1(is_valid_attr_char)(input)?;
|
||||||
|
let (input, value) = opt(preceded(
|
||||||
|
tuple((tag("="), space0)),
|
||||||
|
delimited(tag("\""), is_not("\""), tag("\""))
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
let mut resulting_value = None;
|
||||||
|
|
||||||
|
if value.is_some_and(|x| !x.is_empty()) {
|
||||||
|
resulting_value = Some(TextOrCode::Text(value.unwrap().to_string()));
|
||||||
|
}
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
input,
|
input,
|
||||||
Attribute {
|
Attribute {
|
||||||
name: name.iter().collect(),
|
name: TextOrCode::Text(key.to_string()),
|
||||||
value: value.iter().collect(),
|
value: resulting_value,
|
||||||
},
|
},
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn parse_attributes(input: &str) -> IResult<&str, Vec<Attribute>> {
|
||||||
|
let (input, attrs) = separated_list0(
|
||||||
|
pair(alt((char(','), char(' '))), space0),
|
||||||
|
parse_attribute,
|
||||||
|
)(input)?;
|
||||||
|
|
||||||
|
let (input, _) = space0(input)?; // dump any trailing spaces
|
||||||
|
|
||||||
|
dbg!(&attrs);
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
attrs
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_inside_html_opening_tag(input: &str) -> IResult<&str, HtmlTag> {
|
||||||
|
let (input, tag) = parse_tag_name(input)?;
|
||||||
|
dbg!(&tag);
|
||||||
|
let (input, _) = space0(input)?;
|
||||||
|
let (input, attributes) = parse_attributes(input)?;
|
||||||
|
dbg!(&attributes);
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
HtmlTag{
|
||||||
|
tag: tag.to_string(),
|
||||||
|
attributes,
|
||||||
|
content: Vec::new()
|
||||||
|
}
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_html_opening_tag(input: &str) -> IResult<&str, HtmlTag> {
|
||||||
|
let (input, tag) = delimited(char('<'), parse_inside_html_opening_tag, char('>'))(input)?;
|
||||||
|
|
||||||
|
dbg!(&tag);
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
tag
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_html_closing_tag(input: &str) -> IResult<&str, &str> {
|
||||||
|
let (input, tag) = delimited(tag("</"), parse_tag_name, char('>'))(input)?;
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
tag
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn parse_text(input: &str) -> IResult<&str, &str> {
|
||||||
|
dbg!(&input);
|
||||||
|
let mut index = 0;
|
||||||
|
while index < input.len() {
|
||||||
|
let current_slice = &input[index..];
|
||||||
|
|
||||||
|
|
||||||
|
if peek(parse_consecutive_elements)(current_slice).is_ok() && !peek(parse_consecutive_elements)(current_slice).unwrap().1.is_empty() {
|
||||||
|
dbg!(¤t_slice);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if peek(parse_html_closing_tag)(current_slice).is_ok() {
|
||||||
|
dbg!(¤t_slice);
|
||||||
|
break; // Stop if any of these parsers match
|
||||||
|
}
|
||||||
|
|
||||||
|
index += 1; // Increment to check the next character
|
||||||
|
}
|
||||||
|
dbg!(&input[0..index]);
|
||||||
|
|
||||||
|
Ok((&input[index..], &input[0..index]))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_single_html_content(input: &str) -> IResult<&str, HtmlContent> {
|
||||||
|
let (input, content) = alt((
|
||||||
|
|input| parse_element(input).map(|(next, res)| (next, HtmlContent::Element(res))),
|
||||||
|
|input| parse_text(input).map(|(next, res)| (next, HtmlContent::Text(res.to_string()))),
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
match content.clone() {
|
||||||
|
HtmlContent::Text(text) => {
|
||||||
|
if text.is_empty() {
|
||||||
|
return Err(Err::Error(Error::new(input, ErrorKind::NonEmpty)));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
HtmlContent::Element(_) => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
content
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_html_contents(input: &str) -> IResult<&str, Vec<HtmlContent>> {
|
||||||
|
let (input, out) = many0(parse_single_html_content)(input)?;
|
||||||
|
|
||||||
|
dbg!(&out);
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
out
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_html(input: &str) -> IResult<&str, Element> {
|
||||||
|
let (input, mut htag) = parse_html_opening_tag(input)?;
|
||||||
|
let (input, content) = parse_html_contents(input)?; // parse_consecutive_elements(input)?;
|
||||||
|
htag.content = content;
|
||||||
|
|
||||||
|
let (input, close_name) = parse_html_closing_tag(input)?;
|
||||||
|
if htag.tag != close_name {
|
||||||
|
return Err(Err::Failure(Error::new(input, ErrorKind::Fail))); // Is there a way to give a custom error message?
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input,
|
||||||
|
Element::Html(htag)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
fn parse_code_block(input: &str) -> IResult<&str, Element> {
|
fn parse_code_block(input: &str) -> IResult<&str, Element> {
|
||||||
let (input, (is_async, content)) = delimited(
|
let (input, content) = delimited(
|
||||||
char('{'),
|
char('{'),
|
||||||
pair(
|
parse_consecutive_elements, // get here eventually, currently code blocks do not work at all.
|
||||||
opt(value(true, preceded(char('a'), char('s')))),
|
|
||||||
many1(none_of("}")),
|
|
||||||
),
|
|
||||||
char('}'),
|
char('}'),
|
||||||
)(input)?;
|
)(input)?;
|
||||||
|
|
||||||
Ok((
|
Ok((
|
||||||
input,
|
input,
|
||||||
Element::Block(CodeBlock {
|
Element::Block(CodeBlock {
|
||||||
is_async: is_async.unwrap_or(false),
|
// is_async: is_async.unwrap_or(false),
|
||||||
content: content.into_iter().collect(),
|
content: content
|
||||||
}),
|
}),
|
||||||
))
|
))
|
||||||
}
|
}
|
|
@ -1,5 +1,9 @@
|
||||||
use cogs_parser::parse_cog;
|
use cogs_parser::parse_cog;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
println!("{:#?}", parse_cog("{println!(\"Hello\")}\n<h1>hi</h1>"));
|
println!("{:#?}", parse_cog("
|
||||||
|
<body>
|
||||||
|
<h1>Yo.</h1>
|
||||||
|
<a src=\"https://www.youtube.com/watch?v=dQw4w9WgXcQ\">Click this</a>
|
||||||
|
</body>"));
|
||||||
}
|
}
|
Loading…
Reference in a new issue