From d1c99982c7e6ac9c6a70289ec8db8a48126aa5e2 Mon Sep 17 00:00:00 2001 From: Ermia Behzadifar Date: Tue, 12 Nov 2024 16:34:27 +0100 Subject: [PATCH] Start working on cog parser. Codeblock parser seems to work --- Cargo.lock | 26 +++++++++++ crates/ast/src/lib.rs | 5 +++ crates/parser/Cargo.toml | 2 + crates/parser/src/lib.rs | 90 ++++++++++++++++++++++++++++++++++----- crates/parser/src/main.rs | 5 +++ 5 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 crates/parser/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 4ff19a7..65d2e31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,3 +17,29 @@ version = "0.1.0" [[package]] name = "cogs_parser" version = "0.1.0" +dependencies = [ + "cogs_ast", + "nom", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] diff --git a/crates/ast/src/lib.rs b/crates/ast/src/lib.rs index 90d2fd8..7cbfc53 100644 --- a/crates/ast/src/lib.rs +++ b/crates/ast/src/lib.rs @@ -1,23 +1,28 @@ +#[derive(Debug)] pub struct Component { pub elements: Vec, } +#[derive(Debug)] pub enum Element { Html(HtmlTag), Block(CodeBlock), } +#[derive(Debug)] pub struct HtmlTag { pub tag: String, pub attributes: Vec, pub content: Vec, } +#[derive(Debug)] pub struct Attribute { pub name: String, pub value: String, } +#[derive(Debug)] pub struct CodeBlock { pub is_async: bool, pub content: String, diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml index 1f550d8..78a4b88 100644 --- a/crates/parser/Cargo.toml +++ b/crates/parser/Cargo.toml @@ -4,3 +4,5 @@ version = "0.1.0" edition = "2021" [dependencies] +nom = "7.1.3" +cogs_ast.path = "../ast" \ No newline at end of file diff --git a/crates/parser/src/lib.rs b/crates/parser/src/lib.rs index b93cf3f..590f6d8 100644 --- a/crates/parser/src/lib.rs +++ b/crates/parser/src/lib.rs @@ -1,14 +1,82 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right +use nom::{ + IResult, + character::{ + complete::{char, multispace0}, + streaming::none_of + }, + combinator::{opt, value}, + branch::alt, + multi::{many0, many1}, + sequence::{delimited, pair, preceded, terminated}, +}; + +use cogs_ast::{Component, Element, HtmlTag, Attribute, CodeBlock}; + +pub fn parse_cog(input: &str) -> IResult<&str, Component> { + let (input, elements) = many0(parse_element)(input)?; + Ok((input, Component { elements })) } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } +fn parse_element(input: &str) -> IResult<&str, Element> { + alt((parse_html_tag, parse_code_block))(input) } + +fn parse_html_tag(input: &str) -> IResult<&str, Element> { + let (input, (tag, (attributes, content))) = delimited( + char('<'), + pair( + terminated(many1(none_of(" >/\n")), many0(multispace0)), + pair( + opt(delimited(char(' '), many1(parse_attribute), char('>'))), + many0(parse_element), + ), + ), + char('>'), + )(input)?; + + println!("got result {tag:?}"); + + Ok(( + input, + Element::Html(HtmlTag { + tag: tag.iter().collect(), + attributes: attributes.unwrap(), + content, + }), + )) +} + +fn parse_attribute(input: &str) -> IResult<&str, Attribute> { + println!("Attempting to parse attribute"); + let (input, (name, value)) = pair( + many1(none_of("= ")), + preceded(char('='), delimited(char('"'), many0(none_of("\"")), char('"'))), + )(input)?; + + Ok(( + input, + Attribute { + name: name.iter().collect(), + value: value.iter().collect(), + }, + )) +} + +fn parse_code_block(input: &str) -> IResult<&str, Element> { + let (input, (is_async, content)) = delimited( + char('{'), + pair( + opt(value(true, preceded(char('a'), char('s')))), + many1(none_of("}")), + ), + char('}'), + )(input)?; + + Ok(( + input, + Element::Block(CodeBlock { + is_async: is_async.unwrap_or(false), + content: content.into_iter().collect(), + }), + )) +} \ No newline at end of file diff --git a/crates/parser/src/main.rs b/crates/parser/src/main.rs new file mode 100644 index 0000000..fded43c --- /dev/null +++ b/crates/parser/src/main.rs @@ -0,0 +1,5 @@ +use cogs_parser::parse_cog; + +fn main() { + println!("{:#?}", parse_cog("{println!(\"Hello\")}\n

hi

")); +} \ No newline at end of file