Skip to content

Instantly share code, notes, and snippets.

@abhayarawal
Last active December 1, 2019 07:22
Show Gist options
  • Save abhayarawal/fa1a3f2d2691c091552bd2b69543d06f to your computer and use it in GitHub Desktop.
Save abhayarawal/fa1a3f2d2691c091552bd2b69543d06f to your computer and use it in GitHub Desktop.
html parser
// Parser-combinator building blocks; reference: https://bodil.lol/parser-combinators/
/// Tree node with a name, a list of string pairs, and nested nodes of the same type.
///
/// NOTE(review): nothing visible in this file constructs an `Element` yet; the
/// field meanings below are read off the field names and should be confirmed
/// once a parser actually produces this type.
#[derive(Clone, Debug, PartialEq, Eq)]
struct Element {
    /// Element name (presumably the tag name).
    name: String,
    /// Attribute pairs (presumably `(name, value)`), kept in insertion order by the `Vec`.
    attributes: Vec<(String, String)>,
    /// Elements nested directly inside this one.
    children: Vec<Element>,
}
/// Result type shared by every parser in this file.
///
/// `Ok` carries a `(&str, Output)` pair and `Err` carries a `&str`. In the
/// parsers defined in this file the `Ok` slice is the input left over after the
/// match and the `Err` slice is the input that failed to match.
type ParseResult<'a, Output> = Result<(&'a str, Output), &'a str>;
/// Something that can be run against a `&'a str` to produce a `ParseResult`
/// carrying a value of type `Output`.
trait Parser<'a, Output> {
/// Runs the parser on `input` and returns its `ParseResult`.
fn parse(&self, input: &'a str) -> ParseResult<'a, Output>;
}
/// Blanket implementation: every function or closure of shape
/// `Fn(&'a str) -> ParseResult<'a, Output>` is itself a `Parser`.
///
/// The lifetime on the bound's return type is written out explicitly; it is the
/// same `'a` that lifetime elision would pick from the single input lifetime.
impl<'a, F, Output> Parser<'a, Output> for F
where
    F: Fn(&'a str) -> ParseResult<'a, Output>,
{
    /// Delegates straight to the wrapped callable.
    fn parse(&self, input: &'a str) -> ParseResult<'a, Output> {
        self(input)
    }
}
/// Builds a parser that accepts input beginning with `expected`.
///
/// The returned closure yields `Ok((rest, ()))`, where `rest` is the input with
/// the `expected` prefix removed, or `Err(input)` with the untouched input when
/// the prefix is absent. The closure borrows `expected`, hence the `+ 'a` bound.
fn match_literal<'a>(expected: &'a str) -> impl Fn(&str) -> ParseResult<()> + 'a {
    move |input| match input.strip_prefix(expected) {
        Some(rest) => Ok((rest, ())),
        None => Err(input),
    }
}
/// Parses an identifier from the front of `input`.
///
/// An identifier is one alphabetic character followed by any run of
/// alphanumeric characters or `-`. Returns `Ok((rest, name))` with the matched
/// text as an owned `String`, or `Err(input)` when the first character is
/// missing or not alphabetic.
fn identifier(input: &str) -> ParseResult<String> {
    let mut positions = input.char_indices();

    // Guard: the leading character decides whether there is an identifier at all.
    match positions.next() {
        Some((_, first)) if first.is_alphabetic() => {}
        _ => return Err(input),
    }

    // Byte offset of the first character that cannot continue the identifier,
    // or the end of the input when every remaining character qualifies.
    let end = positions
        .find(|&(_, c)| !(c.is_alphanumeric() || c == '-'))
        .map_or(input.len(), |(offset, _)| offset);

    let (name, rest) = input.split_at(end);
    Ok((rest, name.to_string()))
}
/// Combines two parsers into one that runs them back to back.
///
/// `parser1` runs on the input; `parser2` then runs on whatever `parser1` left
/// over. When both succeed the result is `Ok((rest, (r1, r2)))`. The first
/// failure is returned unchanged, so an error from `parser2` carries the input
/// as it stood after `parser1` had consumed its part.
fn pair<P1, P2, R1, R2>(parser1: P1, parser2: P2) -> impl Fn(&str) -> ParseResult<(R1, R2)>
where
    P1: Fn(&str) -> Result<(&str, R1), &str>,
    P2: Fn(&str) -> Result<(&str, R2), &str>,
{
    move |input| {
        parser1(input).and_then(|(after_first, first)| {
            parser2(after_first).map(|(after_second, second)| (after_second, (first, second)))
        })
    }
}
/// Wraps `parser` so that its successful output is passed through `map_fn`.
///
/// `B` is the output type of the wrapped parser and `A` is the type produced by
/// `map_fn`. The remaining input is passed along untouched, and errors from
/// `parser` are returned as they are without calling `map_fn`.
fn map<P, F, A, B>(parser: P, map_fn: F) -> impl Fn(&str) -> Result<(&str, A), &str>
where
    P: Fn(&str) -> Result<(&str, B), &str>,
    F: Fn(B) -> A,
{
    move |input| parser(input).map(|(rest, value)| (rest, map_fn(value)))
}
// Empty entry point: running the binary does nothing.
fn main() {}
#[cfg(test)]
mod tests {
    use super::*;

    /// `match_literal` strips the literal on a match and returns the input as the error otherwise.
    #[test]
    fn text_expected() {
        let hiya = match_literal("hiya");
        assert_eq!(hiya("hiya there"), Ok((" there", ())));
        assert_eq!(hiya("hi there"), Err("hi there"));
    }

    /// `identifier` accepts letters, digits and `-` after a leading letter.
    #[test]
    fn test_identifier() {
        assert_eq!(identifier("small-span"), Ok(("", String::from("small-span"))));
        assert_eq!(identifier("h2 span"), Ok((" span", String::from("h2"))));
        assert_eq!(identifier("!span"), Err("!span"));
    }

    /// `pair` sequences two parsers and fails with the first parser's error when it does not match.
    #[test]
    fn test_pair() {
        let open_tag = pair(match_literal("<"), identifier);
        assert_eq!(open_tag("<span>"), Ok((">", ((), String::from("span")))));
        assert_eq!(open_tag("span>"), Err("span>"));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment