Created
March 31, 2022 13:11
-
-
Save marty1885/e20cc8a7d141b48ab9a661d756f913e5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cassert>
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

#include <cppcoro/generator.hpp>
#include <cppcoro/sync_wait.hpp>
#include <cppcoro/task.hpp>
using ParseItem = std::pair<std::string, std::string_view>; | |
using ResultGenerator = cppcoro::generator<ParseItem>; | |
using Parser = std::function<ResultGenerator(std::string_view)>; | |
// Builds a parser that consumes exactly one ASCII decimal digit ('0'..'9').
// The parser yields a single result (the digit as a one-character string and
// the rest of the input) when the input starts with a digit, and yields
// nothing for empty input or any other leading character.
auto digit() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        const bool starts_with_digit =
            !input.empty() && input.front() >= '0' && input.front() <= '9';
        if (!starts_with_digit)
            co_return;
        co_yield { std::string(1, input.front()), input.substr(1) };
    };
}
// Builds a parser that consumes exactly one character when it is an ASCII
// letter, an ASCII decimal digit, or a space. Note that, despite the name,
// the space character ' ' is accepted as well.
// Yields nothing for empty input or any other leading character.
auto alphanumeric() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        if (input.empty())
            co_return;

        const char head = input.front();
        const bool is_lower = head >= 'a' && head <= 'z';
        const bool is_upper = head >= 'A' && head <= 'Z';
        const bool is_numeral = head >= '0' && head <= '9';
        const bool is_space = head == ' ';

        if (is_lower || is_upper || is_numeral || is_space)
            co_yield { std::string(1, head), input.substr(1) };
    };
}
// Builds a parser that matches the exact text `literal_` at the start of the
// input. On a match it yields one result: the literal wrapped in square
// brackets (e.g. "[**]") together with the input after the literal.
// Yields nothing when the input does not start with the literal.
auto literal(std::string literal_) -> Parser
{
    return [expected = std::move(literal_)](std::string_view input) -> ResultGenerator
    {
        if (!input.starts_with(expected))
            co_return;
        co_yield { "["+expected+"]", input.substr(expected.size()) };
    };
}
// Builds a parser that succeeds only at end of input: for an empty input it
// yields one result with an empty string and the (empty) input unchanged;
// for any non-empty input it yields nothing.
auto nullstring() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        if (!input.empty())
            co_return;
        co_yield { "", input };
    };
}
// Alternation: builds a parser that yields every result of `lhs` on the
// input, followed by every result of `rhs` on the same input. Both parsers
// are always tried; results of `lhs` come first.
auto Or(Parser lhs, Parser rhs) -> Parser
{
    return [lhs = std::move(lhs), rhs = std::move(rhs)](std::string_view input) -> ResultGenerator
    {
        // First alternative.
        for (auto from_left : lhs(input))
        {
            co_yield from_left;
        }
        // Second alternative, started from the same position.
        for (auto from_right : rhs(input))
        {
            co_yield from_right;
        }
    };
}
// Sequencing: builds a parser that runs `lhs` on the input and, for each of
// its results, runs `rhs` on what `lhs` left over. Every (lhs, rhs) pair of
// results yields one combined result: the two result strings concatenated,
// plus the remainder reported by `rhs`.
auto then(Parser lhs, Parser rhs) -> Parser
{
    return [lhs = std::move(lhs), rhs = std::move(rhs)](std::string_view input) -> ResultGenerator
    {
        for (auto [head, after_head] : lhs(input))
        {
            // Continue from wherever this particular lhs alternative stopped.
            for (auto [tail, after_tail] : rhs(after_head))
                co_yield { head + tail, after_tail };
        }
    };
}
// Repeatedly applies `parser`, concatenating the results of successive
// applications.
//
// For every non-empty result that `parser` produces on `input`, the function
// recurses on the remainder and yields the concatenation of that result with
// each result of the recursive call. Empty results are skipped (recursing on
// them would not advance through the input).
//
// If no non-empty result was produced at this level, then:
//   - at depth 0 nothing is yielded (at least one application is required);
//   - at any other depth a single empty result {"", input} is yielded, which
//     ends the repetition at this position.
//
// `parser` is held by reference for as long as the returned generator is
// iterated, so it must outlive that generator.
ResultGenerator recursiveParse(std::string_view input, const Parser& parser, size_t depth = 0)
{
    bool matched_any = false;
    for (auto [piece, rest] : parser(input))
    {
        if (!piece.empty())
        {
            matched_any = true;
            for (auto [tail, tail_rest] : recursiveParse(rest, parser, depth + 1))
                co_yield { piece + tail, tail_rest };
        }
    }

    const bool is_nested_call = depth != 0;
    if (!matched_any && is_nested_call)
        co_yield {"", input};
}
// Applies the parser one or more times. The result of each application is
// appended to the results of the previous ones, and because every application
// may itself produce several results, the combined parser may produce several
// results too. Yields nothing when the parser has no non-empty result at the
// start of the input.
auto some(Parser parser) -> Parser
{
    return [parser = std::move(parser)](std::string_view input) -> ResultGenerator
    {
        // Depth 0 tells recursiveParse that at least one match is required.
        // The generator is returned directly: it only runs when iterated, so
        // no forwarding loop (and no per-result copy) is needed.
        return recursiveParse(input, parser);
    };
}
// Applies the parser zero or more times, concatenating the results of the
// successive applications. When the parser has no non-empty result at the
// start of the input, a single empty result with the input unchanged is
// produced instead of failing.
auto zero_or_more(Parser parser) -> Parser
{
    return [parser = std::move(parser)](std::string_view input) -> ResultGenerator
    {
        // Starting at depth 1 makes recursiveParse accept "no match at all".
        // The generator is returned directly: it only runs when iterated, so
        // no forwarding loop (and no per-result copy) is needed.
        return recursiveParse(input, parser, 1);
    };
}
// Sequencing operator: `a + b` is shorthand for then(a, b).
// The by-value arguments are moved on, so the parsers (and everything their
// closures captured) are not copied a second time.
Parser operator+ (Parser lhs, Parser rhs)
{
    return then(std::move(lhs), std::move(rhs));
}
// Alternation operator: `a | b` is shorthand for Or(a, b).
// The by-value arguments are moved on, so the parsers (and everything their
// closures captured) are not copied a second time.
Parser operator| (Parser lhs, Parser rhs)
{
    return Or(std::move(lhs), std::move(rhs));
}
// Sequencing with a literal on the right: `p + "x"` is shorthand for
// then(p, literal("x")). Both arguments are moved on rather than copied.
Parser operator+ (Parser lhs, std::string rhs)
{
    return then(std::move(lhs), literal(std::move(rhs)));
}
// Sequencing with a literal on the left: `"x" + p` is shorthand for
// then(literal("x"), p). Both arguments are moved on rather than copied.
Parser operator+ (std::string lhs, Parser rhs)
{
    return then(literal(std::move(lhs)), std::move(rhs));
}
// Alternation with a literal on the right: `p | "x"` is shorthand for
// Or(p, literal("x")). Both arguments are moved on rather than copied.
Parser operator| (Parser lhs, std::string rhs)
{
    return Or(std::move(lhs), literal(std::move(rhs)));
}
// Alternation with a literal on the left: `"x" | p` is shorthand for
// Or(literal("x"), p). Both arguments are moved on rather than copied.
Parser operator| (std::string lhs, Parser rhs)
{
    return Or(literal(std::move(lhs)), std::move(rhs));
}
// Returns a parser for plain text that offers one result per possible stopping
// point: for every position holding a markup character ('*', '_' or '`') it
// yields the text before that position with the remainder starting at the
// markup character. If the input starts with a markup character, the first
// result is therefore an empty string. Finally, for non-empty input, it yields
// the whole input as text with nothing left over.
auto markdown_text() -> Parser
{
    return [](std::string_view input) -> ResultGenerator
    {
        const auto is_markup = [](char c) { return c == '*' || c == '_' || c == '`'; };

        for(size_t i = 0; i < input.size(); ++i)
        {
            if(is_markup(input[i]))
                co_yield { std::string(input.substr(0, i)), input.substr(i) };
        }

        // NOTE(review): this used to be guarded by
        //   back() == '*' || back() != '_' || back() != '`'
        // which is true for every character, so the whole input has always
        // been offered whenever it is non-empty. That behaviour is kept here.
        // If the intent was to skip this result when the input ends in a
        // markup character, the guard should be !is_markup(input.back()).
        if(!input.empty())
            co_yield {std::string(input), ""};
    };
}
// Demo: assembles a small grammar for inline markdown (code spans, emphasis,
// strong emphasis and plain text), parses a sample sentence with it and
// prints the first parse result, if there is one.
cppcoro::task<> test()
{
    // Grammar, built bottom-up from the combinators.
    const Parser plain = markdown_text();
    const Parser code_span = "`" + plain + "`";

    const Parser em_body = code_span | plain;
    const Parser emphasis = ("*" + em_body + "*") | ("_" + em_body + "_");

    const Parser strong_body = emphasis | plain | code_span;
    const Parser strong = ("**" + strong_body + "**") | ("__" + strong_body + "__");

    const Parser span = strong | emphasis | code_span | plain;
    // A run of spans that must reach the end of the input.
    const Parser text_run = some(span) + nullstring();

    std::string sample = "This is some text *em* **strong** `code` *`code in em`* ***It works***";
    std::cout << "parsing:\t" << sample << std::endl;

    auto outcomes = text_run(sample);
    for (auto [parsed, unparsed] : outcomes)
    {
        // Only the first alternative is reported.
        std::cout << "result:\t\t" << parsed << std::endl;
        break;
    }
    co_return;
}
int main() | |
{ | |
cppcoro::sync_wait(test()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment