Skip to content

Instantly share code, notes, and snippets.

@crlf0710
Created August 4, 2023 02:35
Show Gist options
  • Save crlf0710/87480a71ff9b9d35bd6da0054b0d0645 to your computer and use it in GitHub Desktop.
Save crlf0710/87480a71ff9b9d35bd6da0054b0d0645 to your computer and use it in GitHub Desktop.
#![allow(missing_docs)] // FIXME
/// A unit of text that `conversion_to_plain_text` inspects and edits.
///
/// NOTE(review): the vocabulary (atom, LRM, RLM, FSI, PDI, PDF) looks like it follows the
/// "Conversion to Plain Text" algorithm of Unicode UTS #55 — confirm against the spec.
/// Every editing method takes `&self`, so implementors presumably use interior mutability
/// or record the requested edit elsewhere — TODO confirm.
pub trait Atom<'a> {
/// Returns the atom's text; the `atom_first_cp` / `atom_last_cp` helpers read code points
/// from this string.
fn as_str(&self) -> &str;
/// Reports whether the atom is whitespace. The conversion calls `remove_lrm_and_rlm` on
/// atoms for which this returns `true`.
fn is_whitespace(&self) -> bool;
/// Reports whether the atom is comment content. The conversion only considers
/// `prepend_fsi` and `append_pdi_and_pdf` for atoms for which this returns `true`.
fn is_comment_content(&self) -> bool;
/// Reports whether an LRM may be inserted in front of this atom. When an LRM is pending
/// and this returns `true`, the conversion calls `prepend_lrm`; otherwise it validates
/// the atom instead.
fn can_prepend_lrm(&self) -> bool;
/// Inserts an LRM (presumably U+200E LEFT-TO-RIGHT MARK) in front of the atom.
fn prepend_lrm(&self);
/// Inserts an FSI (presumably U+2068 FIRST STRONG ISOLATE) in front of the atom.
fn prepend_fsi(&self);
/// Removes LRM and RLM characters (presumably U+200E / U+200F) from the atom.
fn remove_lrm_and_rlm(&self);
// FIXME: split this into smaller functions
/// Appends PDI and PDF characters (presumably U+2069 / U+202C) to the atom. The
/// conversion calls this on comment-content atoms whose following atom does not start
/// with a B-class code point. How many of each are appended is up to the implementor.
fn append_pdi_and_pdf(&self);
}
/// Stub: not implemented yet, so every call currently panics through `todo!()`.
///
/// NOTE(review): judging by the name, this is meant to return `true` when `ch` is `None`
/// or has Bidi_Class L (strong left-to-right) — confirm before implementing. A real
/// implementation needs a Bidi_Class table, which the standard library does not provide.
fn is_l_class_or_none(ch: Option<char>) -> bool {
todo!()
}
/// Returns `true` when `ch` is a code point whose Bidi_Class is B (paragraph separator).
///
/// The Unicode Character Database assigns Bidi_Class=B to exactly these code points:
/// U+000A, U+000D, U+001C..=U+001E, U+0085 and U+2029. Other line-breaking characters
/// such as U+000B, U+000C and U+2028 have different bidi classes and yield `false`.
///
/// `None` (no code point available, e.g. the atom's text is empty) yields `false`.
fn is_b_class(ch: Option<char>) -> bool {
    matches!(
        ch,
        Some('\u{000A}' | '\u{000D}' | '\u{001C}'..='\u{001E}' | '\u{0085}' | '\u{2029}')
    )
}
/// Returns `true` when `ch` is U+2068 FIRST STRONG ISOLATE (FSI).
///
/// `None` (no code point available, e.g. the atom's text is empty) yields `false`.
fn is_fsi(ch: Option<char>) -> bool {
    ch == Some('\u{2068}')
}
/// Returns the first code point of `atom`'s text, or `None` when the text is empty.
fn atom_first_cp(atom: &dyn Atom) -> Option<char> {
    // `next()` is the idiomatic spelling of `nth(0)` and yields the same value.
    atom.as_str().chars().next()
}
/// Returns the final code point of `atom`'s text, or `None` when the text is empty.
fn atom_last_cp(atom: &dyn Atom) -> Option<char> {
    let text = atom.as_str();
    // Decode from the back of the string; this is the same code point `last()` reports.
    text.chars().next_back()
}
/// Stub: not implemented yet, so every call currently panics through `todo!()`.
///
/// `conversion_to_plain_text` passes the result to `is_l_class_or_none` to validate an atom
/// that cannot take a pending LRM.
/// NOTE(review): presumably this should return the first code point of `atom` whose bidi
/// class belongs to a set of ten classes; which ten is not visible in this file — confirm
/// against the specification being implemented.
fn atom_find_first_cp_in_10_class(atom: &dyn Atom) -> Option<char> {
todo!()
}
/// Stub: not implemented yet, so every call currently panics through `todo!()`.
///
/// `conversion_to_plain_text` passes the result to `is_l_class_or_none` to decide whether a
/// comment-content atom gets an FSI prepended.
/// NOTE(review): presumably this should return the first code point of `atom` whose bidi
/// class belongs to a set of eight classes; which eight is not visible in this file —
/// confirm against the specification being implemented.
fn atom_find_first_cp_in_8_class(atom: &dyn Atom) -> Option<char> {
todo!()
}
/// Stub: not implemented yet, so every call currently panics through `todo!()`.
///
/// `conversion_to_plain_text` passes the result to `is_l_class_or_none` to decide whether the
/// following atom needs an LRM.
/// NOTE(review): presumably this should return the first code point of `atom` whose bidi
/// class belongs to a set of five classes; which five is not visible in this file —
/// confirm against the specification being implemented.
fn atom_find_first_cp_in_5_class(atom: &dyn Atom) -> Option<char> {
todo!()
}
/// Stub: not implemented yet, so every call currently panics through `todo!()`.
///
/// `conversion_to_plain_text` records a conversion error for the line when this returns
/// `true` for an atom whose following atom does not start with a B-class code point.
/// NOTE(review): judging by the name, this should detect isolate or embedding initiators
/// in `atom` that are not closed within the same atom — the exact matching rules are not
/// visible in this file; confirm against the specification being implemented.
fn atom_has_unmatched_isolate_or_embedding_initiators(atom: &dyn Atom) -> bool {
todo!()
}
/// One line of input for `conversion_to_plain_text`: a source of atoms plus a sink for
/// conversion errors.
pub trait AtomList {
/// The atom type handed out by `atoms`; it may borrow from the list for `'a`.
type Atom<'a>: Atom<'a>
where
Self: 'a;
/// The iterator type returned by `atoms`.
type AtomIter<'a>: Iterator<Item = Self::Atom<'a>>
where
Self: 'a;
/// Returns an iterator over this list's atoms. The conversion walks it front to back and
/// peeks one atom ahead.
fn atoms(&self) -> Self::AtomIter<'_>;
/// Notes that conversion failed for this list. The conversion calls this right before it
/// abandons the remaining atoms of the line and moves on to the next one.
fn record_conversion_error(&self);
}
/// Walks every atom of every line and asks the atoms to insert or remove bidi control
/// characters so that the text can be shown as plain text.
///
/// For each atom, in order:
/// 1. whitespace atoms get their LRM/RLM removed;
/// 2. if an LRM is pending, it is prepended when the atom allows it, otherwise the atom is
///    validated and a failure is recorded as a conversion error for the line;
/// 3. comment-content atoms may get an FSI prepended and PDI/PDF appended;
/// 4. unless the next atom starts with a B-class code point, the atom is checked for
///    unmatched initiators (a conversion error) and may flag an LRM for the next atom.
///
/// A conversion error is reported through [`AtomList::record_conversion_error`] and skips
/// the rest of that line. Nothing is returned; all effects go through the `Atom` and
/// `AtomList` methods.
///
/// NOTE(review): the pending-LRM flag lives across lines. It is cleared at the end of a
/// line only when the last atom does NOT end with a B-class code point, and it is left
/// untouched when a line is abandoned after an error. Confirm both against the
/// specification; a reset on B-class endings may have been intended.
pub fn conversion_to_plain_text<L, I>(lines: I)
where
    L: AtomList,
    I: IntoIterator<Item = L>,
{
    let mut needs_lrm = false;
    'lines: for line in lines {
        let mut remaining = line.atoms().peekable();
        let mut final_atom = None;
        // A plain `for` loop is not possible: the body peeks at the upcoming atom.
        while let Some(current) = remaining.next() {
            if current.is_whitespace() {
                current.remove_lrm_and_rlm();
            }

            if needs_lrm {
                if current.can_prepend_lrm() {
                    current.prepend_lrm();
                    needs_lrm = false;
                } else if !is_l_class_or_none(atom_find_first_cp_in_10_class(&current)) {
                    // The pending LRM cannot be placed and the atom does not start
                    // left-to-right: give up on this line.
                    line.record_conversion_error();
                    continue 'lines;
                }
            }

            if current.is_comment_content() {
                // Short-circuit keeps the 8-class lookup from running when the atom
                // already starts with FSI.
                if !is_fsi(atom_first_cp(&current))
                    && !is_l_class_or_none(atom_find_first_cp_in_8_class(&current))
                {
                    current.prepend_fsi();
                }
                let followed_by_non_b = matches!(
                    remaining.peek(),
                    Some(following) if !is_b_class(atom_first_cp(following))
                );
                if followed_by_non_b {
                    // FIXME: marker carried over from the original code, which left the
                    // concern unstated.
                    current.append_pdi_and_pdf();
                }
            }

            let followed_by_non_b = matches!(
                remaining.peek(),
                Some(following) if !is_b_class(atom_first_cp(following))
            );
            if followed_by_non_b {
                if atom_has_unmatched_isolate_or_embedding_initiators(&current) {
                    line.record_conversion_error();
                    continue 'lines;
                }
                if !is_l_class_or_none(atom_find_first_cp_in_5_class(&current)) {
                    needs_lrm = true;
                }
            }

            final_atom = Some(current);
        }

        if let Some(tail) = final_atom {
            if !is_b_class(atom_last_cp(&tail)) {
                needs_lrm = false;
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment