Last active
January 22, 2020 21:42
-
-
Save umanwizard/e9d7f619a98c032b671b910b1d812a4c to your computer and use it in GitHub Desktop.
Visitor that outputs the list of regex capture groups that may not participate in a match
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use regex::Regex; | |
use regex_syntax::hir::visit; | |
use regex_syntax::hir::Group; | |
use regex_syntax::hir::GroupKind; | |
use regex_syntax::hir::Hir; | |
use regex_syntax::hir::HirKind; | |
use regex_syntax::hir::RepetitionKind; | |
use regex_syntax::hir::RepetitionRange; | |
use regex_syntax::hir::Visitor; | |
/// Visitor state used to collect the indices of capture groups that sit
/// underneath an "optional" construct (see `subexpressions_are_optional`).
///
/// `Debug` is derived so the state can be printed with `{:?}` while debugging.
#[derive(Debug, Default)]
struct MyVisitor {
    /// Depth counter: how many optional-making ancestors are currently open.
    /// Incremented in `visit_pre`, decremented in `visit_post`.
    stack: u32,
    /// Indices of capture groups seen while `stack` was greater than zero,
    /// in the order they were visited.
    optional_groups: Vec<u32>,
}

impl MyVisitor {
    /// Creates a visitor with a depth of zero and no recorded groups.
    fn new() -> Self {
        Self::default()
    }
}
fn get_group_idx(hir: &Hir) -> Option<u32> { | |
match hir.kind() { | |
HirKind::Group(Group { | |
kind: GroupKind::CaptureIndex(idx), | |
.. | |
}) | |
| HirKind::Group(Group { | |
kind: GroupKind::CaptureName { index: idx, .. }, | |
.. | |
}) => Some(*idx), | |
_ => None, | |
} | |
} | |
fn subexpressions_are_optional(hir: &Hir) -> bool { | |
match hir.kind() { | |
HirKind::Repetition(r) => match &r.kind { | |
RepetitionKind::ZeroOrMore | RepetitionKind::ZeroOrOne => true, | |
RepetitionKind::Range(rr) => match rr { | |
RepetitionRange::Exactly(0) => true, | |
RepetitionRange::AtLeast(0) => true, | |
RepetitionRange::Bounded(0, _) => true, | |
_ => false, | |
}, | |
_ => false, | |
}, | |
HirKind::Alternation(_) => true, | |
_ => false, | |
} | |
} | |
impl Visitor for MyVisitor { | |
type Output = Vec<u32>; | |
type Err = (); | |
fn finish(self) -> Result<Self::Output, Self::Err> { | |
Ok(self.optional_groups) | |
} | |
fn visit_pre(&mut self, hir: &Hir) -> Result<(), Self::Err> { | |
if subexpressions_are_optional(hir) { | |
self.stack += 1; | |
} | |
if self.stack > 0 { | |
if let Some(idx) = get_group_idx(hir) { | |
self.optional_groups.push(idx); | |
} | |
} | |
Ok(()) | |
} | |
fn visit_post(&mut self, hir: &Hir) -> Result<(), Self::Err> { | |
if subexpressions_are_optional(hir) { | |
self.stack -= 1; | |
} | |
Ok(()) | |
} | |
} | |
fn main() { | |
let s = r#"((a)?|s?|d?|f?)(t|(asdf)*(?P<foo_col>(?:foo)*)((((?P<bars_col>(?:bar)+))?(asdf))))"#; | |
let mut p = regex_syntax::Parser::new(); | |
let hir = p.parse(s).unwrap(); | |
let v = MyVisitor::new(); | |
let ops = visit(&hir, v).unwrap(); | |
println!("Optional groups: {:#?}", ops); | |
let r = Regex::new(s).unwrap(); | |
let v: Vec<_> = r | |
.captures("sasdf") | |
.unwrap() | |
.iter() | |
.map(|om| om.map(|m| m.as_str())) | |
.collect(); | |
println!("Captures: {:#?}", v); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment