Skip to content

Instantly share code, notes, and snippets.

@umanwizard
Last active January 22, 2020 21:42
Show Gist options
  • Save umanwizard/e9d7f619a98c032b671b910b1d812a4c to your computer and use it in GitHub Desktop.
Save umanwizard/e9d7f619a98c032b671b910b1d812a4c to your computer and use it in GitHub Desktop.
Visitor that outputs the list of capture groups that may not participate in a match
use regex::Regex;
use regex_syntax::hir::visit;
use regex_syntax::hir::Group;
use regex_syntax::hir::GroupKind;
use regex_syntax::hir::Hir;
use regex_syntax::hir::HirKind;
use regex_syntax::hir::RepetitionKind;
use regex_syntax::hir::RepetitionRange;
use regex_syntax::hir::Visitor;
/// HIR visitor state used to collect capture groups that appear underneath an
/// optional construct (see `subexpressions_are_optional`).
///
/// The derived `Default` starts with a zero counter and an empty list.
#[derive(Default)]
struct MyVisitor {
    /// Number of currently-open ancestor nodes whose subexpressions are
    /// optional; bumped in `visit_pre` and lowered again in `visit_post`.
    stack: u32,
    /// Capture-group indices recorded while `stack` was non-zero, in the
    /// order the groups were visited.
    optional_groups: Vec<u32>,
}
impl MyVisitor {
    /// Creates a visitor in its default state (zero depth counter, no
    /// recorded groups).
    fn new() -> Self {
        Self::default()
    }
}
/// Returns the capture index of `hir` when it is a capturing group (numbered
/// or named), and `None` for every other kind of node, including
/// non-capturing groups.
fn get_group_idx(hir: &Hir) -> Option<u32> {
    // Anything that is not a group cannot carry a capture index.
    let group = match hir.kind() {
        HirKind::Group(group) => group,
        _ => return None,
    };

    match &group.kind {
        GroupKind::CaptureIndex(index) => Some(*index),
        GroupKind::CaptureName { index, .. } => Some(*index),
        GroupKind::NonCapturing => None,
    }
}
/// Reports whether the children of `hir` may be skipped entirely during a
/// match.
///
/// This is `true` for any alternation and for any repetition whose minimum
/// count is zero (`?`, `*`, `{0}`, `{0,}`, `{0,n}`); every other node yields
/// `false`.
fn subexpressions_are_optional(hir: &Hir) -> bool {
    let repetition = match hir.kind() {
        // Every alternation is treated as making its branches optional.
        HirKind::Alternation(_) => return true,
        HirKind::Repetition(repetition) => repetition,
        _ => return false,
    };

    match &repetition.kind {
        RepetitionKind::ZeroOrOne | RepetitionKind::ZeroOrMore => true,
        RepetitionKind::Range(range) => match range {
            // All three range forms carry their lower bound first.
            RepetitionRange::Exactly(min)
            | RepetitionRange::AtLeast(min)
            | RepetitionRange::Bounded(min, _) => *min == 0,
        },
        _ => false,
    }
}
/// Collects the indices of capture groups that are nested (at any depth)
/// inside a node for which `subexpressions_are_optional` returns `true`.
impl Visitor for MyVisitor {
    /// Indices of the capture groups found under an optional construct.
    type Output = Vec<u32>;
    /// Neither hook ever fails, so the error type carries no information.
    type Err = ();

    /// Hands back the accumulated list of optional group indices.
    fn finish(self) -> Result<Self::Output, Self::Err> {
        Ok(self.optional_groups)
    }

    /// Opens an "optional" level if `hir` makes its children optional, then
    /// records `hir` itself when it is a capturing group seen while at least
    /// one such level is open.
    fn visit_pre(&mut self, hir: &Hir) -> Result<(), Self::Err> {
        if subexpressions_are_optional(hir) {
            self.stack += 1;
        }

        match get_group_idx(hir) {
            Some(idx) if self.stack > 0 => self.optional_groups.push(idx),
            _ => {}
        }

        Ok(())
    }

    /// Closes the level opened by the matching `visit_pre` call, if any.
    fn visit_post(&mut self, hir: &Hir) -> Result<(), Self::Err> {
        // `true` converts to 1 and `false` to 0, so this only decrements for
        // nodes that incremented the counter on the way in.
        self.stack -= u32::from(subexpressions_are_optional(hir));
        Ok(())
    }
}
/// Demo driver: prints the optional capture groups computed from the parsed
/// pattern, then prints the actual captures obtained by matching the same
/// pattern against a sample input.
fn main() {
    let pattern = r#"((a)?|s?|d?|f?)(t|(asdf)*(?P<foo_col>(?:foo)*)((((?P<bars_col>(?:bar)+))?(asdf))))"#;

    // Static analysis: walk the HIR and list groups under optional nodes.
    let hir = regex_syntax::Parser::new().parse(pattern).unwrap();
    let optional = visit(&hir, MyVisitor::new()).unwrap();
    println!("Optional groups: {:#?}", optional);

    // Runtime check: show which groups captured text (`Some`) or not (`None`).
    let regex = Regex::new(pattern).unwrap();
    let captures = regex.captures("sasdf").unwrap();
    let mut matched = Vec::new();
    for group in captures.iter() {
        matched.push(group.map(|m| m.as_str()));
    }
    println!("Captures: {:#?}", matched);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment