Skip to content

Instantly share code, notes, and snippets.

@tcw165
Last active August 19, 2019 16:57
Show Gist options
  • Save tcw165/18e495073bd70fd14ff9aa7fc07b85f7 to your computer and use it in GitHub Desktop.
Save tcw165/18e495073bd70fd14ff9aa7fc07b85f7 to your computer and use it in GitHub Desktop.
An automata matcher inspired by https://leetcode.com/problems/regular-expression-matching/. However, the problem is not a typical Regex problem.
class Solution {
// Example:
// Input:
// search string: "missia"
// pattern string: "mis*is*p*."
// Output:
// Regex DFA graph: ["m", "i", "s*", "i", "s*", "p*", "."]
// [0]=m vs m
// [1]=i vs i
// [2]=s vs s* // optional 's'
// [3]=s vs s*
// [4]=i vs s*
// [4]=i vs i
// [5]=a vs s*
// [5]=a vs p* // optional 'p'
// [5]=a vs .
// Pattern match completes for [0..6], state=7
// ans: matched!
// Note: This solution doesn't pass the test because the solution tries to
// match PARTIAL string with the pattern.
// However, the problem requires to match the ENTIRE string.
public boolean isMatch(String s, String p) {
final List<String> graph = createDFAGraph(p);
System.out.println(String.format("Regex DFA graph: %s", graph));
final MatchResult result = traverseWithDFA(s, 0, graph);
return (result.matched &&
result.endAt >= s.length());
}
/**
* Represent the graph as a list of expression string.
* e.g., ["m","i","s*","."]
*/
private List<String> createDFAGraph(
String p
) {
final List<String> graph = new ArrayList<String>();
int end = p.length();
final int lastIndex = p.length() - 1;
for (int i = lastIndex; i >= 0; --i) {
final char c = p.charAt(i);
if (c >= 'a' && c <= 'z' || c == '.') {
graph.add(0, p.substring(i, end));
end = i;
} else if (c == '*') {
// No-op
} else {
throw new IllegalArgumentException();
}
}
return graph;
}
private MatchResult traverseWithDFA(
String s,
int start,
List<String> graph
) {
// Use an integer to represent the state.
int state = 0;
// The cursor to the search string.
int i = start;
while (i < s.length() &&
state < graph.size()) {
final char c = s.charAt(i);
final String expr = graph.get(state);
final char actualExpr = expr.charAt(0);
System.out.println(String.format("[%d]=%s vs %s", i, c, expr));
if (expr.length() == 1) {
// Case as an alphabet.
if (actualExpr == '.' || actualExpr == c) {
++i;
// Transition to next state.
++state;
} else {
break;
}
} else {
// Case as optional alphabet, e.g., "p*" or ".*".
if (actualExpr == '.' || actualExpr == c) {
++i;
if (i >= s.length()) {
// Advance the state as we visit all the characters of the
// string to denote we actually consume the final state!
++state;
}
} else {
// Not found, transition to next state since this expression
// is optional.
++state;
}
}
}
// Check if we consume all the state in the DFA graph.
final boolean matched = state >= graph.size();
final int endAt = i;
System.out.println(String.format("Pattern match completes for [%d..%d], state=%d", start, endAt, state));
return new MatchResult(matched, endAt);
}
private static class MatchResult {
final boolean matched;
final int endAt;
MatchResult(
boolean matched,
int endAt
) {
this.matched = matched;
this.endAt = endAt;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment