Created
August 9, 2013 13:49
-
-
Save tecywiz121/6193721 to your computer and use it in GitHub Desktop.
Java class that implements a state machine for converting glob-like patterns into regular expressions
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.lang.reflect.InvocationTargetException; | |
import java.lang.reflect.Method; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
import java.io.File; | |
/**
 * Incrementally converts a glob-like pattern into a regular expression.
 *
 * <p>Characters are fed in through the {@code add} overloads and drive a small state machine;
 * {@link #toRegex()} returns the regular expression for everything added so far.
 *
 * <p>Supported glob syntax:
 * <ul>
 *   <li>a single star matches any run of characters other than the platform file separator;</li>
 *   <li>two consecutive stars match any run of characters;</li>
 *   <li>{@code ?} matches exactly one character;</li>
 *   <li>a backslash makes the following character literal.</li>
 * </ul>
 *
 * <p>The generated expression is always anchored with {@code ^} and {@code $}.
 *
 * <p>NOTE(review): unescaped {@code [ ] { }} are copied into the regex verbatim. It is unclear
 * whether that is deliberate (pass-through character classes) or an unfinished feature, so the
 * behaviour is left as it was - confirm before relying on it.
 */
public final class GlobBuilder {
    /** Regex for a single star: any run of non-separator characters (built in the static block). */
    private static final String PATTERN_STAR;
    /** Regex for a double star: any run of characters. */
    private static final String PATTERN_STAR_STAR = ".*";
    /** Regex for a question mark: exactly one character. */
    private static final String PATTERN_QUESTION = ".";

    /** Nothing has been added yet. */
    private static final int STATE_START = 0;
    /** The pattern began with a star; waiting to see whether a second star follows. */
    private static final int STATE_LEADING_STAR = 1;
    /** Ordinary position inside the pattern. */
    private static final int STATE_NORMAL = 2;
    /** A backslash was read; the next character is taken literally. */
    private static final int STATE_ESCAPE = 3;
    /** A star was read inside the pattern; waiting to see whether a second star follows. */
    private static final int STATE_STAR = 4;

    static {
        String separator;
        if (File.separatorChar == '\\') {
            // A backslash has to be escaped inside the regex character class.
            separator = "\\\\";
        } else {
            separator = File.separator;
        }
        PATTERN_STAR = "[^" + separator + "]*";
    }

    /** Regex text emitted so far (without the closing anchor). */
    private final StringBuilder regex = new StringBuilder();
    /** Current state of the machine; one of the {@code STATE_*} constants. */
    private int state = STATE_START;
    /** Set once the current character has been fully handled. */
    private boolean consumed = false;
    /** The character currently being processed. */
    private char current;

    /**
     * Small demonstration: builds a regex from a sample glob, prints it, and reports which of a
     * few sample paths it matches.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        GlobBuilder gb = new GlobBuilder();
        gb.add("*\\\\*\\\\*\\\\test.html");

        try {
            System.out.println(gb.toRegex());
            Pattern p = Pattern.compile(gb.toRegex());

            String[] tests = new String[] {
                "test.png",
                "test.html",
                "subdir\\test.html",
                "red\\blue\\green\\test.html",
                "c:\\red\\blue\\green\\test.html",
            };

            for (String s : tests) {
                Matcher m = p.matcher(s);
                System.out.println(s + ": " + (m.find() ? "matches" : "no match"));
            }
        } catch (Exception e) {
            System.err.println(e.getMessage());
        }
    }

    /**
     * Runs the handler for the current state and stores the state it returns.
     *
     * @throws RuntimeException if the machine is in a state that has no handler
     */
    private void invokeState() {
        switch (state) {
            case STATE_START:
                state = _state0();
                break;
            case STATE_LEADING_STAR:
                state = _state1();
                break;
            case STATE_NORMAL:
                state = _state2();
                break;
            case STATE_ESCAPE:
                state = _state3();
                break;
            case STATE_STAR:
                state = _state4();
                break;
            default:
                throw new RuntimeException("Invalid internal state " + state);
        }
    }

    /**
     * Marks the current character as consumed, meaning the state machine is ready to accept
     * another character.
     */
    private void consume() {
        consumed = true;
    }

    /**
     * Start state. A leading star is held back until the next character shows whether it is a
     * single or a double star; anything else opens the regex with its anchor and is re-processed
     * in the normal state.
     *
     * <p>IN -> (none), OUT -> 1, 2
     *
     * @return the next state
     */
    protected int _state0() {
        if (current == '*') {
            consume();
            return STATE_LEADING_STAR;
        }
        regex.append('^');
        return STATE_NORMAL;
    }

    /**
     * The pattern started with a star. A second star turns it into "match anything"; any other
     * character means it was a single star, and that character is re-processed in the normal
     * state. Both branches emit the opening anchor.
     *
     * <p>IN -> 0, OUT -> 2
     *
     * @return the next state
     */
    protected int _state1() {
        regex.append('^');
        if (current == '*') {
            consume();
            regex.append(PATTERN_STAR_STAR);
        } else {
            regex.append(PATTERN_STAR);
        }
        return STATE_NORMAL;
    }

    /**
     * Normal state: translates one ordinary pattern character.
     *
     * <p>IN -> 0, 1, 2, 3, 4, OUT -> 2, 3, 4
     *
     * @return the next state
     */
    protected int _state2() {
        consume();
        switch (current) {
            // Escape the next character.
            case '\\':
                return STATE_ESCAPE;

            // Literal characters that must be escaped in the regex.
            case '.':
            case '(':
            case ')':
            case '+':
            case '|':
            case '^':
            case '$':
            case '@':
            case '%':
                regex.append('\\');
                regex.append(current);
                return STATE_NORMAL;

            // Single or double star is decided by the next character.
            case '*':
                return STATE_STAR;

            case '?':
                regex.append(PATTERN_QUESTION);
                return STATE_NORMAL;

            default:
                regex.append(current);
                return STATE_NORMAL;
        }
    }

    /**
     * Escape state: the previous character was a backslash, so the current one is emitted as a
     * literal. Characters with a special meaning in a regex or in the glob syntax are
     * backslash-escaped in the output; everything else is copied unchanged.
     *
     * <p>IN -> 2, OUT -> 2
     *
     * @return the next state
     */
    protected int _state3() {
        consume();
        switch (current) {
            case ',':
            case '*':
            case '?':
            case '\\':
            case '{':
            case '}':
            case '[': // without this an escaped bracket would open a regex character class
            case ']':
            case '.':
            case '(':
            case ')':
            case '+':
            case '|':
            case '^':
            case '$':
            case '@':
            case '%':
                regex.append('\\');
                regex.append(current);
                return STATE_NORMAL;

            default:
                regex.append(current);
                return STATE_NORMAL;
        }
    }

    /**
     * A star was seen inside the pattern. A second star emits "match anything"; any other
     * character means it was a single star, and that character is re-processed in the normal
     * state.
     *
     * <p>IN -> 2, OUT -> 2
     *
     * @return the next state
     */
    protected int _state4() {
        if (current == '*') {
            consume();
            regex.append(PATTERN_STAR_STAR);
        } else {
            regex.append(PATTERN_STAR);
        }
        return STATE_NORMAL;
    }

    /**
     * Adds the next character of the glob string.
     *
     * @param cc the character to process
     */
    public void add(char cc) {
        consumed = false;
        current = cc;
        // A handler may switch state without consuming, so that the new state sees the same
        // character; keep stepping until some handler consumes it.
        while (!consumed) {
            invokeState();
        }
    }

    /**
     * Adds every character of {@code str}, in order.
     *
     * @param str the glob text to append
     */
    public void add(CharSequence str) {
        for (int ii = 0; ii < str.length(); ii++) {
            add(str.charAt(ii));
        }
    }

    /**
     * Adds every character of {@code str}, in order.
     *
     * @param str the glob text to append
     */
    public void add(String str) {
        add((CharSequence) str);
    }

    /**
     * Returns the anchored regular expression for the glob text added so far. The builder itself
     * is not modified, so more text may be added afterwards.
     *
     * @return the regular expression, starting with {@code ^} and ending with {@code $}
     * @throws Exception if the glob text ends with a backslash that escapes nothing
     */
    public String toRegex() throws Exception {
        StringBuilder sb = new StringBuilder(regex);

        switch (state) {
            case STATE_START:
                // Nothing was added: the empty glob matches only the empty string.
                sb.append('^');
                break;

            case STATE_LEADING_STAR:
                // The whole glob is one pending star.
                sb.append('^');
                sb.append(PATTERN_STAR);
                break;

            case STATE_NORMAL:
                break;

            case STATE_ESCAPE:
                throw new Exception("unterminated escape");

            case STATE_STAR:
                // The glob ends in a single star that has not been emitted yet.
                sb.append(PATTERN_STAR);
                break;

            default:
                throw new RuntimeException("Invalid internal state " + state);
        }

        sb.append('$');
        return sb.toString();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment