Last active
July 2, 2022 05:23
-
-
Save vrthra/0c52cb5ae6afdd6f97bc106061d3dfdc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
def add(L, u, j, R):
    """Queue descriptor (L, u, j) — label, stack, input index — on worklist R."""
    descriptor = (L, u, j)
    R.append(descriptor)
def pop(s, i, R):
    """Pop the saved (label, index) entry off stack s and queue a descriptor
    that resumes that label at the current input position i."""
    rest, (label, _entry_i) = s  # pop(s, i, R)
    add(label, rest, i, R)       # check and add
    return rest, R
def push(s, t):
    """Return a new immutable stack with entry t pushed onto s."""
    return tuple(s), t
def gll(I):
    """Hand-written GLL recognizer (explicit stack, no GSS) for the grammar
        S -> A S d | B S | d g C | epsilon ;  A -> a | c ;  B -> a | b ;  C -> c
    The stack s is an immutable linked list of (label, index) entries and R
    is the worklist of pending (label, stack, index) descriptors.
    Input I is expected to end with the '$' sentinel.
    NOTE(review): several states consume/pop even when the terminal does not
    match, so invalid inputs are not reliably rejected — the disabled 'bd$'
    test below suggests error detection is unfinished; confirm intent."""
    i = 0
    R = []  # worklist of descriptors
    s = push([], ('L0', 0))
    L = 'LS'
    while True:
        if L == 'L0':
            # Dispatcher: fetch the next pending descriptor, if any.
            if R:
                (L, s1, j), *R = R # remove(L, si, j) from R
                if L == 'L0' and s1 == () and j == len(I)-1: # discount $
                    return "success"
                else:
                    s = s1
                    i = j
                    #L = 'L' # goto L
                    continue
            else:
                return "error"
        elif L == 'LS':
            # S: queue one descriptor per alternative of S.
            #if I[i] in {'a', 'c'}:
            add('LS1', s, i, R)
            #if I[i] in {'a', 'b'}:
            add('LS2', s, i, R)
            #if I[i] in {'d', '$'}:
            add('LS3', s, i, R)
            add('LS4', s, i, R)
            L = 'L0' # fall through
            continue
        elif L == 'LS1':
            # S -> . A S d : call A, return to L1.
            s = push(s, ('L1', i))
            L = 'LA'
            continue
        elif L == 'L1':
            # S -> A . S d : call S, return to L2.
            s = push(s, ('L2', i))
            L = 'LS'
            continue
        elif L == 'L2':
            # S -> A S . d : consume the trailing 'd', then pop.
            if I[i] == 'd':
                i = i+1
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        elif L == 'LS2':
            # S -> . B S : call B, return to L3.
            s = push(s, ('L3', i))
            L = 'LB'
            continue
        elif L == 'L3':
            s = push(s, ('L4', i))
            L = 'LS'
            continue
        elif L == 'L4':
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        elif L == 'LS3':
            # S -> epsilon : pop immediately.
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        elif L == 'LS4':
            # S -> . d g C
            if I[i] == 'd':
                i = i+1
            #s = push(s, ('L5', i))
            L = 'LS4.1'
            continue
        elif L == 'LS4.1':
            if I[i] == 'g':
                i = i+1
            s = push(s, ('L5', i))
            L = 'LC'
            continue
        elif L == 'L5':
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        elif L == 'LC':
            add('LC1', s, i, R)
            L = 'L0'
            continue
        elif L == 'LA':
            # A: one descriptor per alternative.
            add('LA1', s, i, R)
            add('LA2', s, i, R)
            L = 'L0'
            continue
        elif L == 'LA1':
            if I[i] == 'a':
                i = i+1
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        elif L == 'LA2':
            if I[i] == 'c':
                i = i+1
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        elif L == 'LB':
            add('LB1', s, i, R)
            add('LB2', s, i, R)
            L = 'L0'
            continue
        elif L == 'LB1':
            if I[i] == 'a':
                i = i+1
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        elif L == 'LB2':
            if I[i] == 'b':
                i = i+1
            s, R = pop(s, i, R)
            L = 'L0'
            continue
        else:
            assert False
# Smoke test for the recognizer above; further cases are currently disabled.
I = 'dgc$'
assert gll(I) == 'success'
#I = 'aad$'
#assert gll(I) == 'success'
#I = 'aaadd$'
#assert gll(I) == 'success'
#I = 'bd$'
#assert gll(I) == 'error'
#print('.')
import simplefuzzer
# Grammar matching the hand-written recognizer: every fuzzer-generated
# string must be accepted.
G = {
    '<S>': [
        ['<A>', '<S>', 'd'],
        ['<B>', '<S>'],
        ['d', 'g', '<C>'],
        []],
    '<A>': [['a'], ['c']],
    '<B>': [['a'], ['b']],
    '<C>': [['c']]
}
gf = simplefuzzer.LimitFuzzer(G)
for i in range(1000):
    s = gf.iter_fuzz(key='<S>', max_depth=100)
    print(s)
    assert gll(s+'$') == 'success'
    print(s)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The final version. | |
class Node:
    """A GSS node: label L, input position i, and edges to parent nodes."""
    def __init__(self, L, j, children):
        self.L = L                 # grammar-slot / return label
        self.i = j                 # input index at creation time
        self.children = children   # edges to earlier stack nodes

    def __repr__(self):
        return str((self.L, self.i, self.children))
class GSS:
    """Graph-structured stack: nodes keyed by (L, i), plus P, the record of
    input positions at which each node has already been popped."""
    def __init__(self):
        self.gss = {}
        self.P = {}

    def get(self, L, i, children):
        """Return the node labeled (L, i), creating it (with an empty pop
        record) on first request."""
        key = (L, i)
        if key in self.gss:
            return self.gss[key]
        self.gss[key] = Node(L, i, children)
        assert key not in self.P
        self.P[key] = []
        return self.gss[key]

    def add_to_P(self, u, j):
        """Record that node u was popped at input position j."""
        self.P[(u.L, u.i)].append(j)

    def __repr__(self):
        return str(self.gss)
class GLLParser:
    """GLL parser state: worklist R, graph-structured stack, input I, and
    the per-position descriptor sets U (to avoid reprocessing)."""

    def create(self, L, u, j):
        # Ensure GSS node (L, j) exists and has an edge to u.  If pops were
        # already recorded for it (in P), replay them for the new edge so the
        # caller u is not missed.
        v = self.gss.get(L, j, [u])
        if u not in v.children:
            v.children.append(u)
            label = (L, j)
            for k in self.gss.P[label]:
                self.add(L, u, k)
        return v

    def add(self, L, u, j):
        # Schedule descriptor (L, u, j) unless (L, u) was already seen at j.
        if (L, u) not in self.U[j]:
            self.U[j].append((L, u))
            assert (L,u,j) not in self.R
            self.R.append((L, u, j))

    def pop(self, u, j):
        # Record the pop of u at position j and resume every parent of u
        # there.  u0 is the bottom-of-stack sentinel and is never popped.
        if u != self.u0:
            self.gss.add_to_P(u, j)
            for v in u.children:
                self.add(u.L, v, j)
        return u

    def __init__(self, input_str):
        self.R = []
        self.gss = GSS()
        self.I = input_str
        self.m = len(self.I) # |I| + 1 -- input is expected to end in '$'
        self.u1 = self.gss.get('L0', 0, [])      # root GSS node
        self.u0 = self.gss.get('$', self.m, [])  # bottom sentinel
        self.u1.children.append(self.u0)
        self.U = []
        for j in range(self.m): # 0<=j<=m
            self.U.append([]) # U_j = empty
def compile_terminal(key, n_alt, r_pos, r_len, token):
    """Emit the machine case that matches terminal `token` at slot r_pos of
    alternative n_alt of nonterminal `key`: on success advance and jump to
    the next slot's label, on failure return to the dispatcher L0."""
    lbl = f'{key}[{n_alt}]_{r_pos}'
    nxt = '_' if r_len == r_pos else f'{key}[{n_alt}]_{r_pos + 1}'
    return f'''
        elif L == '{lbl}':
            if parser.I[i] == '{token}':
                i = i+1
                L = '{nxt}'
            else:
                L = 'L0'
            continue
'''
def compile_nonterminal(key, n_alt, r_pos, r_len, token):
    """Emit the machine case that calls nonterminal `token`: record the
    return label on the GSS via create, then jump to the callee's case."""
    lbl = f'{key}[{n_alt}]_{r_pos}'
    nxt = '_' if r_len == r_pos else f'{key}[{n_alt}]_{r_pos + 1}'
    return f'''
        elif L == '{lbl}':
            c_u = parser.create('{nxt}', c_u, i)
            L = '{token}'
            continue
'''
def compile_rule(key, n_alt, rule):
    """Compile one alternative: one case per symbol, plus the final
    rule-complete case that jumps to the shared pop label L_."""
    parts = []
    for pos, tok in enumerate(rule):
        emit = compile_nonterminal if (tok[0], tok[-1]) == ('<', '>') else compile_terminal
        parts.append(emit(key, n_alt, pos, len(rule), tok))
    parts.append(f'''
        elif L == '{key}[{n_alt}]_{len(rule)}':
            L = 'L_'
            continue
''')
    return '\n'.join(parts)
def compile_def(key, definition):
    """Emit the dispatch case for nonterminal `key` — which queues one
    descriptor per alternative — followed by the compiled alternatives."""
    parts = [f'''
        elif L == '{key}':
''']
    for n_alt, _ in enumerate(definition):
        parts.append(f'''
            parser.add( '{key}[{n_alt}]_0', c_u, i)''')
    parts.append('''
            # def
            L = 'L0'
            continue''')
    for n_alt, rule in enumerate(definition):
        parts.append(compile_rule(key, n_alt, rule))
    return '\n'.join(parts)
def compile_grammar(g, start):
    """Generate the source of parse_string(parser) for grammar g (GSS
    version), starting at nonterminal `start`, followed by a __main__
    harness for the generated module."""
    # Fixed prologue: dispatcher L0 and shared pop label L_.
    res = ['''
def parse_string(parser):
    c_u = parser.u1
    i = 0
    L = '%s' # starting state
    while True:
        if L == 'L0':
            if parser.R:
                (L, c_u, i), *parser.R = parser.R
                continue
            else:
                if ('L0', parser.u0) in parser.U[parser.m-1]: return 'success'
                else: return 'failed'
        elif L == 'L_':
            c_u = parser.pop(c_u, i)
            L = 'L0'
            continue
''' % start]
    # One dispatch case plus compiled alternatives per nonterminal.
    for k in g:
        r = compile_def(k, g[k])
        res.append(r)
    # Epilogue: catch-all case plus a self-test harness for the emitted file.
    res.append('''
        else:
            assert False
if __name__ == '__main__':
    import simplefuzzer
    G = {
        '<S>': [
            ['<A>', '<S>', 'd'],
            ['<B>', '<S>'],
            ['g', 'p', '<C>'],
            []],
        '<A>': [['a'], ['c']],
        '<B>': [['a'], ['b']],
        '<C>': ['c']
    }
    import sys
    import gll
    mystr = sys.argv[1]
    g = gll.GLLParser(mystr)
    assert parse_string(g) == 'success'
    gf = simplefuzzer.LimitFuzzer(G)
    for i in range(1000):
        s = gf.iter_fuzz(key='<S>', max_depth=100)
        print(s)
        g = gll.GLLParser(s+'$')
        assert parse_string(g) == 'success'
        print(g.gss)
''')
    return '\n'.join(res)
if __name__ == '__main__':
    import simplefuzzer
    # Grammar to compile.  A definition is a list of rules; each rule is a
    # list of tokens.
    G = {
        '<S>': [
            ['<A>', '<S>', 'd'],
            ['<B>', '<S>'],
            ['g', 'p', '<C>'],
            []],
        '<A>': [['a'], ['c']],
        '<B>': [['a'], ['b']],
        # Fixed: was ['c'] — a bare string as a definition only works by
        # accident for single-character tokens; encode as a list of rules.
        '<C>': [['c']]
    }
    res = compile_grammar(G, '<S>')
    print(res)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
'<S>': [ | |
['<A>', '<S>', 'd'], | |
['<B>', '<S>'], | |
[]], | |
'<A>': [ | |
['a'], | |
['c']], | |
'<B>': [ | |
['a'], | |
['b']] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Node:
    """A GSS node: label L, input position i, and edges to parent nodes."""
    def __init__(self, L, j, children):
        self.L = L                 # grammar-slot / return label
        self.i = j                 # input index at creation time
        self.children = children   # edges to earlier stack nodes

    def __repr__(self):
        return str((self.L, self.i, self.children))
class GSS:
    """Graph-structured stack: nodes keyed by (L, i), plus P, the record of
    input positions at which each node has already been popped."""
    def __init__(self):
        self.gss = {}
        self.P = {}

    def get(self, L, i, children):
        """Return the node labeled (L, i), creating it (with an empty pop
        record) on first request."""
        key = (L, i)
        if key in self.gss:
            return self.gss[key]
        self.gss[key] = Node(L, i, children)
        assert key not in self.P
        self.P[key] = []
        return self.gss[key]

    def add_to_P(self, u, j):
        """Record that node u was popped at input position j."""
        self.P[(u.L, u.i)].append(j)

    def __repr__(self):
        return str(self.gss)
class GLLParser:
    """GLL parser state: worklist R, graph-structured stack, input I, and
    the per-position descriptor sets U (to avoid reprocessing)."""

    def create(self, L, u, j):
        # Ensure GSS node (L, j) exists and has an edge to u.  If pops were
        # already recorded for it (in P), replay them for the new edge so the
        # caller u is not missed.
        v = self.gss.get(L, j, [u])
        if u not in v.children:
            v.children.append(u)
            label = (L, j)
            for k in self.gss.P[label]:
                self.add(L, u, k)
        return v

    def add(self, L, u, j):
        # Schedule descriptor (L, u, j) unless (L, u) was already seen at j.
        if (L, u) not in self.U[j]:
            self.U[j].append((L, u))
            assert (L,u,j) not in self.R
            self.R.append((L, u, j))

    def pop(self, u, j):
        # Record the pop of u at position j and resume every parent of u
        # there.  u0 is the bottom-of-stack sentinel and is never popped.
        if u != self.u0:
            self.gss.add_to_P(u, j)
            for v in u.children:
                self.add(u.L, v, j)
        return u

    def __init__(self, input_str):
        self.R = []
        self.gss = GSS()
        self.I = input_str
        self.m = len(self.I) # |I| + 1 -- input is expected to end in '$'
        self.u1 = self.gss.get('L0', 0, [])      # root GSS node
        self.u0 = self.gss.get('$', self.m, [])  # bottom sentinel
        self.u1.children.append(self.u0)
        self.U = []
        for j in range(self.m): # 0<=j<=m
            self.U.append([]) # U_j = empty
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compile_terminal(key, n_alt, r_pos, r_len, token):
    """Emit the machine case that matches terminal `token` at slot r_pos of
    alternative n_alt of nonterminal `key`: on success advance and jump to
    the next slot's label, on failure return to the dispatcher L0."""
    lbl = f'{key}[{n_alt}]_{r_pos}'
    nxt = '_' if r_len == r_pos else f'{key}[{n_alt}]_{r_pos + 1}'
    return f'''\
        elif L == '{lbl}':
            if parser.I[i] == '{token}':
                i = i+1
                L = '{nxt}'
            else:
                L = 'L0'
            continue
'''
def compile_nonterminal(key, n_alt, r_pos, r_len, token):
    """Emit the machine case that calls nonterminal `token`: record the
    return label on the GSS via create, then jump to the callee's case."""
    lbl = f'{key}[{n_alt}]_{r_pos}'
    nxt = '_' if r_len == r_pos else f'{key}[{n_alt}]_{r_pos + 1}'
    return f'''\
        elif L == '{lbl}':
            c_u = parser.create('{nxt}', c_u, i)
            L = '{token}'
            continue
'''
def compile_rule(key, n_alt, rule):
    """Compile one alternative: one case per symbol, plus the final
    rule-complete case that jumps to the shared pop label L_."""
    parts = []
    for pos, tok in enumerate(rule):
        emit = compile_nonterminal if (tok[0], tok[-1]) == ('<', '>') else compile_terminal
        parts.append(emit(key, n_alt, pos, len(rule), tok))
    parts.append(f'''\
        elif L == '{key}[{n_alt}]_{len(rule)}':
            L = 'L_'
            continue
''')
    return '\n'.join(parts)
def compile_def(key, definition):
    """Emit the dispatch case for nonterminal `key` — which queues one
    descriptor per alternative — followed by the compiled alternatives."""
    parts = [f'''\
        elif L == '{key}':
''']
    for n_alt, _ in enumerate(definition):
        parts.append(f'''\
            parser.add( '{key}[{n_alt}]_0', c_u, i)''')
    parts.append('''
            # def
            L = 'L0'
            continue''')
    for n_alt, rule in enumerate(definition):
        parts.append(compile_rule(key, n_alt, rule))
    return '\n'.join(parts)
def compile_grammar(g, start):
    """Generate the source of parse_string(parser) for grammar g (GSS
    version), starting at nonterminal `start`, followed by a __main__
    harness for the generated module."""
    # Fixed prologue: dispatcher L0 and shared pop label L_.
    res = ['''\
def parse_string(parser):
    c_u = parser.u1
    i = 0
    L = '%s' # starting state
    while True:
        if L == 'L0':
            if parser.R:
                (L, c_u, i), *parser.R = parser.R
                continue
            else:
                if ('L0', parser.u0) in parser.U[parser.m-1]: return 'success'
                else: return 'failed'
        elif L == 'L_':
            c_u = parser.pop(c_u, i)
            L = 'L0'
            continue
''' % start]
    # One dispatch case plus compiled alternatives per nonterminal.
    for k in g:
        r = compile_def(k, g[k])
        res.append(r)
    # Epilogue: catch-all case plus a self-test harness for the emitted file.
    res.append('''
        else:
            assert False
if __name__ == '__main__':
    import simplefuzzer
    G = {
        '<S>': [
            ['<A>', '<S>', 'd'],
            ['<B>', '<S>'],
            ['g', 'p', '<C>'],
            []],
        '<A>': [['a'], ['c']],
        '<B>': [['a'], ['b']],
        '<C>': ['c']
    }
    import sys
    import gss_gll as gll
    mystr = sys.argv[1]
    g = gll.GLLParser(mystr)
    assert parse_string(g) == 'success'
    gf = simplefuzzer.LimitFuzzer(G)
    for i in range(1000):
        s = gf.iter_fuzz(key='<S>', max_depth=100)
        print(s)
        g = gll.GLLParser(s+'$')
        assert parse_string(g) == 'success'
        print(g.gss)
''')
    return '\n'.join(res)
if __name__ == '__main__':
    import simplefuzzer
    # Grammar to compile.  A definition is a list of rules; each rule is a
    # list of tokens.
    G = {
        '<S>': [
            ['<A>', '<S>', 'd'],
            ['<B>', '<S>'],
            ['g', 'p', '<C>'],
            []],
        '<A>': [['a'], ['c']],
        '<B>': [['a'], ['b']],
        # Fixed: was ['c'] — a bare string as a definition only works by
        # accident for single-character tokens; encode as a list of rules.
        '<C>': [['c']]
    }
    res = compile_grammar(G, '<S>')
    print(res)
    # Manual checks against the generated parser (disabled):
    # g = GLLParser('gpc$')
    # assert gll(g) == 'success' # Success
    # f = g.gss.gss[(g.u1.L, g.u1.i)]
    # print(f)
    # g = GLLParser('ad$')
    # assert gll(g) == 'success' # Success
    # f = g.gss.gss[(g.u1.L, g.u1.i)]
    # print(f)
    # g = GLLParser('aad$')
    # assert gll(g) == 'success' # Success
    # g = GLLParser('aaadd$')
    # assert gll(g) == 'success' # Success
    # g = GLLParser('bd$')
    # assert gll(g) == 'failed' # Error
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import simplefuzzer
# Grammar for the hand-compiled parser below: each definition is a list of
# rules, each rule a list of tokens.
G = {
    '<S>': [
        ['<A>', '<S>', 'd'],
        ['<B>', '<S>'],
        ['g', 'p', '<C>'],
        []],
    '<A>': [['a'], ['c']],
    '<B>': [['a'], ['b']],
    # Fixed: was ['c'] — a bare string as a definition only works by
    # accident for single-character tokens; encode as a list of rules.
    '<C>': [['c']]
}
class Node:
    """A GSS node: label L, input position i, and edges to parent nodes."""
    def __init__(self, L, j, children):
        self.L = L                 # grammar-slot / return label
        self.i = j                 # input index at creation time
        self.children = children   # edges to earlier stack nodes

    def __repr__(self):
        return str((self.L, self.i, self.children))
class GSS:
    """Graph-structured stack: nodes keyed by (L, i), plus P, the record of
    input positions at which each node has already been popped."""
    def __init__(self):
        self.gss = {}
        self.P = {}

    def get(self, L, i, children):
        """Return the node labeled (L, i), creating it (with an empty pop
        record) on first request."""
        key = (L, i)
        if key in self.gss:
            return self.gss[key]
        self.gss[key] = Node(L, i, children)
        assert key not in self.P
        self.P[key] = []
        return self.gss[key]

    def add_to_P(self, u, j):
        """Record that node u was popped at input position j."""
        self.P[(u.L, u.i)].append(j)

    def __repr__(self):
        return str(self.gss)
class GLLParser:
    """GLL parser state: worklist R, graph-structured stack, input I, and
    the per-position descriptor sets U (to avoid reprocessing)."""

    def create(self, L, u, j):
        # Ensure GSS node (L, j) exists and has an edge to u.  If pops were
        # already recorded for it (in P), replay them for the new edge so the
        # caller u is not missed.
        v = self.gss.get(L, j, [u])
        if u not in v.children:
            v.children.append(u)
            label = (L, j)
            for k in self.gss.P[label]:
                self.add(L, u, k)
        return v

    def add(self, L, u, j):
        # Schedule descriptor (L, u, j) unless (L, u) was already seen at j.
        if (L, u) not in self.U[j]:
            self.U[j].append((L, u))
            assert (L,u,j) not in self.R
            self.R.append((L, u, j))

    def pop(self, u, j):
        # Record the pop of u at position j and resume every parent of u
        # there.  u0 is the bottom-of-stack sentinel and is never popped.
        if u != self.u0:
            self.gss.add_to_P(u, j)
            for v in u.children:
                self.add(u.L, v, j)
        return u

    def __init__(self, input_str):
        self.R = []
        self.gss = GSS()
        self.I = input_str
        self.m = len(self.I) # |I| + 1 -- input is expected to end in '$'
        self.u1 = self.gss.get('L0', 0, [])      # root GSS node
        self.u0 = self.gss.get('$', self.m, [])  # bottom sentinel
        self.u1.children.append(self.u0)
        self.U = []
        for j in range(self.m): # 0<=j<=m
            self.U.append([]) # U_j = empty
def gll(parser):
    """Hand-compiled GLL parser (GSS version) for the grammar
        S -> A S d | B S | g p C | epsilon ;  A -> a | c ;  B -> a | b ;  C -> c
    Labels are named <alt>_<pos>; 'L0' is the dispatcher and 'L_' the shared
    pop label.  Returns 'success' or 'failed'."""
    c_u = parser.u1
    i = 0
    L = 'LS' # starting state
    while True:
        if L == 'L0':
            # Dispatcher: fetch the next descriptor, or decide the outcome.
            if parser.R:
                (L, c_u, i), *parser.R = parser.R
                continue
            else:
                if ('L0', parser.u0) in parser.U[parser.m-1]: return 'success'
                else: return 'failed'
        elif L == 'L_':
            # Shared pop: the current rule is complete.
            c_u = parser.pop(c_u, i)
            L = 'L0'
            continue
        elif L == 'LS':
            # S: queue one descriptor per alternative.
            #if I[i] in {'a', 'c'}:
            parser.add('LS1_0', c_u, i)
            #if I[i] in {'a', 'b'}:
            parser.add('LS2_0', c_u, i)
            #if I[i] in {'d', '$'}:
            parser.add('LS3_0', c_u, i)
            parser.add('LS4_0', c_u, i)
            L = 'L0'
            continue
        elif L == 'LS1_0':
            # S -> . A S d
            c_u = parser.create('LS1_1', c_u, i)
            L = 'LA'
            continue
        elif L == 'LS1_1':
            # S -> A . S d
            c_u = parser.create('LS1_2', c_u, i)
            L = 'LS'
            continue
        elif L == 'LS1_2':
            # S -> A S . d
            if parser.I[i] == 'd':
                i = i+1
                L = 'L_'
            else:
                L = 'L0'
            continue
        elif L == 'LS2_0':
            # S -> . B S
            c_u = parser.create('LS2_1', c_u, i)
            L = 'LB'
            continue
        elif L == 'LS2_1':
            c_u = parser.create('LS2_2', c_u, i)
            L = 'LS'
            continue
        elif L == 'LS2_2':
            L = 'L_'
            continue
        elif L == 'LS3_0':
            # S -> . g p C
            if parser.I[i] == 'g':
                i = i+1
                L = 'LS3_1'
            else:
                L = 'L0'
            continue
        elif L == 'LS3_1':
            if parser.I[i] == 'p':
                i = i+1
                L = 'LS3_2'
            else:
                L = 'L0'
            continue
        elif L == 'LS3_2':
            c_u = parser.create('LS3_3', c_u, i)
            L = 'LC'
            continue
        elif L == 'LS3_3':
            L = 'L_'
            continue
        elif L == 'LS4_0':
            # S -> epsilon : complete immediately.
            L = 'L_'
            continue
        elif L == 'LC':
            parser.add('LC1_0', c_u, i)
            L = 'L0'
            continue
        elif L == 'LC1_0':
            if parser.I[i] == 'c':
                i = i+1
                L = 'L_'
            else:
                L = 'L0'
            continue
        elif L == 'LA':
            #if I[i] == 'a':
            parser.add('LA1_0', c_u, i)
            #if I[i] == 'c':
            parser.add('LA2_0', c_u, i)
            L = 'L0'
            continue
        elif L == 'LA1_0':
            if parser.I[i] == 'a':
                i = i+1
                L = 'L_'
            else:
                L = 'L0'
            continue
        elif L == 'LA2_0':
            if parser.I[i] == 'c':
                i = i+1
                L = 'L_'
            else:
                L = 'L0'
            continue
        elif L == 'LB':
            #if I[i] == 'a':
            parser.add('LB1_0', c_u, i)
            #if I[i] == 'b':
            parser.add('LB2_0', c_u, i)
            L = 'L0'
            continue
        elif L == 'LB1_0':
            if parser.I[i] == 'a':
                i = i+1
                L = 'L_'
            else:
                L = 'L0'
            continue
        elif L == 'LB2_0':
            if parser.I[i] == 'b':
                i = i+1
                L = 'L_'
            else:
                L = 'L0'
            continue
        else:
            assert False
def compile_def(g, definition):
    """Compile every rule of `definition` and return the emitted fragments.

    Fixes two defects: compile_rule was called with one argument although it
    is declared as compile_rule(g, rule) (TypeError at runtime), and the
    compiled fragments were discarded.  Returning the fragments is backward
    compatible — the original returned None, which no caller used."""
    return [compile_rule(g, rule) for rule in definition]
def compile_rule(g, rule):
    """Compile one rule's tokens and return the list of emitted fragments.

    Fixes a defect: the original built `res` but never returned it, so the
    work was lost.  NOTE(review): compile_nonterminal is not defined in this
    file — a rule containing a nonterminal token will raise NameError;
    confirm whether this sketch was meant to be completed."""
    res = []
    for i, t in enumerate(rule):
        if (t[0], t[-1]) == ('<', '>'):
            r = compile_nonterminal(t, i)
        else:
            r = compile_terminal(t, i)
        res.append(r)
    return res
def compile_terminal(t, i):
    """Emit the case that consumes terminal t and immediately pops.
    (The position argument i is currently unused; kept for interface
    compatibility with compile_rule.)"""
    return f'''
        if parser.I[i] == '{t}':
            i = i+1
            c_u = parser.pop(c_u, i)
            L = 'L0'
            continue
'''
# Drive the hand-compiled GSS parser on fixed inputs, then fuzz it: every
# grammar-generated string must parse.
g = GLLParser('gpc$')
assert gll(g) == 'success' # Success
f = g.gss.gss[(g.u1.L, g.u1.i)]  # root GSS node, for inspection
print(f)
g = GLLParser('ad$')
assert gll(g) == 'success' # Success
f = g.gss.gss[(g.u1.L, g.u1.i)]
print(f)
g = GLLParser('aad$')
assert gll(g) == 'success' # Success
g = GLLParser('aaadd$')
assert gll(g) == 'success' # Success
g = GLLParser('bd$')
assert gll(g) == 'failed' # Error
gf = simplefuzzer.LimitFuzzer(G)
for i in range(1000):
    s = gf.iter_fuzz(key='<S>', max_depth=100)
    print(s)
    g = GLLParser(s+'$')
    assert gll(g) == 'success'
    print(g.gss)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class GLLParser:
    """Minimal GLL machinery using an immutable linked stack and a worklist
    R of (label, stack, index) descriptors."""

    def __init__(self, s):
        self.R = []   # pending descriptors
        self.I = s    # input string

    def add(self, L, u, j):
        """Schedule descriptor (L, u, j)."""
        self.R.append((L, u, j))

    def pop(self, s, i):
        """Pop the saved (label, index) entry off s and resume that label at
        the current input position i."""
        rest, (label, _entry_i) = s
        self.add(label, rest, i)
        return rest

    def push(self, L, s, i):
        """Push return label L with position i onto stack s."""
        return (tuple(s), (L, i))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compile_terminal(key, n_alt, r_pos, r_len, token):
    """Emit the machine case that matches terminal `token` at slot r_pos of
    alternative n_alt of nonterminal `key`: on success advance and jump to
    the next slot's label, on failure return to the dispatcher L0."""
    lbl = f'{key}[{n_alt}]_{r_pos}'
    nxt = '_' if r_len == r_pos else f'{key}[{n_alt}]_{r_pos + 1}'
    return f'''\
        elif L == '{lbl}':
            if parser.I[i] == '{token}':
                i = i+1
                L = '{nxt}'
            else:
                L = 'L0'
            continue
'''
def compile_nonterminal(key, n_alt, r_pos, r_len, token):
    """Emit the machine case that calls nonterminal `token`: push the return
    label onto the explicit stack, then jump to the callee's case."""
    lbl = f'{key}[{n_alt}]_{r_pos}'
    nxt = '_' if r_len == r_pos else f'{key}[{n_alt}]_{r_pos + 1}'
    return f'''\
        elif L == '{lbl}':
            s = parser.push('{nxt}', s, i)
            L = '{token}'
            continue
'''
def compile_rule(key, n_alt, rule):
    """Compile one alternative: one case per symbol, plus the final
    rule-complete case that jumps to the shared pop label L_."""
    parts = []
    for pos, tok in enumerate(rule):
        emit = compile_nonterminal if (tok[0], tok[-1]) == ('<', '>') else compile_terminal
        parts.append(emit(key, n_alt, pos, len(rule), tok))
    parts.append(f'''\
        elif L == '{key}[{n_alt}]_{len(rule)}':
            L = 'L_'
            continue
''')
    return '\n'.join(parts)
def compile_def(key, definition):
    """Emit the dispatch case for nonterminal `key` — which queues one
    descriptor per alternative — followed by the compiled alternatives."""
    parts = [f'''\
        elif L == '{key}':
''']
    for n_alt, _ in enumerate(definition):
        parts.append(f'''\
            parser.add( '{key}[{n_alt}]_0', s, i)''')
    parts.append('''
            L = 'L0'
            continue''')
    for n_alt, rule in enumerate(definition):
        parts.append(compile_rule(key, n_alt, rule))
    return '\n'.join(parts)
def compile_grammar(g, start):
    """Generate the source of parse_string(parser) for grammar g (explicit
    stack version), starting at nonterminal `start`, followed by a __main__
    harness for the generated module."""
    # Fixed prologue: dispatcher L0 and shared pop label L_.
    res = ['''\
def parse_string(parser):
    L, s, i = '%s', parser.push('L0', [], 0), 0
    while True:
        if L == 'L0':
            if parser.R:
                (L, s, i), *parser.R = parser.R
                if (L, s, i) == ('L0', (), len(parser.I)-1): return 'success'
                else: continue
            else: return 'error'
        elif L == 'L_':
            s = parser.pop(s, i)
            L = 'L0'
            continue
''' % start]
    # One dispatch case plus compiled alternatives per nonterminal.
    for k in g:
        r = compile_def(k, g[k])
        res.append(r)
    # Epilogue: catch-all case plus a self-test harness for the emitted file.
    res.append('''\
        else:
            assert False
if __name__ == '__main__':
    import simplefuzzer
    G = {
        '<S>': [
            ['<A>', '<S>', 'd'],
            ['<B>', '<S>'],
            ['g', 'p', '<C>'],
            []],
        '<A>': [['a'], ['c']],
        '<B>': [['a'], ['b']],
        '<C>': ['c']
    }
    import sys
    import stack_gll as gll
    mystr = sys.argv[1]
    g = gll.GLLParser(mystr)
    assert parse_string(g) == 'success'
    gf = simplefuzzer.LimitFuzzer(G)
    for i in range(1000):
        s = gf.iter_fuzz(key='<S>', max_depth=100)
        print(s)
        g = gll.GLLParser(s+'$')
        assert parse_string(g) == 'success'
''')
    return '\n'.join(res)
if __name__ == '__main__':
    import simplefuzzer
    # Grammar to compile.  A definition is a list of rules; each rule is a
    # list of tokens.
    G = {
        '<S>': [
            ['<A>', '<S>', 'd'],
            ['<B>', '<S>'],
            ['g', 'p', '<C>'],
            []],
        '<A>': [['a'], ['c']],
        '<B>': [['a'], ['b']],
        # Fixed: was ['c'] — a bare string as a definition only works by
        # accident for single-character tokens; encode as a list of rules.
        '<C>': [['c']]
    }
    res = compile_grammar(G, '<S>')
    print(res)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment