drslump · April 10, 2018 07:02
diff --git a/fsm.py b/fsm.py
 import sys, types, dis, struct

 # Factories for the symbolic instructions used by the FSM actions.  Each one
 # returns an ``Op`` carrying the mnemonic and, where applicable, a still
 # unresolved argument (a constant, a local variable name or a state label).
 # They are real functions rather than lambdas bound to names (PEP 8), so
 # tracebacks and ``__name__`` show which factory was involved.

 def BINARY_SUBSCR():
    return Op('BINARY_SUBSCR')


 def BUILD_TUPLE(x):
    return Op('BUILD_TUPLE', x)


 def COMPARE_OP(x):
    return Op('COMPARE_OP', x)


 def INPLACE_ADD():
    return Op('INPLACE_ADD')


 def INPLACE_SUBTRACT():
    return Op('INPLACE_SUBTRACT')


 def JUMP_ABSOLUTE(x):
    return Op('JUMP_ABSOLUTE', x)


 def LOAD_CONST(x):
    return Op('LOAD_CONST', x)


 def LOAD_FAST(x):
    return Op('LOAD_FAST', x)


 def POP_JUMP_IF_TRUE(x):
    return Op('POP_JUMP_IF_TRUE', x)


 def STORE_FAST(x):
    return Op('STORE_FAST', x)


 def RETURN_VALUE():
    return Op('RETURN_VALUE')


 def STATE(x):
    # Pseudo instruction: marks the position of the state label ``x``
    return Op('$STATE', x)


 def _check_arg(arg):
    """Reject operands outside the 16 bit range both encoders support.

    A real exception is raised (instead of an ``assert``) so the check is
    still performed when Python runs with ``-O``.
    """
    if not 0 <= arg <= 0xFFFF:
        raise ValueError('unsupported opcode arg over 16bits')


 if sys.version_info < (3,6):
    # Before 3.6 an instruction is one opcode byte, followed by a 16 bit
    # little-endian operand when the opcode takes an argument.
    def pack_opcode(opcode, arg=None):
        """Return the encoded bytes for ``opcode`` (and ``arg`` if it takes one)."""
        if opcode >= dis.HAVE_ARGUMENT:
            _check_arg(arg)
            return struct.pack('<BH', opcode, arg)
        return struct.pack('B', opcode)

    def patch_arg(buffer, offset, arg):
        """Overwrite the operand of the instruction starting at ``offset``."""
        _check_arg(arg)
        struct.pack_into('<H', buffer, offset + 1, arg)

 else:
    # From 3.6 on every instruction is two bytes (opcode, 8 bit operand);
    # larger operands get their high byte from a preceding EXTENDED_ARG.
    def pack_opcode(opcode, arg=None):
        """Return the encoded bytes for ``opcode`` (and ``arg`` if it takes one).

        Operands above 0xFF are emitted as an EXTENDED_ARG instruction followed
        by the actual instruction (four bytes in total).
        """
        if opcode < dis.HAVE_ARGUMENT:
            return struct.pack('BB', opcode, 0)
        _check_arg(arg)
        if arg <= 0xFF:
            return struct.pack('BB', opcode, arg)
        return struct.pack('BBBB', dis.EXTENDED_ARG, arg >> 8, opcode, arg & 0xFF)

    def patch_arg(buffer, offset, arg):
        """Overwrite the operand of the EXTENDED_ARG pair starting at ``offset``.

        Only the four byte form produced by ``pack_opcode`` for operands above
        0xFF can be patched; anything else raises ValueError instead of
        silently overwriting the neighbouring instruction.
        """
        _check_arg(arg)
        # TODO: Parse opcode and adapt extended arg if needed
        if buffer[offset] != dis.EXTENDED_ARG:
            raise ValueError('no EXTENDED_ARG prefix to patch at offset %d' % offset)
        buffer[offset + 1] = arg >> 8
        buffer[offset + 3] = arg & 0xFF


 class Op(object):
    """A symbolic instruction: an opcode name plus an optional argument.

    Iterating an ``Op`` produces the opcode followed by the argument, which
    lets callers unpack it as ``opcode, arg = op``.
    """
    __slots__ = ('opcode', 'arg')

    def __init__(self, opcode, arg=None):
        self.opcode, self.arg = opcode, arg

    def __iter__(self):
        yield self.opcode
        yield self.arg


 class Ops(list):
    """A list of ``Op`` items that can be grown with the ``<<`` operator."""

    def __lshift__(self, other):
        """Add ``other`` to this list in place and return the list itself.

        ``other`` may be a tuple or list (all its items are added), a single
        ``Op`` (appended) or an ``Abstract`` (the result of its ``opcodes()``
        is added).  Returning ``self`` makes ``ops << a << b`` work.

        Raises TypeError for any other kind of operand.
        """
        if isinstance(other, (tuple, list)):
            self.extend(other)
        elif isinstance(other, Op):
            self.append(other)
        elif isinstance(other, Abstract):
            self.extend(other.opcodes())
        else:
            # type(other) is a class, so it has to be formatted, not concatenated
            raise TypeError('Unsupported type: %r' % (type(other),))
        return self


 class Abstract(object):
    """Base class for FSM nodes that can be lowered to bytecode.

    Subclasses provide ``opcodes()``; ``_encode`` serialises that sequence and
    ``compile`` wraps the result in a function object.

    NOTE(review): mnemonics and comparison operators are looked up in
    ``dis.opmap`` / ``dis.cmp_op`` of the running interpreter, so an
    interpreter lacking one of them fails inside ``_encode`` - confirm which
    Python versions are meant to be supported.
    """

    def opcodes(self):
        """Return the ``Op`` sequence for this node (must be overridden)."""
        raise AssertionError('Not implemented')

    def optimize(self):
        """Hook for subclasses to simplify themselves; a no-op by default."""
        pass

    @staticmethod
    def _intern(table, value):
        """Return the index of ``value`` in ``table``, appending it if missing."""
        try:
            return table.index(value)
        except ValueError:
            table.append(value)
            return len(table) - 1

    def _encode(self, constnames, varnames):
        """Serialise ``self.opcodes()`` into a bytecode string.

        Note that ``constnames`` and ``varnames`` will be mutated: constants
        and local variable names that are not listed yet are appended.

        Jumps whose target is a state label are first emitted with a
        placeholder operand and patched once every label offset is known.
        Raises KeyError if a jump refers to a label that was never emitted.
        """
        code = bytearray()
        states = {}     # label -> offset of the first instruction after it
        jumps = []      # (label, offset of the jump instruction to patch)

        # First encode while keeping a registry of jumps and labels
        for opcode, arg in self.opcodes():
            if opcode == '$STATE':
                # Pseudo instruction: record the position, emit nothing
                states[arg] = len(code)
                continue

            if opcode == 'LOAD_CONST':
                arg = self._intern(constnames, arg)
            elif opcode in ('STORE_FAST', 'LOAD_FAST'):
                arg = self._intern(varnames, arg)
            elif opcode == 'COMPARE_OP':
                arg = dis.cmp_op.index(arg)

            opcode = dis.opmap[opcode]

            if opcode in dis.hasjabs and not isinstance(arg, int):
                jumps.append((arg, len(code)))
                arg = 65535  # force a extended_arg on >3.6

            code.extend(pack_opcode(opcode, arg))

        # Now process the jumps to set the correct offsets
        for state, offset in jumps:
            patch_arg(code, offset, states[state])

        return bytes(code)

    def compile(self, name='fsmlex', docblock=None):
        """ Builds a function with the currently configured opcodes

        The function takes ``(stream, ofs)`` with ``ofs`` defaulting to 0.
        ``name`` is used as function name and, with ``.py`` appended, as the
        reported file name; ``docblock`` becomes the first constant.
        """
        argnames = ('stream', 'ofs')
        varnames = list(argnames)
        constnames = [docblock]

        code = self._encode(constnames, varnames)

        args = [
            len(argnames),          # co_argcount -> (stream, ofs)
            len(varnames),          # co_nlocals
            2,                      # co_stacksize -> maximum number of values in the stack
            0,                      # co_flags -> only if *args is used
            code,                   # co_code -> compiled bytecode
            tuple(constnames),      # co_consts -> literals in the code (first is docblock)
            (),                     # co_names -> global/attribute names; none are used
            tuple(varnames),        # co_varnames -> list of local variables (starting with args)
            name + '.py',           # co_filename,
            name,                   # co_name,
            0,                      # co_firstlineno,
            bytes()                 # co_lnotab
        ]

        if sys.version_info >= (3, 0):
            args.insert(1, 0)       # co_kwonlyargcount
        if sys.version_info >= (3, 8):
            # Python 3.8 added co_posonlyargcount right after co_argcount;
            # without it every following argument is shifted by one.
            args.insert(1, 0)

        co = types.CodeType(*args)

        # (0,) are the defaults for the trailing arguments, i.e. ofs=0
        return types.FunctionType(co, {}, name, (0,))


 class State(Abstract):
    """ A sequence of actions, optionally introduced by a label

    Nested ``State`` objects may be used as actions; their opcodes are
    emitted in place.
    """
    __slots__ = ('label', 'actions')

    def __init__(self, label=None, actions=None):
        if label is None:
            self.label = None
        else:
            self.label = str(label)
        self.actions = actions or []

    def add(self, *actions):
        """ Appends the given actions to this state
        """
        self.actions.extend(actions)

    def optimize(self):
        """ Asks every contained action to optimize itself
        """
        # TODO: Collapse Matches with same target
        for step in self.actions:
            step.optimize()

    def opcodes(self):
        """ Label marker (when there is a label) followed by every action
        """
        result = Ops([STATE(self.label)] if self.label else [])
        for step in self.actions:
            result << step
        return result


 class Match(Abstract):
    """ Jumps if the character matches the set of values

    ``values`` holds the accepted characters (a string or an iterable of
    strings, joined into a single constant); ``label`` is the state to jump
    to.  The character tested is the local variable ``ch``.
    """
    # The attribute is ``values``; the slot used to be misspelled ``value``
    __slots__ = ('values', 'label')

    def __init__(self, values, label):
        self.values = values
        self.label = str(label)

    def opcodes(self):
        """ Emits ``if ch == values`` or ``if ch in values`` plus the jump
        """
        ops = Ops()
        # > if ch in self.values
        ops << LOAD_FAST('ch')
        ops << LOAD_CONST(u''.join(self.values))
        # A single candidate only needs an equality test
        if len(self.values) == 1:
            ops << COMPARE_OP('==')
        else:
            ops << COMPARE_OP('in')
        ops << POP_JUMP_IF_TRUE(self.label)
        return ops


 class Jump(Abstract):
    """ Unconditional jump to the state named by ``label``
    """
    __slots__ = ('label',)

    def __init__(self, label):
        self.label = str(label)

    def opcodes(self):
        """ A single JUMP_ABSOLUTE whose target is the label
        """
        return Ops([JUMP_ABSOLUTE(self.label)])


 class Consume(Abstract):
    """ Loads the character at the current offset into ``ch``

    With ``advance`` set (the default) the offset is moved forward first.
    """
    __slots__ = ('advance',)

    def __init__(self, advance=True):
        self.advance = advance

    def opcodes(self):
        """ Optional offset increment followed by ``ch = stream[ofs]``
        """
        ops = Ops(Advance().opcodes()) if self.advance else Ops()
        ops.extend([
            LOAD_FAST('stream'),
            LOAD_FAST('ofs'),
            BINARY_SUBSCR(),        # > stream[ofs]
            STORE_FAST('ch'),       # > ch = stream[ofs]
        ])
        return ops


 class Advance(Abstract):
    """ Moves the offset one position forward
    """
    __slots__ = ()

    def opcodes(self):
        """ Emits ``ofs = ofs + 1``
        """
        return Ops([
            LOAD_FAST('ofs'),
            LOAD_CONST(1),
            INPLACE_ADD(),          # > ofs + 1
            STORE_FAST('ofs'),      # > ofs = ofs + 1
        ])


 class Marker(Abstract):
    """ Remembers the current offset together with a mark value

    The mark is kept in the local ``accept`` and the offset in ``marker``.
    """
    __slots__ = ('mark',)

    def __init__(self, mark):
        self.mark = mark

    def opcodes(self):
        """ Emits ``accept = mark`` followed by ``marker = ofs``
        """
        return Ops([
            LOAD_CONST(self.mark),
            STORE_FAST('accept'),   # > accept = self.mark
            LOAD_FAST('ofs'),
            STORE_FAST('marker'),   # > marker = ofs
        ])


 class Backtrack(Abstract):
    """ Restores the offset saved in ``marker``
    """
    __slots__ = ()

    def opcodes(self):
        """ Emits ``ofs = marker``
        """
        restore = Ops()
        restore.extend([LOAD_FAST('marker'), STORE_FAST('ofs')])
        return restore


 class Accept(Abstract):
    """ Jumps to ``label`` when the saved ``accept`` value equals ``mark``
    """
    __slots__ = ('mark', 'label')

    def __init__(self, mark, label):
        self.mark, self.label = mark, str(label)

    def opcodes(self):
        """ Emits ``if accept == mark`` plus the conditional jump
        """
        return Ops([
            # > if accept == self.mark
            LOAD_FAST('accept'),
            LOAD_CONST(self.mark),
            COMPARE_OP('=='),
            POP_JUMP_IF_TRUE(self.label),
        ])


 class Produce(Abstract):
    """ Returns the pair ``(ofs, token)``; the token defaults to None
    """
    __slots__ = ('token',)

    def __init__(self, token=None):
        self.token = token

    def opcodes(self):
        """ Emits ``return (ofs, token)``
        """
        return Ops([
            # > return (ofs, self.token)
            LOAD_FAST('ofs'),
            LOAD_CONST(self.token),
            BUILD_TUPLE(2),
            RETURN_VALUE(),
        ])
diff --git a/fsmlex.py b/fsmlex.py
 from fsm import Accept, Advance, Backtrack, Consume, Jump, Match, Marker, Produce, State

 def factory_grammar():
    """Build the lexer state machine and return it compiled as a function.

    The outer, unlabelled state reads the current character and dispatches on
    it; every nested ``State`` is one labelled node of the automaton.  A state
    that does not end with a jump or a ``Produce`` falls through into the next
    one (e.g. ``yy2`` into ``yy3``, ``yy47`` into ``yy48``).

    The returned function is the result of ``fsm.compile()``.
    """
    fsm = State(None, [
        Consume(False),

        Match(' \t', 'yy4'),
        Match('\n', 'yy7'),
        Match('\r', 'yy10'),
        Match('!', 'yy11'),
        Match('"', 'yy12'),
        Match('%', 'yy13'),
        Match('(', 'yy14'),
        Match(')', 'yy16'),
        Match('*', 'yy18'),
        Match('+', 'yy18'),
        Match('-', 'yy20'),
        Match('.', 'yy21'),
        Match('/', 'yy23'),
        Match(':', 'yy24'),
        Match('?', 'yy26'),
        Match('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'yy27'),
        Match('[', 'yy30'),
        Match(']', 'yy32'),
        Match('_', 'yy34'),
        # Lower-case letters start a RULE: they belong to the yy35 loop (as in
        # every other state), not to the upper-case TOKEN loop yy27
        Match('abcdefghijklmnopqrstuvwxyz', 'yy35'),
        Match('|', 'yy38'),
        Match('~', 'yy40'),
        Jump('yy2'),

        State('yy2', [
            Advance(),
        ]),
        State('yy3', [
            Produce(None),
        ]),
        State('yy4', [
            Consume(),
            Match(' \t', 'yy4'),
            Jump('yy6')
        ]),
        State('yy6', [
            Produce('WS'),
        ]),
        State('yy7', [
            Consume(),
            Marker(0),
            Match(' \t', 'yy42'),
            Match('\n', 'yy7'),
            Match('\r', 'yy44'),
            Jump('yy9')
        ]),
        State('yy9', [
            Produce('_NL'),
        ]),
        State('yy10', [
            Consume(),
            Match('\n', 'yy7'),
            Jump('yy3'),
        ]),
        State('yy11', [
            Consume(),
            Marker(1),
            Match('?_', 'yy46'),
            Match('abcdefghijklmnopqrstuvwxyz', 'yy35'),
            Jump('yy3'),
        ]),
        State('yy12', [
            Consume(),
            Marker(1),
            Match('\n', 'yy3'),
            Jump('yy48'),
        ]),
        State('yy13', [
            Consume(),
            Marker(1),
            Match('i', 'yy53'),
            Jump('yy3'),
        ]),
        State('yy14', [
            Advance(),
            Produce('_LPAR'),
        ]),
        State('yy16', [
            Advance(),
            Produce('_RPAR'),
        ]),
        State('yy18', [
            Consume(),
            Match('?', 'yy54'),
            Match('abcdefghijklmnopqrstuvwxyz', 'yy55'),
            Jump('yy19'),
        ]),
        State('yy19', [
            Produce('OP'),
        ]),
        State('yy20', [
            Consume(),
            Match('>', 'yy57'),
            Jump('yy3'),
        ]),
        State('yy21', [
            Advance(),
            Produce('_DOT'),
        ]),
        State('yy23', [
            Consume(),
            Marker(1),
            Match('/', 'yy61'),
            Jump('yy59'),
        ]),
        State('yy24', [
            Advance(),
            Produce('_COLON'),
        ]),
        State('yy26', [
            Consume(),
            Match('abcdefghijklmnopqrstuvwxyz', 'yy35'),
            Jump('yy62'),
        ]),

        State('yy27', [
            Consume(),
            Match('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_', 'yy27'),
            Jump('yy29'),
        ]),
        State('yy29', [
            Produce('TOKEN'),
        ]),
        State('yy30', [
            Advance(),
            Produce('_LBRA'),
        ]),
        State('yy32', [
            Advance(),
            Produce('_RBRA'),
        ]),
        State('yy34', [
            Consume(),
            Match('ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'yy27'),
            Match('abcdefghijklmnopqrstuvwxyz', 'yy35'),
            Jump('yy3'),
        ]),
        State('yy35', [
            Consume(),
            Match('0123456789_abcdefghijklmnopqrstuvwxyz', 'yy35'),
            Jump('yy37'),
        ]),
        State('yy37', [
            Produce('RULE'),
        ]),
        State('yy38', [
            Advance(),
            Produce('_OR'),
        ]),
        State('yy40', [
            Advance(),
            Produce('_TILDE'),
        ]),
        State('yy42', [
            Consume(),
            Match(' \t', 'yy42'),
            Jump('yy9'),
        ]),
        State('yy44', [
            Consume(),
            Match('\n', 'yy7'),
            Jump('yy45'),
        ]),
        # Backtracking point: go back to the marked offset and resume at the
        # state selected by the stored mark
        State('yy45', [
            Backtrack(),
            Accept(0, 'yy9'),
            Accept(1, 'yy3'),
            Accept(2, 'yy50'),
            Jump('yy69'),
        ]),
        State('yy46', [
            Consume(),
            Match('abcdefghijklmnopqrstuvwxyz', 'yy35'),
            Jump('yy45'),
        ]),
        State('yy47', [
            Consume()
        ]),
        State('yy48', [
            Match('\n', 'yy45'),
            Match('"', 'yy49'),
            Match('\\', 'yy51'),
            Jump('yy47'),
        ]),
        State('yy49', [
            Consume(),
            Match('i', 'yy63'),
            Jump('yy50'),
        ]),
        State('yy50', [
            Produce('STRING'),
        ]),
        State('yy51', [
            Consume(),
            Match('\n', 'yy45'),
            Match('"', 'yy64'),
            Match('\\', 'yy51'),
            Jump('yy47'),
        ]),
        State('yy53', [
            Consume(),
            Match('g', 'yy65'),
            Match('m', 'yy66'),
            Jump('yy45'),
        ]),
        State('yy54', [
            Consume(),
            Match('abcdefghijklmnopqrstuvwxyz', 'yy55'),
            Jump('yy19'),
        ]),
        State('yy55', [
            Advance(),
            Produce('OP'),
        ]),
        State('yy57', [
            Advance(),
            Produce('_TO'),
        ]),
        State('yy59', [
            Consume(),
            Match('\n', 'yy45'),
            Match('/', 'yy67'),
            Match('\\', 'yy70'),
            Jump('yy59'),
        ]),
        State('yy61', [
            Consume(),
            Match('\n', 'yy45'),
            Jump('yy72'),
        ]),
        State('yy62', [
            Advance(),
            Jump('yy19'),
        ]),
        State('yy63', [
            Advance(),
            Jump('yy50'),
        ]),
        State('yy64', [
            Consume(),
            Marker(2),
            Match('\n', 'yy50'),
            Match('"', 'yy49'),
            Match('\\', 'yy51'),
            Match('i', 'yy74'),
            Jump('yy47'),
        ]),
        State('yy65', [
            Consume(),
            Match('n', 'yy75'),
            Jump('yy45'),
        ]),
        State('yy66', [
            Consume(),
            Match('p', 'yy76'),
            Jump('yy45'),
        ]),
        State('yy67', [
            Consume(),
            Match('ilmsux', 'yy67'),
            Jump('yy69'),
        ]),
        State('yy69', [
            Produce('REGEXP'),
        ]),
        State('yy70', [
            Consume(),
            Match('\n', 'yy45'),
            Match('/', 'yy77'),
            Match('\\', 'yy70'),
            Jump('yy59'),
        ]),
        State('yy72', [
            Produce('COMMENT'),
        ]),
        State('yy74', [
            Consume(),
            Marker(2),
            Match('\n', 'yy50'),
            Match('"', 'yy49'),
            Match('\\', 'yy51'),
            Jump('yy47'),
        ]),
        State('yy75', [
            Consume(),
            Match('o', 'yy79'),
            Jump('yy45'),
        ]),
        State('yy76', [
            Consume(),
            Match('o', 'yy80'),
            Jump('yy45'),
        ]),
        State('yy77', [
            Consume(),
            Marker(3),
            Match('\n', 'yy69'),
            Match('/', 'yy67'),
            Match('\\', 'yy70'),
            Match('ilmsux', 'yy77'),
            Jump('yy59'),
        ]),
        State('yy79', [
            Consume(),
            Match('r', 'yy81'),
            Jump('yy45'),
        ]),
        State('yy80', [
            Consume(),
            Match('r', 'yy82'),
            Jump('yy45'),
        ]),
        State('yy81', [
            Consume(),
            Match('e', 'yy83'),
            Jump('yy45'),
        ]),
        State('yy82', [
            Consume(),
            Match('t', 'yy85'),
            Jump('yy45'),
        ]),
        State('yy83', [
            Advance(),
            Produce('_IGNORE'),
        ]),
        State('yy85', [
            Advance(),
            Produce('_IMPORT')
        ]),
    ])

    return fsm.compile()


 # Compiled lexer function, built once at import time
 lex = factory_grammar()

 def genlex(stream):
    """Yield ``(start, token, text)`` for each token ``lex`` finds in ``stream``.

    Scanning begins at offset 0 and continues from the offset returned by the
    previous ``lex`` call.  An IndexError (presumably ``lex`` reading past
    the end of ``stream``) ends the iteration silently.
    """
    start, end = 0, len(stream)
    while start < end:
        try:
            stop, kind = lex(stream, start)
            yield start, kind, stream[start:stop]
        except IndexError:
            return
        start = stop


 # Benchmark: lex the grammar file (without its '//' lines, repeated 100
 # times) and print the time taken by 100 runs.
 with open('lark/grammars/common.g') as fd:
    lines = [line for line in fd if not line.startswith('//')]
    stream = u''.join(lines) * 100

    def lexit():
        """Run the lexer over the whole input, discarding the tokens."""
        for _ in genlex(stream):
            pass

    from timeit import timeit
    print(timeit(lexit, number=100))

diff --git a/re2c.re b/re2c.re
 /* Generated by re2c 1.0.3 on Tue Apr 10 09:00:46 2018 */
 #line 1 "/Users/drslump/tmp/test.re2c"

 /*
  * Scanner emitted by re2c: a goto-based state machine that inspects the
  * characters starting at `s` and moves between the `yyN` labels below.
  * `yych` holds the character under inspection; `YYMARKER` / `yyaccept`
  * record a position and a rule number so that yy45 can backtrack.
  *
  * NOTE(review): as it stands this excerpt does not look compilable: most
  * rule actions are bare string literals (e.g. `{ "WS" }`), `yycinit` is not
  * declared here and some actions return a string from a `bool` function.
  * Presumably it is only kept as a reference for the state numbering.
  * In the visible code `u` is only reset to 0, and `c` and `YYCTXMARKER`
  * are never read.
  */
 static bool lex(const char *s, unsigned long &u)
 {
    const char *YYMARKER;
    const char *YYCTXMARKER;
    int c = yycinit;
    u = 0;
    
 #line 17 "grammar.cc"
 {
 	char yych;
 	unsigned int yyaccept = 0;
 	yych = *s;
 	switch (yych) {
 	case '\t':
 	case ' ':	goto yy4;
 	case '\n':	goto yy7;
 	case '\r':	goto yy10;
 	case '!':	goto yy11;
 	case '"':	goto yy12;
 	case '%':	goto yy13;
 	case '(':	goto yy14;
 	case ')':	goto yy16;
 	case '*':
 	case '+':	goto yy18;
 	case '-':	goto yy20;
 	case '.':	goto yy21;
 	case '/':	goto yy23;
 	case ':':	goto yy24;
 	case '?':	goto yy26;
 	case 'A':
 	case 'B':
 	case 'C':
 	case 'D':
 	case 'E':
 	case 'F':
 	case 'G':
 	case 'H':
 	case 'I':
 	case 'J':
 	case 'K':
 	case 'L':
 	case 'M':
 	case 'N':
 	case 'O':
 	case 'P':
 	case 'Q':
 	case 'R':
 	case 'S':
 	case 'T':
 	case 'U':
 	case 'V':
 	case 'W':
 	case 'X':
 	case 'Y':
 	case 'Z':	goto yy27;
 	case '[':	goto yy30;
 	case ']':	goto yy32;
 	case '_':	goto yy34;
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy35;
 	case '|':	goto yy38;
 	case '~':	goto yy40;
 	default:	goto yy2;
 	}
 yy2:
 	++s;
 yy3:
 #line 40 "/Users/drslump/tmp/test.re2c"
 	{ return false; }
 #line 103 "grammar.cc"
 yy4:
 	yych = *++s;
 	switch (yych) {
 	case '\t':
 	case ' ':	goto yy4;
 	default:	goto yy6;
 	}
 yy6:
 #line 25 "/Users/drslump/tmp/test.re2c"
 	{ "WS" }
 #line 114 "grammar.cc"
 yy7:
 	yyaccept = 0;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case '\t':
 	case ' ':	goto yy42;
 	case '\n':	goto yy7;
 	case '\r':	goto yy44;
 	default:	goto yy9;
 	}
 yy9:
 #line 19 "/Users/drslump/tmp/test.re2c"
 	{ "NL" }
 #line 128 "grammar.cc"
 yy10:
 	yych = *++s;
 	switch (yych) {
 	case '\n':	goto yy7;
 	default:	goto yy3;
 	}
 yy11:
 	yyaccept = 1;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case '?':
 	case '_':	goto yy46;
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy35;
 	default:	goto yy3;
 	}
 yy12:
 	yyaccept = 1;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case '\n':	goto yy3;
 	default:	goto yy48;
 	}
 yy13:
 	yyaccept = 1;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case 'i':	goto yy53;
 	default:	goto yy3;
 	}
 yy14:
 	++s;
 #line 33 "/Users/drslump/tmp/test.re2c"
 	{ "LPAR" }
 #line 187 "grammar.cc"
 yy16:
 	++s;
 #line 36 "/Users/drslump/tmp/test.re2c"
 	{ "RPAR" }
 #line 192 "grammar.cc"
 yy18:
 	yych = *++s;
 	switch (yych) {
 	case '?':	goto yy54;
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy55;
 	default:	goto yy19;
 	}
 yy19:
 #line 29 "/Users/drslump/tmp/test.re2c"
 	{ "OP" }
 #line 228 "grammar.cc"
 yy20:
 	yych = *++s;
 	switch (yych) {
 	case '>':	goto yy57;
 	default:	goto yy3;
 	}
 yy21:
 	++s;
 #line 31 "/Users/drslump/tmp/test.re2c"
 	{ "DOT" }
 #line 239 "grammar.cc"
 yy23:
 	yyaccept = 1;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case '/':	goto yy61;
 	default:	goto yy59;
 	}
 yy24:
 	++s;
 #line 38 "/Users/drslump/tmp/test.re2c"
 	{ return "COLON"; }
 #line 251 "grammar.cc"
 yy26:
 	yych = *++s;
 	switch (yych) {
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy35;
 	default:	goto yy62;
 	}
 yy27:
 	yych = *++s;
 	switch (yych) {
 	case '0':
 	case '1':
 	case '2':
 	case '3':
 	case '4':
 	case '5':
 	case '6':
 	case '7':
 	case '8':
 	case '9':
 	case 'A':
 	case 'B':
 	case 'C':
 	case 'D':
 	case 'E':
 	case 'F':
 	case 'G':
 	case 'H':
 	case 'I':
 	case 'J':
 	case 'K':
 	case 'L':
 	case 'M':
 	case 'N':
 	case 'O':
 	case 'P':
 	case 'Q':
 	case 'R':
 	case 'S':
 	case 'T':
 	case 'U':
 	case 'V':
 	case 'W':
 	case 'X':
 	case 'Y':
 	case 'Z':
 	case '_':	goto yy27;
 	default:	goto yy29;
 	}
 yy29:
 #line 23 "/Users/drslump/tmp/test.re2c"
 	{ "TOKEN" }
 #line 328 "grammar.cc"
 yy30:
 	++s;
 #line 32 "/Users/drslump/tmp/test.re2c"
 	{ "LBRA" }
 #line 333 "grammar.cc"
 yy32:
 	++s;
 #line 35 "/Users/drslump/tmp/test.re2c"
 	{ "RBRA" }
 #line 338 "grammar.cc"
 yy34:
 	yych = *++s;
 	switch (yych) {
 	case 'A':
 	case 'B':
 	case 'C':
 	case 'D':
 	case 'E':
 	case 'F':
 	case 'G':
 	case 'H':
 	case 'I':
 	case 'J':
 	case 'K':
 	case 'L':
 	case 'M':
 	case 'N':
 	case 'O':
 	case 'P':
 	case 'Q':
 	case 'R':
 	case 'S':
 	case 'T':
 	case 'U':
 	case 'V':
 	case 'W':
 	case 'X':
 	case 'Y':
 	case 'Z':	goto yy27;
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy35;
 	default:	goto yy3;
 	}
 yy35:
 	yych = *++s;
 	switch (yych) {
 	case '0':
 	case '1':
 	case '2':
 	case '3':
 	case '4':
 	case '5':
 	case '6':
 	case '7':
 	case '8':
 	case '9':
 	case '_':
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy35;
 	default:	goto yy37;
 	}
 yy37:
 #line 22 "/Users/drslump/tmp/test.re2c"
 	{ "RULE" }
 #line 441 "grammar.cc"
 yy38:
 	++s;
 #line 34 "/Users/drslump/tmp/test.re2c"
 	{ "OR" }
 #line 446 "grammar.cc"
 yy40:
 	++s;
 #line 37 "/Users/drslump/tmp/test.re2c"
 	{ "TILDE" }
 #line 451 "grammar.cc"
 yy42:
 	yych = *++s;
 	switch (yych) {
 	case '\t':
 	case ' ':	goto yy42;
 	default:	goto yy9;
 	}
 yy44:
 	yych = *++s;
 	switch (yych) {
 	case '\n':	goto yy7;
 	default:	goto yy45;
 	}
 yy45:
 	/* backtrack: restore the position saved in YYMARKER, then continue at
 	   the label selected by the rule number stored in yyaccept */
 	s = YYMARKER;
 	switch (yyaccept) {
 	case 0: 	goto yy9;
 	case 1: 	goto yy3;
 	case 2: 	goto yy50;
 	default:	goto yy69;
 	}
 yy46:
 	yych = *++s;
 	switch (yych) {
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy35;
 	default:	goto yy45;
 	}
 yy47:
 	yych = *++s;
 yy48:
 	switch (yych) {
 	case '\n':	goto yy45;
 	case '"':	goto yy49;
 	case '\\':	goto yy51;
 	default:	goto yy47;
 	}
 yy49:
 	yych = *++s;
 	switch (yych) {
 	case 'i':	goto yy63;
 	default:	goto yy50;
 	}
 yy50:
 #line 21 "/Users/drslump/tmp/test.re2c"
 	{ "STRING" }
 #line 522 "grammar.cc"
 yy51:
 	yych = *++s;
 	switch (yych) {
 	case '\n':	goto yy45;
 	case '"':	goto yy64;
 	case '\\':	goto yy51;
 	default:	goto yy47;
 	}
 yy53:
 	yych = *++s;
 	switch (yych) {
 	case 'g':	goto yy65;
 	case 'm':	goto yy66;
 	default:	goto yy45;
 	}
 yy54:
 	yych = *++s;
 	switch (yych) {
 	case 'a':
 	case 'b':
 	case 'c':
 	case 'd':
 	case 'e':
 	case 'f':
 	case 'g':
 	case 'h':
 	case 'i':
 	case 'j':
 	case 'k':
 	case 'l':
 	case 'm':
 	case 'n':
 	case 'o':
 	case 'p':
 	case 'q':
 	case 'r':
 	case 's':
 	case 't':
 	case 'u':
 	case 'v':
 	case 'w':
 	case 'x':
 	case 'y':
 	case 'z':	goto yy55;
 	default:	goto yy19;
 	}
 yy55:
 	/* steps over the character just inspected and immediately back again,
 	   so the net position is unchanged when the action runs */
 	++s;
 	s -= 1;
 #line 39 "/Users/drslump/tmp/test.re2c"
 	{ return "OP"; }
 #line 574 "grammar.cc"
 yy57:
 	++s;
 #line 30 "/Users/drslump/tmp/test.re2c"
 	{ "TO" }
 #line 579 "grammar.cc"
 yy59:
 	yych = *++s;
 	switch (yych) {
 	case '\n':	goto yy45;
 	case '/':	goto yy67;
 	case '\\':	goto yy70;
 	default:	goto yy59;
 	}
 yy61:
 	yych = *++s;
 	switch (yych) {
 	case '\n':	goto yy45;
 	default:	goto yy72;
 	}
 yy62:
 	++s;
 	goto yy19;
 yy63:
 	++s;
 	goto yy50;
 yy64:
 	yyaccept = 2;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case '\n':	goto yy50;
 	case '"':	goto yy49;
 	case '\\':	goto yy51;
 	case 'i':	goto yy75;
 	default:	goto yy47;
 	}
 yy65:
 	yych = *++s;
 	switch (yych) {
 	case 'n':	goto yy76;
 	default:	goto yy45;
 	}
 yy66:
 	yych = *++s;
 	switch (yych) {
 	case 'p':	goto yy77;
 	default:	goto yy45;
 	}
 yy67:
 	yych = *++s;
 	switch (yych) {
 	case 'i':
 	case 'l':
 	case 'm':
 	case 's':
 	case 'u':
 	case 'x':	goto yy67;
 	default:	goto yy69;
 	}
 yy69:
 #line 20 "/Users/drslump/tmp/test.re2c"
 	{ "REGEXP" }
 #line 636 "grammar.cc"
 yy70:
 	yych = *++s;
 	switch (yych) {
 	case '\n':	goto yy45;
 	case '/':	goto yy78;
 	case '\\':	goto yy70;
 	default:	goto yy59;
 	}
 yy72:
 	yych = *++s;
 	switch (yych) {
 	case '\n':	goto yy74;
 	default:	goto yy72;
 	}
 yy74:
 #line 24 "/Users/drslump/tmp/test.re2c"
 	{ "COMMENT" }
 #line 654 "grammar.cc"
 yy75:
 	yyaccept = 2;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case '\n':	goto yy50;
 	case '"':	goto yy49;
 	case '\\':	goto yy51;
 	default:	goto yy47;
 	}
 yy76:
 	yych = *++s;
 	switch (yych) {
 	case 'o':	goto yy80;
 	default:	goto yy45;
 	}
 yy77:
 	yych = *++s;
 	switch (yych) {
 	case 'o':	goto yy81;
 	default:	goto yy45;
 	}
 yy78:
 	yyaccept = 3;
 	yych = *(YYMARKER = ++s);
 	switch (yych) {
 	case '\n':	goto yy69;
 	case '/':	goto yy67;
 	case '\\':	goto yy70;
 	case 'i':
 	case 'l':
 	case 'm':
 	case 's':
 	case 'u':
 	case 'x':	goto yy78;
 	default:	goto yy59;
 	}
 yy80:
 	yych = *++s;
 	switch (yych) {
 	case 'r':	goto yy82;
 	default:	goto yy45;
 	}
 yy81:
 	yych = *++s;
 	switch (yych) {
 	case 'r':	goto yy83;
 	default:	goto yy45;
 	}
 yy82:
 	yych = *++s;
 	switch (yych) {
 	case 'e':	goto yy84;
 	default:	goto yy45;
 	}
 yy83:
 	yych = *++s;
 	switch (yych) {
 	case 't':	goto yy86;
 	default:	goto yy45;
 	}
 yy84:
 	++s;
 #line 27 "/Users/drslump/tmp/test.re2c"
 	{ "IGNORE" }
 #line 719 "grammar.cc"
 yy86:
 	++s;
 #line 28 "/Users/drslump/tmp/test.re2c"
 	{ "IMPORT" }
 #line 724 "grammar.cc"
 }
 #line 42 "/Users/drslump/tmp/test.re2c"

 }
diff --git a/relex.py b/relex.py
 import re

 # One alternative per token type; the first alternative that matches wins.
 rex = re.compile(r'''
    (?P<_NL>(\r?\n)+\s*)
    |(?P<STRING>"(\\"|\\\\|[^"\n])*?"i?)
    |(?P<RULE>!?[_?]?[a-z][_a-z0-9]*)
    |(?P<TOKEN>_?[A-Z][_A-Z0-9]*)
    |(?P<COMMENT>//[^\n]*)
    |(?P<WS>[ \t]+)
    |(?P<NUMBER>\d+)
    |(?P<_IGNORE>%ignore)
    |(?P<_IMPORT>%import)
    |(?P<OP>[+*][?]?|[?](?![a-z]))
    |(?P<_TO>->)
    |(?P<_DOT>\.)
    |(?P<_LBRA>\[)
    |(?P<_LPAR>\()
    |(?P<_OR>\|)
    |(?P<_RBRA>\])
    |(?P<_RPAR>\))
    |(?P<TILDE>~)
    |(?P<_COLON>:)
 ''', re.VERBOSE | re.UNICODE)

 # Group number (as reported by ``lastindex``) -> token name.  Numbers 2 and 4
 # are the unnamed groups nested inside _NL and STRING and have no entry.
 types = {
    1: u'_NL',
    3: u'STRING',
    5: u'RULE',
    6: u'TOKEN',
    7: u'COMMENT',
    8: u'WS',
    9: u'NUMBER',
    10: u'_IGNORE',
    11: u'_IMPORT',
    12: u'OP',
    13: u'_TO',
    14: u'_DOT',
    15: u'_LBRA',
    16: u'_LPAR',
    17: u'_OR',
    18: u'_RBRA',
    19: u'_RPAR',
    20: u'TILDE',
    21: u'_COLON',
 }

 def relex(stream):
    """Yield ``(offset, token, text)`` for each token matched in ``stream``.

    ``rex`` is tried at the current offset; on a match the token name is
    looked up in ``types`` and scanning resumes after the matched text.
    Where nothing matches, one character is skipped without yielding.
    """
    pos = 0
    while pos < len(stream):
        hit = rex.match(stream, pos)
        if not hit:
            pos += 1
            continue
        yield pos, types[hit.lastindex], hit.group(0)
        pos = hit.end()
        
        
 # Benchmark: lex the grammar file (without its '//' lines, repeated 100
 # times) with the regex lexer and print the time taken by 100 runs.
 with open('lark/grammars/common.g') as fd:
    lines = [line for line in fd if not line.startswith('//')]
    data = u''.join(lines) * 100

    def lexit():
        """Run ``relex`` over the whole input, discarding the tokens."""
        for _ in relex(data):
            pass

    from timeit import timeit
    print(timeit(lexit, number=100))
	import sys, types, dis, struct

	# Factories for the symbolic instructions used by the FSM actions.  Each
	# one returns an ``Op`` carrying the mnemonic and, where applicable, a still
	# unresolved argument.  They are real functions rather than lambdas bound
	# to names (PEP 8), so ``__name__`` and tracebacks show the factory name.

	def BINARY_SUBSCR():
	    return Op('BINARY_SUBSCR')


	def BUILD_TUPLE(x):
	    return Op('BUILD_TUPLE', x)


	def COMPARE_OP(x):
	    return Op('COMPARE_OP', x)


	def INPLACE_ADD():
	    return Op('INPLACE_ADD')


	def INPLACE_SUBTRACT():
	    return Op('INPLACE_SUBTRACT')


	def JUMP_ABSOLUTE(x):
	    return Op('JUMP_ABSOLUTE', x)


	def LOAD_CONST(x):
	    return Op('LOAD_CONST', x)


	def LOAD_FAST(x):
	    return Op('LOAD_FAST', x)


	def POP_JUMP_IF_TRUE(x):
	    return Op('POP_JUMP_IF_TRUE', x)


	def STORE_FAST(x):
	    return Op('STORE_FAST', x)


	def RETURN_VALUE():
	    return Op('RETURN_VALUE')


	def STATE(x):
	    # Pseudo instruction: marks the position of the state label ``x``
	    return Op('$STATE', x)


	def _check_arg(arg):
	    """Reject operands outside the 16 bit range both encoders support.

	    A real exception is raised (instead of an ``assert``) so the check is
	    still performed when Python runs with ``-O``.
	    """
	    if not 0 <= arg <= 0xFFFF:
	        raise ValueError('unsupported opcode arg over 16bits')


	if sys.version_info < (3,6):
	    # Before 3.6 an instruction is one opcode byte, followed by a 16 bit
	    # little-endian operand when the opcode takes an argument.
	    def pack_opcode(opcode, arg=None):
	        """Return the encoded bytes for ``opcode`` (and ``arg`` if it takes one)."""
	        if opcode >= dis.HAVE_ARGUMENT:
	            _check_arg(arg)
	            return struct.pack('<BH', opcode, arg)
	        return struct.pack('B', opcode)

	    def patch_arg(buffer, offset, arg):
	        """Overwrite the operand of the instruction starting at ``offset``."""
	        _check_arg(arg)
	        struct.pack_into('<H', buffer, offset + 1, arg)

	else:
	    # From 3.6 on every instruction is two bytes (opcode, 8 bit operand);
	    # larger operands get their high byte from a preceding EXTENDED_ARG.
	    def pack_opcode(opcode, arg=None):
	        """Return the encoded bytes for ``opcode`` (and ``arg`` if it takes one).

	        Operands above 0xFF are emitted as an EXTENDED_ARG instruction
	        followed by the actual instruction (four bytes in total).
	        """
	        if opcode < dis.HAVE_ARGUMENT:
	            return struct.pack('BB', opcode, 0)
	        _check_arg(arg)
	        if arg <= 0xFF:
	            return struct.pack('BB', opcode, arg)
	        return struct.pack('BBBB', dis.EXTENDED_ARG, arg >> 8, opcode, arg & 0xFF)

	    def patch_arg(buffer, offset, arg):
	        """Overwrite the operand of the EXTENDED_ARG pair starting at ``offset``.

	        Only the four byte form produced by ``pack_opcode`` for operands
	        above 0xFF can be patched; anything else raises ValueError instead
	        of silently overwriting the neighbouring instruction.
	        """
	        _check_arg(arg)
	        # TODO: Parse opcode and adapt extended arg if needed
	        if buffer[offset] != dis.EXTENDED_ARG:
	            raise ValueError('no EXTENDED_ARG prefix to patch at offset %d' % offset)
	        buffer[offset + 1] = arg >> 8
	        buffer[offset + 3] = arg & 0xFF


	class Op(object):
	    """One symbolic instruction: a mnemonic and an optional argument.

	    Iterating an Op yields ``opcode`` then ``arg``, so it unpacks like a
	    two-item tuple.
	    """
	    __slots__ = ('opcode', 'arg')

	    def __init__(self, opcode, arg=None):
	        self.opcode = opcode
	        self.arg = arg

	    def __iter__(self):
	        return iter((self.opcode, self.arg))

	    def __repr__(self):
	        return 'Op(%r, %r)' % (self.opcode, self.arg)


	class Ops(list):
	    """A list of Op that accepts ``ops << thing`` as append/extend."""

	    def __lshift__(self, other):
	        """Add `other` to the list in place and return the list.

	        Tuples and lists are extended item by item, an Op is appended, and
	        an Abstract contributes whatever its ``opcodes()`` returns.

	        Raises:
	            TypeError: if `other` is none of the above.
	        """
	        if isinstance(other, (tuple, list)):
	            self.extend(other)
	        elif isinstance(other, Op):
	            self.append(other)
	        elif isinstance(other, Abstract):
	            self.extend(other.opcodes())
	        else:
	            # Format the type instead of concatenating it to a str, which
	            # would itself fail before the intended message was built.
	            raise TypeError('Unsupported type: %r' % (type(other),))
	        return self


	class Abstract(object):
	    """Base class for anything that can be lowered to a sequence of Op."""

	    def opcodes(self):
	        """Return the Op sequence for this node; subclasses must override."""
	        raise AssertionError('Not implemented')

	    def optimize(self):
	        """Optimisation hook; a no-op by default."""
	        pass

	    @staticmethod
	    def _slot(pool, value):
	        """Return the index of `value` in `pool`, appending it if absent."""
	        try:
	            return pool.index(value)
	        except ValueError:
	            pool.append(value)
	            return len(pool) - 1

	    def _encode(self, constnames, varnames):
	        """Assemble ``self.opcodes()`` into raw bytecode.

	        Note that constnames and varnames will be mutated: constants and
	        local names met for the first time are appended to them.

	        Raises:
	            KeyError: if a jump refers to a label no '$STATE' defined.
	        """
	        code = bytearray()
	        states = {}   # label -> byte offset of its first instruction
	        jumps = []    # (label, byte offset of the jump to patch)

	        # First encode while keeping a registry of jumps and labels
	        for opcode, arg in self.opcodes():
	            if opcode == '$STATE':
	                states[arg] = len(code)
	                continue

	            if opcode == 'LOAD_CONST':
	                arg = self._slot(constnames, arg)

	            if opcode in ('STORE_FAST', 'LOAD_FAST'):
	                arg = self._slot(varnames, arg)

	            if opcode == 'COMPARE_OP':
	                arg = dis.cmp_op.index(arg)

	            opcode = dis.opmap[opcode]

	            if opcode in dis.hasjabs and not isinstance(arg, int):
	                jumps.append((arg, len(code)))
	                arg = 65535  # force a extended_arg on >3.6

	            code.extend(pack_opcode(opcode, arg))

	        # Now process the jumps to set the correct offsets
	        for state, offset in jumps:
	            patch_arg(code, offset, states[state])

	        return bytes(code)

	    def compile(self, name='fsmlex', docblock=None):
	        """Build a function with the currently configured opcodes.

	        The function takes ``(stream, ofs)`` and `ofs` defaults to 0.
	        """
	        argnames = ('stream', 'ofs')
	        varnames = list(argnames)
	        constnames = [docblock]

	        code = self._encode(constnames, varnames)

	        args = [
	            len(argnames),      # co_argcount -> (stream, ofs)
	            len(varnames),      # co_nlocals
	            2,                  # co_stacksize -> maximum number of values in the stack
	            0,                  # co_flags -> only if *args is used
	            code,               # co_code -> compiled bytecode
	            tuple(constnames),  # co_consts -> literals in the code (first is docblock)
	            (),                 # co_names -> global/attribute names; none are used
	            tuple(varnames),    # co_varnames -> list of local variables (starting with args)
	            name + '.py',       # co_filename
	            name,               # co_name
	            0,                  # co_firstlineno
	            bytes(),            # co_lnotab
	        ]

	        if sys.version_info >= (3, 0, 0):
	            args.insert(1, 0)  # co_kwonlyargcount
	        if sys.version_info >= (3, 8):
	            # 3.8 added a positional-only count right after co_argcount.
	            args.insert(1, 0)  # co_posonlyargcount

	        co = types.CodeType(*args)

	        return types.FunctionType(co, {}, name, (0,))


	class State(Abstract):
	    """Holds the set of actions for a label.

	    A State without a label emits no jump target of its own.
	    """
	    __slots__ = ('label', 'actions')

	    def __init__(self, label=None, actions=None):
	        self.label = str(label) if label is not None else None
	        self.actions = actions if actions else []

	    def add(self, *actions):
	        """Append one or more actions to this state."""
	        self.actions.extend(actions)

	    def optimize(self):
	        """Run ``optimize()`` on every contained action."""
	        # TODO: Collapse Matches with same target
	        for action in self.actions:
	            action.optimize()

	    def opcodes(self):
	        """Return the label marker (if any) followed by each action's ops."""
	        ops = Ops()

	        if self.label:
	            ops << STATE(self.label)

	        for action in self.actions:
	            ops << action

	        return ops


	class Match(Abstract):
	    """Jumps if the character matches the set of values.

	    A single value is compared with ``==``; several values are joined
	    into one string and tested with ``in``.
	    """
	    # The slot must be named after the attribute __init__ assigns.
	    __slots__ = ('values', 'label')

	    def __init__(self, values, label):
	        self.values = values
	        self.label = str(label)

	    def opcodes(self):
	        ops = Ops()
	        # > if ch in self.values
	        ops << LOAD_FAST('ch')
	        ops << LOAD_CONST(u''.join(self.values))
	        if len(self.values) == 1:
	            ops << COMPARE_OP('==')
	        else:
	            ops << COMPARE_OP('in')
	        ops << POP_JUMP_IF_TRUE(self.label)
	        return ops


	class Jump(Abstract):
	    """Jumps unconditionally to a specific label."""
	    __slots__ = ('label',)

	    def __init__(self, label):
	        self.label = str(label)

	    def opcodes(self):
	        ops = Ops()
	        ops << JUMP_ABSOLUTE(self.label)
	        return ops


	class Consume(Abstract):
	    """Consumes the next character from the stream.

	    With ``advance`` true (the default) the offset is incremented first;
	    either way ``ch`` is then loaded from ``stream[ofs]``.
	    """
	    __slots__ = ('advance',)

	    def __init__(self, advance=True):
	        self.advance = advance

	    def opcodes(self):
	        ops = Ops()

	        if self.advance:
	            ops << Advance()

	        ops << LOAD_FAST('stream')
	        ops << LOAD_FAST('ofs')
	        ops << BINARY_SUBSCR()   # > stream[ofs]
	        ops << STORE_FAST('ch')  # > ch = stream[ofs]

	        return ops


	class Advance(Abstract):
	    """Advances to the next character in the stream (``ofs += 1``)."""
	    __slots__ = ()

	    def opcodes(self):
	        ops = Ops()
	        ops << LOAD_FAST('ofs')
	        ops << LOAD_CONST(1)
	        ops << INPLACE_ADD()      # > ofs + 1
	        ops << STORE_FAST('ofs')  # > ofs = ofs + 1
	        return ops


	class Marker(Abstract):
	    """Marks the current offset for a look ahead.

	    Stores `mark` in ``accept`` and the current offset in ``marker``.
	    """
	    __slots__ = ('mark',)

	    def __init__(self, mark):
	        self.mark = mark

	    def opcodes(self):
	        ops = Ops()
	        ops << LOAD_CONST(self.mark)
	        ops << STORE_FAST('accept')  # > accept = self.mark
	        ops << LOAD_FAST('ofs')
	        ops << STORE_FAST('marker')  # > marker = ofs
	        return ops


	class Backtrack(Abstract):
	    """Backtracks a look ahead by resetting ``ofs`` to ``marker``."""
	    __slots__ = ()

	    def opcodes(self):
	        ops = Ops()
	        # > ofs = marker
	        ops << LOAD_FAST('marker')
	        ops << STORE_FAST('ofs')
	        return ops


	class Accept(Abstract):
	    """Accepts a look ahead: jumps to `label` when ``accept == mark``."""
	    __slots__ = ('mark', 'label')

	    def __init__(self, mark, label):
	        self.mark = mark
	        self.label = str(label)

	    def opcodes(self):
	        ops = Ops()
	        # > if accept == self.mark
	        ops << LOAD_FAST('accept')
	        ops << LOAD_CONST(self.mark)
	        ops << COMPARE_OP('==')
	        ops << POP_JUMP_IF_TRUE(self.label)
	        return ops


	class Produce(Abstract):
	    """Returns the current offset with an optional token.

	    The generated code returns the tuple ``(ofs, token)``.
	    """
	    __slots__ = ('token',)

	    def __init__(self, token=None):
	        self.token = token

	    def opcodes(self):
	        ops = Ops()
	        # > return (ofs, self.token)
	        ops << LOAD_FAST('ofs')
	        ops << LOAD_CONST(self.token)
	        ops << BUILD_TUPLE(2)
	        ops << RETURN_VALUE()
	        return ops
	from fsm import Accept, Advance, Backtrack, Consume, Jump, Match, Marker, Produce, State

	def factory_grammar():
	    """Build the grammar lexer's state machine and compile it.

	    Returns the function produced by ``State.compile()``; called as
	    ``lex(stream, ofs)`` it returns ``(new_ofs, token)``, with a token of
	    ``None`` when no rule matched.
	    """
	    lower = 'abcdefghijklmnopqrstuvwxyz'
	    upper = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

	    fsm = State(None, [
	        Consume(False),

	        Match(' \t', 'yy4'),
	        Match('\n', 'yy7'),
	        Match('\r', 'yy10'),
	        Match('!', 'yy11'),
	        Match('"', 'yy12'),
	        Match('%', 'yy13'),
	        Match('(', 'yy14'),
	        Match(')', 'yy16'),
	        Match('*', 'yy18'),
	        Match('+', 'yy18'),
	        Match('-', 'yy20'),
	        Match('.', 'yy21'),
	        Match('/', 'yy23'),
	        Match(':', 'yy24'),
	        Match('?', 'yy26'),
	        Match(upper, 'yy27'),
	        Match('[', 'yy30'),
	        Match(']', 'yy32'),
	        Match('_', 'yy34'),
	        # A leading lowercase letter starts a RULE (yy35); yy27 is the
	        # upper-case TOKEN loop.
	        Match(lower, 'yy35'),
	        # A bare pipe: the two-character '\|' would also match a backslash.
	        Match('|', 'yy38'),
	        Match('~', 'yy40'),
	        Jump('yy2'),

	        State('yy2', [
	            Advance(),
	        ]),
	        State('yy3', [
	            Produce(None),
	        ]),
	        State('yy4', [
	            Consume(),
	            Match(' \t', 'yy4'),
	            Jump('yy6'),
	        ]),
	        State('yy6', [
	            Produce('WS'),
	        ]),
	        State('yy7', [
	            Consume(),
	            Marker(0),
	            Match(' \t', 'yy42'),
	            Match('\n', 'yy7'),
	            Match('\r', 'yy44'),
	            Jump('yy9'),
	        ]),
	        State('yy9', [
	            Produce('_NL'),
	        ]),
	        State('yy10', [
	            Consume(),
	            Match('\n', 'yy7'),
	            Jump('yy3'),
	        ]),
	        State('yy11', [
	            Consume(),
	            Marker(1),
	            Match('?_', 'yy46'),
	            Match(lower, 'yy35'),
	            Jump('yy3'),
	        ]),
	        State('yy12', [
	            Consume(),
	            Marker(1),
	            Match('\n', 'yy3'),
	            Jump('yy48'),
	        ]),
	        State('yy13', [
	            Consume(),
	            Marker(1),
	            Match('i', 'yy53'),
	            Jump('yy3'),
	        ]),
	        State('yy14', [
	            Advance(),
	            Produce('_LPAR'),
	        ]),
	        State('yy16', [
	            Advance(),
	            Produce('_RPAR'),
	        ]),
	        State('yy18', [
	            Consume(),
	            Match('?', 'yy54'),
	            Match(lower, 'yy55'),
	            Jump('yy19'),
	        ]),
	        State('yy19', [
	            Produce('OP'),
	        ]),
	        State('yy20', [
	            Consume(),
	            Match('>', 'yy57'),
	            Jump('yy3'),
	        ]),
	        State('yy21', [
	            Advance(),
	            Produce('_DOT'),
	        ]),
	        State('yy23', [
	            Consume(),
	            Marker(1),
	            Match('/', 'yy61'),
	            Jump('yy59'),
	        ]),
	        State('yy24', [
	            Advance(),
	            Produce('_COLON'),
	        ]),
	        State('yy26', [
	            Consume(),
	            Match(lower, 'yy35'),
	            Jump('yy62'),
	        ]),

	        State('yy27', [
	            Consume(),
	            Match('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_', 'yy27'),
	            Jump('yy29'),
	        ]),
	        State('yy29', [
	            Produce('TOKEN'),
	        ]),
	        State('yy30', [
	            Advance(),
	            Produce('_LBRA'),
	        ]),
	        State('yy32', [
	            Advance(),
	            Produce('_RBRA'),
	        ]),
	        State('yy34', [
	            Consume(),
	            Match(upper, 'yy27'),
	            Match(lower, 'yy35'),
	            Jump('yy3'),
	        ]),
	        State('yy35', [
	            Consume(),
	            Match('0123456789_abcdefghijklmnopqrstuvwxyz', 'yy35'),
	            Jump('yy37'),
	        ]),
	        State('yy37', [
	            Produce('RULE'),
	        ]),
	        State('yy38', [
	            Advance(),
	            Produce('_OR'),
	        ]),
	        State('yy40', [
	            Advance(),
	            Produce('_TILDE'),
	        ]),
	        State('yy42', [
	            Consume(),
	            Match(' \t', 'yy42'),
	            Jump('yy9'),
	        ]),
	        State('yy44', [
	            Consume(),
	            Match('\n', 'yy7'),
	            Jump('yy45'),
	        ]),
	        # Look-ahead failed: rewind to the marker and resume at whichever
	        # accepting state was recorded last.
	        State('yy45', [
	            Backtrack(),
	            Accept(0, 'yy9'),
	            Accept(1, 'yy3'),
	            Accept(2, 'yy50'),
	            Jump('yy69'),
	        ]),
	        State('yy46', [
	            Consume(),
	            Match(lower, 'yy35'),
	            Jump('yy45'),
	        ]),
	        # yy47 has no jump of its own and falls through into yy48.
	        State('yy47', [
	            Consume(),
	        ]),
	        State('yy48', [
	            Match('\n', 'yy45'),
	            Match('"', 'yy49'),
	            Match('\\', 'yy51'),
	            Jump('yy47'),
	        ]),
	        State('yy49', [
	            Consume(),
	            Match('i', 'yy63'),
	            Jump('yy50'),
	        ]),
	        State('yy50', [
	            Produce('STRING'),
	        ]),
	        State('yy51', [
	            Consume(),
	            Match('\n', 'yy45'),
	            Match('"', 'yy64'),
	            Match('\\', 'yy51'),
	            Jump('yy47'),
	        ]),
	        State('yy53', [
	            Consume(),
	            Match('g', 'yy65'),
	            Match('m', 'yy66'),
	            Jump('yy45'),
	        ]),
	        State('yy54', [
	            Consume(),
	            Match(lower, 'yy55'),
	            Jump('yy19'),
	        ]),
	        # NOTE(review): this consumes the lowercase letter that follows the
	        # operator; confirm that is intended rather than a pure look-ahead.
	        State('yy55', [
	            Advance(),
	            Produce('OP'),
	        ]),
	        State('yy57', [
	            Advance(),
	            Produce('_TO'),
	        ]),
	        State('yy59', [
	            Consume(),
	            Match('\n', 'yy45'),
	            Match('/', 'yy67'),
	            Match('\\', 'yy70'),
	            Jump('yy59'),
	        ]),
	        State('yy61', [
	            Consume(),
	            Match('\n', 'yy45'),
	            Jump('yy72'),
	        ]),
	        State('yy62', [
	            Advance(),
	            Jump('yy19'),
	        ]),
	        State('yy63', [
	            Advance(),
	            Jump('yy50'),
	        ]),
	        State('yy64', [
	            Consume(),
	            Marker(2),
	            Match('\n', 'yy50'),
	            Match('"', 'yy49'),
	            Match('\\', 'yy51'),
	            Match('i', 'yy74'),
	            Jump('yy47'),
	        ]),
	        State('yy65', [
	            Consume(),
	            Match('n', 'yy75'),
	            Jump('yy45'),
	        ]),
	        State('yy66', [
	            Consume(),
	            Match('p', 'yy76'),
	            Jump('yy45'),
	        ]),
	        State('yy67', [
	            Consume(),
	            Match('ilmsux', 'yy67'),
	            Jump('yy69'),
	        ]),
	        State('yy69', [
	            Produce('REGEXP'),
	        ]),
	        State('yy70', [
	            Consume(),
	            Match('\n', 'yy45'),
	            Match('/', 'yy77'),
	            Match('\\', 'yy70'),
	            Jump('yy59'),
	        ]),
	        State('yy72', [
	            Produce('COMMENT'),
	        ]),
	        State('yy74', [
	            Consume(),
	            Marker(2),
	            Match('\n', 'yy50'),
	            Match('"', 'yy49'),
	            Match('\\', 'yy51'),
	            Jump('yy47'),
	        ]),
	        State('yy75', [
	            Consume(),
	            Match('o', 'yy79'),
	            Jump('yy45'),
	        ]),
	        State('yy76', [
	            Consume(),
	            Match('o', 'yy80'),
	            Jump('yy45'),
	        ]),
	        State('yy77', [
	            Consume(),
	            Marker(3),
	            Match('\n', 'yy69'),
	            Match('/', 'yy67'),
	            Match('\\', 'yy70'),
	            Match('ilmsux', 'yy77'),
	            Jump('yy59'),
	        ]),
	        State('yy79', [
	            Consume(),
	            Match('r', 'yy81'),
	            Jump('yy45'),
	        ]),
	        State('yy80', [
	            Consume(),
	            Match('r', 'yy82'),
	            Jump('yy45'),
	        ]),
	        State('yy81', [
	            Consume(),
	            Match('e', 'yy83'),
	            Jump('yy45'),
	        ]),
	        State('yy82', [
	            Consume(),
	            Match('t', 'yy85'),
	            Jump('yy45'),
	        ]),
	        State('yy83', [
	            Advance(),
	            Produce('_IGNORE'),
	        ]),
	        State('yy85', [
	            Advance(),
	            Produce('_IMPORT'),
	        ]),
	    ])

	    return fsm.compile()


	# Compiled once at import time and shared by every genlex() call.
	lex = factory_grammar()

	def genlex(stream):
	    """Yield ``(offset, token, text)`` for each token `lex` finds in `stream`.

	    Lexing stops quietly at the first IndexError from `lex`; a token still
	    being scanned at that point is not yielded.
	    """
	    ofs = 0
	    length = len(stream)
	    while ofs < length:
	        # Only the lex() call is guarded, so the handler cannot hide an
	        # IndexError raised anywhere else.
	        try:
	            pos, token = lex(stream, ofs)
	        except IndexError:
	            break
	        yield ofs, token, stream[ofs:pos]
	        ofs = pos


	# Benchmark corpus: the grammar file without its '//' comment lines.  The
	# file is closed before any timing starts.
	with open('lark/grammars/common.g') as fd:
	    lines = [line for line in fd if not line.startswith('//')]

	stream = u''.join(lines) * 100


	def lexit():
	    """Run the compiled FSM lexer over the whole corpus, discarding tokens."""
	    for _ofs, _token, _value in genlex(stream):
	        pass


	from timeit import timeit
	print(timeit(lexit, number=100))
	/* Generated by re2c 1.0.3 on Tue Apr 10 09:00:46 2018 */
	#line 1 "/Users/drslump/tmp/test.re2c"

	/*
	 * Scanner emitted by re2c (see the "Generated by re2c" banner and the
	 * #line directives); regenerate from the .re2c rule file rather than
	 * editing by hand.
	 *
	 * Reads characters starting at s and walks a goto-based state machine.
	 * u is set to 0 on entry and is not written again in this function.
	 * YYCTXMARKER and c are declared but not used in the body below.
	 * The brace blocks that follow the "#line N test.re2c" directives are the
	 * per-rule action bodies, reproduced as they appear in the generated
	 * output.
	 */
	static bool lex(const char *s, unsigned long &u)
	{
	const char *YYMARKER;
	const char *YYCTXMARKER;
	int c = yycinit;
	u = 0;

	#line 17 "grammar.cc"
	{
	char yych;
	unsigned int yyaccept = 0;
	yych = *s;
	switch (yych) {
	case '\t':
	case ' ': goto yy4;
	case '\n': goto yy7;
	case '\r': goto yy10;
	case '!': goto yy11;
	case '"': goto yy12;
	case '%': goto yy13;
	case '(': goto yy14;
	case ')': goto yy16;
	case '*':
	case '+': goto yy18;
	case '-': goto yy20;
	case '.': goto yy21;
	case '/': goto yy23;
	case ':': goto yy24;
	case '?': goto yy26;
	case 'A':
	case 'B':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
	case 'G':
	case 'H':
	case 'I':
	case 'J':
	case 'K':
	case 'L':
	case 'M':
	case 'N':
	case 'O':
	case 'P':
	case 'Q':
	case 'R':
	case 'S':
	case 'T':
	case 'U':
	case 'V':
	case 'W':
	case 'X':
	case 'Y':
	case 'Z': goto yy27;
	case '[': goto yy30;
	case ']': goto yy32;
	case '_': goto yy34;
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy35;
	case '\|': goto yy38;
	case '~': goto yy40;
	default: goto yy2;
	}
	/* yy2 skips one character and falls through to yy3, which returns false. */
	yy2:
	++s;
	yy3:
	#line 40 "/Users/drslump/tmp/test.re2c"
	{ return false; }
	#line 103 "grammar.cc"
	yy4:
	yych = *++s;
	switch (yych) {
	case '\t':
	case ' ': goto yy4;
	default: goto yy6;
	}
	yy6:
	#line 25 "/Users/drslump/tmp/test.re2c"
	{ "WS" }
	#line 114 "grammar.cc"
	/* States that assign yyaccept also save the position in YYMARKER so that
	   yy45 can rewind to it. */
	yy7:
	yyaccept = 0;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case '\t':
	case ' ': goto yy42;
	case '\n': goto yy7;
	case '\r': goto yy44;
	default: goto yy9;
	}
	yy9:
	#line 19 "/Users/drslump/tmp/test.re2c"
	{ "NL" }
	#line 128 "grammar.cc"
	yy10:
	yych = *++s;
	switch (yych) {
	case '\n': goto yy7;
	default: goto yy3;
	}
	yy11:
	yyaccept = 1;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case '?':
	case '_': goto yy46;
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy35;
	default: goto yy3;
	}
	yy12:
	yyaccept = 1;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case '\n': goto yy3;
	default: goto yy48;
	}
	yy13:
	yyaccept = 1;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case 'i': goto yy53;
	default: goto yy3;
	}
	yy14:
	++s;
	#line 33 "/Users/drslump/tmp/test.re2c"
	{ "LPAR" }
	#line 187 "grammar.cc"
	yy16:
	++s;
	#line 36 "/Users/drslump/tmp/test.re2c"
	{ "RPAR" }
	#line 192 "grammar.cc"
	yy18:
	yych = *++s;
	switch (yych) {
	case '?': goto yy54;
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy55;
	default: goto yy19;
	}
	yy19:
	#line 29 "/Users/drslump/tmp/test.re2c"
	{ "OP" }
	#line 228 "grammar.cc"
	yy20:
	yych = *++s;
	switch (yych) {
	case '>': goto yy57;
	default: goto yy3;
	}
	yy21:
	++s;
	#line 31 "/Users/drslump/tmp/test.re2c"
	{ "DOT" }
	#line 239 "grammar.cc"
	yy23:
	yyaccept = 1;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case '/': goto yy61;
	default: goto yy59;
	}
	yy24:
	++s;
	#line 38 "/Users/drslump/tmp/test.re2c"
	{ return "COLON"; }
	#line 251 "grammar.cc"
	yy26:
	yych = *++s;
	switch (yych) {
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy35;
	default: goto yy62;
	}
	yy27:
	yych = *++s;
	switch (yych) {
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	case 'A':
	case 'B':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
	case 'G':
	case 'H':
	case 'I':
	case 'J':
	case 'K':
	case 'L':
	case 'M':
	case 'N':
	case 'O':
	case 'P':
	case 'Q':
	case 'R':
	case 'S':
	case 'T':
	case 'U':
	case 'V':
	case 'W':
	case 'X':
	case 'Y':
	case 'Z':
	case '_': goto yy27;
	default: goto yy29;
	}
	yy29:
	#line 23 "/Users/drslump/tmp/test.re2c"
	{ "TOKEN" }
	#line 328 "grammar.cc"
	yy30:
	++s;
	#line 32 "/Users/drslump/tmp/test.re2c"
	{ "LBRA" }
	#line 333 "grammar.cc"
	yy32:
	++s;
	#line 35 "/Users/drslump/tmp/test.re2c"
	{ "RBRA" }
	#line 338 "grammar.cc"
	yy34:
	yych = *++s;
	switch (yych) {
	case 'A':
	case 'B':
	case 'C':
	case 'D':
	case 'E':
	case 'F':
	case 'G':
	case 'H':
	case 'I':
	case 'J':
	case 'K':
	case 'L':
	case 'M':
	case 'N':
	case 'O':
	case 'P':
	case 'Q':
	case 'R':
	case 'S':
	case 'T':
	case 'U':
	case 'V':
	case 'W':
	case 'X':
	case 'Y':
	case 'Z': goto yy27;
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy35;
	default: goto yy3;
	}
	yy35:
	yych = *++s;
	switch (yych) {
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
	case '_':
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy35;
	default: goto yy37;
	}
	yy37:
	#line 22 "/Users/drslump/tmp/test.re2c"
	{ "RULE" }
	#line 441 "grammar.cc"
	yy38:
	++s;
	#line 34 "/Users/drslump/tmp/test.re2c"
	{ "OR" }
	#line 446 "grammar.cc"
	yy40:
	++s;
	#line 37 "/Users/drslump/tmp/test.re2c"
	{ "TILDE" }
	#line 451 "grammar.cc"
	yy42:
	yych = *++s;
	switch (yych) {
	case '\t':
	case ' ': goto yy42;
	default: goto yy9;
	}
	yy44:
	yych = *++s;
	switch (yych) {
	case '\n': goto yy7;
	default: goto yy45;
	}
	/* Backtrack: rewind s to the saved YYMARKER, then resume at the state
	   selected by the last value stored in yyaccept. */
	yy45:
	s = YYMARKER;
	switch (yyaccept) {
	case 0: goto yy9;
	case 1: goto yy3;
	case 2: goto yy50;
	default: goto yy69;
	}
	yy46:
	yych = *++s;
	switch (yych) {
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy35;
	default: goto yy45;
	}
	yy47:
	yych = *++s;
	yy48:
	switch (yych) {
	case '\n': goto yy45;
	case '"': goto yy49;
	case '\\': goto yy51;
	default: goto yy47;
	}
	yy49:
	yych = *++s;
	switch (yych) {
	case 'i': goto yy63;
	default: goto yy50;
	}
	yy50:
	#line 21 "/Users/drslump/tmp/test.re2c"
	{ "STRING" }
	#line 522 "grammar.cc"
	yy51:
	yych = *++s;
	switch (yych) {
	case '\n': goto yy45;
	case '"': goto yy64;
	case '\\': goto yy51;
	default: goto yy47;
	}
	yy53:
	yych = *++s;
	switch (yych) {
	case 'g': goto yy65;
	case 'm': goto yy66;
	default: goto yy45;
	}
	yy54:
	yych = *++s;
	switch (yych) {
	case 'a':
	case 'b':
	case 'c':
	case 'd':
	case 'e':
	case 'f':
	case 'g':
	case 'h':
	case 'i':
	case 'j':
	case 'k':
	case 'l':
	case 'm':
	case 'n':
	case 'o':
	case 'p':
	case 'q':
	case 'r':
	case 's':
	case 't':
	case 'u':
	case 'v':
	case 'w':
	case 'x':
	case 'y':
	case 'z': goto yy55;
	default: goto yy19;
	}
	/* The increment followed by "s -= 1" leaves s where it was on entry. */
	yy55:
	++s;
	s -= 1;
	#line 39 "/Users/drslump/tmp/test.re2c"
	{ return "OP"; }
	#line 574 "grammar.cc"
	yy57:
	++s;
	#line 30 "/Users/drslump/tmp/test.re2c"
	{ "TO" }
	#line 579 "grammar.cc"
	yy59:
	yych = *++s;
	switch (yych) {
	case '\n': goto yy45;
	case '/': goto yy67;
	case '\\': goto yy70;
	default: goto yy59;
	}
	yy61:
	yych = *++s;
	switch (yych) {
	case '\n': goto yy45;
	default: goto yy72;
	}
	yy62:
	++s;
	goto yy19;
	yy63:
	++s;
	goto yy50;
	yy64:
	yyaccept = 2;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case '\n': goto yy50;
	case '"': goto yy49;
	case '\\': goto yy51;
	case 'i': goto yy75;
	default: goto yy47;
	}
	yy65:
	yych = *++s;
	switch (yych) {
	case 'n': goto yy76;
	default: goto yy45;
	}
	yy66:
	yych = *++s;
	switch (yych) {
	case 'p': goto yy77;
	default: goto yy45;
	}
	yy67:
	yych = *++s;
	switch (yych) {
	case 'i':
	case 'l':
	case 'm':
	case 's':
	case 'u':
	case 'x': goto yy67;
	default: goto yy69;
	}
	yy69:
	#line 20 "/Users/drslump/tmp/test.re2c"
	{ "REGEXP" }
	#line 636 "grammar.cc"
	yy70:
	yych = *++s;
	switch (yych) {
	case '\n': goto yy45;
	case '/': goto yy78;
	case '\\': goto yy70;
	default: goto yy59;
	}
	yy72:
	yych = *++s;
	switch (yych) {
	case '\n': goto yy74;
	default: goto yy72;
	}
	yy74:
	#line 24 "/Users/drslump/tmp/test.re2c"
	{ "COMMENT" }
	#line 654 "grammar.cc"
	yy75:
	yyaccept = 2;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case '\n': goto yy50;
	case '"': goto yy49;
	case '\\': goto yy51;
	default: goto yy47;
	}
	yy76:
	yych = *++s;
	switch (yych) {
	case 'o': goto yy80;
	default: goto yy45;
	}
	yy77:
	yych = *++s;
	switch (yych) {
	case 'o': goto yy81;
	default: goto yy45;
	}
	yy78:
	yyaccept = 3;
	yych = *(YYMARKER = ++s);
	switch (yych) {
	case '\n': goto yy69;
	case '/': goto yy67;
	case '\\': goto yy70;
	case 'i':
	case 'l':
	case 'm':
	case 's':
	case 'u':
	case 'x': goto yy78;
	default: goto yy59;
	}
	yy80:
	yych = *++s;
	switch (yych) {
	case 'r': goto yy82;
	default: goto yy45;
	}
	yy81:
	yych = *++s;
	switch (yych) {
	case 'r': goto yy83;
	default: goto yy45;
	}
	yy82:
	yych = *++s;
	switch (yych) {
	case 'e': goto yy84;
	default: goto yy45;
	}
	yy83:
	yych = *++s;
	switch (yych) {
	case 't': goto yy86;
	default: goto yy45;
	}
	yy84:
	++s;
	#line 27 "/Users/drslump/tmp/test.re2c"
	{ "IGNORE" }
	#line 719 "grammar.cc"
	yy86:
	++s;
	#line 28 "/Users/drslump/tmp/test.re2c"
	{ "IMPORT" }
	#line 724 "grammar.cc"
	}
	#line 42 "/Users/drslump/tmp/test.re2c"

	}
	import re

	# One alternative per token kind, tried in the order listed.  Alternatives
	# are separated by a bare '|'; only the _OR rule matches a literal pipe.
	rex = re.compile(r'''
	    (?P<_NL>(\r?\n)+\s*)
	    |(?P<STRING>"(\\"|\\\\|[^"\n])*?"i?)
	    |(?P<RULE>!?[_?]?[a-z][_a-z0-9]*)
	    |(?P<TOKEN>_?[A-Z][_A-Z0-9]*)
	    |(?P<COMMENT>//[^\n]*)
	    |(?P<WS>[ \t]+)
	    |(?P<NUMBER>\d+)
	    |(?P<_IGNORE>%ignore)
	    |(?P<_IMPORT>%import)
	    |(?P<OP>[+*][?]?|[?](?![a-z]))
	    |(?P<_TO>->)
	    |(?P<_DOT>\.)
	    |(?P<_LBRA>\[)
	    |(?P<_LPAR>\()
	    |(?P<_OR>\|)
	    |(?P<_RBRA>\])
	    |(?P<_RPAR>\))
	    |(?P<TILDE>~)
	    |(?P<_COLON>:)
	''', re.X | re.U)

	# Group number -> token name.  Numbers 2 and 4 are missing because they
	# belong to the unnamed groups nested inside _NL and STRING.
	types = {1: u'_NL', 3: u'STRING', 5: u'RULE', 6: u'TOKEN', 7: u'COMMENT', 8: u'WS', 9: u'NUMBER',
	         10: u'_IGNORE', 11: u'_IMPORT', 12: u'OP', 13: u'_TO', 14: u'_DOT', 15: u'_LBRA', 16: u'_LPAR',
	         17: u'_OR', 18: u'_RBRA', 19: u'_RPAR', 20: u'TILDE', 21: u'_COLON'}

	def relex(stream):
	    """Yield ``(offset, token, text)`` for every match of `rex` in `stream`.

	    A position where no alternative matches is skipped one character at a
	    time without yielding anything.
	    """
	    ofs = 0
	    length = len(stream)  # loop-invariant, so computed once
	    while ofs < length:
	        m = rex.match(stream, ofs)
	        if m is None:
	            ofs += 1
	            continue

	        end = m.end()
	        yield ofs, types[m.lastindex], m.group(0)
	        ofs = end


	# Benchmark corpus: the grammar file without its '//' comment lines.  The
	# file is closed before any timing starts.
	with open('lark/grammars/common.g') as fd:
	    lines = [line for line in fd if not line.startswith('//')]

	data = u''.join(lines) * 100


	def lexit():
	    """Run the regex lexer over the whole corpus, discarding tokens."""
	    for _pos, _token, _value in relex(data):
	        pass


	from timeit import timeit
	print(timeit(lexit, number=100))