Created
October 23, 2023 12:43
-
-
Save legionus/6af0366b642fde5be5c3c6ba9094111e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import ply.lex as lex | |
import ply.yacc as yacc | |
import logging | |
import pprint | |
# Configure the root logger: DEBUG level, with every record prefixed by the
# emitting file name and line number.
# To send the log to a file instead, additionally pass
# filename="parselog.txt" and filemode="w".
logging.basicConfig(
    format="%(filename)10s:%(lineno)4d:%(message)s",
    level=logging.DEBUG,
)
# Root logger; handed to PLY below as its debug log.
logger = logging.getLogger()
### | |
### Lexer | |
### | |
# Token type names that t_WORD can assign to a word instead of plain WORD.
# t_WORD also treats each entry as a literal word: input text equal to one
# of these names gets that name as its token type.
reserved = [
    'FETCH',
    'FETCH_ATTR_MACRO',
    'FETCH_ATTR_KEYWORD',
    'FETCH_BODY_SECTION',
]

# Words that t_WORD classifies as FETCH_ATTR_MACRO.
FETCH_ATTR_MACRO = [
    "ALL",
    "FULL",
    "FAST",
]

# Words that t_WORD classifies as FETCH_ATTR_KEYWORD.
FETCH_ATTR_KEYWORD = [
    "UID",
    "ENVELOPE",
    "FLAGS",
    "INTERNALDATE",
    "RFC822.HEADER",
    "RFC822.SIZE",
    "RFC822.TEXT",
]

# Complete token list: the basic tokens followed by the reserved names.
tokens = [
    'SP',
    'EOL',
    'NUMBER',
    'WORD',
    'COLON',
    'COMMA',
    'DQUOTE',
    'STAR',
    'LSBRACKET',
    'RSBRACKET',
    'LPAREN',
    'RPAREN',
    *reserved,
]
# Regular expressions for the tokens that need no action code.
# Every pattern containing a backslash is a raw string so the backslash
# reaches the regex engine unchanged; a plain '\*' is an invalid string
# escape that newer Python versions warn about.
t_SP = r'\ '            # a single space
t_EOL = r'\s*\r?\n'     # optional whitespace, optional CR, then LF
t_STAR = r'\*'
t_COLON = ':'
t_COMMA = ','
t_LSBRACKET = r'\['
t_RSBRACKET = r'\]'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DQUOTE = r'"'
def t_NUMBER(t):
    r'\d+'
    # NOTE: the docstring above is the token's regular expression (PLY reads
    # it), so the documentation for this rule lives in comments.
    #
    # Convert the matched digits to an int. If int() rejects the text
    # (for example a digit string longer than the interpreter's integer
    # string conversion limit), report it and fall back to 0 so lexing can
    # continue.
    try:
        t.value = int(t.value)
    except ValueError:
        # t.value is still the raw matched text here, so format it with %s.
        print("Integer value too large: %s" % t.value)
        t.value = 0
    return t
def t_WORD(t):
    r'[A-Za-z](?:[A-Za-z0-9_.-]*[A-Za-z0-9])?'
    # NOTE: the docstring above is the token's regular expression (PLY reads
    # it), so the documentation for this rule lives in comments.
    #
    # A word starts with a letter and, when longer than one character, ends
    # with a letter or digit; '_', '.' and '-' may appear in between. The
    # trailing part is optional so that one-letter words are accepted too.
    #
    # The matched text is then classified, first match wins:
    #   1. an entry of `reserved`      -> the token type is the word itself
    #   2. an entry of FETCH_ATTR_MACRO   -> FETCH_ATTR_MACRO
    #   3. an entry of FETCH_ATTR_KEYWORD -> FETCH_ATTR_KEYWORD
    #   4. "BODY" or "BODY.PEEK"          -> FETCH_BODY_SECTION
    #   5. anything else                  -> WORD
    word = t.value
    if word in reserved:
        t.type = word
    elif word in FETCH_ATTR_MACRO:
        t.type = "FETCH_ATTR_MACRO"
    elif word in FETCH_ATTR_KEYWORD:
        t.type = "FETCH_ATTR_KEYWORD"
    elif word in ("BODY", "BODY.PEEK"):
        t.type = "FETCH_BODY_SECTION"
    else:
        t.type = "WORD"
    return t
def t_error(t):
    # Lexer error hook: report the first character of the unmatched input
    # and advance the lexer past it by one character.
    offending = t.value[0]
    print("Illegal character '%s'" % (offending,))
    t.lexer.skip(1)
### | |
### Parser | |
### | |
class Node:
    """Parse-tree node: a name paired with an arbitrary value."""

    def __init__(self, name, value):
        """Store the node's name and its value unchanged."""
        self.name, self.value = name, value

    def __repr__(self):
        """Render the node as ``<name>value</name>``."""
        tag = self.name
        return f"<{tag}>{self.value}</{tag}>"
def p_line(p):
    '''
    line : tag SP cmd_fetch EOL
    '''
    # A full command line: keep the tag and the fetch command, drop the
    # separating space and the end-of-line token.
    tag, command = p[1], p[3]
    p[0] = Node("line", [tag, command])
def p_tag(p):
    'tag : WORD'
    # The command tag is the leading word, wrapped in a "tag" node.
    word = p[1]
    p[0] = Node("tag", word)
def p_range(p):
    '''
    range : NUMBER COLON NUMBER
          | NUMBER COLON STAR
          | NUMBER
    '''
    # A range node always holds two bounds [first, last]. For the single
    # NUMBER form both bounds are that number; for the "first:*" form the
    # upper bound is the STAR token's value.
    first = p[1]
    last = p[3] if len(p) == 4 else first
    p[0] = Node("range", [first, last])
def p_sequence(p):
    '''
    sequence : sequence COMMA range
             | range
    '''
    # Left-recursive list of ranges: the first range creates the "sequence"
    # node, every later one is appended to that same node.
    if len(p) != 4:
        p[0] = Node("sequence", [p[1]])
        return
    accumulated, new_range = p[1], p[3]
    accumulated.value.append(new_range)
    p[0] = accumulated
def p_fetch_section(p):
    '''
    fetch-section : WORD
    '''
    # The section inside BODY[...] is a single word, stored in a
    # one-element list.
    section_name = p[1]
    p[0] = Node("fetch-section", [section_name])
def p_fetch_attr(p):
    '''
    fetch-attr : FETCH_ATTR_MACRO
               | FETCH_ATTR_KEYWORD
               | FETCH_BODY_SECTION LSBRACKET fetch-section RSBRACKET
    '''
    # Macro and keyword attributes become a "fetch-attr" node holding the
    # token value. The bracketed body form becomes a "fetch-body" node
    # holding the parsed section.
    # NOTE(review): in the body form the FETCH_BODY_SECTION token's own
    # value (p[1]) is not stored in the resulting node.
    if len(p) != 5:
        p[0] = Node("fetch-attr", p[1])
        return
    p[0] = Node("fetch-body", p[3])
def p_fetch_attr_list(p):
    '''
    fetch-attr-list : fetch-attr-list SP fetch-attr
                    | fetch-attr
    '''
    # Helper rule for p_fetch_attrs: one or more space-separated attributes
    # as they appear between the parentheses. The first attribute creates
    # the "fetch-attrs" node, every later one is appended to it.
    if len(p) == 4:
        p[1].value.append(p[3])
        p[0] = p[1]
    else:
        p[0] = Node("fetch-attrs", [p[1]])


def p_fetch_attrs(p):
    '''
    fetch-attrs : LPAREN fetch-attr-list RPAREN
                | fetch-attr
    '''
    # Either a parenthesised, space-separated list of attributes or a
    # single bare attribute. Both forms yield a "fetch-attrs" node whose
    # value is the list of attribute nodes.
    #
    # The parentheses wrap the whole list exactly once, which is why the
    # list itself is parsed by the separate fetch-attr-list rule: with the
    # parentheses inside the recursive rule, "(UID)" and
    # "(UID FLAGS INTERNALDATE)" could not be parsed.
    if len(p) == 4:
        # LPAREN fetch-attr-list RPAREN: the list node is already complete.
        p[0] = p[2]
    else:
        p[0] = Node("fetch-attrs", [p[1]])
def p_cmd_fetch(p):
    'cmd_fetch : FETCH SP sequence SP fetch-attrs'
    # The FETCH command keeps its message sequence and its attribute list;
    # the keyword and the separating spaces are dropped.
    sequence, attributes = p[3], p[5]
    p[0] = Node("fetch_cmd", [sequence, attributes])
def p_error(p):
    # Parser error hook: always print a general notice, then either the
    # type of the offending token or, when no token is given, an
    # end-of-input notice.
    print("Syntax error in input!")
    if not p:
        print("Syntax error at EOF")
        return
    print("Syntax error at token", p.type)
# Sample IMAP FETCH command lines used as parser input.
# NOTE: the first assignment is immediately overwritten by the second one,
# so only the second sample is actually parsed below.
s = 'a0005 FETCH 1:10 (UID FLAGS INTERNALDATE RFC822.SIZE BODY.PEEK[HEADER.FIELDS (DATE FROM SENDER SUBJECT TO CC MESSAGE-ID REFERENCES CONTENT-TYPE CONTENT-DESCRIPTION IN-REPLY-TO REPLY-TO LINES LIST-POST X-LABEL)])\r\n'
s = 'a0005 FETCH 2,4:7,9,12:* (UID FLAGS INTERNALDATE)\r\n'
# Build the lexer in debug mode, sending its debug output to the root logger.
m = lex.lex(debug=True, debuglog=logger)
# Disabled helper: feeds the sample to the lexer and prints every token.
#m.input(s)
#while True:
#    tok = m.token()
#    if not tok:
#        break
#    print(tok)
# Build the parser with 'line' as the start symbol, also in debug mode.
p = yacc.yacc(start='line', debug=True, debuglog=logger)
# No lexer is passed to parse(); PLY then uses the lexer most recently
# built by lex.lex().
r = p.parse(s)
# Pretty-print the parse result.
pprint.pprint(r)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment