Skip to content

Instantly share code, notes, and snippets.

@legionus
Created October 23, 2023 12:43
Show Gist options
  • Save legionus/6af0366b642fde5be5c3c6ba9094111e to your computer and use it in GitHub Desktop.
Save legionus/6af0366b642fde5be5c3c6ba9094111e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import ply.lex as lex
import ply.yacc as yacc
import logging
import pprint
# Configure the root logger: DEBUG and above, each record prefixed with the
# originating file name and line number.  The commented-out keywords would
# redirect the log into a file instead of the default stream.
logging.basicConfig(
    format="%(filename)10s:%(lineno)4d:%(message)s",
    level=logging.DEBUG,
    # filename="parselog.txt",
    # filemode="w",
)

# Root logger; handed to PLY below as its debug log.
logger = logging.getLogger()
###
### Lexer
###
# Token types that t_WORD may assign to a word instead of plain WORD.
# They are appended to `tokens` below so the parser knows about them.
reserved = [
    'FETCH', 'FETCH_ATTR_MACRO', 'FETCH_ATTR_KEYWORD', 'FETCH_BODY_SECTION',
]

# Words that t_WORD reports as FETCH_ATTR_MACRO.
FETCH_ATTR_MACRO = [
    "ALL", "FULL", "FAST",
]

# Words that t_WORD reports as FETCH_ATTR_KEYWORD.
FETCH_ATTR_KEYWORD = [
    "UID", "ENVELOPE", "FLAGS", "INTERNALDATE",
    "RFC822.HEADER", "RFC822.SIZE", "RFC822.TEXT",
]

# Complete list of token types produced by the lexer.
tokens = [
    'SP', 'EOL', 'NUMBER', 'WORD', 'COLON', 'COMMA', 'DQUOTE', 'STAR',
    'LSBRACKET', 'RSBRACKET', 'LPAREN', 'RPAREN',
] + reserved

# Simple tokens defined by a regular expression string.
t_SP = r'\ '            # a single (escaped) space
t_EOL = r'\s*\r?\n'     # optional trailing whitespace, then LF or CRLF
# Raw string: '\*' in a normal literal is an invalid escape sequence and
# triggers a SyntaxWarning on recent Python versions.
t_STAR = r'\*'
t_COLON = ':'
t_COMMA = ','
t_LSBRACKET = r'\['
t_RSBRACKET = r'\]'
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_DQUOTE = r'"'
# Lex a run of digits and convert the token value to an int.
#
# The string literal that opens the body is the token's regular expression
# (PLY reads it from the function docstring), so it must remain the first
# statement and the documentation has to live in comments.
#
# t: the token being built; t.value holds the matched text on entry.
# Returns the same token with t.value replaced by the integer, or by 0 when
# the text cannot be converted.
def t_NUMBER(t):
    r'\d+'
    text = t.value
    try:
        t.value = int(text)
    except ValueError:
        # `text` is still a string here, so it is formatted with %s; the
        # message is actually interpolated instead of being printed next to
        # its argument.  (Python 3.11+ can raise ValueError for extremely
        # long digit strings.)
        print("Integer value too large %s" % text)
        t.value = 0
    return t
# Lex a word and retype it when it is one of the known keywords.
#
# The string literal that opens the body is the token's regular expression
# (PLY reads it from the function docstring), so it must remain the first
# statement and the documentation has to live in comments.
#
# A word starts with a letter, may contain letters, digits, '_', '.' and '-',
# and must end with a letter or digit.  The tail is optional so that a
# one-character word (for example the tag "a") is accepted as well.
#
# t: the token being built; t.value holds the matched text.
# Returns the same token with t.type set to the matching keyword type, or to
# "WORD" when the text is not a keyword.
def t_WORD(t):
    r'[A-Za-z](?:[A-Za-z0-9_.-]*[A-Za-z0-9])?'
    word = t.value
    if word in reserved:
        # NOTE(review): `reserved` also lists the synthetic type names
        # (FETCH_ATTR_MACRO, ...), so those literal words are retyped too.
        t.type = word
    elif word in FETCH_ATTR_MACRO:
        t.type = "FETCH_ATTR_MACRO"
    elif word in FETCH_ATTR_KEYWORD:
        t.type = "FETCH_ATTR_KEYWORD"
    elif word in ("BODY", "BODY.PEEK"):
        t.type = "FETCH_BODY_SECTION"
    else:
        t.type = "WORD"
    return t
# Lexer error hook: report the character that matched no token rule and
# advance the lexer by one character so scanning can continue.
#
# t: error token; t.value is the remaining input starting at the bad character.
def t_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
###
### Parser
###
class Node:
    """Parse-tree node: a rule name paired with its value.

    The value is whatever the grammar action stored: a token value, a child
    node, or a list of them.
    """

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __repr__(self):
        # Render as an XML-like element so nested nodes print readably.
        tag = self.name
        return f"<{tag}>{self.value}</{tag}>"
# Top-level rule: a tagged FETCH command terminated by an end-of-line token.
# The docstring is the grammar production read by PLY; keep it first.
# Produces Node("line", [tag, command]); the SP and EOL tokens are dropped.
def p_line(p):
    '''
    line : tag SP cmd_fetch EOL
    '''
    tag, command = p[1], p[3]
    p[0] = Node("line", [tag, command])
# A command tag is a single WORD token, wrapped in a "tag" node.
# The docstring is the grammar production read by PLY; keep it first.
def p_tag(p):
    'tag : WORD'
    word = p[1]
    p[0] = Node("tag", word)
# A message range: "N:M", "N:*" or a single number "N".
# The docstring is the grammar production read by PLY; keep it first.
# Produces Node("range", [first, last]); a lone number yields first == last.
def p_range(p):
    '''
    range : NUMBER COLON NUMBER
    | NUMBER COLON STAR
    | NUMBER
    '''
    first = p[1]
    # Four slots means the "first COLON last" form was matched.
    last = p[3] if len(p) == 4 else first
    p[0] = Node("range", [first, last])
# A comma-separated list of ranges, collected left-recursively.
# The docstring is the grammar production read by PLY; keep it first.
# Produces Node("sequence", [range, ...]); the node created for the first
# range is reused and extended for every following one.
def p_sequence(p):
    '''
    sequence : sequence COMMA range
    | range
    '''
    if len(p) != 4:
        # First range: start a new list.
        p[0] = Node("sequence", [p[1]])
        return
    seq = p[1]
    seq.value.append(p[3])
    p[0] = seq
# The section named between the square brackets of a body fetch: one WORD.
# The docstring is the grammar production read by PLY; keep it first.
# Produces Node("fetch-section", [word]).
def p_fetch_section(p):
    '''
    fetch-section : WORD
    '''
    section = [p[1]]
    p[0] = Node("fetch-section", section)
# One fetch attribute: a macro, a keyword, or a body section in brackets.
# The docstring is the grammar production read by PLY; keep it first.
# Produces Node("fetch-body", section) for the bracketed form, otherwise
# Node("fetch-attr", token_value).
def p_fetch_attr(p):
    '''
    fetch-attr : FETCH_ATTR_MACRO
    | FETCH_ATTR_KEYWORD
    | FETCH_BODY_SECTION LSBRACKET fetch-section RSBRACKET
    '''
    if len(p) != 5:
        # Single-token alternatives (macro or keyword).
        p[0] = Node("fetch-attr", p[1])
        return
    p[0] = Node("fetch-body", p[3])
# Helper rule: one or more fetch attributes separated by single spaces,
# collected left-recursively into one "fetch-attrs" node.
# The docstring is the grammar production read by PLY; keep it first.
def p_fetch_attr_list(p):
    '''
    fetch-attr-list : fetch-attr-list SP fetch-attr
    | fetch-attr
    '''
    if len(p) == 4:
        p[1].value.append(p[3])
        p[0] = p[1]
    else:
        p[0] = Node("fetch-attrs", [p[1]])


# The attribute part of a FETCH command: either one bare attribute or a
# parenthesised, space-separated list such as "(UID FLAGS INTERNALDATE)".
# The docstring is the grammar production read by PLY; keep it first.
#
# The previous production (LPAREN fetch-attrs SP fetch-attr RPAREN) required
# a nested pair of parentheses for every additional attribute, so an ordinary
# list of three attributes was a syntax error.  The list is now parsed by
# fetch-attr-list and the parentheses are consumed exactly once.
#
# Produces Node("fetch-attrs", [attr, ...]) in both forms.
def p_fetch_attrs(p):
    '''
    fetch-attrs : LPAREN fetch-attr-list RPAREN
    | fetch-attr
    '''
    if len(p) == 4:
        # Parenthesised form: the list node is already complete.
        p[0] = p[2]
    else:
        p[0] = Node("fetch-attrs", [p[1]])
# The FETCH command: keyword, message sequence and attribute list, each
# separated by a single space.
# The docstring is the grammar production read by PLY; keep it first.
# Produces Node("fetch_cmd", [sequence, attributes]).
def p_cmd_fetch(p):
    'cmd_fetch : FETCH SP sequence SP fetch-attrs'
    sequence, attributes = p[3], p[5]
    p[0] = Node("fetch_cmd", [sequence, attributes])
# Parser error hook: print a generic message, then either the type of the
# offending token or a note that the input ended early.
#
# p: the token at which parsing failed; falsy when no token is available.
def p_error(p):
    print("Syntax error in input!")
    if not p:
        print("Syntax error at EOF")
        return
    print("Syntax error at token", p.type)
# Sample input lines.  Only the second assignment takes effect; the first
# one (with a BODY.PEEK[...] section) is kept for reference and is
# immediately overwritten.
s = 'a0005 FETCH 1:10 (UID FLAGS INTERNALDATE RFC822.SIZE BODY.PEEK[HEADER.FIELDS (DATE FROM SENDER SUBJECT TO CC MESSAGE-ID REFERENCES CONTENT-TYPE CONTENT-DESCRIPTION IN-REPLY-TO REPLY-TO LINES LIST-POST X-LABEL)])\r\n'
s = 'a0005 FETCH 2,4:7,9,12:* (UID FLAGS INTERNALDATE)\r\n'

# Build the lexer from the t_* definitions above, logging its debug output.
m = lex.lex(
    debug=True,
    debuglog=logger,
)

# Uncomment to dump the raw token stream instead of parsing:
# m.input(s)
# while True:
#     tok = m.token()
#     if not tok:
#         break
#     print(tok)

# Build the parser from the p_* rules, starting at the `line` rule.
p = yacc.yacc(
    start='line',
    debug=True,
    debuglog=logger,
)

# Parse the sample and pretty-print the resulting tree.
r = p.parse(s)
pprint.pprint(r)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment