Skip to content

Instantly share code, notes, and snippets.

@fpom
Created December 22, 2018 09:26
Show Gist options
  • Save fpom/2491c560b8125e4234d5fac5cd7a70a9 to your computer and use it in GitHub Desktop.
Save fpom/2491c560b8125e4234d5fac5cd7a70a9 to your computer and use it in GitHub Desktop.
Add indent/dedent as explicit text to ease the parsing of Python-like languages
import io
import token
import tokenize
# Numeric type of the "non-logical newline" token (blank lines, comment-only
# lines, line breaks inside brackets).  ``tokenize`` exposes it directly, so
# there is no need to scan ``token.tok_name`` for it (which also left the
# loop variables behind as module globals).
NL = tokenize.NL
class IndedentIO(io.StringIO):
    """Text stream that re-emits Python-like source with explicit block markers.

    The source is tokenized lazily with :mod:`tokenize`.  Every ``INDENT``
    token is rendered as the ``indent`` marker, every ``DEDENT`` token as the
    ``dedent`` marker, and every ``NEWLINE``/``NL`` token as the physical
    line the tokenizer reports for it.  The resulting text is buffered in the
    underlying :class:`io.StringIO` and served through :meth:`read` and
    :meth:`readline`.

    Args:
        src: The source to filter.  A ``str`` is encoded as UTF-8, ``bytes``
            are used as is, any object with a ``readline`` attribute is
            passed to ``tokenize.tokenize`` (which expects ``readline`` to
            return bytes), and anything else is converted with ``str()``.
        indent: Text written for each ``INDENT`` token.
        dedent: Text written for each ``DEDENT`` token.
    """

    def __init__(self, src, indent="$>", dedent="$<"):
        super().__init__()
        self.indent = indent
        self.dedent = dedent
        if isinstance(src, str):
            readline = io.BytesIO(src.encode("utf-8")).readline
        elif isinstance(src, bytes):
            readline = io.BytesIO(src).readline
        elif hasattr(src, "readline"):
            readline = src.readline
        else:
            readline = io.BytesIO(str(src).encode("utf-8")).readline
        self.toks = tokenize.tokenize(readline)
        # Number of characters written to the buffer but not yet read.
        self.ahead = 0
        # Set once the token stream has ended, so that later reads simply
        # return "" instead of leaking StopIteration to the caller.
        self._exhausted = False

    def _feed(self, size=None, eol=False):
        """Tokenize more source and append the rendered text to the buffer.

        Args:
            size: Stop once at least this many unread characters are
                buffered; ``None`` means no such limit.
            eol: When true, stop right after the first line is written.
        """
        if self._exhausted:
            return
        pos = self.tell()
        # Always append at the end of the buffer: writing at the read
        # position would overwrite text that is buffered but not yet read.
        self.seek(0, io.SEEK_END)
        try:
            while size is None or self.ahead < size:
                tok = next(self.toks, None)
                if tok is None or tok.type == token.ENDMARKER:
                    self._exhausted = True
                    break
                if tok.type == token.INDENT:
                    self.write(self.indent)
                    self.ahead += len(self.indent)
                elif tok.type == token.DEDENT:
                    self.write(self.dedent)
                    self.ahead += len(self.dedent)
                elif tok.type in (token.NEWLINE, tokenize.NL):
                    self.write(tok.line)
                    self.ahead += len(tok.line)
                    if eol:
                        break
        finally:
            # Restore the read position even if tokenizing fails.
            self.seek(pos)

    def read(self, size=None):
        """Return up to ``size`` characters of filtered text.

        ``None`` or a negative ``size`` reads everything that remains.
        Returns ``""`` once the source is exhausted.
        """
        if size is not None and size < 0:
            size = None
        self._feed(size)
        txt = super().read(size)
        self.ahead -= len(txt)
        return txt

    def readline(self):
        """Return the next line of filtered text, or ``""`` at end of input."""
        self._feed(eol=True)
        txt = super().readline()
        self.ahead -= len(txt)
        return txt

    @classmethod
    def filter(cls, src, indent="$>", dedent="$<"):
        """Return the whole of ``src`` filtered, as a single string."""
        return cls(src, indent, dedent).read()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment