Skip to content

Instantly share code, notes, and snippets.

@nixorn
Last active March 6, 2019 12:43
Show Gist options
  • Save nixorn/f88fb194bd92b4fee3f351e689e99e0a to your computer and use it in GitHub Desktop.
Save nixorn/f88fb194bd92b4fee3f351e689e99e0a to your computer and use it in GitHub Desktop.
Moo Haskell indent sample (mostly stolen from https://gist.github.com/nathan/d8d1adea38a1ef3a6d6a06552da641aa)
<html>
<head>
<meta content="text/html; charset=utf-8" http-equiv="content-type">
</head>
<body>
<script type="text/javascript" src="./moo.js"> </script>
<script type="text/javascript">
// Token rules for the tiny Haskell-like sample language.
//
// `integer` must be a RegExp: a string pattern is treated by moo as a literal,
// so the former "[0-9]*?" only ever matched that exact seven-character text
// and digit runs fell through to `id` (because `\w` also matches digits).
// `integer` is listed before `id` so that digit runs are claimed by it first.
// NOTE: any sample output recorded before this fix shows numbers with
// type "id"; with this rule they are reported as type "integer".
const lexer = moo.compile({
  plus: "+",
  equal: "=",
  where: "where",
  minus: "-",
  power: "**",
  lbrace: "(",
  rbrace: ")",
  integer: /[0-9]+/,
  ws: /[ \t]+/,
  // Runs of line terminators are folded into one token; `lineBreaks` lets moo
  // keep its line/column bookkeeping correct across them.
  nl: { match: /(?:\r\n?|\n)+/, lineBreaks: true },
  id: /\w+/,
})
// Example: lex a small Haskell-like definition with nested `where` clauses.
// Each `where` line is indented two columns deeper than the line before it
// (2, 4 and 6 spaces), so the stream contains three `indent` tokens, then
// three `dedent` tokens in front of the second top-level `testIndent` line.
const tokens = indented(lexer, [
  'testIndent i = t1 i',
  '  where t1 i = 2 + t2 i',
  '    where t2 i = (t3 i) ** 2',
  '      where t3 i = i - 3',
  'testIndent 0 = 5',
  '', // keeps the trailing newline after the last line
].join('\n'));
// Drain the generator so the whole token list can be inspected at once.
const result = [...tokens];
console.log(result);
// implementation
/**
 * Re-yields the tokens of `lexer` for `source`, turning line structure into
 * explicit layout tokens.
 *
 * `ws` tokens are dropped. Every `nl` token is replaced by exactly one kind of
 * synthetic token, chosen by comparing the indentation of the next non-blank
 * line (as reported by `peekable(...).nextIndent()`) with the current one:
 *   - same indentation    -> one `{type: 'nl'}`
 *   - deeper indentation  -> one `{type: 'indent'}`
 *   - shallower           -> one `{type: 'dedent'}` per level popped
 * Levels still open when the input ends are closed with `dedent` tokens.
 *
 * @param lexer  object whose `reset(source)` returns a token source with a
 *               `next()` method (it is handed to `peekable`)
 * @param source value passed straight to `lexer.reset`
 * @throws {Error} 'inconsistent indentation' when a shallower line does not
 *                 line up with the level reached by popping
 */
function* indented(lexer, source) {
  const stream = peekable(lexer.reset(source));
  const openLevels = [];

  // Swallow leading blank lines and take the first line's indentation as the base.
  let current = stream.nextIndent();

  while (true) {
    const token = stream.next();
    if (!token) break;

    // Whitespace inside a line carries no meaning.
    if (token.type === 'ws') continue;

    if (token.type !== 'nl') {
      yield token;
      continue;
    }

    const upcoming = stream.nextIndent();
    if (upcoming == null) break; // nothing but blank space left: end of input

    if (upcoming > current) {
      openLevels.push(current);
      yield { type: 'indent' };
    } else if (upcoming === current) {
      yield { type: 'nl' };
    } else {
      // Close levels until we are no longer deeper than the new line.
      while (upcoming < current) {
        current = openLevels.pop();
        yield { type: 'dedent' };
      }
      if (upcoming !== current) {
        throw new Error('inconsistent indentation');
      }
    }
    current = upcoming;
  }

  // Close every block that is still open at end of input.
  for (let remaining = openLevels.length; remaining > 0; remaining -= 1) {
    yield { type: 'dedent' };
  }
}
/**
 * Wraps a token source in an object offering one token of lookahead.
 *
 * @param lexer object with a `next()` method; a falsy return value is treated
 *              as end of input
 * @returns an object with:
 *   - `next()`: returns the buffered token and pulls the following one
 *   - `peek()`: returns the buffered token without consuming it
 *   - `nextIndent()`: skips `nl` tokens and whitespace-only lines, then
 *     returns the length of the `ws` token's `value` that starts the next
 *     non-blank line (consuming that `ws` token), `0` when the line starts
 *     with any other token (which is left unconsumed), or `undefined` when
 *     the input ends first
 */
function peekable(lexer) {
  let lookahead = lexer.next();

  return {
    next() {
      const current = lookahead;
      lookahead = lexer.next();
      return current;
    },

    peek() {
      return lookahead;
    },

    nextIndent() {
      for (let token = this.peek(); token; token = this.peek()) {
        switch (token.type) {
          case 'nl':
            // Blank line boundary: skip and look at what follows.
            this.next();
            break;

          case 'ws': {
            const width = token.value.length;
            this.next();
            const follower = this.peek();
            if (!follower) return undefined; // trailing whitespace, then end of input
            if (follower.type !== 'nl') return width;
            // Whitespace-only line: discard its newline and keep scanning.
            this.next();
            break;
          }

          default:
            // Line starts directly with content.
            return 0;
        }
      }
      return undefined;
    },
  };
}
</script>
</body>
</html>
<!--
result:
[
{
"type": "id",
"value": "testIndent",
"text": "testIndent",
"offset": 0,
"lineBreaks": 0,
"line": 1,
"col": 1
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 11,
"lineBreaks": 0,
"line": 1,
"col": 12
},
{
"type": "equal",
"value": "=",
"text": "=",
"offset": 13,
"lineBreaks": 0,
"line": 1,
"col": 14
},
{
"type": "id",
"value": "t1",
"text": "t1",
"offset": 15,
"lineBreaks": 0,
"line": 1,
"col": 16
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 18,
"lineBreaks": 0,
"line": 1,
"col": 19
},
{
"type": "indent"
},
{
"type": "where",
"value": "where",
"text": "where",
"offset": 22,
"lineBreaks": 0,
"line": 2,
"col": 3
},
{
"type": "id",
"value": "t1",
"text": "t1",
"offset": 28,
"lineBreaks": 0,
"line": 2,
"col": 9
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 31,
"lineBreaks": 0,
"line": 2,
"col": 12
},
{
"type": "equal",
"value": "=",
"text": "=",
"offset": 33,
"lineBreaks": 0,
"line": 2,
"col": 14
},
{
"type": "id",
"value": "2",
"text": "2",
"offset": 35,
"lineBreaks": 0,
"line": 2,
"col": 16
},
{
"type": "plus",
"value": "+",
"text": "+",
"offset": 37,
"lineBreaks": 0,
"line": 2,
"col": 18
},
{
"type": "id",
"value": "t2",
"text": "t2",
"offset": 39,
"lineBreaks": 0,
"line": 2,
"col": 20
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 42,
"lineBreaks": 0,
"line": 2,
"col": 23
},
{
"type": "indent"
},
{
"type": "where",
"value": "where",
"text": "where",
"offset": 48,
"lineBreaks": 0,
"line": 3,
"col": 5
},
{
"type": "id",
"value": "t2",
"text": "t2",
"offset": 54,
"lineBreaks": 0,
"line": 3,
"col": 11
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 57,
"lineBreaks": 0,
"line": 3,
"col": 14
},
{
"type": "equal",
"value": "=",
"text": "=",
"offset": 59,
"lineBreaks": 0,
"line": 3,
"col": 16
},
{
"type": "lbrace",
"value": "(",
"text": "(",
"offset": 61,
"lineBreaks": 0,
"line": 3,
"col": 18
},
{
"type": "id",
"value": "t3",
"text": "t3",
"offset": 62,
"lineBreaks": 0,
"line": 3,
"col": 19
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 65,
"lineBreaks": 0,
"line": 3,
"col": 22
},
{
"type": "rbrace",
"value": ")",
"text": ")",
"offset": 66,
"lineBreaks": 0,
"line": 3,
"col": 23
},
{
"type": "power",
"value": "**",
"text": "**",
"offset": 68,
"lineBreaks": 0,
"line": 3,
"col": 25
},
{
"type": "id",
"value": "2",
"text": "2",
"offset": 71,
"lineBreaks": 0,
"line": 3,
"col": 28
},
{
"type": "indent"
},
{
"type": "where",
"value": "where",
"text": "where",
"offset": 79,
"lineBreaks": 0,
"line": 4,
"col": 7
},
{
"type": "id",
"value": "t3",
"text": "t3",
"offset": 85,
"lineBreaks": 0,
"line": 4,
"col": 13
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 88,
"lineBreaks": 0,
"line": 4,
"col": 16
},
{
"type": "equal",
"value": "=",
"text": "=",
"offset": 90,
"lineBreaks": 0,
"line": 4,
"col": 18
},
{
"type": "id",
"value": "i",
"text": "i",
"offset": 92,
"lineBreaks": 0,
"line": 4,
"col": 20
},
{
"type": "minus",
"value": "-",
"text": "-",
"offset": 94,
"lineBreaks": 0,
"line": 4,
"col": 22
},
{
"type": "id",
"value": "3",
"text": "3",
"offset": 96,
"lineBreaks": 0,
"line": 4,
"col": 24
},
{
"type": "dedent"
},
{
"type": "dedent"
},
{
"type": "dedent"
},
{
"type": "id",
"value": "testIndent",
"text": "testIndent",
"offset": 98,
"lineBreaks": 0,
"line": 5,
"col": 1
},
{
"type": "id",
"value": "0",
"text": "0",
"offset": 109,
"lineBreaks": 0,
"line": 5,
"col": 12
},
{
"type": "equal",
"value": "=",
"text": "=",
"offset": 111,
"lineBreaks": 0,
"line": 5,
"col": 14
},
{
"type": "id",
"value": "5",
"text": "5",
"offset": 113,
"lineBreaks": 0,
"line": 5,
"col": 16
}
]
-->
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment