-
-
Save glebec/572196e2ca30d9afe09c38b4e9d2b227 to your computer and use it in GitHub Desktop.
/** | |
* Minimal demo of parser combinators, possibly as a target for recent | |
* JS web dev graduates to implement as an exercise. | |
* | |
* Huge credit to Hutton, Meijer, and Swierstra for their papers | |
* on the subject. | |
*/ | |
class Parser { | |
// :: (String -> { result: a, remaining: String } | Null) -> Parser a | |
constructor (parserFn) { | |
this._parserFn = parserFn | |
} | |
// :: (String -> { result: a, remaining: String } | Null) -> Parser a | |
static from (parserFn) { | |
return new Parser(parserFn) | |
} | |
// :: Parser a ~> String -> { result: a, remaining: String } | Null | |
parse (string) { | |
return this._parserFn(string) | |
} | |
// :: String -> Parser String | |
static literal (string) { | |
return Parser.from(tokens => { | |
if (!tokens.startsWith(string)) return null | |
return { | |
result: string, | |
remaining: tokens.slice(string.length), | |
} | |
}) | |
} | |
// :: Parser a ~> Parser b -> Parser (a | b) | |
or (p2) { | |
return Parser.from(tokens => this.parse(tokens) || p2.parse(tokens)) | |
} | |
// :: ...Parser * -> Parser * | |
static any (...ps) { | |
return ps.reduce((anyP, p) => anyP.or(p)) | |
} | |
// :: (* -> Parser a) -> Parser a | |
static lazy (mkParser) { | |
return Parser.from(tokens => mkParser().parse(tokens)) | |
} | |
// :: a -> Parser a | |
static of (value) { // aka unit, pure, return, inject | |
return Parser.from(string => ({ | |
result: value, | |
remaining: string, | |
})) | |
} | |
// :: Parser a ~> (a -> Parser b) -> Parser b | |
chain (step) { // aka bind, then, flatMap | |
return Parser.from(tokens => { | |
const res1 = this.parse(tokens) | |
if (!res1) return null | |
const p2 = step(res1.result) | |
return p2.parse(res1.remaining) | |
}) | |
} | |
// :: Parser a ~> (a -> b) -> Parser b | |
map (f) { | |
return this.chain(x => Parser.of(f(x))) | |
} | |
// :: Parser a -> Parser [a] | |
static many0 (p1) { | |
return p1.chain(r => { | |
return Parser.many0(p1).chain(rs => { | |
return Parser.of([r, ...rs]) | |
}) | |
}).or(Parser.of([])) | |
} | |
// :: Parser a -> Parser [a] | |
static many1 (p1) { | |
return p1.chain(r => { | |
return Parser.many0(p1).chain(rs => { | |
return Parser.of([r, ...rs]) | |
}) | |
}) | |
} | |
// :: Parser a ~> Parser b -> Parser b | |
useRight (p2) { | |
return this.chain(() => p2) | |
} | |
// :: Parser a ~> Parser b -> Parser a | |
useLeft (p2) { | |
return this.chain(left => p2.chain(() => Parser.of(left))) | |
} | |
} | |
const P = Parser | |
/** | |
* Demonstration using math expressions | |
*/ | |
const DIGIT = // :: Parser Number | |
P.any(...'0123456789'.split('').map(P.literal)) | |
const SPACE = // :: Parser String | |
P.many0(P.literal(' ')) | |
const NUM = // :: Parser Number | |
P.many1(DIGIT) | |
.map(nums => +nums.join('')) | |
const FACTOR = // :: Parser Number | |
P.any( | |
P.literal('(') | |
.useRight(SPACE) | |
.useRight(P.lazy(() => EXPR)) | |
.useLeft(SPACE) | |
.useLeft(P.literal(')')), | |
P.literal('-') | |
.useRight(P.lazy(() => FACTOR)) | |
.map(n => -n), | |
NUM | |
) | |
const F2 = // :: Parser Number | |
P.any( | |
SPACE | |
.useRight(P.literal('*')) | |
.useRight(SPACE) | |
.useRight(FACTOR) | |
.chain(n1 => F2 | |
.map(n2 => n1 * n2)), | |
SPACE | |
.useRight(P.literal('/')) | |
.useRight(SPACE) | |
.useRight(FACTOR) | |
.chain(n1 => F2 | |
.map(n2 => 1/n1 * n2)), | |
P.of(1) | |
) | |
const TERM = // :: Parser Number | |
FACTOR | |
.chain(n1 => F2 | |
.map(n2 => n1 * n2)) | |
const T2 = // :: Parser Number | |
P.any( | |
SPACE | |
.useRight(P.literal('+')) | |
.useRight(SPACE) | |
.useRight(TERM) | |
.chain(n1 => T2 | |
.map(n2 => n1 + n2)), | |
SPACE | |
.useRight(P.literal('-')) | |
.useRight(SPACE) | |
.useRight(TERM) | |
.chain(n1 => T2 | |
.map(n2 => -n1 + n2)), | |
P.of(0) | |
) | |
const EXPR = // :: Parser Number | |
TERM | |
.chain(n1 => T2 | |
.map(n2 => n1 + n2)) | |
const { result } = EXPR.parse('-5 * -(4 + -2) / (0 + 5) - 3 * 2 is pretty cool') | |
console.log(result) // -4 |
Since a lot of the methods in this class are static, have you considered simply using an object with functions of the appropriate type? In other words:
// :: type Parser a = String -> [(a, String)]
const Parser = (() => {
// ...
// :: (a -> Parser b) -> Parser a -> Parser b
const chain = f => p => s => p(s).reduce((r, [a, s_]) => [...r, ...f(a)(s_)], [])
return { ..., chain }
})()
@masaeedu that's a good point and I will think on it, but the main reason I used a class was to have infix methods. Your chain
takes both parsers explicitly, which is precisely what I wanted to avoid as now you've transformed an infix position (p1.chain(makeP2)
) into prefix (chain(makeP2, p1)
). This isn't ideal for things like useRight
: p1.useRight(p2).useRight(p3).useLeft(p4).useLeft(p5)
becomes useLeft(useLeft(useRight(useRight(p1, p2), p3), p4), p5)
.
Slack followup from masaeedu:
You can actually have your cake and eat it too, with
p1 |> chain(makeP2)
orFn.pipe([p1, useRight(p2), useRight(p3), ...])
, etc. (lots of different flavors of this)
Another good point, but that layers on even more new concepts for exercise-takers to tackle while also understanding parsers and chain
. Might make a good refactoring challenge though.
Moved to GitHub repo
Notes to self on exercise design:
chain
regex
as a parser, to get around having to implementmany0
&many1
useRight
anduseLeft
many0
andmany1
and drop all uses ofregex
map
.