Last active
August 29, 2019 06:14
-
-
Save zaydek-old/e20864abe25ecbad7311ee1f7fed82f3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Token categories emitted by the lexer. Each value is a short
 * class-name-like string used to tag the emitted spans.
 * Frozen so this shared constant cannot be mutated by accident.
 */
const Token = Object.freeze({
	UNS: "uns", // unset (not whitespace)
	COM: "com", // comment
	KEY: "key", // keyword
	NUM: "num", // number
	STR: "str", // string
	PUN: "pun", // punctuation
	FUN: "fun", // function
})
/*
 * Cursor-based lexer over a source string. `x1` marks the start of the
 * pending token, `x2` is the read position, and `width` records the size
 * of the last read so exactly one backup() is possible. Emitted tokens
 * are grouped per source line in `lines`.
 */
class Lexer {
	constructor(value) {
		this.value = value
		this.x1 = 0       // start of the token being built
		this.x2 = 0       // next read position
		this.width = 0    // width of the last next(); 0 at end of input
		this.lines = [[]] // emitted tokens, one array per source line
	}
	// Consume and return one character, or undefined at end of input.
	next() {
		if (this.x2 === this.value.length) {
			this.width = 0
			return undefined
		}
		this.width = 1
		const char = this.value[this.x2]
		this.x2 += 1
		return char
	}
	// Return the upcoming character without consuming it.
	peek() {
		const char = this.next()
		this.backup()
		return char
	}
	// Un-read the last character (no-op at end of input, where width is 0).
	backup() {
		this.x2 -= this.width
	}
	// Push the pending span [x1, x2) onto the current line as `token`.
	emit(token) {
		const current = this.lines[this.lines.length - 1]
		current.push({ token, value: this.focus() })
		this.ignore()
	}
	// Emit everything before the just-read newline, open a fresh line,
	// then re-consume and drop the newline itself.
	emit_line(token) {
		this.backup()
		this.emit(token)
		this.lines.push([])
		this.next()
		this.ignore()
	}
	// The text of the pending token.
	focus() {
		return this.value.substring(this.x1, this.x2)
	}
	// Drop the pending text by advancing the token start to the cursor.
	ignore() {
		this.x1 = this.x2
	}
	// Consume one character if it is in `str`; report whether it was.
	accept(str) {
		if (str.includes(this.next())) {
			return true
		}
		this.backup()
		return false
	}
	// Consume a run of characters drawn from `str`.
	accept_run(str) {
		for (;;) {
			if (!this.accept(str)) {
				break
			}
		}
	}
}
/*
 * Lookup table of Go keywords, predeclared types/constants, and builtin
 * functions, mapping each word to `true` for O(1) membership tests.
 */
const key_map = {}
;(function () {
	const words = "break default func interface select case defer go map struct chan else goto package switch const fallthrough if range type continue for import return var bool byte complex64 complex128 error float32 float64 int int8 int16 int32 int64 rune string uint uint8 uint16 uint32 uint64 uintptr true false iota nil append cap close complex copy delete imag len make new panic print println real recover"
	words.split(" ").forEach((word) => {
		key_map[word] = true
	})
}())
// Highlight Go source: tokenize `value` into per-line token spans with
// lex(), then hand the lines to parse() for final assembly.
// NOTE(review): parse() is not defined in this chunk — presumably it
// renders the token lines to output; confirm against the rest of the file.
function parse_go(value) {
	return parse(lex(value))
}
/**
 * Tokenize Go source into per-line arrays of { token, value } spans.
 *
 * @param {string} value - Go source code.
 * @returns {Array<Array<{token: (string|0), value: string}>>} one array of
 *   spans per source line. Whitespace spans carry token `0` (falsy,
 *   i.e. "not set"); everything else carries a `Token.*` string.
 */
function lex(value) {
	const lexer = new Lexer(value)
	let ch = ""
	while ((ch = lexer.next())) {
		let token = 0
		switch (true) {
		// comment
		case ch == "/" && (lexer.peek() == "/" || lexer.peek() == "*"):
			ch = lexer.next()
			if (ch == "/") {
				// Line comment: run to (but not past) the end of the line.
				while ((ch = lexer.next())) {
					if (ch == "\n") {
						lexer.backup()
						break
					}
				}
			} else if (ch == "*") {
				// Block comment: may span lines; emit one COM span per line.
				while ((ch = lexer.next())) {
					if (ch == "*" && lexer.peek() == "/") {
						lexer.next()
						break
					} else if (ch == "\n") {
						lexer.emit_line(Token.COM)
						// don't break
					}
				}
			}
			token = Token.COM
			break
		// whitespace
		case ch == " " || ch == "\t" || ch == "\n":
			// NOTE(review): the x2 > 1 guard means a newline as the very
			// first character skips the line break below — confirm intended.
			if (lexer.x2 > 1 && ch == "\n") {
				lexer.lines.push([])
				lexer.ignore()
				break
			}
			lexer.accept_run(" \t")
			// token stays 0 (falsy): whitespace spans are emitted untagged.
			break
		// keyword or function
		case ch >= "a" && ch <= "z" || ch >= "A" && ch <= "Z" || ch == "_": {
			lexer.accept_run("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789")
			if (key_map[lexer.focus()]) {
				token = Token.KEY
				break
			}
			// Look ahead over spaces for "(" to tag function names, then
			// rewind so the spaces are not included in the token.
			const x2 = lexer.x2
			lexer.accept_run(" ")
			if (lexer.peek() == "(") {
				token = Token.FUN
			}
			lexer.x2 = x2
			token = token || Token.UNS
			break
		}
		// string
		case ch == "'" || ch == "\"" || ch == "`": {
			const quote = ch
			while ((ch = lexer.next())) {
				if (quote != "`" && ch == "\\" && lexer.peek() != "\n") {
					// Skip any escaped character (not just an escaped quote),
					// so e.g. "a\\" terminates at its real closing quote.
					lexer.next()
				} else if (quote == "`" && ch == "\n") {
					// Raw strings may span lines; emit one STR span per line.
					lexer.emit_line(Token.STR)
					// don't break
				} else if (ch == quote || ch == "\n") { // break opportunities
					if (ch == "\n") {
						// Unterminated string: stop before the newline.
						lexer.backup()
					}
					break
				}
			}
			token = Token.STR
			break
		}
		// number
		case ch >= "0" && ch <= "9": {
			let base = "0123456789"
			// Hex literal: the leading "0" was already consumed as `ch`,
			// so test `ch` itself rather than accept()-ing another char.
			if (ch == "0" && lexer.accept("xX")) {
				base += "abcdefABCDEF"
			}
			lexer.accept_run(base)
			if (lexer.accept(".")) {
				lexer.accept_run(base)
			}
			// Exponent: the sign is optional, so the digit run must not be
			// gated on accept("-+") succeeding (that broke "1e5").
			if (lexer.accept("eE")) {
				lexer.accept("-+")
				lexer.accept_run("0123456789")
			}
			lexer.accept("i") // imaginary suffix
			token = Token.NUM
			break
		}
		// punctuation
		case "!%&()*+,-./:;<=>[]^{|}".includes(ch):
			lexer.accept_run("!%&()*+,-./:;<=>[]^{|}")
			token = Token.PUN
			break
		// not whitespace
		default:
			while ((ch = lexer.next())) {
				if (ch == " " || ch == "\t" || ch == "\n") {
					lexer.backup()
					break
				}
			}
			token = Token.UNS
			break
		}
		if (lexer.x1 < lexer.x2) {
			lexer.emit(token)
		}
	}
	return lexer.lines
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment