Last active
August 29, 2019 06:14
-
-
Save zaydek-old/e20864abe25ecbad7311ee1f7fed82f3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Token categories emitted by the lexer. Each value is a short
 * class-name-like string used to tag the emitted spans.
 * Frozen so this shared constant cannot be mutated by accident.
 */
const Token = Object.freeze({
	UNS: "uns", // unset (not whitespace)
	COM: "com", // comment
	KEY: "key", // keyword
	NUM: "num", // number
	STR: "str", // string
	PUN: "pun", // punctuation
	FUN: "fun", // function
})
/*
 * Cursor-based lexer over a source string. `x1` marks the start of the
 * pending token, `x2` is the read position, and `width` records the size
 * of the last read so exactly one backup() is possible. Emitted tokens
 * are grouped per source line in `lines`.
 */
class Lexer {
	constructor(value) {
		this.value = value
		this.x1 = 0       // start of the token being built
		this.x2 = 0       // next read position
		this.width = 0    // width of the last next(); 0 at end of input
		this.lines = [[]] // emitted tokens, one array per source line
	}
	// Consume and return one character, or undefined at end of input.
	next() {
		if (this.x2 === this.value.length) {
			this.width = 0
			return undefined
		}
		this.width = 1
		const char = this.value[this.x2]
		this.x2 += 1
		return char
	}
	// Return the upcoming character without consuming it.
	peek() {
		const char = this.next()
		this.backup()
		return char
	}
	// Un-read the last character (no-op at end of input, where width is 0).
	backup() {
		this.x2 -= this.width
	}
	// Push the pending span [x1, x2) onto the current line as `token`.
	emit(token) {
		const current = this.lines[this.lines.length - 1]
		current.push({ token, value: this.focus() })
		this.ignore()
	}
	// Emit everything before the just-read newline, open a fresh line,
	// then re-consume and drop the newline itself.
	emit_line(token) {
		this.backup()
		this.emit(token)
		this.lines.push([])
		this.next()
		this.ignore()
	}
	// The text of the pending token.
	focus() {
		return this.value.substring(this.x1, this.x2)
	}
	// Drop the pending text by advancing the token start to the cursor.
	ignore() {
		this.x1 = this.x2
	}
	// Consume one character if it is in `str`; report whether it was.
	accept(str) {
		if (str.includes(this.next())) {
			return true
		}
		this.backup()
		return false
	}
	// Consume a run of characters drawn from `str`.
	accept_run(str) {
		for (;;) {
			if (!this.accept(str)) {
				break
			}
		}
	}
}
/*
 * Lookup table of Go keywords, predeclared types/constants, and builtin
 * functions, mapping each word to `true` for O(1) membership tests.
 */
const key_map = {}
;(function () {
	const words = "break default func interface select case defer go map struct chan else goto package switch const fallthrough if range type continue for import return var bool byte complex64 complex128 error float32 float64 int int8 int16 int32 int64 rune string uint uint8 uint16 uint32 uint64 uintptr true false iota nil append cap close complex copy delete imag len make new panic print println real recover"
	words.split(" ").forEach((word) => {
		key_map[word] = true
	})
}())
// Highlight Go source: tokenize `value` into per-line token spans with
// lex(), then hand the lines to parse() for final assembly.
// NOTE(review): parse() is not defined in this chunk — presumably it
// renders the token lines to output; confirm against the rest of the file.
function parse_go(value) {
	return parse(lex(value))
}
/**
 * Tokenize Go source into per-line arrays of { token, value } spans.
 *
 * @param {string} value - Go source code.
 * @returns {Array<Array<{token: (string|0), value: string}>>} one array of
 *   spans per source line. Whitespace spans carry token `0` (falsy,
 *   i.e. "not set"); everything else carries a `Token.*` string.
 */
function lex(value) {
	const lexer = new Lexer(value)
	let ch = ""
	while ((ch = lexer.next())) {
		let token = 0
		switch (true) {
		// comment
		case ch == "/" && (lexer.peek() == "/" || lexer.peek() == "*"):
			ch = lexer.next()
			if (ch == "/") {
				// Line comment: run to (but not past) the end of the line.
				while ((ch = lexer.next())) {
					if (ch == "\n") {
						lexer.backup()
						break
					}
				}
			} else if (ch == "*") {
				// Block comment: may span lines; emit one COM span per line.
				while ((ch = lexer.next())) {
					if (ch == "*" && lexer.peek() == "/") {
						lexer.next()
						break
					} else if (ch == "\n") {
						lexer.emit_line(Token.COM)
						// don't break
					}
				}
			}
			token = Token.COM
			break
		// whitespace
		case ch == " " || ch == "\t" || ch == "\n":
			// NOTE(review): the x2 > 1 guard means a newline as the very
			// first character skips the line break below — confirm intended.
			if (lexer.x2 > 1 && ch == "\n") {
				lexer.lines.push([])
				lexer.ignore()
				break
			}
			lexer.accept_run(" \t")
			// token stays 0 (falsy): whitespace spans are emitted untagged.
			break
		// keyword or function
		case ch >= "a" && ch <= "z" || ch >= "A" && ch <= "Z" || ch == "_": {
			lexer.accept_run("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789")
			if (key_map[lexer.focus()]) {
				token = Token.KEY
				break
			}
			// Look ahead over spaces for "(" to tag function names, then
			// rewind so the spaces are not included in the token.
			const x2 = lexer.x2
			lexer.accept_run(" ")
			if (lexer.peek() == "(") {
				token = Token.FUN
			}
			lexer.x2 = x2
			token = token || Token.UNS
			break
		}
		// string
		case ch == "'" || ch == "\"" || ch == "`": {
			const quote = ch
			while ((ch = lexer.next())) {
				if (quote != "`" && ch == "\\" && lexer.peek() != "\n") {
					// Skip any escaped character (not just an escaped quote),
					// so e.g. "a\\" terminates at its real closing quote.
					lexer.next()
				} else if (quote == "`" && ch == "\n") {
					// Raw strings may span lines; emit one STR span per line.
					lexer.emit_line(Token.STR)
					// don't break
				} else if (ch == quote || ch == "\n") { // break opportunities
					if (ch == "\n") {
						// Unterminated string: stop before the newline.
						lexer.backup()
					}
					break
				}
			}
			token = Token.STR
			break
		}
		// number
		case ch >= "0" && ch <= "9": {
			let base = "0123456789"
			// Hex literal: the leading "0" was already consumed as `ch`,
			// so test `ch` itself rather than accept()-ing another char.
			if (ch == "0" && lexer.accept("xX")) {
				base += "abcdefABCDEF"
			}
			lexer.accept_run(base)
			if (lexer.accept(".")) {
				lexer.accept_run(base)
			}
			// Exponent: the sign is optional, so the digit run must not be
			// gated on accept("-+") succeeding (that broke "1e5").
			if (lexer.accept("eE")) {
				lexer.accept("-+")
				lexer.accept_run("0123456789")
			}
			lexer.accept("i") // imaginary suffix
			token = Token.NUM
			break
		}
		// punctuation
		case "!%&()*+,-./:;<=>[]^{|}".includes(ch):
			lexer.accept_run("!%&()*+,-./:;<=>[]^{|}")
			token = Token.PUN
			break
		// not whitespace
		default:
			while ((ch = lexer.next())) {
				if (ch == " " || ch == "\t" || ch == "\n") {
					lexer.backup()
					break
				}
			}
			token = Token.UNS
			break
		}
		if (lexer.x1 < lexer.x2) {
			lexer.emit(token)
		}
	}
	return lexer.lines
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment