Created
November 6, 2016 22:13
-
-
Save deoxxa/95eb3561a4e04e51f764e43b769c0163 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// @flow
/* eslint-disable no-use-before-define */
// Token type tags emitted by the lexer below.
const TOKEN_AND = 'TOKEN_AND';
const TOKEN_ATOM = 'TOKEN_ATOM';
const TOKEN_BANG = 'TOKEN_BANG';
const TOKEN_COLON = 'TOKEN_COLON';
const TOKEN_CURLY_CLOSE = 'TOKEN_CURLY_CLOSE';
const TOKEN_CURLY_OPEN = 'TOKEN_CURLY_OPEN';
const TOKEN_ERROR = 'TOKEN_ERROR';
const TOKEN_EXISTS = 'TOKEN_EXISTS';
const TOKEN_MINUS = 'TOKEN_MINUS';
const TOKEN_MISSING = 'TOKEN_MISSING';
const TOKEN_NUMBER = 'TOKEN_NUMBER';
const TOKEN_OR = 'TOKEN_OR';
const TOKEN_PAREN_CLOSE = 'TOKEN_PAREN_CLOSE';
const TOKEN_PAREN_OPEN = 'TOKEN_PAREN_OPEN';
const TOKEN_PLUS = 'TOKEN_PLUS';
const TOKEN_SQUARE_CLOSE = 'TOKEN_SQUARE_CLOSE';
const TOKEN_SQUARE_OPEN = 'TOKEN_SQUARE_OPEN';
const TOKEN_STAR = 'TOKEN_STAR';
const TOKEN_STRING = 'TOKEN_STRING';
const TOKEN_TILDE = 'TOKEN_TILDE';
const TOKEN_TO = 'TOKEN_TO';
const TOKEN_WS = 'TOKEN_WS';
// Literal text -> token type. Word-like keys (AND, OR, TO, _missing_,
// _exists_) are resolved via `lexAtom`; single-character keys are resolved by
// `lex`'s direct table lookup.
const literals = {
  '&&': TOKEN_AND,
  'AND': TOKEN_AND,
  '!': TOKEN_BANG,
  ':': TOKEN_COLON,
  '}': TOKEN_CURLY_CLOSE,
  '{': TOKEN_CURLY_OPEN,
  'OR': TOKEN_OR,
  '||': TOKEN_OR,
  ')': TOKEN_PAREN_CLOSE,
  '(': TOKEN_PAREN_OPEN,
  '+': TOKEN_PLUS,
  ']': TOKEN_SQUARE_CLOSE,
  '[': TOKEN_SQUARE_OPEN,
  '*': TOKEN_STAR,
  '~': TOKEN_TILDE,
  'TO': TOKEN_TO,
  '_missing_': TOKEN_MISSING,
  '_exists_': TOKEN_EXISTS,
};
// A lexed token: its type tag, the matched text, and [start, end) offsets
// into the input string.
type Token = { type: string, text: string, start: number, end: number };
// One lexer step: the token produced plus how many input characters it ate.
type LexResult = { token: Token, consumed: number };
// limited to 1000 iterations in case there's an infinite loop bug | |
function lexAll(str: string): Array<Token> { | |
const arr = []; | |
let offset = 0; | |
for (let i = 0; i < 1000; i++) { | |
const res = lex(str, offset); | |
if (!res) { | |
break; | |
} | |
const { token, consumed } = res; | |
arr.push(token); | |
offset += consumed; | |
} | |
return arr; | |
} | |
function lex(str: string, offset: number): LexResult|null { | |
if (str.length === offset) { | |
return null; | |
} | |
switch (true) { | |
case /^\s/.test(str.substr(offset)): | |
return lexWhitespace(str, offset); | |
case /^"/.test(str.substr(offset)): | |
return lexString(str, offset); | |
case /^[-0123456789]/.test(str.substr(offset)): | |
return lexNumberOrMinus(str, offset); | |
case /^[A-Za-z_]/.test(str.substr(offset)): | |
return lexAtom(str, offset); | |
case !!literals[str[offset]]: | |
return { | |
token: { | |
type: literals[str[offset]], | |
text: str[offset], | |
start: offset, | |
end: offset + 1, | |
}, | |
consumed: 1, | |
}; | |
default: | |
return { | |
token: { | |
type: TOKEN_ERROR, | |
text: '', | |
start: offset, | |
end: offset, | |
}, | |
consumed: str.length - offset, | |
}; | |
} | |
} | |
function lexWhitespace(str: string, offset: number): LexResult { | |
const m = /^(\s+)/.exec(str.substr(offset)); | |
return { | |
token: { | |
type: TOKEN_WS, | |
text: m[1], | |
start: offset, | |
end: offset + m[1].length, | |
}, | |
consumed: m[1].length, | |
}; | |
} | |
// Lexes a double-quoted string starting at `offset`. A backslash escapes the
// character that follows it (including a quote). Returns null when the
// character at `offset` is not a quote; returns an error token (consuming
// the rest of the input) when the string is unterminated.
function lexString(str: string, offset: number): LexResult|null {
  let i = 0;
  if (str[offset + i] !== '"') {
    return null;
  }
  i++;
  // Scan to the closing quote, skipping over backslash-escaped characters.
  for (; offset + i < str.length; i++) {
    if (str[offset + i] === '\\') {
      i++;
      continue;
    }
    if (str[offset + i] === '"') {
      break;
    }
  }
  if (str[offset + i] !== '"') {
    // Unterminated string: zero-width error token, but consume the whole
    // remainder so the lexer terminates.
    return {
      token: {
        type: TOKEN_ERROR,
        text: '',
        start: offset + i,
        end: offset + i,
      },
      consumed: str.length - offset,
    };
  }
  i++;
  const raw = str.substr(offset, i);
  // Decode escape sequences by treating the raw text (quotes included) as a
  // JSON string literal. If JSON.parse rejects it, `text` stays null and the
  // token text becomes the string 'null'.
  let text = null;
  try { text = JSON.parse(raw); } catch (e) { /* nothing */ }
  return {
    token: {
      type: TOKEN_STRING,
      text: String(text),
      raw: raw, // NOTE(review): `raw` is not declared on the Token type — confirm intent
      start: offset,
      end: offset + i,
    },
    consumed: i,
  };
}
function lexNumberOrMinus(str: string, offset: number): LexResult { | |
const m = /^(-?([0-9]+(\.[0-9]+)?)?)/.exec(str.substr(offset)); | |
if (m[1] === '-') { | |
return { | |
token: { | |
type: TOKEN_MINUS, | |
text: '-', | |
start: offset, | |
end: offset, | |
}, | |
consumed: 1, | |
}; | |
} | |
return { | |
token: { | |
type: TOKEN_NUMBER, | |
text: m[1], | |
start: offset, | |
end: offset + m[1].length, | |
}, | |
consumed: m[1].length, | |
}; | |
} | |
function lexAtom(str: string, offset: number): LexResult { | |
const m = /^[A-Za-z_][A-Za-z_0-9\.]*/.exec(str.substr(offset)); | |
const text = m[0]; | |
const type = literals[text] || TOKEN_ATOM; | |
return { | |
token: { | |
type: type, | |
text: text, | |
start: offset, | |
end: offset + text.length, | |
}, | |
consumed: text.length, | |
}; | |
} | |
// Public entry point: lex `str` into its full token stream, whitespace
// tokens included.
export function tokenise(str: string) {
  return lexAll(str);
}
function describeToken(t: ?Token): string { | |
if (!t) { | |
return 'EOF'; | |
} | |
return `${t.type} @ ${t.start}`; | |
} | |
// Builds (but does not throw) a parse error describing what token types were
// acceptable and what was actually found.
function failed(token, ...types) {
  const wanted = types.join(', ');
  const got = describeToken(token);
  return new Error(`expected one of ${wanted}; got ${got}`);
}
// Asserts that `token` exists and is one of `types`, throwing a descriptive
// error otherwise. Fix: `token` may be undefined when the token stream ends
// early; the original dereferenced `token.type` and crashed with a
// TypeError, even though `failed`/`describeToken` already render a missing
// token as 'EOF'.
function expect(token, ...types) {
  if (!token || !types.includes(token.type)) {
    throw failed(token, ...types);
  }
}
// A parser step: the AST built so far plus how many tokens it accounts for.
// NOTE(review): `consumed` is accumulated across recursive parseImplicitGroup
// frames, not relative to the current slice — confirm before refactoring.
type ParseResult = {
  ast: Node,
  consumed: number,
};
// Parenthesised group: "(a b)".
type GroupNode = {
  type: 'group',
  nodes: Array<Node>,
};
// "_exists_:field" / "_missing_:field".
type ExistenceNode = {
  type: 'existence',
  modifier: string,
  target: string,
};
// Field query: "field:condition".
type TargetedNode = {
  type: 'targeted',
  target: string,
  condition: Node,
};
// A bare search term (single atom).
type BareNode = {
  type: 'bare',
  text: string,
};
// A quoted string term.
type StringNode = {
  type: 'string',
  text: string,
};
// Range query: "[a TO b]". Endpoints are kept as raw tokens.
type RangeNode = {
  type: 'range',
  left: Token,
  right: Token,
};
// Explicit boolean combination: "a AND b" / "a OR b".
type BooleanNode = {
  type: 'boolean',
  operator: string,
  left: Node,
  right: Node,
};
// Two adjacent expressions with no explicit operator between them.
type ImplicitGroupNode = {
  type: 'implicitGroup',
  left: Node,
  right: Node,
};
// Produced for empty input (or an empty group body).
type EmptyNode = {
  type: 'empty',
};
export type Node
  = GroupNode
  | ExistenceNode
  | TargetedNode
  | BareNode
  | StringNode
  | RangeNode
  | BooleanNode
  | ImplicitGroupNode
  | EmptyNode;
function parseGroup(tokens) { | |
const nodes: Array<Node> = []; | |
expect(tokens[0], TOKEN_PAREN_OPEN); | |
let i = 1; | |
let last = null; | |
for (; i < tokens.length; i++) { | |
last = parseImplicitGroup(tokens.slice(i), last, TOKEN_PAREN_CLOSE); | |
const { ast, consumed } = last; | |
nodes.push(ast); | |
i += consumed; | |
if (tokens[i].type === TOKEN_PAREN_CLOSE) { | |
break; | |
} | |
} | |
expect(tokens[i], TOKEN_PAREN_CLOSE); | |
return { | |
ast: { | |
type: 'group', | |
nodes, | |
}, | |
consumed: i + 2, | |
}; | |
} | |
// Parses "_exists_:field" or "_missing_:field" into an existence node.
function parseExistence(tokens) {
  expect(tokens[0], TOKEN_EXISTS, TOKEN_MISSING);
  expect(tokens[1], TOKEN_COLON);
  expect(tokens[2], TOKEN_ATOM);
  const [modifierToken, , targetToken] = tokens;
  const ast = {
    type: 'existence',
    modifier: modifierToken.text,
    target: targetToken.text,
  };
  return { ast, consumed: 3 };
}
function parseTargeted(tokens): ParseResult { | |
expect(tokens[0], TOKEN_ATOM); | |
expect(tokens[1], TOKEN_COLON); | |
const condition = parseSingle(tokens.slice(2)); | |
return { | |
ast: { | |
type: 'targeted', | |
target: tokens[0].text, | |
condition: condition.ast, | |
}, | |
consumed: 2 + condition.consumed, | |
}; | |
} | |
function parseBare(tokens): ParseResult { | |
expect(tokens[0], TOKEN_ATOM); | |
return { | |
ast: { | |
type: 'bare', | |
text: tokens[0].text, | |
}, | |
consumed: 1, | |
}; | |
} | |
function parseString(tokens): ParseResult { | |
expect(tokens[0], TOKEN_STRING); | |
return { | |
ast: { | |
type: 'string', | |
text: tokens[0].text, | |
}, | |
consumed: 1, | |
}; | |
} | |
function parseRange(tokens): ParseResult { | |
expect(tokens[0], TOKEN_SQUARE_OPEN); | |
expect(tokens[1], TOKEN_ATOM, TOKEN_NUMBER, TOKEN_STRING, TOKEN_STAR); | |
expect(tokens[2], TOKEN_TO); | |
expect(tokens[3], TOKEN_ATOM, TOKEN_NUMBER, TOKEN_STRING, TOKEN_STAR); | |
expect(tokens[4], TOKEN_SQUARE_CLOSE); | |
return { | |
ast: { | |
type: 'range', | |
left: tokens[1], | |
right: tokens[3], | |
}, | |
consumed: 5, | |
}; | |
} | |
function parseSingle(tokens): ParseResult { | |
switch (tokens[0].type) { | |
case TOKEN_ATOM: | |
return parseBare(tokens); | |
case TOKEN_STRING: | |
return parseString(tokens); | |
case TOKEN_SQUARE_OPEN: | |
return parseRange(tokens); | |
default: | |
throw failed(tokens[0], TOKEN_ATOM, TOKEN_STRING, TOKEN_SQUARE_OPEN); | |
} | |
} | |
function parseBooleanPair(tokens: Array<Token>, left: ParseResult, ignore: ?string): ParseResult { | |
const right = parseImplicitGroup(tokens.slice(1), null, ignore); | |
return { | |
ast: { | |
type: 'boolean', | |
operator: tokens[0].text, | |
left: left.ast, | |
right: right.ast, | |
}, | |
consumed: left.consumed + 1 + right.consumed, | |
}; | |
} | |
function parseImplicitGroup(tokens: Array<Token>, inputLeft: ParseResult|null, ignore: ?string): ParseResult { | |
let left = inputLeft; | |
if (tokens.length === 0 || tokens[0].type === ignore) { | |
if (left === null) { | |
return { | |
ast: { type: 'empty' }, | |
consumed: 0, | |
}; | |
} | |
return left; | |
} | |
let right: ?ParseResult = null; | |
switch (tokens[0].type) { | |
case TOKEN_EXISTS: | |
case TOKEN_MISSING: | |
right = parseExistence(tokens); | |
break; | |
case TOKEN_ATOM: | |
if (tokens.length > 1 && tokens[1].type === 'TOKEN_COLON') { | |
right = parseTargeted(tokens); | |
break; | |
} else { | |
right = parseBare(tokens); | |
break; | |
} | |
case TOKEN_STRING: | |
right = parseString(tokens); | |
break; | |
case TOKEN_PAREN_OPEN: | |
right = parseGroup(tokens); | |
break; | |
case TOKEN_AND: | |
case TOKEN_OR: | |
if (!left) { | |
throw new Error('boolean operator must be preceeded by another expression'); | |
} | |
right = parseBooleanPair(tokens, left, ignore); | |
left = null; | |
break; | |
default: | |
throw failed(tokens[0], TOKEN_EXISTS, TOKEN_MISSING, TOKEN_ATOM, TOKEN_STRING, TOKEN_PAREN_OPEN); | |
} | |
if (!left) { | |
return parseImplicitGroup(tokens.slice(right.consumed), right, ignore); | |
} | |
return parseImplicitGroup(tokens.slice(right.consumed), { | |
ast: { | |
type: 'implicitGroup', | |
left: left.ast, | |
right: right.ast, | |
}, | |
consumed: left.consumed + right.consumed, | |
}, ignore); | |
} | |
// Public entry point: parse a query string into an AST. Whitespace tokens
// are stripped first; `ignore` is null at the top level so the parser
// consumes the whole token stream.
export function parse(str: string): Node {
  return parseImplicitGroup(tokenise(str).filter((e) => e.type !== TOKEN_WS), null, null).ast;
}
export function format(node: Node): string { | |
switch (node.type) { | |
case 'group': | |
return [ | |
'(', | |
...node.nodes.map(format).join('').split('\n').map((e) => ' ' + e), | |
')', | |
].join('\n'); | |
case 'existence': | |
return `${node.modifier}:${node.target}`; | |
case 'targeted': | |
return `${node.target}:${format(node.condition)}`; | |
case 'bare': | |
return node.text; | |
case 'string': | |
return JSON.stringify(node.text); | |
case 'range': | |
return `[${node.left.text} TO ${node.right.text}]`; | |
case 'boolean': | |
return `${format(node.left)} ${node.operator} ${format(node.right)}`; | |
case 'implicitGroup': | |
return `${format(node.left)}\n${format(node.right)}`; | |
case 'emptyNode': | |
return ''; | |
default: return ''; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment