Created
January 24, 2015 16:01
-
-
Save yorickpeterse/1a94675d3a91d07f0f94 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
git clone git@github.com:YorickPeterse/ruby-ll.git | |
cd ruby-ll | |
bundle install | |
rake clean compile | |
ruby test.rb # see Ruby script below |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative 'lib/ll' | |
class Parser < LL::Driver | |
CONFIG = LL::DriverConfig.new | |
CONFIG.tokens = [ | |
:T_LCURLY, | |
:T_RCURLY, | |
:T_COMMA, | |
:T_STRING, | |
:T_COLON, | |
:T_INT | |
].freeze | |
TOKENS = {} | |
# Removes the need to use Array#index in the Ruby code below, | |
# also removes the need to do this in #initialize (which is slow) | |
CONFIG.tokens.each_with_index do |token, index| | |
TOKENS[token] = index | |
end | |
CONFIG.rules = [ | |
[3, 0, 1, 1, 0, 1, 1, 0], # 0: json | |
[3, 1, 0, 2, 0, 3], # 1: values | |
[3, 2, 0, 1, 1, 2], # 2: more_values (T_COMMA values) | |
[3, 3, 2, 0], # 3: more_values (_) | |
[3, 4, 0, 4, 1, 4, 1, 3], # 4: pair | |
[3, 5, 1, 3], # 5: value (T_STRING) | |
[3, 6, 1, 5] # 6: value (T_INT) | |
].freeze | |
CONFIG.table = [ | |
#T_LCURLY T_RCURLY T_COMMA T_STRING T_COLON T_INT | |
[0, -1, -1, -1, -1, -1], # 0: json | |
[-1, -1, -1, 1, -1, -1], # 1: values | |
[3, 3, 2, 3, 3, 3], # 2: more_values | |
[-1, -1, -1, 4, -1, -1], # 3: pair | |
[-1, -1, -1, 5, -1, 6] # 4: value | |
].freeze | |
CONFIG.actions = [ | |
[:_rule_0, 3], | |
[:_rule_1, 2], | |
[:_rule_2, 2], | |
[:_rule_3, 0], | |
[:_rule_4, 3], | |
[:_rule_5, 1], | |
[:_rule_6, 1] | |
].freeze | |
def missing_rule_error(token_id) | |
name = TOKENS[token_id] | |
raise "No rule found for terminal #{name}" | |
end | |
def invalid_token_error(got, expected) | |
got_name = CONFIG.tokens[got] | |
exp_name = CONFIG.tokens[expected] | |
raise "Invalid terminal #{got_name}, expected #{exp_name}" | |
end | |
def each_token | |
yield [:T_LCURLY, '{'] | |
yield [:T_STRING, 'name'] | |
yield [:T_COLON, ':'] | |
yield [:T_STRING, 'Yorick'] | |
yield [:T_COMMA, ','] | |
yield [:T_STRING, 'age'] | |
yield [:T_COLON, ':'] | |
yield [:T_INT, 22] | |
yield [:T_COMMA, ','] | |
yield [:T_STRING, 'location'] | |
yield [:T_COLON, ':'] | |
yield [:T_STRING, 'Netherlands'] | |
yield [:T_COMMA, ','] | |
yield [:T_STRING, 'anger_level'] | |
yield [:T_COLON, ':'] | |
yield [:T_INT, 9000] | |
yield [:T_RCURLY, '}'] | |
yield [-1, -1] | |
end | |
def parse_ruby | |
stack = [-1, -1, 0, 0] | |
value_stack = [] | |
position = 0 | |
config = self.class::CONFIG | |
tokens_hash = TOKENS | |
each_token do |(type, value)| | |
token_id = tokens_hash[type] | |
while true | |
stack_type, stack_value = stack.pop(2) | |
# Rule | |
if stack_type == 0 | |
production_index = config.table[stack_value][token_id] | |
if production_index == -1 | |
missing_rule_error(stack_value) | |
end | |
stack += config.rules[production_index] | |
# Terminal | |
elsif stack_type == 1 | |
if stack_value == token_id | |
value_stack << value | |
break | |
else | |
invalid_terminal_error(token_id, stack_value) | |
end | |
# Action | |
# Method calls are inlined here for extra webscale. | |
elsif stack_type == 3 | |
value_stack << case stack_value | |
# _rule_0 | |
when 0 | |
val = value_stack.pop(3) | |
val[1] | |
# _rule_1 | |
when 1 | |
val = value_stack.pop(2) | |
new_hash = val[0] | |
if val[1] | |
new_hash = new_hash.merge(val[1]) | |
end | |
new_hash | |
# _rule_2 | |
when 2 | |
val = value_stack.pop(2) | |
val[1] | |
# This one _could_ be optimized not to ever set `val`. That is, if `val` | |
# is never referenced we wouldn't have to set it. Benchmarks however | |
# showed this doesn't really help in this particular example. | |
# _rule_3 | |
when 3 | |
val = value_stack.pop(0) | |
{} | |
# _rule_4 | |
when 4 | |
val = value_stack.pop(3) | |
{val[0] => val[2]} | |
# _rule_5 | |
when 5 | |
val = value_stack.pop(1) | |
val[0] | |
# _rule_6 | |
when 6 | |
val = value_stack.pop(1) | |
val[0] | |
end | |
# EOF | |
elsif stack_type == -1 | |
break | |
end | |
end | |
end | |
return value_stack[0] | |
end | |
# json | |
def _rule_0(val) | |
return val[1] | |
end | |
# values | |
def _rule_1(val) | |
new_hash = val[0] | |
if val[1] | |
val[1].each do |key, value| | |
new_hash[key] = value | |
end | |
end | |
return new_hash | |
end | |
# more_values (T_COMMA values) | |
def _rule_2(val) | |
return val[1] | |
end | |
# more_values (_) | |
def _rule_3(val) | |
return {} | |
end | |
# pair | |
def _rule_4(val) | |
return {val[0] => val[2]} | |
end | |
# value (T_STRING) | |
def _rule_5(val) | |
return val[0] | |
end | |
# value (T_INT) | |
def _rule_6(val) | |
return val[0] | |
end | |
end # Parser | |
require 'benchmark/ips' | |
ll_parser = Parser.new | |
puts "Ruby: #{ll_parser.parse_ruby}" | |
puts "CAPI: #{ll_parser.parse}" | |
puts | |
Benchmark.ips do |bench| | |
bench.report 'Ruby reuse' do | |
ll_parser.parse_ruby | |
end | |
bench.report 'Ruby new' do | |
Parser.new.parse_ruby | |
end | |
bench.report 'CAPI reuse' do | |
ll_parser.parse | |
end | |
bench.report 'CAPI new' do | |
Parser.new.parse | |
end | |
bench.compare! | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment