Skip to content

Instantly share code, notes, and snippets.

@simpl1g
Last active October 27, 2021 16:25
Show Gist options
  • Save simpl1g/77ed741057a265ffd5b41b1739d7cb52 to your computer and use it in GitHub Desktop.
require 'bundler/inline'
gemfile do
source 'https://rubygems.org'
gem 'curb'
gem 'benchmark-ips'
gem 'oj'
gem 'red-parquet'
gem 'rover-df'
end
require 'benchmark'
require 'curb'
require 'oj'
require 'parquet'
require 'rover'
# Sends +query+ to the public ClickHouse playground over HTTP and returns the
# raw response body.
#
# Connects to the demo database from ClickHouse Inc; see
# https://play.clickhouse.com/ for details.
#
# @param query [String] SQL text to execute
# @return [String] the unparsed response body
# @raise [RuntimeError] when the server answers with a non-2xx HTTP status
def get(query)
  response = Curl.get(
    'https://play-api.clickhouse.com:8443',
    query: query,
    user: 'playground',
    password: 'clickhouse',
    database: 'datasets'
  )

  # Fail here rather than handing an error message to the Parquet/JSON
  # parsers, where it would surface as a confusing parse failure.
  status = response.response_code
  raise "ClickHouse request failed (HTTP #{status}): #{response.body_str}" unless (200..299).cover?(status)

  response.body_str
end
# For each dataset size, fetch the same single-column result set as Parquet and
# as JSON, then compare Arrow, Rover and Oj on three tasks: parsing, counting
# distinct values, and summing.
[1000, 10_000, 100_000].each do |limit|
  sql = "SELECT WatchID as watch FROM hits_v1 LIMIT #{limit}"
  parquet_body = get("#{sql} FORMAT Parquet")
  json_body = get("#{sql} FORMAT JSON")

  # Build every representation once up front so the aggregation benchmarks
  # below measure only the aggregation, not the parsing.
  arrow = Arrow::Table.load(Arrow::Buffer.try_convert(parquet_body), format: :parquet)
  rover = Rover::DataFrame.new(arrow.each_column.map { |column| [column.field.name, column.data.values] }.to_h)
  json = Oj.load(json_body)

  # `puts` rather than `p`: `p` prints the inspected string, i.e. wrapped in quotes.
  puts "Amount of items in dataset: #{limit}"

  puts 'Parsing'
  Benchmark.ips do |bench|
    bench.report('Arrow') { Arrow::Table.load(Arrow::Buffer.try_convert(parquet_body), format: :parquet) }
    # Starts from the already-loaded Arrow table, so this times only the
    # table -> DataFrame conversion, not Parquet decoding.
    bench.report('Rover') do
      Rover::DataFrame.new(arrow.each_column.map { |column| [column.field.name, column.data.values] }.to_h)
    end
    bench.report('Json') { Oj.load(json_body) }
    bench.compare!
  end

  puts 'Uniq counting'
  Benchmark.ips do |bench|
    bench.report('Arrow') { arrow['watch'].data.values.uniq.count }
    bench.report('Rover') { rover['watch'].uniq.count }
    bench.report('Json') { json['data'].map { |row| row['watch'] }.uniq.count }
    bench.compare!
  end

  puts 'Sum counting'
  Benchmark.ips do |bench|
    bench.report('Arrow') { arrow['watch'].data.values.sum }
    bench.report('Rover') { rover['watch'].sum }
    # NOTE(review): the to_i suggests the JSON payload carries WatchID as a
    # string (64-bit integers quoted by the server) -- confirm.
    bench.report('Json') { json['data'].map { |row| row['watch'].to_i }.sum }
    bench.compare!
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment