Last active
August 29, 2015 14:21
-
-
Save calonso/6dba841659000fdd7959 to your computer and use it in GitHub Desktop.
Ruby script to benchmark a candidate Cassandra model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'rubygems' | |
require 'bundler' | |
Bundler.setup | |
Bundler.require | |
require 'yaml' | |
require 'csv' | |
require 'logger' | |
require 'date' | |
require 'set' | |
require 'benchmark' | |
require 'time' | |
# Loads the Cassandra connection settings.
#
# The path '../../cassandra.yml' is resolved against this file's own path,
# which places cassandra.yml in the parent of the directory holding this script.
#
# @return [Object] whatever YAML.load_file parses out of that file
def config
  settings_path = File.expand_path('../../cassandra.yml', __FILE__)
  YAML.load_file(settings_path)
end
# Builds the random data set that the benchmark writes to Cassandra.
#
# The result is a three-level Hash:
#
#   field_1 (String) => field_2 => day (Time) => Array of random Integers in 0...100
#
# Called without arguments it produces the original fixed-size data set:
# 101 field_1 keys, field_2 values 1..12, 101 days and 100 bins per day.
# The keyword arguments exist so the benchmark can be scaled up or down.
#
# @param ids [Integer] number of field_1 keys, numbered upwards from 123456789
# @param field_2_values [Enumerable] values used as the second-level keys
# @param days [Integer] number of consecutive days, counting back from +today+
# @param bins_per_day [Integer] number of random integers stored per day
# @param today [Date] most recent day of the generated range
# @return [Hash{String => Hash{Object => Hash{Time => Array<Integer>}}}]
def generate_data(ids: 101, field_2_values: 1..12, days: 101, bins_per_day: 100, today: Date.today)
  ids.times.each_with_object({}) do |offset, by_field_1|
    field_1 = (123_456_789 + offset).to_s
    by_field_1[field_1] = field_2_values.each_with_object({}) do |field_2, by_field_2|
      by_field_2[field_2] = days.times.each_with_object({}) do |days_ago, by_day|
        # Keys are Time values because the table column is a timestamp.
        by_day[(today - days_ago).to_time] = Array.new(bins_per_day) { rand(100) }
      end
    end
  end
end
# Debug logger writing to standard output. It is not passed to the driver by
# default; to enable driver logging, connect with
# Cassandra.cluster(config.merge(logger: logger)) instead.
logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

# Connect to the cluster described by cassandra.yml.
cluster = Cassandra.cluster(config)
session = cluster.connect

# Create (if needed) and select the keyspace used by the benchmark.
session.execute("CREATE KEYSPACE IF NOT EXISTS mydrive_models_benchmarks WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 2 };")
session.execute("USE mydrive_models_benchmarks")

# Candidate model: one row per (field_1, field_2, date) holding the bins as a
# list<int>; (field_1, field_2) is the partition key and date the clustering key.
session.execute("
CREATE TABLE IF NOT EXISTS bins_as_list (
field_1 varchar,
field_2 int,
date timestamp,
bins list<int>,
PRIMARY KEY ((field_1, field_2), date)
)
")

# Statements are prepared once and reused by every benchmark phase.
prepared_insert = session.prepare("INSERT INTO bins_as_list (field_1, field_2, date, bins) VALUES (?, ?, ?, ?)")
prepared_select = session.prepare("SELECT * FROM bins_as_list WHERE field_1 = ? AND field_2 = ? AND date >= ? AND date < ?")
prepared_select_day = session.prepare("SELECT * FROM bins_as_list WHERE field_1 = ? AND field_2 = ? AND date = ?")

# In-memory data set that the write phase inserts.
data = generate_data
# Times three phases against the bins_as_list table and prints a Benchmark
# report: bulk writes, range reads, and read-then-overwrite updates.
# Within each phase the statements belonging to one field_1 value are issued
# asynchronously and awaited together before moving on to the next field_1.
Benchmark.benchmark(Benchmark::CAPTION, 19, Benchmark::FORMAT) do |report|
  report.report('Writing:') do
    data.each do |field_1, field_2_arr|
      # One insert per (field_2, date) pair of this field_1.
      inserts = field_2_arr.flat_map do |field_2, dates|
        dates.map do |date, bins|
          session.execute_async(prepared_insert, arguments: [field_1, field_2, date, bins], consistency: :one)
        end
      end
      Cassandra::Future.all(inserts).get
    end
  end

  # Untimed pause between reports so the operator can flush/compact the
  # cluster before reads are measured. Reads $stdin explicitly because a bare
  # `gets` would read from files named in ARGV if any arguments were given.
  puts 'Writing completed. Flush and compact and press INTRO to continue'
  $stdin.gets

  report.report('Reading:') do
    # Query window [today - 90 days, tomorrow), computed once for the phase.
    today = Date.today
    range_start = (today - 90).to_time
    range_end = (today + 1).to_time

    data.each do |field_1, field_2_arr|
      selects = field_2_arr.map do |field_2, _dates|
        session.execute_async(
          prepared_select,
          arguments: [field_1, field_2, range_start, range_end],
          consistency: :quorum
        )
      end
      Cassandra::Future.all(selects).get if selects.any?
    end
  end

  report.report('Updating:') do
    data.each do |field_1, field_2_arr|
      # Pick one random day out of the last 90 for this field_1.
      day = (Date.today - rand(90)).to_time

      # Read the current row of every field_2 for that day...
      lookups = field_2_arr.map do |field_2, _dates|
        session.execute_async(prepared_select_day, arguments: [field_1, field_2, day], consistency: :quorum)
      end
      Cassandra::Future.all(lookups).get

      # ...then overwrite each of them with a fresh list of random bins.
      # Bins are generated per field_2 so the count always matches field_2_arr.
      overwrites = field_2_arr.map do |field_2, _dates|
        fresh_bins = Array.new(100) { rand(100) }
        session.execute_async(prepared_insert, arguments: [field_1, field_2, day, fresh_bins], consistency: :one)
      end
      Cassandra::Future.all(overwrites).get
    end
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment