Skip to content

Instantly share code, notes, and snippets.

@simonhf
Last active January 2, 2018 01:53
Show Gist options
  • Save simonhf/e7c2f40d36f1a4bdedfffa40c575b63b to your computer and use it in GitHub Desktop.
Save simonhf/e7c2f40d36f1a4bdedfffa40c575b63b to your computer and use it in GitHub Desktop.
experiments with tarantool transactions, fibers, and C box_insert (because Lua string performance sucks)
#include "module.h"
#define MP_SOURCE 1 /* define in a single .c/.cc file */
#include "msgpuck.h"
// https://tarantool.org/doc/tutorials/c_tutorial.html?highlight=stored%20procedure
// CPATH=/usr/include/tarantool/:/usr/include/ gcc -shared -o hardest.so -fPIC hardest.c
char * key_format = "the quick brown fox jumped over the lazy dog the qu %07u ick brown fox ";
char * field_2 = "'{\"0\":{\"1234567890\": 320, \"1234567890\": 1303}}'";
void hardest_put(uint32_t from, uint32_t to)
{
uint32_t space_id = box_space_id_by_name("capi_test", strlen("capi_test"));
uint32_t i;
//box_txn_begin();
for(i = from; i <= to; i++) {
char field_1[256];
snprintf(&field_1[0], 256, key_format, i);
char tuple[1024];
char *tuple_pointer = tuple;
tuple_pointer = mp_encode_array(tuple_pointer, 4);
tuple_pointer = mp_encode_str(tuple_pointer, &field_1[0], strlen(&field_1[0]));
tuple_pointer = mp_encode_str(tuple_pointer, field_2 , strlen( field_2 ));
tuple_pointer = mp_encode_uint(tuple_pointer, 1234567890);
tuple_pointer = mp_encode_uint(tuple_pointer, 0);
int n = box_insert(space_id, tuple, tuple_pointer, NULL);
//printf("%d = box_insert()\n", n);
}
//box_txn_commit();
}
/*
 * Insert one tuple per key for every key in [from..to], grouping up to two
 * inserts per transaction to halve the number of WAL round-trips.
 *
 * BUG FIX: the original iterated i over [from .. from + (to-from)/2] and
 * inserted keys (i*2) and (i*2)+1, so the inserted key set was
 * [2*from .. 2*from + (to-from) + 1] instead of [from..to] — inconsistent
 * with hardest_put()/hardest_put_batch() and broken for odd-sized ranges.
 * This version walks the key range directly, pairing consecutive keys.
 */
void hardest_put_mini_batch(uint32_t from, uint32_t to)
{
    uint32_t space_id = box_space_id_by_name("capi_test", strlen("capi_test"));
    uint32_t key = from;
    while (key <= to) {
        /* batch is [key .. batch_end]: two keys, or one if the range is odd */
        uint32_t batch_end = (to - key >= 1) ? key + 1 : key;
        box_txn_begin();
        for (; key <= batch_end; key++) {
            char field_1[256];
            snprintf(&field_1[0], 256, key_format, key);
            char tuple[1024];
            char *p = tuple;
            p = mp_encode_array(p, 4);
            p = mp_encode_str(p, &field_1[0], strlen(&field_1[0]));
            p = mp_encode_str(p, field_2, strlen(field_2));
            p = mp_encode_uint(p, 1234567890);
            p = mp_encode_uint(p, 0);
            int n = box_insert(space_id, tuple, p, NULL);
            (void)n; /* result deliberately ignored in this benchmark */
            //printf("%d = box_insert()\n", n);
        }
        box_txn_commit();
    }
}
/*
 * Insert one tuple per key for every key in [from..to], committing every
 * txn_batch_size inserts. Tuples are encoded back-to-back into a shared
 * scratch buffer.
 *
 * BUG FIX: the original only rewound the scratch cursor at transaction
 * boundaries, so any txn_batch_size above ~30 overran tuples[4096] (the
 * benchmark below runs batch sizes up to 1,000,000). box_insert() copies the
 * msgpack data into tarantool's own tuple storage, so the scratch buffer can
 * be recycled as soon as the call returns — NOTE(review): verify against the
 * tarantool C API docs for the deployed version.
 */
void hardest_put_batch(uint32_t from, uint32_t to, uint32_t txn_batch_size)
{
    char tuples[4096];
    enum { MAX_TUPLE_SIZE = 1024 }; /* worst-case encoded size of one tuple */
    char *tuple_pointer_begin = tuples;
    char *tuple_pointer_end = tuples;
    uint32_t space_id = box_space_id_by_name("capi_test", strlen("capi_test"));
    uint32_t i;
    uint32_t txn_pos = 0;
    uint32_t txn_begun = 1; /* 1 while a transaction is open and uncommitted */
    box_txn_begin();
    for (i = from; i <= to; i++) {
        /* recycle the scratch buffer before it can overflow */
        if ((size_t)(tuple_pointer_end - tuples) > sizeof(tuples) - MAX_TUPLE_SIZE) {
            tuple_pointer_begin = tuples;
            tuple_pointer_end = tuples;
        }
        char field_1[256];
        snprintf(&field_1[0], 256, key_format, i);
        tuple_pointer_end = mp_encode_array(tuple_pointer_end, 4);
        tuple_pointer_end = mp_encode_str (tuple_pointer_end, &field_1[0], strlen(&field_1[0]));
        tuple_pointer_end = mp_encode_str (tuple_pointer_end, field_2, strlen(field_2));
        tuple_pointer_end = mp_encode_uint(tuple_pointer_end, 1234567890);
        tuple_pointer_end = mp_encode_uint(tuple_pointer_end, 0);
        int n = box_insert(space_id, tuple_pointer_begin, tuple_pointer_end, NULL);
        (void)n; /* result deliberately ignored in this benchmark */
        //printf("%d = box_insert()\n", n);
        tuple_pointer_begin = tuple_pointer_end;
        txn_pos++;
        if (txn_batch_size == txn_pos) {
            box_txn_commit();
            txn_begun = 0;
            if (i < to) { /* more keys to go: open the next transaction */
                txn_pos = 0;
                txn_begun = 1;
                box_txn_begin();
                tuple_pointer_begin = tuples;
                tuple_pointer_end = tuples;
            }
        }
    }
    /* commit a trailing partial batch, if any */
    if (1 == txn_begun) {
        box_txn_commit();
    }
}
/*
 * Look up every key in [from..to] via the primary (hash) index of capi_test,
 * aborting the process on any failure or missing key.
 *
 * BUG FIX: box_index_get() returns 0 with *result set to NULL when the key
 * is simply absent; the original only checked the return code, so missing
 * keys passed silently. Both failure modes are now reported (and the "ops"
 * typo in the error message is fixed).
 */
void hardest_get(uint32_t from, uint32_t to)
{
    uint32_t space_id = box_space_id_by_name("capi_test", strlen("capi_test"));
    uint32_t i;
    for (i = from; i <= to; i++) {
        char field_1[256];
        snprintf(&field_1[0], 256, key_format, i);
        char key[1024];
        char *key_end = key;
        key_end = mp_encode_array(key_end, 1); /* key is a single string part */
        key_end = mp_encode_str(key_end, &field_1[0], strlen(&field_1[0]));
        struct tuple *result;
        if (box_index_get(space_id, 0, key, key_end, &result) != 0) {
            puts("oops: box_index_get() failed");
            exit(1);
        }
        if (result == NULL) {
            puts("oops: key not found");
            exit(1);
        }
        /* TODO: decode fields from *result via the box_tuple_* API (the
           returned value is a tuple handle, not raw msgpack, so mp_decode_*
           on it directly would be wrong). */
    }
}
#!/usr/bin/env tarantool
-- Benchmark driver: loads the C module above (built as hardest.so) via FFI
-- and measures tarantool insert/read throughput across fibers and
-- transaction batch sizes.
local ffi = require('ffi')
local clock = require('clock')
local fiber = require('fiber')
-- Locate hardest.so on package.cpath and bind the four C entry points.
local lib = ffi.load(package.searchpath('hardest', package.cpath))
ffi.cdef[[
void hardest_put(uint32_t from, uint32_t to);
void hardest_put_mini_batch(uint32_t from, uint32_t to);
void hardest_put_batch(uint32_t from, uint32_t to, uint32_t txn_batch_size);
void hardest_get(uint32_t from, uint32_t to);
]]
box.cfg {
listen = 3306,
-- wal_mode = "none"
}
-- Space + primary hash index on the first (string) field; idempotent so the
-- script can be re-run against an existing data directory.
box.schema.space.create('capi_test', {if_not_exists=true})
box.space.capi_test:create_index('primary', {type = 'hash', parts = {1, 'STR'}, if_not_exists=true})
-- printf(fmt, ...): format and write to stdout without a trailing newline.
-- Deliberately global: the benchmark fibers below use it.
function printf(fmt, ...)
  return io.write(fmt:format(...))
end -- function
-- os.exit()
-- Benchmark knobs: 1,000,000 keys total, split evenly across fibers_no
-- fibers, with txn_batch_size inserts per transaction in the C module.
local txn_batch_size = 20
local fibers_no = 100
local n = 1000000/fibers_no
-- NOTE(review): space_id is never used below; the C side looks the space up
-- by name itself. Kept as-is.
local space_id = box.space.capi_test.id
local tt1 = clock.time()
for fiber_id=1, fibers_no do
fiber.create(function()
-- Each fiber owns a disjoint, contiguous range of n keys.
local from = ((fiber_id - 1) * n) + 1
local to = ((fiber_id - 1) * n) + n
printf("-- created fiber %2u for range %7u to %7u for putting\n", fiber_id, from, to)
local t1 = clock.time()
-- lib.hardest_put(from, to)
-- lib.hardest_put_mini_batch(from, to)
lib.hardest_put_batch(from, to, txn_batch_size)
local t2 = clock.time()
printf("-- generated and inserted %u keys in %f seconds or %u keys per second\n", n, t2 - t1, n / (t2 - t1))
-- NOTE(review): assumes the fiber created last also finishes last; the TOTAL
-- line can under-report if other fibers are still running -- confirm.
if fiber_id == fibers_no then
local tt2 = clock.time()
local tn = 1000000
print("----------------")
printf("-- TOTAL: generated and inserted %u keys in %f seconds or %u keys per second\n", tn, tt2 - tt1, tn / (tt2 - tt1))
print("----------------")
print("keys:" .. box.space.capi_test:len())
end
end)
end
-- Crude barrier: sleep long enough for every put fiber to finish before the
-- read phase starts (no real synchronization with the fibers above).
fiber.sleep(100)
print("wakeup")
-- Read phase: same fiber/key-range layout as the put phase, timing
-- hardest_get() over each fiber's range. This tt1 shadows the put phase's.
local tt1 = clock.time()
for fiber_id=1, fibers_no do
fiber.create(function()
local from = ((fiber_id - 1) * n) + 1
local to = ((fiber_id - 1) * n) + n
printf("-- created fiber %2u for range %7u to %7u for getting\n", fiber_id, from, to)
local t1 = clock.time()
lib.hardest_get(from, to)
local t2 = clock.time()
printf("-- read %u keys in %f seconds or %u keys per second\n", n, t2 - t1, n / (t2 - t1))
-- NOTE(review): same last-fiber-finishes-last assumption as the put phase.
if fiber_id == fibers_no then
local tt2 = clock.time()
local tn = 1000000
print("----------------")
printf("-- TOTAL: read %u keys in %f seconds or %u keys per second\n", tn, tt2 - tt1, tn / (tt2 - tt1))
print("----------------")
print("keys:" .. box.space.capi_test:len())
end
end)
end
* Each row of the table below is one run that box_insert()s 1 million string keys into tarantool.
* Tweaking the variables (txn_batch_size, fibers_no) and running `rm 0* ; tarantool hardest_c.lua` produces the rows below.
* E.g. inserting 1 million keys via 1 fiber with 1 insert per transaction takes 63.3 seconds and generates a 169.6 MB xlog file.
* E.g. inserting 1 million keys via 1 fiber with 2 inserts per transaction takes 36.2 seconds and generates a 160.6 MB xlog file.
* Therefore, disk I/O is not the limiting factor but probably IPC throughput with the WAL thread.
* Halving the number of IPC requests -- due to transaction size -- with the WAL thread almost halves the seconds.
* E.g. inserting 1 million keys via 2 fibers with 1 insert per transaction takes 31.7 seconds and generates a 169.6 MB xlog file.
* And halving the number of IPC requests -- due to fiber count -- with the WAL thread almost halves the seconds too.
| inserts per txn | fibers | xlog on disk | seconds | CPU |
| 1 | 1 | 169.6 mb | 63.3 | 70% |
| 1 | 2 | 169.6 mb | 31.7 | 72% |
| 1 | 10 | 153.3 mb | 9.3 | 76% |
| 1 | 20 | 14.7 mb | 5.9 | 81% |
| 1 | 50 | 7.1 mb | 3.2 | 83% |
| 1 | 100 | 5.1 mb | 1.6 | 83% |
| 2 | 1 | 160.6 mb | 36.2 | 72% |
| 2 | 2 | 156.0 mb | 18.8 | 74% |
| 2 | 10 | 15.3 mb | 5.4 | 81% |
| 2 | 20 | 8.1 mb | 3.3 | 84% |
| 2 | 50 | 5.2 mb | 2.0 | 87% |
| 2 | 100 | 4.2 mb | 1.8 | 85% |
| 10 | 1 | 153.3 mb | 8.4 | 75% |
| 10 | 2 | 14.7 mb | 5.3 | 79% |
| 10 | 10 | 4.8 mb | 1.9 | 85% |
| 10 | 20 | 4.0 mb | 1.1 | ??% |
| 10 | 50 | 3.8 mb | 0.9 | ??% |
| 10 | 100 | 3.5 mb | 0.8 | 100% |
| 20 | 1 | 14.4 mb | 5.4 | 80% |
| 20 | 2 | 7.8 mb | 3.4 | 80% |
| 20 | 10 | 3.9 mb | 1.6 | 88% |
| 20 | 20 | 4.0 mb | 1.3 | 97% |
| 20 | 50 | 3.6 mb | 0.8 | 100% |
| 20 | 100 | 3.5 mb | 0.8 | 100% |
| 1000000 | 1 | 2.1 mb | 5.6 | 100% |
| 500000 | 2 | 2.0 mb | 1.0 | 100% |
| 100000 | 10 | 1.8 mb | 0.8 | 100% |
| 50000 | 20 | 1.8 mb | 0.8 | 100% |
| 20000 | 50 | 1.8 mb | 0.7 | 100% |
| 10000 | 100 | 1.8 mb | 0.7 | 100% |
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment