Skip to content

Instantly share code, notes, and snippets.

@chinnurtb
Created September 21, 2013 06:31
Show Gist options
  • Save chinnurtb/6647807 to your computer and use it in GitHub Desktop.
Save chinnurtb/6647807 to your computer and use it in GitHub Desktop.
#!/bin/bash
# Dump the contents of a Riak bucket to files by running
# bucket_exporter on a short-lived Erlang node that connects to a Riak node.
#
# Positional arguments (all optional):
#   $1  bucket to dump                        (default: MyTest)
#   $2  output directory                      (default: /var/lib/riak/custom/dump)
#   $3  secondary index name to consult       (default: none -> dump every key)
#   $4  index value, or start of index range  (default: none)
#   $5  end of index range                    (default: none -> exact match on $4)
#
# ERL_BIN, RIAK_LIB, and BUCKET_LIB can be defaulted here or set
# in the environment
#
# Path to the erl binary provided by Riak
ERL_BIN=${ERL_BIN:-"/usr/lib64/riak/erts-5.8.4/bin/erl"}
# Path to riak libraries
# (defaulted from RIAK_LIB itself; defaulting from ERL_BIN would always
# yield the erl binary path, because ERL_BIN is never empty at this point)
RIAK_LIB=${RIAK_LIB:-"/usr/lib64/riak/lib"}
# Bucket to be dumped
BUCKET_TO_DUMP=${1:-MyTest}
# results will be dumped to $BUCKET_DIR
BUCKET_DIR=${2:-"/var/lib/riak/custom/dump"}
# A relative output directory is resolved against the script's own directory
[ "${BUCKET_DIR:0:1}" = "/" ] || BUCKET_DIR="$(dirname "$0")/$BUCKET_DIR"
# Index to be consulted
INDEX_NAME=${3:-none}
# Index From value
INDEX_FROM=${4:-none}
# Index To value
INDEX_TO=${5:-none}
# bucket_exporter.beam should be in $BUCKET_DIR/ebin
BUCKET_LIB=${BUCKET_LIB:-"$BUCKET_DIR/ebin"}
# Note that riak@127.0.0.1 should be the result of running:
# grep name /etc/riak/vm.args
# Target node to query
TARGET_NODE=riak@127.0.0.1
# Cookie - default riak
COOKIE=riak
# Nodename for this instance - using pid to ensure uniqueness
NODENAME=bucketdumper$$@127.0.0.1
# Binary index data needs to be binary: values for *_bin indexes are wrapped
# in <<"...">>, anything else is passed to Erlang as a bare term.
if [ "${INDEX_NAME%_bin}" = "${INDEX_NAME}" ]; then
    BIN_PRE=""
    BIN_POST=""
else
    BIN_PRE="<<\""
    BIN_POST="\">>"
fi
# Build the extra arguments and pick the exporter entry point.
if [ "$INDEX_NAME" = "none" ]; then
    QUERY=""
    FUNC="export_data"
else
    FUNC="export_data_with_index"
    if [ "$INDEX_TO" = "none" ]; then
        # One value -> exact-match query
        QUERY=", <<\"$INDEX_NAME\">>, [${BIN_PRE}${INDEX_FROM}${BIN_POST}]"
    else
        # Two values -> range query
        QUERY=", <<\"$INDEX_NAME\">>, [${BIN_PRE}${INDEX_FROM}${BIN_POST},${BIN_PRE}${INDEX_TO}${BIN_POST}]"
    fi
fi
"$ERL_BIN" -name "$NODENAME" -setcookie "$COOKIE" \
    -pa "$BUCKET_LIB" \
    -env ERL_LIBS "$RIAK_LIB" \
    -eval "bucket_exporter:$FUNC('$TARGET_NODE', <<\"$BUCKET_TO_DUMP\">>, \"json\", \"$BUCKET_DIR\"$QUERY)" \
    -s init stop \
    -noshell
%% -------------------------------------------------------------------
%%
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% @doc Writes the objects of a Riak bucket to files on disk, one file per
%% key. The key list comes either from a full key listing (export_data/4,5)
%% or from a secondary-index query (export_data_with_index/6,7). The
%% higher-arity variants take an extra InputSize factor that limits how much
%% of the key list is exported.
-module(bucket_exporter).
-export([export_data/4,
export_data/5,
export_data_with_index/6,
export_data_with_index/7]).
%% @doc Export the objects selected by a secondary-index query without
%% truncating the key list: delegates to export_data_with_index/7 with an
%% input size of 1.0.
export_data_with_index(Node, Bucket, Ext, Dir, Index, Patterns) ->
    WholeKeyList = 1.0,
    export_data_with_index(Node, Bucket, Ext, Dir, Index, Patterns, WholeKeyList).
%% @doc Export the objects of `Bucket' whose secondary index `Index' matches
%% `Patterns'.
%%
%% `Patterns' is either a one-element list (exact match) or a two-element
%% list (inclusive start and end passed to a range query). The matching keys
%% are cut down by `InputSize' via truncate_keys/2 before the export starts.
export_data_with_index(Node, Bucket, Ext, Dir, Index, Patterns, InputSize) ->
    {ok, Client} = riak:client_connect(Node),
    IndexQuery =
        case Patterns of
            [Exact] ->
                {eq, Index, Exact};
            [Lower, Upper] ->
                {range, Index, Lower, Upper}
        end,
    {ok, Matched} = Client:get_index(Bucket, IndexQuery),
    Selected = truncate_keys(Matched, InputSize),
    io:format("Got ~p keys~n", [length(Selected)]),
    export_data(Client, Bucket, Ext, Dir, Selected, 0),
    io:format("Data export complete~n").
%% @doc Export every key of `Bucket': delegates to export_data/5 with an
%% input size of 1.0, which leaves the key list untruncated.
export_data(Node, Bucket, Ext, Dir) ->
    WholeKeyList = 1.0,
    export_data(Node, Bucket, Ext, Dir, WholeKeyList).
%% @doc List the keys of `Bucket' on the node `FromServer', keep the leading
%% `InputSize' fraction of that list (see truncate_keys/2), and write the
%% selected objects below `Directory' using `Extension' as file suffix.
export_data(Node, Bucket, Ext, Dir, InputSize) ->
    {ok, Client} = riak:client_connect(Node),
    {ok, Listed} = Client:list_keys(Bucket),
    Selected = truncate_keys(Listed, InputSize),
    io:format("Got ~p keys~n", [length(Selected)]),
    export_data(Client, Bucket, Ext, Dir, Selected, 0),
    io:format("Data export complete~n").
%% @doc Export the given keys, one worker process per key.
%%
%% `Count' is the number of workers that have been started but not yet
%% waited for; callers start with 0. After every 250 workers the owner waits
%% for all of them to report `done', so at most 250 are in flight at once.
%% When the key list is exhausted the remaining workers are awaited as well,
%% so that all files have been written (or the failures reported) by the time
%% this function returns `ok'. Without that final wait the caller could shut
%% the VM down while the last batch is still writing.
export_data(_CFrom, _Bucket, _Extension, _Directory, [], Count) ->
    %% Drain the final, partial batch before declaring the export finished.
    let_workers_catch_up(Count),
    io:format("~n"),
    ok;
export_data(CFrom, Bucket, Extension, Directory0, [H|T], Count) when is_binary(H) ->
    Owner = self(),
    proc_lib:spawn(fun() ->
        %% The `after' section guarantees exactly one `done' per worker, even
        %% if the export of this key crashes; otherwise the owner would wait
        %% forever for the missing message.
        try
            export_object(CFrom, Bucket, Extension, Directory0, H)
        after
            Owner ! done
        end
    end),
    NewCount =
        case Count + 1 of
            250 ->
                let_workers_catch_up(250),
                0;
            Started ->
                Started
        end,
    export_data(CFrom, Bucket, Extension, Directory0, T, NewCount).

%% Fetch one object and write its value to
%% Directory0/<munged key prefix>/<Key>.<Extension>. A failed fetch is
%% reported on standard error and the key is skipped; a failed directory
%% creation or write crashes the worker.
export_object(CFrom, Bucket, Extension, Directory0, Key) ->
    case CFrom:get(Bucket, Key) of
        {ok, FromObj} ->
            KeyStr = binary_to_list(Key),
            Directory = munge_directory(Directory0, KeyStr),
            Path = filename:join([Directory, KeyStr ++ "." ++ Extension]),
            ok = filelib:ensure_dir(Path),
            ok = file:write_file(Path, riak_object:get_value(FromObj));
        Error ->
            io:format(standard_error, "Skipping key ~p: ~p~n", [Key, Error])
    end.
%% @doc Block until `Count' messages `done' have been taken out of the
%% calling process's mailbox, then return `ok'. Other messages are left in
%% the mailbox untouched. There is no timeout.
let_workers_catch_up(Pending) when Pending =/= 0 ->
    receive
        done ->
            let_workers_catch_up(Pending - 1)
    end;
let_workers_catch_up(0) ->
    ok.
%% @doc Append one single-character sub-directory per leading character of
%% `Key' (at most three) to `Directory0', e.g. "dump" and "testkey1" give
%% "dump/t/e/s". Keys shorter than three characters use as many levels as
%% they have characters, and an empty key yields `Directory0' unchanged;
%% previously such keys raised function_clause.
%%
%% NOTE(review): key characters are used verbatim as path components, so keys
%% beginning with "/" or "." produce unusual paths -- confirm that such keys
%% cannot occur in the buckets being dumped.
munge_directory(Directory0, Key) ->
    Prefix = lists:sublist(Key, 3),
    Directory0 ++ lists:append([[$/, C] || C <- Prefix]).
%% @doc Keep only the leading part of `Keys'.
%%
%% An `InputSize' of exactly the float 1.0 returns the list untouched. Any
%% other value keeps the first round(length(Keys) * InputSize) elements;
%% lists:split/2 fails if that number is negative or exceeds the list length.
truncate_keys(Keys, InputSize) when InputSize =:= 1.0 ->
    Keys;
truncate_keys(Keys, InputSize) ->
    KeepCount = erlang:round(length(Keys) * InputSize),
    element(1, lists:split(KeepCount, Keys)).
Joes-MacBook-Pro% curl "127.0.0.1:9000/buckets/test/keys?keys=true"
{"keys":["testkey6","testkey3","testkey8","testkey9","testkey5","testkey4","testkey2","testkey7","testkey10","testkey1"]}
Joes-MacBook-Pro% ./bucket_dumper.sh test dump/dumpall
Got 10 keys
Data export complete
Joes-MacBook-Pro% ./bucket_dumper.sh test dump/dumpodd test_bin 1
Got 5 keys
Data export complete
Joes-MacBook-Pro% ./bucket_dumper.sh test dump/dumpeven test_bin 0
Got 5 keys
Data export complete
Joes-MacBook-Pro% ./bucket_dumper.sh test dump/dumpone test_int 3
Got 1 keys
Data export complete
Joes-MacBook-Pro% ./bucket_dumper.sh test dump/dumprange test_int 5 8
Got 4 keys
Data export complete
Joes-MacBook-Pro% find dump -type f
dump/dumpall/t/e/s/testkey1.json
dump/dumpall/t/e/s/testkey10.json
dump/dumpall/t/e/s/testkey2.json
dump/dumpall/t/e/s/testkey3.json
dump/dumpall/t/e/s/testkey4.json
dump/dumpall/t/e/s/testkey5.json
dump/dumpall/t/e/s/testkey6.json
dump/dumpall/t/e/s/testkey7.json
dump/dumpall/t/e/s/testkey8.json
dump/dumpall/t/e/s/testkey9.json
dump/dumpeven/t/e/s/testkey10.json
dump/dumpeven/t/e/s/testkey2.json
dump/dumpeven/t/e/s/testkey4.json
dump/dumpeven/t/e/s/testkey6.json
dump/dumpeven/t/e/s/testkey8.json
dump/dumpodd/t/e/s/testkey1.json
dump/dumpodd/t/e/s/testkey3.json
dump/dumpodd/t/e/s/testkey5.json
dump/dumpodd/t/e/s/testkey7.json
dump/dumpodd/t/e/s/testkey9.json
dump/dumpone/t/e/s/testkey3.json
dump/dumprange/t/e/s/testkey5.json
dump/dumprange/t/e/s/testkey6.json
dump/dumprange/t/e/s/testkey7.json
dump/dumprange/t/e/s/testkey8.json
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment