Skip to content

Instantly share code, notes, and snippets.

@amitmurthy
Created May 30, 2017 15:51
Show Gist options
  • Save amitmurthy/aaeff92d70399c45d00a50414a54b85f to your computer and use it in GitHub Desktop.
Save amitmurthy/aaeff92d70399c45d00a50414a54b85f to your computer and use it in GitHub Desktop.
pmap timings for a few typical use cases.
#using BenchmarkTools
using NamedArrays
# Make sure there are workers to benchmark against: when only the master
# process is running, start four more.
if nprocs() == 1
    addprocs(4)
end

# Element counts of the payload arrays under test: 10^0 through 10^6.
data_szs = map(n -> 10^n, 0:6)

# Number of items handed to each pmap call.
coll_lengths = [10^k for k in 2:4]
# For every payload size, generate three definitions that share a numeric
# suffix (the position of the size within `data_szs`):
#   gv_<i>            - a global constant array of ones with that many elements
#   f_glbdata_<i>     - a named function, defined on every process via
#                       @everywhere, returning length(gv_<i>)
#   pmap_glbdata_<i>  - a wrapper that pmaps an anonymous function reading
#                       gv_<i> over 1:N
for (i, nelems) in enumerate(data_szs)
    gvname = Symbol("gv_", i)
    @eval global const $gvname = ones($nelems)

    funcname = Symbol("f_glbdata_", i)
    @eval @everywhere $funcname(x) = length($gvname)

    wrappername = Symbol("pmap_glbdata_", i)
    @eval $wrappername(N) = pmap(x->length($gvname), 1:N)
end
"""
    make_results_array()

Build the NaN-filled table that receives one timing per benchmark cell.

Rows (dimension 1) follow the entries of `data_szs`; columns (dimension 2)
follow the entries of `coll_lengths`. Cells stay `NaN` until a timing is stored.
"""
function make_results_array()
    results = NamedArray(fill(NaN, (length(data_szs), length(coll_lengths))))

    # Dimension 1 is indexed by payload size, so it carries both the per-row
    # size labels and the "input num elements" title.
    # round(Int, log10(x)) recovers the exponent of a power of ten without
    # depending on which side of the integer a floating-point log lands on.
    setnames!(results, [string("10^", round(Int, log10(x)), " element Array{Float64,1}") for x in data_szs], 1)
    setdimnames!(results, "input num elements", 1)

    # Dimension 2 is indexed by the number of items passed to pmap.
    setnames!(results, [string(x) for x in coll_lengths], 2)
    setdimnames!(results, "pmap collection length", 2)

    results
end
"""
    exec_block(results, f, warmup=false; skip_above=Inf)

Run the benchmark callback `f` over the grid of `coll_lengths` x `data_szs`
and store each timing, truncated to 4 digits, in `results[idx_dsz, idx_coll]`.

# Arguments
- `results`: table indexed as `[data size index, collection length index]`.
- `f`: callback invoked as `f(idx_coll, N, idx_dsz, sz)`; its return value is
  treated as the elapsed time in seconds.
- `warmup`: when `true`, only the first collection length is executed.

# Keywords
- `skip_above`: once a timing exceeds this many seconds, the remaining data
  sizes for the current collection length are skipped and their cells are left
  untouched. The default `Inf` never skips.
"""
function exec_block(results, f, warmup=false; skip_above=Inf)
    for (idx_coll, N) in enumerate(coll_lengths)
        # The skip flag is reset for every collection length.
        skip = false
        for (idx_dsz, sz) in enumerate(data_szs)
            if skip
                println("Skipping data_sz = $sz, coll_length = $N")
                continue
            end
            t = f(idx_coll, N, idx_dsz, sz)
            if t > skip_above
                skip = true
            end
            println("Executed data_sz = $sz, coll_length = $N in $t seconds")
            results[idx_dsz, idx_coll] = trunc(t,4)
            # Collect garbage on every process between measurements.
            # NOTE(review): the collection is requested twice; presumably to
            # make it more thorough - confirm.
            @everywhere gc()
            @everywhere gc()
        end
        warmup && break
    end
end
"""
    run_glbdata_closure(idx_coll, N, idx_dsz, sz)

Time, in seconds, one call of the `pmap_glbdata_<idx_dsz>` wrapper found in
`Main` with collection length `N`. `idx_coll` and `sz` are accepted only to
match the callback signature used by `exec_block`.
"""
function run_glbdata_closure(idx_coll, N, idx_dsz, sz)
    wrapper = getfield(Main, Symbol("pmap_glbdata_", idx_dsz))
    return @elapsed(wrapper(N))
end
"""
    run_glbdata_funcs(idx_coll, N, idx_dsz, sz)

Time, in seconds, a `pmap` of the named function `f_glbdata_<idx_dsz>` (looked
up in `Main`) over `1:N`. `idx_coll` and `sz` are accepted only to match the
callback signature used by `exec_block`.
"""
function run_glbdata_funcs(idx_coll, N, idx_dsz, sz)
    worker_func = getfield(Main, Symbol("f_glbdata_", idx_dsz))
    return @elapsed(pmap(worker_func, 1:N))
end
"""
    run_closures(idx_coll, N, idx_dsz, sz)

Time, in seconds, a plain `pmap` over `1:N` of an anonymous function that
captures a freshly allocated array of `sz` ones and returns its length.
`idx_coll` and `idx_dsz` are unused.
"""
function run_closures(idx_coll, N, idx_dsz, sz)
    payload = ones(sz)
    return @elapsed(pmap(item -> length(payload), 1:N))
end
"""
    run_cp(idx_coll, N, idx_dsz, sz)

Time, in seconds, a `pmap` over `1:N` routed through a `CachingPool` that is
created for this call only. The mapped anonymous function captures a freshly
allocated array of `sz` ones. `idx_coll` and `idx_dsz` are unused.

The pool is cleared before returning, including when `pmap` throws, so the
temporary pool never leaves cached functions behind.
"""
function run_cp(idx_coll, N, idx_dsz, sz)
    a = ones(sz)
    cp = CachingPool(workers())
    try
        return @elapsed pmap(cp, x->length(a), 1:N)
    finally
        # Runs on both the normal and the error path.
        clear!(cp)
    end
end
# Caching pool shared by every run_cp_reuse call; built once from the workers
# that exist when this line is evaluated.
const gcp = CachingPool(workers())

"""
    run_cp_reuse(idx_coll, N, idx_dsz, sz)

Time, in seconds, a `pmap` over `1:N` through the shared pool `gcp`. A new
anonymous function capturing a fresh array of `sz` ones is created on every
call. `idx_coll` and `idx_dsz` are unused.
"""
function run_cp_reuse(idx_coll, N, idx_dsz, sz)
    payload = ones(sz)
    return @elapsed(pmap(gcp, item -> length(payload), 1:N))
end
# Caching pool shared by every run_cached_closure call; built once from the
# workers that exist when this line is evaluated.
const cccp = CachingPool(workers())

"""
    run_cached_closure(idx_coll, N, idx_dsz, sz)

Time, in seconds, a `pmap` over `1:N` through the shared pool `cccp`, using an
anonymous function that was already pushed through the pool once before the
timer starts. The function captures a fresh array of `sz` ones. `idx_coll` and
`idx_dsz` are unused.
"""
function run_cached_closure(idx_coll, N, idx_dsz, sz)
    payload = ones(sz)
    cached = item -> length(payload)

    # Untimed priming pass with one item per worker, so the timed pass below
    # reuses the same function object through the pool.
    # NOTE(review): whether this reaches every worker depends on how pmap
    # distributes the items - confirm.
    pmap(cccp, cached, 1:nworkers())

    return @elapsed(pmap(cccp, cached, 1:N))
end
"""
    run()

Execute every enabled benchmark scenario and return a vector of
`(heading, results)` tuples, one per scenario, in execution order.

Each scenario is run twice against the same results table: first as a warmup
pass, then as the full timed pass. The scenario's cleanup callback is invoked
after its table has been recorded.
"""
function run()
    noop() = nothing

    # (heading, benchmark callback, cleanup callback)
    scenarios = [
        ("Regular pmap with closures pointing to global data", run_glbdata_closure, noop),
        # run_glbdata_closure goes first: it creates the globals on all workers,
        # which run_glbdata_funcs then references again.
        ("Regular pmap with defined functions pointing to global data", run_glbdata_funcs, noop),
        ("Regular pmap with local data captured in a closure", run_closures, noop),
        ("pmap with a caching pool created for each call. Using closures", run_cp, noop),
        # Disabled scenario:
        # ("pmap with a caching pool used across all calls. closures defined in each pmap call", run_cp_reuse, ()->clear!(gcp)),
        ("pmap with a caching pool used across all calls. Time cached closures call.", run_cached_closure, ()->clear!(cccp))
    ]

    collected = []
    for (title, bench, teardown) in scenarios
        println("\n", "Warmup run : ", title)
        table = make_results_array()
        exec_block(table, bench, true)

        println("\n", "Timed run : ", title)
        exec_block(table, bench)

        push!(collected, (title, table))
        teardown()
    end
    return collected
end
# Run all scenarios, then print each heading followed by its timing table and
# a blank separator line.
for (title, results) in run()
    println("$title:")
    # @show prints the variable's name, so this binding must stay `results`.
    @show results
    println()
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment