Created
April 16, 2018 21:37
-
-
Save pkkm/07f6a5c902f8d343d25524e87c8c76db to your computer and use it in GitHub Desktop.
Testing the reliability of various statistics in the Criterion library.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript | |
library(ggplot2) | |
library(reshape2) | |
library(pander) | |
# Load the benchmark results. check.names=FALSE keeps the column names exactly
# as written in the CSV header (they contain spaces and hyphens).
data <- read.csv("results/results.csv", check.names=FALSE)

# Statistics grouped by the category they are plotted under.
regression_stats <- c("Least-squares slope", "Theil-Sen slope")
central_stats <- c("Mean", "Median of means")
other_stats <- c("Minimum of means", "Quartile 1 of means", "Quartile 3 of means")

# Reorder columns for readability.
col_order <- c(regression_stats, central_stats, other_stats)
data <- data[, col_order]

# Long format: one row per (statistic, value) pair.
molten <- melt(data)

# Assign categories to variables. Every row starts with a placeholder level
# that each of the three assignments below is expected to overwrite.
placeholder <- "This should never be visible"
molten$type <- factor(
  placeholder,
  levels=c("Central tendency", "Regression", "Other", placeholder))
molten$type[molten$variable %in% regression_stats] <- "Regression"
molten$type[molten$variable %in% central_stats] <- "Central tendency"
molten$type[molten$variable %in% other_stats] <- "Other"
# Draw densities: one smoothed curve per statistic, one facet per category,
# saved as a PDF through the Cairo device.
density_layers <- ggplot(molten, aes(x=value, color=variable)) +
  geom_density(adjust=0.5)
density_plot <- density_layers +
  labs(x="Time [s]", y="Number of benchmarks (smoothed)", color="") +
  facet_wrap("type", scales="fixed", ncol=1)
ggsave("results/density.pdf", density_plot, device=cairo_pdf, width=8, height=6)

# Draw boxplots: one box per statistic, with slanted x labels so the long
# statistic names do not overlap.
box_layers <- ggplot(molten, aes(x=variable, y=value)) +
  geom_boxplot()
box_plot <- box_layers +
  labs(x="Statistic", y="Time [s]") +
  theme(axis.text.x=element_text(angle=25, hjust=1))

# Render the boxplot to a PNG via an explicitly opened Cairo device.
png(filename="results/boxplot.png", type="cairo", width=1100, height=1100, units="px", res=200)
print(box_plot)
dev.off()
# Width of the data range as a single number (max - min), rather than the
# two-element vector that range() returns.
range_num <- function(data) {
  bounds <- range(data)
  bounds[2] - bounds[1]
}
# Summarize the spread of the data in a table: one row per statistic, with the
# interquartile range and the full range expressed as a percentage of that
# statistic's median.
medians <- apply(data, 2, median)
as_percent <- function(ratio) {
  sprintf("%.1f%%", unlist(ratio * 100))
}

# Start from a data frame with no columns and one row per statistic.
df <- data.frame(row.names=names(data))
df$`IQR/Median` <- as_percent(apply(data, 2, IQR) / medians)
df$`Range/Median` <- as_percent(apply(data, 2, range_num) / medians)

# Render as an R Markdown style table; the first justification entry applies
# to the row-name column.
summary_md <- pandoc.table.return(
  df, style="rmarkdown", justify=c("right", "left", "left"), emphasize.rownames=FALSE)
writeLines(summary_md, "results/summary.md")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import argparse | |
import json | |
import os | |
import subprocess | |
import tempfile | |
import numpy | |
import scipy.stats | |
def format_s(seconds):
    """Format a time in seconds like Criterion does.

    The value is scaled to the largest SI prefix (s, ms, μs, ns, ps, fs, as)
    for which the scaled number is at least 1, then printed with the unit.
    Negative values are formatted by magnitude and prefixed with "-".
    Values smaller than 1e-18 (including 0) fall back to "{:g} s".

    Returns the formatted string.
    """
    if seconds < 0:
        return "-" + format_s(-seconds)

    def format_with_prefix(seconds, prefix):
        """Format to 4 digits, even if they are trailing zeros."""
        # Very large values use general format.
        if seconds >= 1e9:
            return "{:.4g} {}".format(seconds, prefix)
        # Fewer decimals as the integer part grows:
        # >= 1000 -> 0 decimals, >= 100 -> 1 decimal, >= 10 -> 2 decimals.
        for exponent in [3, 2, 1]:
            if seconds >= 10 ** exponent:
                return "{1:.{0}f} {2}".format(3 - exponent, seconds, prefix)
        return "{:.3f} {}".format(seconds, prefix)

    # (power of ten, SI prefix), ordered from largest to smallest so the first
    # match is the largest unit that keeps the scaled value >= 1. Each prefix
    # appears once; a repeated entry could never be reached.
    PREFIXES = [
        (0, ""),
        (-3, "m"),
        (-6, "μ"),
        (-9, "n"),
        (-12, "p"),
        (-15, "f"),
        (-18, "a")]

    for exponent, prefix in PREFIXES:
        if seconds >= 10 ** exponent:
            return format_with_prefix(seconds * 10 ** (-exponent), prefix + "s")

    return "{:g} s".format(seconds)
def format_row(a, b, c=""):
    """Format a row of output.

    `a` is left-aligned and space-padded to 20 characters, `b` to 10
    characters, and `c` (optional) is appended as-is; the three fields are
    separated by single spaces.
    """
    return f"{a: <20} {b: <10} {c}"
def criterion_print_extra_stats(benchmark):
    """Print some extra statistics that Criterion doesn't provide.

    `benchmark` should be a parsed JSON object describing a single benchmark
    from Criterion's output (tested on Criterion 1.2.3).

    Prints three rows: the quartiles of the per-iteration means, the Theil-Sen
    slope (with its intercept), and the minimum of the per-iteration means.
    """
    # Map each report column name to its position in a measurement row.
    column_of = {
        name: position
        for position, name in enumerate(benchmark["reportKeys"])}

    # Extract columns which are interesting and should be non-null.
    wanted = ("time", "cpuTime", "iters")
    samples = [
        {field: row[column_of[field]] for field in wanted}
        for row in benchmark["reportMeasured"]]

    # Criterion repeatedly executes the benchmarked code in a loop with an
    # increasing number of iterations. `time` and `cpuTime` are totals for the
    # loop and `iters` is the number of iterations.
    total_times = [sample["time"] for sample in samples]
    iteration_counts = [sample["iters"] for sample in samples]
    per_iteration = [
        total / count
        for total, count in zip(total_times, iteration_counts)]

    quartiles = ", ".join(
        format_s(numpy.percentile(per_iteration, q)) for q in (25, 50, 75))
    print(format_row("quartiles of means", quartiles))

    # Theil-Sen regression of time vs. number of iterations.
    slope, intercept, *_ = scipy.stats.theilslopes(
        total_times, iteration_counts)
    intercept_note = "(intercept: {})".format(format_s(intercept))
    print(format_row("Theil-Sen", format_s(slope), intercept_note))

    print(format_row("min of means", format_s(numpy.amin(per_iteration))))
def criterion_benchmark(command, time_limit_s=None):
    """Benchmark a shell command using Criterion and print the results.

    Runs the `bench` executable on `command`, echoes its standard output,
    then reads the JSON report it wrote and prints the extra statistics.
    `time_limit_s`, when not None, is passed to `bench` as `--time-limit`.
    """
    with tempfile.TemporaryDirectory(prefix="benchmark-") as workdir:
        report_path = os.path.join(workdir, "criterion-out.json")

        if time_limit_s is None:
            limit_args = []
        else:
            limit_args = ["--time-limit", str(time_limit_s)]
        argv = ["bench", *limit_args, "--json", report_path, "--", command]

        completed = subprocess.run(argv, stdout=subprocess.PIPE)
        print(completed.stdout.decode("utf-8").rstrip("\n"))

        with open(report_path, "r") as report_file:
            report = json.load(report_file)

        benchmarks = report[2]  # Skip the header.
        assert len(benchmarks) == 1  # We're always doing a single benchmark.
        criterion_print_extra_stats(benchmarks[0])
if __name__ == "__main__":
    # Command-line entry point: one positional command plus an optional
    # overall time limit (in seconds) for the benchmark.
    cli = argparse.ArgumentParser()
    cli.add_argument("command", help="sh command to benchmark")
    cli.add_argument(
        "--time-limit",
        type=int,
        default=60,
        help="time limit in seconds for the whole benchmark")
    options = cli.parse_args()

    criterion_benchmark(options.command, time_limit_s=options.time_limit)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Shell command handed to benchmark.py for every run.
command='bash -c "a=0; for i in {1..500000}; do (( a += RANDOM )); done"'

# Value passed to benchmark.py as --time-limit for each run.
single_benchmark_time=60

# Number of discarded warm-up runs, then number of recorded runs.
n_warmup_runs=5
n_benchmarks=60
# Exit the script with status 1 (after printing an error to stderr) as soon as
# one of the commands named in the arguments cannot be found.
require_cmd_present() {
    for cmd; do
        # Found: move on to the next name.
        command -v -- "$cmd" >/dev/null 2>&1 && continue
        printf "ERROR: Required command \`%s\` not found.\n" "$cmd" 1>&2
        exit 1
    done
}
# Fail early if any required external tool is missing.
required_cmds=(python3 grep cut tr sed bench Rscript)
require_cmd_present "${required_cmds[@]}"

# Output directory for the CSV, plots and summary.
mkdir -p results
# Run one benchmark and print its statistics as a single comma-separated line.
single_benchmark() {
    # Lines of benchmark.py output that carry a statistic we record.
    local wanted_lines="time|mean|quartile|Theil|min"

    # Keep the wanted lines, drop the label column (first 21 characters) and
    # anything from the first "(" on, strip everything except digits, dots,
    # commas and newlines, then join the lines with commas and remove the
    # trailing comma.
    ./benchmark.py "$command" --time-limit "$single_benchmark_time" \
        | grep -E "$wanted_lines" \
        | cut -c 22- \
        | cut -d '(' -f 1 \
        | tr -cd '.,\n0-9' \
        | tr '\n' ',' \
        | sed 's/,$//'
    printf '\n'
}
# Everything printed inside the group goes to the results CSV.
{
    # Warm-up runs: executed but their output is discarded.
    for ((run = 0; run < n_warmup_runs; run++)); do
        single_benchmark >/dev/null
    done

    # Header row; the column order matches the order in which
    # single_benchmark emits the values.
    printf '%s\n' "Least-squares slope,Mean,Quartile 1 of means,Median of means,Quartile 3 of means,Theil-Sen slope,Minimum of means"

    # Recorded runs: one CSV row each.
    for ((run = 0; run < n_benchmarks; run++)); do
        single_benchmark
    done
} >"results/results.csv"

# Produce the plots and the summary table from the CSV.
./analyze.R
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment