Last active
August 29, 2015 14:05
-
-
Save si14/9902fca946a25e01d8a8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.commons.math3.util.FastMath; | |
import org.openjdk.jmh.annotations.*; | |
import org.openjdk.jmh.runner.Runner; | |
import org.openjdk.jmh.runner.RunnerException; | |
import org.openjdk.jmh.runner.options.Options; | |
import org.openjdk.jmh.runner.options.OptionsBuilder; | |
import java.util.Random; | |
import java.util.concurrent.*; | |
import java.util.stream.DoubleStream; | |
@BenchmarkMode(Mode.Throughput) | |
@OutputTimeUnit(TimeUnit.SECONDS) | |
@Warmup(iterations = 5, time = 1) | |
@Measurement(iterations = 5, time = 1) | |
@State(Scope.Benchmark) | |
public class FindMaxBenchmark { | |
@Param({"10000000"}) | |
int arrayLen; | |
@Param({"2", "4", "8", "32", "256"}) | |
int pieces; | |
double[] values; | |
double trueMaxValue; | |
ExecutorService pool; | |
double maxValue; | |
@Setup | |
public void prepare() { | |
values = new double[arrayLen]; | |
final Random gen = new Random(42); | |
for (int i = 0; i < arrayLen; i++) { | |
values[i] = gen.nextDouble(); | |
} | |
trueMaxValue = DoubleStream.of(values).max().getAsDouble(); | |
pool = Executors.newFixedThreadPool(ForkJoinPool.getCommonPoolParallelism()); | |
} | |
@TearDown | |
public void check() { | |
assert maxValue == trueMaxValue : "max should be right"; | |
pool.shutdown(); | |
} | |
public static class RecursiveFindMax extends RecursiveTask<Double> { | |
final int lo; | |
final int hi; | |
final double[] array; | |
final int threshold; | |
final int pieces; | |
RecursiveFindMax(final double[] array, final int pieces) { | |
this(array, 0, array.length, pieces); | |
} | |
RecursiveFindMax(final double[] array, final int lo, final int hi, final int pieces) { | |
this.array = array; | |
this.lo = lo; | |
this.hi = hi; | |
this.pieces = pieces; | |
threshold = array.length / pieces; | |
} | |
@Override | |
protected Double compute() { | |
if (hi - lo <= threshold) { | |
double max = FastMath.exp(array[lo]); | |
for (int i = lo + 1; i < hi; i++) { | |
// final double exp = FastMath.exp(array[i]); | |
final double exp = array[i]; | |
if (exp > max) { | |
max = exp; | |
} | |
} | |
return max; | |
} else { | |
final int mid = lo + (hi - lo) / 2; | |
final RecursiveFindMax left = new RecursiveFindMax(array, lo, mid, pieces); | |
final RecursiveFindMax right = new RecursiveFindMax(array, mid, hi, pieces); | |
right.fork(); | |
final double leftAns = left.compute(); | |
final double rightAns = right.join(); | |
return Math.max(leftAns, rightAns); | |
} | |
} | |
public static double apply(final double[] array, final int pieces) { | |
return ForkJoinPool.commonPool().invoke(new RecursiveFindMax(array, pieces)); | |
} | |
} | |
double findMax(final double[] array) { | |
double max = FastMath.exp(array[0]); | |
for (final double x : array) { | |
// final double exp = FastMath.exp(x); | |
final double exp = x; | |
if (exp > max) { | |
max = exp; | |
} | |
} | |
return max; | |
} | |
double findMaxFutures(final double[] array, final int pieces, final ExecutorService pool) { | |
final int blockSize = array.length / pieces; | |
final FutureTask<Double>[] futures = new FutureTask[pieces]; | |
int lo = 0; | |
int hi = blockSize; | |
for (int i = 0; i < pieces; i++) { | |
final int finalLo = lo; | |
final int finalHi = hi; | |
futures[i] = new FutureTask<>(() -> { | |
double localMax = FastMath.exp(array[finalLo]); | |
for (int j = finalLo; j < finalHi; j++) { | |
// final double exp = FastMath.exp(array[j]); | |
final double exp = array[j]; | |
if (exp > localMax) { | |
localMax = exp; | |
} | |
} | |
return localMax; | |
}); | |
pool.execute(futures[i]); | |
lo = hi; | |
hi += blockSize; | |
} | |
double max = array[0]; | |
for (int i = 0; i < pieces; i++) { | |
try { | |
final double val = futures[i].get(); | |
if (val > max) { | |
max = val; | |
} | |
} catch (InterruptedException|ExecutionException e) { | |
throw new RuntimeException(e); | |
} | |
} | |
return max; | |
} | |
public static class CountedFindMax extends CountedCompleter<Double> { | |
final int lo; | |
final int hi; | |
final double[] array; | |
final int blockSize; | |
double max; | |
CountedFindMax[] siblings; | |
CountedFindMax(final double[] array, final int blockSize, final int maxSiblings) { | |
this(null, array, 0, array.length, blockSize, maxSiblings); | |
} | |
CountedFindMax(final CountedCompleter<?> parent, | |
final double[] array, final int lo, final int hi, | |
final int blockSize, final int maxSiblings) { | |
super(parent); | |
this.array = array; | |
this.lo = lo; | |
this.hi = hi; | |
this.blockSize = blockSize; | |
this.siblings = new CountedFindMax[maxSiblings]; | |
} | |
@Override | |
public void compute() { | |
final int l = lo; | |
int h = hi; | |
int j = 0; | |
while (h - l > blockSize) { | |
final int mid = (l + h + 1) / 2; | |
addToPendingCount(1); | |
siblings[j++] = (CountedFindMax) new CountedFindMax(this, | |
array, mid, h, blockSize, | |
siblings.length - j).fork(); | |
h = mid; | |
} | |
if (h > l) { | |
max = FastMath.exp(array[l]); | |
for (int i = l + 1; i < h; i++) { | |
//final double exp = FastMath.exp(array[i]); | |
final double exp = array[i]; | |
if (exp > max) { | |
max = exp; | |
} | |
} | |
} | |
tryComplete(); | |
} | |
@Override | |
public void onCompletion(final CountedCompleter<?> caller) { | |
if (caller != this) { | |
for (final CountedFindMax sibling : siblings) { | |
if (sibling != null) { // this shouldn't be needed, but somehow I underfill siblings | |
if (sibling.max > this.max) { | |
this.max = sibling.max; | |
} | |
} | |
} | |
} | |
} | |
@Override | |
public Double getRawResult() { | |
return max; | |
} | |
public static double apply(final double[] array, final int pieces) { | |
final int blockSize = (array.length + pieces - 1) / pieces; | |
final int siblingCount = new Double(Math.floor(Math.log(1. / pieces) / Math.log(0.5))).intValue(); | |
return new CountedFindMax(array, blockSize, siblingCount).invoke(); | |
} | |
} | |
@Benchmark | |
public void simpleMax() { | |
maxValue = findMax(values); | |
} | |
@Benchmark | |
public void fjRecursiveMax() { | |
maxValue = RecursiveFindMax.apply(values, pieces); | |
} | |
@Benchmark | |
public void futuresMax() { | |
maxValue = findMaxFutures(values, pieces, pool); | |
} | |
@Benchmark | |
public void fjCountedMax() { | |
maxValue = CountedFindMax.apply(values, pieces); | |
} | |
public static void main(final String[] args) throws RunnerException { | |
final Options opts = new OptionsBuilder() | |
.include(".*" + FindMaxBenchmark.class.getSimpleName() + ".*") | |
.forks(1) | |
.build(); | |
new Runner(opts).run(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Benchmark (arrayLen) (pieces) Mode Samples Score Score error Units | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 2 thrpt 5 152.022 26.515 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 4 thrpt 5 161.961 13.154 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 8 thrpt 5 167.746 4.560 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 32 thrpt 5 168.868 3.247 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 256 thrpt 5 170.415 4.082 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 2 thrpt 5 161.938 6.353 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 4 thrpt 5 165.995 6.204 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 8 thrpt 5 160.432 13.595 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 32 thrpt 5 156.904 22.049 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 256 thrpt 5 152.807 33.084 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 2 thrpt 5 140.285 23.045 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 4 thrpt 5 156.551 13.103 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 8 thrpt 5 159.055 13.238 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 32 thrpt 5 156.826 27.912 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 256 thrpt 5 152.148 33.182 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 2 thrpt 5 110.729 22.986 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 4 thrpt 5 110.015 20.115 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 8 thrpt 5 94.124 14.482 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 32 thrpt 5 107.391 12.271 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 256 thrpt 5 107.299 29.436 ops/s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.commons.math3.util.FastMath; | |
import org.openjdk.jmh.annotations.*; | |
import org.openjdk.jmh.runner.Runner; | |
import org.openjdk.jmh.runner.RunnerException; | |
import org.openjdk.jmh.runner.options.Options; | |
import org.openjdk.jmh.runner.options.OptionsBuilder; | |
import java.util.Random; | |
import java.util.concurrent.*; | |
import java.util.stream.DoubleStream; | |
/** | |
* Playground for parallel array operations | |
* @author Dmitry Groshev | |
* @date 12/08/2014 | |
*/ | |
@BenchmarkMode(Mode.Throughput) | |
@OutputTimeUnit(TimeUnit.SECONDS) | |
@Warmup(iterations = 5, time = 1) | |
@Measurement(iterations = 5, time = 1) | |
@State(Scope.Benchmark) | |
public class FindMaxBenchmark { | |
//@Param({"100", "1000", "10000", "100000", "1000000", "10000000"}) | |
//@Param({"1000000", "10000000"}) | |
@Param({"10000000"}) | |
int arrayLen; | |
@Param({"2", "4", "8", "32", "256"}) | |
int pieces; | |
double[] values; | |
double trueMaxValue; | |
ExecutorService pool; | |
double maxValue; | |
@Setup | |
public void prepare() { | |
values = new double[arrayLen]; | |
final Random gen = new Random(42); | |
for (int i = 0; i < arrayLen; i++) { | |
values[i] = gen.nextDouble(); | |
} | |
trueMaxValue = DoubleStream.of(values).max().getAsDouble(); | |
pool = Executors.newFixedThreadPool(ForkJoinPool.getCommonPoolParallelism()); | |
} | |
@TearDown | |
public void check() { | |
assert maxValue == trueMaxValue : "max should be right"; | |
pool.shutdown(); | |
} | |
public static class RecursiveFindMax extends RecursiveTask<Double> { | |
final int lo; | |
final int hi; | |
final double[] array; | |
final int threshold; | |
final int pieces; | |
RecursiveFindMax(final double[] array, final int pieces) { | |
this(array, 0, array.length, pieces); | |
} | |
RecursiveFindMax(final double[] array, final int lo, final int hi, final int pieces) { | |
this.array = array; | |
this.lo = lo; | |
this.hi = hi; | |
this.pieces = pieces; | |
threshold = array.length / pieces; | |
} | |
@Override | |
protected Double compute() { | |
if (hi - lo <= threshold) { | |
double max = FastMath.exp(array[lo]); | |
for (int i = lo + 1; i < hi; i++) { | |
final double exp = FastMath.exp(array[i]); | |
if (exp > max) { | |
max = exp; | |
} | |
} | |
return max; | |
} else { | |
final int mid = lo + (hi - lo) / 2; | |
final RecursiveFindMax left = new RecursiveFindMax(array, lo, mid, pieces); | |
final RecursiveFindMax right = new RecursiveFindMax(array, mid, hi, pieces); | |
right.fork(); | |
final double leftAns = left.compute(); | |
final double rightAns = right.join(); | |
return Math.max(leftAns, rightAns); | |
} | |
} | |
public static double apply(final double[] array, final int pieces) { | |
return ForkJoinPool.commonPool().invoke(new RecursiveFindMax(array, pieces)); | |
} | |
} | |
double findMax(final double[] array) { | |
double max = FastMath.exp(array[0]); | |
for (final double x : array) { | |
final double exp = FastMath.exp(x); | |
if (exp > max) { | |
max = exp; | |
} | |
} | |
return max; | |
} | |
double findMaxFutures(final double[] array, final int pieces, final ExecutorService pool) { | |
final int blockSize = array.length / pieces; | |
final FutureTask<Double>[] futures = new FutureTask[pieces]; | |
int lo = 0; | |
int hi = blockSize; | |
for (int i = 0; i < pieces; i++) { | |
final int finalLo = lo; | |
final int finalHi = hi; | |
futures[i] = new FutureTask<>(() -> { | |
double localMax = FastMath.exp(array[finalLo]); | |
for (int j = finalLo; j < finalHi; j++) { | |
final double exp = FastMath.exp(array[j]); | |
if (exp > localMax) { | |
localMax = exp; | |
} | |
} | |
return localMax; | |
}); | |
pool.execute(futures[i]); | |
lo = hi; | |
hi += blockSize; | |
} | |
double max = array[0]; | |
for (int i = 0; i < pieces; i++) { | |
try { | |
final double val = futures[i].get(); | |
if (val > max) { | |
max = val; | |
} | |
} catch (InterruptedException|ExecutionException e) { | |
throw new RuntimeException(e); | |
} | |
} | |
return max; | |
} | |
public static class CountedFindMax extends CountedCompleter<Double> { | |
final int lo; | |
final int hi; | |
final double[] array; | |
final int blockSize; | |
double max; | |
CountedFindMax[] siblings; | |
CountedFindMax(final double[] array, final int blockSize, final int maxSiblings) { | |
this(null, array, 0, array.length, blockSize, maxSiblings); | |
} | |
CountedFindMax(final CountedCompleter<?> parent, | |
final double[] array, final int lo, final int hi, | |
final int blockSize, final int maxSiblings) { | |
super(parent); | |
this.array = array; | |
this.lo = lo; | |
this.hi = hi; | |
this.blockSize = blockSize; | |
this.siblings = new CountedFindMax[maxSiblings]; | |
} | |
@Override | |
public void compute() { | |
final int l = lo; | |
int h = hi; | |
int j = 0; | |
while (h - l > blockSize) { | |
final int mid = (l + h + 1) / 2; | |
addToPendingCount(1); | |
siblings[j++] = (CountedFindMax) new CountedFindMax(this, | |
array, mid, h, blockSize, | |
siblings.length - 1).fork(); | |
h = mid; | |
} | |
if (h > l) { | |
max = FastMath.exp(array[l]); | |
for (int i = l + 1; i < h; i++) { | |
final double exp = FastMath.exp(array[i]); | |
if (exp > max) { | |
max = exp; | |
} | |
} | |
} | |
tryComplete(); | |
} | |
@Override | |
public void onCompletion(final CountedCompleter<?> caller) { | |
if (caller != this) { | |
for (final CountedFindMax sibling : siblings) { | |
if (sibling != null) { // this shouldn't be needed, but somehow I underfill siblings | |
if (sibling.max > this.max) { | |
this.max = sibling.max; | |
} | |
} | |
} | |
} | |
} | |
@Override | |
public Double getRawResult() { | |
return max; | |
} | |
public static double apply(final double[] array, final int pieces) { | |
final int blockSize = (array.length + pieces - 1) / pieces; | |
final int siblingCount = new Double(Math.floor(Math.log(1. / pieces) / Math.log(0.5))).intValue(); | |
return new CountedFindMax(array, blockSize, siblingCount).invoke(); | |
} | |
} | |
@Benchmark | |
public void simpleMax() { | |
maxValue = findMax(values); | |
} | |
@Benchmark | |
public void fjRecursiveMax() { | |
maxValue = RecursiveFindMax.apply(values, pieces); | |
} | |
@Benchmark | |
public void futuresMax() { | |
maxValue = findMaxFutures(values, pieces, pool); | |
} | |
@Benchmark | |
public void fjCountedMax() { | |
maxValue = CountedFindMax.apply(values, pieces); | |
} | |
public static void main(final String[] args) throws RunnerException { | |
final Options opts = new OptionsBuilder() | |
.include(".*" + FindMaxBenchmark.class.getSimpleName() + ".*") | |
.forks(1) | |
.build(); | |
new Runner(opts).run(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Benchmark (arrayLen) (pieces) Mode Samples Score Score error Units | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 2 thrpt 5 5.827 0.357 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 4 thrpt 5 13.045 1.223 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 8 thrpt 5 21.936 6.818 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 32 thrpt 5 23.270 2.956 ops/s | |
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 256 thrpt 5 24.338 1.101 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 2 thrpt 5 8.914 2.990 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 4 thrpt 5 13.291 4.091 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 8 thrpt 5 20.789 8.258 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 32 thrpt 5 20.440 2.159 ops/s | |
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 256 thrpt 5 19.389 8.121 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 2 thrpt 5 9.878 0.899 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 4 thrpt 5 13.257 2.340 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 8 thrpt 5 19.084 2.034 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 32 thrpt 5 22.057 1.652 ops/s | |
o.j.b.FindMaxBenchmark.futuresMax 10000000 256 thrpt 5 22.638 4.262 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 2 thrpt 5 5.496 0.211 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 4 thrpt 5 5.136 0.438 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 8 thrpt 5 5.013 0.535 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 32 thrpt 5 5.058 1.601 ops/s | |
o.j.b.FindMaxBenchmark.simpleMax 10000000 256 thrpt 5 5.455 0.612 ops/s |
2x2 i5-2520M, JDK 8 GA:
Benchmark (arrayLen) (pieces) Mode Samples Score Score error Units
o.o.FindMaxBenchmark.fjCountedMax 10000000 2 thrpt 15 132.858 29.619 ops/s
o.o.FindMaxBenchmark.fjCountedMax 10000000 4 thrpt 15 176.044 8.036 ops/s
o.o.FindMaxBenchmark.fjCountedMax 10000000 8 thrpt 15 174.449 8.615 ops/s
o.o.FindMaxBenchmark.fjCountedMax 10000000 32 thrpt 15 161.985 16.314 ops/s
o.o.FindMaxBenchmark.fjCountedMax 10000000 256 thrpt 15 176.822 10.759 ops/s
o.o.FindMaxBenchmark.fjRecursiveMax 10000000 2 thrpt 15 115.386 9.911 ops/s
o.o.FindMaxBenchmark.fjRecursiveMax 10000000 4 thrpt 15 155.198 19.247 ops/s
o.o.FindMaxBenchmark.fjRecursiveMax 10000000 8 thrpt 15 142.627 25.476 ops/s
o.o.FindMaxBenchmark.fjRecursiveMax 10000000 32 thrpt 15 171.528 3.865 ops/s
o.o.FindMaxBenchmark.fjRecursiveMax 10000000 256 thrpt 15 169.140 2.069 ops/s
o.o.FindMaxBenchmark.futuresMax 10000000 2 thrpt 15 130.649 10.201 ops/s
o.o.FindMaxBenchmark.futuresMax 10000000 4 thrpt 15 143.422 4.727 ops/s
o.o.FindMaxBenchmark.futuresMax 10000000 8 thrpt 15 143.518 7.510 ops/s
o.o.FindMaxBenchmark.futuresMax 10000000 32 thrpt 15 151.627 11.779 ops/s
o.o.FindMaxBenchmark.futuresMax 10000000 256 thrpt 15 138.823 13.500 ops/s
o.o.FindMaxBenchmark.simpleMax 10000000 2 thrpt 15 79.631 3.943 ops/s
o.o.FindMaxBenchmark.simpleMax 10000000 4 thrpt 15 79.769 2.952 ops/s
o.o.FindMaxBenchmark.simpleMax 10000000 8 thrpt 15 78.356 2.192 ops/s
o.o.FindMaxBenchmark.simpleMax 10000000 32 thrpt 15 79.745 4.774 ops/s
o.o.FindMaxBenchmark.simpleMax 10000000 256 thrpt 15 76.996 2.722 ops/s
Ну вот, почти в два раза быстрее, без всяких изменений. Во всех случаях мы упёрлись именно в вычисление максимума, паразитных проблем в профиле нет. Цикл развернулся на разное количество итераций кое-где.
Частоту проца-то не забыл зафиксировать? А то ведь simpleMax отлично с turboboost-ами дружит.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Меня сильно смущает твой left.compute. Можешь поправить первый вариант и прогнать? Должно начать чуть дольше вызывать, но, имхо, чуть больше шансов на steal. Плюс поменял константы ближе к 64k L1 кешей.
18c18
-< @param({"10000000"})
-> @param({"16777216"})
21c21
-< @param({"2", "4", "8", "32", "256"})
-> @param({"2", "4", "8", "32", "256", "65536", "131072"})
82,83c82,83
-< right.fork();
-< final double leftAns = left.compute();
-> invokeAll(left,rigth);
-> final double leftAns = left.join();