-
-
Save xiaozhiliaoo/d1f5842534815cddded9090e14cb10d9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.openjdk.jmh.annotations.*; | |
import java.util.Arrays; | |
import java.util.Random; | |
import java.util.concurrent.CountedCompleter; | |
import java.util.concurrent.ExecutionException; | |
/** | |
* 40 Core Xeon E5-2660 v3 @ 2.60GHz | |
* | |
* Benchmark (choice) Mode Cnt Score Error Units | |
* forkJoinBench.Main.customCC 0 thrpt 15 3206592.104 ± 1457.036 ops/s | |
* forkJoinBench.Main.customCC 1 thrpt 15 23593.980 ± 9.608 ops/s | |
* forkJoinBench.Main.customCC 2 thrpt 15 391.796 ± 1.615 ops/s | |
* forkJoinBench.Main.customCC 3 thrpt 15 115.990 ± 0.190 ops/s | |
* forkJoinBench.Main.customCC 4 thrpt 15 81.434 ± 1.050 ops/s | |
* forkJoinBench.Main.parallelStream 0 thrpt 15 23671.085 ± 252.414 ops/s | |
* forkJoinBench.Main.parallelStream 1 thrpt 15 21285.229 ± 261.597 ops/s | |
* forkJoinBench.Main.parallelStream 2 thrpt 15 256.318 ± 13.717 ops/s | |
* forkJoinBench.Main.parallelStream 3 thrpt 15 67.165 ± 0.197 ops/s | |
* forkJoinBench.Main.parallelStream 4 thrpt 15 40.167 ± 0.086 ops/s | |
* forkJoinBench.Main.serial 0 thrpt 15 3250371.613 ± 1751.204 ops/s | |
* forkJoinBench.Main.serial 1 thrpt 15 22931.864 ± 9.380 ops/s | |
* forkJoinBench.Main.serial 2 thrpt 15 22.846 ± 0.117 ops/s | |
* forkJoinBench.Main.serial 3 thrpt 15 7.644 ± 0.025 ops/s | |
* forkJoinBench.Main.serial 4 thrpt 15 4.583 ± 0.028 ops/s | |
* | |
*/ | |
@BenchmarkMode(Mode.Throughput) | |
@State(Scope.Benchmark) | |
@Measurement(iterations = 15) | |
@Fork(1) | |
public class Main { | |
// just some verification that the numbers are correct | |
public static void main(String[] args) throws ExecutionException, InterruptedException { | |
Main m = new Main(); | |
m.arr = arrays[3]; | |
System.out.println("serial: " + m.serial() | |
+ "\nparallelStream: " + m.parallelStream() | |
+ "\ncustomCC: " + m.customCC()); | |
} | |
static final int[][] arrays = new int[][] { | |
new int[100], new int[10000], new int[10_000_000], new int[30_000_000], new int[50_000_000] | |
}; | |
static { | |
Random rand = new Random(); | |
for (int[] arr : arrays) { | |
for (int i = 0; i < arr.length; ++i) arr[i] = rand.nextInt(200); | |
} | |
} | |
@Param({"0", "1", "2", "3", "4"}) | |
int choice; | |
int[] arr; | |
@Setup(Level.Trial) | |
public void setup() { | |
arr = arrays[choice]; | |
} | |
@Benchmark | |
public long serial() { | |
long res = 0; | |
for (int i : arr) if (i < 150) res += fnv(i) % 5; | |
return res; | |
} | |
@Benchmark | |
public long parallelStream() { | |
return Arrays.stream(arr) | |
.parallel() | |
.filter(i -> i < 150) | |
.mapToLong(i -> fnv(i) % 5) | |
.sum(); | |
} | |
@Benchmark | |
public long customCC() { | |
return new CustomCC(arr).invoke(); | |
} | |
static final class CustomCC extends CountedCompleter<Long> { | |
private static final int TARGET_LEAVES = Runtime.getRuntime().availableProcessors() << 2; | |
// ie, every leaf should be process at least 10_000 elements | |
private static final int MIN_SIZE_PER_LEAF = 10_000; | |
private static int computeInitialPending(int size) { | |
// if x = size / MIN_SIZE_PER_LEAF, and x < TARGET_LEAVES, create x leaves instead. | |
// This ensures that every leaf does at least MIN_SIZE_PER_LEAF operations. | |
int leaves = Math.min(TARGET_LEAVES, size / MIN_SIZE_PER_LEAF); | |
// If the total # leaves = 0, then pending = 0 since we shouldn't fork. | |
// Otherwise, pending = log2(leaves), as explained in the benchmark doc | |
return leaves == 0 | |
? 0 | |
: 31 - Integer.numberOfLeadingZeros(leaves); | |
} | |
final int[] arr; | |
int pos, size; | |
long res = 0; | |
final CustomCC[] subs; | |
public CustomCC(int[] arr) { | |
this(null, arr, arr.length, 0, computeInitialPending(arr.length)); | |
} | |
private CustomCC(CustomCC parent, int[] arr, int size, int pos, int pending) { | |
super(parent, pending); | |
this.arr = arr; this.size = size; this.pos = pos; | |
subs = new CustomCC[pending]; | |
} | |
@Override | |
public void compute() { | |
// subs.length === getPendingCount() | |
for (int p = subs.length - 1; p >= 0; --p) { | |
size >>>= 1; | |
CustomCC sub = new CustomCC(this, arr, size, pos + size, p); | |
subs[p] = sub; | |
sub.fork(); | |
} | |
// needed for c2 to remove array index checking... modified from java's Spliterator class | |
int[] a; int i, hi; | |
if ((a = arr).length >= (hi = pos + size) && (i = pos) >= 0 && i < hi) { | |
do { | |
int e = a[i]; | |
if (e < 150) res += fnv(e) % 5; | |
} while (++i < hi); | |
} | |
tryComplete(); | |
} | |
@Override | |
public void onCompletion(CountedCompleter<?> caller) { | |
for (CustomCC sub : subs) res += sub.res; | |
} | |
@Override | |
public Long getRawResult() { | |
return res; | |
} | |
} | |
static final long FNV_64_PRIME = 0x100000001b3L; | |
static final long FNV_64_OFFSET = 0xcbf29ce484222325L; | |
static long fnv(int x) { | |
long h = FNV_64_OFFSET; | |
h ^= (x >>> 24); | |
h *= FNV_64_PRIME; | |
h ^= (x >>> 16) & 0xff; | |
h *= FNV_64_PRIME; | |
h ^= (x >>> 8) & 0xff; | |
h *= FNV_64_PRIME; | |
h ^= x & 0xff; | |
h *= FNV_64_PRIME; | |
return h; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment