Skip to content

Instantly share code, notes, and snippets.

@AugustNagro
Last active January 21, 2024 07:08
Show Gist options
  • Save AugustNagro/0278c1c66f23414ba3be1f9fe299ab86 to your computer and use it in GitHub Desktop.
Save AugustNagro/0278c1c66f23414ba3be1f9fe299ab86 to your computer and use it in GitHub Desktop.
import org.openjdk.jmh.annotations.*;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.CountedCompleter;
import java.util.concurrent.ExecutionException;
/**
* 40 Core Xeon E5-2660 v3 @ 2.60GHz
*
* Benchmark (choice) Mode Cnt Score Error Units
* forkJoinBench.Main.customCC 0 thrpt 15 3206592.104 ± 1457.036 ops/s
* forkJoinBench.Main.customCC 1 thrpt 15 23593.980 ± 9.608 ops/s
* forkJoinBench.Main.customCC 2 thrpt 15 391.796 ± 1.615 ops/s
* forkJoinBench.Main.customCC 3 thrpt 15 115.990 ± 0.190 ops/s
* forkJoinBench.Main.customCC 4 thrpt 15 81.434 ± 1.050 ops/s
* forkJoinBench.Main.parallelStream 0 thrpt 15 23671.085 ± 252.414 ops/s
* forkJoinBench.Main.parallelStream 1 thrpt 15 21285.229 ± 261.597 ops/s
* forkJoinBench.Main.parallelStream 2 thrpt 15 256.318 ± 13.717 ops/s
* forkJoinBench.Main.parallelStream 3 thrpt 15 67.165 ± 0.197 ops/s
* forkJoinBench.Main.parallelStream 4 thrpt 15 40.167 ± 0.086 ops/s
* forkJoinBench.Main.serial 0 thrpt 15 3250371.613 ± 1751.204 ops/s
* forkJoinBench.Main.serial 1 thrpt 15 22931.864 ± 9.380 ops/s
* forkJoinBench.Main.serial 2 thrpt 15 22.846 ± 0.117 ops/s
* forkJoinBench.Main.serial 3 thrpt 15 7.644 ± 0.025 ops/s
* forkJoinBench.Main.serial 4 thrpt 15 4.583 ± 0.028 ops/s
*
*/
@BenchmarkMode(Mode.Throughput)
@State(Scope.Benchmark)
@Measurement(iterations = 15)
@Fork(1)
public class Main {
// just some verification that the numbers are correct
public static void main(String[] args) throws ExecutionException, InterruptedException {
Main m = new Main();
m.arr = arrays[3];
System.out.println("serial: " + m.serial()
+ "\nparallelStream: " + m.parallelStream()
+ "\ncustomCC: " + m.customCC());
}
static final int[][] arrays = new int[][] {
new int[100], new int[10000], new int[10_000_000], new int[30_000_000], new int[50_000_000]
};
static {
Random rand = new Random();
for (int[] arr : arrays) {
for (int i = 0; i < arr.length; ++i) arr[i] = rand.nextInt(200);
}
}
@Param({"0", "1", "2", "3", "4"})
int choice;
int[] arr;
@Setup(Level.Trial)
public void setup() {
arr = arrays[choice];
}
@Benchmark
public long serial() {
long res = 0;
for (int i : arr) if (i < 150) res += fnv(i) % 5;
return res;
}
@Benchmark
public long parallelStream() {
return Arrays.stream(arr)
.parallel()
.filter(i -> i < 150)
.mapToLong(i -> fnv(i) % 5)
.sum();
}
@Benchmark
public long customCC() {
return new CustomCC(arr).invoke();
}
static final class CustomCC extends CountedCompleter<Long> {
private static final int TARGET_LEAVES = Runtime.getRuntime().availableProcessors() << 2;
// ie, every leaf should be process at least 10_000 elements
private static final int MIN_SIZE_PER_LEAF = 10_000;
private static int computeInitialPending(int size) {
// if x = size / MIN_SIZE_PER_LEAF, and x < TARGET_LEAVES, create x leaves instead.
// This ensures that every leaf does at least MIN_SIZE_PER_LEAF operations.
int leaves = Math.min(TARGET_LEAVES, size / MIN_SIZE_PER_LEAF);
// If the total # leaves = 0, then pending = 0 since we shouldn't fork.
// Otherwise, pending = log2(leaves), as explained in the benchmark doc
return leaves == 0
? 0
: 31 - Integer.numberOfLeadingZeros(leaves);
}
final int[] arr;
int pos, size;
long res = 0;
final CustomCC[] subs;
public CustomCC(int[] arr) {
this(null, arr, arr.length, 0, computeInitialPending(arr.length));
}
private CustomCC(CustomCC parent, int[] arr, int size, int pos, int pending) {
super(parent, pending);
this.arr = arr; this.size = size; this.pos = pos;
subs = new CustomCC[pending];
}
@Override
public void compute() {
// subs.length === getPendingCount()
for (int p = subs.length - 1; p >= 0; --p) {
size >>>= 1;
CustomCC sub = new CustomCC(this, arr, size, pos + size, p);
subs[p] = sub;
sub.fork();
}
// needed for c2 to remove array index checking... modified from java's Spliterator class
int[] a; int i, hi;
if ((a = arr).length >= (hi = pos + size) && (i = pos) >= 0 && i < hi) {
do {
int e = a[i];
if (e < 150) res += fnv(e) % 5;
} while (++i < hi);
}
tryComplete();
}
@Override
public void onCompletion(CountedCompleter<?> caller) {
for (CustomCC sub : subs) res += sub.res;
}
@Override
public Long getRawResult() {
return res;
}
}
static final long FNV_64_PRIME = 0x100000001b3L;
static final long FNV_64_OFFSET = 0xcbf29ce484222325L;
static long fnv(int x) {
long h = FNV_64_OFFSET;
h ^= (x >>> 24);
h *= FNV_64_PRIME;
h ^= (x >>> 16) & 0xff;
h *= FNV_64_PRIME;
h ^= (x >>> 8) & 0xff;
h *= FNV_64_PRIME;
h ^= x & 0xff;
h *= FNV_64_PRIME;
return h;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment