Skip to content

Instantly share code, notes, and snippets.

@xacrimon
Created August 21, 2024 00:29
Show Gist options
  • Save xacrimon/5c93343a65263d764b9189da78b5a143 to your computer and use it in GitHub Desktop.
Save xacrimon/5c93343a65263d764b9189da78b5a143 to your computer and use it in GitHub Desktop.
#include <benchmark/benchmark.h>
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <utility>
#include <vector>
using namespace std;
// Scrambles a 32-bit value with three xor-shift steps interleaved with two
// odd-constant multiplications (all arithmetic wraps modulo 2^32).
// The shifts (16, 13, 16) and multipliers are the same ones used by the
// MurmurHash3 32-bit finalizer. An input of 0 maps to 0.
uint32_t murmur32(uint32_t h) {
    constexpr uint32_t kFirstMultiplier  = UINT32_C(0x85ebca6b);
    constexpr uint32_t kSecondMultiplier = UINT32_C(0xc2b2ae35);

    h = (h ^ (h >> 16)) * kFirstMultiplier;
    h = (h ^ (h >> 13)) * kSecondMultiplier;
    return h ^ (h >> 16);
}
// Reduces `word` into the half-open interval [0, p) without a division:
// the two 32-bit inputs are multiplied as 64-bit integers and the upper
// 32 bits of the product are returned. Because word < 2^32, the product is
// below p * 2^32, so the result is always < p (and is 0 when p == 0).
uint32_t fastrange32(uint32_t word, uint32_t p) {
    const uint64_t wide_product = static_cast<uint64_t>(word) * static_cast<uint64_t>(p);
    return static_cast<uint32_t>(wide_product >> 32);
}
// Builds a table of N entries where entry i holds i * 3 - 2, computed in
// unsigned 64-bit arithmetic (so entry 0 wraps around to 2^64 - 2).
//
// The arithmetic is done explicitly in uint64_t rather than size_t so the
// stored values equal the `pos * 3 - 2` expressions evaluated in uint64_t by
// Block8_SumLazy / Block8_SumLazy_fastrange32 even where size_t is narrower
// than 64 bits.
//
// N: number of entries to generate; N == 0 yields an empty vector.
// Returns the populated vector by value.
std::vector< uint64_t > init_arrays( size_t N )
{
    std::vector< uint64_t > values( N );
    for (size_t i = 0; i < N; i++)
    {
        values[i] = static_cast< uint64_t >( i ) * 3 - 2;
    }
    return values;
}
// Number of elements used by every benchmark in this file (1024 * 128 * 17).
// The body is parenthesized and has no trailing semicolon so the macro can be
// used inside larger expressions (comparisons, arithmetic, call arguments),
// not only as a complete initializer.
#define SETSIZE (1024*128*17)
// Processes one block of eight consecutive keys k, k+1, ..., k+7.
// Each key is hashed with murmur32, reduced to a position with `% size`,
// and turned into the value `position * 3 - 2` in uint64_t arithmetic.
// Returns the (wrapping) sum of those eight values.
// `size` must be non-zero because it is used as a modulus.
uint64_t Block8_SumLazy(size_t k, size_t size) {
    uint64_t total = 0;
    for (size_t lane = 0; lane < 8; ++lane) {
        const uint64_t position = murmur32(k + lane) % size;
        total += position * 3 - 2;
    }
    return total;
}
// Processes one block of eight consecutive keys k, k+1, ..., k+7.
// Each key is hashed with murmur32, reduced to a position with
// fastrange32(hash, size), and turned into the value `position * 3 - 2`
// in uint64_t arithmetic. Returns the (wrapping) sum of those eight values.
// Note that `size` is converted to uint32_t when passed to fastrange32.
uint64_t Block8_SumLazy_fastrange32(size_t k, size_t size) {
    uint64_t total = 0;
    for (size_t lane = 0; lane < 8; ++lane) {
        const uint64_t position = fastrange32(murmur32(k + lane), size);
        total += position * 3 - 2;
    }
    return total;
}
// Benchmark: walks the key range [0, SETSIZE) in steps of eight and
// accumulates Block8_SumLazy for every block, i.e. the modulo-based
// reduction with values computed on the fly instead of read from a table.
static void SuperSet_SumLazy( benchmark::State& state )
{
    size_t set_size = SETSIZE;
    // Pass the size through DoNotOptimize so the compiler does not treat it
    // as a known compile-time constant.
    benchmark::DoNotOptimize( set_size );
    for (auto _ : state)
    {
        uint64_t total = 0;
        size_t block_start = 0;
        while (block_start < set_size)
        {
            total += Block8_SumLazy(block_start, set_size);
            block_start += 8;
        }
        // Keep the accumulated result observable so the loop is not removed.
        benchmark::DoNotOptimize( total );
    }
}
BENCHMARK( SuperSet_SumLazy );
// Benchmark: walks the key range [0, SETSIZE) in steps of eight and
// accumulates Block8_SumLazy_fastrange32 for every block, i.e. the
// multiply-and-shift reduction with values computed on the fly.
static void SuperSet_SumLazy_fastrange32( benchmark::State& state )
{
    size_t set_size = SETSIZE;
    // Pass the size through DoNotOptimize so the compiler does not treat it
    // as a known compile-time constant.
    benchmark::DoNotOptimize( set_size );
    for (auto _ : state)
    {
        uint64_t total = 0;
        size_t block_start = 0;
        while (block_start < set_size)
        {
            total += Block8_SumLazy_fastrange32(block_start, set_size);
            block_start += 8;
        }
        // Keep the accumulated result observable so the loop is not removed.
        benchmark::DoNotOptimize( total );
    }
}
BENCHMARK( SuperSet_SumLazy_fastrange32 );
// Benchmark: sums SETSIZE table entries read at hashed positions, where each
// position is obtained as murmur32(key) % size. The table itself is built
// once, before the timed loop starts.
static void SumRandom( benchmark::State& state )
{
    size_t set_size = SETSIZE;
    // Pass the size through DoNotOptimize so the compiler does not treat it
    // as a known compile-time constant.
    benchmark::DoNotOptimize( set_size );
    // NOTE(review): the attribute is attached to a variable declaration; it
    // is presumably ignored (with a warning) in this position - confirm.
    [[clang::noinline]] auto table = init_arrays( set_size );
    for (auto _ : state)
    {
        uint64_t total = 0;
        size_t key = 0;
        while (key < set_size)
        {
            const auto slot = murmur32(key) % set_size;
            total += table[slot];
            ++key;
        }
        // Keep the accumulated result observable so the loop is not removed.
        benchmark::DoNotOptimize( total );
    }
}
BENCHMARK( SumRandom );
// Benchmark: sums SETSIZE table entries read at hashed positions, where each
// position is obtained as fastrange32(murmur32(key), size). The table itself
// is built once, before the timed loop starts.
static void SumRandom_fastrange32( benchmark::State& state )
{
    size_t set_size = SETSIZE;
    // Pass the size through DoNotOptimize so the compiler does not treat it
    // as a known compile-time constant.
    benchmark::DoNotOptimize( set_size );
    // NOTE(review): the attribute is attached to a variable declaration; it
    // is presumably ignored (with a warning) in this position - confirm.
    [[clang::noinline]] auto table = init_arrays( set_size );
    for (auto _ : state)
    {
        uint64_t total = 0;
        size_t key = 0;
        while (key < set_size)
        {
            const auto slot = fastrange32(murmur32(key), set_size);
            total += table[slot];
            ++key;
        }
        // Keep the accumulated result observable so the loop is not removed.
        benchmark::DoNotOptimize( total );
    }
}
BENCHMARK( SumRandom_fastrange32 );
// Google Benchmark macro; presumably expands to the program's main(), which
// runs every benchmark registered above with BENCHMARK(...).
BENCHMARK_MAIN();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment