Skip to content

Instantly share code, notes, and snippets.

@shadeglare
Created April 6, 2023 15:08
Show Gist options
  • Save shadeglare/cd8b3b7256dcbd89b59cc63c8e96339f to your computer and use it in GitHub Desktop.
Save shadeglare/cd8b3b7256dcbd89b59cc63c8e96339f to your computer and use it in GitHub Desktop.
Benchmark comparing a plain shift operation against an ARM NEON-optimised one
/// Shifts each of the four lanes of `v` left by `o` bits using the scalar `<<` operator.
///
/// This is the baseline implementation the NEON variant is measured against.
/// The shift count follows the usual rules of `<<` on `u32`, so a count outside
/// `0..32` is an overflow (a panic when overflow checks are enabled).
fn shl_plain(v: [u32; 4], o: i32) -> [u32; 4] {
    v.map(|lane| lane << o)
}
/// Shifts each of the four lanes of `v` left by `o` bits with a single NEON instruction.
///
/// The same count `o` is broadcast to every lane. Note that `vqshlq_u32` is the
/// *saturating* vector shift (see Arm's intrinsic reference): a lane whose result
/// would not fit in 32 bits clamps to `u32::MAX` instead of dropping the high bits
/// the way the scalar `<<` does.
#[cfg(target_arch = "aarch64")]
fn shl_neon(v: [u32; 4], o: i32) -> [u32; 4] {
    use std::arch::aarch64::{vdupq_n_s32, vld1q_u32, vqshlq_u32, vst1q_u32};

    let mut shifted = [0_u32; 4];
    // SAFETY: `v` and `shifted` are both `[u32; 4]`, so the load reads exactly four
    // valid `u32`s and the store writes exactly four `u32`s into owned, writable
    // memory. The function is only compiled for aarch64, where these intrinsics exist.
    unsafe {
        let lanes = vld1q_u32(v.as_ptr());
        // Broadcast the scalar shift count into all four signed lanes.
        let counts = vdupq_n_s32(o);
        vst1q_u32(shifted.as_mut_ptr(), vqshlq_u32(lanes, counts));
    }
    shifted
}
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
/// Registers the `shl` benchmark group, timing the scalar and NEON four-lane
/// shifts for every shift count from 0 through 9.
///
/// Each count becomes its own parameterised benchmark (`plain/<n>`, `neon/<n>`),
/// and that count is the value actually passed to the function under test.
/// The NEON benchmark is only registered on aarch64, the sole target on which
/// `shl_neon` is compiled.
pub fn criterion_benchmark(c: &mut Criterion) {
    use std::hint::black_box;

    let mut group = c.benchmark_group("shl");
    for i in 0..10_i32 {
        group.bench_with_input(BenchmarkId::new("plain", i), &i, |b, &shift| {
            // `black_box` hides the operands from the optimiser so the shift is
            // really executed on every iteration instead of being const-folded.
            b.iter(|| shl_plain(black_box([1, 2, 3, 4]), black_box(shift)))
        });
        // `shl_neon` does not exist on other architectures; skip it there so the
        // benchmark still builds and the plain variant can be measured alone.
        #[cfg(target_arch = "aarch64")]
        group.bench_with_input(BenchmarkId::new("neon", i), &i, |b, &shift| {
            b.iter(|| shl_neon(black_box([1, 2, 3, 4]), black_box(shift)))
        });
    }
    group.finish();
}
// Bundle `criterion_benchmark` into a benchmark group named `benches`.
criterion_group!(benches, criterion_benchmark);
// Hand the `benches` group to `criterion_main!`, which (per Criterion's
// documentation) expands to the benchmark binary's `main` function.
criterion_main!(benches);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment