Skip to content

Instantly share code, notes, and snippets.

@llogiq
Created September 4, 2018 20:19
Show Gist options
  • Save llogiq/5763578ef8f60f7646798ed59b9e7404 to your computer and use it in GitHub Desktop.
Save llogiq/5763578ef8f60f7646798ed59b9e7404 to your computer and use it in GitHub Desktop.
single-threaded fannkuch_redux stdsimd version
use std::arch::x86_64::*;
#[derive(Copy, Clone, Debug)]
pub struct u8x16(__m128i);
impl Default for u8x16 {
fn default() -> Self { u8x16(unsafe { _mm_setzero_si128() }) }
}
impl u8x16 {
pub fn from_slice_unaligned(s: &[u8]) -> u8x16 {
u8x16(unsafe { _mm_loadu_si128(s.as_ptr() as *const __m128i) })
}
pub fn write_to_slice_unaligned(self, s: &mut [u8]) {
unsafe { _mm_storeu_si128(s.as_mut_ptr() as *mut __m128i, self.0) }
}
pub fn extract(self, _idx: usize) -> u8 {
unsafe { _mm_extract_epi8(self.0, 0i32) as u8 }
}
pub fn permute_dyn(self, indices: Self) -> Self {
u8x16(unsafe { _mm_shuffle_epi8(self.0, indices.0) })
}
}
#[derive(Default)]
struct State {
s: [u8; 16],
flip_masks: [u8x16; 16],
rotate_masks: [u8x16; 16],
maxflips: i32,
odd: u16,
checksum: i32,
}
impl State {
fn rotate_sisd(&mut self, n: usize) {
let c = self.s[0];
for i in 1..(n + 1) {
self.s[i - 1] = self.s[i];
}
self.s[n] = c;
}
fn popmasks(&mut self) {
let mut mask = [0_u8; 16];
for i in 0..16 {
for (j, m) in mask.iter_mut().enumerate() {
*m = j as u8;
}
for x in 0..(i + 1) / 2 {
mask.swap(x, i - x);
}
self.flip_masks[i] = u8x16::from_slice_unaligned(&mask);
for (j, s) in self.s.iter_mut().enumerate() {
*s = j as u8;
}
self.rotate_sisd(i);
self.rotate_masks[i] = self.load_s();
}
}
fn rotate(&mut self, n: usize) {
self.load_s()
.permute_dyn(self.rotate_masks[n])
.write_to_slice_unaligned(&mut self.s)
}
fn load_s(&self) -> u8x16 {
u8x16::from_slice_unaligned(&self.s)
}
fn tk(&mut self, n: usize) {
#[derive(Copy, Clone, Debug, Default)]
struct Perm {
perm: u8x16,
start: u8,
odd: u16,
}
let mut perms = [Perm::default(); 60];
let mut i = 0;
let mut c = [0_u8; 16];
let mut perm_max = 0;
while i < n {
while i < n && perm_max < 60 {
self.rotate(i);
if c[i] as usize >= i {
c[i] = 0;
i += 1;
continue;
}
c[i] += 1;
i = 1;
self.odd = !self.odd;
if self.s[0] != 0 {
if self.s[self.s[0] as usize] == 0 {
if self.maxflips == 0 {
self.maxflips = 1
}
self.checksum += if self.odd == 0 { 1 } else { -1 };
} else {
perms[perm_max].perm = self.load_s();
perms[perm_max].start = self.s[0];
perms[perm_max].odd = self.odd;
perm_max += 1;
}
}
}
let mut k = 0;
while k < std::cmp::max(1, perm_max) - 1 {
let pk = &perms[k];
let pk1 = &perms[k + 1];
let mut perm1 = pk.perm;
let mut perm2 = pk1.perm;
let mut f1 = 0;
let mut f2 = 0;
let mut toterm1 = pk.start;
let mut toterm2 = pk1.start;
while toterm1 != 0 && toterm2 != 0 {
perm1 =
perm1.permute_dyn(self.flip_masks[toterm1 as usize]);
perm2 =
perm2.permute_dyn(self.flip_masks[toterm2 as usize]);
toterm1 = perm1.extract(0);
toterm2 = perm2.extract(0);
f1 += 1;
f2 += 1;
}
while toterm1 != 0 {
perm1 =
perm1.permute_dyn(self.flip_masks[toterm1 as usize]);
toterm1 = perm1.extract(0);
f1 += 1;
}
while toterm2 != 0 {
perm2 =
perm2.permute_dyn(self.flip_masks[toterm2 as usize]);
toterm2 = perm2.extract(0);
f2 += 1;
}
if f1 > self.maxflips {
self.maxflips = f1
}
if f2 > self.maxflips {
self.maxflips = f2
}
self.checksum += if pk.odd == 0 { f1 } else { -f1 };
self.checksum += if pk1.odd == 0 { f2 } else { -f2 };
k += 2;
}
while k < perm_max {
let pk = &perms[k];
let mut perm = pk.perm;
let mut f = 0;
let mut toterm = pk.start;
while toterm != 0 {
perm = perm.permute_dyn(self.flip_masks[toterm as usize]);
toterm = perm.extract(0);
f += 1;
}
if f > self.maxflips {
self.maxflips = f
}
self.checksum += if pk.odd == 0 { f } else { -f };
k += 1
}
perm_max = 0;
}
}
}
fn main() {
let n: usize = std::env::args().nth(1).and_then(|s| s.parse().ok())
.unwrap_or(7);
let mut state = State::default();
state.popmasks();
for i in 0..n {
state.s[i] = i as u8
}
state.tk(n);
let (checksum, maxflips) = (state.checksum, state.maxflips);
println!("{}\nPfannkuchen({}) = {}", checksum, n, maxflips);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment