|
#![feature(test)] |
|
|
|
#[cfg(test)] |
|
extern crate quickcheck; |
|
#[cfg(test)] |
|
#[macro_use(quickcheck)] |
|
extern crate quickcheck_macros; |
|
|
|
use core::arch::x86_64; |
|
|
|
#[target_feature(enable = "sse4.2")] |
|
unsafe fn _contains4_pcmp(data: &[u8], a: u8, b: u8, c: u8, d: u8) -> bool { |
|
let (prefix, body, suffix) = data.align_to::<x86_64::__m128i>(); |
|
|
|
for &el in prefix { |
|
if el == a || el == b || el == c || el == d { |
|
return true; |
|
} |
|
} |
|
|
|
let needle = x86_64::_mm_set_epi8( |
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, a as i8, b as i8, c as i8, d as i8, |
|
); |
|
|
|
for &data_vec in body { |
|
if x86_64::_mm_cmpistrc(data_vec, needle, 0) != 0 { |
|
return true; |
|
} |
|
} |
|
|
|
// Check the remaining things non-vectorized |
|
for &el in suffix { |
|
if el == a || el == b || el == c || el == d { |
|
return true; |
|
} |
|
} |
|
|
|
false |
|
} |
|
|
|
pub fn contains4_pcmp(data: &[u8], a: u8, b: u8, c: u8, d: u8) -> bool { |
|
unsafe { _contains4_pcmp(data, a, b, c, d) } |
|
} |
|
|
|
pub fn contains4(data: &[u8], a: u8, b: u8, c: u8, d: u8) -> bool { |
|
let (prefix, body, suffix) = unsafe { data.align_to::<packed_simd::u8x16>() }; |
|
|
|
for &el in prefix { |
|
if el == a || el == b || el == c || el == d { |
|
return true; |
|
} |
|
} |
|
|
|
let a_vec = packed_simd::u8x16::splat(a); |
|
let b_vec = packed_simd::u8x16::splat(b); |
|
let c_vec = packed_simd::u8x16::splat(c); |
|
let d_vec = packed_simd::u8x16::splat(d); |
|
|
|
for &data_vec in body { |
|
// We now have a set of bool vectors whether any byte matches each of |
|
// a/b/c/d |
|
let a_matches = data_vec.eq(a_vec); |
|
let b_matches = data_vec.eq(b_vec); |
|
let c_matches = data_vec.eq(c_vec); |
|
let d_matches = data_vec.eq(d_vec); |
|
|
|
// Bit-or of these together "did any byte match at this position" |
|
if (a_matches | b_matches | c_matches | d_matches).any() { |
|
return true; |
|
} |
|
} |
|
|
|
// Check the remaining things non-vectorized |
|
for &el in suffix { |
|
if el == a || el == b || el == c || el == d { |
|
return true; |
|
} |
|
} |
|
|
|
false |
|
} |
|
|
|
#[cfg(test)] |
|
mod tests { |
|
use super::*; |
|
extern crate rand; |
|
extern crate test; |
|
|
|
use rand::rngs::SmallRng; |
|
use rand::{Rng, SeedableRng}; |
|
|
|
#[quickcheck] |
|
fn contains4_vs_naive(data: Vec<u8>, a: u8, b: u8, c: u8, d: u8) { |
|
let expected = data |
|
.iter() |
|
.any(|&el| el == a || el == b || el == c || el == d); |
|
assert_eq!(contains4(&data, a, b, c, d), expected); |
|
} |
|
|
|
#[quickcheck] |
|
fn contains4_pcmp_vs_naive(data: Vec<u8>, a: u8, b: u8, c: u8, d: u8) { |
|
let expected = data |
|
.iter() |
|
.any(|&el| el == a || el == b || el == c || el == d); |
|
if data.contains(&0) || a == 0 || b == 0 || c == 0 || d == 0 { |
|
// the PCMPI* variant can't handle NULL bytes |
|
return; |
|
} |
|
assert_eq!(contains4_pcmp(&data, a, b, c, d), expected); |
|
} |
|
|
|
static HAYSTACK_SHORT: &'static [u8] = b"fdsjalkfadlkfjdsafjl;dsafjdsaiofewjoa;fjdsia;fds;alfjdsioafjiworfewjaifdsfdsafisa;fdsfsidj;offdsk;l"; |
|
|
|
#[bench] |
|
fn bench_naive(b: &mut test::Bencher) { |
|
b.iter(|| { |
|
let data = test::black_box(HAYSTACK_SHORT); |
|
test::black_box( |
|
data.iter() |
|
.any(|&el| el == b'm' || el == b'n' || el == b'p' || el == b'q'), |
|
); |
|
}); |
|
} |
|
|
|
#[bench] |
|
fn bench_contains4(b: &mut test::Bencher) { |
|
b.iter(|| { |
|
let data = test::black_box(HAYSTACK_SHORT); |
|
test::black_box(contains4(data, b'm', b'n', b'p', b'q')); |
|
}); |
|
} |
|
|
|
#[bench] |
|
fn bench_contains4_pcmp(b: &mut test::Bencher) { |
|
b.iter(|| { |
|
let data = test::black_box(HAYSTACK_SHORT); |
|
test::black_box(contains4_pcmp(data, b'm', b'n', b'p', b'q')); |
|
}); |
|
} |
|
|
|
const LONG_HAYSTACK_LEN: usize = 1 << 20; |
|
|
|
fn long_haystack() -> Vec<u8> { |
|
let mut ret = Vec::with_capacity(LONG_HAYSTACK_LEN); |
|
let mut rng = SmallRng::seed_from_u64(1); |
|
while ret.len() < LONG_HAYSTACK_LEN { |
|
let b = rng.gen::<u8>(); |
|
if b == 0 || b == b'm' || b == b'n' || b == b'p' || b == b'q' { |
|
continue; |
|
} |
|
ret.push(b); |
|
} |
|
ret |
|
} |
|
|
|
#[bench] |
|
fn bench_naive_long(b: &mut test::Bencher) { |
|
let data = test::black_box(long_haystack()); |
|
b.iter(|| { |
|
test::black_box( |
|
data.iter() |
|
.any(|&el| el == b'm' || el == b'n' || el == b'p' || el == b'q'), |
|
); |
|
}); |
|
} |
|
|
|
#[bench] |
|
fn bench_contains4_long(b: &mut test::Bencher) { |
|
let data = test::black_box(long_haystack()); |
|
b.iter(|| { |
|
test::black_box(contains4(&data, b'm', b'n', b'p', b'q')); |
|
}); |
|
} |
|
|
|
#[bench] |
|
fn bench_contains4_pcmp_long(b: &mut test::Bencher) { |
|
let data = test::black_box(long_haystack()); |
|
b.iter(|| { |
|
test::black_box(contains4_pcmp(&data, b'm', b'n', b'p', b'q')); |
|
}); |
|
} |
|
} |