Last active
October 3, 2022 06:37
-
-
Save feymartynov/d27de97deaef4107fe4778462fe2a3c3 to your computer and use it in GitHub Desktop.
vector2 rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "vector2" | |
version = "0.1.0" | |
edition = "2021" | |
[dev-dependencies] | |
rand = "0.8" | |
criterion = "0.4" | |
[[bench]] | |
name = "vector2" | |
harness = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::cmp::Ordering; | |
static BITS_TABLE: [u64; 256] = build_bits_table(); | |
const fn build_bits_table() -> [u64; 256] { | |
let mut bits_table = [0; 256]; | |
let mut i = 0; | |
while i < 256 { | |
bits_table[i] = 1 << (i & 0x3f); | |
i += 1; | |
} | |
bits_table | |
} | |
fn bytes_to_bits(bytes: &[u8]) -> [u64; 4] { | |
let mut bits = [0; 4]; | |
for i in 0..(bytes.len()) { | |
bits[(bytes[i] >> 6) as usize] |= BITS_TABLE[bytes[i] as usize]; | |
} | |
bits | |
} | |
pub fn intersect_array<T: Ord>(a: &[T], b: &[T]) { | |
let an = a.len(); | |
let bn = b.len(); | |
let mut i = 0; | |
let mut j = 0; | |
while i < an && j < bn { | |
match a[i].cmp(&b[i]) { | |
Ordering::Less => i += 1, | |
Ordering::Greater => j += 1, | |
Ordering::Equal => { | |
i += 1; | |
j += 1; | |
} | |
} | |
} | |
} | |
#[derive(Debug)] | |
struct Pack { | |
a: u8, | |
b: u8, | |
c: u8, | |
d: [u8; 256], | |
} | |
impl Default for Pack { | |
fn default() -> Self { | |
Self { | |
a: 0, | |
b: 0, | |
c: 0, | |
d: [0; 256], | |
} | |
} | |
} | |
pub fn compress_array(array: Vec<u32>) -> Vec<u8> { | |
let mut r = Vec::new(); | |
let mut p = Pack::default(); | |
for v in array { | |
let a = (v >> 16) as u8; | |
let b = (v >> 8) as u8; | |
let d = v as u8; | |
if a != p.a || b != p.b { | |
if p.c > 0 { | |
r.push(p.a); | |
r.push(p.b); | |
r.push(p.c); | |
r.extend_from_slice(&p.d[..(p.c as usize)]); | |
} | |
p = Pack::default(); | |
p.a = a; | |
p.b = b; | |
} | |
p.d[p.c as usize] = d; | |
p.c += 1; | |
} | |
if p.c > 0 { | |
r.push(p.a); | |
r.push(p.b); | |
r.push(p.c); | |
r.extend_from_slice(&p.d[..(p.c as usize)]); | |
} | |
r | |
} | |
#[derive(Debug, Default)] | |
struct Head { | |
base: u16, | |
size: u8, | |
} | |
pub fn intersect_vector_old(a: &[u8], b: &[u8]) -> u64 { | |
let mut r = 0; | |
let an = a.len(); | |
let bn = b.len(); | |
let mut i = 0; | |
let mut j = 0; | |
while i < an && j < bn { | |
let x = unsafe { &*(a.as_ptr().add(i) as *const Head) }; | |
let y = unsafe { &*(b.as_ptr().add(j) as *const Head) }; | |
match x.base.cmp(&y.base) { | |
Ordering::Less => i += 3 + (x.size as usize), | |
Ordering::Greater => j += 3 + (y.size as usize), | |
Ordering::Equal => { | |
i += 3; | |
let mut bits0 = bytes_to_bits(&a[i..(i + (x.size as usize))]); | |
j += 3; | |
let bits1 = bytes_to_bits(&b[j..(j + (y.size as usize))]); | |
bits0[0] &= bits1[0]; | |
bits0[1] &= bits1[1]; | |
bits0[2] &= bits1[2]; | |
bits0[3] &= bits1[3]; | |
r += bits0[0] + bits0[1] + bits0[2] + bits0[3]; | |
i += x.size as usize; | |
j += y.size as usize; | |
} | |
} | |
} | |
r | |
} | |
pub fn intersect_vector_new(a: &[u8], b: &[u8]) -> u64 { | |
let mut r = 0_u64; | |
let mut i = 0; | |
let mut j = 0; | |
while i < a.len() && j < b.len() { | |
let x_base = (a[i] as u16) << 8 | (a[i + 1] as u16); | |
let y_base = (b[j] as u16) << 8 | (b[j + 1] as u16); | |
match x_base.cmp(&y_base) { | |
Ordering::Less => i += 3 + (a[i + 2] as usize), | |
Ordering::Greater => j += 3 + (b[j + 2] as usize), | |
Ordering::Equal => { | |
let x_size = a[i + 2] as usize; | |
let y_size = b[j + 2] as usize; | |
let mut h = [0; 256]; | |
r = 0; | |
i += 3; | |
for v in a[i..(i + x_size)].iter().copied() { | |
h[v as usize] = 1; | |
} | |
j += 3; | |
for v in b[j..(j + y_size)].iter().copied() { | |
let ok = h[v as usize] as u64; | |
h[r as usize] = v; | |
r += ok; | |
} | |
i += x_size; | |
j += y_size; | |
} | |
} | |
} | |
r | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn vector_intersect_old() { | |
let v0 = compress_array(vec![0, 1, 4, 63, 64, 67, 78, 240, 512]); | |
let v1 = compress_array(vec![0, 1, 3, 63, 64, 77, 89, 205, 240, 512, 1024, 2047]); | |
let r = intersect_vector_old(&v0, &v1); | |
assert_eq!(r, 9223653511831486469); | |
} | |
#[test] | |
fn vector_intersect_new() { | |
let v0 = compress_array(vec![0, 1, 4, 63, 64, 67, 78, 240, 512]); | |
let v1 = compress_array(vec![0, 1, 3, 63, 64, 77, 89, 205, 240, 512, 1024, 2047]); | |
let r = intersect_vector_new(&v0, &v1); | |
assert_eq!(r, 1); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate criterion; | |
extern crate rand; | |
use criterion::*; | |
use rand::prelude::*; | |
//////////////////////////////////////////////////////////////////////////////// | |
const FIRST_ARRAY_SIZE: usize = 128 * 1024; | |
const SECOND_ARRAY_SIZE: usize = 1024 * 1024; | |
fn build_compressed_arrays() -> (Vec<u8>, Vec<u8>) { | |
let (a0, a1) = build_arrays(); | |
let a0 = vector2::compress_array(a0); | |
let a1 = vector2::compress_array(a1); | |
(a0, a1) | |
} | |
fn build_arrays() -> (Vec<u32>, Vec<u32>) { | |
let a0 = rand_array(FIRST_ARRAY_SIZE, FIRST_ARRAY_SIZE as u32); | |
let a1 = rand_array(SECOND_ARRAY_SIZE, SECOND_ARRAY_SIZE as u32); | |
(a0, a1) | |
} | |
fn rand_array(n: usize, max: u32) -> Vec<u32> { | |
let mut rng = rand::thread_rng(); | |
let mut a = (0..n).map(|_| rng.gen_range(0..max)).collect::<Vec<_>>(); | |
a.sort_unstable(); | |
a.dedup(); | |
a | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
fn intersect_array(c: &mut Criterion) { | |
c.bench_function("intersect_array", |bencher| { | |
bencher.iter_batched( | |
build_arrays, | |
|(ref a0, ref a1)| vector2::intersect_array(a0, a1), | |
BatchSize::SmallInput, | |
); | |
}); | |
} | |
fn intersect_vector_old(c: &mut Criterion) { | |
c.bench_function("intersect_vector_old", |bencher| { | |
bencher.iter_batched( | |
build_compressed_arrays, | |
|(ref a0, ref a1)| vector2::intersect_vector_old(a0, a1), | |
BatchSize::SmallInput, | |
) | |
}); | |
} | |
fn intersect_vector_new(c: &mut Criterion) { | |
c.bench_function("intersect_vector_new", |bencher| { | |
bencher.iter_batched( | |
build_compressed_arrays, | |
|(ref a0, ref a1)| vector2::intersect_vector_new(a0, a1), | |
BatchSize::SmallInput, | |
) | |
}); | |
} | |
//////////////////////////////////////////////////////////////////////////////// | |
criterion_group!( | |
benches, | |
intersect_array, | |
intersect_vector_old, | |
intersect_vector_new | |
); | |
criterion_main!(benches); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment