Created
January 13, 2023 23:17
-
-
Save connorskees/955439d1ad62a4dcbe4f594a293c6187 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Fills `out_slice` by repeating the bytes of `base` end to end.
///
/// Despite the name this is a pattern fill rather than a single copy: `base`
/// is written as many whole times as fits into `out_slice`, and the last,
/// partial repetition is truncated so that exactly `out_slice.len()` bytes
/// are written. When `base` is longer than `out_slice`, only the leading
/// `out_slice.len()` bytes of `base` are copied.
///
/// # Panics
///
/// Panics if `base` is empty while `out_slice` is not, because there is no
/// pattern to repeat.
#[inline]
fn memcpy(out_slice: &mut [u8], base: &[u8]) {
    if base.is_empty() {
        // `chunks_exact_mut(0)` panics unconditionally, even when there is
        // nothing to write; only reject the case that is genuinely unsatisfiable.
        assert!(
            out_slice.is_empty(),
            "cannot fill a non-empty slice from an empty pattern"
        );
        return;
    }

    let mut chunks = out_slice.chunks_exact_mut(base.len());
    // `by_ref` borrows the iterator so the remainder can still be claimed below.
    for chunk in chunks.by_ref() {
        chunk.copy_from_slice(base);
    }

    // Whatever did not fit a whole repetition receives a prefix of the pattern.
    let rem = chunks.into_remainder();
    rem.copy_from_slice(&base[..rem.len()]);
}
#[inline] | |
fn transfer( | |
out_slice: &mut [u8], | |
mut source_pos: usize, | |
mut out_pos: usize, | |
match_len: usize, | |
out_buf_size_mask: usize, | |
) { | |
debug_assert!(out_pos > source_pos); | |
// special case that comes up surprisingly often. in the case that `source_pos` | |
// is 1 less than `out_pos`, we can say that the entire range will be the same | |
// value and optimize this to be a simple `memset` | |
// | |
// using `memset` here is significantly (~3x) faster than using the general case memcpy, | |
// since the general case is effectively a slow loop that goes 1 byte at a time for this | |
// case | |
if out_buf_size_mask == usize::MAX && source_pos.abs_diff(out_pos) == 1 { | |
let init = out_slice[out_pos - 1]; | |
let end = (match_len >> 2) * 4 + out_pos; | |
out_slice[out_pos..end].fill(init); | |
out_pos = end; | |
source_pos = end - 1; | |
} else if out_buf_size_mask == usize::MAX { | |
let (start, out) = out_slice.split_at_mut(out_pos); | |
let base = &start[source_pos..]; | |
let end = (match_len >> 2) * 4 + out_pos; | |
memcpy(&mut out[..end - out_pos], base); | |
source_pos = end - source_pos.abs_diff(out_pos); | |
out_pos = end; | |
} else { | |
for _ in 0..match_len >> 2 { | |
out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; | |
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; | |
out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; | |
out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask]; | |
source_pos += 4; | |
out_pos += 4; | |
} | |
} | |
match match_len & 3 { | |
0 => (), | |
1 => out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask], | |
2 => { | |
out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; | |
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; | |
} | |
3 => { | |
out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask]; | |
out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask]; | |
out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask]; | |
} | |
_ => unreachable!(), | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment