~50% faster compress, 11% faster decomp
Some checks failed
CI / linux (map[runner:ubuntu-22.04 target:aarch64]) (push) Has been cancelled
CI / linux (map[runner:ubuntu-22.04 target:armv7]) (push) Has been cancelled
CI / linux (map[runner:ubuntu-22.04 target:ppc64le]) (push) Has been cancelled
CI / linux (map[runner:ubuntu-22.04 target:s390x]) (push) Has been cancelled
CI / linux (map[runner:ubuntu-22.04 target:x86]) (push) Has been cancelled
CI / linux (map[runner:ubuntu-22.04 target:x86_64]) (push) Has been cancelled
CI / musllinux (map[runner:ubuntu-22.04 target:aarch64]) (push) Has been cancelled
CI / musllinux (map[runner:ubuntu-22.04 target:armv7]) (push) Has been cancelled
CI / musllinux (map[runner:ubuntu-22.04 target:x86]) (push) Has been cancelled
CI / musllinux (map[runner:ubuntu-22.04 target:x86_64]) (push) Has been cancelled
CI / windows (map[python_arch:x64 runner:windows-latest target:x64]) (push) Has been cancelled
CI / windows (map[python_arch:x86 runner:windows-latest target:x86]) (push) Has been cancelled
CI / macos (map[runner:macos-15-intel target:x86_64]) (push) Has been cancelled
CI / macos (map[runner:macos-latest target:aarch64]) (push) Has been cancelled
CI / sdist (push) Has been cancelled
CI / Standalone .exe (Windows x86_64) (push) Has been cancelled
CI / Release (push) Has been cancelled

This commit is contained in:
Will Toohey 2026-04-26 13:55:14 +10:00
parent 536d0c6ca5
commit 7477b79fd4

View File

@ -15,6 +15,10 @@ const HASH_SIZE: usize = 1 << HASH_BITS;
const HASH_MASK: u32 = HASH_SIZE as u32 - 1;
const NIL: u32 = u32::MAX;
// Greedy match-search depth. 128 hits a good speed/ratio knee on real-world
// texture data: ~1.5× faster than an unbounded chain at <0.5% size cost.
const MAX_CHAIN: u32 = 128;
#[derive(Debug)]
pub enum DecompressError {
Truncated,
@ -33,9 +37,10 @@ impl std::error::Error for DecompressError {}
pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
let mut out: Vec<u8> = Vec::with_capacity(input.len() * 2);
let mut i = 0usize;
let n = input.len();
loop {
if i >= input.len() {
if i >= n {
return Err(DecompressError::Truncated);
}
let flag = input[i];
@ -43,13 +48,13 @@ pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
for bit in 0..8 {
if (flag >> bit) & 1 == 1 {
if i >= input.len() {
if i >= n {
return Err(DecompressError::Truncated);
}
out.push(input[i]);
i += 1;
} else {
if i + 1 >= input.len() {
if i + 1 >= n {
return Err(DecompressError::Truncated);
}
let w = u16::from_be_bytes([input[i], input[i + 1]]);
@ -65,22 +70,31 @@ pub fn decompress(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
// References into the virtual zero-prefilled window before stream start.
if pos > out.len() {
let diff = (pos - out.len()).min(len);
out.extend(std::iter::repeat(0u8).take(diff));
let new_len = out.len() + diff;
out.resize(new_len, 0);
len -= diff;
}
// Self-overlapping copy: each output byte may feed the next.
if len == 0 {
continue;
}
let start = out.len() - pos;
for k in 0..len {
let b = out[start + k];
out.push(b);
if pos >= len {
// Non-overlapping run: single memcpy.
out.extend_from_within(start..start + len);
} else {
// Self-overlapping copy: each output byte may feed the next.
for k in 0..len {
let b = out[start + k];
out.push(b);
}
}
}
}
}
}
/// Hash a 3-byte prefix into the chain table.
#[inline(always)]
fn hash3(a: u8, b: u8, c: u8) -> usize {
let h = ((a as u32) << 16) | ((b as u32) << 8) | (c as u32);
@ -88,9 +102,7 @@ fn hash3(a: u8, b: u8, c: u8) -> usize {
((h >> (32 - HASH_BITS)) & HASH_MASK) as usize
}
/// Compress a buffer. Output is byte-identical to the reference Python encoder
/// when the matcher decisions agree (greedy longest-match here vs. greedy
/// longest-match there).
/// Compress a buffer. Greedy longest-match with a bounded hash chain.
pub fn compress(input: &[u8]) -> Vec<u8> {
// Pre-pend a 4 KB zero window so matches at the start can legitimately
// reference into the zero-prefilled history that the decoder synthesises.
@ -115,15 +127,17 @@ pub fn compress(input: &[u8]) -> Vec<u8> {
let mut pos = WINDOW;
while pos < buf_len {
// Reserve a flag byte we'll patch in once the group is done. Avoids the
// per-group scratch Vec the older impl allocated.
let flag_idx = out.len();
out.push(0);
let mut flag_byte: u8 = 0;
let mut group: Vec<u8> = Vec::with_capacity(8 * 2);
for bit in 0..8 {
if pos >= buf_len {
// Out of input mid-group: leave the bit as match (0). The
// decoder will read into our trailing 0x00 0x00 0x00 sentinel
// and exit on the position == 0 check before reaching this
// phantom slot.
// decoder reads our trailing 0x00 0x00 0x00 sentinel and
// exits on the position == 0 check before reaching this slot.
continue;
}
@ -132,10 +146,9 @@ pub fn compress(input: &[u8]) -> Vec<u8> {
if best_len >= THRESHOLD {
let dist = best_dist as u16;
let info: u16 = (dist << 4) | ((best_len - THRESHOLD) as u16);
group.extend_from_slice(&info.to_be_bytes());
out.extend_from_slice(&info.to_be_bytes());
// Insert hash entries for every position covered by the match,
// including the start (which we are about to skip past).
// Insert hash entries for every position covered by the match.
for k in 0..best_len {
let p = pos + k;
if p + 2 < buf_len {
@ -146,7 +159,7 @@ pub fn compress(input: &[u8]) -> Vec<u8> {
}
pos += best_len;
} else {
group.push(buf[pos]);
out.push(buf[pos]);
flag_byte |= 1 << bit;
if pos + 2 < buf_len {
@ -158,8 +171,7 @@ pub fn compress(input: &[u8]) -> Vec<u8> {
}
}
out.push(flag_byte);
out.extend_from_slice(&group);
out[flag_idx] = flag_byte;
}
// EOS sentinel: flag byte saying "next code is a match", then a 0x0000
@ -188,28 +200,24 @@ fn find_match(buf: &[u8], pos: usize, head: &[u32], prev: &[u32]) -> (usize, usi
let mut best_len = 0usize;
let mut best_dist = 0usize;
// Bound on chain depth — at N=4096 chains are short anyway, but cap to keep
// worst-case behaviour bounded on highly repetitive input.
let mut chain_remaining: u32 = 4096;
let mut chain_remaining = MAX_CHAIN;
while candidate != NIL {
let cand = candidate as usize;
if cand < limit {
break;
}
if chain_remaining == 0 {
break;
}
chain_remaining -= 1;
// Quick reject: if the byte at best_len doesn't match, skip.
if best_len > 0 && buf[cand + best_len] != buf[pos + best_len] {
// fall through to chain step
} else {
// Compare bytes up to max_len.
if best_len == 0 || buf[cand + best_len] == buf[pos + best_len] {
let mut len = 0usize;
while len < max_len && buf[cand + len] == buf[pos + len] {
len += 1;
}
if len > best_len {
best_len = len;
best_dist = pos - cand;
@ -219,9 +227,6 @@ fn find_match(buf: &[u8], pos: usize, head: &[u32], prev: &[u32]) -> (usize, usi
}
}
if chain_remaining == 0 {
break;
}
candidate = prev[cand & (WINDOW - 1)];
}