Skip to content

Commit

Permalink
Reduce unnecessary allocations and indirections
Browse files Browse the repository at this point in the history
* Changed literal_probs array from a Vec<Vec<u16>> to a Vec2D backed by a contiguous allocation
* BitTrees in LenDecoder and DecoderState are now stored inline. The actual BitTree data still
  lives in a Vec, but one level of indirection is removed.
* On reset, recreate the stack-allocated DecoderState arrays (dropping the existing ones)
  instead of filling them in place.
  • Loading branch information
chyyran committed Aug 17, 2022
1 parent 6e1f0d7 commit b90b484
Show file tree
Hide file tree
Showing 5 changed files with 216 additions and 42 deletions.
77 changes: 39 additions & 38 deletions src/decode/lzma.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer};
use crate::decode::rangecoder;
use crate::decode::rangecoder::RangeDecoder;
use crate::decompress::Options;
use crate::decompress::UnpackedSize;
use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder};
use crate::decompress::{Options, UnpackedSize};
use crate::error;
use crate::util::vec2d::Vec2D;
use byteorder::{LittleEndian, ReadBytesExt};
use std::io;

Expand Down Expand Up @@ -167,9 +166,9 @@ pub(crate) struct DecoderState {
partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>,
pub(crate) lzma_props: LzmaProperties,
unpacked_size: Option<u64>,
literal_probs: Vec<Vec<u16>>,
pos_slot_decoder: Vec<rangecoder::BitTree>,
align_decoder: rangecoder::BitTree,
literal_probs: Vec2D<u16>,
pos_slot_decoder: [BitTree; 4],
align_decoder: BitTree,
pos_decoders: [u16; 115],
is_match: [u16; 192], // true = LZ, false = literal
is_rep: [u16; 12],
Expand All @@ -179,8 +178,8 @@ pub(crate) struct DecoderState {
is_rep_0long: [u16; 192],
state: usize,
rep: [usize; 4],
len_decoder: rangecoder::LenDecoder,
rep_len_decoder: rangecoder::LenDecoder,
len_decoder: LenDecoder,
rep_len_decoder: LenDecoder,
}

impl DecoderState {
Expand All @@ -190,9 +189,14 @@ impl DecoderState {
partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]),
lzma_props,
unpacked_size,
literal_probs: vec![vec![0x400; 0x300]; 1 << (lzma_props.lc + lzma_props.lp)],
pos_slot_decoder: vec![rangecoder::BitTree::new(6); 4],
align_decoder: rangecoder::BitTree::new(4),
literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)),
pos_slot_decoder: [
BitTree::new(6),
BitTree::new(6),
BitTree::new(6),
BitTree::new(6),
],
align_decoder: BitTree::new(4),
pos_decoders: [0x400; 115],
is_match: [0x400; 192],
is_rep: [0x400; 12],
Expand All @@ -202,33 +206,33 @@ impl DecoderState {
is_rep_0long: [0x400; 192],
state: 0,
rep: [0; 4],
len_decoder: rangecoder::LenDecoder::new(),
rep_len_decoder: rangecoder::LenDecoder::new(),
len_decoder: LenDecoder::new(),
rep_len_decoder: LenDecoder::new(),
}
}

pub fn reset_state(&mut self, new_props: LzmaProperties) {
new_props.validate();
if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp {
// We can reset here by filling the existing buffer with 0x400.
self.literal_probs.iter_mut().for_each(|v| v.fill(0x400))
self.literal_probs.fill(0x400);
} else {
// We need to reallocate because of the new size of `lc+lp`.
self.literal_probs = vec![vec![0x400; 0x300]; 1 << (new_props.lc + new_props.lp)];
self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300));
}

self.lzma_props = new_props;
self.pos_slot_decoder.iter_mut().for_each(|t| t.reset());
self.align_decoder.reset();
self.pos_decoders.fill(0x400);
self.is_match.fill(0x400);
self.is_rep.fill(0x400);
self.is_rep_g0.fill(0x400);
self.is_rep_g1.fill(0x400);
self.is_rep_g2.fill(0x400);
self.is_rep_0long.fill(0x400);
self.pos_decoders = [0x400; 115];
self.is_match = [0x400; 192];
self.is_rep = [0x400; 12];
self.is_rep_g0 = [0x400; 12];
self.is_rep_g1 = [0x400; 12];
self.is_rep_g2 = [0x400; 12];
self.is_rep_0long = [0x400; 192];
self.state = 0;
self.rep.fill(0);
self.rep = [0; 4];
self.len_decoder.reset();
self.rep_len_decoder.reset();
}
Expand All @@ -240,7 +244,7 @@ impl DecoderState {
pub fn process<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
&mut self,
output: &mut LZB,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
) -> error::Result<()> {
self.process_mode(output, rangecoder, ProcessingMode::Finish)
}
Expand All @@ -249,7 +253,7 @@ impl DecoderState {
pub fn process_stream<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
&mut self,
output: &mut LZB,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
) -> error::Result<()> {
self.process_mode(output, rangecoder, ProcessingMode::Partial)
}
Expand All @@ -263,7 +267,7 @@ impl DecoderState {
fn process_next_inner<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
&mut self,
output: &mut LZB,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
update: bool,
) -> error::Result<ProcessingStatus> {
let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1);
Expand Down Expand Up @@ -380,7 +384,7 @@ impl DecoderState {
fn process_next<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
&mut self,
output: &mut LZB,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
) -> error::Result<ProcessingStatus> {
self.process_next_inner(output, rangecoder, true)
}
Expand All @@ -398,15 +402,15 @@ impl DecoderState {
code: u32,
) -> error::Result<()> {
let mut temp = std::io::Cursor::new(buf);
let mut rangecoder = rangecoder::RangeDecoder::from_parts(&mut temp, range, code);
let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code);
let _ = self.process_next_inner(output, &mut rangecoder, false)?;
Ok(())
}

/// Utility function to read data into the partial input buffer.
fn read_partial_input_buf<'a, R: io::BufRead>(
&mut self,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
) -> error::Result<()> {
// Fill as much of the tmp buffer as possible
let start = self.partial_input_buf.position() as usize;
Expand All @@ -420,7 +424,7 @@ impl DecoderState {
fn process_mode<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
&mut self,
output: &mut LZB,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
mode: ProcessingMode,
) -> error::Result<()> {
loop {
Expand Down Expand Up @@ -461,11 +465,8 @@ impl DecoderState {
// Run the decompressor on the tmp buffer
let mut tmp_reader =
io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]);
let mut tmp_rangecoder = rangecoder::RangeDecoder::from_parts(
&mut tmp_reader,
rangecoder.range,
rangecoder.code,
);
let mut tmp_rangecoder =
RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code);
let res = self.process_next(output, &mut tmp_rangecoder)?;

// Update the actual rangecoder
Expand Down Expand Up @@ -514,7 +515,7 @@ impl DecoderState {
fn decode_literal<'a, W: io::Write, LZB: LzBuffer<W>, R: io::BufRead>(
&mut self,
output: &mut LZB,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
update: bool,
) -> error::Result<u8> {
let def_prev_byte = 0u8;
Expand Down Expand Up @@ -550,7 +551,7 @@ impl DecoderState {

fn decode_distance<'a, R: io::BufRead>(
&mut self,
rangecoder: &mut rangecoder::RangeDecoder<'a, R>,
rangecoder: &mut RangeDecoder<'a, R>,
length: usize,
update: bool,
) -> error::Result<usize> {
Expand Down
42 changes: 38 additions & 4 deletions src/decode/rangecoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ impl BitTree {
pub struct LenDecoder {
choice: u16,
choice2: u16,
low_coder: Vec<BitTree>,
mid_coder: Vec<BitTree>,
low_coder: [BitTree; 16],
mid_coder: [BitTree; 16],
high_coder: BitTree,
}

Expand All @@ -200,8 +200,42 @@ impl LenDecoder {
LenDecoder {
choice: 0x400,
choice2: 0x400,
low_coder: vec![BitTree::new(3); 16],
mid_coder: vec![BitTree::new(3); 16],
low_coder: [
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
],
mid_coder: [
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
BitTree::new(3),
],
high_coder: BitTree::new(8),
}
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ mod encode;

pub mod error;

mod util;
mod xz;

use std::io;
Expand Down
1 change: 1 addition & 0 deletions src/util/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pub mod vec2d;
137 changes: 137 additions & 0 deletions src/util/vec2d.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
use std::ops::{Index, IndexMut};

/// A 2 dimensional matrix in row-major order backed by a single contiguous
/// boxed slice.
///
/// Indexing with a row number (`grid[row]`) yields that row as a slice, so
/// `grid[row][col]` addresses one element.
#[derive(Debug)]
pub struct Vec2D<T> {
/// Flat element storage; row `r` occupies `data[r * cols..(r + 1) * cols]`.
data: Box<[T]>,
/// Number of elements in each row.
cols: usize,
}

impl<T> Vec2D<T> {
    /// Creates a grid of `size.0` rows by `size.1` columns in which every cell
    /// holds a clone of `data`.
    ///
    /// # Panics
    ///
    /// Panics if the total number of cells (`rows * cols`) does not fit in a
    /// `usize`.
    pub fn init(data: T, size: (usize, usize)) -> Vec2D<T>
    where
        T: Clone,
    {
        let (rows, cols) = size;
        match rows.checked_mul(cols) {
            Some(cell_count) => {
                let cells = vec![data; cell_count];
                Vec2D {
                    data: cells.into_boxed_slice(),
                    cols,
                }
            }
            None => panic!("{} rows by {} cols exceeds usize::MAX", rows, cols),
        }
    }

    /// Overwrites every cell of the grid with a clone of `value`.
    pub fn fill(&mut self, value: T)
    where
        T: Clone,
    {
        let cells: &mut [T] = &mut self.data;
        cells.fill(value);
    }
}

impl<T> Index<usize> for Vec2D<T> {
    type Output = [T];

    /// Returns row `row` of the grid as a slice of `cols` elements.
    ///
    /// # Panics
    ///
    /// Panics if the row lies outside the backing storage, or if computing
    /// the row's offsets overflows `usize`.
    #[inline]
    fn index(&self, row: usize) -> &Self::Output {
        // Use checked arithmetic: a wrapping `row * cols` could land back
        // inside the allocation and silently return a misaligned slice when
        // overflow checks are disabled.
        let (start_row, end_row) = row
            .checked_mul(self.cols)
            .and_then(|start| start.checked_add(self.cols).map(|end| (start, end)))
            .unwrap_or_else(|| panic!("row index {} overflows usize::MAX", row));
        &self.data[start_row..end_row]
    }
}

impl<T> IndexMut<usize> for Vec2D<T> {
    /// Returns row `row` of the grid as a mutable slice of `cols` elements.
    ///
    /// # Panics
    ///
    /// Panics if the row lies outside the backing storage, or if computing
    /// the row's offsets overflows `usize`.
    #[inline]
    fn index_mut(&mut self, row: usize) -> &mut Self::Output {
        // Use checked arithmetic: a wrapping `row * cols` could land back
        // inside the allocation and let a write hit the wrong cells when
        // overflow checks are disabled.
        let (start_row, end_row) = row
            .checked_mul(self.cols)
            .and_then(|start| start.checked_add(self.cols).map(|end| (start, end)))
            .unwrap_or_else(|| panic!("row index {} overflows usize::MAX", row));
        &mut self.data[start_row..end_row]
    }
}

#[cfg(test)]
mod test {
    use super::*;

    /// Grid of two rows by three columns with every cell set to `1`.
    fn ones_2x3() -> Vec2D<i32> {
        Vec2D::init(1, (2, 3))
    }

    /// Three-by-three grid whose cells hold `0..9` in row-major order.
    fn counting_3x3() -> Vec2D<i32> {
        let cells: Vec<i32> = (0..9).collect();
        Vec2D {
            data: cells.into_boxed_slice(),
            cols: 3,
        }
    }

    // `init` must place the seed value in every cell of every row.
    #[test]
    fn vec2d_init() {
        let grid = ones_2x3();
        for row in 0..2 {
            assert_eq!(grid[row], [1; 3]);
        }
    }

    // `fill` must overwrite every cell of every row.
    #[test]
    fn vec2d_fill() {
        let mut grid = Vec2D::init(0, (2, 3));
        grid.fill(7);
        for row in 0..2 {
            assert_eq!(grid[row], [7; 3]);
        }
    }

    // Each row index must map to its own run of `cols` elements.
    #[test]
    fn vec2d_index() {
        let grid = counting_3x3();
        assert_eq!(grid[0], [0, 1, 2]);
        assert_eq!(grid[1], [3, 4, 5]);
        assert_eq!(grid[2], [6, 7, 8]);
    }

    // A write through `grid[row][col]` must touch only that one cell.
    #[test]
    fn vec2d_index_mut() {
        let mut grid = counting_3x3();

        grid[1][1] = 9;
        assert_eq!(grid[0], [0, 1, 2]);
        assert_eq!(grid[1], [3, 9, 5]);
        assert_eq!(grid[2], [6, 7, 8]);
    }

    // Row and column both out of range.
    #[test]
    #[should_panic]
    fn vec2d_index_out_of_bounds() {
        let grid = ones_2x3();
        let _cell = grid[2][4];
    }

    // Last row, column one past the end of the backing storage.
    #[test]
    #[should_panic]
    fn vec2d_index_out_of_bounds_vec_edge() {
        let grid = ones_2x3();
        let _cell = grid[1][3];
    }

    // First row, column that would spill into the next row.
    #[test]
    #[should_panic]
    fn vec2d_index_out_of_bounds_overflow() {
        let grid = ones_2x3();
        let _cell = grid[0][3];
    }

    // Mutable access: last row, column one past the end of the storage.
    #[test]
    #[should_panic]
    fn vec2d_indexmut_out_of_bounds_vec_edge() {
        let mut grid = ones_2x3();
        grid[1][3] = 0;
    }

    // Mutable access: first row, column that would spill into the next row.
    #[test]
    #[should_panic]
    fn vec2d_indexmut_out_of_bounds_overflow() {
        let mut grid = ones_2x3();
        grid[0][3] = 0;
    }

    // Mutable access: row and column both out of range.
    #[test]
    #[should_panic]
    fn vec2d_indexmut_out_of_bounds() {
        let mut grid = ones_2x3();
        grid[2][4] = 0;
    }
}

0 comments on commit b90b484

Please sign in to comment.