Skip to content

Commit

Permalink
Add BCJ2 decompressor
Browse files Browse the repository at this point in the history
  • Loading branch information
bodgit committed Apr 29, 2022
1 parent ac766fa commit 74eef47
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ Current status:
* Handles password-protected versions of both of the above (`7za a -mhc=on|off -mhe=on -ppassword test.7z ...`).
* Handles archives split into multiple volumes (`7za a -v100m test.7z ...`).
* Validates CRC values as it parses the file.
* Supports Bzip2, Deflate, Copy, LZMA and LZMA2 methods.
* Supports BCJ2, Bzip2, Deflate, Copy, LZMA and LZMA2 methods.

More examples of 7-zip archives are needed to test all of the different combinations/algorithms possible.
192 changes: 192 additions & 0 deletions internal/bcj2/reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package bcj2

import (
"bytes"
"encoding/binary"
"errors"
"io"

"github.com/bodgit/sevenzip/internal/util"
"github.com/hashicorp/go-multierror"
)

// Parameters of the adaptive binary range decoder used by this package.
const (
// numMoveBits is the shift applied when a probability is nudged towards
// the value of the bit that was just decoded.
numMoveBits = 5
// numbitModelTotalBits is the number of bits the range is shifted down by
// before being multiplied by a probability.
numbitModelTotalBits = 11
// bitModelTotal is the probability scale; every probability starts out at
// half of this value.
bitModelTotal uint = 1 << numbitModelTotalBits
// numTopBits defines topValue below.
numTopBits = 24
// topValue is the renormalisation threshold: once the range drops below
// it, another byte is shifted into the decoder state.
topValue uint = 1 << numTopBits
)

// isJcc reports whether the byte pair b0, b1 is 0x0f followed by a byte whose
// high nibble is 0x8 (0x80 through 0x8f).
func isJcc(b0, b1 byte) bool {
	if b0 != 0x0f {
		return false
	}

	return b1>>4 == 0x8
}

// isJ reports whether b1 is 0xe8 or 0xe9, or whether the pair b0, b1 satisfies
// isJcc. b0 is the byte preceding b1.
func isJ(b0, b1 byte) bool {
	if b1 == 0xe8 || b1 == 0xe9 {
		return true
	}

	return isJcc(b0, b1)
}

// index selects which probability slot to use for the byte pair b0, b1:
// the value of b0 (0-255) when b1 is 0xe8, slot 256 when b1 is 0xe9, and
// slot 257 for everything else.
func index(b0, b1 byte) int {
	if b1 == 0xe8 {
		return int(b0)
	}

	if b1 == 0xe9 {
		return 256
	}

	return 257
}

// readCloser is the BCJ2 decoder state. It combines four input streams into a
// single output stream exposed through Read.
type readCloser struct {
// main is read one byte at a time and copied to the output.
main util.ReadCloser
// call supplies the 32-bit operands used after a 0xe8 byte.
call io.ReadCloser
// jump supplies the 32-bit operands used after any other matched byte.
jump io.ReadCloser

// rd feeds bytes to the range decoder.
rd util.ReadCloser
// nrange and code are the range decoder's current range and code values.
nrange uint
code uint

// sd holds one adaptive probability per value returned by index.
sd [256 + 2]uint

// previous is the byte that preceded the one currently being examined.
previous byte
// written counts the bytes placed in buf so far; it is used to turn the
// operands read from call/jump into relative values.
written uint64

// buf stages decoded output until Read hands it to the caller.
buf *bytes.Buffer
}

// NewReader constructs a BCJ2 decoder and returns it as an io.ReadCloser.
//
// The first two arguments are ignored. readers must contain exactly four
// streams, in this order: the main stream, the call stream, the jump stream
// and the range decoder stream. The range decoder is primed by consuming the
// first five bytes of the fourth stream; an error is returned if they cannot
// be read.
func NewReader(_ []byte, _ uint64, readers []io.ReadCloser) (io.ReadCloser, error) {
	if len(readers) != 4 {
		return nil, errors.New("bcj2: need exactly four readers")
	}

	rc := &readCloser{
		main:   util.ByteReadCloser(readers[0]),
		call:   readers[1],
		jump:   readers[2],
		rd:     util.ByteReadCloser(readers[3]),
		nrange: 0xffffffff,
		buf:    new(bytes.Buffer),
	}
	rc.buf.Grow(1 << 16)

	// Seed the code value with the first five bytes of the range stream.
	var seed [5]byte
	if _, err := io.ReadFull(rc.rd, seed[:]); err != nil {
		return nil, err
	}

	for i := 0; i < len(seed); i++ {
		rc.code = rc.code<<8 | uint(seed[i])
	}

	// Every probability starts at the midpoint of the scale.
	for i := 0; i < len(rc.sd); i++ {
		rc.sd[i] = bitModelTotal >> 1
	}

	return rc, nil
}

// Close closes all four underlying streams and returns any failures combined
// into a single error. After the first call the reader is marked as closed:
// further calls to Close do nothing and return nil, and Read returns an error.
func (rc *readCloser) Close() error {
	if rc.main == nil {
		return nil
	}

	var err *multierror.Error
	err = multierror.Append(err, rc.main.Close(), rc.call.Close(), rc.jump.Close(), rc.rd.Close())

	// Clearing main is what marks the reader as closed. Without it the
	// "Read after Close" guard in Read can never trigger and a second Close
	// would close the underlying streams again.
	rc.main = nil

	return err.ErrorOrNil()
}

// Read implements io.Reader. Each call decodes a further chunk of the input
// into the internal buffer and then copies as much buffered output as fits
// into p. It returns an error if the reader has already been closed.
func (rc *readCloser) Read(p []byte) (int, error) {
	if rc.main == nil {
		return 0, errors.New("bcj2: Read after Close")
	}

	// Reaching the end of the input is not a failure here: whatever is
	// already buffered still has to be delivered, and the buffer reports
	// io.EOF by itself once it is drained. errors.Is is used so that a
	// wrapped io.EOF is recognised as well.
	if err := rc.read(); err != nil && !errors.Is(err, io.EOF) {
		return 0, err
	}

	return rc.buf.Read(p)
}

// update renormalises the range decoder. When the range has dropped below
// topValue it reads one byte from the range stream, shifts it into code and
// scales the range up by 8 bits. Any error from reading that byte is returned
// and the state is left untouched.
func (rc *readCloser) update() error {
	if rc.nrange >= topValue {
		return nil
	}

	next, err := rc.rd.ReadByte()
	if err != nil {
		return err
	}

	rc.nrange <<= 8
	rc.code = rc.code<<8 | uint(next)

	return nil
}

// decode decodes a single bit using the adaptive probability in slot i,
// updates that probability towards the decoded value and renormalises the
// range decoder.
//
// It returns the decoded bit. A failure to read from the range stream during
// renormalisation is returned as an error, except for io.EOF: running out of
// range bytes does not invalidate the bit that has already been decoded, so
// the bit is still returned.
func (rc *readCloser) decode(i int) (bool, error) {
	newBound := (rc.nrange >> numbitModelTotalBits) * rc.sd[i]

	bit := rc.code >= newBound
	if bit {
		// Upper part of the range: a 1 bit, lower the probability.
		rc.nrange -= newBound
		rc.code -= newBound
		rc.sd[i] -= rc.sd[i] >> numMoveBits
	} else {
		// Lower part of the range: a 0 bit, raise the probability.
		rc.nrange = newBound
		rc.sd[i] += (bitModelTotal - rc.sd[i]) >> numMoveBits
	}

	// Previously every error from update was discarded, and a failure in the
	// 1-bit case additionally reported the bit as 0. Surface real I/O errors
	// and always report the bit that was actually decoded.
	if err := rc.update(); err != nil && !errors.Is(err, io.EOF) {
		return false, err
	}

	return bit, nil
}

// read decodes one chunk of output into rc.buf.
//
// Bytes are copied from the main stream until either the buffer length equals
// its capacity (in which case read returns) or the current byte, together with
// the previous one, satisfies isJ. For such a byte one bit is decoded from the
// range decoder. If the bit is set, a big-endian 32-bit value is read from the
// call stream (for 0xe8) or the jump stream (otherwise), the output position
// just past the operand is subtracted from it, and the result is appended in
// little-endian order.
//
// Errors from the underlying streams, including io.EOF, are returned as-is.
func (rc *readCloser) read() error {
	var cur byte

	for {
		next, err := rc.main.ReadByte()
		if err != nil {
			return err
		}
		cur = next

		rc.written++
		_ = rc.buf.WriteByte(cur)

		if isJ(rc.previous, cur) {
			break
		}
		rc.previous = cur

		if rc.buf.Len() == rc.buf.Cap() {
			return nil
		}
	}

	converted, err := rc.decode(index(rc.previous, cur))
	if err != nil {
		return err
	}

	if !converted {
		// No operand follows in the side streams; carry on after this byte.
		rc.previous = cur

		return nil
	}

	src := io.Reader(rc.jump)
	if cur == 0xe8 {
		src = rc.call
	}

	var dest uint32
	if err := binary.Read(src, binary.BigEndian, &dest); err != nil {
		return err
	}

	// Subtract the position just past the four operand bytes.
	dest -= uint32(rc.written + 4)
	_ = binary.Write(rc.buf, binary.LittleEndian, dest)

	// The last operand byte written (little-endian) is the high byte.
	rc.previous = byte(dest >> 24)
	rc.written += 4

	return nil
}
4 changes: 4 additions & 0 deletions reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,7 @@ func BenchmarkLZMA(b *testing.B) {
// BenchmarkLZMA2 passes the "lzma2.7z" archive name and b to the
// benchmarkArchive helper.
func BenchmarkLZMA2(b *testing.B) {
benchmarkArchive("lzma2.7z", b)
}

// BenchmarkBCJ2 passes the "bcj2.7z" archive name and b to the
// benchmarkArchive helper.
func BenchmarkBCJ2(b *testing.B) {
benchmarkArchive("bcj2.7z", b)
}
3 changes: 3 additions & 0 deletions register.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"sync"

"github.com/bodgit/sevenzip/internal/aes7z"
"github.com/bodgit/sevenzip/internal/bcj2"
"github.com/bodgit/sevenzip/internal/bzip2"
"github.com/bodgit/sevenzip/internal/deflate"
"github.com/bodgit/sevenzip/internal/lzma"
Expand All @@ -31,6 +32,8 @@ func init() {
}))
// LZMA
RegisterDecompressor([]byte{0x03, 0x01, 0x01}, Decompressor(lzma.NewReader))
// BCJ2
RegisterDecompressor([]byte{0x03, 0x03, 0x01, 0x1b}, Decompressor(bcj2.NewReader))
// Deflate
RegisterDecompressor([]byte{0x04, 0x01, 0x08}, Decompressor(deflate.NewReader))
// Bzip2
Expand Down
Binary file added testdata/bcj2.7z
Binary file not shown.

0 comments on commit 74eef47

Please sign in to comment.