Skip to content

Commit

Permalink
Merge pull request #85 from bodgit/sfx
Browse files Browse the repository at this point in the history
Add support for reading self-extracting archives
  • Loading branch information
bodgit authored Apr 26, 2023
2 parents 56e81dc + 5990375 commit 7a455e1
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 16 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Current status:
* Handles compressed headers (`7za a -mhc=on test.7z ...`).
* Handles password-protected versions of both of the above (`7za a -mhc=on|off -mhe=on -ppassword test.7z ...`).
* Handles archives split into multiple volumes (`7za a -v100m test.7z ...`).
* Handles self-extracting archives (`7za a -sfx archive.exe ...`).
* Validates CRC values as it parses the file.
* Supports BCJ2, Brotli, Bzip2, Copy, Deflate, Delta, LZ4, LZMA, LZMA2 and Zstandard methods.
* Implements the `fs.FS` interface so you can treat an opened 7-zip archive like a filesystem.
Expand Down
96 changes: 80 additions & 16 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,38 +257,99 @@ func (z *Reader) folderReader(si *streamsInfo, f int) (*folderReadCloser, uint32
return si.FolderReader(io.NewSectionReader(z.r, z.start, z.end-z.start), f, z.p)
}

//nolint:cyclop,funlen,gocognit
const (
	// chunkSize is the stride, in bytes, by which findSignature advances
	// through the input between reads.
	chunkSize = 4096
	// searchLimit bounds the offsets at which findSignature will start a
	// read; no read begins at or beyond it.
	searchLimit = 1 << 20 // 1 MiB
)

// findSignature scans r for occurrences of search and returns the absolute
// offset of every match, in ascending order and without duplicates.
//
// Reads begin at multiples of chunkSize below searchLimit. Each read is
// chunkSize+len(search)-1 bytes long, so consecutive reads overlap by exactly
// len(search)-1 bytes: a match straddling a chunk boundary is still seen in
// full, while a match starting exactly on a boundary is seen by one read only.
//
// If the very first match is at offset 0 the scan stops immediately and only
// that offset is returned. Reaching io.EOF ends the scan normally; any other
// read error is returned as-is with a nil slice. An empty search yields no
// matches.
func findSignature(r io.ReaderAt, search []byte) ([]int64, error) {
	if len(search) == 0 {
		// bytes.Index matches an empty needle at every position, which
		// would run the inner loop off the end of the buffer.
		return nil, nil
	}

	var (
		offset  int64
		offsets []int64
	)

	// The overlap must be len(search)-1, not len(search): with a full
	// len(search) overlap a match starting exactly at a chunk boundary is
	// found by two consecutive reads and reported twice.
	chunk := make([]byte, chunkSize+len(search)-1)

	for offset < searchLimit {
		// A short read still carries valid data in chunk[:n], so search
		// it before looking at err.
		n, err := r.ReadAt(chunk, offset)

		for i := 0; ; {
			idx := bytes.Index(chunk[i:n], search)
			if idx == -1 {
				break
			}

			offsets = append(offsets, offset+int64(i+idx))
			if offsets[0] == 0 {
				// If signature is at the beginning, return immediately, it's a regular archive
				return offsets, nil
			}

			// Resume one byte past the start of this match.
			i += idx + 1
		}

		if err != nil {
			if errors.Is(err, io.EOF) {
				break
			}

			return nil, err
		}

		offset += chunkSize
	}

	return offsets, nil
}

//nolint:cyclop,funlen,gocognit,gocyclo
func (z *Reader) init(r io.ReaderAt, size int64) error {
h := crc32.NewIEEE()
tra := plumbing.TeeReaderAt(r, h)
sr := io.NewSectionReader(tra, 0, size) // Will only read first 32 bytes

var sh signatureHeader
if err := binary.Read(sr, binary.LittleEndian, &sh); err != nil {
signature := []byte{'7', 'z', 0xbc, 0xaf, 0x27, 0x1c}

offsets, err := findSignature(r, signature)
if err != nil {
return err
}

signature := []byte{'7', 'z', 0xbc, 0xaf, 0x27, 0x1c}
if !bytes.Equal(sh.Signature[:], signature) {
if len(offsets) == 0 {
return errFormat
}

z.r = r

h.Reset()

var (
err error
sr *io.SectionReader
off int64
start startHeader
)

if err = binary.Read(sr, binary.LittleEndian, &start); err != nil {
return err
for _, off = range offsets {
sr = io.NewSectionReader(tra, off, size-off) // Will only read first 32 bytes

var sh signatureHeader
if err = binary.Read(sr, binary.LittleEndian, &sh); err != nil {
return err
}

z.r = r

h.Reset()

if err = binary.Read(sr, binary.LittleEndian, &start); err != nil {
return err
}

// CRC of the start header should match
if util.CRC32Equal(h.Sum(nil), sh.CRC) {
break
}

err = errChecksum
}

// CRC of the start header should match
if !util.CRC32Equal(h.Sum(nil), sh.CRC) {
return errChecksum
if err != nil {
return err
}

// Work out where we are in the file (32, avoiding magic numbers)
Expand All @@ -301,6 +362,9 @@ func (z *Reader) init(r io.ReaderAt, size int64) error {
return err
}

z.start += off
z.end += off

h.Reset()

// Bound bufio.Reader otherwise it can read trailing garbage which screws up the CRC check
Expand Down
4 changes: 4 additions & 0 deletions reader_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ func TestOpenReader(t *testing.T) {
name: "zstd",
file: "zstd.7z",
},
{
name: "sfx",
file: "sfx.exe",
},
}

for _, table := range tables {
Expand Down
Binary file added testdata/sfx.exe
Binary file not shown.

0 comments on commit 7a455e1

Please sign in to comment.