Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Zip file tokenization #471

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 45 additions & 16 deletions internal/magic/zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ var (
Sxc = offset([]byte("mimetypeapplication/vnd.sun.xml.calc"), 30)
)

var zipHeader = []byte("PK\u0003\u0004")

// Zip matches a zip archive.
func Zip(raw []byte, limit uint32) bool {
return len(raw) > 3 &&
Expand All @@ -55,26 +57,53 @@ type zipTokenizer struct {
// next returns the next file name from the zip headers.
// https://web.archive.org/web/20191129114319/https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html
func (t *zipTokenizer) next() (fileName string) {
if t.i > len(t.in) {
return
// Exit when fewer bytes remain than the 30-byte fixed part of the local file header.
if len(t.in)-t.i < 30 {
zekroTJA marked this conversation as resolved.
Show resolved Hide resolved
return ""
}

in := t.in[t.i:]
// pkSig is the signature of the zip local file header.
pkSig := []byte("PK\003\004")
pkIndex := bytes.Index(in, pkSig)
// 30 is the offset of the file name in the header.
fNameOffset := pkIndex + 30
// Stop if the signature is not found or the file name offset lies outside the input.
if pkIndex == -1 || fNameOffset > len(in) {
return
}

fNameLen := int(binary.LittleEndian.Uint16(in[pkIndex+26 : pkIndex+28]))
if fNameLen <= 0 || fNameOffset+fNameLen > len(in) {
return
offset := 0

// Read the first 4 bytes and look for the file header signature.
// If it is not at the start of buf, then set the current index to
// the first occurrence of the first byte of the signature in buf
// and re-run.
buf := in[offset : offset+4]
offset += 4
if !bytes.Equal(buf, zipHeader) {
i := bytes.IndexByte(buf, zipHeader[0])
zekroTJA marked this conversation as resolved.
Show resolved Hide resolved
t.i += offset
if i > 0 {
t.i += len(zipHeader) - i
}
return t.next()
}
t.i += fNameOffset + fNameLen
return string(in[fNameOffset : fNameOffset+fNameLen])

offset += 14

buf = in[offset : offset+4]
offset += 4
compressedSize := binary.LittleEndian.Uint32(buf)

offset += 4
buf = in[offset : offset+2]
offset += 2
fileNameLength := binary.LittleEndian.Uint16(buf)

buf = in[offset : offset+2]
offset += 2
extraFieldsLength := binary.LittleEndian.Uint16(buf)

buf = in[offset : offset+int(fileNameLength)]
zekroTJA marked this conversation as resolved.
Show resolved Hide resolved
offset += int(fileNameLength)

offset += int(extraFieldsLength) + int(compressedSize)
zekroTJA marked this conversation as resolved.
Show resolved Hide resolved

t.i += offset

return string(buf)
}

// zipContains returns true if the zip file headers from in contain any of the paths.
Expand Down
Loading