Skip to content

Commit

Permalink
zip: use []byte instead of string to prevent allocs (#537)
Browse files Browse the repository at this point in the history
goos: linux
goarch: amd64
pkg: github.com/gabriel-vasile/mimetype
cpu: Intel(R) Core(TM) i7-10510U CPU @ 1.80GHz
               │   master    │                 dev                  │
               │   sec/op    │    sec/op     vs base                │
Common/.xlsx-8   482.0n ± 4%   446.7n ±  3%   -7.33% (p=0.000 n=10)
Common/.pptx-8   1.999µ ± 3%   1.542µ ±  3%  -22.89% (p=0.000 n=10)
Common/.docx-8   1.501µ ± 2%   1.139µ ±  3%  -24.13% (p=0.000 n=10)
Common/.tar-8    1.134µ ± 3%   1.127µ ±  2%        ~ (p=0.670 n=10)
Common/.zip-8    882.4n ± 1%   778.6n ±  4%  -11.75% (p=0.000 n=10)
Common/.pdf-8    363.9n ± 2%   352.9n ±  2%   -3.02% (p=0.005 n=10)
Common/.jpg-8    508.9n ± 4%   504.3n ±  2%        ~ (p=0.190 n=10)
Common/.png-8    497.1n ± 3%   495.2n ±  2%        ~ (p=0.971 n=10)
Common/.gif-8    594.9n ± 5%   591.8n ±  2%        ~ (p=0.481 n=10)
Common/.xls-8    602.7n ± 4%   598.2n ± 11%        ~ (p=0.436 n=10)
Common/.webm-8   1.541µ ± 3%   1.549µ ±  3%        ~ (p=0.645 n=10)
Common/.csv-8    10.89µ ± 1%   10.96µ ±  1%        ~ (p=0.225 n=10)
geomean          981.7n        919.3n         -6.36%

               │    master    │                  dev                   │
               │     B/op     │     B/op      vs base                  │
Common/.xlsx-8     312.0 ± 0%     288.0 ± 0%   -7.69% (p=0.000 n=10)
Common/.pptx-8     592.0 ± 0%     288.0 ± 0%  -51.35% (p=0.000 n=10)
Common/.docx-8     504.0 ± 0%     288.0 ± 0%  -42.86% (p=0.000 n=10)
Common/.tar-8      200.0 ± 0%     200.0 ± 0%        ~ (p=1.000 n=10) ¹
Common/.zip-8      224.0 ± 0%     192.0 ± 0%  -14.29% (p=0.000 n=10)
Common/.pdf-8      192.0 ± 0%     192.0 ± 0%        ~ (p=1.000 n=10) ¹
Common/.jpg-8      192.0 ± 0%     192.0 ± 0%        ~ (p=1.000 n=10) ¹
Common/.png-8      192.0 ± 0%     192.0 ± 0%        ~ (p=1.000 n=10) ¹
Common/.gif-8      192.0 ± 0%     192.0 ± 0%        ~ (p=1.000 n=10) ¹
Common/.xls-8      288.0 ± 0%     288.0 ± 0%        ~ (p=1.000 n=10) ¹
Common/.webm-8     192.0 ± 0%     192.0 ± 0%        ~ (p=1.000 n=10) ¹
Common/.csv-8    7.331Ki ± 0%   7.332Ki ± 0%        ~ (p=0.179 n=10)
geomean            339.6          299.3       -11.85%
¹ all samples are equal

               │   master    │                 dev                  │
               │  allocs/op  │ allocs/op   vs base                  │
Common/.xlsx-8    4.000 ± 0%   3.000 ± 0%  -25.00% (p=0.000 n=10)
Common/.pptx-8   15.000 ± 0%   3.000 ± 0%  -80.00% (p=0.000 n=10)
Common/.docx-8   13.000 ± 0%   3.000 ± 0%  -76.92% (p=0.000 n=10)
Common/.tar-8     3.000 ± 0%   3.000 ± 0%        ~ (p=1.000 n=10) ¹
Common/.zip-8     6.000 ± 0%   2.000 ± 0%  -66.67% (p=0.000 n=10)
Common/.pdf-8     2.000 ± 0%   2.000 ± 0%        ~ (p=1.000 n=10) ¹
Common/.jpg-8     2.000 ± 0%   2.000 ± 0%        ~ (p=1.000 n=10) ¹
Common/.png-8     2.000 ± 0%   2.000 ± 0%        ~ (p=1.000 n=10) ¹
Common/.gif-8     2.000 ± 0%   2.000 ± 0%        ~ (p=1.000 n=10) ¹
Common/.xls-8     3.000 ± 0%   3.000 ± 0%        ~ (p=1.000 n=10) ¹
Common/.webm-8    2.000 ± 0%   2.000 ± 0%        ~ (p=1.000 n=10) ¹
Common/.csv-8     33.00 ± 0%   33.00 ± 0%        ~ (p=1.000 n=10) ¹
geomean           4.339        2.991       -31.05%
¹ all samples are equal
  • Loading branch information
gabriel-vasile authored Jun 2, 2024
1 parent 77e3848 commit cdceff9
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 42 deletions.
70 changes: 35 additions & 35 deletions internal/magic/ms_office.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,41 +6,41 @@ import (
)

// Signature path tables for the xlsx, docx and pptx detectors.
// This listing is a flattened diff: each table appears twice, first in its
// old []string form and then in its new [][]byte form.
// NOTE(review): presumably these are zip entry name prefixes matched against
// the file names inside the archive; the callers are not shown here, confirm.
var (
// Old side of the diff: signatures declared as strings.
xlsxSigFiles = []string{
"xl/worksheets/",
"xl/drawings/",
"xl/theme/",
"xl/_rels/",
"xl/styles.xml",
"xl/workbook.xml",
"xl/sharedStrings.xml",
}
docxSigFiles = []string{
"word/media/",
"word/_rels/document.xml.rels",
"word/document.xml",
"word/styles.xml",
"word/fontTable.xml",
"word/settings.xml",
"word/numbering.xml",
"word/header",
"word/footer",
}
pptxSigFiles = []string{
"ppt/slides/",
"ppt/media/",
"ppt/slideLayouts/",
"ppt/theme/",
"ppt/slideMasters/",
"ppt/tags/",
"ppt/notesMasters/",
"ppt/_rels/",
"ppt/handoutMasters/",
"ppt/notesSlides/",
"ppt/presentation.xml",
"ppt/tableStyles.xml",
"ppt/presProps.xml",
"ppt/viewProps.xml",
// New side of the diff: the same signatures, in the same order, as byte slices.
xlsxSigFiles = [][]byte{
[]byte("xl/worksheets/"),
[]byte("xl/drawings/"),
[]byte("xl/theme/"),
[]byte("xl/_rels/"),
[]byte("xl/styles.xml"),
[]byte("xl/workbook.xml"),
[]byte("xl/sharedStrings.xml"),
}
docxSigFiles = [][]byte{
[]byte("word/media/"),
[]byte("word/_rels/document.xml.rels"),
[]byte("word/document.xml"),
[]byte("word/styles.xml"),
[]byte("word/fontTable.xml"),
[]byte("word/settings.xml"),
[]byte("word/numbering.xml"),
[]byte("word/header"),
[]byte("word/footer"),
}
pptxSigFiles = [][]byte{
[]byte("ppt/slides/"),
[]byte("ppt/media/"),
[]byte("ppt/slideLayouts/"),
[]byte("ppt/theme/"),
[]byte("ppt/slideMasters/"),
[]byte("ppt/tags/"),
[]byte("ppt/notesMasters/"),
[]byte("ppt/_rels/"),
[]byte("ppt/handoutMasters/"),
[]byte("ppt/notesSlides/"),
[]byte("ppt/presentation.xml"),
[]byte("ppt/tableStyles.xml"),
[]byte("ppt/presProps.xml"),
[]byte("ppt/viewProps.xml"),
}
)

Expand Down
13 changes: 6 additions & 7 deletions internal/magic/zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package magic
import (
"bytes"
"encoding/binary"
"strings"
)

var (
Expand Down Expand Up @@ -43,7 +42,7 @@ func Zip(raw []byte, limit uint32) bool {

// Jar matches a Java archive file.
// It reports whether zipContains finds the "META-INF/MANIFEST.MF" path in raw.
// The limit argument is not used by this body.
func Jar(raw []byte, limit uint32) bool {
// Old side of the diff: the path is passed as a string.
return zipContains(raw, "META-INF/MANIFEST.MF")
// New side of the diff: the path is passed as a []byte.
return zipContains(raw, []byte("META-INF/MANIFEST.MF"))
}

// zipTokenizer holds the source zip file and scanned index.
Expand All @@ -54,7 +53,7 @@ type zipTokenizer struct {

// next returns the next file name from the zip headers.
// https://web.archive.org/web/20191129114319/https://users.cs.jmu.edu/buchhofp/forensics/formats/pkzip.html
func (t *zipTokenizer) next() (fileName string) {
func (t *zipTokenizer) next() (fileName []byte) {
if t.i > len(t.in) {
return
}
Expand All @@ -74,15 +73,15 @@ func (t *zipTokenizer) next() (fileName string) {
return
}
t.i += fNameOffset + fNameLen
return string(in[fNameOffset : fNameOffset+fNameLen])
return in[fNameOffset : fNameOffset+fNameLen]
}

// zipContains returns true if any file name in the zip file headers from in starts with one of the paths.
func zipContains(in []byte, paths ...string) bool {
func zipContains(in []byte, paths ...[]byte) bool {
t := zipTokenizer{in: in}
for i, tok := 0, t.next(); tok != ""; i, tok = i+1, t.next() {
for tok := t.next(); len(tok) != 0; tok = t.next() {
for p := range paths {
if strings.HasPrefix(tok, paths[p]) {
if bytes.HasPrefix(tok, paths[p]) {
return true
}
}
Expand Down

0 comments on commit cdceff9

Please sign in to comment.