Skip to content

Commit

Permalink
Switch to a more comprehensive mimetype detection library (#231)
Browse files Browse the repository at this point in the history
  • Loading branch information
Infinoid authored Aug 3, 2020
1 parent 6ce2bd6 commit 5eb6f32
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 21 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ require (
github.com/dchest/uniuri v0.0.0-20200228104902-7aecb25e1fe5
github.com/dustin/go-humanize v1.0.0
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4
github.com/gabriel-vasile/mimetype v1.1.1
github.com/microcosm-cc/bluemonday v1.0.2
github.com/minio/sha256-simd v0.1.1
github.com/russross/blackfriday v1.5.1
github.com/vharitonsky/iniflags v0.0.0-20180513140207-a33cd0b5f3de
github.com/zeebo/bencode v1.0.0
github.com/zenazn/goji v0.9.0
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073
gopkg.in/h2non/filetype.v1 v1.0.5
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4 h1:GY1+t5Dr9OKADM64SYnQjw/w99HMYvQ0A8/JoUkxVmc=
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA=
github.com/gabriel-vasile/mimetype v1.1.1 h1:qbN9MPuRf3bstHu9zkI9jDWNfH//9+9kHxr9oRBBBOA=
github.com/gabriel-vasile/mimetype v1.1.1/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJYCFOLkIBwI7xFExG03bbkOkCvUPI=
github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
Expand Down Expand Up @@ -68,8 +70,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/h2non/filetype.v1 v1.0.5 h1:CC1jjJjoEhNVbMhXYalmGBhOBK2V70Q1N850wt/98/Y=
gopkg.in/h2non/filetype.v1 v1.0.5/go.mod h1:M0yem4rwSX5lLVrkEuRRp2/NinFMD5vgJ4DlAhZcfNo=
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce h1:xcEWjVhvbDy+nHP67nPDDpbYrY+ILlfndk4bRioVHaU=
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
Expand Down
17 changes: 4 additions & 13 deletions helpers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import (
"unicode"

"github.com/andreimarcu/linx-server/backends"
"github.com/gabriel-vasile/mimetype"
"github.com/minio/sha256-simd"
"gopkg.in/h2non/filetype.v1"
)

func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {
Expand All @@ -21,7 +21,7 @@ func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {

// Get first 512 bytes for mimetype detection
header := make([]byte, 512)
_, err = teeReader.Read(header)
headerlen, err := teeReader.Read(header)
if err != nil {
return
}
Expand All @@ -47,17 +47,8 @@ func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {

// Use the bytes we extracted earlier and attempt to determine the file
// type
kind, err := filetype.Match(header)
if err != nil {
m.Mimetype = "application/octet-stream"
return m, err
} else if kind.MIME.Value != "" {
m.Mimetype = kind.MIME.Value
} else if printable(header) {
m.Mimetype = "text/plain"
} else {
m.Mimetype = "application/octet-stream"
}
kind := mimetype.Detect(header[:headerlen])
m.Mimetype = kind.String()

return
}
Expand Down
46 changes: 45 additions & 1 deletion helpers/helpers_test.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package helpers

import (
"bytes"
"strings"
"testing"
"unicode/utf16"
)

func TestGenerateMetadata(t *testing.T) {
Expand All @@ -17,7 +19,7 @@ func TestGenerateMetadata(t *testing.T) {
t.Fatalf("Sha256sum was %q instead of expected value of %q", m.Sha256sum, expectedSha256sum)
}

expectedMimetype := "text/plain"
expectedMimetype := "text/plain; charset=utf-8"
if m.Mimetype != expectedMimetype {
t.Fatalf("Mimetype was %q instead of expected value of %q", m.Mimetype, expectedMimetype)
}
Expand All @@ -27,3 +29,45 @@ func TestGenerateMetadata(t *testing.T) {
t.Fatalf("Size was %d instead of expected value of %d", m.Size, expectedSize)
}
}

// TestTextCharsets verifies that different text encodings are detected and
// passed through by GenerateMetadata. The same string is supplied as UTF-8
// and as UTF-16 in both byte orders (each prefixed with its byte order mark),
// and the reported Mimetype is compared with the expected
// "text/plain; charset=..." value.
func TestTextCharsets(t *testing.T) {
	orig := "This is a text string"

	// Named "units" rather than "utf16" so the imported utf16 package is not
	// shadowed for the remainder of the function.
	units := utf16.Encode([]rune(orig))

	// Two bytes per UTF-16 code unit, plus two bytes for the byte order mark.
	encodedLen := len(units)*2 + 2
	utf16LE := append(make([]byte, 0, encodedLen), 0xff, 0xfe)
	utf16BE := append(make([]byte, 0, encodedLen), 0xfe, 0xff)
	for _, unit := range units {
		lsb, msb := byte(unit), byte(unit>>8)
		utf16LE = append(utf16LE, lsb, msb)
		utf16BE = append(utf16BE, msb, lsb)
	}

	testcases := []struct {
		data     []byte
		mimetype string
	}{
		{mimetype: "text/plain; charset=utf-8", data: []byte(orig)},
		{mimetype: "text/plain; charset=utf-16le", data: utf16LE},
		{mimetype: "text/plain; charset=utf-16be", data: utf16BE},
	}

	for i, testcase := range testcases {
		m, err := GenerateMetadata(bytes.NewReader(testcase.data))
		if err != nil {
			// Abort outright: metadata generation itself failed, so the
			// remaining cases would not be meaningful.
			t.Fatalf("[%d] unexpected error return %v\n", i, err)
		}
		if m.Mimetype != testcase.mimetype {
			// Errorf (not Fatalf) so every mismatching charset is reported.
			t.Errorf("[%d] Expected mimetype '%s', got mimetype '%s'\n", i, testcase.mimetype, m.Mimetype)
		}
	}
}
8 changes: 4 additions & 4 deletions upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import (
"github.com/andreimarcu/linx-server/backends"
"github.com/andreimarcu/linx-server/expiry"
"github.com/dchest/uniuri"
"github.com/gabriel-vasile/mimetype"
"github.com/zenazn/goji/web"
"gopkg.in/h2non/filetype.v1"
)

var FileTooLargeError = errors.New("File too large.")
Expand Down Expand Up @@ -263,11 +263,11 @@ func processUpload(upReq UploadRequest) (upload Upload, err error) {
header = header[:n]

// Determine the type of file from header
kind, err := filetype.Match(header)
if err != nil || kind.Extension == "unknown" {
kind := mimetype.Detect(header)
if len(kind.Extension()) < 2 {
extension = "file"
} else {
extension = kind.Extension
extension = kind.Extension()[1:] // remove leading "."
}
}

Expand Down

0 comments on commit 5eb6f32

Please sign in to comment.