Skip to content

Commit

Permalink
Do not recognize text files as audio (#23355)
Browse files Browse the repository at this point in the history
Close #17108

This PR uses a trick (removing the ID3 tag) to detect the content again
to see whether the content is actually of a text type.

---------

Co-authored-by: delvh <dev.lh@web.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
  • Loading branch information
4 people authored Mar 8, 2023
1 parent b70c7f8 commit 7e3b7c2
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 0 deletions.
10 changes: 10 additions & 0 deletions modules/typesniffer/typesniffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ func DetectContentType(data []byte) SniffedType {
}
}

if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
ct2 := http.DetectContentType(data[3:])
if strings.HasPrefix(ct2, "text/") {
ct = ct2
}
}

return SniffedType{ct}
}

Expand Down
4 changes: 4 additions & 0 deletions modules/typesniffer/typesniffer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ func TestIsAudio(t *testing.T) {
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
assert.True(t, DetectContentType(mp3).IsAudio())
assert.False(t, DetectContentType([]byte("plain text")).IsAudio())

assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text
assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
}

func TestDetectContentTypeFromReader(t *testing.T) {
Expand Down

0 comments on commit 7e3b7c2

Please sign in to comment.