Skip to content

Commit

Permalink
Add support for UTF-16
Browse files Browse the repository at this point in the history
  • Loading branch information
gabriel-vasile committed Dec 23, 2019
1 parent 3011b1a commit 798d7a4
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 41 deletions.
16 changes: 14 additions & 2 deletions internal/matchers/text.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,20 @@ var (
}
)

// Txt matches a text file.
func Txt(in []byte) bool {
// Utf16be reports whether in starts with the UTF-16 big endian byte order
// mark (0xFE 0xFF). Only those two leading bytes are inspected; the rest of
// the input is not examined.
func Utf16be(in []byte) bool {
	// Anything shorter than the mark itself cannot carry it.
	if len(in) < 2 {
		return false
	}
	mark := []byte{0xFE, 0xFF}
	return bytes.Equal(in[:2], mark)
}

// Utf16le reports whether in starts with the UTF-16 little endian byte order
// mark (0xFF 0xFE). Only those two leading bytes are inspected; the rest of
// the input is not examined.
func Utf16le(in []byte) bool {
	// Anything shorter than the mark itself cannot carry it.
	if len(in) < 2 {
		return false
	}
	mark := []byte{0xFF, 0xFE}
	return bytes.Equal(in[:2], mark)
}

// Utf8 matches a UTF-8 text file.
func Utf8(in []byte) bool {
in = trimLWS(in)
for _, b := range in {
if b <= 0x08 ||
Expand Down
4 changes: 3 additions & 1 deletion mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ var files = map[string]*MIME{
"html.withbr.html": html,
"svg.svg": svg,
"svg.1.svg": svg,
"txt.txt": txt,
"utf8.txt": utf8,
"utf16lebom.txt": utf16le,
"utf16bebom.txt": utf16be,
"php.php": php,
"ps.ps": ps,
"json.json": json,
Expand Down
64 changes: 33 additions & 31 deletions supported_mimes.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 137 Supported MIME types
## 139 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.

Extension | MIME type | Aliases
Expand Down Expand Up @@ -77,36 +77,8 @@ Extension | MIME type | Aliases
**.voc** | audio/x-unknown | -
**.mp4** | audio/mp4 | audio/x-m4a, audio/x-mp4a
**.m4a** | audio/x-m4a | -
**.txt** | text/plain; charset=utf-8 | -
**.html** | text/html; charset=utf-8 | -
**.svg** | image/svg+xml | -
**.xml** | text/xml; charset=utf-8 | -
**.rss** | application/rss+xml | text/rss
**.atom** | application/atom+xml | -
**.x3d** | model/x3d+xml | -
**.kml** | application/vnd.google-earth.kml+xml | -
**.xlf** | application/x-xliff+xml | -
**.dae** | model/vnd.collada+xml | -
**.gml** | application/gml+xml | -
**.gpx** | application/gpx+xml | -
**.tcx** | application/vnd.garmin.tcx+xml | -
**.amf** | application/x-amf | -
**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | -
**.php** | text/x-php; charset=utf-8 | -
**.js** | application/javascript | application/x-javascript, text/javascript
**.lua** | text/x-lua | -
**.pl** | text/x-perl | -
**.py** | application/x-python | -
**.json** | application/json | -
**.geojson** | application/geo+json | -
**.ndjson** | application/x-ndjson | -
**.rtf** | text/rtf | -
**.tcl** | text/x-tcl | application/x-tcl
**.csv** | text/csv | -
**.tsv** | text/tab-separated-values | -
**.vcf** | text/vcard | -
**.ics** | text/calendar | -
**.warc** | application/warc | -
**.txt** | text/plain; charset=utf-16le | -
**.txt** | text/plain; charset=utf-16be | -
**.gz** | application/gzip | application/x-gzip, application/x-gunzip, application/gzipped, application/gzip-compressed, application/x-gzip-compressed, gzip/document
**.class** | application/x-java-applet; charset=binary | -
**.swf** | application/x-shockwave-flash | -
Expand Down Expand Up @@ -140,3 +112,33 @@ Extension | MIME type | Aliases
**.accdb** | application/x-msaccess | -
**.zst** | application/zstd | -
**.cab** | application/vnd.ms-cab-compressed | -
**.txt** | text/plain; charset=utf-8 | -
**.html** | text/html; charset=utf-8 | -
**.svg** | image/svg+xml | -
**.xml** | text/xml; charset=utf-8 | -
**.rss** | application/rss+xml | text/rss
**.atom** | application/atom+xml | -
**.x3d** | model/x3d+xml | -
**.kml** | application/vnd.google-earth.kml+xml | -
**.xlf** | application/x-xliff+xml | -
**.dae** | model/vnd.collada+xml | -
**.gml** | application/gml+xml | -
**.gpx** | application/gpx+xml | -
**.tcx** | application/vnd.garmin.tcx+xml | -
**.amf** | application/x-amf | -
**.3mf** | application/vnd.ms-package.3dmanufacturing-3dmodel+xml | -
**.php** | text/x-php; charset=utf-8 | -
**.js** | application/javascript | application/x-javascript, text/javascript
**.lua** | text/x-lua | -
**.pl** | text/x-perl | -
**.py** | application/x-python | -
**.json** | application/json | -
**.geojson** | application/geo+json | -
**.ndjson** | application/x-ndjson | -
**.rtf** | text/rtf | -
**.tcl** | text/x-tcl | application/x-tcl
**.csv** | text/csv | -
**.tsv** | text/tab-separated-values | -
**.vcf** | text/vcard | -
**.ics** | text/calendar | -
**.warc** | application/warc | -
Binary file added testdata/utf16bebom.txt
Binary file not shown.
Binary file added testdata/utf16lebom.txt
Binary file not shown.
File renamed without changes.
17 changes: 10 additions & 7 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ import "github.com/gabriel-vasile/mimetype/internal/matchers"
// When a matcher passes the check, the children matchers
// are tried in order to find a more accurate mime type.
var root = newMIME("application/octet-stream", "", matchers.True,
sevenZ, zip, pdf, ole, ps, psd, ogg, png, jpg, jp2, jpx, jpm, gif, webp, exe, elf,
ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape, musePack, amr,
wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP, threeG2, avi, flv,
mkv, asf, aac, voc, aMp4, m4a, txt, gzip, class, swf, crx, woff, woff2, otf,
eot, wasm, shx, dbf, dcm, rar, djvu, mobi, lit, bpg, sqlite3, dwg, nes, macho,
qcp, icns, heic, heicSeq, heif, heifSeq, mrc, mdb, accdb, zstd, cab,
sevenZ, zip, pdf, ole, ps, psd, ogg, png, jpg, jp2, jpx, jpm, gif, webp,
exe, elf, ar, tar, xar, bz2, fits, tiff, bmp, ico, mp3, flac, midi, ape,
musePack, amr, wav, aiff, au, mpeg, quickTime, mqv, mp4, webM, threeGP,
threeG2, avi, flv, mkv, asf, aac, voc, aMp4, m4a, utf16le, utf16be, gzip,
class, swf, crx, woff, woff2, otf, eot, wasm, shx, dbf, dcm, rar, djvu,
mobi, lit, bpg, sqlite3, dwg, nes, macho, qcp, icns, heic, heicSeq, heif,
heifSeq, mrc, mdb, accdb, zstd, cab, utf8,
)

// The list of nodes appended to the root node
Expand Down Expand Up @@ -45,7 +46,9 @@ var (
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", matchers.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", matchers.OggVideo)
txt = newMIME("text/plain; charset=utf-8", ".txt", matchers.Txt, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc)
utf16le = newMIME("text/plain; charset=utf-16le", ".txt", matchers.Utf16le)
utf16be = newMIME("text/plain; charset=utf-16be", ".txt", matchers.Utf16be)
utf8 = newMIME("text/plain; charset=utf-8", ".txt", matchers.Utf8, html, svg, xml, php, js, lua, perl, python, json, ndJson, rtf, tcl, csv, tsv, vCard, iCalendar, warc)
xml = newMIME("text/xml; charset=utf-8", ".xml", matchers.Xml, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf)
json = newMIME("application/json", ".json", matchers.Json, geoJson)
csv = newMIME("text/csv", ".csv", matchers.Csv)
Expand Down

0 comments on commit 798d7a4

Please sign in to comment.