diff --git a/internal/magic/magic_test.go b/internal/magic/magic_test.go index d0a5d8a2..04490a68 100644 --- a/internal/magic/magic_test.go +++ b/internal/magic/magic_test.go @@ -112,3 +112,15 @@ func TestDropLastLine(t *testing.T) { } } } + +func BenchmarkSrt(b *testing.B) { + const subtitle = `1 +00:02:16,612 --> 00:02:19,376 +Senator, we're making +our final approach into Coruscant. + +` + for i := 0; i < b.N; i++ { + Srt([]byte(subtitle), 0) + } +} diff --git a/internal/magic/text.go b/internal/magic/text.go index b807dc51..d55c6afe 100644 --- a/internal/magic/text.go +++ b/internal/magic/text.go @@ -3,6 +3,8 @@ package magic import ( "bufio" "bytes" + "strings" + "time" "github.com/gabriel-vasile/mimetype/internal/charset" "github.com/gabriel-vasile/mimetype/internal/json" @@ -297,3 +299,52 @@ func HAR(raw []byte, limit uint32) bool { func Svg(raw []byte, limit uint32) bool { return bytes.Contains(raw, []byte(" 00:02:19,376) limits secondLine + // length to exactly 29 characters. + if len(secondLine) != 29 { + return false + } + // Decimal separator of fractional seconds in the timestamps must be a + // comma, not a period. + if strings.Contains(secondLine, ".") { + return false + } + // For Go <1.17, comma is not recognised as a decimal separator by `time.Parse`. + secondLine = strings.ReplaceAll(secondLine, ",", ".") + // Second line must be a time range. + ts := strings.Split(secondLine, " --> ") + if len(ts) != 2 { + return false + } + const layout = "15:04:05.000" + t0, err := time.Parse(layout, ts[0]) + if err != nil { + return false + } + t1, err := time.Parse(layout, ts[1]) + if err != nil { + return false + } + if t0.After(t1) { + return false + } + + // A third line must exist and not be empty. This is the actual subtitle text. + return s.Scan() && len(s.Bytes()) != 0 +} diff --git a/mimetype_test.go b/mimetype_test.go index cb701602..d2ef7df3 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -171,47 +171,53 @@ var files = map[string]string{ "shx.shx": "application/octet-stream", "so.so": "application/x-sharedlib", "sqlite.sqlite": "application/vnd.sqlite3", - "svg.1.svg": "image/svg+xml", - "svg.svg": "image/svg+xml", - "swf.swf": "application/x-shockwave-flash", - "tar.tar": "application/x-tar", - "tcl.tcl": "text/x-tcl", - "tcx.tcx": "application/vnd.garmin.tcx+xml", - "tiff.tiff": "image/tiff", - "torrent.torrent": "application/x-bittorrent", - "tsv.tsv": "text/tab-separated-values", - "ttf.ttf": "font/ttf", - "tzfile": "application/tzif", - "utf16bebom.txt": "text/plain; charset=utf-16be", - "utf16lebom.txt": "text/plain; charset=utf-16le", - "utf32bebom.txt": "text/plain; charset=utf-32be", - "utf32lebom.txt": "text/plain; charset=utf-32le", - "utf8.txt": "text/plain; charset=utf-8", - "utf8ctrlchars": "application/octet-stream", - "vcf.dos.vcf": "text/vcard", - "vcf.vcf": "text/vcard", - "voc.voc": "audio/x-unknown", - "warc.warc": "application/warc", - "wasm.wasm": "application/wasm", - "wav.wav": "audio/wav", - "webm.webm": "video/webm", - "webp.webp": "image/webp", - "woff.woff": "font/woff", - "woff2.woff2": "font/woff2", - "x3d.x3d": "model/x3d+xml", - "xar.xar": "application/x-xar", - "xcf.xcf": "image/x-xcf", - "xfdf.xfdf": "application/vnd.adobe.xfdf", - "xlf.xlf": "application/x-xliff+xml", - "xls.xls": "application/vnd.ms-excel", - "xlsx.1.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xlsx.2.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xlsx.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xml.xml": "text/xml; charset=utf-8", - "xml.withbr.xml": "text/xml; charset=utf-8", - "xz.xz": "application/x-xz", - "zip.zip": "application/zip", - "zst.zst": "application/zstd", + "srt.srt": "text/x-subrip", + // not.srt.txt uses periods instead of commas for the decimal separators of + // the timestamps. + "not.srt.txt": "text/plain; charset=utf-8", + // not.srt.2.txt does not specify milliseconds. + "not.srt.2.txt": "text/plain; charset=utf-8", + "svg.1.svg": "image/svg+xml", + "svg.svg": "image/svg+xml", + "swf.swf": "application/x-shockwave-flash", + "tar.tar": "application/x-tar", + "tcl.tcl": "text/x-tcl", + "tcx.tcx": "application/vnd.garmin.tcx+xml", + "tiff.tiff": "image/tiff", + "torrent.torrent": "application/x-bittorrent", + "tsv.tsv": "text/tab-separated-values", + "ttf.ttf": "font/ttf", + "tzfile": "application/tzif", + "utf16bebom.txt": "text/plain; charset=utf-16be", + "utf16lebom.txt": "text/plain; charset=utf-16le", + "utf32bebom.txt": "text/plain; charset=utf-32be", + "utf32lebom.txt": "text/plain; charset=utf-32le", + "utf8.txt": "text/plain; charset=utf-8", + "utf8ctrlchars": "application/octet-stream", + "vcf.dos.vcf": "text/vcard", + "vcf.vcf": "text/vcard", + "voc.voc": "audio/x-unknown", + "warc.warc": "application/warc", + "wasm.wasm": "application/wasm", + "wav.wav": "audio/wav", + "webm.webm": "video/webm", + "webp.webp": "image/webp", + "woff.woff": "font/woff", + "woff2.woff2": "font/woff2", + "x3d.x3d": "model/x3d+xml", + "xar.xar": "application/x-xar", + "xcf.xcf": "image/x-xcf", + "xfdf.xfdf": "application/vnd.adobe.xfdf", + "xlf.xlf": "application/x-xliff+xml", + "xls.xls": "application/vnd.ms-excel", + "xlsx.1.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xlsx.2.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xlsx.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "xml.xml": "text/xml; charset=utf-8", + "xml.withbr.xml": "text/xml; charset=utf-8", + "xz.xz": "application/x-xz", + "zip.zip": "application/zip", + "zst.zst": "application/zstd", } func TestDetect(t *testing.T) { diff --git a/supported_mimes.md b/supported_mimes.md index 55ccee60..b966a4fd 100644 --- a/supported_mimes.md +++ b/supported_mimes.md @@ -1,4 +1,4 @@ -## 166 Supported MIME types +## 167 Supported MIME types This file is automatically generated when running tests. Do not edit manually. Extension | MIME type | Aliases @@ -163,6 +163,7 @@ Extension | MIME type | Aliases **.har** | application/json | - **.ndjson** | application/x-ndjson | - **.rtf** | text/rtf | - +**.srt** | text/x-subrip | text/x-srt **.tcl** | text/x-tcl | application/x-tcl **.csv** | text/csv | - **.tsv** | text/tab-separated-values | - diff --git a/testdata/not.srt.2.txt b/testdata/not.srt.2.txt new file mode 100644 index 00000000..78bebbc3 --- /dev/null +++ b/testdata/not.srt.2.txt @@ -0,0 +1,20 @@ +1 +00:02:16 --> 00:02:19 +Senator, we're making +our final approach into Coruscant. + +2 +00:02:19 --> 00:02:21 +Very good, Lieutenant. + +3 +00:03:13 --> 00:03:15 +We made it. + +4 +00:03:18 --> 00:03:20 +I guess I was wrong. + +5 +00:03:20 --> 00:03:22 +There was no danger at all. diff --git a/testdata/not.srt.txt b/testdata/not.srt.txt new file mode 100644 index 00000000..338c9ee5 --- /dev/null +++ b/testdata/not.srt.txt @@ -0,0 +1,20 @@ +1 +00:02:16.612 --> 00:02:19.376 +Senator, we're making +our final approach into Coruscant. + +2 +00:02:19.482 --> 00:02:21.609 +Very good, Lieutenant. + +3 +00:03:13.336 --> 00:03:15.167 +We made it. + +4 +00:03:18.608 --> 00:03:20.371 +I guess I was wrong. + +5 +00:03:20.476 --> 00:03:22.671 +There was no danger at all. diff --git a/testdata/srt.srt b/testdata/srt.srt new file mode 100644 index 00000000..e16f3443 --- /dev/null +++ b/testdata/srt.srt @@ -0,0 +1,20 @@ +1 +00:02:16,612 --> 00:02:19,376 +Senator, we're making +our final approach into Coruscant. + +2 +00:02:19,482 --> 00:02:21,609 +Very good, Lieutenant. + +3 +00:03:13,336 --> 00:03:15,167 +We made it. + +4 +00:03:18,608 --> 00:03:20,371 +I guess I was wrong. + +5 +00:03:20,476 --> 00:03:22,671 +There was no danger at all. diff --git a/tree.go b/tree.go index dfd32271..6f8578e1 100644 --- a/tree.go +++ b/tree.go @@ -76,7 +76,7 @@ var ( alias("application/x-ogg") oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio) oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo) - text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, tcl, csv, tsv, vCard, iCalendar, warc) + text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc) xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2) json = newMIME("application/json", ".json", magic.JSON, geoJSON, har) har = newMIME("application/json", ".har", magic.HAR) @@ -87,8 +87,10 @@ var ( html = newMIME("text/html", ".html", magic.HTML) php = newMIME("text/x-php", ".php", magic.Php) rtf = newMIME("text/rtf", ".rtf", magic.Rtf) - js = newMIME("application/javascript", ".js", magic.Js). - alias("application/x-javascript", "text/javascript") + srt = newMIME("text/x-subrip", ".srt", magic.Srt). + alias("text/x-srt") + js = newMIME("application/javascript", ".js", magic.Js). + alias("application/x-javascript", "text/javascript") lua = newMIME("text/x-lua", ".lua", magic.Lua) perl = newMIME("text/x-perl", ".pl", magic.Perl) python = newMIME("application/x-python", ".py", magic.Python)