Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for SubRip #232

Merged
merged 10 commits into from
Jan 17, 2022
12 changes: 12 additions & 0 deletions internal/magic/magic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,15 @@ func TestDropLastLine(t *testing.T) {
}
}
}

// BenchmarkSrt measures Srt on a minimal SubRip document: a single cue made
// of a sequence number, a time range and two lines of text.
func BenchmarkSrt(b *testing.B) {
	const subtitle = `1
00:02:16,612 --> 00:02:19,376
Senator, we're making
our final approach into Coruscant.

`
	// Convert once, outside the timed loop, so that the string to []byte copy
	// is not attributed to Srt.
	in := []byte(subtitle)
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		Srt(in, 0)
	}
}
51 changes: 51 additions & 0 deletions internal/magic/text.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package magic
import (
"bufio"
"bytes"
"strings"
"time"

"github.com/gabriel-vasile/mimetype/internal/charset"
"github.com/gabriel-vasile/mimetype/internal/json"
Expand Down Expand Up @@ -297,3 +299,52 @@ func HAR(raw []byte, limit uint32) bool {
func Svg(raw []byte, limit uint32) bool {
	// The only requirement is that the opening "<svg" marker appears somewhere
	// in raw; limit is not consulted.
	svgTag := []byte("<svg")
	return bytes.Index(raw, svgTag) != -1
}

// Srt matches a SubRip file.
//
// The input is accepted when its first three lines look like the start of a
// SubRip cue: the sequence number "1" (optionally preceded by a UTF-8 byte
// order mark), a time range such as "00:02:16,612 --> 00:02:19,376" whose
// start is not after its end, and a non-empty line of subtitle text.
// The second parameter is not used.
func Srt(in []byte, _ uint32) bool {
	// A file may start with a UTF-8 byte order mark. Drop it so that it does
	// not become part of the first line and defeat the "1" comparison below.
	in = bytes.TrimPrefix(in, []byte("\xEF\xBB\xBF"))

	s := bufio.NewScanner(bytes.NewReader(in))
	if !s.Scan() {
		return false
	}
	// First line must be 1.
	if s.Text() != "1" {
		return false
	}

	if !s.Scan() {
		return false
	}
	secondLine := s.Text()
	// Timestamp format (e.g: 00:02:16,612 --> 00:02:19,376) limits secondLine
	// length to exactly 29 characters.
	if len(secondLine) != 29 {
		return false
	}
	// Decimal separator of fractional seconds in the timestamps must be a
	// comma, not a period. This has to be checked before the commas are
	// rewritten below, otherwise the two forms become indistinguishable.
	if strings.Contains(secondLine, ".") {
		return false
	}
	// For Go <1.17, comma is not recognised as a decimal separator by `time.Parse`.
	secondLine = strings.ReplaceAll(secondLine, ",", ".")
	// Second line must be a time range.
	ts := strings.Split(secondLine, " --> ")
	if len(ts) != 2 {
		return false
	}
	const layout = "15:04:05.000"
	t0, err := time.Parse(layout, ts[0])
	if err != nil {
		return false
	}
	t1, err := time.Parse(layout, ts[1])
	if err != nil {
		return false
	}
	// The cue must not end before it starts; equal timestamps are accepted.
	if t0.After(t1) {
		return false
	}

	// A third line must exist and not be empty. This is the actual subtitle text.
	return s.Scan() && len(s.Bytes()) != 0
}
88 changes: 47 additions & 41 deletions mimetype_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,47 +171,53 @@ var files = map[string]string{
"shx.shx": "application/octet-stream",
"so.so": "application/x-sharedlib",
"sqlite.sqlite": "application/vnd.sqlite3",
"svg.1.svg": "image/svg+xml",
"svg.svg": "image/svg+xml",
"swf.swf": "application/x-shockwave-flash",
"tar.tar": "application/x-tar",
"tcl.tcl": "text/x-tcl",
"tcx.tcx": "application/vnd.garmin.tcx+xml",
"tiff.tiff": "image/tiff",
"torrent.torrent": "application/x-bittorrent",
"tsv.tsv": "text/tab-separated-values",
"ttf.ttf": "font/ttf",
"tzfile": "application/tzif",
"utf16bebom.txt": "text/plain; charset=utf-16be",
"utf16lebom.txt": "text/plain; charset=utf-16le",
"utf32bebom.txt": "text/plain; charset=utf-32be",
"utf32lebom.txt": "text/plain; charset=utf-32le",
"utf8.txt": "text/plain; charset=utf-8",
"utf8ctrlchars": "application/octet-stream",
"vcf.dos.vcf": "text/vcard",
"vcf.vcf": "text/vcard",
"voc.voc": "audio/x-unknown",
"warc.warc": "application/warc",
"wasm.wasm": "application/wasm",
"wav.wav": "audio/wav",
"webm.webm": "video/webm",
"webp.webp": "image/webp",
"woff.woff": "font/woff",
"woff2.woff2": "font/woff2",
"x3d.x3d": "model/x3d+xml",
"xar.xar": "application/x-xar",
"xcf.xcf": "image/x-xcf",
"xfdf.xfdf": "application/vnd.adobe.xfdf",
"xlf.xlf": "application/x-xliff+xml",
"xls.xls": "application/vnd.ms-excel",
"xlsx.1.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xlsx.2.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xlsx.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xml.xml": "text/xml; charset=utf-8",
"xml.withbr.xml": "text/xml; charset=utf-8",
"xz.xz": "application/x-xz",
"zip.zip": "application/zip",
"zst.zst": "application/zstd",
"srt.srt": "text/x-subrip",
// not.srt.txt uses periods instead of commas for the decimal separators of
// the timestamps.
"not.srt.txt": "text/plain; charset=utf-8",
// not.srt.2.txt does not specify milliseconds.
"not.srt.2.txt": "text/plain; charset=utf-8",
"svg.1.svg": "image/svg+xml",
"svg.svg": "image/svg+xml",
"swf.swf": "application/x-shockwave-flash",
"tar.tar": "application/x-tar",
"tcl.tcl": "text/x-tcl",
"tcx.tcx": "application/vnd.garmin.tcx+xml",
"tiff.tiff": "image/tiff",
"torrent.torrent": "application/x-bittorrent",
"tsv.tsv": "text/tab-separated-values",
"ttf.ttf": "font/ttf",
"tzfile": "application/tzif",
"utf16bebom.txt": "text/plain; charset=utf-16be",
"utf16lebom.txt": "text/plain; charset=utf-16le",
"utf32bebom.txt": "text/plain; charset=utf-32be",
"utf32lebom.txt": "text/plain; charset=utf-32le",
"utf8.txt": "text/plain; charset=utf-8",
"utf8ctrlchars": "application/octet-stream",
"vcf.dos.vcf": "text/vcard",
"vcf.vcf": "text/vcard",
"voc.voc": "audio/x-unknown",
"warc.warc": "application/warc",
"wasm.wasm": "application/wasm",
"wav.wav": "audio/wav",
"webm.webm": "video/webm",
"webp.webp": "image/webp",
"woff.woff": "font/woff",
"woff2.woff2": "font/woff2",
"x3d.x3d": "model/x3d+xml",
"xar.xar": "application/x-xar",
"xcf.xcf": "image/x-xcf",
"xfdf.xfdf": "application/vnd.adobe.xfdf",
"xlf.xlf": "application/x-xliff+xml",
"xls.xls": "application/vnd.ms-excel",
"xlsx.1.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xlsx.2.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xlsx.xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xml.xml": "text/xml; charset=utf-8",
"xml.withbr.xml": "text/xml; charset=utf-8",
"xz.xz": "application/x-xz",
"zip.zip": "application/zip",
"zst.zst": "application/zstd",
}

func TestDetect(t *testing.T) {
Expand Down
3 changes: 2 additions & 1 deletion supported_mimes.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
## 166 Supported MIME types
## 167 Supported MIME types
This file is automatically generated when running tests. Do not edit manually.

Extension | MIME type | Aliases
Expand Down Expand Up @@ -163,6 +163,7 @@ Extension | MIME type | Aliases
**.har** | application/json | -
**.ndjson** | application/x-ndjson | -
**.rtf** | text/rtf | -
**.srt** | text/x-subrip | text/x-srt
**.tcl** | text/x-tcl | application/x-tcl
**.csv** | text/csv | -
**.tsv** | text/tab-separated-values | -
Expand Down
20 changes: 20 additions & 0 deletions testdata/not.srt.2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
1
00:02:16 --> 00:02:19
Senator, we're making
our final approach into Coruscant.

2
00:02:19 --> 00:02:21
Very good, Lieutenant.

3
00:03:13 --> 00:03:15
We made it.

4
00:03:18 --> 00:03:20
I guess I was wrong.

5
00:03:20 --> 00:03:22
There was no danger at all.
20 changes: 20 additions & 0 deletions testdata/not.srt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
1
00:02:16.612 --> 00:02:19.376
Senator, we're making
our final approach into Coruscant.

2
00:02:19.482 --> 00:02:21.609
Very good, Lieutenant.

3
00:03:13.336 --> 00:03:15.167
We made it.

4
00:03:18.608 --> 00:03:20.371
I guess I was wrong.

5
00:03:20.476 --> 00:03:22.671
There was no danger at all.
20 changes: 20 additions & 0 deletions testdata/srt.srt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
1
00:02:16,612 --> 00:02:19,376
Senator, we're making
our final approach into Coruscant.

2
00:02:19,482 --> 00:02:21,609
Very good, Lieutenant.

3
00:03:13,336 --> 00:03:15,167
We made it.

4
00:03:18,608 --> 00:03:20,371
I guess I was wrong.

5
00:03:20,476 --> 00:03:22,671
There was no danger at all.
8 changes: 5 additions & 3 deletions tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ var (
alias("application/x-ogg")
oggAudio = newMIME("audio/ogg", ".oga", magic.OggAudio)
oggVideo = newMIME("video/ogg", ".ogv", magic.OggVideo)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, tcl, csv, tsv, vCard, iCalendar, warc)
text = newMIME("text/plain", ".txt", magic.Text, html, svg, xml, php, js, lua, perl, python, json, ndJSON, rtf, srt, tcl, csv, tsv, vCard, iCalendar, warc)
xml = newMIME("text/xml", ".xml", magic.XML, rss, atom, x3d, kml, xliff, collada, gml, gpx, tcx, amf, threemf, xfdf, owl2)
json = newMIME("application/json", ".json", magic.JSON, geoJSON, har)
har = newMIME("application/json", ".har", magic.HAR)
Expand All @@ -87,8 +87,10 @@ var (
html = newMIME("text/html", ".html", magic.HTML)
php = newMIME("text/x-php", ".php", magic.Php)
rtf = newMIME("text/rtf", ".rtf", magic.Rtf)
js = newMIME("application/javascript", ".js", magic.Js).
alias("application/x-javascript", "text/javascript")
srt = newMIME("text/x-subrip", ".srt", magic.Srt).
alias("text/x-srt")
js = newMIME("application/javascript", ".js", magic.Js).
alias("application/x-javascript", "text/javascript")
lua = newMIME("text/x-lua", ".lua", magic.Lua)
perl = newMIME("text/x-perl", ".pl", magic.Perl)
python = newMIME("application/x-python", ".py", magic.Python)
Expand Down