diff --git a/internal/magic/archive.go b/internal/magic/archive.go index 8f893e89..fec11f08 100644 --- a/internal/magic/archive.go +++ b/internal/magic/archive.go @@ -74,13 +74,21 @@ func CRX(raw []byte, limit uint32) bool { } // Tar matches a (t)ape (ar)chive file. -// -// Signature source: https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures func Tar(raw []byte, _ uint32) bool { + // The "magic" header field for files in UStar (POSIX IEEE P1003.1) archives + // has the prefix "ustar". The values of the remaining bytes in this field vary + // by archiver implementation. + if len(raw) >= 512 && bytes.HasPrefix(raw[257:], []byte{0x75, 0x73, 0x74, 0x61, 0x72}) { + return true + } + if len(raw) < 256 { return false } + // The older v7 format has no "magic" field, and therefore must be identified + // with heuristics based on legal ranges of values for other header fields: + // https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures rules := []struct { min, max uint8 i int diff --git a/mimetype_test.go b/mimetype_test.go index 8d6e645a..cfdde995 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -180,12 +180,20 @@ var files = map[string]string{ // the timestamps. "not.srt.txt": "text/plain; charset=utf-8", // not.srt.2.txt does not specify milliseconds. 
- "not.srt.2.txt": "text/plain; charset=utf-8", - "svg.1.svg": "image/svg+xml", - "svg.svg": "image/svg+xml", - "swf.swf": "application/x-shockwave-flash", - "tar.tar": "application/x-tar", - "tar.v7.tar": "application/x-tar", + "not.srt.2.txt": "text/plain; charset=utf-8", + "svg.1.svg": "image/svg+xml", + "svg.svg": "image/svg+xml", + "swf.swf": "application/x-shockwave-flash", + "tar.tar": "application/x-tar", + "tar.gnu.tar": "application/x-tar", + "tar.oldgnu.tar": "application/x-tar", + "tar.posix.tar": "application/x-tar", + // tar.star.tar was generated with star 1.6. + "tar.star.tar": "application/x-tar", + "tar.ustar.tar": "application/x-tar", + "tar.v7.tar": "application/x-tar", + // tar.v7-gnu.tar is a v7 tar archive generated with GNU tar 1.29. + "tar.v7-gnu.tar": "application/x-tar", "tcl.tcl": "text/x-tcl", "tcx.tcx": "application/vnd.garmin.tcx+xml", "tiff.tiff": "image/tiff", diff --git a/testdata/tar.gnu.tar b/testdata/tar.gnu.tar new file mode 100644 index 00000000..84ee599b Binary files /dev/null and b/testdata/tar.gnu.tar differ diff --git a/testdata/tar.oldgnu.tar b/testdata/tar.oldgnu.tar new file mode 100644 index 00000000..0bd9b86a Binary files /dev/null and b/testdata/tar.oldgnu.tar differ diff --git a/testdata/tar.posix.tar b/testdata/tar.posix.tar new file mode 100644 index 00000000..fad1c10b Binary files /dev/null and b/testdata/tar.posix.tar differ diff --git a/testdata/tar.star.tar b/testdata/tar.star.tar new file mode 100644 index 00000000..3b1e565e Binary files /dev/null and b/testdata/tar.star.tar differ diff --git a/testdata/tar.ustar.tar b/testdata/tar.ustar.tar new file mode 100644 index 00000000..9b023439 Binary files /dev/null and b/testdata/tar.ustar.tar differ diff --git a/testdata/tar.v7-gnu.tar b/testdata/tar.v7-gnu.tar new file mode 100644 index 00000000..41dc50ed Binary files /dev/null and b/testdata/tar.v7-gnu.tar differ