From 98f30913535da704c5b621c410307bcaa273defe Mon Sep 17 00:00:00 2001 From: Chris Novakovic Date: Mon, 18 Jul 2022 10:30:50 +0100 Subject: [PATCH] Detect UStar tar archives (#308) * Detect UStar tar archives UStar tar archives have a `magic` header field at byte offset 257 in each entry whose value begins with the string `ustar`. Identify them with the MIME type `application/x-tar`. Also add test cases for a number of UStar-compatible formats, created by GNU tar 1.29 (with `--format=`): * `tar.gnu.tar` * `tar.oldgnu.tar` * `tar.posix.tar` * `tar.ustar.tar` as well as `tar.star.tar` (created by star 1.6) and, for completeness, `tar.v7-gnu.tar` (a v7 tar archive created by GNU tar 1.29). Fixes #307. --- internal/magic/archive.go | 12 ++++++++++-- mimetype_test.go | 20 ++++++++++++++------ testdata/tar.gnu.tar | Bin 0 -> 10240 bytes testdata/tar.oldgnu.tar | Bin 0 -> 10240 bytes testdata/tar.posix.tar | Bin 0 -> 10240 bytes testdata/tar.star.tar | Bin 0 -> 10240 bytes testdata/tar.ustar.tar | Bin 0 -> 10240 bytes testdata/tar.v7-gnu.tar | Bin 0 -> 10240 bytes 8 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 testdata/tar.gnu.tar create mode 100644 testdata/tar.oldgnu.tar create mode 100644 testdata/tar.posix.tar create mode 100644 testdata/tar.star.tar create mode 100644 testdata/tar.ustar.tar create mode 100644 testdata/tar.v7-gnu.tar diff --git a/internal/magic/archive.go b/internal/magic/archive.go index 8f893e89..fec11f08 100644 --- a/internal/magic/archive.go +++ b/internal/magic/archive.go @@ -74,13 +74,21 @@ func CRX(raw []byte, limit uint32) bool { } // Tar matches a (t)ape (ar)chive file. -// -// Signature source: https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures func Tar(raw []byte, _ uint32) bool { + // The "magic" header field for files in UStar (POSIX IEEE P1003.1) archives + // has the prefix "ustar". 
The values of the remaining bytes in this field vary + // by archiver implementation. + if len(raw) >= 512 && bytes.HasPrefix(raw[257:], []byte{0x75, 0x73, 0x74, 0x61, 0x72}) { + return true + } + if len(raw) < 256 { return false } + // The older v7 format has no "magic" field, and therefore must be identified + // with heuristics based on legal ranges of values for other header fields: + // https://www.nationalarchives.gov.uk/PRONOM/Format/proFormatSearch.aspx?status=detailReport&id=385&strPageToDisplay=signatures rules := []struct { min, max uint8 i int diff --git a/mimetype_test.go b/mimetype_test.go index 8d6e645a..cfdde995 100644 --- a/mimetype_test.go +++ b/mimetype_test.go @@ -180,12 +180,20 @@ var files = map[string]string{ // the timestamps. "not.srt.txt": "text/plain; charset=utf-8", // not.srt.2.txt does not specify milliseconds. - "not.srt.2.txt": "text/plain; charset=utf-8", - "svg.1.svg": "image/svg+xml", - "svg.svg": "image/svg+xml", - "swf.swf": "application/x-shockwave-flash", - "tar.tar": "application/x-tar", - "tar.v7.tar": "application/x-tar", + "not.srt.2.txt": "text/plain; charset=utf-8", + "svg.1.svg": "image/svg+xml", + "svg.svg": "image/svg+xml", + "swf.swf": "application/x-shockwave-flash", + "tar.tar": "application/x-tar", + "tar.gnu.tar": "application/x-tar", + "tar.oldgnu.tar": "application/x-tar", + "tar.posix.tar": "application/x-tar", + // tar.star.tar was generated with star 1.6. + "tar.star.tar": "application/x-tar", + "tar.ustar.tar": "application/x-tar", + "tar.v7.tar": "application/x-tar", + // tar.v7-gnu.tar is a v7 tar archive generated with GNU tar 1.29. 
+ "tar.v7-gnu.tar": "application/x-tar", "tcl.tcl": "text/x-tcl", "tcx.tcx": "application/vnd.garmin.tcx+xml", "tiff.tiff": "image/tiff", diff --git a/testdata/tar.gnu.tar b/testdata/tar.gnu.tar new file mode 100644 index 0000000000000000000000000000000000000000..84ee599bb233c92599785774f5156d2aeb9f15b9 GIT binary patch literal 10240 zcmeIwu};G<6a~=C{Rm`bhAK{CM`A;43|Tr=sDo+TCeS36?FuSXiBIR*O@INx6b5v& zRK0QUdpW6^Kia;FCcP56WS&QFG)1f~%=qbZe@R|MJWpjNL{`WwN_ZlKh*&b|Rzvy0 zI%OD(*7j$G-|qj#t`p37oiP?~8e?6lS{+}r_^z+J_|nHa^_ec(#?|LoZT-1agfq2n zwzZ@0d|qTBQB`HMwG{bosoEM=X?zj)Xse5`>e>^PMji+g| zl+j=BilCwe4eLaqRY}6=V;VtQ;hT07=QCIjs<6;E!iT>LnfVC*Z$bAFU&JrcJ^tU)9dk} zSFs#N`;LaY+pY|A`f~{$)2dTgzpuL16o=-)Sco--wLcyTw&Q7P zCYY4+ii6X+LJX!cO`u7t#1%z||IW2(fbv`b6?DBs zJ=Zxt_iL}4?i`!tp*4l=`c%+-$D~(%+eF{FC@dvJkx;o1g8AxXt-AK^KS%o~+WUcVf77=g`p@QHG5vb}6=M3voAO~QJ*+}=>wOyMpjB)P1*-i=LdlUEJU(UMdqO~ofo(TnSDJcAy}s7$CdHi_0rW)dN#(up!cUNqHUzU#g1 zuB2S{e^WU3{kV#x@Lb|QvY(l8%JlSkycQP!m9BtzC2{qQ|yuqMIOZ?2kID zD_(bM^nq~Sx!H-4jPR2B9H-zvVk+ zOS^`44%m#{lq?on>su8kX|Nnj#jwhW!^*z@W?)jo2B|jLU_dqtPPTSU!J!5&W0=}* z9Os`LCgZQc2?8Jh0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4eaAOHd&00JNY0w4ea QAOHd&00JNY0%r+)0XEisQ~&?~ literal 0 HcmV?d00001 diff --git a/testdata/tar.ustar.tar b/testdata/tar.ustar.tar new file mode 100644 index 0000000000000000000000000000000000000000..9b0234399ee512ff4ba9fca98303df0088ef8077 GIT binary patch literal 10240 zcmeIvu};G<6a~=C{R%5HRCW?O5*uP;$kM4o45o3LKoeJqE2vQa9s4CI3<#E%h3lot zt#kc+qUX=OZKI1W?w884C?D48H{@X{kw+p+R4Qeft2E+5d$o1)UFz0y$SmgNqZ_& zZz|`X#XL_b(Pi10zV}h+_BFlhjB&T8I8Fph+LA3Or%u0XRl7qtzA&$BX}gyx_n*hn zT*Y#VcGid6J6lpNI8Q|Ek2=!vyUMnv*f&q6^K}n2!Z3(G#2t3@PG$XF!*&|5op$@l z5+^4JI5zQ z2LvDh0SG_<0uX=z1Rwwb2tWV=5P$##AOHafKmY;|fB*y_009U<00Izz00bcLPXs;y DqiuFV literal 0 HcmV?d00001 diff --git a/testdata/tar.v7-gnu.tar b/testdata/tar.v7-gnu.tar new file mode 100644 index 0000000000000000000000000000000000000000..41dc50ed8e2f0fd8d0ccc9f8c1d3706e6bd1a971 GIT binary patch literal 10240 
zcmeIvF;BxV5C!1O{S{VbK(UiJ5*uP;$kM4o42HN(poy!*2rAV7&iRrQ2F7MV&r8*( z>+|`mxp^I27cF`cIy%duVm#;1kcXpX5y?#HG|$sQr%{p=ipIsFOI^Brd`d#g^SN~W zQhi@5A>wV*556?5jc-N#;OZ{E3i00jr0cfv^%$F7m{U2=)VA5xp1#YQBIU$XRd0uZ zqR>q>gYT{Ncjq`(QmC#HYtFford}F<4A+~t#Y;^t^D?d3Ti$gCr literal 0 HcmV?d00001