From e67508e3849506a102e77d0eb815f75e16ff366c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sun, 29 Oct 2023 21:00:54 +0100 Subject: [PATCH 01/16] Remove zip dependency --- CHANGELOG.md | 6 + Cargo.toml | 5 +- README.md | 2 +- fixtures/application/sample.mxl | Bin 201 -> 148 bytes fixtures/book/sample.epub | Bin 216 -> 134 bytes fixtures/database/sample.odb | Bin 210 -> 153 bytes fixtures/document/sample.idml | Bin 240 -> 157 bytes fixtures/document/sample.odf | Bin 239 -> 156 bytes fixtures/document/sample.odg | Bin 237 -> 157 bytes fixtures/document/sample.odm | Bin 242 -> 160 bytes fixtures/document/sample.odp | Bin 238 -> 161 bytes fixtures/document/sample.ods | Bin 238 -> 160 bytes fixtures/document/sample.odt | Bin 235 -> 153 bytes fixtures/document/sample.otf | Bin 246 -> 165 bytes fixtures/document/sample.otg | Bin 246 -> 166 bytes fixtures/document/sample.otm | Bin 248 -> 169 bytes fixtures/document/sample.otp | Bin 248 -> 170 bytes fixtures/document/sample.ots | Bin 247 -> 169 bytes fixtures/document/sample.ott | Bin 243 -> 162 bytes fixtures/document/sample.sgw | Bin 234 -> 151 bytes fixtures/document/sample.stc | Bin 234 -> 151 bytes fixtures/document/sample.std | Bin 234 -> 151 bytes fixtures/document/sample.sti | Bin 237 -> 154 bytes fixtures/document/sample.stw | Bin 236 -> 153 bytes fixtures/document/sample.sxc | Bin 225 -> 142 bytes fixtures/document/sample.sxd | Bin 195 -> 142 bytes fixtures/document/sample.sxi | Bin 228 -> 145 bytes fixtures/document/sample.sxm | Bin 225 -> 142 bytes fixtures/document/sample.sxw | Bin 227 -> 144 bytes fixtures/image/sample.ora | Bin 212 -> 130 bytes src/readers.rs | 265 +++++++++++++++++++++----------- src/signatures.rs | 1 + 32 files changed, 188 insertions(+), 91 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b6a305..dd85379 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +# Version 0.22.0 (unreleased) + +## Improvements + +- Remove `zip` dependency when using `reader-zip` feature + # Version 0.21.0 (2023-09-29) ## API diff --git a/Cargo.toml b/Cargo.toml index f7ad149..a19c76c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "file-format" -version = "0.21.0" +version = "0.22.0" authors = ["Mickaël Malécot "] edition = "2021" description = "Crate for determining the file format of a given file or stream." @@ -17,7 +17,6 @@ rust-version = "1.60.0" [dependencies] cfb = { version = "0.8", optional = true } serde = { version = "1.0", optional = true, features = ["derive"], default-features = false } -zip = { version = "0.6", optional = true, features = ["deflate"], default-features = false } [features] ## Ecosystem features @@ -45,4 +44,4 @@ reader-pdf = [] reader-rm = [] reader-txt = [] reader-xml = [] -reader-zip = ["dep:zip"] +reader-zip = [] diff --git a/README.md b/README.md index 746c05f..26b6bf1 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Add this to your `Cargo.toml`: ```toml [dependencies] -file-format = "0.21" +file-format = "0.22" ``` ## Supported file formats diff --git a/fixtures/application/sample.mxl b/fixtures/application/sample.mxl index d1783287674161531c03243d97e76053b8ebf824..dfdcbc230941480c7885f8d95cf5117c97e1ca13 100644 GIT binary patch literal 148 zcmWIWW@Zs#fB`)x(IzDz2ZT9*I5#slwWP8jHL;)|Co?&*Br`uxzbr3BuP8M+zbGZK rC{-`Fv^X=lA~z?%n~_O`0k=so8z3~y+yHM@HjoG-5SjsL53n=0FaQ8*DJ-f0 diff --git a/fixtures/book/sample.epub b/fixtures/book/sample.epub index 0d3e818b167d4e532f4a7607588abc3250fe39e5..d3d7ac8600078e86f74d33c8683553dbadbb3f5a 100644 GIT binary patch literal 134 zcmWIWW@Zs#fPwtP)jA*^2y*~&Zf0(3No7H5VnIPpW^!UlW`3T2YC&m|c2#CUfHxzP f2v{XVH@aSkD2#-e6yVLu1`=ZgLNg$31y%q6PTUm) literal 216 zcmWIWW@Zs#-~d9-5}8m2C=dbC91IE!xtY1CC6xuKp&`5s>_G|K$uL}6!Og(P@`9Ox z0Ze%7Xq-6dGr~4xIgo8I au%r>hBG&EzZ&o&tdPX1&1JXGl4g&yB0VxXr diff --git a/fixtures/database/sample.odb b/fixtures/database/sample.odb index 7020c0628158a6f0bb4fac3fe5811435e6463ec4..e59b76a72681d887d71b8e3d8cc1e91e38851a4c 100644 GIT binary patch literal 153 zcmWIWW@Zs#fPo+LdB3XzIUvjd#JQQdsU?*Ksfh&zIho0cC7Jno`ek`3dijaPnZmVgA7A%W^QUpWkG6a2qy#c{YO42AY59(&A`a= zf|-E6hiD=;bFCXBO+_7o_H;gFPln;r3T_5QmKV$n z3}C`rN8`jvA3vRwXU<>NKka?d+vlXe*VWT!yghZ#`JBIe*5~Bei@F{@=lp#9pE5B7 zc(Zfdo%F&^4`>U>rT}k7CJ|bWdo^a1VWI$6T7FS(X-;B*HzSh>18x&xHbH2Z`2pUnY#2@Q z**SJuzgVLSv;<^PfHxzP2s6SF$Z{Y@z`&A55Q|tR1$eWvfz&esA;@3NAPxfnS8Y6H diff --git a/fixtures/document/sample.odg b/fixtures/document/sample.odg index 4a293303a496d5afc9ddf49499256ca95df68468..3f6ba2cd726afccf06bbe9145c7c3370f89b41c9 100644 GIT binary patch literal 157 zcmWIWW@Zs#fPwkUdPlT@91!LJ;@r&K)RM}A)Wm{1?W(EL%cQbha diff --git a/fixtures/document/sample.odm b/fixtures/document/sample.odm index 3e83018ecd3d5bb3073ed4c88a02fe5a8cf61f10..1008e499ad338ca8f95a3ffe51767ad1a19609da 100644 GIT binary patch literal 160 zcmWIWW@Zs#fB~<43%m4y91!LJ;@r&K)RM}A)Wm{5&2Hd8?Y=h7+I|95}*+3$UKxhV}L%`Ai D>8v31 literal 242 zcmWIWW@Zs#-~hs%(R0HXpghBGy#_-mGjO^^8CW@>&~+!vFvv CvOS3a diff --git a/fixtures/document/sample.odp b/fixtures/document/sample.odp index 5e7eaa18f93d06ff9b47996c33e17f63dc6f513c..611280e77018d41b26fa2d568e019af717266ac0 100644 GIT binary patch literal 161 zcmWIWW@Zs#fB|E*H7oRi91!LJ;@r&K)RM}A)Wm{LB1b}7M>A;6oR y!&q(23LT&wAbSG58JR?w5pF=11GxbPmNbG`#JVWJo0Scuo)HMcfOHdx!vFx@@-;^Q diff --git a/fixtures/document/sample.ods b/fixtures/document/sample.ods index 109a05f0e3ad5411d5f93666afa39129b5fe1e62..8ebcbb44492cf7d118ea8b221f464894b15f773d 100644 GIT binary patch literal 160 zcmWIWW@Zs#fPvN=%Pu`22ZT9*I5#slwWP8jHL;)|Co?&*Br`uxzbr3BFF&z3vsf>` zAT=)~Ke;qFHLpakxS%LCF{L;oHMJzbn~_O`0k^3z+aNT|jsR~~HjoG-5Sjt$5U?}= Dj$|MZ literal 238 zcmWIWW@Zs#-~htar81!mP@o5-IT#ceax-&NODYReLqm8O*prgElOur`M3+`@GcdBe zU}j(d6RZatIT;i9Lp{ppdBE40=yZSM3@n7K$Zi!0S1;df>^}5D8QSQ4WynC2*ZGM6NtkA0E{0s AOaK4? diff --git a/fixtures/document/sample.odt b/fixtures/document/sample.odt index df2e6446f2786ac531631f2172357973cdcf04a5..64c250792a5335842782cc1255500feb216cd0e4 100644 GIT binary patch literal 153 zcmWIWW@Zs#fPuJUMm*|34hVAqac*XAYDr~5YGOe_PG)jqNoIbYepz0MUVdV6X0cv= wL26z~esXDUYF>$6Noqw&fHxzP2m@~8VAeornBf85tZX0=Mj$i;(!OA60L+IRb^rhX literal 235 zcmWIWW@Zs#-~htir81!mP@oQ^IT#ceax-&NODYReLqm8O*qf8NlVP~Df}4Sn$6T7FS(X-=YUNosCEPGU)FfHxzP2m@}TVOBzDm_-5JtZX0=Mj$i; I(ve_k0G(7J{r~^~ literal 246 zcmWIWW@Zs#-~hsg=(%AGP+$V2IT#ceax-&NODYReLqm8O*cBi9q`+`#1vdjD%L`@( z1~9>T(2=jffP>}0?s~!dTQUt58K-Tz^4+>wYkQ;Fv(HaOl5UGld-lX);muD)M+@() z4eY!2nmxdqoug<1&jyfPAROS$$Rxsya0{{=$Sp9iq!Gj-)@1?StZX3lj6ewTUKfbN F0010+KrR3P diff --git a/fixtures/document/sample.otg b/fixtures/document/sample.otg index d072f8deeed9d3cbab0195a155cd9e3cede706dc..1a1642fc4e0bde60674b45a768512dbd812bd866 100644 GIT binary patch literal 166 zcmWIWW@Zs#fPwp}YhRiGIUvjd#JQQdsU?*Ksfh&zIho0cC7Jno`ek`3dijaPnZPh4 J3`j?Tr2&{%Bs>5B literal 246 zcmWIWW@Zs#-~hs|=(%AGP+$tAIT#ceax-&NODYReLqm8O*i9e%q`+`#1vdjD%L`@( z1~9>T(2=jffP>}0?s~!dTNb)_M%@tk^7mZBoOK!JckGjxq@nK_)pMmha?2g*nwZx) z(Q`7a>jJ#lIXhBGzRA-mGjO^^8CW@?IB+ G!vFxMkv}5< diff --git a/fixtures/document/sample.otm b/fixtures/document/sample.otm index 3cfd2b20ec3184244d599391538238f77669fe0e..86570e2d014139643c0d17dc1f98811cd98e2c08 100644 GIT binary patch literal 169 zcmWIWW@Zs#fPovrhc=o6IUvjd#JQQdsU?*Ksfh&zIho0cC7Jno`ek`3dijaPnZ$6K~ZWkkOr&KElJHS$Vn_o4e(}U5@En?KFnSS4YMu4o0SbD!U%+B JKspXA4FHF)BY6M- literal 248 zcmWIWW@Zs#-~htO(R0HXpuiGHb1*0{PoH*&@r*rbm`OEsJy)T~j*Ey$i?X&W0%TCpmF$@9T>>OopBbdy9wt;L6@MdHZVMe$HSq|hH7+BH>ViD`M0B=?{ka|WS L1bMIr#9;sc3$H%` diff --git a/fixtures/document/sample.ots b/fixtures/document/sample.ots index 96eef0fdc889772db37fac452072bd8f63378a5e..1db1e3adbba32220eeaa92b1d0ad0a8a9291b96f 100644 GIT binary patch literal 169 zcmWIWW@Zs#fPw01i~Y=j91!LJ;@r&K)RM}A)Wm{onSFv|kGS=m4$ Mj6i4xq+`L-0NAu7VgLXD literal 247 zcmWIWW@Zs#-~hsn(R0HXpuhr1b1*0{Y#{ZFKnU_* IH;BUk089Tw0{{R3 diff --git a/fixtures/document/sample.ott b/fixtures/document/sample.ott index da669110094adf5a5f1db45255af94f0e0516541..9a54bd690b1a8324709bf5461cf35c0545c35ce9 100644 GIT binary patch literal 162 zcmWIWW@Zs#fPp81lQa#091!LJ;@r&K)RM}A)Wm{T(2Lh diff --git a/fixtures/document/sample.sgw b/fixtures/document/sample.sgw index 0da329390e85826d96a7f8f4c876029236a80c3c..0a026ccc2a25653306dd937e8a15ad886833816d 100644 GIT binary patch literal 151 zcmWIWW@Zs#fPtDlzROgB91!LJ;@r&K)RM}A)Wm{NKka={Tkq`U)hEtg)YdbXH!B-RJtGi;yi^C`FaQ7$7CkKh diff --git a/fixtures/document/sample.stc b/fixtures/document/sample.stc index f11915bc5e42cd7a806d12de20166ae186c6e7a4..0954799ce3cbd8481b9486dcfd24a0701694f099 100644 GIT binary patch literal 151 zcmWIWW@Zs#fPu@mp3YVUazK~^h;uV@Q%fofQWFabax#+>OEUBG^vm*6^omRK^eS?5 v^pX>ElJ!bba|?13OHu>88JR>Fa2p1*1VY1%4)A7W1BoyKp&5|&21^3~n@k?j literal 234 zcmWIWW@Zs#-~hr20WZTCpg;{sb1*0{NKka={Tkq`U)hEtg^!7XBc}?5%tnLX<9nYss3<2Kk9O? uUP@77xn4NKka={Tkq`U)hEtg^w#v!xvuSbR`-Obj^|S*h5&DNj*x9) t2Q`4^gX{?KW@Hj!M%a%m2eKaqwlsoR#JVQHo0Scuo)HK^UaA9e7ywdfIZOZm diff --git a/fixtures/document/sample.sti b/fixtures/document/sample.sti index 716aa1c510889a31078192249ccaed6c3043e479..55d38a2ac1d353a523949870205bd73ae2ac5bc8 100644 GIT binary patch literal 154 zcmWIWW@Zs#fPuWOQ`Q xUS@7VQEG9qUP)?hK~7>xYJfK*lL!ND^I-NsXqf2%-mGjO5k??11JZtAX#i?j9q<4E literal 237 zcmWIWW@Zs#-~hsd0WZTCpgNKka={Tkq`U)hEtgJbgx2!%J81nzrXz-4mWVo==$=0=(Hd w+!~(E&<0uovL?Wrkx7IZ;RIwkkP~2FOCyLytb+o)S=m798G#VwE3jq;0FtylBme*a diff --git a/fixtures/document/sample.stw b/fixtures/document/sample.stw index c1116779961d350a69e2b895b13333ad3d3a00de..da91e67a4bcb973e366d25787317728449538ed2 100644 GIT binary patch literal 153 zcmWIWW@Zs#fPrnCKli8uIUvjd#JQQdsU?*Ksfh&zIho0cC7Jno`ek`3dc~!AdKI}j wdgVo#C8NKka={Tkq`U)hEtg)YdNKka={Tkq`U)hEtg^!7XB`HYDnz?+@JV8K@td7!Z%qXWDd knM9Zo)*{P+tc8IsjUX1WjtKB(Wdo^a1VWHcNPh43`jeHr2%Yv8XEur literal 195 zcmWIWW@h1H0D(^dFT=tQ@IR6TvO$=GL53kWGdH!QvLH1ygp+~!c-E>E5H79YW?*D_ z!OXw_CK3w@ax#+>OEUBG^vm*6^omRK^eS?5^iqlv%eex)8JXmmaakY%vX_Aoh_^I? cSP1J_A=aT;72wUv22#fegnmFe9>ie)0BRR4WB>pF diff --git a/fixtures/document/sample.sxi b/fixtures/document/sample.sxi index 341d4f91b836d6dcd6cb1357b374d61b9aa3c565..fee1bd95cfd0a240d41e587aecffbc3856abb421 100644 GIT binary patch literal 145 zcmWIWW@Zs#fPv2sZb!%iIUvjd#JQQdsU?*Ksfh&zIho0cC7Jno`ek`3dc~!AdKI}j odYQQeMXANb0p5&EA`G~Vff)~>VTJ~Hv$BCi7=h3XNV|fi0b64nO8@`> literal 228 zcmWIWW@Zs#-~hrM0WZTCpg;jgb1*0{NKka={Tkq`U)hEtgJbgx2!%J81852W*H#^7GI%_c{pvfT9 n1H2iTM3@maBg=tohJh`OAQrLi2=HcQ1F2^OLXc-FKpX}DxZXE^ diff --git a/fixtures/document/sample.sxm b/fixtures/document/sample.sxm index 957ea0e964d6939a28a0812b8277f1dc7e1a608f..071cbda024a12c5c7deae613c588c230d3e9b568 100644 GIT binary patch literal 142 zcmWIWW@Zs#fPv**tcfx}4hVAqac*XAYDr~5YGOe_PG)jqNoIbYepz0MUU6xjUPW$> lUT$JZMu0aXlL!NDGhk*zXqbrs-mGjO5k??11JX`lX#lM-7zF?T literal 225 zcmWIWW@Zs#-~hst0WZTCpgNKka={Tkq`U)hEtgJngG<@&OY=fHylwkSR-*JkVH>(E;9! kOd`w(YmwzZ*22J+Mi7fwM+A7YvVqhy0wKsJr63Li0O_bUbpQYW diff --git a/fixtures/document/sample.sxw b/fixtures/document/sample.sxw index 8a05e8260f838f6b995d36400883703fe3043bb8..af8e0d7eaa2878cc002a7601103ee1235601b3a1 100644 GIT binary patch literal 144 zcmWIWW@Zs#fPsfg4K3w>91!LJ;@r&K)RM}A)Wm{5&2Hd8=Ooz}gGXuO?*+3$UKxhV}UBJ=+UCJw)#YU`iU@zj09#1P=k&T-@Ez diff --git a/fixtures/image/sample.ora b/fixtures/image/sample.ora index 53c7e228a0e69299682042a863a2a405c085c76a..92a81cc819ed8bea36c7c7389205b031bc256c90 100644 GIT binary patch literal 130 zcmWIWW@Zs#fPv$)KJ*9xIUvjd#JQQdsU?*KshPQn>8bkp1*v&OiNz(UMFHN7Od<@p a^})=8&@fX1yjj^mB8)(22BbllfdK$?!4>2H literal 212 zcmWIWW@Zs#-~dAZ<&B{XP#^%LIT#ceax-&NODYReLqm8O*wfiNlVP~Df}4SnUn*Sv)W!ddY-yW3<2Kk9LHyU=n(>%1Trnan~_O`8DSH$9LOdZSkee$ W5o>RNH!B-RJtGi?0qG17hXDW+kt>k^ diff --git a/src/readers.rs b/src/readers.rs index 63d5a00..af55b00 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -585,23 +585,90 @@ impl crate::FileFormat { /// Determines file format from a ZIP reader. #[cfg(feature = "reader-zip")] pub(crate) fn from_zip_reader(reader: &mut BufReader) -> Result { - // Constants for limits. + // Constants. + const CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE: &[u8] = b"\x50\x4B\x01\x02"; + const END_OF_CENTRAL_DIRECTORY_SIGNATURE: &[u8] = b"\x50\x4B\x05\x06"; const FILE_LIMIT: usize = 4096; - const READ_LIMIT: u64 = 64; // Rewinds to the beginning of the stream. + reader.rewind()?; + + // Gets the stream length. + let length = reader.seek(SeekFrom::End(0))?; reader.rewind()?; - // Opens the archive. - let mut archive = zip::ZipArchive::new(reader)?; + // Searches for the end of central directory. + let mut buffer = [0; 4]; + let mut position = length.saturating_sub(22); + while position >= length.saturating_sub(22 + u16::MAX as u64) + && &buffer != END_OF_CENTRAL_DIRECTORY_SIGNATURE + { + reader.seek(SeekFrom::Start(position))?; + reader.read_exact(&mut buffer)?; + position = match position.checked_sub(1) { + Some(position) => position, + None => break, + } + } + + // Reads the start of central directory offset. + reader.seek(SeekFrom::Current(12))?; + let mut buffer = [0; 4]; + reader.read_exact(&mut buffer)?; + let offset = u32::from_le_bytes(buffer); + + // Seeks to the start of central directory. + reader.seek(SeekFrom::Start(offset as u64))?; // Sets the default variant. let mut format = Self::Zip; - // Browses archive files. - for index in 0..std::cmp::min(archive.len(), FILE_LIMIT) { - let file = archive.by_index(index)?; - match file.name() { + // Browses central directory file headers. + let mut buffer = [0; 4]; + let mut file_count = 0; + while file_count < FILE_LIMIT + && reader.read_exact(&mut buffer).is_ok() + && &buffer == CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE + { + // Reads compressed size. + reader.seek(SeekFrom::Current(16))?; + let mut buffer = [0; 4]; + reader.read_exact(&mut buffer)?; + let compressed_size = u32::from_le_bytes(buffer); + + // Reads uncompressed size. + let mut buffer = [0; 4]; + reader.read_exact(&mut buffer)?; + let uncompressed_size = u32::from_le_bytes(buffer); + + // Reads filename length. + let mut buffer = [0; 2]; + reader.read_exact(&mut buffer)?; + let filename_length = u16::from_le_bytes(buffer); + + // Reads extra field length. + let mut buffer = [0; 2]; + reader.read_exact(&mut buffer)?; + let extra_field_length = u16::from_le_bytes(buffer); + + // Reads file comment length. + let mut buffer = [0; 2]; + reader.read_exact(&mut buffer)?; + let file_comment_length = u16::from_le_bytes(buffer); + + // Reads relative offset of local file header. + reader.seek(SeekFrom::Current(8))?; + let mut buffer = [0; 4]; + reader.read_exact(&mut buffer)?; + let offset = u32::from_le_bytes(buffer); + + // Reads filename. + let mut buffer = vec![0; filename_length as usize]; + reader.read_exact(&mut buffer)?; + let filename = String::from_utf8_lossy(&buffer).to_string(); + + // Checks filename. + match filename.as_str() { "AndroidManifest.xml" => return Ok(Self::AndroidPackage), "AppManifest.xaml" => return Ok(Self::Xap), "AppxManifest.xml" => return Ok(Self::WindowsAppPackage), @@ -612,103 +679,127 @@ impl crate::FileFormat { "WEB-INF/web.xml" => return Ok(Self::WebApplicationArchive), "doc.kml" => return Ok(Self::KeyholeMarkupLanguageZipped), "extension.vsixmanifest" => return Ok(Self::MicrosoftVisualStudioExtension), - "mimetype" => match read_to_string(file.take(READ_LIMIT))?.trim() { - "application/epub+zip" => return Ok(Self::ElectronicPublication), - "application/vnd.adobe.indesign-idml-package" => { - return Ok(Self::IndesignMarkupLanguage) - } - "application/vnd.oasis.opendocument.base" - | "application/vnd.oasis.opendocument.database" => { - return Ok(Self::OpendocumentDatabase) - } - "application/vnd.oasis.opendocument.formula" => { - return Ok(Self::OpendocumentFormula) - } - "application/vnd.oasis.opendocument.formula-template" => { - return Ok(Self::OpendocumentFormulaTemplate) - } - "application/vnd.oasis.opendocument.graphics" => { - return Ok(Self::OpendocumentGraphics) - } - "application/vnd.oasis.opendocument.graphics-template" => { - return Ok(Self::OpendocumentGraphicsTemplate) - } - "application/vnd.oasis.opendocument.presentation" => { - return Ok(Self::OpendocumentPresentation); - } - "application/vnd.oasis.opendocument.presentation-template" => { - return Ok(Self::OpendocumentPresentationTemplate); - } - "application/vnd.oasis.opendocument.spreadsheet" => { - return Ok(Self::OpendocumentSpreadsheet); - } - "application/vnd.oasis.opendocument.spreadsheet-template" => { - return Ok(Self::OpendocumentSpreadsheetTemplate); - } - "application/vnd.oasis.opendocument.text" => { - return Ok(Self::OpendocumentText); - } - "application/vnd.oasis.opendocument.text-master" => { - return Ok(Self::OpendocumentTextMaster); - } - "application/vnd.oasis.opendocument.text-master-template" => { - return Ok(Self::OpendocumentTextMasterTemplate); - } - "application/vnd.oasis.opendocument.text-template" => { - return Ok(Self::OpendocumentTextTemplate); - } - "application/vnd.recordare.musicxml" => return Ok(Self::MusicxmlZipped), - "application/vnd.sun.xml.calc" => return Ok(Self::SunXmlCalc), - "application/vnd.sun.xml.calc.template" => return Ok(Self::SunXmlCalcTemplate), - "application/vnd.sun.xml.draw" => return Ok(Self::SunXmlDraw), - "application/vnd.sun.xml.draw.template" => return Ok(Self::SunXmlDrawTemplate), - "application/vnd.sun.xml.impress" => return Ok(Self::SunXmlImpress), - "application/vnd.sun.xml.impress.template" => { - return Ok(Self::SunXmlImpressTemplate) - } - "application/vnd.sun.xml.math" => return Ok(Self::SunXmlMath), - "application/vnd.sun.xml.writer" => return Ok(Self::SunXmlWriter), - "application/vnd.sun.xml.writer.global" => return Ok(Self::SunXmlWriterGlobal), - "application/vnd.sun.xml.writer.template" => { - return Ok(Self::SunXmlWriterTemplate) - } - "image/openraster" => return Ok(Self::Openraster), - _ => {} - }, + "mimetype" if compressed_size == uncompressed_size => { + // Seeks to the filename of the local file header. + reader.seek(SeekFrom::Start(offset as u64 + 26))?; + + // Reads filename length. + let mut buffer = [0; 2]; + reader.read_exact(&mut buffer)?; + let filename_length = u16::from_le_bytes(buffer); + + // Reads extra field length. + let mut buffer = [0; 2]; + reader.read_exact(&mut buffer)?; + let extra_field_length = u16::from_le_bytes(buffer); + + // Seeks to the data. + reader.seek(SeekFrom::Current( + filename_length as i64 + extra_field_length as i64, + ))?; + + // Reads the data. + let mut buffer = vec![0; compressed_size as usize]; + reader.read_exact(&mut buffer)?; + let data = String::from_utf8_lossy(&buffer).to_string(); + + // Checks the trimmed data. + return Ok(match data.trim() { + "application/epub+zip" => Self::ElectronicPublication, + "application/vnd.adobe.indesign-idml-package" => { + Self::IndesignMarkupLanguage + } + "application/vnd.oasis.opendocument.base" + | "application/vnd.oasis.opendocument.database" => { + Self::OpendocumentDatabase + } + "application/vnd.oasis.opendocument.formula" => Self::OpendocumentFormula, + "application/vnd.oasis.opendocument.formula-template" => { + Self::OpendocumentFormulaTemplate + } + "application/vnd.oasis.opendocument.graphics" => Self::OpendocumentGraphics, + "application/vnd.oasis.opendocument.graphics-template" => { + Self::OpendocumentGraphicsTemplate + } + "application/vnd.oasis.opendocument.presentation" => { + Self::OpendocumentPresentation + } + "application/vnd.oasis.opendocument.presentation-template" => { + Self::OpendocumentPresentationTemplate + } + "application/vnd.oasis.opendocument.spreadsheet" => { + Self::OpendocumentSpreadsheet + } + "application/vnd.oasis.opendocument.spreadsheet-template" => { + Self::OpendocumentSpreadsheetTemplate + } + "application/vnd.oasis.opendocument.text" => Self::OpendocumentText, + "application/vnd.oasis.opendocument.text-master" => { + Self::OpendocumentTextMaster + } + "application/vnd.oasis.opendocument.text-master-template" => { + Self::OpendocumentTextMasterTemplate + } + "application/vnd.oasis.opendocument.text-template" => { + Self::OpendocumentTextTemplate + } + "application/vnd.recordare.musicxml" => Self::MusicxmlZipped, + "application/vnd.sun.xml.calc" => Self::SunXmlCalc, + "application/vnd.sun.xml.calc.template" => Self::SunXmlCalcTemplate, + "application/vnd.sun.xml.draw" => Self::SunXmlDraw, + "application/vnd.sun.xml.draw.template" => Self::SunXmlDrawTemplate, + "application/vnd.sun.xml.impress" => Self::SunXmlImpress, + "application/vnd.sun.xml.impress.template" => Self::SunXmlImpressTemplate, + "application/vnd.sun.xml.math" => Self::SunXmlMath, + "application/vnd.sun.xml.writer" => Self::SunXmlWriter, + "application/vnd.sun.xml.writer.global" => Self::SunXmlWriterGlobal, + "application/vnd.sun.xml.writer.template" => Self::SunXmlWriterTemplate, + "image/openraster" => Self::Openraster, + _ => Self::Zip, + }); + } _ => { - if file.name().starts_with("Fusion[Active]/") { + if filename.starts_with("Fusion[Active]/") { return Ok(Self::Autodesk123d); - } else if file.name().starts_with("circuitdiagram/") { + } else if filename.starts_with("circuitdiagram/") { return Ok(Self::CircuitDiagramDocument); - } else if file.name().starts_with("dwf/") { + } else if filename.starts_with("dwf/") { return Ok(Self::DesignWebFormatXps); - } else if file.name().ends_with(".fb2") && !file.name().contains('/') { + } else if filename.ends_with(".fb2") && !filename.contains('/') { return Ok(Self::FictionbookZipped); - } else if file.name().starts_with("FusionAssetName[Active]/") { + } else if filename.starts_with("FusionAssetName[Active]/") { return Ok(Self::Fusion360); - } else if file.name().starts_with("Payload/") && file.name().contains(".app/") { + } else if filename.starts_with("Payload/") && filename.contains(".app/") { return Ok(Self::IosAppStorePackage); - } else if file.name().starts_with("word/") { + } else if filename.starts_with("word/") { return Ok(Self::OfficeOpenXmlDocument); - } else if file.name().starts_with("visio/") { + } else if filename.starts_with("visio/") { return Ok(Self::OfficeOpenXmlDrawing); - } else if file.name().starts_with("ppt/") { + } else if filename.starts_with("ppt/") { return Ok(Self::OfficeOpenXmlPresentation); - } else if file.name().starts_with("xl/") { + } else if filename.starts_with("xl/") { return Ok(Self::OfficeOpenXmlSpreadsheet); - } else if file.name().starts_with("SpaceClaim/") { + } else if filename.starts_with("SpaceClaim/") { return Ok(Self::SpaceclaimDocument); - } else if file.name().starts_with("3D/") && file.name().ends_with(".model") { + } else if filename.starts_with("3D/") && filename.ends_with(".model") { return Ok(Self::ThreeDimensionalManufacturingFormat); - } else if (file.name().ends_with(".usd") - || file.name().ends_with(".usda") - || file.name().ends_with(".usdc")) - && !file.name().contains('/') + } else if (filename.ends_with(".usd") + || filename.ends_with(".usda") + || filename.ends_with(".usdc")) + && !filename.contains('/') { return Ok(Self::UniversalSceneDescriptionZipped); } } } + + // Seeks to the next central directory file header. + reader.seek(SeekFrom::Current( + extra_field_length as i64 + file_comment_length as i64, + ))?; + + // Increments the file count. + file_count += 1; } Ok(format) } diff --git a/src/signatures.rs b/src/signatures.rs index 43c78d3..fa9ab3c 100644 --- a/src/signatures.rs +++ b/src/signatures.rs @@ -1240,6 +1240,7 @@ signatures! { format = Zip value = b"\x50\x4B\x03\x04" + value = b"\x50\x4B\x05\x06" format = Zpaq value = b"7kSt" From ee73241a2ab4f992b50f89ecdeb915e97665105d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sun, 29 Oct 2023 21:02:05 +0100 Subject: [PATCH 02/16] Update cfb dependency from 0.8 to 0.9 --- CHANGELOG.md | 3 ++- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd85379..c53c014 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,9 @@ # Version 0.22.0 (unreleased) -## Improvements +## Internal changes - Remove `zip` dependency when using `reader-zip` feature +- Update `cfb` dependency from 0.8 to 0.9 # Version 0.21.0 (2023-09-29) diff --git a/Cargo.toml b/Cargo.toml index a19c76c..fb22c4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ exclude = ["/.github", "/examples", "/fixtures", "/tests", ".gitattributes", ".g rust-version = "1.60.0" [dependencies] -cfb = { version = "0.8", optional = true } +cfb = { version = "0.9", optional = true } serde = { version = "1.0", optional = true, features = ["derive"], default-features = false } [features] From 960e308e7546219bb14b2c14319b06e14765d2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sun, 29 Oct 2023 21:03:04 +0100 Subject: [PATCH 03/16] Fix fmt --- src/readers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/readers.rs b/src/readers.rs index af55b00..bca825e 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -591,7 +591,7 @@ impl crate::FileFormat { const FILE_LIMIT: usize = 4096; // Rewinds to the beginning of the stream. - reader.rewind()?; + reader.rewind()?; // Gets the stream length. let length = reader.seek(SeekFrom::End(0))?; From 3ee9a8ad3f84099c86ec87d63f19278c06ac2b85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sun, 29 Oct 2023 21:13:18 +0100 Subject: [PATCH 04/16] Fix clippy warning --- src/readers.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/readers.rs b/src/readers.rs index bca825e..cd46bf1 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -601,7 +601,7 @@ impl crate::FileFormat { let mut buffer = [0; 4]; let mut position = length.saturating_sub(22); while position >= length.saturating_sub(22 + u16::MAX as u64) - && &buffer != END_OF_CENTRAL_DIRECTORY_SIGNATURE + && buffer != END_OF_CENTRAL_DIRECTORY_SIGNATURE { reader.seek(SeekFrom::Start(position))?; reader.read_exact(&mut buffer)?; @@ -628,7 +628,7 @@ impl crate::FileFormat { let mut file_count = 0; while file_count < FILE_LIMIT && reader.read_exact(&mut buffer).is_ok() - && &buffer == CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE + && buffer == CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE { // Reads compressed size. reader.seek(SeekFrom::Current(16))?; From 85cd6ec268042d6811e3ef8b022ebea84af2b772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sun, 29 Oct 2023 22:34:25 +0100 Subject: [PATCH 05/16] Rename position to pos --- src/readers.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/readers.rs b/src/readers.rs index cd46bf1..1ae9df3 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -599,13 +599,13 @@ impl crate::FileFormat { // Searches for the end of central directory. let mut buffer = [0; 4]; - let mut position = length.saturating_sub(22); - while position >= length.saturating_sub(22 + u16::MAX as u64) + let mut pos = length.saturating_sub(22); + while pos >= length.saturating_sub(22 + u16::MAX as u64) && buffer != END_OF_CENTRAL_DIRECTORY_SIGNATURE { - reader.seek(SeekFrom::Start(position))?; + reader.seek(SeekFrom::Start(pos))?; reader.read_exact(&mut buffer)?; - position = match position.checked_sub(1) { + pos = match pos.checked_sub(1) { Some(position) => position, None => break, } From c4d07cd6d68055ffa3ff0cb3a14637451e0ac841 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Tue, 31 Oct 2023 13:42:39 +0100 Subject: [PATCH 06/16] Remove cfb dependency --- CHANGELOG.md | 2 +- Cargo.toml | 3 +- src/readers.rs | 247 +++++++++++++++++++++++++++++++++++++------------ 3 files changed, 192 insertions(+), 60 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c53c014..93fdbf3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,8 @@ ## Internal changes +- Remove `cfb` dependency when using `reader-cfb` feature - Remove `zip` dependency when using `reader-zip` feature -- Update `cfb` dependency from 0.8 to 0.9 # Version 0.21.0 (2023-09-29) diff --git a/Cargo.toml b/Cargo.toml index fb22c4b..f8ce5ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ exclude = ["/.github", "/examples", "/fixtures", "/tests", ".gitattributes", ".g rust-version = "1.60.0" [dependencies] -cfb = { version = "0.9", optional = true } serde = { version = "1.0", optional = true, features = ["derive"], default-features = false } [features] @@ -36,7 +35,7 @@ reader = [ "reader-zip" ] reader-asf = [] -reader-cfb = ["dep:cfb"] +reader-cfb = [] reader-ebml = [] reader-exe = [] reader-mp4 = [] diff --git a/src/readers.rs b/src/readers.rs index 1ae9df3..3faed54 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -91,66 +91,199 @@ impl crate::FileFormat { /// Determines file format from a CFB reader. #[cfg(feature = "reader-cfb")] pub(crate) fn from_cfb_reader(reader: &mut BufReader) -> Result { + // Constants for limits. + const SEARCH_LIMIT: usize = 32768; + + // Constants for CLSIDs. + const AUTODESK_INVENTORY_ASSEMBLY_CLSID: &[u8] = + b"\xE1\x81\x0F\xE6\xB3\x49\xD0\x11\x93\xC3\x7E\x07\x06\x00\x00\x00"; + const AUTODESK_INVENTOR_DRAWING_CLSID: &[u8] = + b"\xF1\xFD\xF9\xBB\xDC\x52\xD0\x11\x8C\x04\x08\x00\x09\x0B\xE8\xEC"; + const AUTODESK_INVENTOR_PART_CLSID: &[u8] = + b"\x90\xB4\x29\x4D\xB2\x49\xD0\x11\x93\xC3\x7E\x07\x06\x00\x00\x00"; + const AUTODESK_INVENTOR_PRESENTATION_CLSID: &[u8] = + b"\x80\x3A\x28\x76\xDD\x50\xD3\x11\xA7\xE3\x00\xC0\x4F\x79\xD7\xBC"; + const MICROSOFT_EXCEL_SPREADSHEET_CLSID_1: &[u8] = + b"\x10\x08\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_EXCEL_SPREADSHEET_CLSID_2: &[u8] = + b"\x20\x08\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_POWERPOINT_PRESENTATION_CLSID_1: &[u8] = + b"\x51\x48\x04\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_POWERPOINT_PRESENTATION_CLSID_2: &[u8] = + b"\x10\x8D\x81\x64\x9B\x4F\xCF\x11\x86\xEA\x00\xAA\x00\xB9\x29\xE8"; + const MICROSOFT_POWERPOINT_PRESENTATION_CLSID_3: &[u8] = + b"\x70\xAE\x7B\xEA\x3B\xFB\xCD\x11\xA9\x03\x00\xAA\x00\x51\x0E\xA3"; + const MICROSOFT_PROJECT_PLAN_CLSID: &[u8] = + b"\x3A\x8F\xB7\x74\xC8\xC8\xD1\x11\xBE\x11\x00\xC0\x4F\xB6\xFA\xF1"; + const MICROSOFT_PUBLISHER_DOCUMENT_CLSID: &[u8] = + b"\x01\x12\x02\x00\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_SOFTWARE_INSTALLER_CLSID: &[u8] = + b"\x84\x10\x0C\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_VISIO_DRAWING_CLSID_1: &[u8] = + b"\x13\x1A\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_VISIO_DRAWING_CLSID_2: &[u8] = + b"\x14\x1A\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_WORD_DOCUMENT_CLSID_1: &[u8] = + b"\x00\x90\x20\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_WORD_DOCUMENT_CLSID_2: &[u8] = + b"\x06\x09\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_WORKS_DATABASE_CLSID_1: &[u8] = + b"\x03\x13\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_WORKS_DATABASE_CLSID_2: &[u8] = + b"\xC3\xDB\xCD\x28\xE2\x0A\xCE\x11\xA2\x9A\x00\xAA\x00\x4A\x1A\x72"; + const MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_1: &[u8] = + b"\x02\x13\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + const MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_2: &[u8] = + b"\xB2\x5A\xA4\x0E\x0A\x9E\xD1\x11\xA4\x07\x00\xC0\x4F\xB9\x32\xBA"; + const MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_3: &[u8] = + b"\xC2\xDB\xCD\x28\xE2\x0A\xCE\x11\xA2\x9A\x00\xAA\x00\x4A\x1A\x72"; + const SOLIDWORKS_ASSEMBLY_CLSID: &[u8] = + b"\x36\x3D\xA3\x83\xC5\x27\xCE\x11\xBF\xD4\x00\x40\x05\x13\xBB\x57"; + const SOLIDWORKS_DRAWING_CLSID: &[u8] = + b"\x34\x3D\xA3\x83\xC5\x27\xCE\x11\xBF\xD4\x00\x40\x05\x13\xBB\x57"; + const SOLIDWORKS_PART_CLSID: &[u8] = + b"\x30\x3D\xA3\x83\xC5\x27\xCE\x11\xBF\xD4\x00\x40\x05\x13\xBB\x57"; + const STARCALC_CLSID_1: &[u8] = + b"\xA0\x3F\x54\x3F\xA6\xB6\x1B\x10\x99\x61\x04\x02\x1C\x00\x70\x02"; + const STARCALC_CLSID_2: &[u8] = + b"\x41\xD4\x61\x63\x35\x42\xD0\x11\x89\xCB\x00\x80\x29\xE4\xB0\xB1"; + const STARCALC_CLSID_3: &[u8] = + b"\x61\xB8\xA5\xC6\xD6\x85\x1D\x11\x89\xCB\x00\x80\x29\xE4\xB0\xB1"; + const STARCHART_CLSID_1: &[u8] = + b"\xE0\xB7\xB3\x02\x25\x42\xD0\x11\x89\xCA\x00\x80\x29\xE4\xB0\xB1"; + const STARCHART_CLSID_2: &[u8] = + b"\x21\x43\x88\xBF\xDD\x85\x1D\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; + const STARCHART_CLSID_3: &[u8] = + b"\xE0\x99\x9C\xFB\x6D\x2C\x1C\x10\x8E\x2C\x00\x00\x1B\x4C\xC7\x11"; + const STARDRAW_CLSID_1: &[u8] = + b"\xA0\x05\x89\x2E\xBD\x85\xD1\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; + const STARDRAW_CLSID_2: &[u8] = + b"\xE0\xAA\x10\xAF\x6D\xB3\x1B\x10\x99\x61\x04\x02\x1C\x00\x70\x02"; + const STARIMPRESS_CLSID_1: &[u8] = + b"\xC0\x3C\x2D\x01\x16\x42\xD0\x11\x89\xCB\x00\x80\x29\xE4\xB0\xB1"; + const STARIMPRESS_CLSID_2: &[u8] = + b"\x21\x72\x5C\x56\xBC\x85\x1D\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; + const STARMATH_CLSID_1: &[u8] = + b"\xE1\xB7\xB3\x02\x25\x42\xD0\x11\x89\xCA\x00\x80\x29\xE4\xB0\xB1"; + const STARMATH_CLSID_2: &[u8] = + b"\x60\x04\x59\xD4\xFD\x35\x1C\x10\xB1\x2A\x04\x02\x1C\x00\x70\x02"; + const STARMATH_CLSID_3: &[u8] = + b"\x40\xE6\xB5\xFF\xDE\x85\x1D\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; + const STARWRITER_CLSID_1: &[u8] = + b"\xB0\xE9\x04\x8B\x0E\x42\xD0\x11\xA4\x5E\x00\xA0\x24\x9D\x57\xB1"; + const STARWRITER_CLSID_2: &[u8] = + b"\xD1\xF9\x0C\xC2\xAE\x85\x1D\x11\xAA\xB4\x00\x60\x97\xDA\x56\x1A"; + const STARWRITER_CLSID_3: &[u8] = + b"\x40\x7E\x5C\xDC\x5C\xB3\x1B\x10\x99\x61\x04\x02\x1C\x00\x70\x02"; + const THREE_DIMENSIONAL_STUDIO_MAX_CLSID: &[u8] = + b"\x7B\x8C\xDD\x1C\xC0\x81\xA0\x45\x9F\xED\x04\x14\x31\x44\xCC\x1E"; + const WORDPERFECT_DOCUMENT_CLSID: &[u8] = + b"\xFF\x73\x98\x51\xAD\x2D\x20\x02\x19\x37\x00\x00\x92\x96\x79\xCD"; + const WORDPERFECT_GRAPHICS_CLSID: &[u8] = + b"\x60\xFE\x2E\x40\x99\x19\x1B\x10\x99\xAE\x04\x02\x1C\x00\x70\x02"; + + // Constants for UTF-16-encoded filenames. + const MICROSOFT_WORKS6_SPREADSHEET_FILENAME: &[u8] = + b"\x00W\x00k\x00s\x00S\x00S\x00W\x00o\x00r\x00k\x00B\x00o\x00o\x00k"; + const MICROSOFT_WORKS_WORD_PROCESSOR_FILENAME: &[u8] = b"\x00M\x00a\x00t\x00O\x00S\x00T"; + // Rewinds to the beginning of the stream. reader.rewind()?; - // Opens the compound file. - let file = cfb::CompoundFile::open(reader)?; - - // Reads the CLSID from the root entry and returns the corresponding variant. - Ok(match file.root_entry().clsid().to_string().as_str() { - "e60f81e1-49b3-11d0-93c3-7e0706000000" => Self::AutodeskInventorAssembly, - "bbf9fdf1-52dc-11d0-8c04-0800090be8ec" => Self::AutodeskInventorDrawing, - "4d29b490-49b2-11d0-93c3-7e0706000000" => Self::AutodeskInventorPart, - "76283a80-50dd-11d3-a7e3-00c04f79d7bc" => Self::AutodeskInventorPresentation, - "00020810-0000-0000-c000-000000000046" => Self::MicrosoftExcelSpreadsheet, - "00020820-0000-0000-c000-000000000046" => Self::MicrosoftExcelSpreadsheet, - "00044851-0000-0000-c000-000000000046" => Self::MicrosoftPowerpointPresentation, - "64818d10-4f9b-11cf-86ea-00aa00b929e8" => Self::MicrosoftPowerpointPresentation, - "ea7bae70-fb3b-11cd-a903-00aa00510ea3" => Self::MicrosoftPowerpointPresentation, - "74b78f3a-c8c8-11d1-be11-00c04fb6faf1" => Self::MicrosoftProjectPlan, - "00021201-0000-0000-00c0-000000000046" => Self::MicrosoftPublisherDocument, - "000c1084-0000-0000-c000-000000000046" => Self::MicrosoftSoftwareInstaller, - "00021a13-0000-0000-c000-000000000046" => Self::MicrosoftVisioDrawing, - "00021a14-0000-0000-c000-000000000046" => Self::MicrosoftVisioDrawing, - "00020900-0000-0000-c000-000000000046" => Self::MicrosoftWordDocument, - "00020906-0000-0000-c000-000000000046" => Self::MicrosoftWordDocument, - "00021303-0000-0000-c000-000000000046" => Self::MicrosoftWorksDatabase, - "28cddbc3-0ae2-11ce-a29a-00aa004a1a72" => Self::MicrosoftWorksDatabase, - "00021302-0000-0000-c000-000000000046" => Self::MicrosoftWorksWordProcessor, - "0ea45ab2-9e0a-11d1-a407-00c04fb932ba" => Self::MicrosoftWorksWordProcessor, - "28cddbc2-0ae2-11ce-a29a-00aa004a1a72" => Self::MicrosoftWorksWordProcessor, - "83a33d36-27c5-11ce-bfd4-00400513bb57" => Self::SolidworksAssembly, - "83a33d34-27c5-11ce-bfd4-00400513bb57" => Self::SolidworksDrawing, - "83a33d30-27c5-11ce-bfd4-00400513bb57" => Self::SolidworksPart, - "3f543fa0-b6a6-101b-9961-04021c007002" => Self::Starcalc, - "6361d441-4235-11d0-89cb-008029e4b0b1" => Self::Starcalc, - "c6a5b861-85d6-11d1-89cb-008029e4b0b1" => Self::Starcalc, - "02b3b7e0-4225-11d0-89ca-008029e4b0b1" => Self::Starchart, - "bf884321-85dd-11d1-89d0-008029e4b0b1" => Self::Starchart, - "fb9c99e0-2c6d-101c-8e2c-00001b4cc711" => Self::Starchart, - "2e8905a0-85bd-11d1-89d0-008029e4b0b1" => Self::Stardraw, - "af10aae0-b36d-101b-9961-04021c007002" => Self::Stardraw, - "012d3cc0-4216-11d0-89cb-008029e4b0b1" => Self::Starimpress, - "565c7221-85bc-11d1-89d0-008029e4b0b1" => Self::Starimpress, - "02b3b7e1-4225-11d0-89ca-008029e4b0b1" => Self::Starmath, - "d4590460-35fd-101c-b12a-04021c007002" => Self::Starmath, - "ffb5e640-85de-11d1-89d0-008029e4b0b1" => Self::Starmath, - "8b04e9b0-420e-11d0-a45e-00a0249d57b1" => Self::Starwriter, - "c20cf9d1-85ae-11d1-aab4-006097da561a" => Self::Starwriter, - "dc5c7e40-b35c-101b-9961-04021c007002" => Self::Starwriter, - "1cdd8c7b-81c0-45a0-9fed-04143144cc1e" => Self::ThreeDimensionalStudioMax, - "519873ff-2dad-0220-1937-0000929679cd" => Self::WordperfectDocument, - "402efe60-1999-101b-99ae-04021c007002" => Self::WordperfectGraphics, - _ => { - if file.exists("WksSSWorkBook") { - Self::MicrosoftWorks6Spreadsheet - } else if file.exists("MatOST") { - Self::MicrosoftWorksWordProcessor - } else { - Self::CompoundFileBinary - } - } + // Gets the stream length. + let length = reader.seek(SeekFrom::End(0))?; + reader.rewind()?; + + // Skips the CFB header. + reader.seek(SeekFrom::Start(512))?; + + // Fills the buffer. + let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, (length - 512) as usize)]; + reader.read_exact(&mut buffer)?; + + // Searches for specific CLSIDs or filenames in the buffer. + Ok(if contains(&buffer, AUTODESK_INVENTORY_ASSEMBLY_CLSID) { + Self::AutodeskInventorAssembly + } else if contains(&buffer, AUTODESK_INVENTOR_DRAWING_CLSID) { + Self::AutodeskInventorDrawing + } else if contains(&buffer, AUTODESK_INVENTOR_PART_CLSID) { + Self::AutodeskInventorPart + } else if contains(&buffer, AUTODESK_INVENTOR_PRESENTATION_CLSID) { + Self::AutodeskInventorPresentation + } else if contains(&buffer, MICROSOFT_EXCEL_SPREADSHEET_CLSID_1) + || contains(&buffer, MICROSOFT_EXCEL_SPREADSHEET_CLSID_2) + { + Self::MicrosoftExcelSpreadsheet + } else if contains(&buffer, MICROSOFT_POWERPOINT_PRESENTATION_CLSID_1) + || contains(&buffer, MICROSOFT_POWERPOINT_PRESENTATION_CLSID_2) + || contains(&buffer, MICROSOFT_POWERPOINT_PRESENTATION_CLSID_3) + { + Self::MicrosoftPowerpointPresentation + } else if contains(&buffer, MICROSOFT_PROJECT_PLAN_CLSID) { + Self::MicrosoftProjectPlan + } else if contains(&buffer, MICROSOFT_PUBLISHER_DOCUMENT_CLSID) { + Self::MicrosoftPublisherDocument + } else if contains(&buffer, MICROSOFT_SOFTWARE_INSTALLER_CLSID) { + Self::MicrosoftSoftwareInstaller + } else if contains(&buffer, MICROSOFT_VISIO_DRAWING_CLSID_1) + || contains(&buffer, MICROSOFT_VISIO_DRAWING_CLSID_2) + { + Self::MicrosoftVisioDrawing + } else if contains(&buffer, MICROSOFT_WORD_DOCUMENT_CLSID_1) + || contains(&buffer, MICROSOFT_WORD_DOCUMENT_CLSID_2) + { + Self::MicrosoftWordDocument + } else if contains(&buffer, MICROSOFT_WORKS_DATABASE_CLSID_1) + || contains(&buffer, MICROSOFT_WORKS_DATABASE_CLSID_2) + { + Self::MicrosoftWorksDatabase + } else if contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_1) + || contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_2) + || contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_3) + { + Self::MicrosoftWorksWordProcessor + } else if contains(&buffer, SOLIDWORKS_ASSEMBLY_CLSID) { + Self::SolidworksAssembly + } else if contains(&buffer, SOLIDWORKS_DRAWING_CLSID) { + Self::SolidworksDrawing + } else if contains(&buffer, SOLIDWORKS_PART_CLSID) { + Self::SolidworksPart + } else if contains(&buffer, STARCALC_CLSID_1) + || contains(&buffer, STARCALC_CLSID_2) + || contains(&buffer, STARCALC_CLSID_3) + { + Self::Starcalc + } else if contains(&buffer, STARCHART_CLSID_1) + || contains(&buffer, STARCHART_CLSID_2) + || contains(&buffer, STARCHART_CLSID_3) + { + Self::Starchart + } else if contains(&buffer, STARDRAW_CLSID_1) || contains(&buffer, STARDRAW_CLSID_2) { + Self::Stardraw + } else if contains(&buffer, STARIMPRESS_CLSID_1) || contains(&buffer, STARIMPRESS_CLSID_2) { + Self::Starimpress + } else if contains(&buffer, STARMATH_CLSID_1) + || contains(&buffer, STARMATH_CLSID_2) + || contains(&buffer, STARMATH_CLSID_3) + { + Self::Starmath + } else if contains(&buffer, STARWRITER_CLSID_1) + || contains(&buffer, STARWRITER_CLSID_2) + || contains(&buffer, STARWRITER_CLSID_3) + { + Self::Starwriter + } else if contains(&buffer, THREE_DIMENSIONAL_STUDIO_MAX_CLSID) { + Self::ThreeDimensionalStudioMax + } else if contains(&buffer, WORDPERFECT_DOCUMENT_CLSID) { + Self::WordperfectDocument + } else if contains(&buffer, WORDPERFECT_GRAPHICS_CLSID) { + Self::WordperfectGraphics + } else if contains(&buffer, MICROSOFT_WORKS6_SPREADSHEET_FILENAME) { + Self::MicrosoftWorks6Spreadsheet + } else if contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_FILENAME) { + Self::MicrosoftWorksWordProcessor + } else { + Self::CompoundFileBinary }) } From 0f1c6bca1876c7f3003c9293151025c57a41715b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Tue, 31 Oct 2023 13:51:09 +0100 Subject: [PATCH 07/16] Add reader-cfb feature above contains function --- src/readers.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/readers.rs b/src/readers.rs index 3faed54..ae91ca9 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -941,6 +941,7 @@ impl crate::FileFormat { /// Checks if the `data` array contains the `target` sequence using the Boyer-Moore algorithm. #[cfg(any( feature = "reader-asf", + feature = "reader-cfb", feature = "reader-pdf", feature = "reader-rm", feature = "reader-xml" From a39969dcfef89e1ebea79fa19626db71d5ee6e78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Wed, 1 Nov 2023 19:45:15 +0100 Subject: [PATCH 08/16] Standardizes the different readers --- src/readers.rs | 92 +++++++++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 39 deletions(-) diff --git a/src/readers.rs b/src/readers.rs index ae91ca9..92def6f 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -50,17 +50,19 @@ impl crate::FileFormat { /// Determines file format from an ASF reader. #[cfg(feature = "reader-asf")] pub(crate) fn from_asf_reader(reader: &mut BufReader) -> Result { - // Constants representing GUIDs and descriptors. - const VIDEO_MEDIA_GUID: &[u8] = - b"\xC0\xEF\x19\xBC\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B"; + // Constants for limits. + const SEARCH_LIMIT: usize = 8192; + + // Constants for GUIDs. const AUDIO_MEDIA_GUID: &[u8] = b"\x40\x9E\x69\xF8\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B"; + const VIDEO_MEDIA_GUID: &[u8] = + b"\xC0\xEF\x19\xBC\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B"; + + // Constants for descriptors. const DVR_DESCRIPTOR: &[u8] = b"D\x00V\x00R\x00 \x00F\x00i\x00l\x00e\x00 \x00V\x00e\x00r\x00s\x00i\x00o\x00n"; - // Constants for limits. - const SEARCH_LIMIT: usize = 8192; - // Rewinds to the beginning of the stream. reader.rewind()?; @@ -201,7 +203,7 @@ impl crate::FileFormat { let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, (length - 512) as usize)]; reader.read_exact(&mut buffer)?; - // Searches for specific CLSIDs or filenames in the buffer. + // Searches for specific CLSIDs and filenames in the buffer. Ok(if contains(&buffer, AUTODESK_INVENTORY_ASSEMBLY_CLSID) { Self::AutodeskInventorAssembly } else if contains(&buffer, AUTODESK_INVENTOR_DRAWING_CLSID) { @@ -290,21 +292,23 @@ impl crate::FileFormat { /// Determines file format from an EBML reader. #[cfg(feature = "reader-ebml")] pub(crate) fn from_ebml_reader(reader: &mut BufReader) -> Result { - // Constants representing EBML element IDs. - const EBML: u32 = 0x1A45DFA3; - const DOC_TYPE: u32 = 0x4282; - const SEGMENT: u32 = 0x18538067; - const TRACKS: u32 = 0x1654AE6B; - const TRACK_ENTRY: u32 = 0xAE; - const CODEC_ID: u32 = 0x86; - const VIDEO: u32 = 0xE0; - const STEREO_MODE: u32 = 0x53B8; - const CLUSTER: u32 = 0x1F43B675; - // Constants for limits. const ITERATION_LIMIT: usize = 512; const STRING_LIMIT: usize = 64; + // Constants for EBML elements IDs. + const DOC_TYPE_ID: u32 = 0x4282; + const EBML_ID: u32 = 0x1A45DFA3; + + // Constants for Matroska elements IDs. + const CLUSTER_ID: u32 = 0x1F43B675; + const CODEC_ID: u32 = 0x86; + const SEGMENT_ID: u32 = 0x18538067; + const STEREO_MODE_ID: u32 = 0x53B8; + const TRACKS_ID: u32 = 0x1654AE6B; + const TRACK_ENTRY_ID: u32 = 0xAE; + const VIDEO_ID: u32 = 0xE0; + /// Helper function to read the ID of an EBML element. fn read_id(reader: &mut R) -> Result { // Reads the first byte. @@ -365,10 +369,10 @@ impl crate::FileFormat { // Checks the ID of the element to perform specific actions. match id { - EBML | SEGMENT | TRACKS | TRACK_ENTRY | VIDEO => { + EBML_ID | SEGMENT_ID | TRACKS_ID | TRACK_ENTRY_ID | VIDEO_ID => { // Does nothing for these elements. } - DOC_TYPE => { + DOC_TYPE_ID => { // Reads the buffer containing the DocType. let mut buffer = vec![0; std::cmp::min(STRING_LIMIT, size as usize)]; reader.read_exact(&mut buffer)?; @@ -402,7 +406,7 @@ impl crate::FileFormat { subtitle_codec = true; } } - STEREO_MODE => { + STEREO_MODE_ID => { // Reads a single byte to determine the StereoMode. let mut buffer = [0]; reader.read_exact(&mut buffer)?; @@ -412,7 +416,7 @@ impl crate::FileFormat { return Ok(Self::Matroska3dVideo); } } - CLUSTER => { + CLUSTER_ID => { // No need to continue reading. break; } @@ -446,6 +450,12 @@ impl crate::FileFormat { /// Determines file format from an EXE reader. #[cfg(feature = "reader-exe")] pub(crate) fn from_exe_reader(reader: &mut BufReader) -> Result { + // Constants for signatures. + const LINEAR_EXECUTABLE_SIGNATURE_1: &[u8] = b"LE"; + const LINEAR_EXECUTABLE_SIGNATURE_2: &[u8] = b"LX"; + const NEW_EXECUTABLE_SIGNATURE: &[u8] = b"NE"; + const PORTABLE_EXECUTABLE_SIGNATURE: &[u8] = b"PE\0\0"; + // Rewinds to the beginning of the stream. reader.rewind()?; @@ -469,7 +479,7 @@ impl crate::FileFormat { reader.read_exact(&mut signature)?; // Checks the signature. - if &signature == b"PE\0\0" { + if signature == PORTABLE_EXECUTABLE_SIGNATURE { reader.seek(SeekFrom::Current(0x12))?; let mut characteristics = [0; 2]; reader.read_exact(&mut characteristics)?; @@ -478,9 +488,11 @@ impl crate::FileFormat { } else { Self::PortableExecutable }); - } else if &signature[..2] == b"LE" || &signature[..2] == b"LX" { + } else if &signature[..2] == LINEAR_EXECUTABLE_SIGNATURE_1 + || &signature[..2] == LINEAR_EXECUTABLE_SIGNATURE_2 + { return Ok(Self::LinearExecutable); - } else if &signature[..2] == b"NE" { + } else if &signature[..2] == NEW_EXECUTABLE_SIGNATURE { return Ok(Self::NewExecutable); } } @@ -619,8 +631,8 @@ impl crate::FileFormat { #[cfg(feature = "reader-txt")] pub(crate) fn from_txt_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const READ_LIMIT: u64 = 8_388_608; const LINE_LIMIT: usize = 256; + const READ_LIMIT: u64 = 8_388_608; // Rewinds to the beginning of the stream. reader.rewind()?; @@ -645,9 +657,9 @@ impl crate::FileFormat { #[cfg(feature = "reader-xml")] pub(crate) fn from_xml_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const READ_LIMIT: u64 = 262_144; - const LINE_LIMIT: usize = 8; const CHAR_LIMIT: usize = 2048; + const LINE_LIMIT: usize = 8; + const READ_LIMIT: u64 = 262_144; // Rewinds to the beginning of the stream. reader.rewind()?; @@ -718,10 +730,12 @@ impl crate::FileFormat { /// Determines file format from a ZIP reader. #[cfg(feature = "reader-zip")] pub(crate) fn from_zip_reader(reader: &mut BufReader) -> Result { - // Constants. + // Constants for limits. + const FILE_LIMIT: usize = 4096; + + // Constants for signatures. const CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE: &[u8] = b"\x50\x4B\x01\x02"; const END_OF_CENTRAL_DIRECTORY_SIGNATURE: &[u8] = b"\x50\x4B\x05\x06"; - const FILE_LIMIT: usize = 4096; // Rewinds to the beginning of the stream. reader.rewind()?; @@ -732,13 +746,13 @@ impl crate::FileFormat { // Searches for the end of central directory. let mut buffer = [0; 4]; - let mut pos = length.saturating_sub(22); - while pos >= length.saturating_sub(22 + u16::MAX as u64) + let mut position = length.saturating_sub(22); + while position >= length.saturating_sub(22 + u16::MAX as u64) && buffer != END_OF_CENTRAL_DIRECTORY_SIGNATURE { - reader.seek(SeekFrom::Start(pos))?; + reader.seek(SeekFrom::Start(position))?; reader.read_exact(&mut buffer)?; - pos = match pos.checked_sub(1) { + position = match position.checked_sub(1) { Some(position) => position, None => break, } @@ -964,10 +978,10 @@ fn contains(data: &[u8], target: &[u8]) -> bool { } // Starts searching from the last possible position in the data array. - let mut pos = target.len() - 1; - while pos < data.len() { + let mut position = target.len() - 1; + while position < data.len() { let mut target_index = target.len() - 1; - let mut data_index = pos; + let mut data_index = position; while data[data_index] == target[target_index] { if target_index == 0 { return true; @@ -977,10 +991,10 @@ fn contains(data: &[u8], target: &[u8]) -> bool { } // Calculates the maximum shift based on the bad character rule and good suffix rule. - let bad_char_shift = bad_char_table[data[pos] as usize]; + let bad_char_shift = bad_char_table[data[position] as usize]; let good_suffix_shift = target.len() - target_index; let shift = std::cmp::max(bad_char_shift, good_suffix_shift); - pos += shift; + position += shift; } false } From 16fd149655458f57ef618804bb4c2b69df2384c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Thu, 2 Nov 2023 00:55:45 +0100 Subject: [PATCH 09/16] Standardizes the different readers --- src/readers.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/readers.rs b/src/readers.rs index 92def6f..929e67c 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -74,14 +74,10 @@ impl crate::FileFormat { let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, length as usize)]; reader.read_exact(&mut buffer)?; - // Searches for an Extended Content Description descriptor named "DVR File Version" in the - // buffer. - if contains(&buffer, DVR_DESCRIPTOR) { + // Searches for specific GUIDs or descriptors in the buffer. + Ok(if contains(&buffer, DVR_DESCRIPTOR) { return Ok(Self::MicrosoftDigitalVideoRecording); - } - - // Searches for specific GUIDs in the buffer. - Ok(if contains(&buffer, VIDEO_MEDIA_GUID) { + } else if contains(&buffer, VIDEO_MEDIA_GUID) { Self::WindowsMediaVideo } else if contains(&buffer, AUDIO_MEDIA_GUID) { Self::WindowsMediaAudio @@ -203,7 +199,7 @@ impl crate::FileFormat { let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, (length - 512) as usize)]; reader.read_exact(&mut buffer)?; - // Searches for specific CLSIDs and filenames in the buffer. + // Searches for specific CLSIDs or filenames in the buffer. Ok(if contains(&buffer, AUTODESK_INVENTORY_ASSEMBLY_CLSID) { Self::AutodeskInventorAssembly } else if contains(&buffer, AUTODESK_INVENTOR_DRAWING_CLSID) { @@ -615,7 +611,7 @@ impl crate::FileFormat { let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, length as usize)]; reader.read_exact(&mut buffer)?; - // Searches for the media type in the buffer. + // Searches for specific media types in the buffer. Ok(if contains(&buffer, b"video/x-pn-realvideo") { Self::Realvideo } else if contains(&buffer, b"audio/x-pn-realaudio") From b4d3758e7a4f0dca3e5b5cf1512c9f925aed1e6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Thu, 2 Nov 2023 01:28:11 +0100 Subject: [PATCH 10/16] Fix MICROSOFT_WORD_DOCUMENT_CLSID_1 --- src/readers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/readers.rs b/src/readers.rs index 929e67c..ad9cc22 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -122,7 +122,7 @@ impl crate::FileFormat { const MICROSOFT_VISIO_DRAWING_CLSID_2: &[u8] = b"\x14\x1A\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; const MICROSOFT_WORD_DOCUMENT_CLSID_1: &[u8] = - b"\x00\x90\x20\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; + b"\x00\x09\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; const MICROSOFT_WORD_DOCUMENT_CLSID_2: &[u8] = b"\x06\x09\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; const MICROSOFT_WORKS_DATABASE_CLSID_1: &[u8] = From 05b8b0bff4d1455b0cfb7dccaa6138a72fbdb5cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sat, 4 Nov 2023 00:58:45 +0100 Subject: [PATCH 11/16] Improves CFB reader --- src/readers.rs | 278 +++++++++++++++++-------------------------------- 1 file changed, 93 insertions(+), 185 deletions(-) diff --git a/src/readers.rs b/src/readers.rs index ad9cc22..c4207ae 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -90,198 +90,106 @@ impl crate::FileFormat { #[cfg(feature = "reader-cfb")] pub(crate) fn from_cfb_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const SEARCH_LIMIT: usize = 32768; - - // Constants for CLSIDs. - const AUTODESK_INVENTORY_ASSEMBLY_CLSID: &[u8] = - b"\xE1\x81\x0F\xE6\xB3\x49\xD0\x11\x93\xC3\x7E\x07\x06\x00\x00\x00"; - const AUTODESK_INVENTOR_DRAWING_CLSID: &[u8] = - b"\xF1\xFD\xF9\xBB\xDC\x52\xD0\x11\x8C\x04\x08\x00\x09\x0B\xE8\xEC"; - const AUTODESK_INVENTOR_PART_CLSID: &[u8] = - b"\x90\xB4\x29\x4D\xB2\x49\xD0\x11\x93\xC3\x7E\x07\x06\x00\x00\x00"; - const AUTODESK_INVENTOR_PRESENTATION_CLSID: &[u8] = - b"\x80\x3A\x28\x76\xDD\x50\xD3\x11\xA7\xE3\x00\xC0\x4F\x79\xD7\xBC"; - const MICROSOFT_EXCEL_SPREADSHEET_CLSID_1: &[u8] = - b"\x10\x08\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_EXCEL_SPREADSHEET_CLSID_2: &[u8] = - b"\x20\x08\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_POWERPOINT_PRESENTATION_CLSID_1: &[u8] = - b"\x51\x48\x04\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_POWERPOINT_PRESENTATION_CLSID_2: &[u8] = - b"\x10\x8D\x81\x64\x9B\x4F\xCF\x11\x86\xEA\x00\xAA\x00\xB9\x29\xE8"; - const MICROSOFT_POWERPOINT_PRESENTATION_CLSID_3: &[u8] = - b"\x70\xAE\x7B\xEA\x3B\xFB\xCD\x11\xA9\x03\x00\xAA\x00\x51\x0E\xA3"; - const MICROSOFT_PROJECT_PLAN_CLSID: &[u8] = - b"\x3A\x8F\xB7\x74\xC8\xC8\xD1\x11\xBE\x11\x00\xC0\x4F\xB6\xFA\xF1"; - const MICROSOFT_PUBLISHER_DOCUMENT_CLSID: &[u8] = - b"\x01\x12\x02\x00\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_SOFTWARE_INSTALLER_CLSID: &[u8] = - b"\x84\x10\x0C\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_VISIO_DRAWING_CLSID_1: &[u8] = - b"\x13\x1A\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_VISIO_DRAWING_CLSID_2: &[u8] = - b"\x14\x1A\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_WORD_DOCUMENT_CLSID_1: &[u8] = - b"\x00\x09\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_WORD_DOCUMENT_CLSID_2: &[u8] = - b"\x06\x09\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_WORKS_DATABASE_CLSID_1: &[u8] = - b"\x03\x13\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_WORKS_DATABASE_CLSID_2: &[u8] = - b"\xC3\xDB\xCD\x28\xE2\x0A\xCE\x11\xA2\x9A\x00\xAA\x00\x4A\x1A\x72"; - const MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_1: &[u8] = - b"\x02\x13\x02\x00\x00\x00\x00\x00\xC0\x00\x00\x00\x00\x00\x00\x46"; - const MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_2: &[u8] = - b"\xB2\x5A\xA4\x0E\x0A\x9E\xD1\x11\xA4\x07\x00\xC0\x4F\xB9\x32\xBA"; - const MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_3: &[u8] = - b"\xC2\xDB\xCD\x28\xE2\x0A\xCE\x11\xA2\x9A\x00\xAA\x00\x4A\x1A\x72"; - const SOLIDWORKS_ASSEMBLY_CLSID: &[u8] = - b"\x36\x3D\xA3\x83\xC5\x27\xCE\x11\xBF\xD4\x00\x40\x05\x13\xBB\x57"; - const SOLIDWORKS_DRAWING_CLSID: &[u8] = - b"\x34\x3D\xA3\x83\xC5\x27\xCE\x11\xBF\xD4\x00\x40\x05\x13\xBB\x57"; - const SOLIDWORKS_PART_CLSID: &[u8] = - b"\x30\x3D\xA3\x83\xC5\x27\xCE\x11\xBF\xD4\x00\x40\x05\x13\xBB\x57"; - const STARCALC_CLSID_1: &[u8] = - b"\xA0\x3F\x54\x3F\xA6\xB6\x1B\x10\x99\x61\x04\x02\x1C\x00\x70\x02"; - const STARCALC_CLSID_2: &[u8] = - b"\x41\xD4\x61\x63\x35\x42\xD0\x11\x89\xCB\x00\x80\x29\xE4\xB0\xB1"; - const STARCALC_CLSID_3: &[u8] = - b"\x61\xB8\xA5\xC6\xD6\x85\x1D\x11\x89\xCB\x00\x80\x29\xE4\xB0\xB1"; - const STARCHART_CLSID_1: &[u8] = - b"\xE0\xB7\xB3\x02\x25\x42\xD0\x11\x89\xCA\x00\x80\x29\xE4\xB0\xB1"; - const STARCHART_CLSID_2: &[u8] = - b"\x21\x43\x88\xBF\xDD\x85\x1D\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; - const STARCHART_CLSID_3: &[u8] = - b"\xE0\x99\x9C\xFB\x6D\x2C\x1C\x10\x8E\x2C\x00\x00\x1B\x4C\xC7\x11"; - const STARDRAW_CLSID_1: &[u8] = - b"\xA0\x05\x89\x2E\xBD\x85\xD1\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; - const STARDRAW_CLSID_2: &[u8] = - b"\xE0\xAA\x10\xAF\x6D\xB3\x1B\x10\x99\x61\x04\x02\x1C\x00\x70\x02"; - const STARIMPRESS_CLSID_1: &[u8] = - b"\xC0\x3C\x2D\x01\x16\x42\xD0\x11\x89\xCB\x00\x80\x29\xE4\xB0\xB1"; - const STARIMPRESS_CLSID_2: &[u8] = - b"\x21\x72\x5C\x56\xBC\x85\x1D\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; - const STARMATH_CLSID_1: &[u8] = - b"\xE1\xB7\xB3\x02\x25\x42\xD0\x11\x89\xCA\x00\x80\x29\xE4\xB0\xB1"; - const STARMATH_CLSID_2: &[u8] = - b"\x60\x04\x59\xD4\xFD\x35\x1C\x10\xB1\x2A\x04\x02\x1C\x00\x70\x02"; - const STARMATH_CLSID_3: &[u8] = - b"\x40\xE6\xB5\xFF\xDE\x85\x1D\x11\x89\xD0\x00\x80\x29\xE4\xB0\xB1"; - const STARWRITER_CLSID_1: &[u8] = - b"\xB0\xE9\x04\x8B\x0E\x42\xD0\x11\xA4\x5E\x00\xA0\x24\x9D\x57\xB1"; - const STARWRITER_CLSID_2: &[u8] = - b"\xD1\xF9\x0C\xC2\xAE\x85\x1D\x11\xAA\xB4\x00\x60\x97\xDA\x56\x1A"; - const STARWRITER_CLSID_3: &[u8] = - b"\x40\x7E\x5C\xDC\x5C\xB3\x1B\x10\x99\x61\x04\x02\x1C\x00\x70\x02"; - const THREE_DIMENSIONAL_STUDIO_MAX_CLSID: &[u8] = - b"\x7B\x8C\xDD\x1C\xC0\x81\xA0\x45\x9F\xED\x04\x14\x31\x44\xCC\x1E"; - const WORDPERFECT_DOCUMENT_CLSID: &[u8] = - b"\xFF\x73\x98\x51\xAD\x2D\x20\x02\x19\x37\x00\x00\x92\x96\x79\xCD"; - const WORDPERFECT_GRAPHICS_CLSID: &[u8] = - b"\x60\xFE\x2E\x40\x99\x19\x1B\x10\x99\xAE\x04\x02\x1C\x00\x70\x02"; - - // Constants for UTF-16-encoded filenames. - const MICROSOFT_WORKS6_SPREADSHEET_FILENAME: &[u8] = - b"\x00W\x00k\x00s\x00S\x00S\x00W\x00o\x00r\x00k\x00B\x00o\x00o\x00k"; - const MICROSOFT_WORKS_WORD_PROCESSOR_FILENAME: &[u8] = b"\x00M\x00a\x00t\x00O\x00S\x00T"; + const SEARCH_LIMIT: usize = 512; - // Rewinds to the beginning of the stream. - reader.rewind()?; + // Constants for UTF-16-encoded entry names. + const MICROSOFT_WORKS6_SPREADSHEET_ENTRY_NAME: &[u8] = + b"W\x00k\x00s\x00S\x00S\x00W\x00o\x00r\x00k\x00B\x00o\x00o\x00k\x00"; + const MICROSOFT_WORKS_WORD_PROCESSOR_ENTRY_NAME: &[u8] = b"M\x00a\x00t\x00O\x00S\x00T\x00"; - // Gets the stream length. - let length = reader.seek(SeekFrom::End(0))?; + // Rewinds to the beginning of the stream. reader.rewind()?; - // Skips the CFB header. - reader.seek(SeekFrom::Start(512))?; + // Reads the major version. + reader.seek(SeekFrom::Current(26))?; + let mut buffer = [0; 2]; + reader.read_exact(&mut buffer)?; + let major_version = u16::from_le_bytes(buffer); - // Fills the buffer. - let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, (length - 512) as usize)]; + // Reads the first directory sector location. + reader.seek(SeekFrom::Current(20))?; + let mut buffer = [0; 4]; reader.read_exact(&mut buffer)?; + let first_directory_sector_location = u32::from_le_bytes(buffer); - // Searches for specific CLSIDs or filenames in the buffer. - Ok(if contains(&buffer, AUTODESK_INVENTORY_ASSEMBLY_CLSID) { - Self::AutodeskInventorAssembly - } else if contains(&buffer, AUTODESK_INVENTOR_DRAWING_CLSID) { - Self::AutodeskInventorDrawing - } else if contains(&buffer, AUTODESK_INVENTOR_PART_CLSID) { - Self::AutodeskInventorPart - } else if contains(&buffer, AUTODESK_INVENTOR_PRESENTATION_CLSID) { - Self::AutodeskInventorPresentation - } else if contains(&buffer, MICROSOFT_EXCEL_SPREADSHEET_CLSID_1) - || contains(&buffer, MICROSOFT_EXCEL_SPREADSHEET_CLSID_2) - { - Self::MicrosoftExcelSpreadsheet - } else if contains(&buffer, MICROSOFT_POWERPOINT_PRESENTATION_CLSID_1) - || contains(&buffer, MICROSOFT_POWERPOINT_PRESENTATION_CLSID_2) - || contains(&buffer, MICROSOFT_POWERPOINT_PRESENTATION_CLSID_3) - { - Self::MicrosoftPowerpointPresentation - } else if contains(&buffer, MICROSOFT_PROJECT_PLAN_CLSID) { - Self::MicrosoftProjectPlan - } else if contains(&buffer, MICROSOFT_PUBLISHER_DOCUMENT_CLSID) { - Self::MicrosoftPublisherDocument - } else if contains(&buffer, MICROSOFT_SOFTWARE_INSTALLER_CLSID) { - Self::MicrosoftSoftwareInstaller - } else if contains(&buffer, MICROSOFT_VISIO_DRAWING_CLSID_1) - || contains(&buffer, MICROSOFT_VISIO_DRAWING_CLSID_2) - { - Self::MicrosoftVisioDrawing - } else if contains(&buffer, MICROSOFT_WORD_DOCUMENT_CLSID_1) - || contains(&buffer, MICROSOFT_WORD_DOCUMENT_CLSID_2) - { - Self::MicrosoftWordDocument - } else if contains(&buffer, MICROSOFT_WORKS_DATABASE_CLSID_1) - || contains(&buffer, MICROSOFT_WORKS_DATABASE_CLSID_2) - { - Self::MicrosoftWorksDatabase - } else if contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_1) - || contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_2) - || contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_CLSID_3) - { - Self::MicrosoftWorksWordProcessor - } else if contains(&buffer, SOLIDWORKS_ASSEMBLY_CLSID) { - Self::SolidworksAssembly - } else if contains(&buffer, SOLIDWORKS_DRAWING_CLSID) { - Self::SolidworksDrawing - } else if contains(&buffer, SOLIDWORKS_PART_CLSID) { - Self::SolidworksPart - } else if contains(&buffer, STARCALC_CLSID_1) - || contains(&buffer, STARCALC_CLSID_2) - || contains(&buffer, STARCALC_CLSID_3) - { - Self::Starcalc - } else if contains(&buffer, STARCHART_CLSID_1) - || contains(&buffer, STARCHART_CLSID_2) - || contains(&buffer, STARCHART_CLSID_3) - { - Self::Starchart - } else if contains(&buffer, STARDRAW_CLSID_1) || contains(&buffer, STARDRAW_CLSID_2) { - Self::Stardraw - } else if contains(&buffer, STARIMPRESS_CLSID_1) || contains(&buffer, STARIMPRESS_CLSID_2) { - Self::Starimpress - } else if contains(&buffer, STARMATH_CLSID_1) - || contains(&buffer, STARMATH_CLSID_2) - || contains(&buffer, STARMATH_CLSID_3) - { - Self::Starmath - } else if contains(&buffer, STARWRITER_CLSID_1) - || contains(&buffer, STARWRITER_CLSID_2) - || contains(&buffer, STARWRITER_CLSID_3) - { - Self::Starwriter - } else if contains(&buffer, THREE_DIMENSIONAL_STUDIO_MAX_CLSID) { - Self::ThreeDimensionalStudioMax - } else if contains(&buffer, WORDPERFECT_DOCUMENT_CLSID) { - Self::WordperfectDocument - } else if contains(&buffer, WORDPERFECT_GRAPHICS_CLSID) { - Self::WordperfectGraphics - } else if contains(&buffer, MICROSOFT_WORKS6_SPREADSHEET_FILENAME) { - Self::MicrosoftWorks6Spreadsheet - } else if contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_FILENAME) { - Self::MicrosoftWorksWordProcessor - } else { - Self::CompoundFileBinary + // Seeks to the root entry CLSID. + let offset = if major_version == 0x0003 { 512 } else { 4096 } + * (1 + first_directory_sector_location as u64) + + 80; + reader.seek(SeekFrom::Start(offset))?; + + // Reads and decodes the CLSID. + let mut buffer = [0; 16]; + reader.read_exact(&mut buffer)?; + let clsid = format!( + "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", + buffer[3], buffer[2], buffer[1], buffer[0], + buffer[5], buffer[4], + buffer[7], buffer[6], + buffer[8], buffer[9], + buffer[10], buffer[11], buffer[12], buffer[13], buffer[14], buffer[15] + ); + + // Checks the CLSID and returns the corresponding variant. + Ok(match clsid.as_str() { + "e60f81e1-49b3-11d0-93c3-7e0706000000" => Self::AutodeskInventorAssembly, + "bbf9fdf1-52dc-11d0-8c04-0800090be8ec" => Self::AutodeskInventorDrawing, + "4d29b490-49b2-11d0-93c3-7e0706000000" => Self::AutodeskInventorPart, + "76283a80-50dd-11d3-a7e3-00c04f79d7bc" => Self::AutodeskInventorPresentation, + "00020810-0000-0000-c000-000000000046" => Self::MicrosoftExcelSpreadsheet, + "00020820-0000-0000-c000-000000000046" => Self::MicrosoftExcelSpreadsheet, + "00044851-0000-0000-c000-000000000046" => Self::MicrosoftPowerpointPresentation, + "64818d10-4f9b-11cf-86ea-00aa00b929e8" => Self::MicrosoftPowerpointPresentation, + "ea7bae70-fb3b-11cd-a903-00aa00510ea3" => Self::MicrosoftPowerpointPresentation, + "74b78f3a-c8c8-11d1-be11-00c04fb6faf1" => Self::MicrosoftProjectPlan, + "00021201-0000-0000-00c0-000000000046" => Self::MicrosoftPublisherDocument, + "000c1084-0000-0000-c000-000000000046" => Self::MicrosoftSoftwareInstaller, + "00021a13-0000-0000-c000-000000000046" => Self::MicrosoftVisioDrawing, + "00021a14-0000-0000-c000-000000000046" => Self::MicrosoftVisioDrawing, + "00020900-0000-0000-c000-000000000046" => Self::MicrosoftWordDocument, + "00020906-0000-0000-c000-000000000046" => Self::MicrosoftWordDocument, + "00021303-0000-0000-c000-000000000046" => Self::MicrosoftWorksDatabase, + "28cddbc3-0ae2-11ce-a29a-00aa004a1a72" => Self::MicrosoftWorksDatabase, + "00021302-0000-0000-c000-000000000046" => Self::MicrosoftWorksWordProcessor, + "0ea45ab2-9e0a-11d1-a407-00c04fb932ba" => Self::MicrosoftWorksWordProcessor, + "28cddbc2-0ae2-11ce-a29a-00aa004a1a72" => Self::MicrosoftWorksWordProcessor, + "83a33d36-27c5-11ce-bfd4-00400513bb57" => Self::SolidworksAssembly, + "83a33d34-27c5-11ce-bfd4-00400513bb57" => Self::SolidworksDrawing, + "83a33d30-27c5-11ce-bfd4-00400513bb57" => Self::SolidworksPart, + "3f543fa0-b6a6-101b-9961-04021c007002" => Self::Starcalc, + "6361d441-4235-11d0-89cb-008029e4b0b1" => Self::Starcalc, + "c6a5b861-85d6-11d1-89cb-008029e4b0b1" => Self::Starcalc, + "02b3b7e0-4225-11d0-89ca-008029e4b0b1" => Self::Starchart, + "bf884321-85dd-11d1-89d0-008029e4b0b1" => Self::Starchart, + "fb9c99e0-2c6d-101c-8e2c-00001b4cc711" => Self::Starchart, + "2e8905a0-85bd-11d1-89d0-008029e4b0b1" => Self::Stardraw, + "af10aae0-b36d-101b-9961-04021c007002" => Self::Stardraw, + "012d3cc0-4216-11d0-89cb-008029e4b0b1" => Self::Starimpress, + "565c7221-85bc-11d1-89d0-008029e4b0b1" => Self::Starimpress, + "02b3b7e1-4225-11d0-89ca-008029e4b0b1" => Self::Starmath, + "d4590460-35fd-101c-b12a-04021c007002" => Self::Starmath, + "ffb5e640-85de-11d1-89d0-008029e4b0b1" => Self::Starmath, + "8b04e9b0-420e-11d0-a45e-00a0249d57b1" => Self::Starwriter, + "c20cf9d1-85ae-11d1-aab4-006097da561a" => Self::Starwriter, + "dc5c7e40-b35c-101b-9961-04021c007002" => Self::Starwriter, + "1cdd8c7b-81c0-45a0-9fed-04143144cc1e" => Self::ThreeDimensionalStudioMax, + "519873ff-2dad-0220-1937-0000929679cd" => Self::WordperfectDocument, + "402efe60-1999-101b-99ae-04021c007002" => Self::WordperfectGraphics, + "00000000-0000-0000-0000-000000000000" => { + // Fills the buffer. + let mut buffer = [0; SEARCH_LIMIT]; + reader.read_exact(&mut buffer)?; + + // Searches for specific entry names in the buffer. + if contains(&buffer, MICROSOFT_WORKS6_SPREADSHEET_ENTRY_NAME) { + Self::MicrosoftWorks6Spreadsheet + } else if contains(&buffer, MICROSOFT_WORKS_WORD_PROCESSOR_ENTRY_NAME) { + Self::MicrosoftWorksWordProcessor + } else { + Self::CompoundFileBinary + } + } + _ => Self::CompoundFileBinary, }) } From d0aaa130c40cde665c36b2bd4c3f6d9b6b9380f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sat, 4 Nov 2023 11:03:28 +0100 Subject: [PATCH 12/16] Simplifies CLSID decoding --- src/readers.rs | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/readers.rs b/src/readers.rs index c4207ae..7ef153c 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -121,14 +121,16 @@ impl crate::FileFormat { // Reads and decodes the CLSID. let mut buffer = [0; 16]; reader.read_exact(&mut buffer)?; - let clsid = format!( - "{:02x}{:02x}{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}-{:02x}{:02x}{:02x}{:02x}{:02x}{:02x}", - buffer[3], buffer[2], buffer[1], buffer[0], - buffer[5], buffer[4], - buffer[7], buffer[6], - buffer[8], buffer[9], - buffer[10], buffer[11], buffer[12], buffer[13], buffer[14], buffer[15] - ); + let clsid = [3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15] + .iter() + .map(|&index| { + if index == 5 || index == 7 || index == 8 || index == 10 { + format!("-{:02x}", buffer[index]) + } else { + format!("{:02x}", buffer[index]) + } + }) + .collect::(); // Checks the CLSID and returns the corresponding variant. Ok(match clsid.as_str() { From fa8466a686fb19e49b633e3fbc826f1c7af75835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sat, 4 Nov 2023 11:51:57 +0100 Subject: [PATCH 13/16] Updates the DVR_DESCRIPTOR --- src/readers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/readers.rs b/src/readers.rs index 7ef153c..2769cc4 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -61,7 +61,7 @@ impl crate::FileFormat { // Constants for descriptors. const DVR_DESCRIPTOR: &[u8] = - b"D\x00V\x00R\x00 \x00F\x00i\x00l\x00e\x00 \x00V\x00e\x00r\x00s\x00i\x00o\x00n"; + b"D\x00V\x00R\x00 \x00F\x00i\x00l\x00e\x00 \x00V\x00e\x00r\x00s\x00i\x00o\x00n\x00"; // Rewinds to the beginning of the stream. reader.rewind()?; From 93b2f75bd1709677b7e87a9daf575fadb231ef4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sat, 4 Nov 2023 15:13:45 +0100 Subject: [PATCH 14/16] Standardizes the different readers --- src/readers.rs | 129 +++++++++++++++++++++++-------------------------- 1 file changed, 60 insertions(+), 69 deletions(-) diff --git a/src/readers.rs b/src/readers.rs index 2769cc4..a179d64 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -51,7 +51,10 @@ impl crate::FileFormat { #[cfg(feature = "reader-asf")] pub(crate) fn from_asf_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const SEARCH_LIMIT: usize = 8192; + const BUFFER_SIZE_LIMIT: usize = 8192; + + // Constants for UTF-16-encoded descriptors. + const DVR_DESCRIPTOR: &[u8] = b"D\0V\0R\0 \0F\0i\0l\0e\0 \0V\0e\0r\0s\0i\0o\0n\0"; // Constants for GUIDs. const AUDIO_MEDIA_GUID: &[u8] = @@ -59,22 +62,17 @@ impl crate::FileFormat { const VIDEO_MEDIA_GUID: &[u8] = b"\xC0\xEF\x19\xBC\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B"; - // Constants for descriptors. - const DVR_DESCRIPTOR: &[u8] = - b"D\x00V\x00R\x00 \x00F\x00i\x00l\x00e\x00 \x00V\x00e\x00r\x00s\x00i\x00o\x00n\x00"; - - // Rewinds to the beginning of the stream. - reader.rewind()?; - // Gets the stream length. let length = reader.seek(SeekFrom::End(0))?; + + // Rewinds to the beginning of the stream. reader.rewind()?; // Fills the buffer. - let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, length as usize)]; + let mut buffer = vec![0; std::cmp::min(BUFFER_SIZE_LIMIT, length as usize)]; reader.read_exact(&mut buffer)?; - // Searches for specific GUIDs or descriptors in the buffer. + // Searches for specific descriptors and GUIDs in the buffer. Ok(if contains(&buffer, DVR_DESCRIPTOR) { return Ok(Self::MicrosoftDigitalVideoRecording); } else if contains(&buffer, VIDEO_MEDIA_GUID) { @@ -89,13 +87,10 @@ impl crate::FileFormat { /// Determines file format from a CFB reader. #[cfg(feature = "reader-cfb")] pub(crate) fn from_cfb_reader(reader: &mut BufReader) -> Result { - // Constants for limits. - const SEARCH_LIMIT: usize = 512; - // Constants for UTF-16-encoded entry names. const MICROSOFT_WORKS6_SPREADSHEET_ENTRY_NAME: &[u8] = - b"W\x00k\x00s\x00S\x00S\x00W\x00o\x00r\x00k\x00B\x00o\x00o\x00k\x00"; - const MICROSOFT_WORKS_WORD_PROCESSOR_ENTRY_NAME: &[u8] = b"M\x00a\x00t\x00O\x00S\x00T\x00"; + b"W\0k\0s\0S\0S\0W\0o\0r\0k\0B\0o\0o\0k\0"; + const MICROSOFT_WORKS_WORD_PROCESSOR_ENTRY_NAME: &[u8] = b"M\0a\0t\0O\0S\0T\0"; // Rewinds to the beginning of the stream. reader.rewind()?; @@ -132,7 +127,7 @@ impl crate::FileFormat { }) .collect::(); - // Checks the CLSID and returns the corresponding variant. + // Determines the file format based on the CLSID. Ok(match clsid.as_str() { "e60f81e1-49b3-11d0-93c3-7e0706000000" => Self::AutodeskInventorAssembly, "bbf9fdf1-52dc-11d0-8c04-0800090be8ec" => Self::AutodeskInventorDrawing, @@ -179,7 +174,7 @@ impl crate::FileFormat { "402efe60-1999-101b-99ae-04021c007002" => Self::WordperfectGraphics, "00000000-0000-0000-0000-000000000000" => { // Fills the buffer. - let mut buffer = [0; SEARCH_LIMIT]; + let mut buffer = [0; 512]; reader.read_exact(&mut buffer)?; // Searches for specific entry names in the buffer. @@ -199,14 +194,14 @@ impl crate::FileFormat { #[cfg(feature = "reader-ebml")] pub(crate) fn from_ebml_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const ITERATION_LIMIT: usize = 512; + const ELEMENT_LIMIT: usize = 512; const STRING_LIMIT: usize = 64; - // Constants for EBML elements IDs. + // Constants for EBML element IDs. const DOC_TYPE_ID: u32 = 0x4282; const EBML_ID: u32 = 0x1A45DFA3; - // Constants for Matroska elements IDs. + // Constants for Matroska element IDs. const CLUSTER_ID: u32 = 0x1F43B675; const CODEC_ID: u32 = 0x86; const SEGMENT_ID: u32 = 0x18538067; @@ -216,6 +211,7 @@ impl crate::FileFormat { const VIDEO_ID: u32 = 0xE0; /// Helper function to read the ID of an EBML element. + #[inline] fn read_id(reader: &mut R) -> Result { // Reads the first byte. let mut first_byte = [0]; @@ -238,6 +234,7 @@ impl crate::FileFormat { } /// Helper function to read the size of an EBML element. + #[inline] fn read_size(reader: &mut R) -> Result { // Reads the first byte. let mut first_byte = [0]; @@ -262,13 +259,13 @@ impl crate::FileFormat { // Rewinds to the beginning of the stream. reader.rewind()?; - // Flags indicating the presence of audio, video and subtitle codecs. + // Flags indicating the presence of audio, video or subtitle codecs. let mut audio_codec = false; let mut video_codec = false; let mut subtitle_codec = false; // Iterates through the EBML elements in the reader. - let mut iteration_count = 0; + let mut element_count = 0; while let Ok(id) = read_id(reader) { // Reads the size of the element. let size = read_size(reader)?; @@ -332,11 +329,11 @@ impl crate::FileFormat { } } - // Increments the iteration count. - iteration_count += 1; + // Increments the element count. + element_count += 1; - // Checks if the iteration limit has been reached. - if iteration_count == ITERATION_LIMIT { + // Checks if the element limit has been reached. + if element_count == ELEMENT_LIMIT { break; } } @@ -362,23 +359,22 @@ impl crate::FileFormat { const NEW_EXECUTABLE_SIGNATURE: &[u8] = b"NE"; const PORTABLE_EXECUTABLE_SIGNATURE: &[u8] = b"PE\0\0"; - // Rewinds to the beginning of the stream. - reader.rewind()?; - // Gets the stream length. let length = reader.seek(SeekFrom::End(0))?; + + // Rewinds to the beginning of the stream. reader.rewind()?; // Reads the e_lfanew field. reader.seek(SeekFrom::Current(0x3C))?; - let mut e_lfanew = [0; 4]; - reader.read_exact(&mut e_lfanew)?; - let e_lfanew = u32::from_le_bytes(e_lfanew) as u64; + let mut buffer = [0; 4]; + reader.read_exact(&mut buffer)?; + let e_lfanew = u32::from_le_bytes(buffer); // Checks that the e_lfanew value is not outside the stream's boundaries. - if e_lfanew + 4 < length { + if e_lfanew as u64 + 4 < length { // Seeks to e_lfanew. - reader.seek(SeekFrom::Start(e_lfanew))?; + reader.seek(SeekFrom::Start(e_lfanew as u64))?; // Reads the signature. let mut signature = [0; 4]; @@ -386,10 +382,11 @@ impl crate::FileFormat { // Checks the signature. if signature == PORTABLE_EXECUTABLE_SIGNATURE { + // Checks the characteristics. reader.seek(SeekFrom::Current(0x12))?; - let mut characteristics = [0; 2]; - reader.read_exact(&mut characteristics)?; - return Ok(if u16::from_le_bytes(characteristics) & 0x2000 == 0x2000 { + let mut buffer = [0; 2]; + reader.read_exact(&mut buffer)?; + return Ok(if u16::from_le_bytes(buffer) & 0x2000 == 0x2000 { Self::DynamicLinkLibrary } else { Self::PortableExecutable @@ -409,31 +406,28 @@ impl crate::FileFormat { #[cfg(feature = "reader-mp4")] pub(crate) fn from_mp4_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const ITERATION_LIMIT: usize = 512; + const BOX_LIMIT: usize = 512; // Rewinds to the beginning of the stream. reader.rewind()?; - // Flags indicating the presence of audio, video and subtitle tracks. + // Flags indicating the presence of audio, video or subtitle tracks. let mut audio_track = false; let mut video_track = false; let mut subtitle_track = false; // Iterates through boxes in the reader. - let mut iteration_count = 0; - let mut box_header = [0; 8]; - while reader.read_exact(&mut box_header).is_ok() { - let box_size = - u32::from_be_bytes([box_header[0], box_header[1], box_header[2], box_header[3]]); - match &box_header[4..8] { + let mut box_count = 0; + let mut buffer = [0; 8]; + while reader.read_exact(&mut buffer).is_ok() { + let box_size = u32::from_be_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]); + match &buffer[4..8] { b"moov" | b"trak" | b"mdia" => { // Does nothing for these boxes. } b"hdlr" => { - // Skips the first 8 bytes. - reader.seek(SeekFrom::Current(8))?; - // Reads the handler type. + reader.seek(SeekFrom::Current(8))?; let mut handler_type = [0; 4]; reader.read_exact(&mut handler_type)?; @@ -458,11 +452,11 @@ impl crate::FileFormat { } } - // Increments the iteration count. - iteration_count += 1; + // Increments the box count. + box_count += 1; - // Checks if the iteration limit has been reached. - if iteration_count == ITERATION_LIMIT { + // Checks if the box limit has been reached. + if box_count == BOX_LIMIT { break; } } @@ -483,20 +477,19 @@ impl crate::FileFormat { #[cfg(feature = "reader-pdf")] pub(crate) fn from_pdf_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const SEARCH_LIMIT: usize = 4_194_304; - - // Rewinds to the beginning of the stream. - reader.rewind()?; + const BUFFER_SIZE_LIMIT: usize = 4_194_304; // Gets the stream length. let length = reader.seek(SeekFrom::End(0))?; + + // Rewinds to the beginning of the stream. reader.rewind()?; // Fills the buffer. - let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, length as usize)]; + let mut buffer = vec![0; std::cmp::min(BUFFER_SIZE_LIMIT, length as usize)]; reader.read_exact(&mut buffer)?; - // Searches for the "AIPrivateData" sequence in the buffer. + // Searches for the "AIPrivateData" tag in the buffer. Ok(if contains(&buffer, b"AIPrivateData") { Self::AdobeIllustratorArtwork } else { @@ -508,17 +501,16 @@ impl crate::FileFormat { #[cfg(feature = "reader-rm")] pub(crate) fn from_rm_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const SEARCH_LIMIT: usize = 4096; - - // Rewinds to the beginning of the stream. - reader.rewind()?; + const BUFFER_SIZE_LIMIT: usize = 4096; // Gets the stream length. let length = reader.seek(SeekFrom::End(0))?; + + // Rewinds to the beginning of the stream. reader.rewind()?; // Fills the buffer. - let mut buffer = vec![0; std::cmp::min(SEARCH_LIMIT, length as usize)]; + let mut buffer = vec![0; std::cmp::min(BUFFER_SIZE_LIMIT, length as usize)]; reader.read_exact(&mut buffer)?; // Searches for specific media types in the buffer. @@ -537,8 +529,8 @@ impl crate::FileFormat { #[cfg(feature = "reader-txt")] pub(crate) fn from_txt_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const LINE_LIMIT: usize = 256; - const READ_LIMIT: u64 = 8_388_608; + const LINE_LIMIT: usize = 128; + const READ_LIMIT: u64 = 4_194_304; // Rewinds to the beginning of the stream. reader.rewind()?; @@ -637,17 +629,16 @@ impl crate::FileFormat { #[cfg(feature = "reader-zip")] pub(crate) fn from_zip_reader(reader: &mut BufReader) -> Result { // Constants for limits. - const FILE_LIMIT: usize = 4096; + const FILE_LIMIT: usize = 2048; // Constants for signatures. const CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE: &[u8] = b"\x50\x4B\x01\x02"; const END_OF_CENTRAL_DIRECTORY_SIGNATURE: &[u8] = b"\x50\x4B\x05\x06"; - // Rewinds to the beginning of the stream. - reader.rewind()?; - // Gets the stream length. let length = reader.seek(SeekFrom::End(0))?; + + // Rewinds to the beginning of the stream. reader.rewind()?; // Searches for the end of central directory. From 6311a58030f43951b7403c44144d8519d89508a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sat, 4 Nov 2023 15:44:50 +0100 Subject: [PATCH 15/16] Adds release date for version 0.22.0 --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93fdbf3..7ec625b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# Version 0.22.0 (unreleased) +# Version 0.22.0 (2023-11-04) ## Internal changes From f5e2e2d29cbd2d0ebfc8ac60a994a5eec899e735 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Mal=C3=A9cot?= Date: Sat, 4 Nov 2023 15:55:09 +0100 Subject: [PATCH 16/16] Allows clippy::needless_pass_by_ref_mut --- src/readers.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/readers.rs b/src/readers.rs index a179d64..6833227 100644 --- a/src/readers.rs +++ b/src/readers.rs @@ -5,6 +5,7 @@ use std::io::*; impl crate::FileFormat { /// Determines file format from the specified format reader. #[allow(unused_variables)] + #[allow(clippy::needless_pass_by_ref_mut)] #[inline] pub(crate) fn from_format_reader( format: Self, @@ -35,6 +36,7 @@ impl crate::FileFormat { /// Determines file format from a generic reader. #[allow(unused_variables)] + #[allow(clippy::needless_pass_by_ref_mut)] #[inline] pub(crate) fn from_generic_reader(reader: &mut BufReader) -> Self { #[cfg(feature = "reader-txt")]