-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#15 Invalid unicode codepoint warning
Use "utf-8-strict" rather than "utf8" when decoding filenames when efs is in play. That traps invalid code points immediately.
- Loading branch information
Showing
8 changed files
with
168 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# | ||
# Check that a UTF-8 filename containing an invalid codepoint is detected. | ||
# The invalid codepoint is trapped when the code decodes with "utf-8-strict", | ||
# but not when it decodes with the lax "utf8" encoding. | ||
|
||
perl -MIO::Compress::Zip=:all -e 'zip \"abcd" => "test.zip", Minimal => 1, Stream => 0, efs =>1, Name => "\xFA\x80\xA0\x89\xB6" ' | ||
|
||
# codepoint 0x2020276 (above the Unicode maximum of 0x10FFFF) encoded in UTF-8 is | ||
# "\xFA\x80\xA0\x89\xB6" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
|
||
0000 LOCAL HEADER #1 04034B50 (67324752) | ||
0004 Extract Zip Spec 14 (20) '2.0' | ||
0005 Extract OS 00 (0) 'MS-DOS' | ||
0006 General Purpose Flag 0800 (2048) | ||
[Bits 1-2] 0 'Normal Compression' | ||
[Bit 11] 1 'Language Encoding' | ||
0008 Compression Method 0008 (8) 'Deflated' | ||
000A Last Mod Date/Time 5879ADE9 (1484369385) 'Mon Mar 25 21:47:18 2024' | ||
000E CRC ED82CD11 (3984772369) | ||
0012 Compressed Size 00000006 (6) | ||
0016 Uncompressed Size 00000004 (4) | ||
001A Filename Length 0005 (5) | ||
001C Extra Length 0000 (0) | ||
# | ||
# WARNING: Offset 0x1E: Could not decode 'utf8' Filename: UTF-8 "\xFA\x80\xA0\x89\xB6" does not map to Unicode | ||
# | ||
001E Filename 'ú ¶' | ||
0023 PAYLOAD KLJN.. | ||
|
||
0029 CENTRAL HEADER #1 02014B50 (33639248) | ||
002D Created Zip Spec 14 (20) '2.0' | ||
002E Created OS 03 (3) 'Unix' | ||
002F Extract Zip Spec 14 (20) '2.0' | ||
0030 Extract OS 00 (0) 'MS-DOS' | ||
0031 General Purpose Flag 0800 (2048) | ||
[Bits 1-2] 0 'Normal Compression' | ||
[Bit 11] 1 'Language Encoding' | ||
0033 Compression Method 0008 (8) 'Deflated' | ||
0035 Last Mod Date/Time 5879ADE9 (1484369385) 'Mon Mar 25 21:47:18 2024' | ||
0039 CRC ED82CD11 (3984772369) | ||
003D Compressed Size 00000006 (6) | ||
0041 Uncompressed Size 00000004 (4) | ||
0045 Filename Length 0005 (5) | ||
0047 Extra Length 0000 (0) | ||
0049 Comment Length 0000 (0) | ||
004B Disk Start 0000 (0) | ||
004D Int File Attributes 0000 (0) | ||
[Bit 0] 0 'Binary Data' | ||
004F Ext File Attributes 81A40000 (2175008768) | ||
[Bits 16-24] 01A4 (420) 'Unix attrib: rw-r--r--' | ||
[Bits 28-31] 08 (8) 'Regular File' | ||
0053 Local Header Offset 00000000 (0) | ||
# | ||
# WARNING: Offset 0x57: Could not decode 'utf8' Filename: UTF-8 "\xFA\x80\xA0\x89\xB6" does not map to Unicode | ||
# | ||
0057 Filename 'ú ¶' | ||
|
||
005C END CENTRAL HEADER 06054B50 (101010256) | ||
0060 Number of this disk 0000 (0) | ||
0062 Central Dir Disk no 0000 (0) | ||
0064 Entries in this disk 0001 (1) | ||
0066 Total Entries 0001 (1) | ||
0068 Size of Central Dir 00000033 (51) | ||
006C Offset to Central Dir 00000029 (41) | ||
0070 Comment Length 0000 (0) | ||
# | ||
# Warning Count: 2 | ||
# | ||
# Done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
|
||
0000 0003 0004 50 4B 03 04 LOCAL HEADER #1 04034B50 (67324752) | ||
0004 0004 0001 14 Extract Zip Spec 14 (20) '2.0' | ||
0005 0005 0001 00 Extract OS 00 (0) 'MS-DOS' | ||
0006 0007 0002 00 08 General Purpose Flag 0800 (2048) | ||
[Bits 1-2] 0 'Normal Compression' | ||
[Bit 11] 1 'Language Encoding' | ||
0008 0009 0002 08 00 Compression Method 0008 (8) 'Deflated' | ||
000A 000D 0004 E9 AD 79 58 Last Mod Date/Time 5879ADE9 (1484369385) 'Mon Mar 25 21:47:18 2024' | ||
000E 0011 0004 11 CD 82 ED CRC ED82CD11 (3984772369) | ||
0012 0015 0004 06 00 00 00 Compressed Size 00000006 (6) | ||
0016 0019 0004 04 00 00 00 Uncompressed Size 00000004 (4) | ||
001A 001B 0002 05 00 Filename Length 0005 (5) | ||
001C 001D 0002 00 00 Extra Length 0000 (0) | ||
# | ||
# WARNING: Offset 0x1E: Could not decode 'utf8' Filename: UTF-8 "\xFA\x80\xA0\x89\xB6" does not map to Unicode | ||
# | ||
001E 0022 0005 FA 80 A0 89 Filename 'ú ¶' | ||
B6 | ||
0023 0028 0006 4B 4C 4A 4E PAYLOAD KLJN.. | ||
01 00 | ||
|
||
0029 002C 0004 50 4B 01 02 CENTRAL HEADER #1 02014B50 (33639248) | ||
002D 002D 0001 14 Created Zip Spec 14 (20) '2.0' | ||
002E 002E 0001 03 Created OS 03 (3) 'Unix' | ||
002F 002F 0001 14 Extract Zip Spec 14 (20) '2.0' | ||
0030 0030 0001 00 Extract OS 00 (0) 'MS-DOS' | ||
0031 0032 0002 00 08 General Purpose Flag 0800 (2048) | ||
[Bits 1-2] 0 'Normal Compression' | ||
[Bit 11] 1 'Language Encoding' | ||
0033 0034 0002 08 00 Compression Method 0008 (8) 'Deflated' | ||
0035 0038 0004 E9 AD 79 58 Last Mod Date/Time 5879ADE9 (1484369385) 'Mon Mar 25 21:47:18 2024' | ||
0039 003C 0004 11 CD 82 ED CRC ED82CD11 (3984772369) | ||
003D 0040 0004 06 00 00 00 Compressed Size 00000006 (6) | ||
0041 0044 0004 04 00 00 00 Uncompressed Size 00000004 (4) | ||
0045 0046 0002 05 00 Filename Length 0005 (5) | ||
0047 0048 0002 00 00 Extra Length 0000 (0) | ||
0049 004A 0002 00 00 Comment Length 0000 (0) | ||
004B 004C 0002 00 00 Disk Start 0000 (0) | ||
004D 004E 0002 00 00 Int File Attributes 0000 (0) | ||
[Bit 0] 0 'Binary Data' | ||
004F 0052 0004 00 00 A4 81 Ext File Attributes 81A40000 (2175008768) | ||
[Bits 16-24] 01A4 (420) 'Unix attrib: rw-r--r--' | ||
[Bits 28-31] 08 (8) 'Regular File' | ||
0053 0056 0004 00 00 00 00 Local Header Offset 00000000 (0) | ||
# | ||
# WARNING: Offset 0x57: Could not decode 'utf8' Filename: UTF-8 "\xFA\x80\xA0\x89\xB6" does not map to Unicode | ||
# | ||
0057 005B 0005 FA 80 A0 89 Filename 'ú ¶' | ||
B6 | ||
|
||
005C 005F 0004 50 4B 05 06 END CENTRAL HEADER 06054B50 (101010256) | ||
0060 0061 0002 00 00 Number of this disk 0000 (0) | ||
0062 0063 0002 00 00 Central Dir Disk no 0000 (0) | ||
0064 0065 0002 01 00 Entries in this disk 0001 (1) | ||
0066 0067 0002 01 00 Total Entries 0001 (1) | ||
0068 006B 0004 33 00 00 00 Size of Central Dir 00000033 (51) | ||
006C 006F 0004 29 00 00 00 Offset to Central Dir 00000029 (41) | ||
0070 0071 0002 00 00 Comment Length 0000 (0) | ||
# | ||
# Warning Count: 2 | ||
# | ||
# Done |
Binary file not shown.