Skip to content

Commit 6cdc448

Browse files
authored
Respect general-purpose bit flags when decoding ZipArchiveEntry names and comments (#103271)
If bit 11 in the general purpose bit flags is set, forces the use of UTF-8 instead of the encoding specified in the ZipArchive constructor.
1 parent 0ea5ea7 commit 6cdc448

File tree

6 files changed

+120
-61
lines changed

6 files changed

+120
-61
lines changed

src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs

+13
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Collections.Generic;
55
using System.Diagnostics;
66
using System.Linq;
7+
using System.Text;
78
using System.Threading.Tasks;
89
using Xunit;
910

@@ -499,5 +500,17 @@ public static IEnumerable<object[]> Latin1Comment_Data()
499500
yield return e;
500501
}
501502
}
503+
504+
// For each row of Latin1Comment_Data(), encodes the expected comment (e[1]) to bytes with Latin1 and decodes those bytes as UTF-8.
505+
// Each yielded row is: originalComment, expectedComment, expectedComment transcoded (its Latin1 bytes read back as UTF-8).
506+
public static IEnumerable<object[]> MismatchingEncodingComment_Data()
507+
{
508+
foreach (object[] e in Latin1Comment_Data())
509+
{
510+
byte[] expectedBytes = Encoding.Latin1.GetBytes(e[1] as string);
511+
512+
yield return new object[] { e[0], e[1], Encoding.UTF8.GetString(expectedBytes) };
513+
}
514+
}
502515
}
503516
}

src/libraries/System.IO.Compression.ZipFile/src/System/IO/Compression/ZipFile.Create.cs

+19-19
Original file line numberDiff line numberDiff line change
@@ -102,39 +102,39 @@ public static partial class ZipFile
102102
/// If the file exists and is not a Zip file, an <code>InvalidDataException</code> will be thrown.
103103
/// If the file exists and is empty or does not exist, a new Zip file will be created.
104104
/// Note that creating a Zip file with the <code>ZipArchiveMode.Create</code> mode is more efficient when creating a new Zip file.</param>
105-
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this ZipArchive.
105+
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this ZipArchive.
106106
/// <para>NOTE: Specifying a value other than <c>null</c> for this parameter is discouraged.
107107
/// However, this may be necessary for interoperability with ZIP archive tools and libraries that do not correctly support
108-
/// UTF-8 encoding for entry names.<br />
108+
/// UTF-8 encoding for entry names or comments.<br />
109109
/// This value is used as follows:</para>
110110
/// <para><strong>Reading (opening) ZIP archive files:</strong></para>
111111
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
112112
/// <list>
113113
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
114-
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name.</item>
114+
/// use the current system default code page (<c>Encoding.Default</c>) in order to decode the entry name and comment.</item>
115115
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
116-
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
116+
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
117117
/// </list>
118118
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
119119
/// <list>
120120
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header is <em>not</em> set,
121-
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name.</item>
121+
/// use the specified <c>entryNameEncoding</c> in order to decode the entry name and comment.</item>
122122
/// <item>For entries where the language encoding flag (EFS) in the general purpose bit flag of the local file header <em>is</em> set,
123-
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name.</item>
123+
/// use UTF-8 (<c>Encoding.UTF8</c>) in order to decode the entry name and comment.</item>
124124
/// </list>
125125
/// <para><strong>Writing (saving) ZIP archive files:</strong></para>
126126
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
127127
/// <list>
128-
/// <item>For entry names that contain characters outside the ASCII range,
128+
/// <item>For entry names or comments that contain characters outside the ASCII range,
129129
/// the language encoding flag (EFS) will be set in the general purpose bit flag of the local file header,
130-
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name into bytes.</item>
131-
/// <item>For entry names that do not contain characters outside the ASCII range,
130+
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name and comment into bytes.</item>
131+
/// <item>For entry names or comments that do not contain characters outside the ASCII range,
132132
/// the language encoding flag (EFS) will not be set in the general purpose bit flag of the local file header,
133-
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names into bytes.</item>
133+
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names and comments into bytes.</item>
134134
/// </list>
135135
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
136136
/// <list>
137-
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names into bytes.
137+
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names and comments into bytes.
138138
/// The language encoding flag (EFS) in the general purpose bit flag of the local file header will be set if and only
139139
/// if the specified <c>entryNameEncoding</c> is a UTF-8 encoding.</item>
140140
/// </list>
@@ -322,23 +322,23 @@ public static void CreateFromDirectory(string sourceDirectoryName, string destin
322322
/// <param name="includeBaseDirectory"><code>true</code> to indicate that a directory named <code>sourceDirectoryName</code> should
323323
/// be included at the root of the archive. <code>false</code> to indicate that the files and directories in <code>sourceDirectoryName</code>
324324
/// should be included directly in the archive.</param>
325-
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this ZipArchive.
325+
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this ZipArchive.
326326
/// <para>NOTE: Specifying a value other than <c>null</c> for this parameter is discouraged.
327327
/// However, this may be necessary for interoperability with ZIP archive tools and libraries that do not correctly support
328-
/// UTF-8 encoding for entry names.<br />
328+
/// UTF-8 encoding for entry names or comments.<br />
329329
/// This value is used as follows while creating the archive:</para>
330330
/// <para>If <c>entryNameEncoding</c> is not specified (<c>== null</c>):</para>
331331
/// <list>
332-
/// <item>For file names that contain characters outside the ASCII range:<br />
332+
/// <item>For file names or comments that contain characters outside the ASCII range:<br />
333333
/// The language encoding flag (EFS) will be set in the general purpose bit flag of the local file header of the corresponding entry,
334-
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name into bytes.</item>
335-
/// <item>For file names that do not contain characters outside the ASCII range:<br />
334+
/// and UTF-8 (<c>Encoding.UTF8</c>) will be used in order to encode the entry name and comment into bytes.</item>
335+
/// <item>For file names or comments that do not contain characters outside the ASCII range:<br />
336336
/// the language encoding flag (EFS) will not be set in the general purpose bit flag of the local file header of the corresponding entry,
337-
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names into bytes.</item>
337+
/// and the current system default code page (<c>Encoding.Default</c>) will be used to encode the entry names and comments into bytes.</item>
338338
/// </list>
339339
/// <para>If <c>entryNameEncoding</c> is specified (<c>!= null</c>):</para>
340340
/// <list>
341-
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names into bytes.
341+
/// <item>The specified <c>entryNameEncoding</c> will always be used to encode the entry names and comments into bytes.
342342
/// The language encoding flag (EFS) in the general purpose bit flag of the local file header for each entry will be set if and only
343343
/// if the specified <c>entryNameEncoding</c> is a UTF-8 encoding.</item>
344344
/// </list>
@@ -408,7 +408,7 @@ public static void CreateFromDirectory(string sourceDirectoryName, Stream destin
408408
/// <param name="destination">The stream where the zip archive is to be stored.</param>
409409
/// <param name="compressionLevel">One of the enumeration values that indicates whether to emphasize speed or compression effectiveness when creating the entry.</param>
410410
/// <param name="includeBaseDirectory"><see langword="true" /> to include the directory name from <paramref name="sourceDirectoryName" /> at the root of the archive; <see langword="false" /> to include only the contents of the directory.</param>
411-
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names.</param>
411+
/// <param name="entryNameEncoding">The encoding to use when reading or writing entry names and comments in this archive. Specify a value for this parameter only when an encoding is required for interoperability with zip archive tools and libraries that do not support UTF-8 encoding for entry names or comments.</param>
412412
/// <remarks>
413413
/// The directory structure from the file system is preserved in the archive. If the directory is empty, an empty archive is created.
414414
/// Use this method overload to specify the compression level and character encoding, and whether to include the base directory in the archive.

0 commit comments

Comments
 (0)