Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.
/ corefx Public archive

Commit 4c3d4a0

Browse files
[master] Update dependencies from dotnet/coreclr (#36816)
- Microsoft.NET.Sdk.IL - 3.0.0-preview5-27612-73 - Microsoft.NETCore.ILAsm - 3.0.0-preview5-27612-73 - Microsoft.NETCore.Runtime.CoreCLR - 3.0.0-preview5-27612-73 - Also react to changes in the UTF-8 transcoding logic (#36712)
1 parent c3981ca commit 4c3d4a0

14 files changed

+1086
-109
lines changed

eng/Version.Details.xml

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
<Dependencies>
22
<ProductDependencies>
3-
<Dependency Name="Microsoft.NETCore.Runtime.CoreCLR" Version="3.0.0-preview5-27610-72">
3+
<Dependency Name="Microsoft.NETCore.Runtime.CoreCLR" Version="3.0.0-preview5-27612-73">
44
<Uri>https://github.com/dotnet/coreclr</Uri>
5-
<Sha>45e04dd1bb1c7171d88a24454cb2c2811f46ce55</Sha>
5+
<Sha>d5865236e7898b730de28a7a6f034e975bb7282e</Sha>
66
</Dependency>
7-
<Dependency Name="Microsoft.NETCore.ILAsm" Version="3.0.0-preview5-27610-72">
7+
<Dependency Name="Microsoft.NETCore.ILAsm" Version="3.0.0-preview5-27612-73">
88
<Uri>https://github.com/dotnet/coreclr</Uri>
9-
<Sha>45e04dd1bb1c7171d88a24454cb2c2811f46ce55</Sha>
9+
<Sha>d5865236e7898b730de28a7a6f034e975bb7282e</Sha>
1010
</Dependency>
11-
<Dependency Name="Microsoft.NET.Sdk.IL" Version="3.0.0-preview5-27610-72">
11+
<Dependency Name="Microsoft.NET.Sdk.IL" Version="3.0.0-preview5-27612-73">
1212
<Uri>https://github.com/dotnet/coreclr</Uri>
13-
<Sha>45e04dd1bb1c7171d88a24454cb2c2811f46ce55</Sha>
13+
<Sha>d5865236e7898b730de28a7a6f034e975bb7282e</Sha>
1414
</Dependency>
1515
</ProductDependencies>
1616
<ToolsetDependencies>

eng/Versions.props

+2-2
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@
4141
<MicrosoftNETCoreDotNetHostPackageVersion>3.0.0-preview5-27610-11</MicrosoftNETCoreDotNetHostPackageVersion>
4242
<MicrosoftNETCoreDotNetHostPolicyPackageVersion>3.0.0-preview5-27610-11</MicrosoftNETCoreDotNetHostPolicyPackageVersion>
4343
<!-- Coreclr dependencies -->
44-
<MicrosoftNETCoreILAsmPackageVersion>3.0.0-preview5-27610-72</MicrosoftNETCoreILAsmPackageVersion>
45-
<MicrosoftNETCoreRuntimeCoreCLRPackageVersion>3.0.0-preview5-27610-72</MicrosoftNETCoreRuntimeCoreCLRPackageVersion>
44+
<MicrosoftNETCoreILAsmPackageVersion>3.0.0-preview5-27612-73</MicrosoftNETCoreILAsmPackageVersion>
45+
<MicrosoftNETCoreRuntimeCoreCLRPackageVersion>3.0.0-preview5-27612-73</MicrosoftNETCoreRuntimeCoreCLRPackageVersion>
4646
<!-- Corefx dependencies -->
4747
<MicrosoftNETCorePlatformsPackageVersion>3.0.0-preview5.19211.2</MicrosoftNETCorePlatformsPackageVersion>
4848
<!-- Standard dependencies -->

global.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@
55
"msbuild-sdks": {
66
"Microsoft.DotNet.Arcade.Sdk": "1.0.0-beta.19212.2",
77
"Microsoft.DotNet.Helix.Sdk": "2.0.0-beta.19212.2",
8-
"Microsoft.NET.Sdk.IL": "3.0.0-preview5-27610-72"
8+
"Microsoft.NET.Sdk.IL": "3.0.0-preview5-27612-73"
99
}
1010
}

src/System.Reflection.Metadata/tests/Metadata/Ecma335/MetadataBuilderTests.cs

+10-1
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,16 @@ public void GetOrAddDocumentName2()
492492
Assert.Equal(@"a/", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n6))));
493493
Assert.Equal(@"/", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n7))));
494494
Assert.Equal(@"\\", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n8))));
495-
Assert.Equal("\uFFFd\uFFFd", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n9))));
495+
if (PlatformDetection.IsNetCore)
496+
{
497+
Assert.Equal("\uFFFD\uFFFD\uFFFD", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n9))));
498+
}
499+
else
500+
{
501+
// Versions of .NET prior to Core 3.0 didn't follow Unicode recommendations for U+FFFD substitution,
502+
// so they sometimes emitted too few replacement chars.
503+
Assert.Equal("\uFFFD\uFFFD", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n9))));
504+
}
496505
Assert.Equal("\0", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n10))));
497506
}
498507
}

src/System.Reflection.Metadata/tests/Metadata/Ecma335/MetadataRootBuilderTests.cs

+14-1
Original file line numberDiff line numberDiff line change
@@ -377,11 +377,24 @@ public void MetadataVersion()
377377
0x08, 0x00, 0x00, 0x00,
378378

379379
// padded version:
380+
// [ E1 88 B4 ] -> U+1234
381+
// [ ED ] -> invalid (ED cannot be followed by A0) -> U+FFFD
382+
// [ A0 ] -> invalid (not ASCII, not valid leading byte) -> U+FFFD
383+
// [ 80 ] -> invalid (not ASCII, not valid leading byte) -> U+FFFD
380384
0xE1, 0x88, 0xB4, 0xED, 0xA0, 0x80, 0x00, 0x00,
381385
}, builder.Slice(12, -132));
382386

383387
// the default decoder replaces bad byte sequences by U+FFFD
384-
Assert.Equal("\u1234\ufffd\ufffd", ReadVersion(builder));
388+
if (PlatformDetection.IsNetCore)
389+
{
390+
Assert.Equal("\u1234\ufffd\ufffd\ufffd", ReadVersion(builder));
391+
}
392+
else
393+
{
394+
// Versions of .NET prior to Core 3.0 didn't follow Unicode recommendations for U+FFFD substitution,
395+
// so they sometimes emitted too few replacement chars.
396+
Assert.Equal("\u1234\ufffd\ufffd", ReadVersion(builder));
397+
}
385398
}
386399
}
387400
}

src/System.Runtime/tests/System.Runtime.Tests.csproj

+3-1
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,11 @@
287287
<Compile Include="System\Text\RuneTests.netcoreapp.cs" />
288288
<Compile Include="System\Text\RuneTests.TestData.netcoreapp.cs" />
289289
<Compile Include="System\Text\StringBuilderTests.netcoreapp.cs" />
290+
<Compile Include="System\Text\Unicode\Utf16UtilityTests.ValidateChars.netcoreapp.cs" />
290291
<Compile Include="System\Text\Unicode\Utf8Tests.netcoreapp.cs" />
291292
<Compile Include="System\Text\Unicode\Utf8Tests.ToBytes.netcoreapp.cs" />
292293
<Compile Include="System\Text\Unicode\Utf8Tests.ToChars.netcoreapp.cs" />
294+
<Compile Include="System\Text\Unicode\Utf8UtilityTests.ValidateBytes.netcoreapp.cs" />
293295
<Compile Include="System\Type\TypePropertyTests.netcoreapp.cs" />
294296
<Compile Include="System\Type\TypeTests.netcoreapp.cs" />
295297
<Compile Include="System\ArgIteratorTests.netcoreapp.cs" />
@@ -338,4 +340,4 @@
338340
<ItemGroup>
339341
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
340342
</ItemGroup>
341-
</Project>
343+
</Project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Buffers;
6+
using System.Globalization;
7+
using System.Linq;
8+
using System.Reflection;
9+
using System.Runtime.InteropServices;
10+
using Xunit;
11+
12+
namespace System.Text.Unicode.Tests
13+
{
14+
/// <summary>
/// Tests for the <c>GetPointerToFirstInvalidChar</c> method of
/// <c>System.Text.Unicode.Utf16Utility</c>. The method is looked up via reflection in the
/// assembly that contains <see cref="Utf8"/> and is invoked through a delegate.
/// </summary>
public partial class Utf16UtilityTests
{
    // Delegate whose signature mirrors the reflected GetPointerToFirstInvalidChar method.
    private unsafe delegate char* GetPointerToFirstInvalidCharDel(char* pInputBuffer, int inputLength, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment);

    // Lazily-created delegate so the reflection lookup only happens on first use.
    private static readonly Lazy<GetPointerToFirstInvalidCharDel> _getPointerToFirstInvalidCharFn = CreateGetPointerToFirstInvalidCharFn();

    /// <summary>
    /// Well-formed inputs: no invalid char is expected (index -1), and the reported rune and
    /// UTF-8 byte counts must match the supplied expectations.
    /// </summary>
    /// <param name="unprocessedInput">Input text; <c>&lt;NAME&gt;</c> / <c>&lt;XXXX&gt;</c> tokens are expanded by <see cref="ProcessInput"/>.</param>
    /// <param name="expectedRuneCount">Expected number of scalar values in the input.</param>
    /// <param name="expectedUtf8ByteCount">Expected number of UTF-8 code units needed for the input.</param>
    [Theory]
    [InlineData("", 0, 0)] // empty string is OK
    [InlineData("X", 1, 1)]
    [InlineData("XY", 2, 2)]
    [InlineData("XYZ", 3, 3)]
    [InlineData("<EACU>", 1, 2)]
    [InlineData("X<EACU>", 2, 3)]
    [InlineData("<EACU>X", 2, 3)]
    [InlineData("<EURO>", 1, 3)]
    [InlineData("<GRIN>", 1, 4)]
    [InlineData("X<GRIN>Z", 3, 6)]
    [InlineData("X<0000>Z", 3, 3)] // null chars are allowed
    public void GetIndexOfFirstInvalidUtf16Sequence_WithSmallValidBuffers(string unprocessedInput, int expectedRuneCount, int expectedUtf8ByteCount)
    {
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(unprocessedInput, -1 /* expectedIdxOfFirstInvalidChar */, expectedRuneCount, expectedUtf8ByteCount);
    }

    /// <summary>
    /// Malformed inputs: the index of the first invalid char must be reported, and the rune /
    /// UTF-8 byte counts must cover only the data that precedes that index.
    /// </summary>
    /// <param name="unprocessedInput">Input text; <c>&lt;NAME&gt;</c> / <c>&lt;XXXX&gt;</c> tokens are expanded by <see cref="ProcessInput"/>.</param>
    /// <param name="idxOfFirstInvalidChar">Expected index of the first invalid UTF-16 code unit.</param>
    /// <param name="expectedRuneCount">Expected number of scalar values before the invalid char.</param>
    /// <param name="expectedUtf8ByteCount">Expected number of UTF-8 code units for the data before the invalid char.</param>
    [Theory]
    [InlineData("<DC00>", 0, 0, 0)] // standalone low surrogate (at beginning of sequence)
    [InlineData("X<DC00>", 1, 1, 1)] // standalone low surrogate (preceded by valid ASCII data)
    [InlineData("<EURO><DC00>", 1, 1, 3)] // standalone low surrogate (preceded by valid non-ASCII data)
    [InlineData("<D800>", 0, 0, 0)] // standalone high surrogate (missing follow-up low surrogate)
    [InlineData("<D800>Y", 0, 0, 0)] // standalone high surrogate (followed by ASCII char)
    [InlineData("<D800><D800>", 0, 0, 0)] // standalone high surrogate (followed by high surrogate)
    [InlineData("<D800><EURO>", 0, 0, 0)] // standalone high surrogate (followed by valid non-ASCII char)
    [InlineData("<DC00><DC00>", 0, 0, 0)] // standalone low surrogate (not preceded by a high surrogate)
    [InlineData("<DC00><D800>", 0, 0, 0)] // standalone low surrogate (not preceded by a high surrogate)
    [InlineData("<GRIN><DC00><DC00>", 2, 1, 4)] // standalone low surrogate (preceded by a valid surrogate pair)
    [InlineData("<GRIN><DC00><D800>", 2, 1, 4)] // standalone low surrogate (preceded by a valid surrogate pair)
    [InlineData("<GRIN><0000><DC00><D800>", 3, 2, 5)] // standalone low surrogate (preceded by a valid null char)
    public void GetIndexOfFirstInvalidUtf16Sequence_WithSmallInvalidBuffers(string unprocessedInput, int idxOfFirstInvalidChar, int expectedRuneCount, int expectedUtf8ByteCount)
    {
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(unprocessedInput, idxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);
    }

    /// <summary>
    /// Walks a single 128-char buffer through a series of in-place mutations (valid pairs,
    /// standalone surrogates, pairs at various offsets) and checks the result after each step.
    /// Each step builds on the buffer state left by the previous one, so statement order matters.
    /// </summary>
    [Fact]
    public void GetIndexOfFirstInvalidUtf16Sequence_WithInvalidSurrogateSequences()
    {
        // All ASCII

        char[] chars = Enumerable.Repeat('x', 128).ToArray();
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 128, expectedUtf8ByteCount: 128);

        // Throw a surrogate pair at the beginning

        chars[0] = '\uD800';
        chars[1] = '\uDFFF';
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 127, expectedUtf8ByteCount: 130);

        // Throw a surrogate pair near the end

        chars[124] = '\uD800';
        chars[125] = '\uDFFF';
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 126, expectedUtf8ByteCount: 132);

        // Throw a standalone surrogate code point at the *very* end

        chars[127] = '\uD800'; // high surrogate
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 127, expectedRuneCount: 125, expectedUtf8ByteCount: 131);

        chars[127] = '\uDFFF'; // low surrogate
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 127, expectedRuneCount: 125, expectedUtf8ByteCount: 131);

        // Make the final surrogate pair valid

        chars[126] = '\uD800'; // high surrogate
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 125, expectedUtf8ByteCount: 134);

        // Throw an invalid surrogate sequence in the middle (straddles a vector boundary)

        chars[12] = '\u0080'; // 2-byte UTF-8 sequence
        chars[13] = '\uD800'; // high surrogate
        chars[14] = '\uD800'; // high surrogate
        chars[15] = '\uDFFF'; // low surrogate
        chars[16] = '\uDFFF'; // low surrogate
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 13, expectedRuneCount: 12, expectedUtf8ByteCount: 16);

        // Correct the surrogate sequence we just added

        chars[14] = '\uDC00'; // low surrogate
        chars[15] = '\uDBFF'; // high surrogate
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 123, expectedUtf8ByteCount: 139);

        // Corrupt the surrogate pair that's split across a vector boundary

        chars[16] = 'x'; // ASCII char (remember.. chars[15] is a high surrogate char)
        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 15, expectedRuneCount: 13, expectedUtf8ByteCount: 20);
    }

    /// <summary>
    /// Expands the tokens in <paramref name="unprocessedInput"/> and then runs the core check
    /// several times: once as-is, once with 128 ASCII chars prepended, and then with the
    /// prepended region progressively rewritten to contain 2-byte, 3-byte and surrogate-pair data.
    /// Expectations are adjusted before each run to account for the added data.
    /// </summary>
    /// <param name="unprocessedInput">Input text containing optional <c>&lt;...&gt;</c> tokens.</param>
    /// <param name="expectedIdxOfFirstInvalidChar">Expected index of the first invalid char in the original input, or -1 if none.</param>
    /// <param name="expectedRuneCount">Expected scalar count for the original input.</param>
    /// <param name="expectedUtf8ByteCount">Expected UTF-8 code unit count for the original input.</param>
    private static void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(string unprocessedInput, int expectedIdxOfFirstInvalidChar, int expectedRuneCount, long expectedUtf8ByteCount)
    {
        char[] processedInput = ProcessInput(unprocessedInput).ToCharArray();

        // Run the test normally

        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

        // Put a bunch of ASCII data at the beginning (to test the call to ASCIIUtility at method entry)

        processedInput = Enumerable.Repeat('x', 128).Concat(processedInput).ToArray();

        // The original data moved 128 chars to the right; "-1" (no invalid char) stays as-is.
        if (expectedIdxOfFirstInvalidChar >= 0)
        {
            expectedIdxOfFirstInvalidChar += 128;
        }
        expectedRuneCount += 128;
        expectedUtf8ByteCount += 128;

        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

        // Change the first few chars to a mixture of 2-byte and 3-byte UTF-8 sequences
        // This makes sure the vectorized code paths can properly handle these.

        processedInput[0] = '\u0080'; // 2-byte UTF-8 sequence
        processedInput[1] = '\u0800'; // 3-byte UTF-8 sequence
        processedInput[2] = '\u0080'; // 2-byte UTF-8 sequence
        processedInput[3] = '\u0800'; // 3-byte UTF-8 sequence
        processedInput[4] = '\u0080'; // 2-byte UTF-8 sequence
        processedInput[5] = '\u0800'; // 3-byte UTF-8 sequence
        processedInput[6] = '\u0080'; // 2-byte UTF-8 sequence
        processedInput[7] = '\u0800'; // 3-byte UTF-8 sequence

        // Four chars grew from 1 to 2 bytes (+4) and four grew from 1 to 3 bytes (+8).
        expectedUtf8ByteCount += 12;

        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

        // Throw some surrogate pairs into the mix to make sure they're also handled properly
        // by the vectorized code paths.

        processedInput[8] = '\u0080'; // 2-byte UTF-8 sequence
        processedInput[9] = '\u0800'; // 3-byte UTF-8 sequence
        processedInput[10] = '\u0080'; // 2-byte UTF-8 sequence
        processedInput[11] = '\u0800'; // 3-byte UTF-8 sequence
        processedInput[12] = '\u0080'; // 2-byte UTF-8 sequence
        processedInput[13] = '\uD800'; // high surrogate
        processedInput[14] = '\uDC00'; // low surrogate
        processedInput[15] = 'z'; // ASCII char

        // Two chars collapsed into one scalar (-1 rune); bytes: +1 +2 +1 +2 +1 for [8..12], +2 for the pair.
        expectedRuneCount--;
        expectedUtf8ByteCount += 9;

        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

        // Split the next surrogate pair across the vector boundary (so that we
        // don't inadvertently treat this as a standalone surrogate sequence).

        processedInput[15] = '\uDBFF'; // high surrogate
        processedInput[16] = '\uDFFF'; // low surrogate

        // Two 1-byte chars became one 4-byte scalar.
        expectedRuneCount--;
        expectedUtf8ByteCount += 2;

        GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);
    }

    /// <summary>
    /// Invokes the reflected <c>GetPointerToFirstInvalidChar</c> delegate over a read-only copy of
    /// <paramref name="input"/> and asserts on the returned index and the derived counts.
    /// </summary>
    /// <param name="input">UTF-16 code units to validate.</param>
    /// <param name="expectedRetVal">Expected index of the first invalid char, or -1 if the whole buffer is valid.</param>
    /// <param name="expectedRuneCount">Expected scalar count for the data before the first invalid char.</param>
    /// <param name="expectedUtf8ByteCount">Expected UTF-8 code unit count for the data before the first invalid char.</param>
    private static unsafe void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(char[] input, int expectedRetVal, int expectedRuneCount, long expectedUtf8ByteCount)
    {
        // Arrange

        using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(input);
        boundedMemory.MakeReadonly();

        // Act

        int actualRetVal;
        long actualUtf8CodeUnitCount;
        int actualRuneCount;

        fixed (char* pInputBuffer = &MemoryMarshal.GetReference(boundedMemory.Span))
        {
            char* pFirstInvalidChar = _getPointerToFirstInvalidCharFn.Value(pInputBuffer, input.Length, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment);

            // The unsigned comparison also rejects a negative difference (pointer before the buffer start).
            long ptrDiff = pFirstInvalidChar - pInputBuffer;
            Assert.True((ulong)ptrDiff <= (uint)input.Length, "ptrDiff was outside expected range.");

            Assert.True(utf8CodeUnitCountAdjustment >= 0, "UTF-8 code unit count adjustment must be non-negative.");
            Assert.True(scalarCountAdjustment <= 0, "Scalar count adjustment must be 0 or negative.");

            // A pointer one-past-the-end means every char was valid; report that as -1.
            actualRetVal = (ptrDiff == input.Length) ? -1 : (int)ptrDiff;

            // The last two 'out' parameters are:
            // a) The number to be added to the "chars processed" return value to come up with the total UTF-8 code unit count, and
            // b) The number to be added to the "total UTF-16 code unit count" value to come up with the total scalar count.

            actualUtf8CodeUnitCount = ptrDiff + utf8CodeUnitCountAdjustment;
            actualRuneCount = (int)ptrDiff + scalarCountAdjustment;
        }

        // Assert (xunit's Assert.Equal takes the expected value first)

        Assert.Equal(expectedRetVal, actualRetVal);
        Assert.Equal(expectedRuneCount, actualRuneCount);
        Assert.Equal(expectedUtf8ByteCount, actualUtf8CodeUnitCount);
    }

    /// <summary>
    /// Builds the lazy factory that locates <c>Utf16Utility.GetPointerToFirstInvalidChar</c> via
    /// reflection and wraps it in a <see cref="GetPointerToFirstInvalidCharDel"/> delegate.
    /// </summary>
    /// <returns>A <see cref="Lazy{T}"/> that performs the reflection lookup on first access.</returns>
    /// <exception cref="Exception">Thrown (on first access) if the type or the method cannot be found.</exception>
    private static Lazy<GetPointerToFirstInvalidCharDel> CreateGetPointerToFirstInvalidCharFn()
    {
        return new Lazy<GetPointerToFirstInvalidCharDel>(() =>
        {
            // Utf8 is used only as an anchor to reach the assembly that should contain Utf16Utility.
            Type utf16UtilityType = typeof(Utf8).Assembly.GetType("System.Text.Unicode.Utf16Utility");

            if (utf16UtilityType is null)
            {
                throw new Exception("Couldn't find Utf16Utility type in System.Private.CoreLib.");
            }

            MethodInfo methodInfo = utf16UtilityType.GetMethod("GetPointerToFirstInvalidChar", BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);

            if (methodInfo is null)
            {
                throw new Exception("Couldn't find GetPointerToFirstInvalidChar method on Utf16Utility.");
            }

            return (GetPointerToFirstInvalidCharDel)methodInfo.CreateDelegate(typeof(GetPointerToFirstInvalidCharDel));
        });
    }

    /// <summary>
    /// Expands the placeholder tokens used in the inline test data into real UTF-16 code units.
    /// </summary>
    /// <param name="input">Text that may contain <c>&lt;EACU&gt;</c>, <c>&lt;EURO&gt;</c>, <c>&lt;GRIN&gt;</c> or <c>&lt;XXXX&gt;</c> (four hex digits) tokens.</param>
    /// <returns>The input with every token replaced by the char(s) it denotes.</returns>
    private static string ProcessInput(string input)
    {
        input = input.Replace("<EACU>", "\u00E9", StringComparison.Ordinal); // U+00E9 LATIN SMALL LETTER E WITH ACUTE
        input = input.Replace("<EURO>", "\u20AC", StringComparison.Ordinal); // U+20AC EURO SIGN
        input = input.Replace("<GRIN>", "\U0001F600", StringComparison.Ordinal); // U+1F600 GRINNING FACE

        // Replace <ABCD> with \uABCD. This lets us pass potentially malformed UTF-16
        // strings through xunit as inline test data. (The unit testing framework gets
        // angry when we try putting invalid UTF-16 data directly in inline test data.)

        int idx;
        while ((idx = input.IndexOf('<')) >= 0)
        {
            // A token is 6 chars long: '<', four hex digits, '>'. The closing '>' is assumed, not verified.
            input = input[..idx] + (char)ushort.Parse(input.Substring(idx + 1, 4), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture) + input[idx + 6..];
        }

        return input;
    }
}
255+
}

0 commit comments

Comments
 (0)