Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.
/ corefx Public archive

Commit

Permalink
[master] Update dependencies from dotnet/coreclr (#36816)
Browse files Browse the repository at this point in the history
- Microsoft.NET.Sdk.IL - 3.0.0-preview5-27612-73
- Microsoft.NETCore.ILAsm - 3.0.0-preview5-27612-73
- Microsoft.NETCore.Runtime.CoreCLR - 3.0.0-preview5-27612-73
- Also react to changes in the UTF-8 transcoding logic (#36712)
  • Loading branch information
dotnet-maestro[bot] authored and GrabYourPitchforks committed Apr 13, 2019
1 parent c3981ca commit 4c3d4a0
Show file tree
Hide file tree
Showing 14 changed files with 1,086 additions and 109 deletions.
12 changes: 6 additions & 6 deletions eng/Version.Details.xml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
<Dependencies>
<ProductDependencies>
<Dependency Name="Microsoft.NETCore.Runtime.CoreCLR" Version="3.0.0-preview5-27610-72">
<Dependency Name="Microsoft.NETCore.Runtime.CoreCLR" Version="3.0.0-preview5-27612-73">
<Uri>https://github.com/dotnet/coreclr</Uri>
<Sha>45e04dd1bb1c7171d88a24454cb2c2811f46ce55</Sha>
<Sha>d5865236e7898b730de28a7a6f034e975bb7282e</Sha>
</Dependency>
<Dependency Name="Microsoft.NETCore.ILAsm" Version="3.0.0-preview5-27610-72">
<Dependency Name="Microsoft.NETCore.ILAsm" Version="3.0.0-preview5-27612-73">
<Uri>https://github.com/dotnet/coreclr</Uri>
<Sha>45e04dd1bb1c7171d88a24454cb2c2811f46ce55</Sha>
<Sha>d5865236e7898b730de28a7a6f034e975bb7282e</Sha>
</Dependency>
<Dependency Name="Microsoft.NET.Sdk.IL" Version="3.0.0-preview5-27610-72">
<Dependency Name="Microsoft.NET.Sdk.IL" Version="3.0.0-preview5-27612-73">
<Uri>https://github.com/dotnet/coreclr</Uri>
<Sha>45e04dd1bb1c7171d88a24454cb2c2811f46ce55</Sha>
<Sha>d5865236e7898b730de28a7a6f034e975bb7282e</Sha>
</Dependency>
</ProductDependencies>
<ToolsetDependencies>
Expand Down
4 changes: 2 additions & 2 deletions eng/Versions.props
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@
<MicrosoftNETCoreDotNetHostPackageVersion>3.0.0-preview5-27610-11</MicrosoftNETCoreDotNetHostPackageVersion>
<MicrosoftNETCoreDotNetHostPolicyPackageVersion>3.0.0-preview5-27610-11</MicrosoftNETCoreDotNetHostPolicyPackageVersion>
<!-- Coreclr dependencies -->
<MicrosoftNETCoreILAsmPackageVersion>3.0.0-preview5-27610-72</MicrosoftNETCoreILAsmPackageVersion>
<MicrosoftNETCoreRuntimeCoreCLRPackageVersion>3.0.0-preview5-27610-72</MicrosoftNETCoreRuntimeCoreCLRPackageVersion>
<MicrosoftNETCoreILAsmPackageVersion>3.0.0-preview5-27612-73</MicrosoftNETCoreILAsmPackageVersion>
<MicrosoftNETCoreRuntimeCoreCLRPackageVersion>3.0.0-preview5-27612-73</MicrosoftNETCoreRuntimeCoreCLRPackageVersion>
<!-- Corefx dependencies -->
<MicrosoftNETCorePlatformsPackageVersion>3.0.0-preview5.19211.2</MicrosoftNETCorePlatformsPackageVersion>
<!-- Standard dependencies -->
Expand Down
2 changes: 1 addition & 1 deletion global.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
"msbuild-sdks": {
"Microsoft.DotNet.Arcade.Sdk": "1.0.0-beta.19212.2",
"Microsoft.DotNet.Helix.Sdk": "2.0.0-beta.19212.2",
"Microsoft.NET.Sdk.IL": "3.0.0-preview5-27610-72"
"Microsoft.NET.Sdk.IL": "3.0.0-preview5-27612-73"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -492,7 +492,16 @@ public void GetOrAddDocumentName2()
Assert.Equal(@"a/", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n6))));
Assert.Equal(@"/", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n7))));
Assert.Equal(@"\\", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n8))));
Assert.Equal("\uFFFd\uFFFd", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n9))));
if (PlatformDetection.IsNetCore)
{
Assert.Equal("\uFFFD\uFFFD\uFFFD", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n9))));
}
else
{
// Versions of .NET prior to Core 3.0 didn't follow Unicode recommendations for U+FFFD substitution,
// so they sometimes emitted too few replacement chars.
Assert.Equal("\uFFFD\uFFFD", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n9))));
}
Assert.Equal("\0", mdReader.GetString(MetadataTokens.DocumentNameBlobHandle(MetadataTokens.GetHeapOffset(n10))));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,11 +377,24 @@ public void MetadataVersion()
0x08, 0x00, 0x00, 0x00,

// padded version:
// [ E1 88 B4 ] -> U+1234
// [ ED ] -> invalid (ED cannot be followed by A0) -> U+FFFD
// [ A0 ] -> invalid (not ASCII, not valid leading byte) -> U+FFFD
// [ 80 ] -> invalid (not ASCII, not valid leading byte) -> U+FFFD
0xE1, 0x88, 0xB4, 0xED, 0xA0, 0x80, 0x00, 0x00,
}, builder.Slice(12, -132));

// the default decoder replaces bad byte sequences by U+FFFD
Assert.Equal("\u1234\ufffd\ufffd", ReadVersion(builder));
if (PlatformDetection.IsNetCore)
{
Assert.Equal("\u1234\ufffd\ufffd\ufffd", ReadVersion(builder));
}
else
{
// Versions of .NET prior to Core 3.0 didn't follow Unicode recommendations for U+FFFD substitution,
// so they sometimes emitted too few replacement chars.
Assert.Equal("\u1234\ufffd\ufffd", ReadVersion(builder));
}
}
}
}
4 changes: 3 additions & 1 deletion src/System.Runtime/tests/System.Runtime.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,11 @@
<Compile Include="System\Text\RuneTests.netcoreapp.cs" />
<Compile Include="System\Text\RuneTests.TestData.netcoreapp.cs" />
<Compile Include="System\Text\StringBuilderTests.netcoreapp.cs" />
<Compile Include="System\Text\Unicode\Utf16UtilityTests.ValidateChars.netcoreapp.cs" />
<Compile Include="System\Text\Unicode\Utf8Tests.netcoreapp.cs" />
<Compile Include="System\Text\Unicode\Utf8Tests.ToBytes.netcoreapp.cs" />
<Compile Include="System\Text\Unicode\Utf8Tests.ToChars.netcoreapp.cs" />
<Compile Include="System\Text\Unicode\Utf8UtilityTests.ValidateBytes.netcoreapp.cs" />
<Compile Include="System\Type\TypePropertyTests.netcoreapp.cs" />
<Compile Include="System\Type\TypeTests.netcoreapp.cs" />
<Compile Include="System\ArgIteratorTests.netcoreapp.cs" />
Expand Down Expand Up @@ -338,4 +340,4 @@
<ItemGroup>
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
</ItemGroup>
</Project>
</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Buffers;
using System.Globalization;
using System.Linq;
using System.Reflection;
using System.Runtime.InteropServices;
using Xunit;

namespace System.Text.Unicode.Tests
{
public partial class Utf16UtilityTests
{
private unsafe delegate char* GetPointerToFirstInvalidCharDel(char* pInputBuffer, int inputLength, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment);
private static readonly Lazy<GetPointerToFirstInvalidCharDel> _getPointerToFirstInvalidCharFn = CreateGetPointerToFirstInvalidCharFn();

[Theory]
[InlineData("", 0, 0)] // empty string is OK
[InlineData("X", 1, 1)]
[InlineData("XY", 2, 2)]
[InlineData("XYZ", 3, 3)]
[InlineData("<EACU>", 1, 2)]
[InlineData("X<EACU>", 2, 3)]
[InlineData("<EACU>X", 2, 3)]
[InlineData("<EURO>", 1, 3)]
[InlineData("<GRIN>", 1, 4)]
[InlineData("X<GRIN>Z", 3, 6)]
[InlineData("X<0000>Z", 3, 3)] // null chars are allowed
public void GetIndexOfFirstInvalidUtf16Sequence_WithSmallValidBuffers(string unprocessedInput, int expectedRuneCount, int expectedUtf8ByteCount)
{
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(unprocessedInput, -1 /* expectedIdxOfFirstInvalidChar */, expectedRuneCount, expectedUtf8ByteCount);
}

[Theory]
[InlineData("<DC00>", 0, 0, 0)] // standalone low surrogate (at beginning of sequence)
[InlineData("X<DC00>", 1, 1, 1)] // standalone low surrogate (preceded by valid ASCII data)
[InlineData("<EURO><DC00>", 1, 1, 3)] // standalone low surrogate (preceded by valid non-ASCII data)
[InlineData("<D800>", 0, 0, 0)] // standalone high surrogate (missing follow-up low surrogate)
[InlineData("<D800>Y", 0, 0, 0)] // standalone high surrogate (followed by ASCII char)
[InlineData("<D800><D800>", 0, 0, 0)] // standalone high surrogate (followed by high surrogate)
[InlineData("<D800><EURO>", 0, 0, 0)] // standalone high surrogate (followed by valid non-ASCII char)
[InlineData("<DC00><DC00>", 0, 0, 0)] // standalone low surrogate (not preceded by a high surrogate)
[InlineData("<DC00><D800>", 0, 0, 0)] // standalone low surrogate (not preceded by a high surrogate)
[InlineData("<GRIN><DC00><DC00>", 2, 1, 4)] // standalone low surrogate (preceded by a valid surrogate pair)
[InlineData("<GRIN><DC00><D800>", 2, 1, 4)] // standalone low surrogate (preceded by a valid surrogate pair)
[InlineData("<GRIN><0000><DC00><D800>", 3, 2, 5)] // standalone low surrogate (preceded by a valid null char)
public void GetIndexOfFirstInvalidUtf16Sequence_WithSmallInvalidBuffers(string unprocessedInput, int idxOfFirstInvalidChar, int expectedRuneCount, int expectedUtf8ByteCount)
{
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(unprocessedInput, idxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);
}

[Fact]
public void GetIndexOfFirstInvalidUtf16Sequence_WithInvalidSurrogateSequences()
{
// All ASCII

char[] chars = Enumerable.Repeat('x', 128).ToArray();
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 128, expectedUtf8ByteCount: 128);

// Throw a surrogate pair at the beginning

chars[0] = '\uD800';
chars[1] = '\uDFFF';
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 127, expectedUtf8ByteCount: 130);

// Throw a surrogate pair near the end

chars[124] = '\uD800';
chars[125] = '\uDFFF';
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 126, expectedUtf8ByteCount: 132);

// Throw a standalone surrogate code point at the *very* end

chars[127] = '\uD800'; // high surrogate
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 127, expectedRuneCount: 125, expectedUtf8ByteCount: 131);

chars[127] = '\uDFFF'; // low surrogate
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 127, expectedRuneCount: 125, expectedUtf8ByteCount: 131);

// Make the final surrogate pair valid

chars[126] = '\uD800'; // high surrogate
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 125, expectedUtf8ByteCount: 134);

// Throw an invalid surrogate sequence in the middle (straddles a vector boundary)

chars[12] = '\u0080'; // 2-byte UTF-8 sequence
chars[13] = '\uD800'; // high surrogate
chars[14] = '\uD800'; // high surrogate
chars[15] = '\uDFFF'; // low surrogate
chars[16] = '\uDFFF'; // low surrogate
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 13, expectedRuneCount: 12, expectedUtf8ByteCount: 16);

// Correct the surrogate sequence we just added

chars[14] = '\uDC00'; // low surrogate
chars[15] = '\uDBFF'; // high surrogate
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, -1, expectedRuneCount: 123, expectedUtf8ByteCount: 139);

// Corrupt the surrogate pair that's split across a vector boundary

chars[16] = 'x'; // ASCII char (remember.. chars[15] is a high surrogate char)
GetIndexOfFirstInvalidUtf16Sequence_Test_Core(chars, 15, expectedRuneCount: 13, expectedUtf8ByteCount: 20);
}

private static void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(string unprocessedInput, int expectedIdxOfFirstInvalidChar, int expectedRuneCount, long expectedUtf8ByteCount)
{
char[] processedInput = ProcessInput(unprocessedInput).ToCharArray();

// Run the test normally

GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

// Put a bunch of ASCII data at the beginning (to test the call to ASCIIUtility at method entry)

processedInput = Enumerable.Repeat('x', 128).Concat(processedInput).ToArray();

if (expectedIdxOfFirstInvalidChar >= 0)
{
expectedIdxOfFirstInvalidChar += 128;
}
expectedRuneCount += 128;
expectedUtf8ByteCount += 128;

GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

// Change the first few chars to a mixture of 2-byte and 3-byte UTF-8 sequences
// This makes sure the vectorized code paths can properly handle these.

processedInput[0] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[1] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[2] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[3] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[4] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[5] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[6] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[7] = '\u0800'; // 3-byte UTF-8 sequence

expectedUtf8ByteCount += 12;

GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

// Throw some surrogate pairs into the mix to make sure they're also handled properly
// by the vectorized code paths.

processedInput[8] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[9] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[10] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[11] = '\u0800'; // 3-byte UTF-8 sequence
processedInput[12] = '\u0080'; // 2-byte UTF-8 sequence
processedInput[13] = '\uD800'; // high surrogate
processedInput[14] = '\uDC00'; // low surrogate
processedInput[15] = 'z'; // ASCII char

expectedRuneCount--;
expectedUtf8ByteCount += 9;

GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);

// Split the next surrogate pair across the vector boundary (so that we
// don't inadvertently treat this as a standalone surrogate sequence).

processedInput[15] = '\uDBFF'; // high surrogate
processedInput[16] = '\uDFFF'; // low surrogate

expectedRuneCount--;
expectedUtf8ByteCount += 2;

GetIndexOfFirstInvalidUtf16Sequence_Test_Core(processedInput, expectedIdxOfFirstInvalidChar, expectedRuneCount, expectedUtf8ByteCount);
}

private static unsafe void GetIndexOfFirstInvalidUtf16Sequence_Test_Core(char[] input, int expectedRetVal, int expectedRuneCount, long expectedUtf8ByteCount)
{
// Arrange

using BoundedMemory<char> boundedMemory = BoundedMemory.AllocateFromExistingData(input);
boundedMemory.MakeReadonly();

// Act

int actualRetVal;
long actualUtf8CodeUnitCount;
int actualRuneCount;

fixed (char* pInputBuffer = &MemoryMarshal.GetReference(boundedMemory.Span))
{
char* pFirstInvalidChar = _getPointerToFirstInvalidCharFn.Value(pInputBuffer, input.Length, out long utf8CodeUnitCountAdjustment, out int scalarCountAdjustment);

long ptrDiff = pFirstInvalidChar - pInputBuffer;
Assert.True((ulong)ptrDiff <= (uint)input.Length, "ptrDiff was outside expected range.");

Assert.True(utf8CodeUnitCountAdjustment >= 0, "UTF-16 code unit count adjustment must be non-negative.");
Assert.True(scalarCountAdjustment <= 0, "Scalar count adjustment must be 0 or negative.");

actualRetVal = (ptrDiff == input.Length) ? -1 : (int)ptrDiff;

// The last two 'out' parameters are:
// a) The number to be added to the "chars processed" return value to come up with the total UTF-8 code unit count, and
// b) The number to be added to the "total UTF-16 code unit count" value to come up with the total scalar count.

actualUtf8CodeUnitCount = ptrDiff + utf8CodeUnitCountAdjustment;
actualRuneCount = (int)ptrDiff + scalarCountAdjustment;
}

// Assert

Assert.Equal(expectedRetVal, actualRetVal);
Assert.Equal(expectedRuneCount, actualRuneCount);
Assert.Equal(actualUtf8CodeUnitCount, expectedUtf8ByteCount);
}

private static Lazy<GetPointerToFirstInvalidCharDel> CreateGetPointerToFirstInvalidCharFn()
{
return new Lazy<GetPointerToFirstInvalidCharDel>(() =>
{
Type utf16UtilityType = typeof(Utf8).Assembly.GetType("System.Text.Unicode.Utf16Utility");

if (utf16UtilityType is null)
{
throw new Exception("Couldn't find Utf16Utility type in System.Private.CoreLib.");
}

MethodInfo methodInfo = utf16UtilityType.GetMethod("GetPointerToFirstInvalidChar", BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);

if (methodInfo is null)
{
throw new Exception("Couldn't find GetPointerToFirstInvalidChar method on Utf8Utility.");
}

return (GetPointerToFirstInvalidCharDel)methodInfo.CreateDelegate(typeof(GetPointerToFirstInvalidCharDel));
});
}

private static string ProcessInput(string input)
{
input = input.Replace("<EACU>", "\u00E9", StringComparison.Ordinal); // U+00E9 LATIN SMALL LETTER E WITH ACUTE
input = input.Replace("<EURO>", "\u20AC", StringComparison.Ordinal); // U+20AC EURO SIGN
input = input.Replace("<GRIN>", "\U0001F600", StringComparison.Ordinal); // U+1F600 GRINNING FACE

// Replace <ABCD> with \uABCD. This allows us to flow potentially malformed
// UTF-16 strings without Xunit. (The unit testing framework gets angry when
// we try putting invalid UTF-16 data as inline test data.)

int idx;
while ((idx = input.IndexOf('<')) >= 0)
{
input = input[..idx] + (char)ushort.Parse(input.Substring(idx + 1, 4), NumberStyles.AllowHexSpecifier, CultureInfo.InvariantCulture) + input[idx + 6..];
}

return input;
}
}
}
Loading

0 comments on commit 4c3d4a0

Please sign in to comment.