Skip to content

Commit

Permalink
Big endian fixes for dotnet runtime (#47981)
Browse files Browse the repository at this point in the history
  • Loading branch information
nealef authored Feb 9, 2021
1 parent 08be965 commit 6f38724
Show file tree
Hide file tree
Showing 10 changed files with 178 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n
currentUInt32 = Unsafe.ReadUnaligned<ushort>(pBuffer);
if (!AllBytesInUInt32AreAscii(currentUInt32))
{
if (!BitConverter.IsLittleEndian)
{
currentUInt32 = currentUInt32 << 16;
}
goto FoundNonAsciiData;
}

Expand Down Expand Up @@ -1678,6 +1682,10 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf
asciiData = Unsafe.ReadUnaligned<ushort>(pAsciiBuffer + currentOffset);
if (!AllBytesInUInt32AreAscii(asciiData))
{
if (!BitConverter.IsLittleEndian)
{
asciiData = asciiData << 16;
}
goto FoundNonAsciiData;
}

Expand Down Expand Up @@ -1719,11 +1727,23 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf

// Drain ASCII bytes one at a time.

while (((byte)asciiData & 0x80) == 0)
if (BitConverter.IsLittleEndian)
{
pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
currentOffset++;
asciiData >>= 8;
while (((byte)asciiData & 0x80) == 0)
{
pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
currentOffset++;
asciiData >>= 8;
}
}
else
{
while ((asciiData & 0x80000000) == 0)
{
asciiData = BitOperations.RotateLeft(asciiData, 8);
pUtf16Buffer[currentOffset] = (char)(byte)asciiData;
currentOffset++;
}
}

goto Finish;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ private static uint ExtractFourUtf8BytesFromSurrogatePair(uint value)
tempB |= tempA;

uint tempC = (value << 2) & 0x0000_0F00u; // = [ 00000000 00000000 0000yyyy 00000000 ]
uint tempD = (value >> 6) & 0x0003_0000u; // = [ 00000000 00000000 00yy0000 00000000 ]
uint tempD = (value >> 4) & 0x0000_3000u; // = [ 00000000 00000000 00yy0000 00000000 ]
tempD |= tempC;

uint tempE = (value & 0x3Fu) + 0xF080_8080u; // = [ 11110000 10000000 10000000 10xxxxxx ]
Expand Down Expand Up @@ -232,7 +232,7 @@ private static uint ExtractUtf8TwoByteSequenceFromFirstUtf16Char(uint value)
// want to return [ ######## ######## 110yyyyy 10xxxxxx ]

uint temp = (value >> 16) & 0x3Fu; // [ 00000000 00000000 00000000 00xxxxxx ]
value = (value >> 22) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ]
value = (value >> 14) & 0x1F00u; // [ 00000000 00000000 000yyyyy 0000000 ]
return value + temp + 0xC080u;
}
}
Expand Down Expand Up @@ -498,7 +498,7 @@ private static bool UInt32BeginsWithUtf8FourByteMask(uint value)
// Return statement is written this way to work around https://github.com/dotnet/runtime/issues/4207.

return (BitConverter.IsLittleEndian && (((value - 0x8080_80F0u) & 0xC0C0_C0F8u) == 0))
|| (!BitConverter.IsLittleEndian && (((value - 0xF080_8000u) & 0xF8C0_C0C0u) == 0));
|| (!BitConverter.IsLittleEndian && (((value - 0xF080_8080u) & 0xF8C0_C0C0u) == 0));
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1139,7 +1139,7 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
}
else
{
pOutputBuffer[0] = (byte)(thisDWord >> 24); // extract [ AA 00 ## ## ]
pOutputBuffer[0] = (byte)(thisDWord >> 16); // extract [ 00 AA ## ## ]
}

pInputBuffer++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ private void WriteArrayAsBytes(Array array, int typeLength)
if (!BitConverter.IsLittleEndian)
{
// we know that we are writing a primitive type, so just do a simple swap
Debug.Fail("Re-review this code if/when we start running on big endian systems");
for (int i = 0; i < bufferUsed; i += typeLength)
{
for (int j = 0; j < typeLength / 2; j++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,6 @@ private void ReadArrayAsBytes(ParseRecord pr)
if (!BitConverter.IsLittleEndian)
{
// we know that we are reading a primitive type, so just do a simple swap
Debug.Fail("Re-review this code if/when we start running on big endian systems");
for (int i = 0; i < bufferUsed; i += typeLength)
{
for (int j = 0; j < typeLength / 2; j++)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,14 @@
<Reference Include="System.Collections" />
<Reference Include="System.Diagnostics.Debug" />
<Reference Include="System.Diagnostics.Tools" />
<Reference Include="System.Memory" />
<Reference Include="System.Resources.ResourceManager" />
<Reference Include="System.Runtime" />
<Reference Include="System.Runtime.Extensions" />
<Reference Include="System.Runtime.InteropServices" />
<Reference Include="System.Threading" />
</ItemGroup>
<ItemGroup Condition="!$(TargetFramework.StartsWith('$(NetCoreAppCurrent)'))">
<PackageReference Include="System.Memory" Version="$(SystemMemoryVersion)" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers.Binary;
using System.Reflection;
using System.IO;
using System.Diagnostics;
Expand Down Expand Up @@ -99,6 +100,28 @@ internal struct CodePageDataFileHeader
internal short unused1; // Add an unused WORD so that CodePages is aligned with DWORD boundary.
}
private const int CODEPAGE_DATA_FILE_HEADER_SIZE = 44;
/// <summary>
/// Reads the code page data file header from <paramref name="stream"/> into
/// <paramref name="codePageDataFileHeader"/> and, on a big-endian host, byte-swaps its
/// multi-byte fields in place so they can be read through a <see cref="CodePageDataFileHeader"/> pointer.
/// </summary>
/// <param name="stream">Stream positioned at the start of the header.</param>
/// <param name="codePageDataFileHeader">
/// Destination buffer; the method tries to fill all of it. It must be non-empty on big-endian
/// hosts because its first element is pinned.
/// NOTE(review): assumed to be at least as large as <see cref="CodePageDataFileHeader"/> - confirm against callers.
/// </param>
internal static unsafe void ReadCodePageDataFileHeader(Stream stream, byte[] codePageDataFileHeader)
{
    // Stream.Read may legitimately return fewer bytes than requested, so keep reading
    // until the buffer is full or the stream reports that no more data is available.
    int totalRead = 0;
    while (totalRead < codePageDataFileHeader.Length)
    {
        int bytesRead = stream.Read(codePageDataFileHeader, totalRead, codePageDataFileHeader.Length - totalRead);
        if (bytesRead <= 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    // The bytes are only converted when the host is not little-endian.
    if (BitConverter.IsLittleEndian)
    {
        return;
    }

    fixed (byte* pBytes = &codePageDataFileHeader[0])
    {
        CodePageDataFileHeader* pHeader = (CodePageDataFileHeader*)pBytes;

        // TableName is treated as the first of 16 consecutive 16-bit characters.
        char* pTableName = &pHeader->TableName;
        for (int i = 0; i < 16; i++)
        {
            pTableName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pTableName[i]);
        }

        // Version is treated as the first of 4 consecutive 16-bit values.
        ushort* pVersion = &pHeader->Version;
        for (int i = 0; i < 4; i++)
        {
            pVersion[i] = BinaryPrimitives.ReverseEndianness(pVersion[i]);
        }

        pHeader->CodePageCount = BinaryPrimitives.ReverseEndianness(pHeader->CodePageCount);
    }
}

[StructLayout(LayoutKind.Explicit, Pack = 2)]
internal unsafe struct CodePageIndex
Expand All @@ -112,6 +135,25 @@ internal unsafe struct CodePageIndex
[FieldOffset(0x24)]
internal int Offset; // DWORD
}
/// <summary>
/// Reads one code page index entry from <paramref name="stream"/> into
/// <paramref name="codePageIndex"/> and, on a big-endian host, byte-swaps its
/// multi-byte fields in place so they can be read through a <see cref="CodePageIndex"/> pointer.
/// </summary>
/// <param name="stream">Stream positioned at the start of an index entry.</param>
/// <param name="codePageIndex">
/// Destination buffer; the method tries to fill all of it. It must be non-empty on big-endian
/// hosts because its first element is pinned.
/// NOTE(review): assumed to be at least as large as <see cref="CodePageIndex"/> - confirm against callers.
/// </param>
internal static unsafe void ReadCodePageIndex(Stream stream, byte[] codePageIndex)
{
    // Stream.Read may legitimately return fewer bytes than requested, so keep reading
    // until the buffer is full or the stream reports that no more data is available.
    int totalRead = 0;
    while (totalRead < codePageIndex.Length)
    {
        int bytesRead = stream.Read(codePageIndex, totalRead, codePageIndex.Length - totalRead);
        if (bytesRead <= 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    // The bytes are only converted when the host is not little-endian.
    if (BitConverter.IsLittleEndian)
    {
        return;
    }

    fixed (byte* pBytes = &codePageIndex[0])
    {
        CodePageIndex* pIndex = (CodePageIndex*)pBytes;

        // CodePageName is treated as the first of 16 consecutive 16-bit characters.
        char* pCodePageName = &pIndex->CodePageName;
        for (int i = 0; i < 16; i++)
        {
            pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
        }

        pIndex->CodePage = BinaryPrimitives.ReverseEndianness(pIndex->CodePage);
        pIndex->ByteCount = BinaryPrimitives.ReverseEndianness(pIndex->ByteCount);
        pIndex->Offset = BinaryPrimitives.ReverseEndianness(pIndex->Offset);
    }
}

[StructLayout(LayoutKind.Explicit)]
internal unsafe struct CodePageHeader
Expand All @@ -136,6 +178,30 @@ internal unsafe struct CodePageHeader
internal ushort ByteReplace; // WORD // default replacement bytes
}
private const int CODEPAGE_HEADER_SIZE = 48;
/// <summary>
/// Reads a code page header from <paramref name="stream"/> into
/// <paramref name="codePageHeader"/> and, on a big-endian host, byte-swaps its
/// multi-byte fields in place so they can be read through a <see cref="CodePageHeader"/> pointer.
/// </summary>
/// <param name="stream">Stream positioned at the start of a code page header.</param>
/// <param name="codePageHeader">
/// Destination buffer; the method tries to fill all of it. It must be non-empty on big-endian
/// hosts because its first element is pinned.
/// NOTE(review): assumed to be at least as large as <see cref="CodePageHeader"/> - confirm against callers.
/// </param>
internal static unsafe void ReadCodePageHeader(Stream stream, byte[] codePageHeader)
{
    // Stream.Read may legitimately return fewer bytes than requested, so keep reading
    // until the buffer is full or the stream reports that no more data is available.
    int totalRead = 0;
    while (totalRead < codePageHeader.Length)
    {
        int bytesRead = stream.Read(codePageHeader, totalRead, codePageHeader.Length - totalRead);
        if (bytesRead <= 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    // The bytes are only converted when the host is not little-endian.
    if (BitConverter.IsLittleEndian)
    {
        return;
    }

    fixed (byte* pBytes = &codePageHeader[0])
    {
        CodePageHeader* pHeader = (CodePageHeader*)pBytes;

        // CodePageName is treated as the first of 16 consecutive 16-bit characters.
        char* pCodePageName = &pHeader->CodePageName;
        for (int i = 0; i < 16; i++)
        {
            pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
        }

        pHeader->VersionMajor = BinaryPrimitives.ReverseEndianness(pHeader->VersionMajor);
        pHeader->VersionMinor = BinaryPrimitives.ReverseEndianness(pHeader->VersionMinor);
        pHeader->VersionRevision = BinaryPrimitives.ReverseEndianness(pHeader->VersionRevision);
        pHeader->VersionBuild = BinaryPrimitives.ReverseEndianness(pHeader->VersionBuild);
        pHeader->CodePage = BinaryPrimitives.ReverseEndianness(pHeader->CodePage);
        pHeader->ByteCount = BinaryPrimitives.ReverseEndianness(pHeader->ByteCount);
        pHeader->UnicodeReplace = (char)BinaryPrimitives.ReverseEndianness((ushort)pHeader->UnicodeReplace);
        pHeader->ByteReplace = BinaryPrimitives.ReverseEndianness(pHeader->ByteReplace);
    }
}

// Initialize our global stuff
private static readonly byte[] s_codePagesDataHeader = new byte[CODEPAGE_DATA_FILE_HEADER_SIZE];
Expand Down Expand Up @@ -166,7 +232,7 @@ internal static Stream GetEncodingDataStream(string tableName)
}

// Read the header
stream.Read(s_codePagesDataHeader, 0, s_codePagesDataHeader.Length);
ReadCodePageDataFileHeader(stream, s_codePagesDataHeader);

return stream;
}
Expand Down Expand Up @@ -210,14 +276,14 @@ private unsafe bool FindCodePage(int codePage)
CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes;
for (int i = 0; i < codePagesCount; i++)
{
s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);

if (pCodePageIndex->CodePage == codePage)
{
// Found it!
long position = s_codePagesEncodingDataStream.Position;
s_codePagesEncodingDataStream.Seek((long)pCodePageIndex->Offset, SeekOrigin.Begin);
s_codePagesEncodingDataStream.Read(m_codePageHeader, 0, m_codePageHeader!.Length);
ReadCodePageHeader(s_codePagesEncodingDataStream, m_codePageHeader);
m_firstDataWordOffset = (int)s_codePagesEncodingDataStream.Position; // stream now pointing to the codepage data

if (i == codePagesCount - 1) // last codepage
Expand All @@ -229,7 +295,7 @@ private unsafe bool FindCodePage(int codePage)
// Read Next codepage data to get the offset and then calculate the size
s_codePagesEncodingDataStream.Seek(position, SeekOrigin.Begin);
int currentOffset = pCodePageIndex->Offset;
s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);
m_dataSize = pCodePageIndex->Offset - currentOffset - m_codePageHeader.Length;
}

Expand Down Expand Up @@ -266,7 +332,7 @@ internal static unsafe int GetCodePageByteSize(int codePage)
CodePageIndex* pCodePageIndex = (CodePageIndex*)pBytes;
for (int i = 0; i < codePagesCount; i++)
{
s_codePagesEncodingDataStream.Read(codePageIndex, 0, codePageIndex.Length);
ReadCodePageIndex(s_codePagesEncodingDataStream, codePageIndex);

if (pCodePageIndex->CodePage == codePage)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,34 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.IO;
using System.Buffers.Binary;
using System.Runtime.Serialization;
using System.Runtime.CompilerServices;

namespace System.Text
{
internal abstract partial class BaseCodePageEncoding : EncodingNLS, ISerializable
{
/// <summary>
/// Span-based variant: reads one code page index entry from <paramref name="stream"/> into
/// <paramref name="codePageIndex"/> and, on a big-endian host, byte-swaps its
/// multi-byte fields in place so they can be read through a <see cref="CodePageIndex"/> pointer.
/// </summary>
/// <param name="stream">Stream positioned at the start of an index entry.</param>
/// <param name="codePageIndex">
/// Destination span; the method tries to fill all of it. It must be non-empty on big-endian
/// hosts because its first element is pinned.
/// NOTE(review): assumed to be at least as large as <see cref="CodePageIndex"/> - confirm against callers.
/// </param>
internal static unsafe void ReadCodePageIndex(Stream stream, Span<byte> codePageIndex)
{
    // Stream.Read may legitimately return fewer bytes than requested, so keep reading
    // into the unfilled tail until the span is full or the stream has no more data.
    int totalRead = 0;
    while (totalRead < codePageIndex.Length)
    {
        int bytesRead = stream.Read(codePageIndex.Slice(totalRead));
        if (bytesRead <= 0)
        {
            break;
        }

        totalRead += bytesRead;
    }

    // The bytes are only converted when the host is not little-endian.
    if (BitConverter.IsLittleEndian)
    {
        return;
    }

    fixed (byte* pBytes = &codePageIndex[0])
    {
        CodePageIndex* pIndex = (CodePageIndex*)pBytes;

        // CodePageName is treated as the first of 16 consecutive 16-bit characters.
        char* pCodePageName = &pIndex->CodePageName;
        for (int i = 0; i < 16; i++)
        {
            pCodePageName[i] = (char)BinaryPrimitives.ReverseEndianness((ushort)pCodePageName[i]);
        }

        pIndex->CodePage = BinaryPrimitives.ReverseEndianness(pIndex->CodePage);
        pIndex->ByteCount = BinaryPrimitives.ReverseEndianness(pIndex->ByteCount);
        pIndex->Offset = BinaryPrimitives.ReverseEndianness(pIndex->Offset);
    }
}

internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider provider)
{
lock (s_streamLock)
Expand All @@ -29,7 +50,7 @@ internal static unsafe EncodingInfo [] GetEncodings(CodePagesEncodingProvider pr

for (int i = 0; i < codePagesCount; i++)
{
s_codePagesEncodingDataStream.Read(pCodePageIndex);
ReadCodePageIndex(s_codePagesEncodingDataStream, pCodePageIndex);

string codePageName;
switch (codePageIndex.CodePage)
Expand Down
Loading

0 comments on commit 6f38724

Please sign in to comment.