dotnet · StephenMolloy · Apr 4, 2023 · Jul 7, 2022 · Jul 18, 2022 · Jul 26, 2022
diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Runtime/Serialization/DataContractSerializer.cs
@@ -33,13 +33,27 @@ public sealed class DataContractSerializer : XmlObjectSerializer
  private static SerializationOption s_option = IsReflectionBackupAllowed() ? SerializationOption.ReflectionAsBackup : SerializationOption.CodeGenOnly;
  private static bool s_optionAlreadySet;
 
- internal static UTF8Encoding UTF8NoBom { get; } = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false);
- internal static UTF8Encoding ValidatingUTF8 { get; } = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
+ internal sealed class SealedUTF8Encoding : UTF8Encoding
+ {
+ public SealedUTF8Encoding(bool encoderShouldEmitUTF8Identifier, bool throwOnInvalidBytes)
+ : base(encoderShouldEmitUTF8Identifier, throwOnInvalidBytes)
+ { }
+ }
+
+ internal sealed class SealedUnicodeEncoding : UnicodeEncoding
+ {
+ public SealedUnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
+ : base(bigEndian, byteOrderMark, throwOnInvalidBytes)
+ { }
+ }
+
+ internal static SealedUTF8Encoding UTF8NoBom { get; } = new SealedUTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: false);
+ internal static SealedUTF8Encoding ValidatingUTF8 { get; } = new SealedUTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);
 
- internal static UnicodeEncoding UTF16NoBom { get; } = new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: false);
- internal static UnicodeEncoding BEUTF16NoBom { get; } = new UnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: false);
- internal static UnicodeEncoding ValidatingUTF16 { get; } = new UnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true);
- internal static UnicodeEncoding ValidatingBEUTF16 { get; } = new UnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: true);
+ internal static SealedUnicodeEncoding UTF16NoBom { get; } = new SealedUnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: false);
+ internal static SealedUnicodeEncoding BEUTF16NoBom { get; } = new SealedUnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: false);
+ internal static SealedUnicodeEncoding ValidatingUTF16 { get; } = new SealedUnicodeEncoding(bigEndian: false, byteOrderMark: false, throwOnInvalidBytes: true);
+ internal static SealedUnicodeEncoding ValidatingBEUTF16 { get; } = new SealedUnicodeEncoding(bigEndian: true, byteOrderMark: false, throwOnInvalidBytes: true);
 
  internal static Base64Encoding Base64Encoding { get; } = new Base64Encoding();
  internal static BinHexEncoding BinHexEncoding { get; } = new BinHexEncoding();

diff --git a/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs b/src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlStreamNodeWriter.cs
@@ -1,8 +1,10 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
+using System.Buffers.Binary;
 using System.IO;
 using System.Text;
+using System.Runtime.InteropServices;
 using System.Runtime.Serialization;
 using System.Threading.Tasks;
 
@@ -334,34 +336,26 @@ protected unsafe void UnsafeWriteUnicodeChars(char* chars, int charCount)
  }
  }
 
- protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] buffer, int offset)
+ protected static unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] buffer, int offset)
  {
- char* charsMax = chars + charCount;
- while (chars < charsMax)
+ if (BitConverter.IsLittleEndian)
  {
- char value = *chars++;
- buffer[offset++] = (byte)value;
- value >>= 8;
- buffer[offset++] = (byte)value;
+ new ReadOnlySpan<char>(chars, charCount)
+ .CopyTo(MemoryMarshal.Cast<byte, char>(buffer.AsSpan(offset)));
  }
+ else
+ {
+ BinaryPrimitives.ReverseEndianness(new ReadOnlySpan<short>(chars, charCount),
+ MemoryMarshal.Cast<byte, short>(buffer.AsSpan(offset)));
+ }
+
  return charCount * 2;
  }
 
  protected unsafe int UnsafeGetUTF8Length(char* chars, int charCount)
  {
- char* charsMax = chars + charCount;
- while (chars < charsMax)
- {
- if (*chars >= 0x80)
- break;
-
- chars++;
- }
-
- if (chars == charsMax)
- return charCount;
-
- return (int)(chars - (charsMax - charCount)) + (_encoding ?? DataContractSerializer.ValidatingUTF8).GetByteCount(chars, (int)(charsMax - chars));
+ // NOTE(review): charCount is presumably always at least (128 / maxBytesPerChar) = 42 here, so no short-string ASCII pre-scan is kept - confirm against callers.
+ return (_encoding ?? DataContractSerializer.ValidatingUTF8).GetByteCount(chars, charCount);
  }
 
  protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffer, int offset)
@@ -370,39 +364,32 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe
  {
  fixed (byte* _bytes = &buffer[offset])
  {
- byte* bytes = _bytes;
- byte* bytesMax = &bytes[buffer.Length - offset];
- char* charsMax = &chars[charCount];
-
- while (true)
+ // Fast path for small strings; larger strings go through Encoding.GetBytes, which is faster when vectorization is possible.
+ if ((uint)charCount < 25)
  {
+ byte* bytes = _bytes;
+ char* charsMax = &chars[charCount];
+
  while (chars < charsMax)
  {
  char t = *chars;
  if (t >= 0x80)
- break;
+ goto NonAscii;
 
  *bytes = (byte)t;
  bytes++;
  chars++;
  }
+ return charCount;
 
- if (chars >= charsMax)
- break;
-
- char* charsStart = chars;
- while (chars < charsMax && *chars >= 0x80)
- {
- chars++;
- }
-
- bytes += (_encoding ?? DataContractSerializer.ValidatingUTF8).GetBytes(charsStart, (int)(chars - charsStart), bytes, (int)(bytesMax - bytes));
-
- if (chars >= charsMax)
- break;
+ NonAscii:
+ byte* bytesMax = _bytes + buffer.Length - offset;
+ return (int)(bytes - _bytes) + (_encoding ?? DataContractSerializer.ValidatingUTF8).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes));
+ }
+ else
+ {
+ return (_encoding ?? DataContractSerializer.ValidatingUTF8).GetBytes(chars, charCount, _bytes, buffer.Length - offset);
  }
-
- return (int)(bytes - _bytes);
  }
  }
  return 0;

diff --git a/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs b/src/libraries/System.Runtime.Serialization.Xml/tests/XmlDictionaryWriterTest.cs
@@ -494,6 +494,71 @@ void AssertBytesWritten(Action<XmlDictionaryWriter> action, XmlBinaryNodeType no
  }
  }
 
+ [Fact]
+ public static void XmlBaseWriter_WriteString()
+ {
+ const byte Chars8Text = 152;
+ const byte Chars16Text = 154;
+ MemoryStream ms = new MemoryStream();
+ XmlDictionaryWriter writer = (XmlDictionaryWriter)XmlDictionaryWriter.CreateBinaryWriter(ms);
+ writer.WriteStartElement("root");
+
+ int[] lengths = new[] { 7, 8, 9, 15, 16, 17, 31, 32, 36, 258 };
+ byte[] buffer = new byte[lengths.Max() + 1];
+
+ foreach (var length in lengths)
+ {
+ string allAscii = string.Create(length, null, (Span<char> chars, object _) =>
+ {
+ for (int i = 0; i < chars.Length; ++i)
+ chars[i] = (char)(i % 128);
+ });
+ string multiByteLast = string.Create(length, null, (Span<char> chars, object _) =>
+ {
+ for (int i = 0; i < chars.Length; ++i)
+ chars[i] = (char)(i % 128);
+ chars[^1] = '\u00E4'; // 'ä' - Latin Small Letter A with Diaeresis (Latin-1 Supplement).
+ });
+
+ int numBytes = Encoding.UTF8.GetBytes(allAscii, buffer);
+ Assert.True(numBytes == length, "Test setup wrong - allAscii");
+ ValidateWriteText(ms, writer, allAscii, expected: buffer.AsSpan(0, numBytes));
+
+ numBytes = Encoding.UTF8.GetBytes(multiByteLast, buffer);
+ Assert.True(numBytes == length + 1, "Test setup wrong - multiByte");
+ ValidateWriteText(ms, writer, multiByteLast, expected: buffer.AsSpan(0, numBytes));
+ }
+
+ static void ValidateWriteText(MemoryStream ms, XmlDictionaryWriter writer, string text, ReadOnlySpan<byte> expected)
+ {
+ writer.Flush();
+ ms.Seek(0, SeekOrigin.Begin);
+ ms.SetLength(0);
+ writer.WriteString(text);
+ writer.Flush();
+
+ ms.TryGetBuffer(out ArraySegment<byte> arraySegment);
+ ReadOnlySpan<byte> buffer = arraySegment;
+
+ if (expected.Length <= byte.MaxValue)
+ {
+ Assert.Equal(Chars8Text, buffer[0]);
+ Assert.Equal(expected.Length, buffer[1]);
+ buffer = buffer.Slice(2);
+ }
+ else if (expected.Length <= ushort.MaxValue)
+ {
+ Assert.Equal(Chars16Text, buffer[0]);
+ Assert.Equal(expected.Length, (int)(buffer[1]) | ((int)buffer[2] << 8));
+ buffer = buffer.Slice(3);
+ }
+ else
+ Assert.Fail("test use to long length");
+
+ AssertExtensions.SequenceEqual(expected, buffer);
+ }
+ }
+
  private static bool ReadTest(MemoryStream ms, Encoding encoding, ReaderWriterFactory.ReaderWriterType rwType, byte[] byteArray)
  {
  ms.Position = 0;