diff --git a/src/libraries/System.Text.Json/Common/JsonSeparatorNamingPolicy.cs b/src/libraries/System.Text.Json/Common/JsonSeparatorNamingPolicy.cs index d450631b78ad2..de50b7a02eb8a 100644 --- a/src/libraries/System.Text.Json/Common/JsonSeparatorNamingPolicy.cs +++ b/src/libraries/System.Text.Json/Common/JsonSeparatorNamingPolicy.cs @@ -11,14 +11,14 @@ namespace System.Text.Json internal abstract class JsonSeparatorNamingPolicy : JsonNamingPolicy { private readonly bool _lowercase; - private readonly char _separator; + private readonly Rune _separator; internal JsonSeparatorNamingPolicy(bool lowercase, char separator) { Debug.Assert(char.IsPunctuation(separator)); _lowercase = lowercase; - _separator = separator; + _separator = new Rune(separator); } public sealed override string ConvertName(string name) @@ -31,7 +31,7 @@ public sealed override string ConvertName(string name) return ConvertNameCore(_separator, _lowercase, name); } - private static string ConvertNameCore(char separator, bool lowercase, string name) + private static string ConvertNameCore(Rune separator, bool lowercase, string name) { Debug.Assert(name != null); @@ -44,15 +44,15 @@ private static string ConvertNameCore(char separator, bool lowercase, string nam ? stackalloc char[JsonConstants.StackallocCharThreshold] : (rentedBuffer = ArrayPool.Shared.Rent(initialBufferLength)); - ReadOnlySpan chars = name.AsSpan(); SeparatorState state = SeparatorState.NotStarted; int charsWritten = 0; - for (int i = 0; i < chars.Length; i++) + for (int i = 0; i < name.Length;) { - char current = chars[i]; + Rune current = Rune.GetRuneAt(name, i); + int charLength = current.Utf16SequenceLength; - switch (char.GetUnicodeCategory(current)) + switch (Rune.GetUnicodeCategory(current)) { case UnicodeCategory.UppercaseLetter: @@ -65,7 +65,7 @@ private static string ConvertNameCore(char separator, bool lowercase, string nam case SeparatorState.SpaceSeparator: // An uppercase letter following a sequence of lowercase letters or spaces // denotes the start of a new grouping: emit a separator character. - WriteChar(separator, ref destination); + Write(separator, ref destination); break; case SeparatorState.UppercaseLetter: @@ -74,10 +74,15 @@ private static string ConvertNameCore(char separator, bool lowercase, string nam // final letter, assuming it is followed by lowercase letters. // For example, the value 'XMLReader' should render as 'xml_reader', // however 'SHA512Hash' should render as 'sha512-hash'. - if (i + 1 < chars.Length && char.IsLower(chars[i + 1])) + if (i + charLength < name.Length) { - WriteChar(separator, ref destination); + Rune next = Rune.GetRuneAt(name, i + charLength); + if (Rune.GetUnicodeCategory(next) is UnicodeCategory.LowercaseLetter) + { + Write(separator, ref destination); + } } + break; default: @@ -86,9 +91,11 @@ private static string ConvertNameCore(char separator, bool lowercase, string nam } if (lowercase) - current = char.ToLowerInvariant(current); + { + current = Rune.ToLowerInvariant(current); + } - WriteChar(current, ref destination); + Write(current, ref destination); state = SeparatorState.UppercaseLetter; break; @@ -98,13 +105,15 @@ private static string ConvertNameCore(char separator, bool lowercase, string nam if (state is SeparatorState.SpaceSeparator) { // Normalize preceding spaces to one separator. - WriteChar(separator, ref destination); + Write(separator, ref destination); } if (!lowercase) - current = char.ToUpperInvariant(current); + { + current = Rune.ToUpperInvariant(current); + } - WriteChar(current, ref destination); + Write(current, ref destination); state = SeparatorState.LowercaseLetterOrDigit; break; @@ -122,10 +131,12 @@ private static string ConvertNameCore(char separator, bool lowercase, string nam // are written as-is to the output and reset the separator state. // E.g. 'ABC???def' maps to 'abc???def' in snake_case. - WriteChar(current, ref destination); + Write(current, ref destination); state = SeparatorState.NotStarted; break; } + + i += charLength; } name = destination.Slice(0, charsWritten).ToString(); @@ -139,14 +150,16 @@ private static string ConvertNameCore(char separator, bool lowercase, string nam return name; [MethodImpl(MethodImplOptions.AggressiveInlining)] - void WriteChar(char value, ref Span destination) + void Write(Rune rune, ref Span destination) { - if (charsWritten == destination.Length) + if (charsWritten + 2 > destination.Length) { ExpandBuffer(ref destination); } - destination[charsWritten++] = value; + int written = rune.EncodeToUtf16(destination.Slice(charsWritten)); + Debug.Assert(written == rune.Utf16SequenceLength); + charsWritten += written; } void ExpandBuffer(ref Span destination) @@ -166,6 +179,118 @@ void ExpandBuffer(ref Span destination) } } +#if !NETCOREAPP + // Provides a basic Rune polyfill that handles surrogate pairs + // TODO remove once https://github.com/dotnet/runtime/issues/52947 is complete. + private readonly struct Rune + { + private readonly char _first; + private readonly char? _lowSurrogate; + private readonly UnicodeCategory _category; + + public int Utf16SequenceLength => _lowSurrogate.HasValue ? 2 : 1; + + public static UnicodeCategory GetUnicodeCategory(Rune rune) + => rune._category; + + public Rune(char ch) : this(ch, char.GetUnicodeCategory(ch)) + { + } + + private Rune(char ch, UnicodeCategory category) + { + Debug.Assert(!char.IsSurrogate(ch)); + _first = ch; + _category = category; + } + + private Rune(char highSurrogate, char lowSurrogate, UnicodeCategory category) + { + Debug.Assert(char.IsSurrogatePair(highSurrogate, lowSurrogate)); + _first = highSurrogate; + _lowSurrogate = lowSurrogate; + _category = category; + } + + public static Rune ToLowerInvariant(Rune value) + { + UnicodeCategory category = value._category; + if (category is UnicodeCategory.UppercaseLetter) + { + category = UnicodeCategory.LowercaseLetter; + } + + if (value._lowSurrogate is not char lowSurrogate) + { + return new Rune(char.ToLowerInvariant(value._first), category); + } + + ReadOnlySpan source = stackalloc char[] { value._first, lowSurrogate }; + Span destination = stackalloc char[2]; + + source.ToLowerInvariant(destination); + return new Rune(destination[0], destination[1], category); + } + + public static Rune ToUpperInvariant(Rune value) + { + UnicodeCategory category = value._category; + if (category is UnicodeCategory.LowercaseLetter) + { + category = UnicodeCategory.UppercaseLetter; + } + + if (value._lowSurrogate is not char lowSurrogate) + { + return new Rune(char.ToUpperInvariant(value._first), category); + } + + ReadOnlySpan source = stackalloc char[] { value._first, lowSurrogate }; + Span destination = stackalloc char[2]; + + source.ToUpperInvariant(destination); + return new Rune(destination[0], destination[1], category); + } + + public int EncodeToUtf16(Span destination) + { + Debug.Assert(Utf16SequenceLength <= destination.Length); + destination[0] = _first; + + if (_lowSurrogate is not char lowSurrogate) + { + return 1; + } + + destination[1] = lowSurrogate; + return 2; + } + + public static Rune GetRuneAt(string input, int index) + { + char first = input[index]; + UnicodeCategory category = char.GetUnicodeCategory(first); + if (category is UnicodeCategory.Surrogate) + { + char lowSurrogate = default; + if (index + 1 == input.Length || + !char.IsSurrogatePair(first, lowSurrogate = input[index + 1])) + { + // CharUnicodeInfo.GetUnicodeCategory does + // not throw so we throw here instead. + ThrowArgumentException(); + + static void ThrowArgumentException() => throw new ArgumentException(nameof(input)); + } + + category = CharUnicodeInfo.GetUnicodeCategory(input, index); + return new Rune(first, lowSurrogate, category); + } + + return new Rune(first, category); + } + } +#endif private enum SeparatorState { NotStarted, diff --git a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/NamingPolicyUnitTests.cs b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/NamingPolicyUnitTests.cs index c64b962cad7f7..f060bd51c6f6c 100644 --- a/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/NamingPolicyUnitTests.cs +++ b/src/libraries/System.Text.Json/tests/System.Text.Json.Tests/Serialization/NamingPolicyUnitTests.cs @@ -12,6 +12,7 @@ namespace System.Text.Json.Serialization.Tests public static class NamingPolicyUnitTests { private readonly static CamelCaseNamingStrategy s_newtonsoftCamelCaseNamingStrategy = new(); + [Theory] // These test cases were copied from Json.NET. [InlineData("urlValue", "URLValue")] @@ -65,7 +66,6 @@ public static void CamelCaseNullNameReturnsNull() [InlineData("i18n", "i18n")] [InlineData("i18n_policy", "I18nPolicy")] [InlineData("7samurai", "7samurai")] - [InlineData("άλφα_βήτα_γάμμα", "ΆλφαΒήταΓάμμα")] [InlineData("camel_case", "camelCase")] [InlineData("camel_case", "CamelCase")] [InlineData("snake_case", "snake_case")] @@ -95,6 +95,10 @@ public static void CamelCaseNullNameReturnsNull() [InlineData("abc_7ef", " abc 7ef ")] [InlineData("ab7_def", " ab7 def ")] [InlineData("_abc", "_abc")] + [InlineData("", "")] + [InlineData("😀", "😀")] // Surrogate pairs + [InlineData("άλφα_βήτα_γάμμα", "ΆλφαΒήταΓάμμα")] // Non-ascii letters + [InlineData("𐐨𐐨𐐨_𐐨𐐨𐐨", "𐐀𐐨𐐨𐐀𐐨𐐨")] // Surrogate pair letters [InlineData("a%", "a%")] [InlineData("_?#-", "_?#-")] [InlineData("?!?", "? ! ?")] @@ -128,7 +132,6 @@ public static void ToSnakeLowerCase(string expectedResult, string name) [InlineData("I18N", "i18n")] [InlineData("I18N_POLICY", "I18nPolicy")] [InlineData("7SAMURAI", "7samurai")] - [InlineData("ΆΛΦΑ_ΒΉΤΑ_ΓΆΜΜΑ", "ΆλφαΒήταΓάμμα")] [InlineData("CAMEL_CASE", "camelCase")] [InlineData("CAMEL_CASE", "CamelCase")] [InlineData("SNAKE_CASE", "snake_case")] @@ -158,6 +161,10 @@ public static void ToSnakeLowerCase(string expectedResult, string name) [InlineData("ABC_7EF", " abc 7ef ")] [InlineData("AB7_DEF", " ab7 def ")] [InlineData("_ABC", "_abc")] + [InlineData("", "")] + [InlineData("😀", "😀")] // Surrogate pairs + [InlineData("ΆΛΦΑ_ΒΉΤΑ_ΓΆΜΜΑ", "ΆλφαΒήταΓάμμα")] // Non-ascii letters + [InlineData("𐐀𐐀𐐀_𐐀𐐀𐐀", "𐐀𐐨𐐨𐐀𐐨𐐨")] // Surrogate pair letters [InlineData("A%", "a%")] [InlineData("_?#-", "_?#-")] [InlineData("?!?", "? ! ?")] @@ -191,7 +198,6 @@ public static void ToSnakeUpperCase(string expectedResult, string name) [InlineData("i18n", "i18n")] [InlineData("i18n-policy", "I18nPolicy")] [InlineData("7samurai", "7samurai")] - [InlineData("άλφα-βήτα-γάμμα", "ΆλφαΒήταΓάμμα")] [InlineData("camel-case", "camelCase")] [InlineData("camel-case", "CamelCase")] [InlineData("snake_case", "snake_case")] @@ -222,6 +228,10 @@ public static void ToSnakeUpperCase(string expectedResult, string name) [InlineData("abc-7ef", " abc 7ef ")] [InlineData("ab7-def", " ab7 def ")] [InlineData("-abc", "-abc")] + [InlineData("", "")] + [InlineData("😀", "😀")] // Surrogate pairs + [InlineData("άλφα-βήτα-γάμμα", "ΆλφαΒήταΓάμμα")] // Non-ascii letters + [InlineData("𐐨𐐨𐐨-𐐨𐐨𐐨", "𐐀𐐨𐐨𐐀𐐨𐐨")] // Surrogate pair letters [InlineData("a%", "a%")] [InlineData("-?#_", "-?#_")] [InlineData("?!?", "? ! ?")] @@ -255,7 +265,6 @@ public static void ToKebabLowerCase(string expectedResult, string name) [InlineData("I18N", "i18n")] [InlineData("I18N-POLICY", "I18nPolicy")] [InlineData("7SAMURAI", "7samurai")] - [InlineData("ΆΛΦΑ-ΒΉΤΑ-ΓΆΜΜΑ", "ΆλφαΒήταΓάμμα")] [InlineData("CAMEL-CASE", "camelCase")] [InlineData("CAMEL-CASE", "CamelCase")] [InlineData("SNAKE_CASE", "snake_case")] @@ -286,6 +295,10 @@ public static void ToKebabLowerCase(string expectedResult, string name) [InlineData("ABC-7EF", " abc 7ef ")] [InlineData("AB7-DEF", " ab7 def ")] [InlineData("-ABC", "-abc")] + [InlineData("", "")] + [InlineData("😀", "😀")] // Surrogate pairs + [InlineData("ΆΛΦΑ-ΒΉΤΑ-ΓΆΜΜΑ", "ΆλφαΒήταΓάμμα")] // Non-ascii letters + [InlineData("𐐀𐐀𐐀-𐐀𐐀𐐀", "𐐀𐐨𐐨𐐀𐐨𐐨")] // Surrogate pair letters [InlineData("A%", "a%")] [InlineData("-?#_", "-?#_")] [InlineData("?!?", "? ! ?")] @@ -313,6 +326,22 @@ public static void ToKebabUpperCase(string expectedResult, string name) Assert.Equal(expectedResult, value); } + [Fact] + public static void SnakeCasePolicy_MissingSurrogatePair_ThrowsArgumentException() + { + string value = "xyz\ud83d"; + Assert.Throws(() => JsonNamingPolicy.SnakeCaseLower.ConvertName(value)); + Assert.Throws(() => JsonNamingPolicy.SnakeCaseUpper.ConvertName(value)); + } + + [Fact] + public static void KebabCasePolicy_MissingSurrogatePair_ThrowsArgumentException() + { + string value = "xyz\ud83d"; + Assert.Throws(() => JsonNamingPolicy.KebabCaseLower.ConvertName(value)); + Assert.Throws(() => JsonNamingPolicy.KebabCaseUpper.ConvertName(value)); + } + [Theory, OuterLoop] [MemberData(nameof(GetValidMemberNames))] public static void CamelCaseNamingPolicyMatchesNewtonsoftNamingStrategy(string name)