Skip to content

Commit

Permalink
When producing character literals, surrogate characters should be esc…
Browse files Browse the repository at this point in the history
…aped. (#20720)

Fixes #20693
  • Loading branch information
gafter authored Jul 31, 2017
1 parent 8285d42 commit e7869bd
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 8 deletions.
44 changes: 39 additions & 5 deletions src/Compilers/CSharp/Portable/SymbolDisplay/ObjectDisplay.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Diagnostics;
using System.Globalization;
using System.Reflection;
using System.Text;
Expand Down Expand Up @@ -175,13 +176,24 @@ private static bool TryReplaceChar(char c, out string replaceWith)
return true;
}

switch (CharUnicodeInfo.GetUnicodeCategory(c))
if (NeedsEscaping(CharUnicodeInfo.GetUnicodeCategory(c)))
{
replaceWith = "\\u" + ((int)c).ToString("x4");
return true;
}

return false;
}

private static bool NeedsEscaping(UnicodeCategory category)
{
switch (category)
{
case UnicodeCategory.Control:
case UnicodeCategory.OtherNotAssigned:
case UnicodeCategory.ParagraphSeparator:
case UnicodeCategory.LineSeparator:
replaceWith = "\\u" + ((int)c).ToString("x4");
case UnicodeCategory.Surrogate:
return true;
default:
return false;
Expand Down Expand Up @@ -223,10 +235,32 @@ public static string FormatLiteral(string value, ObjectDisplayOptions options)
builder.Append(quote);
}

foreach (var c in value)
for (int i = 0; i < value.Length; i++)
{
string replaceWith;
if (escapeNonPrintable && TryReplaceChar(c, out replaceWith))
char c = value[i];
if (escapeNonPrintable && CharUnicodeInfo.GetUnicodeCategory(c) == UnicodeCategory.Surrogate)
{
var category = CharUnicodeInfo.GetUnicodeCategory(value, i);
if (category == UnicodeCategory.Surrogate)
{
// an unpaired surrogate
builder.Append("\\u" + ((int)c).ToString("x4"));
}
else if (NeedsEscaping(category))
{
// a surrogate pair that needs to be escaped
var unicode = char.ConvertToUtf32(value, i);
builder.Append("\\U" + unicode.ToString("x8"));
i++; // skip the already-encoded second surrogate of the pair
}
else
{
// copy a printable surrogate pair directly
builder.Append(c);
builder.Append(value[++i]);
}
}
else if (escapeNonPrintable && TryReplaceChar(c, out var replaceWith))
{
builder.Append(replaceWith);
}
Expand Down
8 changes: 8 additions & 0 deletions src/Compilers/CSharp/Test/Syntax/Syntax/SyntaxFactoryTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,14 @@ public void TestEscapeLineSeparator()
Assert.Equal("\"\\u2028\"", literal.Text);
}

[WorkItem(20693, "https://github.com/dotnet/roslyn/issues/20693")]
[Fact]
public void TestEscapeSurrogate()
{
var literal = SyntaxFactory.Literal('\uDBFF');
Assert.Equal("'\\udbff'", literal.Text);
}

private static void CheckLiteralToString(dynamic value, string expected)
{
var literal = SyntaxFactory.Literal(value);
Expand Down
29 changes: 27 additions & 2 deletions src/Compilers/VisualBasic/Portable/SymbolDisplay/ObjectDisplay.vb
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.ObjectDisplay
Dim wellKnown As String
Dim shouldEscape As Boolean
Dim isCrLf As Boolean
Dim copyPair = False

If Not escapeNonPrintable Then
wellKnown = Nothing
Expand All @@ -392,6 +393,12 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.ObjectDisplay
shouldEscape = True
isCrLf = True
i += 1
ElseIf CharUnicodeInfo.GetUnicodeCategory(c) = UnicodeCategory.Surrogate AndAlso IsPrintable(CharUnicodeInfo.GetUnicodeCategory(str, i - 1)) Then
' copy properly paired surrogates directly into the resulting output
wellKnown = Nothing
shouldEscape = False
isCrLf = False
copyPair = True
Else
wellKnown = GetWellKnownCharacterName(c)
shouldEscape = wellKnown IsNot Nothing OrElse Not IsPrintable(c)
Expand Down Expand Up @@ -462,6 +469,12 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.ObjectDisplay
Yield Quotes()
Else
Yield Character(c)
If copyPair Then
' copy the second character of a Unicode surrogate pair
c = str(i)
i += 1
Yield Character(c)
End If
End If
End If
End While
Expand All @@ -472,8 +485,20 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.ObjectDisplay
End Function

Friend Function IsPrintable(c As Char) As Boolean
Dim category = CharUnicodeInfo.GetUnicodeCategory(c)
Return category <> UnicodeCategory.OtherNotAssigned AndAlso category <> UnicodeCategory.ParagraphSeparator AndAlso category <> UnicodeCategory.Control
Return IsPrintable(CharUnicodeInfo.GetUnicodeCategory(c))
End Function

Private Function IsPrintable(category As UnicodeCategory) As Boolean
Select Case category
Case UnicodeCategory.OtherNotAssigned,
UnicodeCategory.ParagraphSeparator,
UnicodeCategory.Control,
UnicodeCategory.LineSeparator,
UnicodeCategory.Surrogate
Return False
Case Else
Return True
End Select
End Function

Friend Function GetWellKnownCharacterName(c As Char) As String
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,30 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.UnitTests
Assert.Equal("""a"" & vbVerticalTab", FormatPrimitiveUsingHexadecimalNumbers("a" & vbVerticalTab, quoteStrings:=True))
End Sub

<Fact>
Public Sub TextForEscapedStringLiterals_01()
Dim literal = SyntaxFactory.Literal(ChrW(&H2028) & "x") ' U+2028 is a line separator
Assert.Equal("ChrW(8232) & ""x""", literal.Text)
literal = SyntaxFactory.Literal(ChrW(&HDBFF)) ' U+DBFF is a unicode surrogate
Assert.Equal("ChrW(56319)", literal.Text)
End Sub

<Fact>
Public Sub TextForEscapedStringLiterals_02()
' Well-ordered surrogate characters
Dim footBall = "🏈"
Assert.Equal(footBall, ObjectDisplay.FormatPrimitive(footBall, ObjectDisplayOptions.None))
Assert.Equal("""" & footBall & """", ObjectDisplay.FormatPrimitive(footBall, ObjectDisplayOptions.UseQuotes Or ObjectDisplayOptions.EscapeNonPrintableCharacters Or ObjectDisplayOptions.UseHexadecimalNumbers))
Assert.Equal("""" & footBall & """", ObjectDisplay.FormatPrimitive(footBall, ObjectDisplayOptions.UseQuotes Or ObjectDisplayOptions.EscapeNonPrintableCharacters))

' Misordered surrogate characters
Dim trash = ChrW(&HDFC8) & ChrW(&HD83C)
Assert.Equal(trash, ObjectDisplay.FormatPrimitive(trash, ObjectDisplayOptions.None))
Assert.Equal("ChrW(&HDFC8) & ChrW(&HD83C)", ObjectDisplay.FormatPrimitive(trash, ObjectDisplayOptions.UseQuotes Or ObjectDisplayOptions.EscapeNonPrintableCharacters Or ObjectDisplayOptions.UseHexadecimalNumbers))
Assert.Equal("ChrW(57288) & ChrW(55356)", ObjectDisplay.FormatPrimitive(trash, ObjectDisplayOptions.UseQuotes Or ObjectDisplayOptions.EscapeNonPrintableCharacters))

End Sub

<Fact>
Public Sub Strings_QuotesAndEscaping()
Assert.Equal(QuoteAndEscapingCombinations("a"), {"a", """a""", """a"""})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,18 @@ public void String()
Assert.Equal("\"\\uffff\"", FormatValue(s));
Assert.Equal("\"\\uffff\"", FormatValue(s, useHexadecimal: true));

string multiByte = "\ud83c\udfc8";
string multiByte = "\ud83c\udfc8"; // unicode surrogates properly paired representing a printable Unicode codepoint
Assert.Equal(string.Format(format, "🏈"), FormatValue(multiByte));
Assert.Equal(string.Format(format, "🏈"), FormatValue(multiByte, useHexadecimal: true));
Assert.Equal(multiByte, "🏈");

multiByte = "\udbff\udfff"; // unicode surrogates representing an unprintable Unicode codepoint
Assert.Equal(string.Format(format, "\\U0010ffff"), FormatValue(multiByte));
Assert.Equal(string.Format(format, "\\U0010ffff"), FormatValue(multiByte, useHexadecimal: true));

multiByte = "\udfc8\ud83c"; // unicode surrogates not properly paired (in the wrong order in this case)
Assert.Equal(string.Format(format, "\\udfc8\\ud83c"), FormatValue(multiByte));
Assert.Equal(string.Format(format, "\\udfc8\\ud83c"), FormatValue(multiByte, useHexadecimal: true));
}

private static string FormatStringChar(char c)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,11 @@ Namespace Microsoft.CodeAnalysis.VisualBasic.ExpressionEvaluator.UnitTests
Dim multiByte = ChrW(&HD83C) & ChrW(&HDFC8)
Assert.Equal("""🏈""", FormatValue(multiByte))
Assert.Equal("""🏈""", FormatValue(multiByte, useHexadecimal:=True))
Assert.Equal("🏈", multiByte)

multiByte = ChrW(&HDFC8) & ChrW(&HD83C)
Assert.Equal("ChrW(57288) & ChrW(55356)", FormatValue(multiByte))
Assert.Equal("ChrW(&HDFC8) & ChrW(&HD83C)", FormatValue(multiByte, useHexadecimal:=True))
End Sub

<Fact>
Expand Down

0 comments on commit e7869bd

Please sign in to comment.