55using System ;
66using System . Diagnostics ;
77using System . Diagnostics . CodeAnalysis ;
8- using System . Runtime . CompilerServices ;
98using System . Text ;
109using Microsoft . CodeAnalysis . PooledObjects ;
1110using Microsoft . CodeAnalysis . Text ;
12- using Roslyn . Utilities ;
1311
1412namespace Microsoft . CodeAnalysis . CSharp . Syntax . InternalSyntax
1513{
1614 internal partial class LanguageParser
1715 {
18- private ExpressionSyntax ParseInterpolatedStringToken ( )
/// <summary>
/// Consumes the current token (a non-interpolated raw string literal) and wraps it in a
/// <see cref="LiteralExpressionSyntax"/>. The token is re-run through
/// <see cref="ParseInterpolatedOrRawStringToken"/> as if it were an interpolated string with no <c>$</c>
/// and no holes, so that indentation removal and value computation are shared with raw interpolated
/// strings. The resulting token keeps the original text and trivia; only its value and diagnostics are
/// taken from the reinterpreted form.
/// </summary>
private LiteralExpressionSyntax ParseRawStringToken()
{
    var rawToken = this.EatToken();

    var literalKind = SyntaxFacts.GetLiteralExpression(rawToken.Kind);
    Debug.Assert(literalKind != SyntaxKind.None);

    // A raw string always opens with at least three double quotes.
    Debug.Assert(rawToken.Text is ['"', '"', '"', ..]);

    // Reinterpret the raw string as a hole-free interpolated string so the dedentation / value logic
    // lives in exactly one place.
    var reinterpreted = ParseInterpolatedOrRawStringToken(rawToken, isInterpolatedString: false);

    // With no interpolations present, the only content must be one text node holding the interpreted
    // value of the raw string.
    Debug.Assert(reinterpreted.StringStartToken.Kind is SyntaxKind.InterpolatedSingleLineRawStringStartToken or SyntaxKind.InterpolatedMultiLineRawStringStartToken);
    Debug.Assert(reinterpreted.Contents is [InterpolatedStringTextSyntax]);

    var textNode = (InterpolatedStringTextSyntax)reinterpreted.Contents[0]!;

    var allDiagnostics = collectDiagnostics();

    // Text, kind and trivia come straight from the original raw token. The value comes from the
    // interpolated text token, but only when no diagnostics were produced for this raw string.
    var literalToken = SyntaxFactory.Literal(
        rawToken.GetLeadingTrivia(),
        rawToken.Text,
        rawToken.Kind,
        computeValue(),
        rawToken.GetTrailingTrivia());

    return _syntaxFactory.LiteralExpression(literalKind, literalToken.WithDiagnosticsGreen(allDiagnostics));

    // Gathers every diagnostic that should end up on the final literal token.
    DiagnosticInfo[] collectDiagnostics()
    {
        var builder = ArrayBuilder<DiagnosticInfo>.GetInstance();

        // Start with any diagnostics reported on the interpolated string as a whole.
        builder.AddRange(reinterpreted.GetDiagnostics());

        // Diagnostics may sit on the InterpolatedStringText node, but never on the text token inside it
        // (the token is created and immediately wrapped by that node). Those diagnostics are relative to
        // the text token, so shift them by the width of the start quotes to make them relative to the
        // start of the token as a whole.
        Debug.Assert(!textNode.TextToken.ContainsDiagnostics);
        if (MoveDiagnostics(textNode.GetDiagnostics(), reinterpreted.StringStartToken.Width) is { } shifted)
            builder.AddRange(shifted);

        // If the lexer reported something on the original token, reinterpreting it must have reported
        // something too. The converse does not hold: reinterpretation can add indentation diagnostics
        // that the lexer would not have produced.
        Debug.Assert(!rawToken.ContainsDiagnostics || builder.Count > 0);

        return builder.ToArrayAndFree();
    }

    // Picks the value to store on the literal token.
    string computeValue()
    {
        if (allDiagnostics.Length == 0)
            return textNode.TextToken.GetValueText();

        // Preserve what the lexer used to do: in the presence of any diagnostics, the value is the text
        // of the raw string minus its starting quotes.
        return rawToken.Text.TrimStart('"');
    }
}
89+
90+ private InterpolatedStringExpressionSyntax ParseInterpolatedStringToken ( )
1991 {
2092 // We don't want to make the scanner stateful (between tokens) if we can possibly avoid it.
2193 // The approach implemented here is
@@ -40,9 +112,23 @@ private ExpressionSyntax ParseInterpolatedStringToken()
40112 Debug . Assert ( this . CurrentToken . Kind == SyntaxKind . InterpolatedStringToken ) ;
41113 var originalToken = this . EatToken ( ) ;
42114
43- var originalText = originalToken . ValueText ; // this is actually the source text
115+ Debug . Assert ( originalToken . Text [ 0 ] is '$' or '@' ) ;
116+
117+ return ParseInterpolatedOrRawStringToken ( originalToken , isInterpolatedString : true ) ;
118+ }
119+
120+ /// <summary>
121+ /// Takes the token produced by the lexer for a (raw or regular) interpolated string or non-interpolated raw
122+ /// string literal and creates an actual parsed <see cref="InterpolatedStringExpressionSyntax"/> for the syntax
123+ /// tree. For an interpolated string, this will now contain all the holes parsed out as well. For a raw string
124+ /// this will contain a single <see cref="InterpolatedStringTextSyntax"/> for the contents of the raw string.
125+ /// </summary>
126+ private InterpolatedStringExpressionSyntax ParseInterpolatedOrRawStringToken (
127+ SyntaxToken originalToken ,
128+ bool isInterpolatedString )
129+ {
130+ var originalText = originalToken . Text ;
44131 var originalTextSpan = originalText . AsSpan ( ) ;
45- Debug . Assert ( originalText [ 0 ] == '$' || originalText [ 0 ] == '@' ) ;
46132
47133 // compute the positions of the interpolations in the original string literal, if there was an error or not,
48134 // and where the open and close quotes can be found.
@@ -55,19 +141,35 @@ private ExpressionSyntax ParseInterpolatedStringToken()
55141 var needsDedentation = kind == Lexer . InterpolatedStringKind . MultiLineRaw && error == null ;
56142
57143 var result = SyntaxFactory . InterpolatedStringExpression ( getOpenQuote ( ) , getContent ( originalTextSpan ) , getCloseQuote ( ) ) ;
144+ Debug . Assert ( originalToken . ToFullString ( ) == result . ToFullString ( ) ) ; // yield from text equals yield from node
145+
146+ #if DEBUG
147+ // In the raw string case, none of the added text tokens should have diagnostics. Any diagnostics should be
148+ // on their containing InterpolatedStringTextSyntax node instead.
149+ if ( ! isInterpolatedString )
150+ {
151+ foreach ( var content in result . Contents )
152+ {
153+ if ( content is InterpolatedStringTextSyntax interpolatedText )
154+ Debug . Assert ( ! interpolatedText . TextToken . ContainsDiagnostics ) ;
155+ }
156+ }
157+ #endif
58158
59- interpolations . Free ( ) ;
60159 if ( error != null )
61160 result = result . WithDiagnosticsGreen ( [ error ] ) ;
62161
63- Debug . Assert ( originalToken . ToFullString ( ) == result . ToFullString ( ) ) ; // yield from text equals yield from node
162+ interpolations . Free ( ) ;
64163 return result ;
65164
66165 void rescanInterpolation ( out Lexer . InterpolatedStringKind kind , out SyntaxDiagnosticInfo ? error , out Range openQuoteRange , ArrayBuilder < Lexer . Interpolation > interpolations , out Range closeQuoteRange )
67166 {
68167 using var tempLexer = new Lexer ( SourceText . From ( originalText ) , this . Options , allowPreprocessorDirectives : false ) ;
69168 var info = default ( Lexer . TokenInfo ) ;
70- tempLexer . ScanInterpolatedStringLiteralTop ( ref info , out error , out kind , out openQuoteRange , interpolations , out closeQuoteRange ) ;
169+ tempLexer . ScanInterpolatedOrRawStringLiteralTop (
170+ ref info , isInterpolatedString , out error , out kind , out openQuoteRange , interpolations , out closeQuoteRange ) ;
171+
172+ Debug . Assert ( isInterpolatedString || interpolations . Count == 0 , "Non-interpolated parsing should never produce interpolations" ) ;
71173 }
72174
73175 SyntaxToken getOpenQuote ( )
@@ -109,7 +211,7 @@ CodeAnalysis.Syntax.InternalSyntax.SyntaxList<InterpolatedStringContentSyntax> g
109211 // Make sure the interpolation starts at the right location.
110212 var indentationError = getInterpolationIndentationError ( indentationWhitespace , interpolation ) ;
111213 if ( indentationError != null )
112- interpolationNode = interpolationNode . WithDiagnosticsGreen ( new [ ] { indentationError } ) ;
214+ interpolationNode = interpolationNode . WithDiagnosticsGreen ( [ indentationError ] ) ;
113215
114216 builder . Add ( interpolationNode ) ;
115217 currentContentStart = interpolation . CloseBraceRange . End ;
@@ -146,8 +248,17 @@ ReadOnlySpan<char> getIndentationWhitespace(ReadOnlySpan<char> originalTextSpan)
146248 InterpolatedStringContentSyntax ? makeContent (
147249 ReadOnlySpan < char > indentationWhitespace , StringBuilder content , bool isFirst , bool isLast , ReadOnlySpan < char > text )
148250 {
149- if ( text . Length == 0 )
150- return null ;
251+ if ( text . IsEmpty )
252+ {
253+ // For the raw string case, always include an InterpolatedStringText token, even if empty. This
254+ // allows the caller to uniformly assume there is always at least one text token that it can
255+ // extract data from.
256+ return isInterpolatedString
257+ ? null
258+ : SyntaxFactory . InterpolatedStringText (
259+ SyntaxFactory . Literal ( leading : null , "" , SyntaxKind . InterpolatedStringTextToken , "" , trailing : null ) ) ;
260+
261+ }
151262
152263 // If we're not dedenting then just make a standard interpolated text token. Also, we can short-circuit
153264 // if the indentation whitespace is empty (nothing to dedent in that case).
@@ -222,11 +333,13 @@ ReadOnlySpan<char> getIndentationWhitespace(ReadOnlySpan<char> originalTextSpan)
222333 var textString = text . ToString ( ) ;
223334 var valueString = indentationError != null ? textString : content . ToString ( ) ;
224335
336+ // Note: we place errors on the InterpolatedStringText node itself, not on the token. This is an
337+ // invariant that higher up callers can depend on.
225338 var node = SyntaxFactory . InterpolatedStringText (
226339 SyntaxFactory . Literal ( leading : null , textString , SyntaxKind . InterpolatedStringTextToken , valueString , trailing : null ) ) ;
227340
228341 return indentationError != null
229- ? node . WithDiagnosticsGreen ( new [ ] { indentationError } )
342+ ? node . WithDiagnosticsGreen ( [ indentationError ] )
230343 : node ;
231344 }
232345
@@ -285,6 +398,24 @@ SyntaxToken getCloseQuote()
285398 }
286399 }
287400
/// <summary>
/// Returns the escaped spelling of <paramref name="ch"/> for use in error messages: tab, vertical tab and
/// form feed map to <c>\t</c>, <c>\v</c> and <c>\f</c>; every other character maps to a <c>\uXXXX</c>
/// escape with four lowercase hexadecimal digits.
/// </summary>
private static string CharToString(char ch)
    => ch switch
    {
        '\t' => @"\t",
        '\v' => @"\v",
        '\f' => @"\f",
        // Anything else is shown by its UTF-16 code unit value.
        _ => @$"\u{(int)ch:x4}",
    };
414+
415+ /// <summary>
416+ /// Checks if two whitespace sequences differ at a specific character position where both
417+ /// characters are whitespace but different types (e.g., tab vs space).
418+ /// </summary>
288419 private static bool CheckForSpaceDifference (
289420 ReadOnlySpan < char > currentLineWhitespace ,
290421 ReadOnlySpan < char > indentationLineWhitespace ,
@@ -300,8 +431,8 @@ private static bool CheckForSpaceDifference(
300431 SyntaxFacts . IsWhitespace ( currentLineChar ) &&
301432 SyntaxFacts . IsWhitespace ( indentationLineChar ) )
302433 {
303- currentLineMessage = Lexer . CharToString ( currentLineChar ) ;
304- indentationLineMessage = Lexer . CharToString ( indentationLineChar ) ;
434+ currentLineMessage = CharToString ( currentLineChar ) ;
435+ indentationLineMessage = CharToString ( indentationLineChar ) ;
305436 return true ;
306437 }
307438 }
@@ -469,9 +600,11 @@ private SyntaxToken MakeInterpolatedStringTextToken(Lexer.InterpolatedStringKind
469600 return result ;
470601 }
471602
472- private static DiagnosticInfo [ ] MoveDiagnostics ( DiagnosticInfo [ ] infos , int offset )
603+ private static DiagnosticInfo [ ] ? MoveDiagnostics ( DiagnosticInfo [ ] ? infos , int offset )
473604 {
474- Debug . Assert ( infos . Length > 0 ) ;
605+ if ( infos is null or [ ] )
606+ return null ;
607+
475608 var builder = ArrayBuilder < DiagnosticInfo > . GetInstance ( infos . Length ) ;
476609 foreach ( var info in infos )
477610 {
0 commit comments