From 0b3a82c313f902b85ca7cea8ab61f98192cdff72 Mon Sep 17 00:00:00 2001 From: Cyrus Najmabadi Date: Sun, 16 Jun 2024 09:37:39 -0700 Subject: [PATCH 1/5] Do not treat text as json when passed to api that explicit says it is not --- .../SemanticClassifierTests_Json.cs | 21 +++++++++++++++++++ .../AbstractLanguageDetector.cs | 21 +++++++------------ .../EmbeddedLanguageDetector.cs | 13 ++++++++++++ .../LanguageServices/JsonLanguageDetector.cs | 9 +++++++- 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs index a2cd0d8506acb..021a5cd6aab39 100644 --- a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs +++ b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs @@ -222,6 +222,27 @@ void Goo() Json.Array("]")); } + [Theory, CombinatorialData] + public async Task TestJsonOnApiWithStringSyntaxAttribute_OtherLanguage_Field(TestHost testHost) + { + await TestAsync( + """ + using System.Diagnostics.CodeAnalysis; + + class Program + { + [StringSyntax("notjson")] + private string field; + void Goo() + { + [|this.field = @"[{ 'goo': 0}]";|] + } + } + """ + EmbeddedLanguagesTestConstants.StringSyntaxAttributeCodeCSharp, + testHost, + Field("field")); + } + [Theory, CombinatorialData] public async Task TestJsonOnApiWithStringSyntaxAttribute_Property(TestHost testHost) { diff --git a/src/Features/Core/Portable/EmbeddedLanguages/AbstractLanguageDetector.cs b/src/Features/Core/Portable/EmbeddedLanguages/AbstractLanguageDetector.cs index cb22f05ded3b3..5c4048ac1afbf 100644 --- a/src/Features/Core/Portable/EmbeddedLanguages/AbstractLanguageDetector.cs +++ b/src/Features/Core/Portable/EmbeddedLanguages/AbstractLanguageDetector.cs @@ -13,21 +13,14 @@ namespace Microsoft.CodeAnalysis.Features.EmbeddedLanguages; -internal abstract class AbstractLanguageDetector +internal abstract class AbstractLanguageDetector( + EmbeddedLanguageInfo info, + ImmutableArray languageIdentifiers, + EmbeddedLanguageCommentDetector commentDetector) where TOptions : struct, Enum { - protected readonly EmbeddedLanguageInfo Info; - - private readonly EmbeddedLanguageDetector _detector; - - protected AbstractLanguageDetector( - EmbeddedLanguageInfo info, - ImmutableArray languageIdentifiers, - EmbeddedLanguageCommentDetector commentDetector) - { - Info = info; - _detector = new EmbeddedLanguageDetector(info, languageIdentifiers, commentDetector); - } + protected readonly EmbeddedLanguageInfo Info = info; + protected readonly EmbeddedLanguageDetector Detector = new EmbeddedLanguageDetector(info, languageIdentifiers, commentDetector); /// /// Whether or not this is an argument to a well known api for this language (like Regex.Match or JToken.Parse). @@ -64,7 +57,7 @@ public bool IsEmbeddedLanguageToken( // First check for the standard pattern of either a `// lang=...` comment or an API annotated with the // [StringSyntax] attribute. - if (_detector.IsEmbeddedLanguageToken(token, semanticModel, cancellationToken, out _, out var stringOptions)) + if (Detector.IsEmbeddedLanguageToken(token, semanticModel, cancellationToken, out _, out var stringOptions)) { // If we got string-options back, then we were on a comment string (e.g. `// lang=regex,option1,option2`). // Attempt to convert the string options to actual options requested. diff --git a/src/Features/Core/Portable/EmbeddedLanguages/EmbeddedLanguageDetector.cs b/src/Features/Core/Portable/EmbeddedLanguages/EmbeddedLanguageDetector.cs index 5f67076ad07ff..101611416f132 100644 --- a/src/Features/Core/Portable/EmbeddedLanguages/EmbeddedLanguageDetector.cs +++ b/src/Features/Core/Portable/EmbeddedLanguages/EmbeddedLanguageDetector.cs @@ -42,6 +42,19 @@ public bool IsEmbeddedLanguageToken( return LanguageIdentifiers.Contains(identifier); } + public string? TryGetEmbeddedLanguageTokenIdentifier( + SyntaxToken token, + SemanticModel semanticModel, + CancellationToken cancellationToken) + { + return IsEmbeddedLanguageTokenWorker(token, semanticModel, cancellationToken, out var identifier, out _) + ? identifier + : null; + } + + public bool IsEmbeddedLanguageIdentifier(string identifier) + => LanguageIdentifiers.Contains(identifier); + private bool IsEmbeddedLanguageTokenWorker( SyntaxToken token, SemanticModel semanticModel, diff --git a/src/Features/Core/Portable/EmbeddedLanguages/Json/LanguageServices/JsonLanguageDetector.cs b/src/Features/Core/Portable/EmbeddedLanguages/Json/LanguageServices/JsonLanguageDetector.cs index 8db47646a6a61..41bb12bc0cd9c 100644 --- a/src/Features/Core/Portable/EmbeddedLanguages/Json/LanguageServices/JsonLanguageDetector.cs +++ b/src/Features/Core/Portable/EmbeddedLanguages/Json/LanguageServices/JsonLanguageDetector.cs @@ -73,7 +73,14 @@ protected override JsonOptions GetStringSyntaxDefaultOptions() return result; if (includeProbableStrings && IsProbablyJson(token, out var tree)) - return tree; + { + // We have a string that looks like json. Treat it as such *unless* we see that it was *explicitly* marked + // as belonging to some other language. This allows us to light up on strings that are very likely to be + // json while not misclassifying strings that are actually meant to be something else. + var languageIdentifier = this.Detector.TryGetEmbeddedLanguageTokenIdentifier(token, semanticModel, cancellationToken); + if (languageIdentifier is null || this.Detector.IsEmbeddedLanguageIdentifier(languageIdentifier)) + return tree; + } return null; } From 62d8b664907d5e5eb93a7dec248cfc7df06ca98b Mon Sep 17 00:00:00 2001 From: Cyrus Najmabadi Date: Sun, 16 Jun 2024 14:03:26 -0700 Subject: [PATCH 2/5] Work item' --- .../CSharpTest/Classification/SemanticClassifierTests_Json.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs index 021a5cd6aab39..44c63fe81ea29 100644 --- a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs +++ b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_Json.cs @@ -223,6 +223,7 @@ void Goo() } [Theory, CombinatorialData] + [WorkItem("https://github.com/dotnet/roslyn/issues/74020")] public async Task TestJsonOnApiWithStringSyntaxAttribute_OtherLanguage_Field(TestHost testHost) { await TestAsync( From 6c38ee6b6081ffa70d3fdd51029aabc98d19f0e4 Mon Sep 17 00:00:00 2001 From: Cyrus Najmabadi Date: Sun, 16 Jun 2024 14:54:58 -0700 Subject: [PATCH 3/5] Add string syntax --- .../Classification/SemanticClassifierTests.cs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs index 86646b86074b2..02027819001aa 100644 --- a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs +++ b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections.Immutable; +using System.Diagnostics.CodeAnalysis; using System.Linq; using System.Threading.Tasks; using Microsoft.CodeAnalysis.Classification; @@ -10,6 +11,7 @@ using Microsoft.CodeAnalysis.Editor.Shared.Utilities; using Microsoft.CodeAnalysis.Editor.Tagging; using Microsoft.CodeAnalysis.Editor.UnitTests; +using Microsoft.CodeAnalysis.Editor.UnitTests.Classification; using Microsoft.CodeAnalysis.Options; using Microsoft.CodeAnalysis.Remote.Testing; using Microsoft.CodeAnalysis.Shared.Extensions; @@ -27,7 +29,7 @@ namespace Microsoft.CodeAnalysis.Editor.CSharp.UnitTests.Classification; [Trait(Traits.Feature, Traits.Features.Classification)] -public partial class SemanticClassifierTests : AbstractCSharpClassifierTests +public sealed partial class SemanticClassifierTests : AbstractCSharpClassifierTests { protected override async Task> GetClassificationSpansAsync(string code, ImmutableArray spans, ParseOptions? options, TestHost testHost) { @@ -37,6 +39,14 @@ protected override async Task> GetClassificationS return await GetSemanticClassificationsAsync(document, spans); } + private new Task TestAsync( + [StringSyntax("C#-Test")] string code, + TestHost testHost, + params FormattedClassification[] expected) + { + return base.TestAsync(code, testHost, expected); + } + [Theory, CombinatorialData] public async Task GenericClassDeclaration(TestHost testHost) { From 8e7bec2050fe4760e82fff300d577fd6043ca89a Mon Sep 17 00:00:00 2001 From: Cyrus Najmabadi Date: Mon, 17 Jun 2024 11:20:19 -0700 Subject: [PATCH 4/5] Update version --- .../FindSymbols/Shared/AbstractSyntaxIndex_Persistence.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Workspaces/Core/Portable/FindSymbols/Shared/AbstractSyntaxIndex_Persistence.cs b/src/Workspaces/Core/Portable/FindSymbols/Shared/AbstractSyntaxIndex_Persistence.cs index aaec84b363045..0ebeaf8f4e9af 100644 --- a/src/Workspaces/Core/Portable/FindSymbols/Shared/AbstractSyntaxIndex_Persistence.cs +++ b/src/Workspaces/Core/Portable/FindSymbols/Shared/AbstractSyntaxIndex_Persistence.cs @@ -23,7 +23,7 @@ internal partial class AbstractSyntaxIndex /// that we will not try to read previously cached data from a prior version of roslyn with a different format and /// will instead regenerate all the indices with the new format. /// - private static readonly Checksum s_serializationFormatChecksum = CodeAnalysis.Checksum.Create("40"); + private static readonly Checksum s_serializationFormatChecksum = CodeAnalysis.Checksum.Create("41"); /// /// Cache of ParseOptions to a checksum for the contained From 3a03267c9d31512d096ecff22bd1ebd582a32544 Mon Sep 17 00:00:00 2001 From: Cyrus Najmabadi Date: Mon, 17 Jun 2024 11:25:00 -0700 Subject: [PATCH 5/5] Make private --- .../Classification/SemanticClassifierTests_TestMarkup.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_TestMarkup.cs b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_TestMarkup.cs index d2950acac68c0..b84547333eeac 100644 --- a/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_TestMarkup.cs +++ b/src/EditorFeatures/CSharpTest/Classification/SemanticClassifierTests_TestMarkup.cs @@ -30,7 +30,7 @@ public static void M([System.Diagnostics.CodeAnalysis.StringSyntax("C#-test")] s } """ + EmbeddedLanguagesTestConstants.StringSyntaxAttributeCodeCSharp; - protected async Task TestEmbeddedCSharpAsync( + private async Task TestEmbeddedCSharpAsync( string code, TestHost testHost, params FormattedClassification[] expected) @@ -53,7 +53,7 @@ void M() await TestEmbeddedCSharpWithMultipleSpansAsync(allCode, testHost, spans, expected); } - protected async Task TestEmbeddedCSharpWithMultipleSpansAsync( + private async Task TestEmbeddedCSharpWithMultipleSpansAsync( string allCode, TestHost testHost, ImmutableArray spans,