Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make diagnostic checksum computation into an extension method. #77277

Merged
merged 5 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
using System.Linq;
using System.Reflection;
using Microsoft.CodeAnalysis.Diagnostics.Telemetry;
using Microsoft.CodeAnalysis.Simplification;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.Diagnostics;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

using System;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading;
Expand Down Expand Up @@ -37,7 +36,7 @@ internal partial class DiagnosticAnalyzerService
return null;

var projectState = project.State;
var checksum = await project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false);
var checksum = await project.GetDiagnosticChecksumAsync(cancellationToken).ConfigureAwait(false);

// Make sure the cached pair was computed with at least the same state sets we're asking about. if not,
// recompute and cache with the new state sets.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ async Task<ImmutableDictionary<DiagnosticAnalyzer, DiagnosticAnalysisResult>> Ge
if (s_projectToForceAnalysisData.TryGetValue(project.State, out var box) &&
analyzers.IsSubsetOf(box.Value.analyzers))
{
var checksum = await project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false);
var checksum = await project.GetDiagnosticChecksumAsync(cancellationToken).ConfigureAwait(false);
if (box.Value.checksum == checksum)
return box.Value.diagnosticAnalysisResults;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ private partial class DiagnosticIncrementalAnalyzer
public async Task<ImmutableArray<DiagnosticData>> ForceAnalyzeProjectAsync(Project project, CancellationToken cancellationToken)
{
var projectState = project.State;
var checksum = await project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false);
var checksum = await project.GetDiagnosticChecksumAsync(cancellationToken).ConfigureAwait(false);

try
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ internal abstract partial class AbstractPullDiagnosticHandler<TDiagnosticsParams
internal readonly record struct DiagnosticsRequestState(Project Project, int GlobalStateVersion, RequestContext Context, IDiagnosticSource DiagnosticSource);

/// <summary>
/// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance
/// changed. The <see cref="VersionStamp"/> is produced by <see cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the
/// <see cref="Checksum"/> is produced by <see cref="Project.GetDependentChecksumAsync(CancellationToken)"/>. The former is faster
/// and works well for us in the normal case. The latter still allows us to reuse diagnostics when changes happen that
/// update the version stamp but not the content (for example, forking LSP text).
/// Cache where we store the data produced by prior requests so that they can be returned if nothing of significance
/// changed. The <see cref="VersionStamp"/> is produced by <see
/// cref="Project.GetDependentVersionAsync(CancellationToken)"/> while the <see cref="Checksum"/> is produced by
/// <see cref="CodeAnalysis.Diagnostics.Extensions.GetDiagnosticChecksumAsync"/>. The former is faster and works
/// well for us in the normal case. The latter still allows us to reuse diagnostics when changes happen that update
/// the version stamp but not the content (for example, forking LSP text).
/// </summary>
private sealed class DiagnosticsPullCache(string uniqueKey) : VersionedPullCache<(int globalStateVersion, VersionStamp? dependentVersion), (int globalStateVersion, Checksum dependentChecksum), DiagnosticsRequestState, ImmutableArray<DiagnosticData>>(uniqueKey)
{
Expand All @@ -33,7 +34,7 @@ private sealed class DiagnosticsPullCache(string uniqueKey) : VersionedPullCache

public override async Task<(int globalStateVersion, Checksum dependentChecksum)> ComputeExpensiveVersionAsync(DiagnosticsRequestState state, CancellationToken cancellationToken)
{
return (state.GlobalStateVersion, await state.Project.GetDependentChecksumAsync(cancellationToken).ConfigureAwait(false));
return (state.GlobalStateVersion, await state.Project.GetDiagnosticChecksumAsync(cancellationToken).ConfigureAwait(false));
}

/// <inheritdoc cref="VersionedPullCache{TCheapVersion, TExpensiveVersion, TState, TComputedData}.ComputeDataAsync(TState, CancellationToken)"/>
Expand Down
75 changes: 75 additions & 0 deletions src/Workspaces/Core/Portable/Diagnostics/Extensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Collections;
Expand All @@ -21,6 +22,8 @@ namespace Microsoft.CodeAnalysis.Diagnostics;

internal static partial class Extensions
{
private static readonly ConditionalWeakTable<Project, AsyncLazy<Checksum>> s_projectToDiagnosticChecksum = new();
Copy link
Contributor

@ToddGrun ToddGrun Feb 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ConditionalWeakTable

For my understanding:

I thought generally it was best to avoid CWTs unless it was quite difficult not to do so as they add extra strain to GC. Is this not the case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I didn't think there would be an issue with cwt here given how few keys there would be.

We can definitely run speedometer to see if there are concerns here


public static async Task<ImmutableArray<Diagnostic>> ToDiagnosticsAsync(this IEnumerable<DiagnosticData> diagnostics, Project project, CancellationToken cancellationToken)
{
var result = ArrayBuilder<Diagnostic>.GetInstance();
Expand Down Expand Up @@ -429,4 +432,76 @@ await suppressionAnalyzer.AnalyzeAsync(
semanticModel, span, hostCompilationWithAnalyzers, analyzerInfoCache.GetDiagnosticDescriptors, reportDiagnostic, cancellationToken).ConfigureAwait(false);
}
}

/// <summary>
/// Calculates a checksum that contains a project's checksum along with a checksum for each of the project's
/// transitive dependencies.
/// </summary>
/// <remarks>
/// This checksum calculation can be used for cases where a feature needs to know if the semantics in this project
/// changed. For example, for diagnostics or caching computed semantic data. The goal is to ensure that changes to
/// <list type="bullet">
/// <item>Files inside the current project</item>
/// <item>Project properties of the current project</item>
/// <item>Visible files in referenced projects</item>
/// <item>Project properties in referenced projects</item>
/// </list>
/// are reflected in the metadata we keep so that comparing solutions accurately tells us when we need to recompute
/// semantic work.
///
/// <para>This method of checking for changes has a few important properties that differentiate it from other methods of determining project version.
/// <list type="bullet">
/// <item>Changes to methods inside the current project will be reflected to compute updated diagnostics.
/// <see cref="Project.GetDependentSemanticVersionAsync(CancellationToken)"/> does not change as it only returns top level changes.</item>
/// <item>Reloading a project without making any changes will re-use cached diagnostics.
/// <see cref="Project.GetDependentSemanticVersionAsync(CancellationToken)"/> changes as the project is removed, then added resulting in a version change.</item>
/// </list>
/// </para>
/// This checksum is also affected by the <see cref="SourceGeneratorExecutionVersion"/> for this project.
/// As such, it is not usable across different sessions of a particular host.
/// </remarks>
public static Task<Checksum> GetDiagnosticChecksumAsync(this Project? project, CancellationToken cancellationToken)
{
if (project is null)
return SpecializedTasks.Default<Checksum>();

var lazyChecksum = s_projectToDiagnosticChecksum.GetValue(
project,
static project => AsyncLazy.Create(
static (project, cancellationToken) => ComputeDiagnosticChecksumAsync(project, cancellationToken),
project));

return lazyChecksum.GetValueAsync(cancellationToken);

static async Task<Checksum> ComputeDiagnosticChecksumAsync(Project project, CancellationToken cancellationToken)
{
var solution = project.Solution;

using var _ = ArrayBuilder<Checksum>.GetInstance(out var tempChecksumArray);

// Mix in the SG information for this project. That way if it changes, we will have a different
// checksum (since semantics could have changed because of this).
if (solution.CompilationState.SourceGeneratorExecutionVersionMap.Map.TryGetValue(project.Id, out var executionVersion))
tempChecksumArray.Add(executionVersion.Checksum);

// Get the checksum for the project itself. Note: this will normally be cached. As such, even if we
// have a different Project instance (due to a change in an unrelated project), this will be fast to
// compute and return.
var projectChecksum = await project.State.GetChecksumAsync(cancellationToken).ConfigureAwait(false);
tempChecksumArray.Add(projectChecksum);

// Calculate a checksum this project and for each dependent project that could affect semantics for this
// project. We order the projects guid so that we are resilient to the underlying in-memory graph structure
// changing this arbitrarily.
foreach (var projectRef in project.ProjectReferences.OrderBy(r => r.ProjectId.Id))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ProjectReferences

I don't think it's a problem, but this could hit repeat projects during the recursion, whereas it wouldn't have before, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't have any repeats afaict.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: an the expensive checksumming will be cached (at the ProjectState level). All this is doing is mixing the checksums together to make the final checksum.

We honestly could even consider not caching this. I was a bit nervous about going that much, in case of LSP where we might have N requests on the same Project.

{
// Note that these checksums should only actually be calculated once, if the project is unchanged
// the same checksum will be returned.
tempChecksumArray.Add(await GetDiagnosticChecksumAsync(
solution.GetProject(projectRef.ProjectId), cancellationToken).ConfigureAwait(false));
}

return Checksum.Create(tempChecksumArray);
}
}
}
30 changes: 0 additions & 30 deletions src/Workspaces/Core/Portable/Workspace/Solution/Project.cs
Original file line number Diff line number Diff line change
Expand Up @@ -544,36 +544,6 @@ public Task<VersionStamp> GetDependentSemanticVersionAsync(CancellationToken can
public Task<VersionStamp> GetSemanticVersionAsync(CancellationToken cancellationToken = default)
=> State.GetSemanticVersionAsync(cancellationToken);

/// <summary>
/// Calculates a checksum that contains a project's checksum along with a checksum for each of the project's
/// transitive dependencies.
/// </summary>
/// <remarks>
/// This checksum calculation can be used for cases where a feature needs to know if the semantics in this project
/// changed. For example, for diagnostics or caching computed semantic data. The goal is to ensure that changes to
/// <list type="bullet">
/// <item>Files inside the current project</item>
/// <item>Project properties of the current project</item>
/// <item>Visible files in referenced projects</item>
/// <item>Project properties in referenced projects</item>
/// </list>
/// are reflected in the metadata we keep so that comparing solutions accurately tells us when we need to recompute
/// semantic work.
///
/// <para>This method of checking for changes has a few important properties that differentiate it from other methods of determining project version.
/// <list type="bullet">
/// <item>Changes to methods inside the current project will be reflected to compute updated diagnostics.
/// <see cref="Project.GetDependentSemanticVersionAsync(CancellationToken)"/> does not change as it only returns top level changes.</item>
/// <item>Reloading a project without making any changes will re-use cached diagnostics.
/// <see cref="Project.GetDependentSemanticVersionAsync(CancellationToken)"/> changes as the project is removed, then added resulting in a version change.</item>
/// </list>
/// </para>
/// This checksum is also affected by the <see cref="SourceGeneratorExecutionVersion"/> for this project.
/// As such, it is not usable across different sessions of a particular host.
/// </remarks>
internal Task<Checksum> GetDependentChecksumAsync(CancellationToken cancellationToken)
=> Solution.CompilationState.GetDependentChecksumAsync(this.Id, cancellationToken);

/// <summary>
/// Creates a new instance of this project updated to have the new assembly name.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ bool ContainsAssemblyOrModuleOrDynamic(

Task<VersionStamp> GetDependentVersionAsync(SolutionCompilationState compilationState, CancellationToken cancellationToken);
Task<VersionStamp> GetDependentSemanticVersionAsync(SolutionCompilationState compilationState, CancellationToken cancellationToken);
Task<Checksum> GetDependentChecksumAsync(SolutionCompilationState compilationState, CancellationToken cancellationToken);

/// <summary>
/// Gets the source generator files generated by this <see cref="ICompilationTracker"/>. <paramref
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,6 @@ public CompilationTrackerValidationException(string message, Exception inner) :

private AsyncLazy<VersionStamp>? _lazyDependentVersion;
private AsyncLazy<VersionStamp>? _lazyDependentSemanticVersion;
private AsyncLazy<Checksum>? _lazyDependentChecksum;

public Task<VersionStamp> GetDependentVersionAsync(
SolutionCompilationState compilationState, CancellationToken cancellationToken)
Expand Down Expand Up @@ -1087,66 +1086,6 @@ private async Task<VersionStamp> ComputeDependentSemanticVersionAsync(
return version;
}

public Task<Checksum> GetDependentChecksumAsync(
SolutionCompilationState compilationState, CancellationToken cancellationToken)
{
if (_lazyDependentChecksum == null)
{
// note: solution is captured here, but it will go away once GetValueAsync executes.
Interlocked.CompareExchange(
ref _lazyDependentChecksum,
AsyncLazy.Create(static (arg, c) =>
arg.self.ComputeDependentChecksumAsync(arg.compilationState, c),
arg: (self: this, compilationState)),
null);
}

return _lazyDependentChecksum.GetValueAsync(cancellationToken);
}

private async Task<Checksum> ComputeDependentChecksumAsync(
SolutionCompilationState solution, CancellationToken cancellationToken)
{
using var _ = ArrayBuilder<Checksum>.GetInstance(out var tempChecksumArray);

// Mix in the SG information for this project. That way if it changes, we will have a different
// checksum (since semantics could have changed because of this).
if (solution.SourceGeneratorExecutionVersionMap.Map.TryGetValue(this.ProjectState.Id, out var executionVersion))
tempChecksumArray.Add(executionVersion.Checksum);

// Get the checksum for the project itself.
var projectChecksum = await this.ProjectState.GetChecksumAsync(cancellationToken).ConfigureAwait(false);
tempChecksumArray.Add(projectChecksum);

// Calculate a checksum this project and for each dependent project that could affect semantics for this
// project. We order the projects so that we are resilient to the underlying in-memory graph structure
// changing this arbitrarily. We do not want that to cause us to change our semantic version.. Note: we
// use the project filepath+name as a unique way to reference a project. This matches the logic in our
// persistence-service implementation as to how information is associated with a project.
var transitiveDependencies = solution.SolutionState.GetProjectDependencyGraph().GetProjectsThatThisProjectTransitivelyDependsOn(this.ProjectState.Id);
var orderedProjectIds = transitiveDependencies.OrderBy(id =>
{
var depProject = solution.SolutionState.GetRequiredProjectState(id);
return (depProject.FilePath, depProject.Name);
});

foreach (var projectId in orderedProjectIds)
{
// Mix in the SG information for the dependent project. That way if it changes, we will have a
// different checksum (since semantics could have changed because of this).
if (solution.SourceGeneratorExecutionVersionMap.Map.TryGetValue(projectId, out executionVersion))
tempChecksumArray.Add(executionVersion.Checksum);

// Note that these checksums should only actually be calculated once, if the project is unchanged
// the same checksum will be returned.
var referencedProject = solution.SolutionState.GetRequiredProjectState(projectId);
var referencedProjectChecksum = await referencedProject.GetChecksumAsync(cancellationToken).ConfigureAwait(false);
tempChecksumArray.Add(referencedProjectChecksum);
}

return Checksum.Create(tempChecksumArray);
}

#endregion
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ private sealed class WithFrozenSourceGeneratedDocumentsCompilationTracker : ICom
{
private readonly TextDocumentStates<SourceGeneratedDocumentState> _replacementDocumentStates;

private AsyncLazy<Checksum>? _lazyDependentChecksum;

/// <summary>
/// The lazily-produced compilation that has the generated document updated. This is initialized by call to
/// <see cref="GetCompilationAsync"/>.
Expand Down Expand Up @@ -145,28 +143,6 @@ public Task<VersionStamp> GetDependentVersionAsync(SolutionCompilationState comp
public Task<VersionStamp> GetDependentSemanticVersionAsync(SolutionCompilationState compilationState, CancellationToken cancellationToken)
=> UnderlyingTracker.GetDependentSemanticVersionAsync(compilationState, cancellationToken);

public Task<Checksum> GetDependentChecksumAsync(SolutionCompilationState compilationState, CancellationToken cancellationToken)
{
if (_lazyDependentChecksum == null)
{
var tmp = compilationState; // temp. local to avoid a closure allocation for the fast path
// note: solution is captured here, but it will go away once GetValueAsync executes.
Interlocked.CompareExchange(
ref _lazyDependentChecksum,
AsyncLazy.Create(static (arg, c) =>
arg.self.ComputeDependentChecksumAsync(arg.tmp, c),
arg: (self: this, tmp)),
null);
}

return _lazyDependentChecksum.GetValueAsync(cancellationToken);
}

private async Task<Checksum> ComputeDependentChecksumAsync(SolutionCompilationState compilationState, CancellationToken cancellationToken)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ComputeDependentChecksumAsync

Am I reading this right in that the old code would potentially reuse the cached checksum from the underlying tracker and only need to get new checksum information from _replacementDocumentStates?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct. But I don't think the complexity is worth it. Having the new system operate uniformly on any Project instance is more desirable for me.

=> Checksum.Create(
await UnderlyingTracker.GetDependentChecksumAsync(compilationState, cancellationToken).ConfigureAwait(false),
(await _replacementDocumentStates.GetDocumentChecksumsAndIdsAsync(cancellationToken).ConfigureAwait(false)).Checksum);

public async ValueTask<TextDocumentStates<SourceGeneratedDocumentState>> GetSourceGeneratedDocumentStatesAsync(
SolutionCompilationState compilationState, bool withFrozenSourceGeneratedDocuments, CancellationToken cancellationToken)
{
Expand Down
Loading
Loading