
Resolve 'collection modified' exception when logging results (#2549)
* Introduce slim semaphore to enable per-file-hash results caching (sketched below).

* Latest iteration of concurrency fix for 'collection modified' exception.

* Improvements to concurrency change.

* Skip Coyote testing for now.

* Add release note and incidental PR clean-up.

* Final PR clean-up.

* Null hash data when not present.
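
At a high level, the change replaces the shared `ConcurrentDictionary<string, IAnalysisLogger>` cache with a single `CachingLogger` shared by every scan target that carries the same SHA-256 hash; a slim semaphore inside the logger serializes the first scan and lets subsequent targets replay the cached results. A minimal sketch of that idea, assuming the semaphore lives inside the logger (`MiniCachingLogger` and its members are illustrative, not the SDK's API):

```csharp
using System.Collections.Generic;
using System.Threading;

// Illustrative sketch of per-file-hash results caching. The first thread
// to scan a given hash holds the semaphore while it populates the cache;
// later targets sharing the logger wait, observe CacheFinalized, and
// replay the cached results instead of re-scanning.
public sealed class MiniCachingLogger
{
    private readonly SemaphoreSlim _semaphore = new SemaphoreSlim(1, 1);
    private readonly List<string> _results = new List<string>();

    public bool CacheFinalized { get; private set; }

    public IReadOnlyList<string> Results => _results;

    // Called when a target is about to be scanned; blocks until any
    // in-flight scan that shares this logger has finished.
    public void AnalyzingTarget() => _semaphore.Wait();

    public void Log(string result) => _results.Add(result);

    // Called by the scanning thread once its scan (or replay) is done.
    public void ReleaseLock()
    {
        CacheFinalized = true;
        _semaphore.Release();
    }
}
```

Because every target with the same hash funnels through one logger instance, no thread ever enumerates a results collection while another thread is still appending to it.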
michaelcfanning authored Oct 9, 2022
1 parent 83c8ff5 commit a3b8dd1
Showing 10 changed files with 162 additions and 88 deletions.
2 changes: 2 additions & 0 deletions src/ReleaseHistory.md
@@ -1,6 +1,8 @@
# SARIF Package Release History (SDK, Driver, Converters, and Multitool)

## Unreleased
* BUGFIX: Another attempt to resolve 'InvalidOperationException' with message `Collection was modified; enumeration operation may not execute` in `MultithreadedAnalyzeCommandBase`, raised when analyzing with the `--hashes` switch. [#2549](https://github.com/microsoft/sarif-sdk/pull/2549). There was a previous attempt to fix this in [#2447](https://github.com/microsoft/sarif-sdk/pull/2447). (A minimal repro sketch of this failure mode follows this file's diff.)

* FEATURE: Allow initialization of file regions cache in `InsertOptionalDataVisitor` (previously initialized exclusively from `FileRegionsCache.Instance`).
* BUGFIX: Resolve issue where `match-results-forward` command fails to generate VersionControlDetails data. [#2487](https://github.com/microsoft/sarif-sdk/pull/2487)
* BUGFIX: Remove duplicated rule definitions when executing `match-results-forward` commands for results with sub-rule ids. [#2486](https://github.com/microsoft/sarif-sdk/pull/2486)
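For context, the exception named in this release note is the standard behavior of non-concurrent .NET collections. A minimal, self-contained repro of the failure mode (timing-dependent and illustrative only; it does not reproduce the SDK's actual call path):

```csharp
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

// Repro sketch: enumerating a plain Dictionary while another thread
// mutates it throws InvalidOperationException ("Collection was
// modified; enumeration operation may not execute"). The race is
// timing-dependent, so the exception is likely but not guaranteed.
class CollectionModifiedRepro
{
    static void Main()
    {
        var results = new Dictionary<int, string>();
        for (int i = 0; i < 100_000; i++) { results[i] = "cached result"; }

        Task writer = Task.Run(() =>
        {
            for (int i = 100_000; i < 200_000; i++) { results[i] = "late result"; }
        });

        try
        {
            // Reader: this enumeration races the writer task above.
            foreach (KeyValuePair<int, string> kv in results) { _ = kv.Value; }
        }
        catch (InvalidOperationException e)
        {
            Console.WriteLine(e.Message);
        }

        writer.Wait();
    }
}
```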
124 changes: 65 additions & 59 deletions src/Sarif.Driver/Sdk/MultithreadedAnalyzeCommandBase.cs
@@ -28,17 +28,16 @@ public abstract class MultithreadedAnalyzeCommandBase<TContext, TOptions> : Plug
internal bool _captureConsoleOutput;

internal ConsoleLogger _consoleLogger;
internal ConcurrentDictionary<string, IAnalysisLogger> _analysisLoggerCache;

private Run _run;
private Tool _tool;
private bool _computeHashes;
internal TContext _rootContext;
private int _fileContextsCount;
private Channel<int> _hashChannel;
private Channel<int> readyToHashChannel;
private OptionallyEmittedData _dataToInsert;
private Channel<int> _resultsWritingChannel;
private Channel<int> _fileEnumerationChannel;
private Channel<int> readyToScanChannel;
private IDictionary<string, HashData> _pathToHashDataMap;
private ConcurrentDictionary<int, TContext> _fileContexts;

@@ -190,12 +189,18 @@ private void MultithreadedAnalyzeTargets(TOptions options,
: (Debugger.IsAttached) ? 1 : Environment.ProcessorCount;

var channelOptions = new BoundedChannelOptions(2000)
{
SingleWriter = true,
SingleReader = true,
};
readyToHashChannel = Channel.CreateBounded<int>(channelOptions);

channelOptions = new BoundedChannelOptions(2000)
{
SingleWriter = true,
SingleReader = false,
};
_fileEnumerationChannel = Channel.CreateBounded<int>(channelOptions);
_hashChannel = Channel.CreateBounded<int>(channelOptions);
readyToScanChannel = Channel.CreateBounded<int>(channelOptions);

channelOptions = new BoundedChannelOptions(2000)
{
@@ -206,26 +211,39 @@ private void MultithreadedAnalyzeTargets(TOptions options,

var sw = Stopwatch.StartNew();

var workers = new Task<bool>[options.Threads];
// 1: First we initiate an asynchronous operation to locate disk files for
// analysis, as specified in analysis configuration (file names, wildcards).
Task<bool> enumerateFilesOnDisk = EnumerateFilesOnDiskAsync(options, rootContext);

// 2: Files found on disk are put in a specific sort order, after which a
// reference to each scan target is put into a channel for hashing,
// if hashing is enabled.
Task<bool> hashFilesAndPutInAnalysisQueue = HashFilesAndPutInAnalysisQueueAsync();

// 3: A dedicated set of threads pull scan targets and analyze them.
// On completing a scan, the thread writes the index of the
// scanned item to a channel that drives logging.
var workers = new Task<bool>[options.Threads];
for (int i = 0; i < options.Threads; i++)
{
workers[i] = AnalyzeTargetAsync(skimmers, disabledSkimmers);
workers[i] = ScanTargetsAsync(skimmers, disabledSkimmers);
}

Task<bool> hashFiles = HashAsync();
Task<bool> findFiles = FindFilesAsync(options, rootContext);
Task<bool> writeResults = WriteResultsAsync(rootContext);

// FindFiles is single-thread and will close its write channel
findFiles.Wait();
hashFiles.Wait();
// 4: A single-threaded consumer watches for completed scans
// and logs results, if any. This operation is single-threaded
// in order to help ensure determinism in log output, i.e., any
// scan of the same targets using the same production code
// should produce a log file that is byte-for-byte identical
// to the previous output.
Task<bool> logScanResults = LogScanResultsAsync(rootContext);

Task.WhenAll(workers)
.ContinueWith(_ => _resultsWritingChannel.Writer.Complete())
.Wait();

writeResults.Wait();
enumerateFilesOnDisk.Wait();
hashFilesAndPutInAnalysisQueue.Wait();
logScanResults.Wait();

Console.WriteLine();

@@ -239,7 +257,7 @@ private void MultithreadedAnalyzeTargets(TOptions options,
}
}

private async Task<bool> WriteResultsAsync(TContext rootContext)
private async Task<bool> LogScanResultsAsync(TContext rootContext)
{
int currentIndex = 0;

@@ -265,15 +283,7 @@ private async Task<bool> WriteResultsAsync(TContext rootContext)

while (context?.AnalysisComplete == true)
{
if (_computeHashes)
{
bool cache = _analysisLoggerCache.TryGetValue(context.Hashes.Sha256, out IAnalysisLogger logger);
LogCachingLogger(rootContext, logger ?? context.Logger, context, clone: cache);
}
else
{
LogCachingLogger(rootContext, context.Logger, context);
}
LogCachingLogger(rootContext, context, clone: _computeHashes);

RuntimeErrors |= context.RuntimeErrors;

@@ -300,9 +310,9 @@ private async Task<bool> WriteResultsAsync(TContext rootContext)
return true;
}

private void LogCachingLogger(TContext rootContext, IAnalysisLogger logger, TContext context, bool clone = false)
private void LogCachingLogger(TContext rootContext, TContext context, bool clone = false)
{
var cachingLogger = (CachingLogger)logger;
var cachingLogger = (CachingLogger)context.Logger;
IDictionary<ReportingDescriptor, IList<Result>> results = cachingLogger.Results;

if (results?.Count > 0)
@@ -352,7 +362,7 @@ private void LogCachingLogger(TContext rootContext, IAnalysisLogger logger, TCon
}
}

private async Task<bool> FindFilesAsync(TOptions options, TContext rootContext)
private async Task<bool> EnumerateFilesOnDiskAsync(TOptions options, TContext rootContext)
{
this._fileContextsCount = 0;
this._fileContexts = new ConcurrentDictionary<int, TContext>();
@@ -432,12 +442,12 @@ private async Task<bool> FindFilesAsync(TOptions options, TContext rootContext)
filePath: file)
);

await _hashChannel.Writer.WriteAsync(_fileContextsCount++);
await readyToHashChannel.Writer.WriteAsync(_fileContextsCount++);
}
}
}

_hashChannel.Writer.Complete();
readyToHashChannel.Writer.Complete();

if (_fileContextsCount == 0)
{
@@ -464,9 +474,11 @@ private void EnqueueAllDirectories(Queue<string> queue, string directory)
}
}

private async Task<bool> HashAsync()
private async Task<bool> HashFilesAndPutInAnalysisQueueAsync()
{
ChannelReader<int> reader = _hashChannel.Reader;
ChannelReader<int> reader = readyToHashChannel.Reader;

Dictionary<string, CachingLogger> loggerCache = null;

// Wait until there is work or the channel is closed.
while (await reader.WaitToReadAsync())
@@ -487,20 +499,28 @@ private async Task<bool> HashAsync()
{
_pathToHashDataMap.Add(localPath, hashData);
}

loggerCache ??= new Dictionary<string, CachingLogger>();

if (!loggerCache.TryGetValue(hashData.Sha256, out CachingLogger logger))
{
logger = loggerCache[hashData.Sha256] = (CachingLogger)context.Logger;
}
context.Logger = logger;
}

await _fileEnumerationChannel.Writer.WriteAsync(index);
await readyToScanChannel.Writer.WriteAsync(index);
}
}

_fileEnumerationChannel.Writer.Complete();
readyToScanChannel.Writer.Complete();

return true;
}

private async Task<bool> AnalyzeTargetAsync(IEnumerable<Skimmer<TContext>> skimmers, ISet<string> disabledSkimmers)
private async Task<bool> ScanTargetsAsync(IEnumerable<Skimmer<TContext>> skimmers, ISet<string> disabledSkimmers)
{
ChannelReader<int> reader = _fileEnumerationChannel.Reader;
ChannelReader<int> reader = readyToScanChannel.Reader;

// Wait until there is work or the channel is closed.
while (await reader.WaitToReadAsync())
@@ -532,9 +552,6 @@ private async Task<bool> AnalyzeTargetAsync(IEnumerable<Skimmer<TContext>> skimm

protected virtual void ValidateOptions(TOptions options, TContext context)
{
_dataToInsert = options.DataToInsert.ToFlags();
_computeHashes = (_dataToInsert & OptionallyEmittedData.Hashes) != 0;

bool succeeded = true;

succeeded &= ValidateFile(context, options.OutputFilePath, DefaultPolicyName, shouldExist: null);
@@ -564,10 +581,8 @@ internal AggregatingLogger InitializeLogger(AnalyzeOptionsBase analyzeOptions)
logger.Loggers.Add(_consoleLogger);
}

if ((analyzeOptions.DataToInsert.ToFlags() & OptionallyEmittedData.Hashes) != 0)
{
_analysisLoggerCache = new ConcurrentDictionary<string, IAnalysisLogger>();
}
_dataToInsert = analyzeOptions.DataToInsert.ToFlags();
_computeHashes = (_dataToInsert & OptionallyEmittedData.Hashes) != 0;

return logger;
}
@@ -868,29 +883,20 @@ protected virtual TContext DetermineApplicabilityAndAnalyze(
return context;
}

IAnalysisLogger logger = context.Logger;

if (_computeHashes)
{
if (_analysisLoggerCache.ContainsKey(context.Hashes.Sha256))
{
return context;
}
}

context.Logger.AnalyzingTarget(context);
CachingLogger logger = (CachingLogger)context.Logger;
logger.AnalyzingTarget(context);

if (_computeHashes)
if (logger.CacheFinalized)
{
if (!_analysisLoggerCache.TryAdd(context.Hashes.Sha256, logger))
{
return context;
}
context.Logger = logger;
logger.ReleaseLock();
return context;
}

IEnumerable<Skimmer<TContext>> applicableSkimmers = DetermineApplicabilityForTarget(context, skimmers, disabledSkimmers);
AnalyzeTarget(context, applicableSkimmers, disabledSkimmers);

logger.ReleaseLock();
return context;
}

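The numbered comments in this diff describe a four-stage pipeline: enumerate targets, hash them, scan them on a set of workers, then log on a single thread. A condensed sketch of that channel topology, assuming simplified payloads (bare target indices) and no ordering buffer (the real `LogScanResultsAsync` additionally emits in index order so logs are byte-for-byte deterministic):

```csharp
using System;
using System.Threading.Channels;
using System.Threading.Tasks;

class PipelineSketch
{
    static async Task Main()
    {
        // Stage queues mirror the three channels in the diff above.
        var readyToHash = Channel.CreateBounded<int>(
            new BoundedChannelOptions(2000) { SingleWriter = true, SingleReader = true });
        var readyToScan = Channel.CreateBounded<int>(
            new BoundedChannelOptions(2000) { SingleWriter = true, SingleReader = false });
        var readyToLog = Channel.CreateBounded<int>(
            new BoundedChannelOptions(2000) { SingleWriter = false, SingleReader = true });

        // 1: Enumerate scan targets (indices stand in for file contexts).
        Task enumerate = Task.Run(async () =>
        {
            for (int i = 0; i < 10; i++) { await readyToHash.Writer.WriteAsync(i); }
            readyToHash.Writer.Complete();
        });

        // 2: Hash each target, then queue it for scanning.
        Task hash = Task.Run(async () =>
        {
            await foreach (int i in readyToHash.Reader.ReadAllAsync())
            {
                await readyToScan.Writer.WriteAsync(i);
            }
            readyToScan.Writer.Complete();
        });

        // 3: A dedicated set of workers scans queued targets.
        var workers = new Task[Environment.ProcessorCount];
        for (int w = 0; w < workers.Length; w++)
        {
            workers[w] = Task.Run(async () =>
            {
                await foreach (int i in readyToScan.Reader.ReadAllAsync())
                {
                    await readyToLog.Writer.WriteAsync(i); // "scan" completes here
                }
            });
        }

        // Close the logging channel only after every worker has drained.
        Task closeLog = Task.WhenAll(workers)
            .ContinueWith(_ => readyToLog.Writer.Complete());

        // 4: A single consumer logs completed scans.
        await foreach (int i in readyToLog.Reader.ReadAllAsync())
        {
            Console.WriteLine($"logged target {i}");
        }

        await Task.WhenAll(enumerate, hash, closeLog);
    }
}
```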
38 changes: 33 additions & 5 deletions src/Sarif/Core/Artifact.cs
@@ -29,6 +29,7 @@ public static Artifact Create(
var artifact = new Artifact()
{
Encoding = encoding?.WebName,
Hashes = hashData != null ? CreateHashesDictionary(hashData) : null,
};

string mimeType = SarifWriters.MimeType.DetermineFromFileExtension(uri);
@@ -70,19 +71,46 @@ public static Artifact Create(
if (dataToInsert.HasFlag(OptionallyEmittedData.Hashes))
{
HashData hashes = hashData ?? HashUtilities.ComputeHashes(filePath);
artifact.Hashes = new Dictionary<string, string>

// The hash utilities will return null data in some test contexts.
if (hashes != null)
{
{ "md5", hashes.MD5 },
{ "sha-1", hashes.Sha1 },
{ "sha-256", hashes.Sha256 },
};
artifact.Hashes = new Dictionary<string, string>
{
{ "md5", hashes.MD5 },
{ "sha-1", hashes.Sha1 },
{ "sha-256", hashes.Sha256 },
};
}
}
}
catch (Exception e) when (e is IOException || e is UnauthorizedAccessException) { }

return artifact;
}

private static IDictionary<string, string> CreateHashesDictionary(HashData hashData)
{
var result = new Dictionary<string, string>();

if (!string.IsNullOrEmpty(hashData?.MD5))
{
result["md5"] = hashData?.MD5;
}

if (!string.IsNullOrEmpty(hashData?.Sha1))
{
result["sha-1"] = hashData?.Sha1;
}

if (!string.IsNullOrEmpty(hashData?.Sha256))
{
result["sha-256"] = hashData?.Sha256;
}

return result;
}

private static ArtifactContent GetEncodedFileContents(IFileSystem fileSystem, string filePath, string mimeType, Encoding inputFileEncoding)
{
var fileContent = new ArtifactContent();
12 changes: 12 additions & 0 deletions src/Sarif/HashUtilities.cs
@@ -77,6 +77,18 @@ public static HashData ComputeHashes(string fileName, IFileSystem fileSystem = n
{
using (Stream stream = fileSystem.FileOpenRead(fileName))
{
// This condition is actually only feasible in testing, as a null
// value will only be returned by a mock object that doesn't
// recognize the currently specified file argument. In production,
// an exception will always be raised for a missing file. If
// we enter the code below, that indicates that a test
// encountered an adverse condition and is attempting to produce
// a file hash for some source file in a notification stack. We
// return null here, as the actual source hash isn't interesting
// for this scenario, and we want to reliably finish test execution
// and record exception details.
if (stream == null) { return null; }

using (var bufferedStream = new BufferedStream(stream, 1024 * 32))
{
string md5, sha1, sha256;
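The comment above explains why `ComputeHashes` now tolerates a null stream from a mocked file system. A reduced sketch of the same guard, assuming a stand-in interface (`IFileSystemStub` is hypothetical; it mirrors only the `FileOpenRead` call visible in the diff, not the SDK's full `IFileSystem`):

```csharp
using System.IO;
using System.Security.Cryptography;

// Stand-in for the SDK's file system abstraction; mirrors only the
// FileOpenRead call that appears in the diff above.
public interface IFileSystemStub
{
    Stream FileOpenRead(string fileName);
}

public static class HashGuardSketch
{
    public static string ComputeSha256OrNull(string fileName, IFileSystemStub fileSystem)
    {
        using (Stream stream = fileSystem.FileOpenRead(fileName))
        {
            // A mock that doesn't recognize 'fileName' may return null;
            // bail out with null rather than crash the test run.
            if (stream == null) { return null; }

            using (var sha256 = SHA256.Create())
            {
                byte[] hash = sha256.ComputeHash(stream);
                return System.BitConverter.ToString(hash).Replace("-", string.Empty);
            }
        }
    }
}
```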