diff --git a/src/Sarif.WorkItems/SarifWorkItemFiler.cs b/src/Sarif.WorkItems/SarifWorkItemFiler.cs index 464f4a5c6..958a6a74f 100644 --- a/src/Sarif.WorkItems/SarifWorkItemFiler.cs +++ b/src/Sarif.WorkItems/SarifWorkItemFiler.cs @@ -177,8 +177,6 @@ public virtual IReadOnlyList SplitLogFile(SarifLog sarifLog) this.FilingResult = FilingResult.None; this.FiledWorkItems = new List(); - sarifLog = sarifLog ?? throw new ArgumentNullException(nameof(sarifLog)); - OptionallyEmittedData optionallyEmittedData = this.FilingContext.DataToRemove; if (optionallyEmittedData != OptionallyEmittedData.None) { @@ -257,7 +255,7 @@ public virtual IReadOnlyList SplitLogFile(SarifLog sarifLog) } Logger.LogDebug("Begin splitting logs"); - var partitioningVisitor = new PartitioningVisitor(partitionFunction, deepClone: false); + var partitioningVisitor = new PartitioningVisitor(partitionFunction, deepClone: false, remapRules: true); partitioningVisitor.VisitSarifLog(sarifLog); Logger.LogDebug("Begin retrieving split logs"); diff --git a/src/Sarif/ExtensionMethods.cs b/src/Sarif/ExtensionMethods.cs index 2cb2d7f07..dc0400693 100644 --- a/src/Sarif/ExtensionMethods.cs +++ b/src/Sarif/ExtensionMethods.cs @@ -37,6 +37,20 @@ public static IEnumerable Split(this SarifLog sarifLog, SplittingStrat partitionFunction = (result) => result.Run.GetHashCode().ToString(); break; } + case SplittingStrategy.PerRunPerTarget: + { + foreach (Run run in sarifLog.Runs) + { + run.SetRunOnResults(); + } + partitionFunction = (result) => + { + string runHashCode = result.Run.GetHashCode().ToString(); + string targetFile = result.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation?.Uri?.OriginalString; + return $"{runHashCode}:{targetFile}"; + }; + break; + } default: { throw new NotImplementedException($"SplittingStrategy: {splittingStrategy}"); diff --git a/src/Sarif/Visitors/PartitioningVisitor.cs b/src/Sarif/Visitors/PartitioningVisitor.cs index 593c6983c..8e0c08b1f 100644 --- a/src/Sarif/Visitors/PartitioningVisitor.cs +++ b/src/Sarif/Visitors/PartitioningVisitor.cs @@ -61,6 +61,10 @@ public class PartitioningVisitor : SarifRewritingVisitor where T : class, IEq // the original log (if true) or from a shallow copy of the original log (if false). private readonly bool deepClone; + // A flag presents if optimize rules for each emitted logs, by removing rules which are not referenced + // by existing results. + private readonly bool remapRules; + // The log file being partitioned. private SarifLog originalLog; @@ -103,10 +107,11 @@ public class PartitioningVisitor : SarifRewritingVisitor where T : class, IEq /// working set, but it is not safe to modify any of the resulting logs because this class /// makes no guarantee about which objects are shared. The default is false. /// - public PartitioningVisitor(PartitionFunction partitionFunction, bool deepClone) + public PartitioningVisitor(PartitionFunction partitionFunction, bool deepClone, bool remapRules = true) { this.partitionFunction = partitionFunction; this.deepClone = deepClone; + this.remapRules = remapRules; } /// @@ -338,6 +343,11 @@ private SarifLog CreatePartitionLog(T partitionValue) originalRun.Properties); } + if (this.remapRules) + { + partitionRun.Tool.Driver.Rules = this.RemapResultsToRules(partitionRun, results); + } + partitionRun.Results = results; // Construct a mapping from the indices in the original run to the indices @@ -395,6 +405,36 @@ private SarifLog CreatePartitionLog(T partitionValue) return partitionLog; } + private IList RemapResultsToRules(Run partitionRun, List results) + { + IList rules = partitionRun.Tool.Driver.Rules; + if (rules == null || !rules.Any() || results == null || !results.Any()) + { + return null; + } + + var referencedRulesByIndex = new Dictionary(new ReportingDescriptorEqualityComparer()); + int index = 0; + + foreach (Result result in results) + { + ReportingDescriptor currentRule = result.GetRule(partitionRun); + if (!referencedRulesByIndex.TryGetValue(currentRule, out int currentIndex)) + { + currentIndex = index; + referencedRulesByIndex.Add(currentRule, index++); + } + + // only update result RuleIndex if it has valid value + if (result.RuleIndex != -1) + { + result.RuleIndex = currentIndex; + } + } + + return referencedRulesByIndex.OrderBy(dict => dict.Value).Select(kv => kv.Key).ToList(); + } + // This class contains information accumulated from an individual run in the // original log file that is used to create the runs in the partition log files. private class PartitionRunInfo diff --git a/src/Test.UnitTests.Sarif/Core/SarifLogTests.cs b/src/Test.UnitTests.Sarif/Core/SarifLogTests.cs index 1aeb526a8..5fefa84c3 100644 --- a/src/Test.UnitTests.Sarif/Core/SarifLogTests.cs +++ b/src/Test.UnitTests.Sarif/Core/SarifLogTests.cs @@ -2,6 +2,8 @@ // Licensed under the MIT license. See LICENSE file in the project root for full license information. using System; +using System.Collections.Generic; +using System.Linq; using FluentAssertions; @@ -82,6 +84,92 @@ public void SarifLog_SplitPerRun() sarifLog.Split(SplittingStrategy.PerRun).Should().HaveCount(3); } + [Fact] + public void SarifLog_SplitPerResult() + { + var random = new Random(); + SarifLog sarifLog = RandomSarifLogGenerator.GenerateSarifLogWithRuns(random, 1); + IList logs = sarifLog.Split(SplittingStrategy.PerResult).ToList(); + logs.Count.Should().Be(5); + foreach (SarifLog log in logs) + { + // optimized partitioned log should only include rules referenced by its results + log.Runs.Count.Should().Be(1); + int ruleCount = log.Runs[0].Results.Select(r => r.RuleId).Distinct().Count(); + ruleCount.Should().Be(log.Runs[0].Tool.Driver.Rules.Count); + } + } + + [Fact] + public void SarifLog_SplitPerTarget() + { + var random = new Random(); + SarifLog sarifLog = RandomSarifLogGenerator.GenerateSarifLogWithRuns(random, 1); + IList logs = sarifLog.Split(SplittingStrategy.PerRunPerTarget).ToList(); + logs.Count.Should().Be( + sarifLog.Runs[0].Results.Select(r => r.Locations[0].PhysicalLocation.ArtifactLocation.Uri).Distinct().Count()); + foreach (SarifLog log in logs) + { + // optimized partitioned log should only include rules referenced by its results + log.Runs.Count.Should().Be(1); + int ruleCount = log.Runs[0].Results.Select(r => r.RuleId).Distinct().Count(); + ruleCount.Should().Be(log.Runs[0].Tool.Driver.Rules.Count); + + // verify result's RuleIndex reference to right rule + foreach (Result result in log.Runs[0].Results) + { + result.RuleId.Should().Be( + log.Runs[0].Tool.Driver.Rules.ElementAt(result.RuleIndex).Id); + } + } + } + + [Fact] + public void SarifLog_SplitPerTarget_WithEmptyLocations() + { + var random = new Random(); + SarifLog sarifLog = RandomSarifLogGenerator.GenerateSarifLogWithRuns(random, 1); + + // set random result's location to empty + IList results = sarifLog.Runs.First().Results; + Result randomResult = results.ElementAt(random.Next(results.Count)); + randomResult.Locations = null; + randomResult = results.ElementAt(random.Next(results.Count)); + if (randomResult.Locations?.FirstOrDefault()?.PhysicalLocation != null) + { + randomResult.Locations.FirstOrDefault().PhysicalLocation = null; + } + randomResult = results.ElementAt(random.Next(results.Count)); + if (randomResult.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation != null) + { + randomResult.Locations.FirstOrDefault().PhysicalLocation.ArtifactLocation = null; + } + randomResult = results.ElementAt(random.Next(results.Count)); + if (randomResult.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation?.Uri != null) + { + randomResult.Locations.FirstOrDefault().PhysicalLocation.ArtifactLocation.Uri = null; + } + + IList logs = sarifLog.Split(SplittingStrategy.PerRunPerTarget).ToList(); + logs.Count.Should().Be( + sarifLog.Runs[0].Results.Select(r => r.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation?.Uri).Distinct().Count()); + foreach (SarifLog log in logs) + { + // optimized partitioned log should only include rules referenced by its results + log.Runs.Count.Should().Be(1); + int ruleCount = log.Runs[0].Results.Select(r => r.RuleId).Distinct().Count(); + ruleCount.Should().Be(log.Runs[0].Tool.Driver.Rules.Count); + + // verify result's RuleIndex reference to right rule + foreach (Result result in log.Runs[0].Results) + { + result.RuleId.Should().Be( + log.Runs[0].Tool.Driver.Rules.ElementAt(result.RuleIndex).Id); + } + } + } + + private Run SerializeAndDeserialize(Run run) { return JsonConvert.DeserializeObject(JsonConvert.SerializeObject(run)); diff --git a/src/Test.UnitTests.Sarif/RandomSarifLogGenerator.cs b/src/Test.UnitTests.Sarif/RandomSarifLogGenerator.cs index ff638231b..0a8c7112f 100644 --- a/src/Test.UnitTests.Sarif/RandomSarifLogGenerator.cs +++ b/src/Test.UnitTests.Sarif/RandomSarifLogGenerator.cs @@ -99,12 +99,14 @@ public static IEnumerable GenerateFakeFiles(string baseAddress, int coun public static IList GenerateFakeResults(Random random, List ruleIds, List filePaths, int resultCount) { List results = new List(); - int fileIndex = random.Next(filePaths.Count); for (int i = 0; i < resultCount; i++) { + int fileIndex = random.Next(filePaths.Count); + int ruleIndex = random.Next(ruleIds.Count); results.Add(new Result() { - RuleId = ruleIds[random.Next(ruleIds.Count)], + RuleId = ruleIds[ruleIndex], + RuleIndex = ruleIndex, Locations = new Location[] { new Location