Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize partitioned logs by removing not relevant rules #2369

Merged
merged 6 commits into from
Jun 21, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/Sarif.WorkItems/SarifWorkItemFiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ public virtual IReadOnlyList<SarifLog> SplitLogFile(SarifLog sarifLog)
this.FilingResult = FilingResult.None;
this.FiledWorkItems = new List<WorkItemModel>();

sarifLog = sarifLog ?? throw new ArgumentNullException(nameof(sarifLog));
Copy link
Collaborator

@eddynaka eddynaka Jun 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sarifLog = sarifLog

nice change :) #ByDesign


OptionallyEmittedData optionallyEmittedData = this.FilingContext.DataToRemove;
if (optionallyEmittedData != OptionallyEmittedData.None)
{
Expand Down Expand Up @@ -257,7 +255,7 @@ public virtual IReadOnlyList<SarifLog> SplitLogFile(SarifLog sarifLog)
}

Logger.LogDebug("Begin splitting logs");
var partitioningVisitor = new PartitioningVisitor<string>(partitionFunction, deepClone: false);
var partitioningVisitor = new PartitioningVisitor<string>(partitionFunction, deepClone: false, remapRules: true);
partitioningVisitor.VisitSarifLog(sarifLog);

Logger.LogDebug("Begin retrieving split logs");
Expand Down
59 changes: 58 additions & 1 deletion src/Sarif/Visitors/PartitioningVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ public class PartitioningVisitor<T> : SarifRewritingVisitor where T : class, IEq
// the original log (if true) or from a shallow copy of the original log (if false).
private readonly bool deepClone;

// A flag presents if optimize rules for each emitted logs, by removing rules which are not referenced
// by existing results.
private readonly bool remapRules;

// The log file being partitioned.
private SarifLog originalLog;

Expand Down Expand Up @@ -103,10 +107,11 @@ public class PartitioningVisitor<T> : SarifRewritingVisitor where T : class, IEq
/// working set, but it is not safe to modify any of the resulting logs because this class
/// makes no guarantee about which objects are shared. The default is <c>false</c>.
/// </param>
public PartitioningVisitor(PartitionFunction<T> partitionFunction, bool deepClone)
public PartitioningVisitor(PartitionFunction<T> partitionFunction, bool deepClone, bool remapRules = true)
{
this.partitionFunction = partitionFunction;
this.deepClone = deepClone;
this.remapRules = remapRules;
}

/// <summary>
Expand Down Expand Up @@ -338,6 +343,11 @@ private SarifLog CreatePartitionLog(T partitionValue)
originalRun.Properties);
}

if (this.remapRules)
{
partitionRun.Tool.Driver.Rules = this.RemapResultsToRules(partitionRun.Tool.Driver.Rules, results);
}

partitionRun.Results = results;

// Construct a mapping from the indices in the original run to the indices
Expand Down Expand Up @@ -395,6 +405,53 @@ private SarifLog CreatePartitionLog(T partitionValue)
return partitionLog;
}

private IList<ReportingDescriptor> RemapResultsToRules(IList<ReportingDescriptor> rules, List<Result> results)
{
if (rules == null || !rules.Any() || results == null || !results.Any())
{
return null;
}

var referencedRulesByIndex = new Dictionary<ReportingDescriptor, int>();
Copy link
Collaborator

@eddynaka eddynaka Jun 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

()

Check if you can use ReportingDescriptorEqualityComparer #Closed

int index = 0;

foreach (Result result in results)
Copy link
Collaborator

@eddynaka eddynaka Jun 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

result

check if u have access to the run here, if yes, there is a GetRule method in the result #Closed

{
ReportingDescriptor currentRule = null;
if (result.RuleIndex >= 0 && result.RuleIndex < rules.Count)
{
// find the rule by RuleIndex
currentRule = rules.ElementAt(result.RuleIndex);
}

if (currentRule == null && !string.IsNullOrWhiteSpace(result.RuleId))
{
// find rule by RuleId, rule id is case sensitive
currentRule = rules.FirstOrDefault(r => r.Id.Equals(result.RuleId, StringComparison.Ordinal));
}

if (currentRule == null)
{
continue; // no rule found
}


int currentIndex = index;
if (!referencedRulesByIndex.TryGetValue(currentRule, out currentIndex))
{
referencedRulesByIndex.Add(currentRule, index++);
}

// only update result RuleIndex if it has valid value
if (result.RuleIndex != -1)
{
result.RuleIndex = currentIndex;
}
}

return referencedRulesByIndex.OrderBy(dict => dict.Value).Select(kv => kv.Key).ToList();
}

// This class contains information accumulated from an individual run in the
// original log file that is used to create the runs in the partition log files.
private class PartitionRunInfo
Expand Down
18 changes: 18 additions & 0 deletions src/Test.UnitTests.Sarif/Core/SarifLogTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.Collections.Generic;
using System.Linq;

using FluentAssertions;

Expand Down Expand Up @@ -82,6 +84,22 @@ public void SarifLog_SplitPerRun()
sarifLog.Split(SplittingStrategy.PerRun).Should().HaveCount(3);
}

[Fact]
public void SarifLog_SplitPerResult()
{
var random = new Random();
SarifLog sarifLog = RandomSarifLogGenerator.GenerateSarifLogWithRuns(random, 1);
IList<SarifLog> logs = sarifLog.Split(SplittingStrategy.PerResult).ToList();
logs.Count.Should().Be(5);
foreach (SarifLog log in logs)
{
// optimized partitioned log should only include rules referenced by its results
log.Runs.Count.Should().Be(1);
int ruleCount = log.Runs[0].Results.Select(r => r.RuleId).Distinct().Count();
ruleCount.Should().Be(log.Runs[0].Tool.Driver.Rules.Count);
}
}

private Run SerializeAndDeserialize(Run run)
{
return JsonConvert.DeserializeObject<Run>(JsonConvert.SerializeObject(run));
Expand Down
4 changes: 3 additions & 1 deletion src/Test.UnitTests.Sarif/RandomSarifLogGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,11 @@ public static IList<Result> GenerateFakeResults(Random random, List<string> rule
int fileIndex = random.Next(filePaths.Count);
for (int i = 0; i < resultCount; i++)
{
int ruleIndex = random.Next(ruleIds.Count);
results.Add(new Result()
{
RuleId = ruleIds[random.Next(ruleIds.Count)],
RuleId = ruleIds[ruleIndex],
RuleIndex = ruleIndex,
Locations = new Location[]
{
new Location
Expand Down