diff --git a/docs/Producing effective SARIF.md b/docs/Producing effective SARIF.md index 17336142c..787d33e8d 100644 --- a/docs/Producing effective SARIF.md +++ b/docs/Producing effective SARIF.md @@ -393,13 +393,25 @@ Similarly, most 'result' objects contain at least one 'artifactLocation' object. #### Messages +##### `AvoidDuplicativeAnalysisTarget`: warning + +The 'analysisTarget' property '{1}' at '{0}' can be removed because it is the same as the result location. This unnecessarily increases log file size. The 'analysisTarget' property is used to distinguish cases when a tool detects a result in a file (such as an included header) that is different than the file that was scanned (such as a .cpp file that included the header). + +#### `AvoidDuplicativeResultRuleInformation`: warning + +'{0}' uses the 'rule' property to specify the violated rule, so it is not necessary also to specify 'ruleId' or 'ruleIndex'. This unnecessarily increases log file size. Remove the 'ruleId' and 'ruleIndex' properties. + ##### `EliminateLocationOnlyArtifacts`: warning -{0}: The 'artifacts' array contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information. +The 'artifacts' array at '{0}' contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information. ##### `EliminateIdOnlyRules`: warning -{0}: The 'rules' array contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information. +The 'rules' array at '{0}' contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information. + +#### `PreferRuleId`: warning + +The result at '{0}' uses the 'rule' property to specify the violated rule, but this is not necessary because the rule is defined by 'tool.driver'. Use the 'ruleId' and 'ruleIndex' instead, because they are shorter and just as clear. --- diff --git a/src/Sarif.Multitool/Extensions.cs b/src/Sarif.Multitool/Extensions.cs new file mode 100644 index 000000000..190b82dfc --- /dev/null +++ b/src/Sarif.Multitool/Extensions.cs @@ -0,0 +1,24 @@ +using System; + +namespace Microsoft.CodeAnalysis.Sarif.Multitool +{ + public static class Extensions + { + public static bool RefersToDriver(this ToolComponentReference toolComponent, string driverGuid) + { + if (toolComponent.Index == -1) + { + if (toolComponent.Guid == null) + { + return true; + } + else + { + return toolComponent.Guid.Equals(driverGuid, StringComparison.OrdinalIgnoreCase); + } + } + + return false; + } + } +} diff --git a/src/Sarif.Multitool/Rules/RuleResources.Designer.cs b/src/Sarif.Multitool/Rules/RuleResources.Designer.cs index b8eaf3b80..83a0168d8 100644 --- a/src/Sarif.Multitool/Rules/RuleResources.Designer.cs +++ b/src/Sarif.Multitool/Rules/RuleResources.Designer.cs @@ -454,7 +454,25 @@ internal static string SARIF2004_OptimizeFileSize_FullDescription_Text { } /// - /// Looks up a localized string similar to {0}: The 'rules' array contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information.. + /// Looks up a localized string similar to The 'analysisTarget' property '{1}' at '{0}' can be removed because it is the same as the result location. This unnecessarily increases log file size. The 'analysisTarget' property is used to distinguish cases when a tool detects a result in a file (such as an included header) that is different than the file that was scanned (such as a .cpp file that included the header).. + /// + internal static string SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeAnalysisTarget_Text { + get { + return ResourceManager.GetString("SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeAnalysisTarget_Text", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to '{0}' uses the 'rule' property to specify the violated rule, so it is not necessary also to specify 'ruleId' or 'ruleIndex'. This unnecessarily increases log file size. Remove the 'ruleId' and 'ruleIndex' properties.. + /// + internal static string SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeResultRuleInformation_Text { + get { + return ResourceManager.GetString("SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeResultRuleInformation_Text", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to The 'rules' array at '{0}' contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information.. /// internal static string SARIF2004_OptimizeFileSize_Warning_EliminateIdOnlyRules_Text { get { @@ -463,7 +481,7 @@ internal static string SARIF2004_OptimizeFileSize_Warning_EliminateIdOnlyRules_T } /// - /// Looks up a localized string similar to {0}: The 'artifacts' array contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information.. + /// Looks up a localized string similar to The 'artifacts' array at '{0}' contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information.. /// internal static string SARIF2004_OptimizeFileSize_Warning_EliminateLocationOnlyArtifacts_Text { get { @@ -471,6 +489,15 @@ internal static string SARIF2004_OptimizeFileSize_Warning_EliminateLocationOnlyA } } + /// + /// Looks up a localized string similar to The result at '{0}' uses the 'rule' property to specify the violated rule, but this is not necessary because the rule is defined by 'tool.driver'. Use the 'ruleId' and 'ruleIndex' instead, because they are shorter and just as clear.. + /// + internal static string SARIF2004_OptimizeFileSize_Warning_PreferRuleId_Text { + get { + return ResourceManager.GetString("SARIF2004_OptimizeFileSize_Warning_PreferRuleId_Text", resourceCulture); + } + } + /// /// Looks up a localized string similar to Provide information that makes it easy to identify the name and version of your tool. /// diff --git a/src/Sarif.Multitool/Rules/RuleResources.resx b/src/Sarif.Multitool/Rules/RuleResources.resx index 43d28bfb1..c9d846846 100644 --- a/src/Sarif.Multitool/Rules/RuleResources.resx +++ b/src/Sarif.Multitool/Rules/RuleResources.resx @@ -272,10 +272,16 @@ Many tools follow a conventional format for the 'reportingDescriptor.id' propert In several parts of a SARIF log file, a subset of information about an object appears in one place, and the full information describing all such objects appears in an array elsewhere in the log file. For example, each 'result' object has a 'ruleId' property that identifies the rule that was violated. Elsewhere in the log file, the array 'run.tool.driver.rules' contains additional information about the rules. But if the elements of the 'rules' array contained no information about the rules beyond their ids, then there might be no reason to include the 'rules' array at all, and the log file could be made smaller simply by omitting it. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information. -Similarly, most 'result' objects contain at least one 'artifactLocation' object. Elsewhere in the log file, the array 'run.artifacts' contains additional information about the artifacts that were analyzed. But if the elements of the 'artifacts' array contained not information about the artifacts beyond their locations, then there might be no reason to include the 'artifacts' array at all, and again the log file could be made smaller by omitting it. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information. +Similarly, most 'result' objects contain at least one 'artifactLocation' object. Elsewhere in the log file, the array 'run.artifacts' contains additional information about the artifacts that were analyzed. But if the elements of the 'artifacts' array contained not information about the artifacts beyond their locations, then there might be no reason to include the 'artifacts' array at all, and again the log file could be made smaller by omitting it. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information. + +In addition to the avoiding unnecessary arrays, there are other ways to optimize the size of SARIF log files. + +Prefer the result object properties 'ruleId' and 'ruleIndex' to the nested object-valued property 'result.rule', unless the rule comes from a tool component other than the driver (in which case only 'result.rule' can accurately point to the metadata for the rule). The 'ruleId' and 'ruleIndex' properties are shorter and just as clear. + +Do not specify the result object's 'analysisTarget' property unless it differs from the result location. The canonical scenario for using 'result.analysisTarget' is a C/C++ language analyzer that is instructed to analyze example.c, and detects a result in the included file example.h. In this case, 'analysisTarget' is example.c, and the result location is in example.h. - {0}: The 'artifacts' array contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information. + The 'artifacts' array at '{0}' contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information. In result messages, use the 'message.id' and 'message.arguments' properties rather than 'message.text'. This has several advantages. If 'text' is lengthy, using 'id' and 'arguments' makes the SARIF file smaller. If the rule metadata is stored externally to the SARIF log file, the message text can be improved (for example, by adding more text, clarifying the phrasing, or fixing typos), and the result messages will pick up the improvements the next time it is displayed. Finally, SARIF supports localizing messages into different languages, which is possible if the SARIF file contains 'message.id' and 'message.arguments', but not if it contains 'message.text' directly. @@ -290,7 +296,7 @@ Similarly, most 'result' objects contain at least one 'artifactLocation' object. {0}: This run does not provide 'versionControlProvenance'. As a result, it is not possible to determine which version of code was analyzed, nor to map relative paths to their locations within the repository. - {0}: The 'rules' array contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information. + The 'rules' array at '{0}' contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information. URIs that refer to locations such as rule help pages and result-related work items should be reachable via an HTTP GET request. @@ -360,4 +366,13 @@ This is part of a set of authoring practices that make your rule messages more r {0}: This result location does not provide any of the 'uriBaseId' values that specify repository locations: '{1}'. As a result, it will not be possible to determine the location of the file containing this result relative to the root of the repository that contains it. + + The 'analysisTarget' property '{1}' at '{0}' can be removed because it is the same as the result location. This unnecessarily increases log file size. The 'analysisTarget' property is used to distinguish cases when a tool detects a result in a file (such as an included header) that is different than the file that was scanned (such as a .cpp file that included the header). + + + '{0}' uses the 'rule' property to specify the violated rule, so it is not necessary also to specify 'ruleId' or 'ruleIndex'. This unnecessarily increases log file size. Remove the 'ruleId' and 'ruleIndex' properties. + + + The result at '{0}' uses the 'rule' property to specify the violated rule, but this is not necessary because the rule is defined by 'tool.driver'. Use the 'ruleId' and 'ruleIndex' instead, because they are shorter and just as clear. + \ No newline at end of file diff --git a/src/Sarif.Multitool/Rules/SARIF2004.OptimizeFileSize.cs b/src/Sarif.Multitool/Rules/SARIF2004.OptimizeFileSize.cs index 0fcb38ac0..b1a5c5dfb 100644 --- a/src/Sarif.Multitool/Rules/SARIF2004.OptimizeFileSize.cs +++ b/src/Sarif.Multitool/Rules/SARIF2004.OptimizeFileSize.cs @@ -41,20 +41,112 @@ public class OptimizeFileSize : SarifValidationSkimmerBase /// some scenarios (for example, when assessing compliance with policy), the 'artifacts' array /// might be used to record the full set of artifacts that were analyzed. In such a scenario, /// the 'artifacts' array should be retained even if it contains only location information. + /// + /// In addition to the avoiding unnecessary arrays, there are other ways to optimize the + /// size of SARIF log files. + /// + /// Prefer the result object properties 'ruleId' and 'ruleIndex' to the nested object-valued + /// property 'result.rule', unless the rule comes from a tool component other than the driver + /// (in which case only 'result.rule' can accurately point to the metadata for the rule). + /// The 'ruleId' and 'ruleIndex' properties are shorter and just as clear. + /// + /// Do not specify the result object's 'analysisTarget' property unless it differs from the + /// result location. The canonical scenario for using 'result.analysisTarget' is a C/C++ language + /// analyzer that is instructed to analyze example.c, and detects a result in the included file + /// example.h. In this case, 'analysisTarget' is example.c, and the result location is in example.h. /// public override MultiformatMessageString FullDescription => new MultiformatMessageString { Text = RuleResources.SARIF2004_OptimizeFileSize_FullDescription_Text }; protected override IEnumerable MessageResourceNames => new string[] { + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeAnalysisTarget_Text), + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeResultRuleInformation_Text), nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_EliminateLocationOnlyArtifacts_Text), - nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_EliminateIdOnlyRules_Text) + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_EliminateIdOnlyRules_Text), + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_PreferRuleId_Text) }; public override FailureLevel DefaultLevel => FailureLevel.Warning; + private string driverGuid; + protected override void Analyze(Run run, string runPointer) { AnalyzeLocationOnlyArtifacts(run, runPointer); AnalyzeIdOnlyRules(run, runPointer); + + this.driverGuid = run.Tool.Driver?.Guid; + } + + protected override void Analyze(Result result, string resultPointer) + { + ReportUnnecessaryAnalysisTarget(result, resultPointer); + ReportRuleDuplication(result, resultPointer); + } + + private void ReportRuleDuplication(Result result, string resultPointer) + { + if (result.Rule != null) + { + if (result.Rule.ToolComponent != null) + { + if (result.Rule.ToolComponent.RefersToDriver(this.driverGuid)) + { + // The result at '{0}' uses the 'rule' property to specify + // the violated rule, but this is not necessary because the rule + // is defined by 'tool.driver'. Use the 'ruleId' and 'ruleIndex' + // instead, because they are shorter and just as clear. + LogResult( + resultPointer, + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_PreferRuleId_Text)); + } + else + { + if (!string.IsNullOrWhiteSpace(result.RuleId) || result.RuleIndex >= 0) + { + // '{0}' uses the 'rule' property to specify the violated rule, so it + // is not necessary also to specify 'ruleId' or 'ruleIndex'. This + // unnecessarily increases log file size. Remove the 'ruleId' and + // 'ruleIndex' properties. + LogResult( + resultPointer, + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeResultRuleInformation_Text)); + } + } + } + else + { + // The result at '{0}' uses the 'rule' property to specify + // the violated rule, but this is not necessary because the rule + // is defined by 'tool.driver'. Use the 'ruleId' and 'ruleIndex' + // instead, because they are shorter and just as clear. + LogResult( + resultPointer, + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_PreferRuleId_Text)); + } + } + } + + private void ReportUnnecessaryAnalysisTarget(Result result, string resultPointer) + { + if (result.Locations != null && result.AnalysisTarget != null) + { + foreach (Location location in result.Locations) + { + if (location?.PhysicalLocation?.ArtifactLocation?.Uri == result.AnalysisTarget.Uri) + { + // The 'analysisTarget' property '{1}' at '{0}' can be removed because it is the same + // as the result location. This unnecessarily increases log file size. The + // 'analysisTarget' property is used to distinguish cases when a tool detects a result + // in a file (such as an included header) that is different than the file that was + // scanned (such as a .cpp file that included the header). + LogResult( + resultPointer.AtProperty(SarifPropertyName.AnalysisTarget), + nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_AvoidDuplicativeAnalysisTarget_Text), + result.AnalysisTarget.Uri.OriginalString); + break; + } + } + } } private void AnalyzeLocationOnlyArtifacts(Run run, string runPointer) @@ -70,8 +162,9 @@ private void AnalyzeLocationOnlyArtifacts(Run run, string runPointer) return; } - string firstArtifactPointer = runPointer - .AtProperty(SarifPropertyName.Artifacts).AtIndex(0); + string artifactPointer = runPointer.AtProperty(SarifPropertyName.Artifacts); + + string firstArtifactPointer = artifactPointer.AtIndex(0); string firstResultLocationPointer = runPointer .AtProperty(SarifPropertyName.Results).AtIndex(0) @@ -81,14 +174,14 @@ private void AnalyzeLocationOnlyArtifacts(Run run, string runPointer) if (HasResultLocationsWithUriAndIndex(firstResultLocationPointer) && HasLocationOnlyArtifacts(firstArtifactPointer)) { - // {0}: The 'artifacts' array contains no information beyond the locations of the + // The 'artifacts' array at '{0}' contains no information beyond the locations of the // artifacts. Removing this array might reduce the log file size without losing // information. In some scenarios (for example, when assessing compliance with policy), // the 'artifacts' array might be used to record the full set of artifacts that were // analyzed. In such a scenario, the 'artifacts' array should be retained even if it // contains only location information. LogResult( - firstArtifactPointer, + artifactPointer, nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_EliminateLocationOnlyArtifacts_Text)); } } @@ -115,7 +208,7 @@ private void AnalyzeIdOnlyRules(Run run, string runPointer) // since tools will typically generate similar nodes. // This approach may cause occasional false negatives. // Also, `tool` and `driver` are mandatory fields, hence - // null check in not required. + // null check is not required. ReportingDescriptor firstRule = run.Tool.Driver.Rules?.FirstOrDefault(); if (firstRule == null) @@ -123,22 +216,22 @@ private void AnalyzeIdOnlyRules(Run run, string runPointer) return; } - string firstRulePointer = runPointer + string rulesPointer = runPointer .AtProperty(SarifPropertyName.Tool) .AtProperty(SarifPropertyName.Driver) - .AtProperty(SarifPropertyName.Rules) - .AtIndex(0); + .AtProperty(SarifPropertyName.Rules); + string firstRulePointer = rulesPointer.AtIndex(0); if (HasIdOnlyRules(firstRulePointer)) { - // {0}: The 'rules' array contains no information beyond the ids of the rules. + // The 'rules' array at '{0}' contains no information beyond the ids of the rules. // Removing this array might reduce the log file size without losing information. // In some scenarios (for example, when assessing compliance with policy), the // 'rules' array might be used to record the full set of rules that were evaluated. // In such a scenario, the 'rules' array should be retained even if it contains // only id information. LogResult( - firstRulePointer, + rulesPointer, nameof(RuleResources.SARIF2004_OptimizeFileSize_Warning_EliminateIdOnlyRules_Text)); } } diff --git a/src/Test.FunctionalTests.Sarif/Multitool/ValidateCommandTests.cs b/src/Test.FunctionalTests.Sarif/Multitool/ValidateCommandTests.cs index 2bde15661..64901fb64 100644 --- a/src/Test.FunctionalTests.Sarif/Multitool/ValidateCommandTests.cs +++ b/src/Test.FunctionalTests.Sarif/Multitool/ValidateCommandTests.cs @@ -121,15 +121,18 @@ public void SARIF1009_IndexPropertiesMustBeConsistentWithArrays_Valid() [Fact] public void SARIF1009_IndexPropertiesMustBeConsistentWithArrays_Invalid() - => RunTest(MakeInvalidTestFileName(RuleId.IndexPropertiesMustBeConsistentWithArrays, nameof(RuleId.IndexPropertiesMustBeConsistentWithArrays))); + => RunTest(MakeInvalidTestFileName(RuleId.IndexPropertiesMustBeConsistentWithArrays, nameof(RuleId.IndexPropertiesMustBeConsistentWithArrays)), + parameter: new TestParameters(configFileName: "disable2004.configuration.xml")); [Fact] public void SARIF1010_RuleIdMustBeConsistent_Valid() - => RunTest(MakeValidTestFileName(RuleId.RuleIdMustBeConsistent, nameof(RuleId.RuleIdMustBeConsistent))); + => RunTest(MakeValidTestFileName(RuleId.RuleIdMustBeConsistent, nameof(RuleId.RuleIdMustBeConsistent)), + parameter: new TestParameters(configFileName: "disable2004.configuration.xml")); [Fact] public void SARIF1010_RuleIdMustBeConsistent_Invalid() - => RunTest(MakeInvalidTestFileName(RuleId.RuleIdMustBeConsistent, nameof(RuleId.RuleIdMustBeConsistent))); + => RunTest(MakeInvalidTestFileName(RuleId.RuleIdMustBeConsistent, nameof(RuleId.RuleIdMustBeConsistent)), + parameter: new TestParameters(configFileName: "disable2004.configuration.xml")); [Fact] public void SARIF1011_ReferenceFinalSchema_Valid() diff --git a/src/Test.FunctionalTests.Sarif/Test.FunctionalTests.Sarif.csproj b/src/Test.FunctionalTests.Sarif/Test.FunctionalTests.Sarif.csproj index 23f859bc4..f97aa8568 100644 --- a/src/Test.FunctionalTests.Sarif/Test.FunctionalTests.Sarif.csproj +++ b/src/Test.FunctionalTests.Sarif/Test.FunctionalTests.Sarif.csproj @@ -355,6 +355,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif index fa1418469..3697a8ab4 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif @@ -365,7 +365,7 @@ "arguments": [ "runs[0].results[0].analysisTarget", "%SRCROOT%", - "file:///C:/src/file.c" + "file:///C:/src/file.h" ] }, "locations": [ diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1009.IndexPropertiesMustBeConsistentWithArrays_Invalid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1009.IndexPropertiesMustBeConsistentWithArrays_Invalid.sarif index 074d7bb59..6651b10e5 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1009.IndexPropertiesMustBeConsistentWithArrays_Invalid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1009.IndexPropertiesMustBeConsistentWithArrays_Invalid.sarif @@ -40,6 +40,14 @@ "id": "WRN999.RuleExplicitlyDisabled" } }, + { + "message": { + "text": "Rule 'SARIF2004' was explicitly disabled by the user. As result, this tool run cannot be used for compliance or other auditing processes that require a comprehensive analysis." + }, + "descriptor": { + "id": "WRN999.RuleExplicitlyDisabled" + } + }, { "message": { "text": "Rule 'SARIF2006' was explicitly disabled by the user. As result, this tool run cannot be used for compliance or other auditing processes that require a comprehensive analysis." diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Invalid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Invalid.sarif index 76bb963de..bd54fc623 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Invalid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Invalid.sarif @@ -40,6 +40,14 @@ "id": "WRN999.RuleExplicitlyDisabled" } }, + { + "message": { + "text": "Rule 'SARIF2004' was explicitly disabled by the user. As result, this tool run cannot be used for compliance or other auditing processes that require a comprehensive analysis." + }, + "descriptor": { + "id": "WRN999.RuleExplicitlyDisabled" + } + }, { "message": { "text": "Rule 'SARIF2006' was explicitly disabled by the user. As result, this tool run cannot be used for compliance or other auditing processes that require a comprehensive analysis." diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Valid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Valid.sarif index 96a32255d..be7c52e54 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Valid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF1010.RuleIdMustBeConsistent_Valid.sarif @@ -19,6 +19,14 @@ "id": "WRN999.RuleExplicitlyDisabled" } }, + { + "message": { + "text": "Rule 'SARIF2004' was explicitly disabled by the user. As result, this tool run cannot be used for compliance or other auditing processes that require a comprehensive analysis." + }, + "descriptor": { + "id": "WRN999.RuleExplicitlyDisabled" + } + }, { "message": { "text": "Rule 'SARIF2006' was explicitly disabled by the user. As result, this tool run cannot be used for compliance or other auditing processes that require a comprehensive analysis." diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF2004.OptimizeFileSize_Invalid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF2004.OptimizeFileSize_Invalid.sarif index 178859edb..47e348a47 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF2004.OptimizeFileSize_Invalid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/ExpectedOutputs/SARIF2004.OptimizeFileSize_Invalid.sarif @@ -14,14 +14,23 @@ "text": "Emit arrays only if they provide additional information." }, "fullDescription": { - "text": "Emit arrays only if they provide additional information.\r\n\r\nIn several parts of a SARIF log file, a subset of information about an object appears in one place, and the full information describing all such objects appears in an array elsewhere in the log file. For example, each 'result' object has a 'ruleId' property that identifies the rule that was violated. Elsewhere in the log file, the array 'run.tool.driver.rules' contains additional information about the rules. But if the elements of the 'rules' array contained no information about the rules beyond their ids, then there might be no reason to include the 'rules' array at all, and the log file could be made smaller simply by omitting it. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information.\r\n\r\nSimilarly, most 'result' objects contain at least one 'artifactLocation' object. Elsewhere in the log file, the array 'run.artifacts' contains additional information about the artifacts that were analyzed. But if the elements of the 'artifacts' array contained not information about the artifacts beyond their locations, then there might be no reason to include the 'artifacts' array at all, and again the log file could be made smaller by omitting it. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information." + "text": "Emit arrays only if they provide additional information.\r\n\r\nIn several parts of a SARIF log file, a subset of information about an object appears in one place, and the full information describing all such objects appears in an array elsewhere in the log file. For example, each 'result' object has a 'ruleId' property that identifies the rule that was violated. Elsewhere in the log file, the array 'run.tool.driver.rules' contains additional information about the rules. But if the elements of the 'rules' array contained no information about the rules beyond their ids, then there might be no reason to include the 'rules' array at all, and the log file could be made smaller simply by omitting it. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information.\r\n\r\nSimilarly, most 'result' objects contain at least one 'artifactLocation' object. Elsewhere in the log file, the array 'run.artifacts' contains additional information about the artifacts that were analyzed. But if the elements of the 'artifacts' array contained not information about the artifacts beyond their locations, then there might be no reason to include the 'artifacts' array at all, and again the log file could be made smaller by omitting it. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information.\r\n\r\nIn addition to the avoiding unnecessary arrays, there are other ways to optimize the size of SARIF log files.\r\n\r\nPrefer the result object properties 'ruleId' and 'ruleIndex' to the nested object-valued property 'result.rule', unless the rule comes from a tool component other than the driver (in which case only 'result.rule' can accurately point to the metadata for the rule). The 'ruleId' and 'ruleIndex' properties are shorter and just as clear.\r\n\r\nDo not specify the result object's 'analysisTarget' property unless it differs from the result location. The canonical scenario for using 'result.analysisTarget' is a C/C++ language analyzer that is instructed to analyze example.c, and detects a result in the included file example.h. In this case, 'analysisTarget' is example.c, and the result location is in example.h." }, "messageStrings": { + "Warning_AvoidDuplicativeAnalysisTarget": { + "text": "The 'analysisTarget' property '{1}' at '{0}' can be removed because it is the same as the result location. This unnecessarily increases log file size. The 'analysisTarget' property is used to distinguish cases when a tool detects a result in a file (such as an included header) that is different than the file that was scanned (such as a .cpp file that included the header)." + }, + "Warning_AvoidDuplicativeResultRuleInformation": { + "text": "'{0}' uses the 'rule' property to specify the violated rule, so it is not necessary also to specify 'ruleId' or 'ruleIndex'. This unnecessarily increases log file size. Remove the 'ruleId' and 'ruleIndex' properties." + }, "Warning_EliminateLocationOnlyArtifacts": { - "text": "{0}: The 'artifacts' array contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information." + "text": "The 'artifacts' array at '{0}' contains no information beyond the locations of the artifacts. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'artifacts' array might be used to record the full set of artifacts that were analyzed. In such a scenario, the 'artifacts' array should be retained even if it contains only location information." }, "Warning_EliminateIdOnlyRules": { - "text": "{0}: The 'rules' array contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information." + "text": "The 'rules' array at '{0}' contains no information beyond the ids of the rules. Removing this array might reduce the log file size without losing information. In some scenarios (for example, when assessing compliance with policy), the 'rules' array might be used to record the full set of rules that were evaluated. In such a scenario, the 'rules' array should be retained even if it contains only id information." + }, + "Warning_PreferRuleId": { + "text": "The result at '{0}' uses the 'rule' property to specify the violated rule, but this is not necessary because the rule is defined by 'tool.driver'. Use the 'ruleId' and 'ruleIndex' instead, because they are shorter and just as clear." } }, "helpUri": "http://docs.oasis-open.org/sarif/sarif/v2.1.0/sarif-v2.1.0.html" @@ -75,7 +84,7 @@ "message": { "id": "Warning_EliminateLocationOnlyArtifacts", "arguments": [ - "runs[0].artifacts[0]" + "runs[0].artifacts" ] }, "locations": [ @@ -86,7 +95,7 @@ }, "region": { "startLine": 18, - "startColumn": 9 + "startColumn": 20 } } } @@ -98,7 +107,7 @@ "message": { "id": "Warning_EliminateIdOnlyRules", "arguments": [ - "runs[0].tool.driver.rules[0]" + "runs[0].tool.driver.rules" ] }, "locations": [ @@ -109,7 +118,100 @@ }, "region": { "startLine": 11, - "startColumn": 13 + "startColumn": 20 + } + } + } + ] + }, + { + "ruleId": "SARIF2004", + "ruleIndex": 0, + "message": { + "id": "Warning_AvoidDuplicativeAnalysisTarget", + "arguments": [ + "runs[0].results[1].analysisTarget", + "local/test/code/src/file1.c" + ] + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "index": 0 + }, + "region": { + "startLine": 56, + "startColumn": 29 + } + } + } + ] + }, + { + "ruleId": "SARIF2004", + "ruleIndex": 0, + "message": { + "id": "Warning_PreferRuleId", + "arguments": [ + "runs[0].results[2]" + ] + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "index": 0 + }, + "region": { + "startLine": 69, + "startColumn": 9 + } + } + } + ] + }, + { + "ruleId": "SARIF2004", + "ruleIndex": 0, + "message": { + "id": "Warning_AvoidDuplicativeResultRuleInformation", + "arguments": [ + "runs[0].results[3]" + ] + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "index": 0 + }, + "region": { + "startLine": 81, + "startColumn": 9 + } + } + } + ] + }, + { + "ruleId": "SARIF2004", + "ruleIndex": 0, + "message": { + "id": "Warning_PreferRuleId", + "arguments": [ + "runs[0].results[4]" + ] + }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "index": 0 + }, + "region": { + "startLine": 94, + "startColumn": 9 } } } diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif index 7bcf29a61..ce94b0f50 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Invalid.sarif @@ -153,7 +153,7 @@ "text": "[No message provided]." }, "analysisTarget": { - "uri": "file:///C:/src/file.c", + "uri": "file:///C:/src/file.h", "uriBaseId": "%SRCROOT%" }, "locations": [ diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Valid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Valid.sarif index c0a6d2336..cd28b112c 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Valid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF1004.ExpressUriBaseIdsCorrectly_Valid.sarif @@ -133,7 +133,7 @@ "text": "[No message provided]." }, "analysisTarget": { - "uri": "file.c", + "uri": "file.h", "uriBaseId": "%SRCROOT%" }, "locations": [ diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Invalid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Invalid.sarif index f8172d4c0..1c4e01e7f 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Invalid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Invalid.sarif @@ -7,6 +7,7 @@ "driver": { "name": "Code Analyser", "version": "1.2.3", + "guid": "686D3462-EC4F-499F-8738-9D6ED96F7A27", "rules": [ { "id": "TEST1001" @@ -43,6 +44,50 @@ } } ] + }, + { + "ruleId": "TEST1001", + "ruleIndex": 0, + "message": { "text": "Bad result: artifactLocation and analysisTarget are equal." }, + "analysisTarget": { "uri": "local/test/code/src/file1.c" }, + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": "local/test/code/src/file1.c" + } + } + } + ] + }, + { + "rule": { + "id": "TEST1001", + "toolComponent": { + "name": "toolComponent", + "guid": "686D3462-EC4F-499F-8738-9D6ED96F7A27" + } + }, + "message": { "text": "Bad result: rule could be simplified, because it's pointing to tool.driver." } + }, + { + "ruleId": "TEST1001", + "rule": { + "id": "TEST1001", + "toolComponent": { + "name": "toolComponent", + "guid": "686D3462-EC4F-499F-8738-9D6ED96F7A28" + } + }, + "message": { "text": "Bad result: ruleId duplicates information in rule." } + }, + { + "ruleId": "TEST1001", + "ruleIndex": 0, + "rule": { + "id": "TEST1001" + }, + "message": { "text": "Bad result: ruleIndex and rule is present." } } ], "columnKind": "utf16CodeUnits" diff --git a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Valid.sarif b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Valid.sarif index ab7a6b108..8eecb91c9 100644 --- a/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Valid.sarif +++ b/src/Test.FunctionalTests.Sarif/TestData/Multitool/ValidateCommand/Inputs/SARIF2004.OptimizeFileSize_Valid.sarif @@ -42,6 +42,27 @@ } } ] + }, + { + "rule": { + "id": "TEST1001", + "toolComponent": { + "name": "Plugin01", + "index": 1 + } + }, + "message": { "text": "Good result: rule contains toolComponent." } + }, + { + "ruleId": "TEST1001", + "ruleIndex": 0, + "locations": [ + { + "physicalLocation": { "artifactLocation": { "uri": "file:///c:/local/test/code/src/file1.h" } } + } + ], + "analysisTarget": { "uri": "file:///c:/local/test/code/src/file1.c" }, + "message": { "text": "Good result: analysisTarget is different from artifactLocation." } } ], "columnKind": "utf16CodeUnits" diff --git a/src/Test.FunctionalTests.Sarif/disable2004.configuration.xml b/src/Test.FunctionalTests.Sarif/disable2004.configuration.xml new file mode 100644 index 000000000..84ab9cd9a --- /dev/null +++ b/src/Test.FunctionalTests.Sarif/disable2004.configuration.xml @@ -0,0 +1,18 @@ + + + + + + + + + + + + + + + + diff --git a/src/Test.UnitTests.Sarif.Multitool/ExtensionsTests.cs b/src/Test.UnitTests.Sarif.Multitool/ExtensionsTests.cs new file mode 100644 index 000000000..19b774308 --- /dev/null +++ b/src/Test.UnitTests.Sarif.Multitool/ExtensionsTests.cs @@ -0,0 +1,61 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +using System.Collections.ObjectModel; +using System.Text; + +using FluentAssertions; + +using Xunit; + +namespace Microsoft.CodeAnalysis.Sarif.Multitool +{ + public class ExtensionsTests + { + [Fact] + public void Extensions_Validate_RefersToDriver() + { + StringBuilder sb = new StringBuilder(); + + foreach (ToolComponentReferenceTestCase item in s_toolComponentReferenceTestCases) + { + bool actualOutput = item.ToolComponentReference.RefersToDriver(item.DriverGuid); + + if (actualOutput != item.ExpectedOutput) + { + sb.AppendLine($" Input: {item.ToolComponentReference.Index} {item.ToolComponentReference.Guid ?? "null"} {item.DriverGuid ?? "null"} Expected: {item.ExpectedOutput} Actual: {actualOutput}"); + } + } + + sb.Length.Should().Be(0, + $"all test cases should pass, but the following test cases failed:\n{sb.ToString()}"); + } + + private class ToolComponentReferenceTestCase + { + public ToolComponentReferenceTestCase(int index, string toolGuid, string driverGuid, bool expectedOutput) + { + ToolComponentReference = new ToolComponentReference + { + Index = index, + Guid = toolGuid + }; + + DriverGuid = driverGuid; + ExpectedOutput = expectedOutput; + } + + public ToolComponentReference ToolComponentReference { get; } + public string DriverGuid { get; } + public bool ExpectedOutput { get; } + } + + private static readonly ReadOnlyCollection s_toolComponentReferenceTestCases = + new ReadOnlyCollection(new[] { + new ToolComponentReferenceTestCase(-1, null, null, true), + new ToolComponentReferenceTestCase(-1, "774707BC-6949-4DB5-826D-9FC0E38BFDEE", "774707BC-6949-4DB5-826D-9FC0E38BFDEE", true), + new ToolComponentReferenceTestCase(-1, "774707BC-6949-4DB5-826D-9FC0E38BFDEF", "774707BC-6949-4DB5-826D-9FC0E38BFDEE", false), + new ToolComponentReferenceTestCase(2, "774707BC-6949-4DB5-826D-9FC0E38BFDEE", "774707BC-6949-4DB5-826D-9FC0E38BFDEE", false) + }); + } +}