@@ -238,10 +238,11 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
238238 if there is no source code available.
239239 """
240240 analysis_result : dict = {}
241- disabled_results : dict = (
242- {}
243- ) # since we have to run them anyway, return disabled rule findings for debug information
244- # only run semgrep open-source features, and disable 'nosemgrep' ignoring so this does not bypass our scan
241+ # Disabled rules have to be run anyway, so their findings are returned separately as debug information.
242+ disabled_results : dict = {}
243+ # Disable Semgrep's 'nosemgrep' ignore comments so they cannot be used to evade our scan (e.g. malware adding
244+ # 'nosemgrep' comments to stop our scan from detecting those code lines). Read more about the 'nosemgrep'
245+ # feature here: https://semgrep.dev/docs/ignoring-files-folders-code
245246 semgrep_commands : list [str ] = ["semgrep" , "scan" , "--oss-only" , "--disable-nosem" ]
246247 result : HeuristicResult = HeuristicResult .PASS
247248
@@ -302,6 +303,8 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
302303 # only work if `--experimental` is also supplied to enable experimental features, which we do not use.
303304 # Semgrep provides a relative path separated by '.' to the rule ID, where the rule ID is always the
304305 # final element in that path, so we use that to match our rule IDs.
306+ # e.g. rule_id = src.macaron.resources.pypi_malware_rules.obfuscation_decode-and-execute, which comes from
307+ # the rule ID 'obfuscation_decode-and-execute' inside 'obfuscation.yaml'.
305308 if rule_id .split ("." )[- 1 ] in self .disabled_rule_ids :
306309 if rule_id not in self .disabled_rule_ids :
307310 disabled_results [rule_id ] = {"message" : message , "detections" : []}
0 commit comments