Skip to content

Commit

Permalink
[Call-by-name] semgrep/trufflehog/yamllint migration (#21149)
Browse files Browse the repository at this point in the history
Auto-migrated the Semgrep, TruffleHog, and yamllint backends to the call-by-name `@rule` invocation syntax.
  • Loading branch information
sureshjoshi authored Jul 9, 2024
1 parent da6f86b commit 00a0bd7
Show file tree
Hide file tree
Showing 8 changed files with 342 additions and 163 deletions.
3 changes: 3 additions & 0 deletions docs/notes/2.23.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,11 @@ Pants has a new mechanism for `@rule` invocation in backends. In this release th
- `cue`
- `debian`
- `makeself`
- `semgrep`
- `sql`
- `swift`
- `trufflehog`
- `yamllint`

## Full Changelog

Expand Down
117 changes: 61 additions & 56 deletions src/python/pants/backend/tools/semgrep/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,21 @@
from typing import Iterable

from pants.backend.python.util_rules import pex
from pants.backend.python.util_rules.pex import PexRequest, VenvPex, VenvPexProcess
from pants.backend.python.util_rules.pex import VenvPexProcess, create_venv_pex
from pants.core.goals.lint import LintResult, LintTargetsRequest
from pants.core.util_rules.partitions import Partition, Partitions
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
from pants.engine.addresses import Address
from pants.engine.fs import (
CreateDigest,
Digest,
FileContent,
MergeDigests,
PathGlobs,
Paths,
Snapshot,
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, PathGlobs, Paths, Snapshot
from pants.engine.intrinsics import (
create_digest_to_digest,
digest_to_snapshot,
merge_digests_request_to_digest,
path_globs_to_paths,
process_request_to_process_result,
)
from pants.engine.process import FallibleProcessResult, ProcessCacheScope
from pants.engine.rules import Get, MultiGet, Rule, collect_rules, rule
from pants.engine.process import ProcessCacheScope
from pants.engine.rules import Rule, collect_rules, concurrently, implicitly, rule
from pants.engine.unions import UnionRule
from pants.option.global_options import GlobalOptions
from pants.util.logging import LogLevel
Expand Down Expand Up @@ -101,7 +100,9 @@ def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs:

@rule
async def find_all_semgrep_configs() -> AllSemgrepConfigs:
    """Locate every Semgrep rules file and group the results.

    Expands a `**/<glob>` pattern for each entry in `_RULES_FILES_GLOBS`, then
    hands the matched paths to `_group_by_semgrep_dir` to build the result.
    """
    # Call-by-name form: invoke the intrinsic directly instead of going through
    # the legacy `Get(Paths, PathGlobs(...))` request.
    all_paths = await path_globs_to_paths(
        PathGlobs([f"**/{file_glob}" for file_glob in _RULES_FILES_GLOBS])
    )
    return _group_by_semgrep_dir(all_paths)


Expand All @@ -123,7 +124,7 @@ async def infer_relevant_semgrep_configs(

@rule
async def all_semgrep_ignore_files() -> SemgrepIgnoreFiles:
    """Snapshot every file matching `**/{_IGNORE_FILE_NAME}`.

    Returns the resulting snapshot wrapped in `SemgrepIgnoreFiles`.
    """
    # Call-by-name form: `implicitly(...)` passes the `PathGlobs` for the engine
    # to turn into the input that `digest_to_snapshot` expects, replacing the
    # legacy `Get(Snapshot, PathGlobs(...))` request.
    snapshot = await digest_to_snapshot(**implicitly(PathGlobs([f"**/{_IGNORE_FILE_NAME}"])))
    return SemgrepIgnoreFiles(snapshot)


Expand All @@ -136,8 +137,8 @@ async def partition(
if semgrep.skip:
return Partitions()

all_configs = await MultiGet(
Get(RelevantSemgrepConfigs, RelevantSemgrepConfigsRequest(field_set))
all_configs = await concurrently(
infer_relevant_semgrep_configs(RelevantSemgrepConfigsRequest(field_set), **implicitly())
for field_set in request.field_sets
)

Expand Down Expand Up @@ -168,62 +169,66 @@ async def lint(
semgrep: SemgrepSubsystem,
global_options: GlobalOptions,
) -> LintResult:
config_files, semgrep_pex, input_files, settings = await MultiGet(
Get(Snapshot, PathGlobs(str(s) for s in request.partition_metadata.config_files)),
Get(VenvPex, PexRequest, semgrep.to_pex_request()),
Get(SourceFiles, SourceFilesRequest(field_set.source for field_set in request.elements)),
Get(Digest, CreateDigest([_DEFAULT_SETTINGS])),
config_files, semgrep_pex, input_files, settings = await concurrently(
digest_to_snapshot(
**implicitly(PathGlobs(str(s) for s in request.partition_metadata.config_files))
),
create_venv_pex(**implicitly(semgrep.to_pex_request())),
determine_source_files(
SourceFilesRequest(field_set.source for field_set in request.elements)
),
create_digest_to_digest(CreateDigest([_DEFAULT_SETTINGS])),
)

input_digest = await Get(
Digest,
input_digest = await merge_digests_request_to_digest(
MergeDigests(
(
input_files.snapshot.digest,
config_files.digest,
settings,
request.partition_metadata.ignore_files.digest,
)
),
)
)

cache_scope = ProcessCacheScope.PER_SESSION if semgrep.force else ProcessCacheScope.SUCCESSFUL

# TODO: https://github.com/pantsbuild/pants/issues/18430 support running this with --autofix
# under the fix goal... but not all rules have fixes, so we need to be running with
# --error/checking exit codes, which FixResult doesn't currently support.
result = await Get(
FallibleProcessResult,
VenvPexProcess(
semgrep_pex,
argv=(
"scan",
*(f"--config={f}" for f in config_files.files),
"--jobs={pants_concurrency}",
"--error",
*semgrep.args,
# we don't pass the target files directly because that overrides .semgrepignore
# (https://github.com/returntocorp/semgrep/issues/4978), so instead we just tell its
# traversal to include all the source files in this partition. Unfortunately this
# include is implicitly unrooted (i.e. as if it was **/path/to/file), and so may
# pick up other files if the names match. The highest risk of this is within the
# semgrep PEX.
*(f"--include={f}" for f in input_files.files),
f"--exclude={semgrep_pex.pex_filename}",
),
extra_env={
"SEMGREP_FORCE_COLOR": "true",
# disable various global state/network requests
"SEMGREP_SETTINGS_FILE": _DEFAULT_SETTINGS.path,
"SEMGREP_ENABLE_VERSION_CHECK": "0",
"SEMGREP_SEND_METRICS": "off",
},
input_digest=input_digest,
concurrency_available=len(input_files.files),
description=f"Run Semgrep on {pluralize(len(input_files.files), 'file')}.",
level=LogLevel.DEBUG,
cache_scope=cache_scope,
),
result = await process_request_to_process_result(
**implicitly(
VenvPexProcess(
semgrep_pex,
argv=(
"scan",
*(f"--config={f}" for f in config_files.files),
"--jobs={pants_concurrency}",
"--error",
*semgrep.args,
# we don't pass the target files directly because that overrides .semgrepignore
# (https://github.com/returntocorp/semgrep/issues/4978), so instead we just tell its
# traversal to include all the source files in this partition. Unfortunately this
# include is implicitly unrooted (i.e. as if it was **/path/to/file), and so may
# pick up other files if the names match. The highest risk of this is within the
# semgrep PEX.
*(f"--include={f}" for f in input_files.files),
f"--exclude={semgrep_pex.pex_filename}",
),
extra_env={
"SEMGREP_FORCE_COLOR": "true",
# disable various global state/network requests
"SEMGREP_SETTINGS_FILE": _DEFAULT_SETTINGS.path,
"SEMGREP_ENABLE_VERSION_CHECK": "0",
"SEMGREP_SEND_METRICS": "off",
},
input_digest=input_digest,
concurrency_available=len(input_files.files),
description=f"Run Semgrep on {pluralize(len(input_files.files), 'file')}.",
level=LogLevel.DEBUG,
cache_scope=cache_scope,
)
)
)

return LintResult.create(request, result, output_simplifier=global_options.output_simplifier())
Expand Down
60 changes: 28 additions & 32 deletions src/python/pants/backend/tools/trufflehog/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@

from __future__ import annotations

from typing import Iterable

from pants.backend.tools.trufflehog.subsystem import Trufflehog
from pants.core.goals.lint import LintFilesRequest, LintResult
from pants.core.util_rules.config_files import ConfigFiles, ConfigFilesRequest
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.config_files import find_config_file
from pants.core.util_rules.external_tool import download_external_tool
from pants.core.util_rules.partitions import Partitions
from pants.engine.fs import (
CreateDigest,
Digest,
DigestEntries,
FileEntry,
MergeDigests,
PathGlobs,
Snapshot,
from pants.engine.fs import CreateDigest, FileEntry, MergeDigests, PathGlobs
from pants.engine.intrinsics import (
create_digest_to_digest,
digest_to_snapshot,
directory_digest_to_digest_entries,
merge_digests_request_to_digest,
process_request_to_process_result,
)
from pants.engine.platform import Platform
from pants.engine.process import FallibleProcessResult, Process
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.process import Process
from pants.engine.rules import Rule, collect_rules, concurrently, implicitly, rule
from pants.source.filespec import FilespecMatcher
from pants.util.logging import LogLevel
from pants.util.strutil import pluralize
Expand Down Expand Up @@ -53,37 +54,32 @@ async def run_trufflehog(
) -> LintResult:
"""Runs the trufflehog executable against the targeted files."""

download_trufflehog_get = Get(
DownloadedExternalTool, ExternalToolRequest, trufflehog.get_request(platform)
download_trufflehog_get = download_external_tool(trufflehog.get_request(platform))
config_files_get = find_config_file(trufflehog.config_request())
downloaded_trufflehog, config_digest = await concurrently(
download_trufflehog_get, config_files_get
)

config_files_get = Get(ConfigFiles, ConfigFilesRequest, trufflehog.config_request())

downloaded_trufflehog, config_digest = await MultiGet(download_trufflehog_get, config_files_get)
# the downloaded files are going to contain the `exe`, readme and license. We only
# want the `exe`
# The downloaded files are going to contain the `exe`, readme and license. We only want the `exe`
entry = next(
e
for e in await Get(DigestEntries, Digest, downloaded_trufflehog.digest)
for e in await directory_digest_to_digest_entries(downloaded_trufflehog.digest)
if isinstance(e, FileEntry) and e.path == "trufflehog" and e.is_executable
)
trufflehog_digest = await Get(Digest, CreateDigest([entry]))

snapshot = await Get(Snapshot, PathGlobs(request.elements))

input_digest = await Get(
Digest,
trufflehog_digest = await create_digest_to_digest(CreateDigest([entry]))
snapshot = await digest_to_snapshot(**implicitly(PathGlobs(request.elements)))
input_digest = await merge_digests_request_to_digest(
MergeDigests(
(
snapshot.digest,
trufflehog_digest,
config_digest.snapshot.digest,
)
),
)
)

process_result = await Get(
FallibleProcessResult,
process_result = await process_request_to_process_result(
Process(
argv=(
downloaded_trufflehog.exe,
Expand All @@ -102,13 +98,13 @@ async def run_trufflehog(
description=f"Run Trufflehog on {pluralize(len(snapshot.files), 'file')}.",
level=LogLevel.DEBUG,
),
**implicitly(),
)
return LintResult.create(request, process_result)


def rules() -> Iterable[Rule]:
    """Collect all the rules.

    Combines the rules found by `collect_rules()` with those returned by
    `TrufflehogRequest.rules()`.
    """
    return (
        *collect_rules(),
        *TrufflehogRequest.rules(),
    )
Loading

0 comments on commit 00a0bd7

Please sign in to comment.