Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Call-by-name] semgrep/trufflehog/yamllint migration #21149

Merged
merged 8 commits on Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/notes/2.23.x.md
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,11 @@ Pants has a new mechanism for `@rule` invocation in backends. In this release th
- `cue`
- `debian`
- `makeself`
- `semgrep`
- `sql`
- `swift`
- `trufflehog`
- `yamllint`

## Full Changelog

Expand Down
117 changes: 61 additions & 56 deletions src/python/pants/backend/tools/semgrep/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,21 @@
from typing import Iterable

from pants.backend.python.util_rules import pex
from pants.backend.python.util_rules.pex import PexRequest, VenvPex, VenvPexProcess
from pants.backend.python.util_rules.pex import VenvPexProcess, create_venv_pex
from pants.core.goals.lint import LintResult, LintTargetsRequest
from pants.core.util_rules.partitions import Partition, Partitions
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
from pants.core.util_rules.source_files import SourceFilesRequest, determine_source_files
from pants.engine.addresses import Address
from pants.engine.fs import (
CreateDigest,
Digest,
FileContent,
MergeDigests,
PathGlobs,
Paths,
Snapshot,
from pants.engine.fs import CreateDigest, FileContent, MergeDigests, PathGlobs, Paths, Snapshot
from pants.engine.intrinsics import (
create_digest_to_digest,
digest_to_snapshot,
merge_digests_request_to_digest,
path_globs_to_paths,
process_request_to_process_result,
)
from pants.engine.process import FallibleProcessResult, ProcessCacheScope
from pants.engine.rules import Get, MultiGet, Rule, collect_rules, rule
from pants.engine.process import ProcessCacheScope
from pants.engine.rules import Rule, collect_rules, concurrently, implicitly, rule
from pants.engine.unions import UnionRule
from pants.option.global_options import GlobalOptions
from pants.util.logging import LogLevel
Expand Down Expand Up @@ -101,7 +100,9 @@ def _group_by_semgrep_dir(all_paths: Paths) -> AllSemgrepConfigs:

@rule
async def find_all_semgrep_configs() -> AllSemgrepConfigs:
all_paths = await Get(Paths, PathGlobs([f"**/{file_glob}" for file_glob in _RULES_FILES_GLOBS]))
all_paths = await path_globs_to_paths(
PathGlobs([f"**/{file_glob}" for file_glob in _RULES_FILES_GLOBS])
)
return _group_by_semgrep_dir(all_paths)


Expand All @@ -123,7 +124,7 @@ async def infer_relevant_semgrep_configs(

@rule
async def all_semgrep_ignore_files() -> SemgrepIgnoreFiles:
snapshot = await Get(Snapshot, PathGlobs([f"**/{_IGNORE_FILE_NAME}"]))
snapshot = await digest_to_snapshot(**implicitly(PathGlobs([f"**/{_IGNORE_FILE_NAME}"])))
return SemgrepIgnoreFiles(snapshot)


Expand All @@ -136,8 +137,8 @@ async def partition(
if semgrep.skip:
return Partitions()

all_configs = await MultiGet(
Get(RelevantSemgrepConfigs, RelevantSemgrepConfigsRequest(field_set))
all_configs = await concurrently(
infer_relevant_semgrep_configs(RelevantSemgrepConfigsRequest(field_set), **implicitly())
for field_set in request.field_sets
)

Expand Down Expand Up @@ -168,62 +169,66 @@ async def lint(
semgrep: SemgrepSubsystem,
global_options: GlobalOptions,
) -> LintResult:
config_files, semgrep_pex, input_files, settings = await MultiGet(
Get(Snapshot, PathGlobs(str(s) for s in request.partition_metadata.config_files)),
Get(VenvPex, PexRequest, semgrep.to_pex_request()),
Get(SourceFiles, SourceFilesRequest(field_set.source for field_set in request.elements)),
Get(Digest, CreateDigest([_DEFAULT_SETTINGS])),
config_files, semgrep_pex, input_files, settings = await concurrently(
digest_to_snapshot(
**implicitly(PathGlobs(str(s) for s in request.partition_metadata.config_files))
),
create_venv_pex(**implicitly(semgrep.to_pex_request())),
determine_source_files(
SourceFilesRequest(field_set.source for field_set in request.elements)
),
create_digest_to_digest(CreateDigest([_DEFAULT_SETTINGS])),
)

input_digest = await Get(
Digest,
input_digest = await merge_digests_request_to_digest(
MergeDigests(
(
input_files.snapshot.digest,
config_files.digest,
settings,
request.partition_metadata.ignore_files.digest,
)
),
)
)

cache_scope = ProcessCacheScope.PER_SESSION if semgrep.force else ProcessCacheScope.SUCCESSFUL

# TODO: https://github.com/pantsbuild/pants/issues/18430 support running this with --autofix
# under the fix goal... but not all rules have fixes, so we need to be running with
# --error/checking exit codes, which FixResult doesn't currently support.
result = await Get(
FallibleProcessResult,
VenvPexProcess(
semgrep_pex,
argv=(
"scan",
*(f"--config={f}" for f in config_files.files),
"--jobs={pants_concurrency}",
"--error",
*semgrep.args,
# we don't pass the target files directly because that overrides .semgrepignore
# (https://github.com/returntocorp/semgrep/issues/4978), so instead we just tell its
# traversal to include all the source files in this partition. Unfortunately this
# include is implicitly unrooted (i.e. as if it was **/path/to/file), and so may
# pick up other files if the names match. The highest risk of this is within the
# semgrep PEX.
*(f"--include={f}" for f in input_files.files),
f"--exclude={semgrep_pex.pex_filename}",
),
extra_env={
"SEMGREP_FORCE_COLOR": "true",
# disable various global state/network requests
"SEMGREP_SETTINGS_FILE": _DEFAULT_SETTINGS.path,
"SEMGREP_ENABLE_VERSION_CHECK": "0",
"SEMGREP_SEND_METRICS": "off",
},
input_digest=input_digest,
concurrency_available=len(input_files.files),
description=f"Run Semgrep on {pluralize(len(input_files.files), 'file')}.",
level=LogLevel.DEBUG,
cache_scope=cache_scope,
),
result = await process_request_to_process_result(
**implicitly(
VenvPexProcess(
semgrep_pex,
argv=(
"scan",
*(f"--config={f}" for f in config_files.files),
"--jobs={pants_concurrency}",
"--error",
*semgrep.args,
# we don't pass the target files directly because that overrides .semgrepignore
# (https://github.com/returntocorp/semgrep/issues/4978), so instead we just tell its
# traversal to include all the source files in this partition. Unfortunately this
# include is implicitly unrooted (i.e. as if it was **/path/to/file), and so may
# pick up other files if the names match. The highest risk of this is within the
# semgrep PEX.
*(f"--include={f}" for f in input_files.files),
f"--exclude={semgrep_pex.pex_filename}",
),
extra_env={
"SEMGREP_FORCE_COLOR": "true",
# disable various global state/network requests
"SEMGREP_SETTINGS_FILE": _DEFAULT_SETTINGS.path,
"SEMGREP_ENABLE_VERSION_CHECK": "0",
"SEMGREP_SEND_METRICS": "off",
},
input_digest=input_digest,
concurrency_available=len(input_files.files),
description=f"Run Semgrep on {pluralize(len(input_files.files), 'file')}.",
level=LogLevel.DEBUG,
cache_scope=cache_scope,
)
)
)

return LintResult.create(request, result, output_simplifier=global_options.output_simplifier())
Expand Down
60 changes: 28 additions & 32 deletions src/python/pants/backend/tools/trufflehog/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,24 @@

from __future__ import annotations

from typing import Iterable

from pants.backend.tools.trufflehog.subsystem import Trufflehog
from pants.core.goals.lint import LintFilesRequest, LintResult
from pants.core.util_rules.config_files import ConfigFiles, ConfigFilesRequest
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.config_files import find_config_file
from pants.core.util_rules.external_tool import download_external_tool
from pants.core.util_rules.partitions import Partitions
from pants.engine.fs import (
CreateDigest,
Digest,
DigestEntries,
FileEntry,
MergeDigests,
PathGlobs,
Snapshot,
from pants.engine.fs import CreateDigest, FileEntry, MergeDigests, PathGlobs
from pants.engine.intrinsics import (
create_digest_to_digest,
digest_to_snapshot,
directory_digest_to_digest_entries,
merge_digests_request_to_digest,
process_request_to_process_result,
)
from pants.engine.platform import Platform
from pants.engine.process import FallibleProcessResult, Process
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.process import Process
from pants.engine.rules import Rule, collect_rules, concurrently, implicitly, rule
from pants.source.filespec import FilespecMatcher
from pants.util.logging import LogLevel
from pants.util.strutil import pluralize
Expand Down Expand Up @@ -53,37 +54,32 @@ async def run_trufflehog(
) -> LintResult:
"""Runs the trufflehog executable against the targeted files."""

download_trufflehog_get = Get(
DownloadedExternalTool, ExternalToolRequest, trufflehog.get_request(platform)
download_trufflehog_get = download_external_tool(trufflehog.get_request(platform))
config_files_get = find_config_file(trufflehog.config_request())
downloaded_trufflehog, config_digest = await concurrently(
download_trufflehog_get, config_files_get
)

config_files_get = Get(ConfigFiles, ConfigFilesRequest, trufflehog.config_request())

downloaded_trufflehog, config_digest = await MultiGet(download_trufflehog_get, config_files_get)
# the downloaded files are going to contain the `exe`, readme and license. We only
# want the `exe`
# The downloaded files are going to contain the `exe`, readme and license. We only want the `exe`
entry = next(
e
for e in await Get(DigestEntries, Digest, downloaded_trufflehog.digest)
for e in await directory_digest_to_digest_entries(downloaded_trufflehog.digest)
if isinstance(e, FileEntry) and e.path == "trufflehog" and e.is_executable
)
trufflehog_digest = await Get(Digest, CreateDigest([entry]))

snapshot = await Get(Snapshot, PathGlobs(request.elements))

input_digest = await Get(
Digest,
trufflehog_digest = await create_digest_to_digest(CreateDigest([entry]))
snapshot = await digest_to_snapshot(**implicitly(PathGlobs(request.elements)))
input_digest = await merge_digests_request_to_digest(
MergeDigests(
(
snapshot.digest,
trufflehog_digest,
config_digest.snapshot.digest,
)
),
)
)

process_result = await Get(
FallibleProcessResult,
process_result = await process_request_to_process_result(
Process(
argv=(
downloaded_trufflehog.exe,
Expand All @@ -102,13 +98,13 @@ async def run_trufflehog(
description=f"Run Trufflehog on {pluralize(len(snapshot.files), 'file')}.",
level=LogLevel.DEBUG,
),
**implicitly(),
)
return LintResult.create(request, process_result)


def rules() -> list:
"""Collect all the rules."""
return [
def rules() -> Iterable[Rule]:
return (
*collect_rules(),
*TrufflehogRequest.rules(),
]
)
Loading
Loading