Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mmillet/spi 515 add all secrets option to ggshield secret scans #1024

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
<!--
A new scriv changelog fragment.

Uncomment the section that is right (remove the HTML comment wrapper).
-->

<!--
### Removed

- A bullet item for the Removed category.

-->

### Added

- The `--all-secrets` option to secret scans, allowing to display all found secrets, and their possible ignore reason.

<!--
### Changed

- A bullet item for the Changed category.

-->
<!--
### Deprecated

- A bullet item for the Deprecated category.

-->
<!--
### Fixed

- A bullet item for the Fixed category.

-->
<!--
### Security

- A bullet item for the Security category.

-->
10 changes: 10 additions & 0 deletions ggshield/cmd/secret/scan/secret_scan_common_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,15 @@ def _banlist_detectors_callback(
)


_all_secrets = click.option(
"--all-secrets",
is_flag=True,
help=("Do not ignore any secret. Possible ignore-reason is shown as well."),
callback=create_config_callback("secret", "all_secrets"),
default=None,
)


def add_secret_scan_common_options() -> Callable[[AnyFunction], AnyFunction]:
def decorator(cmd: AnyFunction) -> AnyFunction:
add_common_options()(cmd)
Expand All @@ -153,6 +162,7 @@ def decorator(cmd: AnyFunction) -> AnyFunction:
_banlist_detectors_option(cmd)
_with_incident_details_option(cmd)
instance_option(cmd)
_all_secrets(cmd)
return cmd

return decorator
Expand Down
18 changes: 18 additions & 0 deletions ggshield/core/config/user_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import re
from dataclasses import field
Expand Down Expand Up @@ -47,6 +48,7 @@ class SecretConfig(FilteredConfig):
ignore_known_secrets: bool = False
with_incident_details: bool = False
# if configuration key is left unset the dashboard's remediation message is used.
all_secrets: bool = False
prereceive_remediation_message: str = ""

def add_ignored_match(self, secret: IgnoredMatch) -> None:
Expand All @@ -61,6 +63,22 @@ def add_ignored_match(self, secret: IgnoredMatch) -> None:
return
self.ignored_matches.append(secret)

def dump_for_monitoring(self) -> str:
return json.dumps(
{
"show_secrets": self.show_secrets,
"ignored_detectors_count": len(self.ignored_detectors),
"ignored_matches_count": len(self.ignored_matches),
"ignored_paths_count": len(self.ignored_paths),
"ignore_known_secrets": self.ignore_known_secrets,
"with_incident_details": self.with_incident_details,
"has_prereceive_remediation_message": bool(
self.prereceive_remediation_message
),
"all_secrets": self.all_secrets,
}
)


def validate_policy_id(policy_id: str) -> bool:
return bool(POLICY_ID_PATTERN.fullmatch(policy_id))
Expand Down
59 changes: 46 additions & 13 deletions ggshield/verticals/secret/output/secret_text_output_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,19 @@
from pygitguardian.client import VERSIONS
from pygitguardian.models import PolicyBreak

from ggshield.core.constants import IncidentStatus
from ggshield.core.filter import group_policy_breaks_by_ignore_sha
from ggshield.core.lines import Line, get_offset, get_padding
from ggshield.core.text_utils import (
STYLE,
clip_long_line,
file_info,
format_text,
pluralize,
translate_validity,
)
from ggshield.verticals.secret.secret_scanner import IgnoreReason

from ..extended_match import ExtendedMatch
from ..secret_scan_collection import Result, SecretScanCollection
from ..secret_scan_collection import IgnoreReason, Result, SecretScanCollection
from .secret_output_handler import SecretOutputHandler


Expand All @@ -29,6 +28,31 @@
NB_CONTEXT_LINES = 3


def secret_file_info(
filename: str,
incident_count: int,
incident_status: IncidentStatus = IncidentStatus.DETECTED,
hidden_secrets_count: int = 0,
) -> str:
"""Return the formatted file info (number of secrets, filename, optional number of hidden secrets)."""
result = "\n{} {}: {} {} {}\n".format(
format_text(">", STYLE["detector_line_start"]),
format_text(filename, STYLE["filename"]),
incident_count,
pluralize("secret", incident_count, "secrets"),
incident_status.value,
)
if hidden_secrets_count > 0:
result += "{} {} {} {} ignored - use the `--all-secrets` option to display {}\n".format(
format_text(">", STYLE["detector_line_start"]),
" " * len(filename),
hidden_secrets_count,
pluralize("secret", hidden_secrets_count, "secrets"),
pluralize("it", hidden_secrets_count, "them"),
)
return result


class SecretTextOutputHandler(SecretOutputHandler):
def _process_scan_impl(self, scan: SecretScanCollection) -> str:
"""Output Secret Scan Collection in text format"""
Expand Down Expand Up @@ -100,6 +124,9 @@ def process_result(self, result: Result) -> str:
result.censor()

number_of_displayed_secrets = 0
number_of_hidden_secrets = sum(
result.ignored_policy_breaks_count_by_reason.values()
)
for ignore_sha, policy_breaks in sha_dict.items():
number_of_displayed_secrets += 1

Expand All @@ -118,8 +145,12 @@ def process_result(self, result: Result) -> str:
)

file_info_line = ""
if number_of_displayed_secrets > 0:
file_info_line = file_info(result.filename, number_of_displayed_secrets)
if number_of_displayed_secrets > 0 or number_of_hidden_secrets > 0:
file_info_line = secret_file_info(
result.filename,
incident_count=number_of_displayed_secrets,
hidden_secrets_count=number_of_hidden_secrets,
)

return file_info_line + result_buf.getvalue()

Expand Down Expand Up @@ -259,28 +290,30 @@ def policy_break_header(
"""
Build a header for the policy break.
"""
policy_break = policy_breaks[0]
indent = " "
validity_msg = (
f"\n{indent}Validity: {format_text(translate_validity(policy_breaks[0].validity), STYLE['incident_validity'])}"
if policy_breaks[0].validity
f"\n{indent}Validity: {format_text(translate_validity(policy_break.validity), STYLE['incident_validity'])}"
if policy_break.validity
else ""
)

start_line = format_text(">>", STYLE["detector_line_start"])
policy_break_type = format_text(
policy_breaks[0].break_type, STYLE["policy_break_type"]
)
policy_break_type = format_text(policy_break.break_type, STYLE["policy_break_type"])
number_occurrences = format_text(str(len(policy_breaks)), STYLE["occurrence_count"])
ignore_sha = format_text(ignore_sha, STYLE["ignore_sha"])

return f"""
message = f"""
{start_line} Secret detected: {policy_break_type}{validity_msg}
{indent}Occurrences: {number_occurrences}
{indent}Known by GitGuardian dashboard: {"YES" if known_secret else "NO"}
{indent}Incident URL: {policy_breaks[0].incident_url if known_secret and policy_breaks[0].incident_url else "N/A"}
{indent}Incident URL: {policy_breaks[0].incident_url if known_secret and policy_break.incident_url else "N/A"}
{indent}Secret SHA: {ignore_sha}

"""
if policy_break.is_excluded:
message += f"{indent}Ignored: {policy_break.exclude_reason}\n"

return message + "\n"


def no_leak_message() -> str:
Expand Down
106 changes: 67 additions & 39 deletions ggshield/verticals/secret/secret_scan_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from enum import Enum
from pathlib import Path
from typing import (
Any,
Callable,
Counter,
Dict,
Iterable,
List,
Expand All @@ -15,22 +14,52 @@
)

from pygitguardian import GGClient
from pygitguardian.models import Detail, Match, PolicyBreak, ScanResult, SecretIncident
from pygitguardian.models import (
Detail,
DiffKind,
Match,
PolicyBreak,
ScanResult,
SecretIncident,
)

from ggshield.core.config.user_config import SecretConfig
from ggshield.core.errors import UnexpectedError, handle_api_error
from ggshield.core.filter import is_in_ignored_matches
from ggshield.core.lines import Line, get_lines_from_content
from ggshield.core.scan.scannable import Scannable
from ggshield.core.scan import Scannable
from ggshield.utils.git_shell import Filemode
from ggshield.verticals.secret.extended_match import ExtendedMatch


class IgnoreReason(Enum):
class IgnoreReason(str, Enum):
IGNORED_MATCH = "ignored_match"
IGNORED_DETECTOR = "ignored_detector"
KNOWN_SECRET = "known_secret"
NOT_INTRODUCED = "not_introduced"
BACKEND_EXCLUDED = "backend_excluded"


def compute_ignore_reason(
policy_break: PolicyBreak, secret_config: SecretConfig
) -> Optional[str]:
"""Computes the possible ignore reason associated with a PolicyBreak"""
ignore_reason = None
if policy_break.diff_kind in {DiffKind.DELETION, DiffKind.CONTEXT}:
ignore_reason = IgnoreReason.NOT_INTRODUCED
elif policy_break.is_excluded:
ignore_reason = f"Excluded from backend ({policy_break.exclude_reason})"
elif is_in_ignored_matches(policy_break, secret_config.ignored_matches or []):
ignore_reason = IgnoreReason.IGNORED_MATCH
elif policy_break.break_type in secret_config.ignored_detectors:
ignore_reason = IgnoreReason.IGNORED_DETECTOR
elif secret_config.ignore_known_secrets and policy_break.known_secret:
ignore_reason = IgnoreReason.KNOWN_SECRET

return ignore_reason


@dataclass
class Result:
"""
Return model for a scan which zips the information
Expand All @@ -42,28 +71,7 @@ class Result:
path: Path
url: str
policy_breaks: List[PolicyBreak]
ignored_policy_breaks_count_by_reason: Dict[IgnoreReason, int]

def __init__(self, file: Scannable, scan: ScanResult):
self.filename = file.filename
self.filemode = file.filemode
self.path = file.path
self.url = file.url
self.policy_breaks = scan.policy_breaks
lines = get_lines_from_content(file.content, self.filemode)
self.enrich_matches(lines)
self.ignored_policy_breaks_count_by_reason = {}

def __eq__(self, other: Any) -> bool:
if not isinstance(other, Result):
return False
return (
self.filename == other.filename
and self.filemode == other.filemode
and self.path == other.path
and self.url == other.url
and self.policy_breaks == other.policy_breaks
)
ignored_policy_breaks_count_by_reason: Counter[str]

@property
def is_on_patch(self) -> bool:
Expand All @@ -90,21 +98,41 @@ def censor(self) -> None:
def has_policy_breaks(self) -> bool:
return len(self.policy_breaks) > 0

def apply_ignore_function(
self, reason: IgnoreReason, ignore_function: Callable[[PolicyBreak], bool]
):
assert (
reason not in self.ignored_policy_breaks_count_by_reason
), f"Ignore was already computed for {IgnoreReason}"
@classmethod
def from_scan_result(
cls, file: Scannable, scan_result: ScanResult, secret_config: SecretConfig
) -> "Result":
"""Creates a Result from a Scannable and a ScanResult.
- Removes ignored policy breaks
- replace matches by ExtendedMatches
"""

to_keep = []
ignored_count = 0
for policy_break in self.policy_breaks:
if ignore_function(policy_break):
ignored_count += 1
ignored_policy_breaks_count_by_reason = Counter()
for policy_break in scan_result.policy_breaks:
ignore_reason = compute_ignore_reason(policy_break, secret_config)
if ignore_reason is not None:
if secret_config.all_secrets:
policy_break.exclude_reason = ignore_reason
policy_break.is_excluded = True
to_keep.append(policy_break)
else:
ignored_policy_breaks_count_by_reason[ignore_reason] += 1
else:
to_keep.append(policy_break)
self.policy_breaks = to_keep
self.ignored_policy_breaks_count_by_reason[reason] = ignored_count

result = Result(
filename=file.filename,
filemode=file.filemode,
path=file.path,
url=file.url,
policy_breaks=to_keep,
ignored_policy_breaks_count_by_reason=ignored_policy_breaks_count_by_reason,
)

lines = get_lines_from_content(file.content, file.filemode)
result.enrich_matches(lines)
return result


class Error(NamedTuple):
Expand Down
Loading
Loading