Skip to content

Commit

Permalink
feat: make ignore reason human readable in text output
Browse files Browse the repository at this point in the history
Introduces new models:
- IgnoreReason
- Secret, which is meant to extend the PolicyBreak model from
  pygitguardian
  • Loading branch information
gg-mmill committed Dec 17, 2024
1 parent 6d0d8b8 commit 5d79826
Show file tree
Hide file tree
Showing 14 changed files with 201 additions and 131 deletions.
25 changes: 11 additions & 14 deletions ggshield/core/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,9 @@
from pathlib import Path
from typing import Any, Dict, List

from pygitguardian.models import PolicyBreak

from ggshield.core import ui
from ggshield.core.constants import CACHE_PATH
from ggshield.core.errors import UnexpectedError
from ggshield.core.filter import get_ignore_sha
from ggshield.core.types import IgnoredMatch


Expand Down Expand Up @@ -74,18 +71,18 @@ def save(self) -> None:
def purge(self) -> None:
    """Forget every secret recorded so far."""
    # Rebind to a brand-new list instead of clearing in place, so any other
    # reference to the previous list object keeps its content.
    self.last_found_secrets = list()

def add_found_policy_break(self, policy_break: PolicyBreak, filename: str) -> None:
if policy_break.is_secret:
ignore_sha = get_ignore_sha(policy_break)
if not any(
last_found.match == ignore_sha for last_found in self.last_found_secrets
):
self.last_found_secrets.append(
IgnoredMatch(
name=f"{policy_break.break_type} - {filename}",
match=get_ignore_sha(policy_break),
)
def add_found_policy_break(
    self, break_type: str, ignore_sha: str, filename: str
) -> None:
    """
    Record a found secret in `last_found_secrets`, at most once per ignore sha.

    Nothing is added when an entry whose `match` equals `ignore_sha` is
    already present. Otherwise an `IgnoredMatch` named
    "<break_type> - <filename>" with `match=ignore_sha` is appended.
    """
    for recorded in self.last_found_secrets:
        if recorded.match == ignore_sha:
            # Already tracked: keep the existing entry untouched.
            return

    entry = IgnoredMatch(name=f"{break_type} - {filename}", match=ignore_sha)
    self.last_found_secrets.append(entry)


class ReadOnlyCache(Cache):
Expand Down
16 changes: 1 addition & 15 deletions ggshield/core/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import math
import operator
import re
from typing import Dict, Iterable, List, Pattern, Set
from typing import Iterable, Pattern, Set

from click import UsageError
from pygitguardian.models import Match, PolicyBreak
Expand Down Expand Up @@ -60,20 +60,6 @@ def get_ignore_sha(policy_break: PolicyBreak) -> str:
return hashlib.sha256(hashable.encode("UTF-8")).hexdigest()


def group_policy_breaks_by_ignore_sha(
    policy_breaks: List[PolicyBreak],
) -> Dict[str, List[PolicyBreak]]:
    """
    Bucket policy breaks under the ignore sha computed by `get_ignore_sha`.

    Returns a plain dict mapping each ignore sha to the policy breaks that
    produced it. Keys appear in order of first occurrence and each bucket
    keeps the input order.
    """
    grouped: Dict[str, List[PolicyBreak]] = {}
    for current in policy_breaks:
        sha = get_ignore_sha(current)
        bucket = grouped.get(sha)
        if bucket is None:
            # First policy break seen for this sha: start a new bucket.
            grouped[sha] = [current]
        else:
            bucket.append(current)
    return grouped


def translate_user_pattern(pattern: str) -> str:
"""
Translate the user pattern into a regex. This function assumes that the given
Expand Down
14 changes: 14 additions & 0 deletions ggshield/verticals/secret/extended_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,17 @@ def __repr__(self) -> str:
f"post_line_end:{self.post_line_end}",
]
)

def __eq__(self, other: Any) -> bool:
    """
    Compare two ExtendedMatch instances field by field.

    Two instances are equal when their span, the lines before/with/after the
    secret, and the pre/post line start/end values are all equal.

    For any object that is not an ExtendedMatch, `NotImplemented` is returned
    instead of `False`. Python then tries the other operand's reflected
    comparison and finally falls back to identity, so
    `extended_match == unrelated_object` still evaluates to `False`.
    """
    # NOTE(review): a class that defines __eq__ without also defining
    # __hash__ becomes unhashable; confirm this is acceptable for
    # ExtendedMatch (the rest of the class is not visible here).
    if not isinstance(other, ExtendedMatch):
        return NotImplemented
    return (
        self.span == other.span
        and self.lines_before_secret == other.lines_before_secret
        and self.lines_with_secret == other.lines_with_secret
        and self.lines_after_secret == other.lines_after_secret
        and self.pre_line_start == other.pre_line_start
        and self.pre_line_end == other.pre_line_end
        and self.post_line_start == other.post_line_start
        and self.post_line_end == other.post_line_end
    )
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
from pygitguardian.models import PolicyBreak

from ggshield.core.filter import censor_match
from ggshield.core.text_utils import pluralize, translate_validity

from ..secret_scan_collection import SecretScanCollection
from ..secret_scan_collection import Secret, SecretScanCollection
from .secret_output_handler import SecretOutputHandler


def format_policy_break(policy_break: PolicyBreak) -> str:
def format_policy_break(policy_break: Secret) -> str:
"""Returns a string with the policy name, validity and a comma-separated,
double-quoted, censored version of all `policy_break` matches.
Expand Down
17 changes: 11 additions & 6 deletions ggshield/verticals/secret/output/secret_json_output_handler.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from typing import Any, Dict, List, cast

from pygitguardian.client import VERSIONS
from pygitguardian.models import PolicyBreak, SecretIncident
from pygitguardian.models import SecretIncident

from ggshield.core.filter import group_policy_breaks_by_ignore_sha
from ggshield.verticals.secret.extended_match import ExtendedMatch

from ..secret_scan_collection import Error, Result, SecretScanCollection
from ..secret_scan_collection import (
Error,
Result,
Secret,
SecretScanCollection,
group_secrets_by_ignore_sha,
)
from .schemas import JSONScanCollectionSchema
from .secret_output_handler import SecretOutputHandler

Expand Down Expand Up @@ -74,7 +79,7 @@ def process_result(
"total_occurrences": 0,
"total_incidents": 0,
}
sha_dict = group_policy_breaks_by_ignore_sha(result.policy_breaks)
sha_dict = group_secrets_by_ignore_sha(result.policy_breaks)
result_dict["total_incidents"] = len(sha_dict)

if not self.show_secrets:
Expand Down Expand Up @@ -105,7 +110,7 @@ def process_error(error: Error) -> Dict[str, Any]:
def serialized_policy_break(
self,
ignore_sha: str,
policy_breaks: List[PolicyBreak],
policy_breaks: List[Secret],
incident_details: Dict[str, SecretIncident],
) -> Dict[str, Any]:
flattened_dict: Dict[str, Any] = {
Expand Down Expand Up @@ -136,7 +141,7 @@ def serialized_policy_break(

def serialize_policy_break_matches(
self,
policy_break: PolicyBreak,
policy_break: Secret,
) -> List[Dict[str, Any]]:
"""
Serialize policy_break matches. The method uses MatchSpan to get the start and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
from typing import Any, Dict, Iterable, List, cast

from pygitguardian.client import VERSIONS
from pygitguardian.models import PolicyBreak, SecretIncident
from pygitguardian.models import SecretIncident

from ggshield import __version__ as ggshield_version
from ggshield.core.filter import get_ignore_sha
from ggshield.core.match_span import MatchSpan

from ..extended_match import ExtendedMatch
from ..secret_scan_collection import Result, SecretScanCollection
from ..secret_scan_collection import Result, Secret, SecretScanCollection
from .secret_output_handler import SecretOutputHandler


Expand Down Expand Up @@ -66,7 +65,7 @@ def _create_sarif_results(

def _create_sarif_result_dict(
url: str,
policy_break: PolicyBreak,
policy_break: Secret,
incident_details: Dict[str, SecretIncident],
) -> Dict[str, Any]:
# Prepare message with links to the related location for each match
Expand Down Expand Up @@ -98,7 +97,7 @@ def _create_sarif_result_dict(
for id, m in enumerate(extended_matches)
],
"partialFingerprints": {
"secret/v1": get_ignore_sha(policy_break),
"secret/v1": policy_break.get_ignore_sha(),
},
}
if policy_break.incident_url:
Expand Down
28 changes: 16 additions & 12 deletions ggshield/verticals/secret/output/secret_text_output_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
from typing import Dict, List, Optional, Tuple

from pygitguardian.client import VERSIONS
from pygitguardian.models import PolicyBreak

from ggshield.core.constants import IncidentStatus
from ggshield.core.filter import group_policy_breaks_by_ignore_sha
from ggshield.core.lines import Line, get_offset, get_padding
from ggshield.core.text_utils import (
STYLE,
Expand All @@ -17,7 +15,13 @@
)

from ..extended_match import ExtendedMatch
from ..secret_scan_collection import IgnoreReason, Result, SecretScanCollection
from ..secret_scan_collection import (
IgnoreKind,
Result,
Secret,
SecretScanCollection,
group_secrets_by_ignore_sha,
)
from .secret_output_handler import SecretOutputHandler


Expand Down Expand Up @@ -71,9 +75,7 @@ def _process_scan_impl(self, scan: SecretScanCollection) -> str:
)

known_secrets_count = sum(
result.ignored_policy_breaks_count_by_reason.get(
IgnoreReason.KNOWN_SECRET, 0
)
result.ignored_policy_breaks_count_by_kind.get(IgnoreKind.KNOWN_SECRET, 0)
for result in scan.get_all_results()
)
if self.ignore_known_secrets and known_secrets_count > 0:
Expand Down Expand Up @@ -118,14 +120,14 @@ def process_result(self, result: Result) -> str:
"""
result_buf = StringIO()

sha_dict = group_policy_breaks_by_ignore_sha(result.policy_breaks)
sha_dict = group_secrets_by_ignore_sha(result.policy_breaks)

if not self.show_secrets:
result.censor()

number_of_displayed_secrets = 0
number_of_hidden_secrets = sum(
result.ignored_policy_breaks_count_by_reason.values()
result.ignored_policy_breaks_count_by_kind.values()
)
for ignore_sha, policy_breaks in sha_dict.items():
number_of_displayed_secrets += 1
Expand Down Expand Up @@ -255,7 +257,7 @@ def leak_message_located(


def flatten_policy_breaks_by_line(
policy_breaks: List[PolicyBreak],
policy_breaks: List[Secret],
) -> List[Tuple[Line, List[ExtendedMatch]]]:
"""
flatten_policy_breaks_by_line turns a list of policy breaks into a list of
Expand Down Expand Up @@ -283,7 +285,7 @@ def flatten_policy_breaks_by_line(


def policy_break_header(
policy_breaks: List[PolicyBreak],
policy_breaks: List[Secret],
ignore_sha: str,
known_secret: bool = False,
) -> str:
Expand All @@ -310,8 +312,10 @@ def policy_break_header(
{indent}Incident URL: {policy_breaks[0].incident_url if known_secret and policy_break.incident_url else "N/A"}
{indent}Secret SHA: {ignore_sha}
"""
if policy_break.is_excluded:
message += f"{indent}Ignored: {policy_break.exclude_reason}\n"
if policy_break.ignore_reason is not None:
message += (
f"{indent}Ignored: {policy_break.ignore_reason.to_human_readable()}\n"
)

return message + "\n"

Expand Down
Loading

0 comments on commit 5d79826

Please sign in to comment.