Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Exclusion reasons #286

Merged
merged 7 commits into from
Nov 29, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ Features:
* [#268](https://github.com/godaddy/tartufo/issues/268) - Adds a new
`--recurse / --no-recurse` flag which allows users to recursively scan the entire directory or just
the root directory
* [#202](https://github.com/godaddy/tartufo/issues/202) - Supports new format of exclusions in config file
with the ability to specify the reason along with exclusion

v3.0.0-alpha.1 - 11 November 2021
---------------------------------
Expand Down
13 changes: 10 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -141,11 +141,18 @@ exclude-signatures = [
"d039c652f27c4d42026c5b5c9be31bfe368b283b24248e98d92f131272580053",

# test secrets
"d26b223efb2087d4a3db7b3ff0f65362372c46190fd74b8061db8099f4a2ee60", # tests/data/scan_folder/donotscan.txt
"94c45f9acae0551a2438fa8932fac466e903fd9a09bd2e11897198b133937ccd", # tests/data/scan_folder/test.txt
"6c70b4a712ada69f2a6c919edc783cf485b6637e3c5429de376ef1a451a042e4", # tests/data/scan_folder/donotscan.txt
"357841766ca69979608e4e7fb1da2addeafe6d15cade15466c39d903cc0488a8", # tests/data/scan_folder/test.txt
"743b893e30777d9c4e28d3d341ca4ba1c2541a9c62b497dece75c2a64420e770", # tests/test_folder_scanner.py
"9d1b278fa384c9dc58d607130d97740910309d13b29f13f7ed6348738ea4f32c", # tests/data/scan_folder/scan_sub_folder/sub_folder_test.txt
"358a1ae040ab71b3aafeb4318e5d9e318ff1b681c5bf0e99d381584a33ee7833", # tests/data/scan_folder/scan_sub_folder/sub_folder_test.txt
]

exclude-findings = [
{signature="9d1b278fa384c9dc58d607130d97740910309d13b29f13f7ed6348738ea4f32c", reason="High entropy string added in test_folder_scanner file on 16th November 2021"},
{signature="9f675d4d7f43e484e07fb199a1e7ec372ef204631c1f4ec054acc3119fbde4cf", reason="High entropy string added in build_and_publish_docker file on 13th June 2019"},
{signature="88152f1d10077f417ee5aea5fa86adaa5b74d3f2de2e87ce4b4aa5cd79c7b2f5", reason="High entropy string added in nothing file on 30th December 2016"},
]

json = false
regex = true
repo-path = "."
Expand Down
15 changes: 14 additions & 1 deletion tartufo/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,25 @@ def get_command(self, ctx: click.Context, cmd_name: str) -> Optional[click.Comma
"-e",
"--exclude-signatures",
multiple=True,
help="Specify signatures of matches that you explicitly want to exclude "
help="[DEPRECATED] Use the exclude-findings config option instead. "
"Specify signatures of matches that you explicitly want to exclude "
"from the scan, and mark as okay. These signatures are generated during "
"the scan process, and reported out with each individual match. This "
"option can be specified multiple times, to exclude as many signatures as "
"you would like.",
)
@click.option(
"-ef",
"--exclude-findings",
hidden=True,
multiple=True,
help="Specify signatures of matches that you explicitly want to exclude "
"from the scan along with the reason, and mark as okay. These signatures "
"are generated during the scan process, and reported out with each"
"individual match. This option can be specified multiple times, "
"to exclude as many signatures as you would like. "
"{signature='signature', reason='The reason of excluding the signature'}",
)
@click.option(
"-od",
"--output-dir",
Expand Down
47 changes: 36 additions & 11 deletions tartufo/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ def __bytes__(self) -> bytes:
return self.__str__().encode("utf8")


# pylint: disable=too-many-public-methods
class ScannerBase(abc.ABC): # pylint: disable=too-many-instance-attributes
"""Provide the base, generic functionality needed by all scanners.

Expand All @@ -149,6 +150,8 @@ class ScannerBase(abc.ABC): # pylint: disable=too-many-instance-attributes
global_options: types.GlobalOptions
logger: logging.Logger
_scan_lock: threading.Lock = threading.Lock()
_excluded_findings: tuple = ()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_excluded_findings: tuple = ()
_excluded_findings: Tuple[str, ...] = ()

_config_data: MutableMapping[str, Any] = {}

def __init__(self, options: types.GlobalOptions) -> None:
self.global_options = options
Expand Down Expand Up @@ -336,6 +339,36 @@ def should_scan(self, file_path: str):
return False
return True

@property
def config_data(self):
return self._config_data

@config_data.setter
def config_data(self, data) -> MutableMapping[str, Any]:
self._config_data = data
return self._config_data
tarkatronic marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def config_data(self, data) -> MutableMapping[str, Any]:
self._config_data = data
return self._config_data
def config_data(self, data: MutableMapping[str, Any]) -> None:
self._config_data = data

Returning data from a setter in Python is generally not a valuable operation, because there's no way to access it. You would need something like...

new_data = scanner.config_data = data

Which is just confusing. And probably won't even work the way you expect.

You can find a bit more of an explanation here, if you're interested: https://stackoverflow.com/a/16615910


@cached_property
def excluded_findings(self) -> tuple:
configured_signatures = []
signatures = self.config_data.get("exclude_signatures", None)
if signatures:
warnings.warn(
"--exclude-signatures has been deprecated and will be removed in a future version. "
"Make sure all the exclusions are moved to exclude-findings section with new format. Example: "
"exclude-findings = [{signature='signature', reason='The reason of excluding the signature'}]",
DeprecationWarning,
)
configured_signatures.extend(signatures)
findings = self.config_data.get("exclude_findings", None)
if findings:
configured_signatures.extend([finding["signature"] for finding in findings])

self._excluded_findings = tuple(
set(self.global_options.exclude_signatures + tuple(configured_signatures))
)
return self._excluded_findings

def signature_is_excluded(self, blob: str, file_path: str) -> bool:
"""Find whether the signature of some data has been excluded in configuration.

Expand All @@ -344,9 +377,8 @@ def signature_is_excluded(self, blob: str, file_path: str) -> bool:
"""
return (
blob
in self.global_options.exclude_signatures # Signatures themselves pop up as entropy matches
or util.generate_signature(blob, file_path)
in self.global_options.exclude_signatures
in self.excluded_findings # Signatures themselves pop up as entropy matches
or util.generate_signature(blob, file_path) in self.excluded_findings
)

@staticmethod
Expand Down Expand Up @@ -660,14 +692,7 @@ def load_repo(self, repo_path: str) -> pygit2.Repository:
except (FileNotFoundError, types.ConfigException):
config_file = None
if config_file and config_file != self.global_options.config:
signatures = data.get("exclude_signatures", None)
if signatures:
self.global_options.exclude_signatures = tuple(
set(self.global_options.exclude_signatures + tuple(signatures))
)
entropy_patterns = data.get("exclude_entropy_patterns", None)
if entropy_patterns:
self.global_options.exclude_entropy_patterns += tuple(entropy_patterns)
self.config_data = data
include_patterns = list(data.get("include_path_patterns", ()))
repo_include_file = data.get("include_paths", None)
if repo_include_file:
Expand Down
2 changes: 2 additions & 0 deletions tartufo/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class GlobalOptions:
"exclude_path_patterns",
"exclude_entropy_patterns",
"exclude_signatures",
"exclude_findings",
"output_dir",
"git_rules_repo",
"git_rules_files",
Expand All @@ -65,6 +66,7 @@ class GlobalOptions:
exclude_path_patterns: Tuple[str, ...]
exclude_entropy_patterns: Tuple[Dict[str, str], ...]
exclude_signatures: Tuple[str, ...]
exclude_findings: Tuple[Dict[str, str], ...]
output_dir: Optional[str]
git_rules_repo: Optional[str]
git_rules_files: Tuple[str, ...]
Expand Down
17 changes: 14 additions & 3 deletions tests/test_base_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,23 +266,34 @@ def test_regex_rules_are_computed_when_first_accessed(
class SignatureTests(ScannerTestCase):
@mock.patch("tartufo.util.generate_signature")
def test_matched_signatures_are_excluded(self, mock_signature: mock.MagicMock):
self.options.exclude_signatures = ("foo",)
mock_signature.return_value = "foo"
test_scanner = TestScanner(self.options)
self.options.exclude_signatures = ()
test_scanner.config_data = {
"exclude_findings": [{"signature": "foo", "reason": "foo finding"}]
}
self.assertTrue(test_scanner.signature_is_excluded("bar", "blah"))

@mock.patch("tartufo.util.generate_signature")
def test_unmatched_signatures_are_not_excluded(
self, mock_signature: mock.MagicMock
):
self.options.exclude_signatures = ("foo",)
mock_signature.return_value = "bar"
test_scanner = TestScanner(self.options)
self.options.exclude_signatures = ()
test_scanner.config_data = {
"exclude_findings": [{"signature": "foo", "reason": "foo finding"}]
}
self.assertFalse(test_scanner.signature_is_excluded("blah", "stuff"))

def test_signature_found_as_scan_match_is_excluded(self):
self.options.exclude_signatures = ("ford_prefect",)
test_scanner = TestScanner(self.options)
self.options.exclude_signatures = ()
test_scanner.config_data = {
"exclude_findings": [
{"signature": "ford_prefect", "reason": "ford_prefect finding"}
]
}
self.assertTrue(test_scanner.signature_is_excluded("ford_prefect", "/earth"))


Expand Down
11 changes: 4 additions & 7 deletions tests/test_folder_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,13 @@ def test_scan_should_detect_entropy_and_not_binary(self):
self.global_options.entropy = True
self.global_options.b64_entropy_score = 4.5
self.global_options.hex_entropy_score = 3
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()
self.global_options.exclude_path_patterns = [r"donotscan\.txt"]

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)
issues = list(test_scanner.scan())

self.assertEqual(2, len(issues))
actual_issues = []
actual_issues = [issue.matched_string for issue in issues]
self.assertIn("KQ0I97OBuPlGB9yPRxoSxnX52zE=", actual_issues)
self.assertIn(
Expand All @@ -40,7 +39,7 @@ def test_scan_should_raise_click_error_on_file_permissions_issues(self):
folder_path = pathlib.Path(__file__).parent / "data" / "scan_folder"
recurse = True
self.global_options.entropy = True
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)

Expand All @@ -52,15 +51,14 @@ def test_scan_all_the_files_recursively(self):
folder_path = pathlib.Path(__file__).parent / "data" / "scan_folder"
recurse = True
self.global_options.entropy = True
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()
self.global_options.b64_entropy_score = 4.5
self.global_options.hex_entropy_score = 3

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)
issues = list(test_scanner.scan())

self.assertEqual(3, len(issues))
actual_issues = []
actual_issues = [issue.matched_string for issue in issues]
self.assertEqual(2, actual_issues.count("KQ0I97OBuPlGB9yPRxoSxnX52zE="))
self.assertIn(
Expand All @@ -72,15 +70,14 @@ def test_scan_only_root_level_files(self):
folder_path = pathlib.Path(__file__).parent / "data" / "scan_folder"
recurse = False
self.global_options.entropy = True
self.global_options.exclude_signatures = []
self.global_options.exclude_signatures = ()
self.global_options.b64_entropy_score = 4.5
self.global_options.hex_entropy_score = 3

test_scanner = scanner.FolderScanner(self.global_options, folder_path, recurse)
issues = list(test_scanner.scan())

self.assertEqual(2, len(issues))
actual_issues = []
actual_issues = [issue.matched_string for issue in issues]
self.assertEqual(2, actual_issues.count("KQ0I97OBuPlGB9yPRxoSxnX52zE="))

Expand Down
15 changes: 0 additions & 15 deletions tests/test_git_repo_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,21 +93,6 @@ def test_extra_exclusions_get_added(self, mock_load: mock.MagicMock):
],
)

@mock.patch("pygit2.Repository", new=mock.MagicMock())
@mock.patch("tartufo.config.load_config_from_path")
def test_extra_signatures_get_added(self, mock_load: mock.MagicMock):
mock_load.return_value = (
self.data_dir / "pyproject.toml",
{"exclude_signatures": ["foo", "bar"]},
)
test_scanner = scanner.GitRepoScanner(
self.global_options, self.git_options, str(self.data_dir)
)
test_scanner.load_repo("../tartufo")
self.assertEqual(
sorted(test_scanner.global_options.exclude_signatures), ["bar", "foo"]
)

tarkatronic marked this conversation as resolved.
Show resolved Hide resolved

class FilterSubmoduleTests(ScannerTestCase):
@mock.patch("pygit2.Repository")
Expand Down