Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add exclude-files and exclude-lines args to scan #18

Merged
merged 3 commits into from
Feb 13, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions detect_secrets_server/actions/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

try:
FileNotFoundError
except NameError:
except NameError: # pragma: no cover
FileNotFoundError = IOError


Expand All @@ -26,7 +26,10 @@ def scan_repo(args):
log.error('Unable to find repo: %s', args.repo)
return 1

secrets = repo.scan()
secrets = repo.scan(
exclude_files_regex=args.exclude_files,
exclude_lines_regex=args.exclude_lines,
)

if len(secrets.data) > 0:
_alert_on_secrets_found(repo, secrets.json(), args.output_hook)
Expand Down
4 changes: 2 additions & 2 deletions detect_secrets_server/core/usage/add.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ def _add_initialize_options(self):
type=str,
nargs=1,
help=(
'Filenames that match this regex will be ignored when '
'scanning for secrets.'
'This regex will be added to repo metadata files when '
'adding a repository or overriding an existing one.'
),
metavar='REGEX',
)
Expand Down
5 changes: 4 additions & 1 deletion detect_secrets_server/core/usage/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,14 @@ def apply_default_plugin_options_to_repos(args):
This applies default values to the config file, if appropriate.
"""
for tracked_repo in args.repo:
# TODO (Issue 17): exclude_regex in repo metadata is intentionally left untouched.
# For now it is ignored in favor of the exclusion CLI args given when calling `scan`.
# (This is safe to ignore here because this function is only called by `add`.)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a TODO for grep-ability?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

++ 🎓

for key in (
'baseline',
'crontab',
'exclude_regex',
'storage',
'crontab',
):
if key not in tracked_repo:
tracked_repo[key] = getattr(args, key)
Expand Down
19 changes: 19 additions & 0 deletions detect_secrets_server/core/usage/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,25 @@ def add_arguments(self):
),
)

self.parser.add_argument(
'--exclude-files',
type=str,
help=(
'Filenames that match this regex will be ignored when '
'scanning for secrets.'
),
metavar='REGEX',
)
self.parser.add_argument(
'--exclude-lines',
type=str,
help=(
'Lines that match this regex will be ignored when '
'scanning for secrets.'
),
metavar='REGEX',
)

self.add_local_flag()
for option in [PluginOptions, OutputOptions]:
option(self.parser).add_arguments()
Expand Down
28 changes: 21 additions & 7 deletions detect_secrets_server/repos/base_tracked_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ def __init__(
**kwargs
):
"""
:type repo: string
:type repo: str
:param repo: git URL or local path of repo

:type sha: string
:type sha: str
:param sha: last commit hash scanned

:type plugins: dict
Expand All @@ -56,7 +56,7 @@ def __init__(
:param exclude_regex: used for repository scanning; if a filename
matches this exclude_regex, it is not scanned.

:type crontab: string
:type crontab: str
:param crontab: crontab syntax, for periodic scanning.

:type baseline_filename: str
Expand Down Expand Up @@ -87,7 +87,7 @@ def load_from_file(

The meta tracked file is in the format of self.__dict__

:type repo_name: string
:type repo_name: str
:param repo_name: If the git URL is `git@github.com:yelp/detect-secrets`
this value will be `yelp/detect-secrets`

Expand All @@ -114,19 +114,33 @@ def get_tracked_repo_data(cls, storage, repo_name):
def name(self):
return self.storage.repository_name

def scan(self):
def scan(self, exclude_files_regex=None, exclude_lines_regex=None):
"""Clones the repo, and scans the git diff between last_commit_hash
and HEAD.

:raises: subprocess.CalledProcessError

:type exclude_files_regex: str|None
:param exclude_files_regex: A regex matching filenames to skip over.

:type exclude_lines_regex: str|None
:param exclude_lines_regex: A regex matching lines to skip over.

:rtype: SecretsCollection
:returns: secrets found.
"""
self.storage.clone_and_pull_master()

default_plugins = initialize_plugins.from_parser_builder(self.plugin_config)
secrets = SecretsCollection(default_plugins, self.exclude_regex)
default_plugins = initialize_plugins.from_parser_builder(
self.plugin_config,
exclude_lines_regex=exclude_lines_regex,
)
# Issue 17: Ignoring self.exclude_regex, using the server scan CLI arg
secrets = SecretsCollection(
plugins=default_plugins,
exclude_files=exclude_files_regex,
exclude_lines=exclude_lines_regex,
)

try:
diff = self.storage.get_diff(self.last_commit_hash)
Expand Down
5 changes: 3 additions & 2 deletions testing/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,17 @@ def metadata_factory(repo, json=False, **kwargs):
"crontab": "0 0 * * *",
"exclude_regex": None,
"plugins": {
"AWSKeyDetector": {},
"Base64HighEntropyString": {
"base64_limit": 4.5,
},
"BasicAuthDetector": {},
"HexHighEntropyString": {
"hex_limit": 3,
},
"PrivateKeyDetector": {},
"KeywordDetector": {},
"AWSKeyDetector": {},
"PrivateKeyDetector": {},
"SlackDetector": {}
},
"repo": repo,
"sha": 'sha256-hash',
Expand Down
21 changes: 12 additions & 9 deletions tests/actions/initialize_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,17 @@ def test_simple_success(self, mock_rootdir):
sha='',
crontab='0 0 * * *',
plugins={
'HexHighEntropyString': {
'hex_limit': 3,
},
'AWSKeyDetector': {},
'Base64HighEntropyString': {
'base64_limit': 4.5,
},
'PrivateKeyDetector': {},
'BasicAuthDetector': {},
'HexHighEntropyString': {
'hex_limit': 3,
},
'KeywordDetector': {},
'AWSKeyDetector': {},
'PrivateKeyDetector': {},
'SlackDetector': {},
},
rootdir=mock_rootdir,
baseline_filename=None,
Expand Down Expand Up @@ -167,15 +168,17 @@ def test_repo_config_overrides_defaults(self, mock_rootdir):
sha='',
crontab='* * 4 * *',
plugins={
'HexHighEntropyString': {
'hex_limit': 4.0,
},
# (No PrivateKeyDetector)
'AWSKeyDetector': {},
'Base64HighEntropyString': {
'base64_limit': 2.0,
},
'BasicAuthDetector': {},
'HexHighEntropyString': {
'hex_limit': 4.0,
},
'KeywordDetector': {},
'AWSKeyDetector': {},
'SlackDetector': {},
},
rootdir=mock_rootdir,
baseline_filename='baseline.file',
Expand Down
39 changes: 38 additions & 1 deletion tests/repos/base_tracked_repo_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,43 @@ def test_no_baseline(self, mock_logic, mock_rootdir):
# It matches both HexHighEntropyString and AWSKeyDetector
assert len(secrets.data['examples/aws_credentials.json']) == 2

def test_exclude_files(self, mock_logic, mock_rootdir):
# Scanning with `exclude_files_regex` should keep matching files out of the results.
repo = mock_logic()
with mock_git_calls(*self.git_calls(mock_rootdir)):
# The pattern is anchored with `$`, so it matches paths that end with this name.
# NOTE(review): the `.` is unescaped, so it matches any character, not only a dot.
secrets = repo.scan(exclude_files_regex=r'aws_credentials.json$')

# The excluded file must not appear as a key in the collected secrets.
assert 'examples/aws_credentials.json' not in secrets.data

@pytest.mark.parametrize(
'exclude_lines_regex, expected_line_number',
[
# Each case pairs a regex for lines to ignore with the line number of the
# single secret that is still expected to be reported afterwards.
# presumably the fixture has `accessKeyId` on line 2 and `secretAccessKey`
# on line 3 — confirm against examples/aws_credentials.json
(
r'accessKeyId',
3
),
(
r'secretAccessKey',
2,
),
],
)
def test_exclude_lines(
self,
mock_logic,
mock_rootdir,
exclude_lines_regex,
expected_line_number,
):
# Scanning with `exclude_lines_regex` should skip secrets on matching lines.
repo = mock_logic()
with mock_git_calls(*self.git_calls(mock_rootdir)):
secrets = repo.scan(exclude_lines_regex=exclude_lines_regex)

# Exactly one file is reported, and it holds exactly one secret.
assert len(secrets.data) == 1
assert len(secrets.data['examples/aws_credentials.json']) == 1

# The surviving secret must be the one on the line that was not excluded.
for _, secret in secrets.data['examples/aws_credentials.json'].items():
assert secret.lineno == expected_line_number

def test_unable_to_find_baseline(self, mock_logic, mock_rootdir):
calls = self.git_calls(mock_rootdir)
calls[-1] = SubprocessMock(
Expand Down Expand Up @@ -211,7 +248,7 @@ def test_success(self, mock_logic):
class TestSave(object):

@pytest.mark.parametrize(
'override_level,is_file,mocked_input',
'override_level, is_file, mocked_input',
[
# OverrideLevel doesn't matter if no file exists.
(OverrideLevel.NEVER, False, '',),
Expand Down