Skip to content

Commit

Permalink
Feature/secureli 460 custom regex (#564)
Browse files Browse the repository at this point in the history
secureli-460

Adds a new `--new-pattern` option to `secureli update` and updates
`secureli scan` to check staged files against the custom regex patterns


## Changes

*

## Testing
<!--
Mention updated tests and any manual testing performed.
Are aspects not yet tested or not easily testable?
Feel free to include screenshots if appropriate.
 -->
* run `secureli update --new-pattern <custom-regex-pattern>`
* verify that your custom regex pattern was added to `.secureli.yaml`
* modify any file (other than .secureli.yaml) to include text that
matches your custom regex
* stage the changes
* run `secureli scan` and verify the scan fails
* revert the changes and run `secureli scan` and verify the scan passes

## Clean Code Checklist
<!-- This is here to support you. Some/most checkboxes may not apply to
your change -->
- [x] Meets acceptance criteria for issue
- [x] New logic is covered with automated tests
- [x] Appropriate exception handling added
- [x] Thoughtful logging included
- [x] Documentation is updated
- [x] Follow-up work is documented in TODOs
- [x] TODOs have a ticket associated with them
- [x] No commented-out code included


<!--
Github-flavored markdown reference:
https://docs.github.com/en/get-started/writing-on-github
-->

---------

Co-authored-by: Rachel Teal <rachel.teal@slalom.com>
  • Loading branch information
doug-szeto-slalom and rt-slalom authored Jun 11, 2024
1 parent b48109c commit 1917cca
Show file tree
Hide file tree
Showing 13 changed files with 677 additions and 8 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ seCureLI utilizes its own PII scan, rather than using an existing pre-commit hoo
```
test_var = "some dummy data I don't want scanned" # disable-pii-scan
```
### Custom Regex Scan
seCureLI utilizes its own custom regex scan to flag any text that matches a user-provided regex pattern. To include a regex pattern in the scan, add the pattern to your `.secureli.yaml` by running
```
secureli update --new-pattern <your-custom-regex>
```
## Upgrade
Expand Down
32 changes: 31 additions & 1 deletion secureli/actions/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from secureli.modules.observability.observability_services.logging import LoggingService
from secureli.modules.core.core_services.scanner import HooksScannerService
from secureli.modules.pii_scanner.pii_scanner import PiiScannerService
from secureli.modules.custom_regex_scanner.custom_regex_scanner import (
CustomRegexScannerService,
)
from secureli.modules.shared.models.scan import ScanMode, ScanResult
from secureli.settings import Settings
from secureli.modules.shared import utilities
Expand All @@ -38,11 +41,13 @@ def __init__(
action_deps: action.ActionDependencies,
hooks_scanner: HooksScannerService,
pii_scanner: PiiScannerService,
custom_regex_scanner: CustomRegexScannerService,
git_repo: GitRepo,
):
super().__init__(action_deps)
self.hooks_scanner = hooks_scanner
self.pii_scanner = pii_scanner
self.custom_regex_scanner = custom_regex_scanner
self.git_repo = git_repo

def publish_results(
Expand Down Expand Up @@ -115,17 +120,32 @@ def scan_repo(

# Execute PII scan (unless `specific_test` is provided, in which case it will be for a hook below)
pii_scan_result: ScanResult | None = None
custom_regex_patterns = self._get_custom_scan_patterns(folder_path=folder_path)
custom_scan_result: ScanResult | None = None
if not specific_test:
pii_scan_result = self.pii_scanner.scan_repo(
folder_path, scan_mode, files=files
)

custom_scan_result = self.custom_regex_scanner.scan_repo(
folder_path=folder_path,
scan_mode=scan_mode,
files=files,
custom_regex_patterns=custom_regex_patterns,
)

# Execute hooks
hooks_scan_result = self.hooks_scanner.scan_repo(
folder_path, scan_mode, specific_test, files=files
)

scan_result = utilities.merge_scan_results([pii_scan_result, hooks_scan_result])
scan_result = utilities.merge_scan_results(
[
pii_scan_result,
custom_scan_result,
hooks_scan_result,
]
)

details = scan_result.output or "Unknown output during scan"
self.action_deps.echo.print(details)
Expand Down Expand Up @@ -208,3 +228,13 @@ def _get_commited_files(self, scan_mode: ScanMode) -> list[Path]:
return [Path(file) for file in committed_files]
except:
return None

def _get_custom_scan_patterns(self, folder_path: Path) -> list[str]:
    """
    Loads the custom regex scan patterns stored in the settings for a folder
    :param folder_path: The folder whose settings are loaded
    :return: The saved custom scan patterns (regex strings), or an empty list
    when the settings have no scan patterns section or no custom patterns
    """
    settings = self.action_deps.settings.load(folder_path)
    scan_patterns = settings.scan_patterns

    # Either level may be unset; treat both cases as "nothing to scan for".
    if scan_patterns is None or scan_patterns.custom_scan_patterns is None:
        return []
    return scan_patterns.custom_scan_patterns
73 changes: 70 additions & 3 deletions secureli/actions/update.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import Optional
import re
from typing import List, Optional
from pathlib import Path
from secureli.modules.shared.abstractions.echo import EchoAbstraction
from secureli.modules.observability.observability_services.logging import LoggingService
from secureli.modules.core.core_services.updater import UpdaterService
from secureli.actions.action import Action, ActionDependencies
import secureli.modules.shared.models.repository as RepositoryModels

from rich.progress import Progress
from secureli.modules.shared.models.logging import LogAction
Expand Down Expand Up @@ -60,3 +60,70 @@ def update_hooks(self, folder_path: Path, latest: Optional[bool] = False):
else:
self.action_deps.echo.print("Update executed successfully.")
self.action_deps.logging.success(LogAction.update)

def _validate_regex(self, pattern: str) -> bool:
    """
    Checks if a given string is a valid Regex pattern, returns a boolean indicator
    :param pattern: The string to be checked
    :return: True when the pattern compiles, False (after printing a warning) otherwise
    """
    try:
        re.compile(pattern)
    # Only catch what a bad pattern can raise, so KeyboardInterrupt and
    # SystemExit are no longer swallowed the way a bare `except:` did.
    except (re.error, TypeError, OverflowError, RecursionError):
        self.action_deps.echo.warning(
            f'Invalid regex pattern detected: "{pattern}". Excluding pattern.\n'
        )
        return False
    return True

def _validate_pattern(self, pattern, patterns):
    """
    Decides whether a pattern may be added to the given list of patterns
    param pattern: The candidate regex string
    param patterns: The list the candidate is compared against for duplicates
    Returns False with a warning when the candidate is already in the list,
    otherwise returns the result of the regex validity check
    """
    already_saved = pattern in patterns
    if not already_saved:
        return self._validate_regex(pattern)

    self.action_deps.echo.warning(
        f'Duplicate scan pattern detected: "{pattern}". Excluding pattern.'
    )
    return False

def add_pattern(self, folder_path: Path, patterns: List[str]):
    """
    Validates user provided scan patterns and stores them for future use
    :param folder_path: The folder secureli is operating in
    :param patterns: A user provided list of regex patterns to be saved
    """
    settings = self.action_deps.settings.load(folder_path)

    # Start from a copy of the saved patterns so the loaded settings object is
    # not mutated in place. Both the scan_patterns section and its
    # custom_scan_patterns entry may be unset.
    saved_patterns = []
    if (
        settings.scan_patterns is not None
        and settings.scan_patterns.custom_scan_patterns is not None
    ):
        saved_patterns = list(settings.scan_patterns.custom_scan_patterns)

    # dict.fromkeys drops repeated flags while keeping the order the user gave
    # them in; a set would store the new patterns in arbitrary order. Each
    # unique pattern is validated once: invalid regexes and patterns that are
    # already saved are filtered out (with a warning) by _validate_pattern.
    new_patterns = [
        pattern
        for pattern in dict.fromkeys(patterns)
        if self._validate_pattern(pattern, saved_patterns)
    ]
    saved_patterns.extend(new_patterns)

    # Only write the settings file when there is something to store.
    if saved_patterns:
        settings.scan_patterns = RepositoryModels.CustomScanSettings(
            custom_scan_patterns=saved_patterns
        )
        self.action_deps.settings.save(settings)

    self.action_deps.echo.print("Current custom scan patterns:")
    self.action_deps.echo.print(saved_patterns)
8 changes: 8 additions & 0 deletions secureli/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
from secureli.modules.core.core_services.scanner import HooksScannerService
from secureli.modules.core.core_services.updater import UpdaterService
from secureli.modules.pii_scanner.pii_scanner import PiiScannerService
from secureli.modules.custom_regex_scanner.custom_regex_scanner import (
CustomRegexScannerService,
)
from secureli.modules.secureli_ignore import SecureliIgnoreService
from secureli.settings import Settings

Expand Down Expand Up @@ -144,6 +147,10 @@ class Container(containers.DeclarativeContainer):
ignored_extensions=config.pii_scanner.ignored_extensions,
)

custom_regex_scanner_service = providers.Factory(
CustomRegexScannerService, repo_files=repo_files_repository, echo=echo
)

updater_service = providers.Factory(
UpdaterService,
pre_commit=pre_commit_abstraction,
Expand Down Expand Up @@ -184,6 +191,7 @@ class Container(containers.DeclarativeContainer):
action_deps=action_deps,
hooks_scanner=hooks_scanner_service,
pii_scanner=pii_scanner_service,
custom_regex_scanner=custom_regex_scanner_service,
git_repo=git_repo,
)

Expand Down
17 changes: 14 additions & 3 deletions secureli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,23 @@ def update(
help="Run secureli against a specific directory",
),
] = Path("."),
new_patterns: Annotated[
Optional[List[str]],
Option(
"--new-pattern",
"-n",
help="Add a new Regex to the custom scan pattern list",
),
] = None,
):
"""
Update linters, configuration, and all else needed to maintain a secure repository.
Update linters, configuration, custom scan patterns and all else needed to maintain a secure repository.
"""
SecureliConfig.FOLDER_PATH = Path(directory)
container.update_action().update_hooks(Path(directory), latest)
if new_patterns is not None:
container.update_action().add_pattern(Path(directory), new_patterns)
else:
SecureliConfig.FOLDER_PATH = Path(directory)
container.update_action().update_hooks(Path(directory), latest)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit 1917cca

Please sign in to comment.