Skip to content

Commit

Permalink
Simultaneous regex filtering at descriptor and linter levels (#2669)
Browse files Browse the repository at this point in the history
* Simultaneous regex filtering at descriptor and linter levels

Fixes #2668

* Add test method

* typo

* [MegaLinter] Apply linters fixes

---------

Co-authored-by: nvuillam <nvuillam@users.noreply.github.com>
  • Loading branch information
nvuillam and nvuillam authored May 19, 2023
1 parent 270cd73 commit 63e74ad
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ Note: Can be used with `oxsecurity/megalinter@beta` in your GitHub Action mega-l
- Run stale workflow only on schedule, by @echoix in [#2641](https://github.com/oxsecurity/megalinter/pull/2641)
- Add explicit permissions to stale workflow, by @echoix in [#2641](https://github.com/oxsecurity/megalinter/pull/2641)
- Allow MEGALINTER_CONFIG to contain a full path to a MegaLinter config file
- Simultaneous regex filtering at descriptor and linter levels

- Documentation
- Apply many updates after the use of [Vale](https://vale.sh/) on MegaLinter own sources and docs
Expand Down
2 changes: 1 addition & 1 deletion docs/descriptors/xml_xmllint.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ To apply file formatting you must set `XML_XMLLINT_CLI_LINT_MODE: file` and `XML
| Variable | Description | Default value |
|-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------|
| XML_XMLLINT_AUTOFORMAT | If set to `true`, it will reformat and reindent the output | `false` |
| XML_XMLLINT_INDENT | The number of indentation spaces when `XML_XMLLINT_AUTOFORMAT` is `true` | ` ` |
| XML_XMLLINT_INDENT | The number of indentation spaces when `XML_XMLLINT_AUTOFORMAT` is `true` | `` |
| XML_XMLLINT_ARGUMENTS | User custom arguments to add in linter CLI call<br/>Ex: `-s --foo "bar"` | |
| XML_XMLLINT_FILTER_REGEX_INCLUDE | Custom regex including filter<br/>Ex: `(src\|lib)` | Include every file |
| XML_XMLLINT_FILTER_REGEX_EXCLUDE | Custom regex excluding filter<br/>Ex: `(test\|examples)` | Exclude no file |
Expand Down
26 changes: 15 additions & 11 deletions megalinter/Linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,8 @@ def __init__(self, params=None, linter_config=None):
self.ignore_file_label = None
self.ignore_file_error = None
self.filter_regex_include = None
self.filter_regex_exclude = None
self.filter_regex_exclude_descriptor = None
self.filter_regex_exclude_linter = None
self.post_linter_status = (
params["post_linter_status"]
if "post_linter_status" in params
Expand Down Expand Up @@ -687,17 +688,16 @@ def load_config_vars(self, params):
== "true"
):
self.disable_errors = True
# Exclude regex: try first NAME + _FILTER_REGEX_EXCLUDE, then LANGUAGE + _FILTER_REGEX_EXCLUDE
# Exclude regex: descriptor level
if config.exists(self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"):
self.filter_regex_exclude_descriptor = config.get(
self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"
)
# Exclude regex: linter level
if config.exists(self.request_id, self.name + "_FILTER_REGEX_EXCLUDE"):
self.filter_regex_exclude = config.get(
self.filter_regex_exclude_linter = config.get(
self.request_id, self.name + "_FILTER_REGEX_EXCLUDE"
)
elif config.exists(
self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"
):
self.filter_regex_exclude = config.get(
self.request_id, self.descriptor_id + "_FILTER_REGEX_EXCLUDE"
)
# Override default docker image version
if config.exists(self.request_id, self.name + "_DOCKER_IMAGE_VERSION"):
self.cli_docker_image_version = config.get(
Expand Down Expand Up @@ -832,7 +832,8 @@ def log_file_filters(self):
log_object = {
"name": self.name,
"filter_regex_include": self.filter_regex_include,
"filter_regex_exclude": self.filter_regex_exclude,
"filter_regex_exclude_descriptor": self.filter_regex_exclude_descriptor,
"filter_regex_exclude_linter": self.filter_regex_exclude_linter,
"files_sub_directory": self.files_sub_directory,
"lint_all_files": self.lint_all_files,
"lint_all_other_linters_files": self.lint_all_other_linters_files,
Expand All @@ -851,7 +852,10 @@ def collect_files(self, all_files):
self.files = utils.filter_files(
all_files=all_files,
filter_regex_include=self.filter_regex_include,
filter_regex_exclude=self.filter_regex_exclude,
filter_regex_exclude=[
self.filter_regex_exclude_descriptor,
self.filter_regex_exclude_linter,
],
file_names_regex=self.file_names_regex,
file_extensions=self.file_extensions,
ignored_files=[],
Expand Down
2 changes: 1 addition & 1 deletion megalinter/MegaLinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,7 @@ def collect_files(self):
filtered_files = utils.filter_files(
all_files=all_files,
filter_regex_include=self.filter_regex_include,
filter_regex_exclude=self.filter_regex_exclude,
filter_regex_exclude=[self.filter_regex_exclude],
file_names_regex=self.file_names_regex,
file_extensions=self.file_extensions,
ignored_files=ignored_files,
Expand Down
28 changes: 26 additions & 2 deletions megalinter/tests/test_megalinter/filters_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_filter_files_with_ignored_files(self):
filtered_files = utils.filter_files(
all_files=all_files,
filter_regex_include=None,
filter_regex_exclude=None,
filter_regex_exclude=[None],
file_names_regex=[],
file_extensions=["", ".md", ".ext"],
ignored_files=ignored_files,
Expand All @@ -95,7 +95,7 @@ def test_filter_files_with_file_extensions(self):
filtered_files = utils.filter_files(
all_files=all_files,
filter_regex_include=None,
filter_regex_exclude=None,
filter_regex_exclude=[],
file_names_regex=[],
file_extensions=file_extensions,
ignored_files=[],
Expand All @@ -104,3 +104,27 @@ def test_filter_files_with_file_extensions(self):
self.assertListEqual(
sorted(filtered_files), sorted(expected), f"check {file_extensions}"
)

def test_filter_regex_exclude_multilevel(self):
    """Check that every regex in the exclude list is applied by filter_files.

    Two exclude patterns are passed together; the only path expected to
    remain is ``target/foo.md``.
    """
    # Patterns standing in for a descriptor-level and a linter-level exclusion.
    exclude_patterns = ["(descriptor-level)", "(linter-level)"]
    candidate_paths = [
        "should/be/excluded/descriptor-level/test.md",
        "target/foo.md",
        "should/be/excluded/descriptor-level/test2.md",
        "should/be/excluded/linter-level/test.md",
        "should/be/excluded/linter-level/test2.md",
        # Matches neither pattern; presumably dropped by the ".md" extension
        # filter below -- confirm against utils.filter_files.
        "target/foo2.ext",
    ]
    expected_paths = ["target/foo.md"]

    remaining_paths = utils.filter_files(
        all_files=candidate_paths,
        filter_regex_include=None,
        filter_regex_exclude=exclude_patterns,
        file_names_regex=[],
        file_extensions=[".md"],
        ignored_files=[],
        ignore_generated_files=False,
    )

    self.assertListEqual(
        sorted(remaining_paths),
        sorted(expected_paths),
        "check regex_exclude_multilevel",
    )
24 changes: 16 additions & 8 deletions megalinter/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_excluded_directories(request_id):
def filter_files(
all_files: Sequence[str],
filter_regex_include: Optional[str],
filter_regex_exclude: Optional[str],
filter_regex_exclude: Sequence[str],
file_names_regex: Sequence[str],
file_extensions: Any,
ignored_files: Optional[Sequence[str]],
Expand All @@ -108,9 +108,12 @@ def filter_files(
filter_regex_include_object = (
re.compile(filter_regex_include) if filter_regex_include else None
)
filter_regex_exclude_object = (
re.compile(filter_regex_exclude) if filter_regex_exclude else None
)
filter_regex_exclude_objects = []
for filter_regex_exclude_item in filter_regex_exclude:
filter_regex_exclude_object = (
re.compile(filter_regex_exclude_item) if filter_regex_exclude_item else None
)
filter_regex_exclude_objects += [filter_regex_exclude_object]
file_names_regex_object = re.compile("|".join(file_names_regex))
filtered_files = []
file_contains_regex_object = (
Expand Down Expand Up @@ -152,10 +155,15 @@ def filter_files(
file_with_workspace
):
continue
# Skip according to FILTER_REGEX_EXCLUDE
if filter_regex_exclude_object and filter_regex_exclude_object.search(
file_with_workspace
):
# Skip according to FILTER_REGEX_EXCLUDE list
excluded_by_regex = False
for filter_regex_exclude_object in filter_regex_exclude_objects:
if filter_regex_exclude_object and filter_regex_exclude_object.search(
file_with_workspace
):
excluded_by_regex = True
break
if excluded_by_regex is True:
continue

# Skip according to file extension (only if lint_all_other_linter_files is false or file_extensions is defined)
Expand Down

0 comments on commit 63e74ad

Please sign in to comment.