Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add optional exceptions to search_substrings_by_line #4341

Merged
merged 13 commits into from
Jun 16, 2024
4 changes: 4 additions & 0 deletions .changelog/4341.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
changes:
- description: Fixed an issue where validations searching for `demisto` in strings would find it in URLs (`/demisto/content`).
type: fix
pr_number: 4341
80 changes: 80 additions & 0 deletions demisto_sdk/commands/common/tests/tools_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
parse_multiple_path_inputs,
run_command_os,
search_and_delete_from_conf,
search_substrings_by_line,
server_version_compare,
set_value,
str2bool,
Expand Down Expand Up @@ -3330,3 +3331,82 @@ def test_check_timestamp_format():
assert not check_timestamp_format(missing_z)
assert not check_timestamp_format(only_date)
assert not check_timestamp_format(with_hyphen)


class TestSearchSubstringByLine:
    """
    Tests for `search_substrings_by_line`.

    Every text used here is a single line, so a successful search is
    reported as ["1"] and an unsuccessful one as an empty list.
    """

    @staticmethod
    @pytest.mark.parametrize(
        "phrases,text,expected_to_find",
        (
            pytest.param(["foo"], "foo bar", True, id="found"),
            pytest.param(["foo"], "bar baz", False, id="nothing to find"),
        ),
    )
    def test_search_substring_by_line(
        phrases: List[str], text: str, expected_to_find: bool
    ):
        """A phrase is reported only when it appears in the text (default flags)."""
        expected_lines = ["1"] if expected_to_find else []
        assert search_substrings_by_line(phrases, text) == expected_lines

    @staticmethod
    @pytest.mark.parametrize(
        "phrases,text,ignore_case,expected_to_find",
        (
            pytest.param(
                ["foo"],
                "this is Fooland",
                False,
                False,
                id="case difference, case sensitive, not found",
            ),
            pytest.param(
                ["foo"],
                "this is Fooland",
                True,
                True,
                id="different case, ignore case, found",
            ),
        ),
    )
    def test_search_substring_by_line_case(
        phrases: List[str], text: str, ignore_case: bool, expected_to_find: bool
    ):
        """`ignore_case` decides whether a differently-cased occurrence counts."""
        expected_lines = ["1"] if expected_to_find else []
        found_lines = search_substrings_by_line(
            phrases, text, ignore_case=ignore_case
        )
        assert found_lines == expected_lines

    @staticmethod
    @pytest.mark.parametrize(
        "phrases,text,exceptions,expected_to_find",
        (
            pytest.param(
                ["foo"],
                "I like food",
                [],
                True,
                id="no exceptions, foo found in food",
            ),
            pytest.param(
                ["foo"],
                "I like food",
                ["food"],
                False,
                id="exceptionally ignoring foo in food",
            ),
        ),
    )
    def test_search_substring_by_line_exceptions(
        phrases: List[str],
        text: str,
        exceptions: Optional[List[str]],
        expected_to_find: bool,
    ):
        """A match is dropped when it sits inside an exceptionally allowed substring."""
        expected_lines = ["1"] if expected_to_find else []
        found_lines = search_substrings_by_line(
            phrases, text, exceptionally_allowed_substrings=exceptions
        )
        assert found_lines == expected_lines
54 changes: 35 additions & 19 deletions demisto_sdk/commands/common/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4404,31 +4404,47 @@ def is_str_bool(input_: str) -> bool:
return False


def search_substrings_by_line(
    phrases_to_search: List[str],
    text: str,
    ignore_case: bool = False,
    search_whole_word: bool = False,
    exceptionally_allowed_substrings: Optional[List[str]] = None,
) -> List[str]:
    """
    Find the lines of `text` that contain any of the searched phrases.

    Args:
        phrases_to_search (List[str]): words/sentences to look for in each line.
        text (str): the content to search; lines are separated by "\\n".
        ignore_case (bool): True to compare case-insensitively. The text, the
            searched phrases and the allowed substrings are all casefolded.
        search_whole_word (bool): True to split each line on whitespace and
            require a phrase to equal one of the resulting tokens, rather
            than merely be a substring of the line.
        exceptionally_allowed_substrings (Optional[List[str]]): substrings in
            which a searched phrase is tolerated. A phrase found in a line is
            ignored when some allowed substring both contains the phrase and
            is present in that line.

    Returns:
        The 1-based numbers (as strings) of the lines in which a searched
        phrase was found. Each line is reported at most once, in order.
    """
    phrases = list(phrases_to_search)
    allowed_substrings = list(exceptionally_allowed_substrings or ())

    if ignore_case:
        # Fold every side of the comparison; folding only the text would make
        # any phrase that contains an upper-case character impossible to match.
        text = text.casefold()
        phrases = [phrase.casefold() for phrase in phrases]
        allowed_substrings = [allowed.casefold() for allowed in allowed_substrings]

    invalid_lines: List[str] = []

    for line_num, line in enumerate(text.split("\n"), start=1):
        # In whole-word mode the containment checks below become membership
        # tests against the list of whitespace-separated tokens.
        searched_in = line.split() if search_whole_word else line

        for phrase in phrases:
            if phrase not in searched_in:
                continue

            # Example: we want to catch 'demisto', but not when it's in a URL.
            #   phrase  = 'demisto'
            #   allowed = '/demisto/'
            #   line    = 'foo/demisto/bar'
            # The phrase is skipped for this line only if 'demisto' is in
            # '/demisto/' and '/demisto/' is in 'foo/demisto/bar'.
            if any(
                allowed in searched_in and phrase in allowed
                for allowed in allowed_substrings
            ):
                continue

            invalid_lines.append(str(line_num))
            break  # one report per line is enough, even if more phrases match

    return invalid_lines
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Dict, Iterable, List, Union

from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
from demisto_sdk.commands.common.tools import search_substrings_by_line
from demisto_sdk.commands.content_graph.objects.integration import Integration
from demisto_sdk.commands.content_graph.objects.script import Script
from demisto_sdk.commands.content_graph.parsers.related_files import RelatedFileType
Expand All @@ -11,6 +11,7 @@
ValidationResult,
)

DISALLOWED_PHRASES = ["BSD", "MIT", "Copyright", "proprietary"]
ContentTypes = Union[Integration, Script]


Expand Down Expand Up @@ -45,20 +46,18 @@ def get_malformed_files(self, content_item: ContentTypes) -> Dict[str, List[str]
malformed_files = {}
if "CommonServerPython" in content_item.name:
return {}
if content_item.code_file.exist and (
invalid_lines := check_text_content_contain_sub_text(
sub_text_list=["BSD", "MIT", "Copyright", "proprietary"],
to_split=True,
text=content_item.code_file.file_content,
)
):
malformed_files["code file"] = invalid_lines
if content_item.test_code_file.exist and (
invalid_lines := check_text_content_contain_sub_text(
sub_text_list=["BSD", "MIT", "Copyright", "proprietary"],
to_split=True,
text=content_item.test_code_file.file_content,
)

for nickname, file in (
("code file", content_item.code_file),
("test code file", content_item.test_code_file),
):
malformed_files["test code file"] = invalid_lines
if file.exist and (
invalid_lines := search_substrings_by_line(
phrases_to_search=DISALLOWED_PHRASES,
search_whole_word=True,
text=file.file_content,
)
):
malformed_files[nickname] = invalid_lines

return malformed_files
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Iterable, List

from demisto_sdk.commands.common.constants import GitStatuses
from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
from demisto_sdk.commands.common.tools import search_substrings_by_line
from demisto_sdk.commands.content_graph.objects.integration import Integration
from demisto_sdk.commands.content_graph.parsers.related_files import RelatedFileType
from demisto_sdk.commands.validate.validators.base_validator import (
Expand Down Expand Up @@ -37,10 +37,11 @@ def is_valid(self, content_items: Iterable[ContentTypes]) -> List[ValidationResu
)
for content_item in content_items
if (
lines_contain_demsito := check_text_content_contain_sub_text(
sub_text_list=["demisto"],
is_lower=True,
lines_contain_demsito := search_substrings_by_line(
phrases_to_search=["demisto"],
ignore_case=True,
text=content_item.description_file.file_content,
exceptionally_allowed_substrings=["/demisto/"], # in URL
)
)
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Iterable, List, Union

from demisto_sdk.commands.common.constants import GitStatuses
from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
from demisto_sdk.commands.common.tools import search_substrings_by_line
from demisto_sdk.commands.content_graph.objects.integration import Integration
from demisto_sdk.commands.content_graph.objects.pack import Pack
from demisto_sdk.commands.content_graph.objects.playbook import Playbook
Expand Down Expand Up @@ -40,10 +40,11 @@ def is_valid(self, content_items: Iterable[ContentTypes]) -> List[ValidationResu
)
for content_item in content_items
if (
lines_contain_demsito := check_text_content_contain_sub_text(
sub_text_list=["demisto"],
is_lower=True,
lines_contain_demsito := search_substrings_by_line(
phrases_to_search=["demisto"],
ignore_case=True,
text=content_item.readme.file_content,
exceptionally_allowed_substrings=["/demisto/"], # in URL
)
)
]
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Iterable, List

from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
from demisto_sdk.commands.common.tools import search_substrings_by_line
from demisto_sdk.commands.content_graph.objects.pack import Pack
from demisto_sdk.commands.content_graph.parsers.related_files import RelatedFileType
from demisto_sdk.commands.validate.validators.base_validator import (
Expand Down Expand Up @@ -31,9 +31,9 @@ def is_valid(self, content_items: Iterable[ContentTypes]) -> List[ValidationResu
)
for content_item in content_items
if (
invalid_lines := check_text_content_contain_sub_text(
sub_text_list=["BSD", "MIT", "Copyright", "proprietary"],
to_split=True,
invalid_lines := search_substrings_by_line(
phrases_to_search=["BSD", "MIT", "Copyright", "proprietary"],
search_whole_word=True,
text=content_item.readme.file_content,
)
)
Expand Down
Loading