demisto · dorschw · Jun 16, 2024 · Jun 13, 2024 · Jun 13, 2024 · Jun 13, 2024
diff --git a/.changelog/4341.yml b/.changelog/4341.yml
@@ -0,0 +1,4 @@
+changes:
+- description: Fixed an issue where validations searching for `demisto` in strings would find it in URLs (`/demisto/content`).
+  type: fix
+pr_number: 4341
diff --git a/demisto_sdk/commands/common/tests/tools_test.py b/demisto_sdk/commands/common/tests/tools_test.py
@@ -112,6 +112,7 @@
     parse_multiple_path_inputs,
     run_command_os,
     search_and_delete_from_conf,
+    search_substrings_by_line,
     server_version_compare,
     set_value,
     str2bool,
@@ -3330,3 +3331,82 @@ def test_check_timestamp_format():
     assert not check_timestamp_format(missing_z)
     assert not check_timestamp_format(only_date)
     assert not check_timestamp_format(with_hyphen)
+
+
+class TestSearchSubstringByLine:
+    @staticmethod
+    @pytest.mark.parametrize(
+        "phrases,text,expected_to_find",
+        (
+            pytest.param(["foo"], "foo bar", True, id="found"),
+            pytest.param(
+                ["foo"],
+                "bar baz",
+                False,
+                id="nothing to find",
+            ),
+        ),
+    )
+    def test_search_substring_by_line(
+        phrases: List[str], text: str, expected_to_find: bool
+    ):
+        assert search_substrings_by_line(phrases, text) == (
+            ["1"] if expected_to_find else []
+        )
+
+    @staticmethod
+    @pytest.mark.parametrize(
+        "phrases,text,ignore_case,expected_to_find",
+        (
+            pytest.param(
+                ["foo"],
+                "this is Fooland",
+                False,
+                False,
+                id="case difference, case sensitive, not found",
+            ),
+            pytest.param(
+                ["foo"],
+                "this is Fooland",
+                True,
+                True,
+                id="different case, ignore case, found",
+            ),
+        ),
+    )
+    def test_search_substring_by_line_case(
+        phrases: List[str], text: str, ignore_case: bool, expected_to_find: bool
+    ):
+        assert search_substrings_by_line(phrases, text, ignore_case=ignore_case) == (
+            ["1"] if expected_to_find else []
+        )
+
+    @staticmethod
+    @pytest.mark.parametrize(
+        "phrases,text,exceptions,expected_to_find",
+        (
+            pytest.param(
+                ["foo"],
+                "I like food",
+                [],
+                True,
+                id="no exceptions, foo found in food",
+            ),
+            pytest.param(
+                ["foo"],
+                "I like food",
+                ["food"],
+                False,
+                id="exceptionally ignoring foo in food",
+            ),
+        ),
+    )
+    def test_search_substring_by_line_exceptions(
+        phrases: List[str],
+        text: str,
+        exceptions: Optional[List[str]],
+        expected_to_find: bool,
+    ):
+        assert search_substrings_by_line(
+            phrases, text, exceptionally_allowed_substrings=exceptions
+        ) == (["1"] if expected_to_find else [])
diff --git a/demisto_sdk/commands/common/tools.py b/demisto_sdk/commands/common/tools.py
@@ -4404,31 +4404,47 @@ def is_str_bool(input_: str) -> bool:
         return False
 
 
-def check_text_content_contain_sub_text(
-    sub_text_list: List[str],
-    is_lower: bool = False,
-    to_split: bool = False,
-    text: str = "",
+def search_substrings_by_line(
+    phrases_to_search: List[str],
+    text: str,
+    ignore_case: bool = False,
+    search_whole_word: bool = False,
+    exceptionally_allowed_substrings: Optional[list[str]] = None,
 ) -> List[str]:
     """
-    Args:
-        sub_text_list (List[str]): list of words/sentences to search in line content.
-        is_lower (bool): True to check when line is lower cased.
-        to_split (bool): True to split the line in order to search specific word
-        text (str): The readme content to search.
-
-    Returns:
-        list of lines which contains the given text.
+    Returns the 1-based line numbers (as strings) of the lines in text
+    where a searched phrase is found (one entry per matching phrase).
     """
     invalid_lines = []
 
+    if ignore_case:
+        text = text.casefold()
+        exceptionally_allowed_substrings = [
+            allowed_phrase.casefold()
+            for allowed_phrase in (exceptionally_allowed_substrings or ())
+        ]
+
     for line_num, line in enumerate(text.split("\n")):
-        if is_lower:
-            line = line.lower()
-        if to_split:
-            line = line.split()  # type: ignore
-        for text in sub_text_list:
-            if text in line:
+        if ignore_case:
+            line = line.casefold()
+
+        if search_whole_word:
+            line = line.split()  # type: ignore[assignment]
+
+        for phrase_to_search in phrases_to_search:
+            if phrase_to_search in line:
+                if exceptionally_allowed_substrings and any(
+                    (allowed in line and phrase_to_search in allowed)
+                    for allowed in exceptionally_allowed_substrings
+                ):
+                    """
+                    example: we want to catch 'demisto', but not when it's in a URL.
+                        phrase = 'demisto'
+                        allowed = '/demisto/'
+                        line = 'foo/demisto/bar'
+                    we'll skip this line iff 'demisto' in '/demisto/' and '/demisto/' in 'foo/demisto/bar'
+                    """
+                    continue
                 invalid_lines.append(str(line_num + 1))
 
     return invalid_lines

diff --git a/...commands/validate/validators/BA_validators/BA119_is_py_file_contain_copy_right_section.py b/...commands/validate/validators/BA_validators/BA119_is_py_file_contain_copy_right_section.py
@@ -2,7 +2,7 @@
 
 from typing import Dict, Iterable, List, Union
 
-from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
+from demisto_sdk.commands.common.tools import search_substrings_by_line
 from demisto_sdk.commands.content_graph.objects.integration import Integration
 from demisto_sdk.commands.content_graph.objects.script import Script
 from demisto_sdk.commands.content_graph.parsers.related_files import RelatedFileType
@@ -11,6 +11,7 @@
     ValidationResult,
 )
 
+DISALLOWED_PHRASES = ["BSD", "MIT", "Copyright", "proprietary"]
 ContentTypes = Union[Integration, Script]
 
 
@@ -45,20 +46,18 @@ def get_malformed_files(self, content_item: ContentTypes) -> Dict[str, List[str]
         malformed_files = {}
         if "CommonServerPython" in content_item.name:
             return {}
-        if content_item.code_file.exist and (
-            invalid_lines := check_text_content_contain_sub_text(
-                sub_text_list=["BSD", "MIT", "Copyright", "proprietary"],
-                to_split=True,
-                text=content_item.code_file.file_content,
-            )
-        ):
-            malformed_files["code file"] = invalid_lines
-        if content_item.test_code_file.exist and (
-            invalid_lines := check_text_content_contain_sub_text(
-                sub_text_list=["BSD", "MIT", "Copyright", "proprietary"],
-                to_split=True,
-                text=content_item.test_code_file.file_content,
-            )
+
+        for nickname, file in (
+            ("code file", content_item.code_file),
+            ("test code file", content_item.test_code_file),
         ):
-            malformed_files["test code file"] = invalid_lines
+            if file.exist and (
+                invalid_lines := search_substrings_by_line(
+                    phrases_to_search=DISALLOWED_PHRASES,
+                    search_whole_word=True,
+                    text=file.file_content,
+                )
+            ):
+                malformed_files[nickname] = invalid_lines
+
         return malformed_files
diff --git a/.../commands/validate/validators/DS_validators/DS107_is_description_contains_demisto_word.py b/.../commands/validate/validators/DS_validators/DS107_is_description_contains_demisto_word.py
@@ -3,7 +3,7 @@
 from typing import Iterable, List
 
 from demisto_sdk.commands.common.constants import GitStatuses
-from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
+from demisto_sdk.commands.common.tools import search_substrings_by_line
 from demisto_sdk.commands.content_graph.objects.integration import Integration
 from demisto_sdk.commands.content_graph.parsers.related_files import RelatedFileType
 from demisto_sdk.commands.validate.validators.base_validator import (
@@ -37,10 +37,11 @@ def is_valid(self, content_items: Iterable[ContentTypes]) -> List[ValidationResu
             )
             for content_item in content_items
             if (
-                lines_contain_demsito := check_text_content_contain_sub_text(
-                    sub_text_list=["demisto"],
-                    is_lower=True,
+                lines_contain_demsito := search_substrings_by_line(
+                    phrases_to_search=["demisto"],
+                    ignore_case=True,
                     text=content_item.description_file.file_content,
+                    exceptionally_allowed_substrings=["/demisto/"],  # in URL
                 )
             )
         ]
diff --git a/demisto_sdk/commands/validate/validators/RM_validators/RM106_is_contain_demisto_word.py b/demisto_sdk/commands/validate/validators/RM_validators/RM106_is_contain_demisto_word.py
@@ -3,7 +3,7 @@
 from typing import Iterable, List, Union
 
 from demisto_sdk.commands.common.constants import GitStatuses
-from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
+from demisto_sdk.commands.common.tools import search_substrings_by_line
 from demisto_sdk.commands.content_graph.objects.integration import Integration
 from demisto_sdk.commands.content_graph.objects.pack import Pack
 from demisto_sdk.commands.content_graph.objects.playbook import Playbook
@@ -40,10 +40,11 @@ def is_valid(self, content_items: Iterable[ContentTypes]) -> List[ValidationResu
             )
             for content_item in content_items
             if (
-                lines_contain_demsito := check_text_content_contain_sub_text(
-                    sub_text_list=["demisto"],
-                    is_lower=True,
+                lines_contain_demsito := search_substrings_by_line(
+                    phrases_to_search=["demisto"],
+                    ignore_case=True,
                     text=content_item.readme.file_content,
+                    exceptionally_allowed_substrings=["/demisto/"],  # in URL
                 )
             )
         ]
diff --git a/...sto_sdk/commands/validate/validators/RM_validators/RM113_is_contain_copy_right_section.py b/...sto_sdk/commands/validate/validators/RM_validators/RM113_is_contain_copy_right_section.py
@@ -2,7 +2,7 @@
 
 from typing import Iterable, List
 
-from demisto_sdk.commands.common.tools import check_text_content_contain_sub_text
+from demisto_sdk.commands.common.tools import search_substrings_by_line
 from demisto_sdk.commands.content_graph.objects.pack import Pack
 from demisto_sdk.commands.content_graph.parsers.related_files import RelatedFileType
 from demisto_sdk.commands.validate.validators.base_validator import (
@@ -31,9 +31,9 @@ def is_valid(self, content_items: Iterable[ContentTypes]) -> List[ValidationResu
             )
             for content_item in content_items
             if (
-                invalid_lines := check_text_content_contain_sub_text(
-                    sub_text_list=["BSD", "MIT", "Copyright", "proprietary"],
-                    to_split=True,
+                invalid_lines := search_substrings_by_line(
+                    phrases_to_search=["BSD", "MIT", "Copyright", "proprietary"],
+                    search_whole_word=True,
                     text=content_item.readme.file_content,
                 )
             )