spdx · jonjohnsonjr · Jan 29, 2024 · Feb 14, 2024 · Feb 14, 2024 · maxhbr
diff --git a/src/spdx_tools/common/typing/dataclass_with_properties.py b/src/spdx_tools/common/typing/dataclass_with_properties.py
@@ -1,12 +1,20 @@
 # SPDX-FileCopyrightText: 2022 spdx contributors
 #
 # SPDX-License-Identifier: Apache-2.0
-from dataclasses import dataclass
+from dataclasses import dataclass, astuple
 
 from beartype import beartype
 from beartype.roar import BeartypeCallHintException
 
 
+def freeze_dataclass_with_properties_list(items):
+    """Return the set of ``dataclasses.astuple`` tuples of the dataclass instances in ``items``.
+
+    Duplicates collapse into one entry; the resulting tuples must be hashable to be stored in the set.
+    """
+    return set(map(astuple, items))
+
+
 def dataclass_with_properties(cls):
     """Decorator to generate a dataclass with properties out of the class' value:type list.
     Their getters and setters will be subjected to the @typechecked decorator to ensure type conformity."""

diff --git a/src/spdx_tools/spdx/model/document.py b/src/spdx_tools/spdx/model/document.py
@@ -1,7 +1,7 @@
 # SPDX-FileCopyrightText: 2022 spdx contributors
 #
 # SPDX-License-Identifier: Apache-2.0
-from dataclasses import field
+from dataclasses import field, astuple
 from datetime import datetime
 
 from beartype.typing import List, Optional
@@ -81,3 +81,31 @@ def __init__(
         relationships = [] if relationships is None else relationships
         extracted_licensing_info = [] if extracted_licensing_info is None else extracted_licensing_info
         check_types_and_set_values(self, locals())
+
+
+def document_cache(func):
+    """Memoize a one-argument function of a ``Document``, keyed by object identity.
+
+    The result of ``func(document)`` is computed once per live ``Document`` instance and reused on
+    later calls with the same instance. Entries are keyed by ``id(document)`` and evicted when the
+    instance is garbage collected, so an ``id()`` recycled by a newer object never returns the
+    older object's result and the cache does not grow without bound.
+
+    Note: a cached value is not refreshed if the document is mutated after the first call.
+    """
+    import weakref
+    from functools import wraps
+
+    cache = {}
+
+    @wraps(func)
+    def cached_function(document: Document):
+        key = id(document)
+        if key not in cache:
+            cache[key] = func(document)
+            # id() values are only unique among live objects: drop the entry as soon as the
+            # document dies so a later object reusing this id cannot hit a stale result.
+            weakref.finalize(document, cache.pop, key, None)
+        return cache[key]
+
+    return cached_function
diff --git a/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py b/src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
@@ -1,9 +1,12 @@
 # SPDX-FileCopyrightText: 2022 spdx contributors
 #
 # SPDX-License-Identifier: Apache-2.0
-from beartype.typing import Dict, List, Optional
+from dataclasses import astuple
+
+from beartype.typing import Dict, List, Optional, Set
 
 from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors
+from spdx_tools.common.typing.dataclass_with_properties import freeze_dataclass_with_properties_list
 from spdx_tools.spdx.model import Relationship, RelationshipType
 from spdx_tools.spdx.parser.error import SPDXParsingError
 from spdx_tools.spdx.parser.jsonlikedict.dict_parsing_functions import (
@@ -35,9 +38,12 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
         document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
         doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")
 
-        existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
-            relationships
-        )
+        relationship_hash = lambda r: hash("{} -> {} ({})" \
+                .format(r.spdx_element_id,
+                        str(r.related_spdx_element_id),
+                        str(r.relationship_type)))
+        existing_relationships_without_comments: Set[Relationship] = freeze_dataclass_with_properties_list(
+                self.get_all_relationships_without_comments(relationships))
         relationships.extend(
             parse_field_or_log_error(
                 self.logger,
@@ -52,9 +58,6 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
         )
 
         package_dicts: List[Dict] = input_doc_dict.get("packages", [])
-        existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
-            relationships
-        )
 
         relationships.extend(
             parse_field_or_log_error(
@@ -110,7 +113,7 @@ def parse_relationship_type(relationship_type_str: str) -> RelationshipType:
         return relationship_type
 
     def parse_document_describes(
-        self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: List[Relationship]
+        self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: Set[Relationship]
     ) -> List[Relationship]:
         logger = Logger()
         describes_relationships = []
@@ -131,10 +134,11 @@ def parse_document_describes(
         return describes_relationships
 
     def parse_has_files(
-        self, package_dicts: List[Dict], existing_relationships: List[Relationship]
+        self, package_dicts: List[Dict], existing_relationships: Set[Relationship]
     ) -> List[Relationship]:
         # assume existing relationships are stripped of comments
         logger = Logger()
+
         contains_relationships = []
         for package in package_dicts:
             package_spdx_id: Optional[str] = package.get("SPDXID")
@@ -160,13 +164,16 @@ def parse_has_files(
         return contains_relationships
 
     def check_if_relationship_exists(
-        self, relationship: Relationship, existing_relationships: List[Relationship]
+        self, relationship: Relationship, existing_relationships: Set[tuple]
     ) -> bool:
-        # assume existing relationships are stripped of comments
-        if relationship in existing_relationships:
+        """Report whether ``relationship`` or its inverse is already among ``existing_relationships``.
+
+        ``existing_relationships`` holds ``dataclasses.astuple`` snapshots rather than ``Relationship``
+        objects, so each candidate is converted to a tuple before the membership test. The snapshots
+        are assumed to be stripped of comments.
+        """
+        if astuple(relationship) in existing_relationships:
             return True
-        relationship_inverted: Relationship = self.invert_relationship(relationship)
-        if relationship_inverted in existing_relationships:
-            return True
-
-        return False
+        # Only build the inverse when the direct form is absent.
+        inverted_relationship: Relationship = self.invert_relationship(relationship)
+        return astuple(inverted_relationship) in existing_relationships

diff --git a/src/spdx_tools/spdx/validation/spdx_id_validators.py b/src/spdx_tools/spdx/validation/spdx_id_validators.py
@@ -4,10 +4,11 @@
 
 import re
 
-from beartype.typing import List
+from beartype.typing import List, Set
 
 from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids
 from spdx_tools.spdx.model import Document, File
+from spdx_tools.spdx.model.document import document_cache
 
 
 def is_valid_internal_spdx_id(spdx_id: str) -> bool:
@@ -23,10 +24,20 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool:
 
 
 def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool:
-    all_spdx_ids_in_document: List[str] = get_list_of_all_spdx_ids(document)
+    """Return whether ``spdx_id`` is among the SPDX ids collected from ``document``."""
+    all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document)
 
     return spdx_id in all_spdx_ids_in_document
 
+
+@document_cache
+def get_set_of_all_spdx_ids(document: Document) -> Set[str]:
+    """Return the ids from ``get_list_of_all_spdx_ids(document)`` as a set for membership tests.
+
+    NOTE: ``document_cache`` memoizes the set per ``Document`` object, so ids added to the same
+    document after the first call are not reflected.
+    """
+    return set(get_list_of_all_spdx_ids(document))
+
 
 def get_list_of_all_spdx_ids(document: Document) -> List[str]:
     all_spdx_ids_in_document: List[str] = [document.creation_info.spdx_id]