Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix several accidentally quadratic functions #792

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/spdx_tools/common/typing/dataclass_with_properties.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass
from dataclasses import dataclass, astuple

from beartype import beartype
from beartype.roar import BeartypeCallHintException


def freeze_dataclass_with_properties_list(items):
return {astuple(itm) for itm in items}


def dataclass_with_properties(cls):
"""Decorator to generate a dataclass with properties out of the class' value:type list.
Their getters and setters will be subjected to the @typechecked decorator to ensure type conformity."""
Expand Down
17 changes: 16 additions & 1 deletion src/spdx_tools/spdx/model/document.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from dataclasses import field
from dataclasses import field, astuple
from datetime import datetime

from beartype.typing import List, Optional
Expand Down Expand Up @@ -81,3 +81,18 @@ def __init__(
relationships = [] if relationships is None else relationships
extracted_licensing_info = [] if extracted_licensing_info is None else extracted_licensing_info
check_types_and_set_values(self, locals())


def document_cache(func):
cache = {}

def cached_function(document: Document):
key = id(document)
if key in cache.keys():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This returns an outdated cache, if the document was modified?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or in other words: does the cache key id(document) change, if I mutate some stuff inside the document?

Maybe a less global caching would be more appropriate. So that its scope is controlled in a way so that no mutation can happen in between.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. From the docs:

id(object)

    Return the “identity” of an object. This is an integer which is guaranteed to be unique and constant for this object during its lifetime.
Two objects with non-overlapping lifetimes may have the same [id()](https://docs.python.org/3/library/functions.html#id) value.
``

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this is exactly the problem I am afraid of

>>> a = {1}
>>> id(a)
139842939239360
>>> a.add(2)
>>> a
{1, 2}
>>> id(a)
139842939239360

The object has the same ID and it does not change even if its content is mutated. So it is maybe not a good cache key.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can adjust the cache key to be based on the fields you want but once the object is added to the cache it will still not reflect changes. If the goal is to build a fast search over mutable objects, a more complex approach/data structure may be necessary for representing collections of Documents and Relationships.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that the library should allow for the following usecase:

  • Validation of the document
  • mutation of that document, or some of the contained elements
  • validation of the new state, without false values due to caching

return cache[key]
else:
value = func(document)
cache[key] = value
return value

return cached_function
28 changes: 16 additions & 12 deletions src/spdx_tools/spdx/parser/jsonlikedict/relationship_parser.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# SPDX-FileCopyrightText: 2022 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from beartype.typing import Dict, List, Optional
from dataclasses import astuple

from beartype.typing import Dict, List, Optional, Set

from spdx_tools.common.typing.constructor_type_errors import ConstructorTypeErrors
from spdx_tools.common.typing.dataclass_with_properties import freeze_dataclass_with_properties_list
from spdx_tools.spdx.model import Relationship, RelationshipType
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.jsonlikedict.dict_parsing_functions import (
Expand Down Expand Up @@ -35,9 +38,12 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
document_describes: List[str] = delete_duplicates_from_list(input_doc_dict.get("documentDescribes", []))
doc_spdx_id: Optional[str] = input_doc_dict.get("SPDXID")

existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
relationships
)
relationship_hash = lambda r: hash("{} -> {} ({})" \
.format(r.spdx_element_id,
str(r.related_spdx_element_id),
str(r.relationship_type)))
existing_relationships_without_comments: Set[Relationship] = freeze_dataclass_with_properties_list(
self.get_all_relationships_without_comments(relationships))
relationships.extend(
parse_field_or_log_error(
self.logger,
Expand All @@ -52,9 +58,6 @@ def parse_all_relationships(self, input_doc_dict: Dict) -> List[Relationship]:
)

package_dicts: List[Dict] = input_doc_dict.get("packages", [])
existing_relationships_without_comments: List[Relationship] = self.get_all_relationships_without_comments(
relationships
)

relationships.extend(
parse_field_or_log_error(
Expand Down Expand Up @@ -110,7 +113,7 @@ def parse_relationship_type(relationship_type_str: str) -> RelationshipType:
return relationship_type

def parse_document_describes(
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: List[Relationship]
self, doc_spdx_id: str, described_spdx_ids: List[str], existing_relationships: Set[Relationship]
) -> List[Relationship]:
logger = Logger()
describes_relationships = []
Expand All @@ -131,10 +134,11 @@ def parse_document_describes(
return describes_relationships

def parse_has_files(
self, package_dicts: List[Dict], existing_relationships: List[Relationship]
self, package_dicts: List[Dict], existing_relationships: Set[Relationship]
) -> List[Relationship]:
# assume existing relationships are stripped of comments
logger = Logger()

contains_relationships = []
for package in package_dicts:
package_spdx_id: Optional[str] = package.get("SPDXID")
Expand All @@ -160,13 +164,13 @@ def parse_has_files(
return contains_relationships

def check_if_relationship_exists(
self, relationship: Relationship, existing_relationships: List[Relationship]
self, relationship: Relationship, existing_relationships: Set[Relationship]
) -> bool:
# assume existing relationships are stripped of comments
if relationship in existing_relationships:
if astuple(relationship) in existing_relationships:
return True
relationship_inverted: Relationship = self.invert_relationship(relationship)
if relationship_inverted in existing_relationships:
if astuple(relationship_inverted) in existing_relationships:
return True

return False
Expand Down
9 changes: 7 additions & 2 deletions src/spdx_tools/spdx/validation/spdx_id_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@

import re

from beartype.typing import List
from beartype.typing import List, Set

from spdx_tools.spdx.document_utils import get_contained_spdx_element_ids
from spdx_tools.spdx.model import Document, File
from spdx_tools.spdx.model.document import document_cache


def is_valid_internal_spdx_id(spdx_id: str) -> bool:
Expand All @@ -23,10 +24,14 @@ def is_spdx_id_present_in_files(spdx_id: str, files: List[File]) -> bool:


def is_spdx_id_present_in_document(spdx_id: str, document: Document) -> bool:
all_spdx_ids_in_document: List[str] = get_list_of_all_spdx_ids(document)
all_spdx_ids_in_document: Set[str] = get_set_of_all_spdx_ids(document)

return spdx_id in all_spdx_ids_in_document

@document_cache
def get_set_of_all_spdx_ids(document: Document) -> Set[str]:
return set(get_list_of_all_spdx_ids(document))


def get_list_of_all_spdx_ids(document: Document) -> List[str]:
all_spdx_ids_in_document: List[str] = [document.creation_info.spdx_id]
Expand Down
Loading