Skip to content

Commit

Permalink
add wrapper methods to ensure correct types in rdf parser
Browse files Browse the repository at this point in the history
Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed Apr 5, 2023
1 parent 21eec8a commit af1dbfc
Show file tree
Hide file tree
Showing 18 changed files with 165 additions and 59 deletions.
4 changes: 2 additions & 2 deletions src/spdx/parser/rdf/annotation_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from rdflib import RDFS, Graph, URIRef
from rdflib import RDFS, BNode, Graph, URIRef

from spdx.datetime_conversions import datetime_from_str
from spdx.model.annotation import Annotation, AnnotationType
Expand All @@ -12,7 +12,7 @@
from spdx.rdfschema.namespace import SPDX_NAMESPACE


def parse_annotation(annotation_node: URIRef, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation:
def parse_annotation(annotation_node: BNode, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation:
logger = Logger()
spdx_id = parse_spdx_id(parent_node, doc_namespace, graph)
annotator = parse_literal(
Expand Down
4 changes: 2 additions & 2 deletions src/spdx/parser/rdf/checksum_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from rdflib import Graph, URIRef
from rdflib import BNode, Graph

from spdx.model.checksum import Checksum, ChecksumAlgorithm
from spdx.parser.error import SPDXParsingError
Expand All @@ -11,7 +11,7 @@
from spdx.rdfschema.namespace import SPDX_NAMESPACE


def parse_checksum(parent_node: URIRef, graph: Graph) -> Checksum:
def parse_checksum(parent_node: BNode, graph: Graph) -> Checksum:
logger = Logger()
algorithm = parse_literal(
logger, graph, parent_node, SPDX_NAMESPACE.algorithm, parsing_method=convert_rdf_to_algorithm
Expand Down
19 changes: 14 additions & 5 deletions src/spdx/parser/rdf/creation_info_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
from spdx.parser.logger import Logger
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
from spdx.parser.rdf.checksum_parser import parse_checksum
from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_spdx_id, remove_prefix
from spdx.parser.rdf.graph_parsing_functions import (
get_correctly_typed_triples,
parse_literal,
parse_spdx_id,
remove_prefix,
)
from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE


Expand Down Expand Up @@ -50,10 +55,14 @@ def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]:
)
creator_comment = parse_literal(logger, graph, creation_info_node, RDFS.comment)
creators = []
for _, _, creator_literal in graph.triples((creation_info_node, SPDX_NAMESPACE.creator, None)):
creators.append(ActorParser.parse_actor(creator_literal))
for _, _, creator_literal in get_correctly_typed_triples(
logger, graph, creation_info_node, SPDX_NAMESPACE.creator
):
creators.append(ActorParser.parse_actor(creator_literal.toPython()))
external_document_refs = []
for _, _, external_document_node in graph.triples((doc_node, SPDX_NAMESPACE.externalDocumentRef, None)):
for _, _, external_document_node in get_correctly_typed_triples(
logger, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef
):
external_document_refs.append(parse_external_document_refs(external_document_node, graph, namespace))

raise_parsing_error_if_logger_has_messages(logger, "CreationInfo")
Expand Down Expand Up @@ -93,7 +102,7 @@ def parse_namespace_and_spdx_id(graph: Graph) -> (str, str):
)
sys.exit(1)

namespace, spdx_id = urldefrag(subject)
namespace, spdx_id = urldefrag(str(subject))

if not namespace:
logging.error(
Expand Down
10 changes: 8 additions & 2 deletions src/spdx/parser/rdf/extracted_licensing_info_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
from spdx.model.extracted_licensing_info import ExtractedLicensingInfo
from spdx.parser.logger import Logger
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_literal_or_no_assertion_or_none
from spdx.parser.rdf.graph_parsing_functions import (
get_correctly_typed_triples,
parse_literal,
parse_literal_or_no_assertion_or_none,
)
from spdx.rdfschema.namespace import SPDX_NAMESPACE


Expand All @@ -28,7 +32,9 @@ def parse_extracted_licensing_info(
logger, graph, extracted_licensing_info_node, SPDX_NAMESPACE.name
)
cross_references = []
for _, _, cross_reference_node in graph.triples((extracted_licensing_info_node, RDFS.seeAlso, None)):
for _, _, cross_reference_node in get_correctly_typed_triples(
logger, graph, extracted_licensing_info_node, RDFS.seeAlso
):
cross_references.append(cross_reference_node.toPython())
raise_parsing_error_if_logger_has_messages(logger, "ExtractedLicensingInfo")
extracted_licensing_info = construct_or_raise_parsing_error(
Expand Down
17 changes: 12 additions & 5 deletions src/spdx/parser/rdf/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from spdx.parser.rdf.checksum_parser import parse_checksum
from spdx.parser.rdf.graph_parsing_functions import (
apply_parsing_method_or_log_error,
get_correctly_typed_triples,
get_correctly_typed_value,
parse_enum_value,
parse_literal,
Expand All @@ -24,7 +25,7 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File:
spdx_id = parse_spdx_id(file_node, doc_namespace, graph)
name = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.fileName)
checksums = []
for _, _, checksum_node in graph.triples((file_node, SPDX_NAMESPACE.checksum, None)):
for _, _, checksum_node in get_correctly_typed_triples(logger, graph, file_node, SPDX_NAMESPACE.checksum):
checksums.append(parse_checksum(checksum_node, graph))

file_types = []
Expand All @@ -39,25 +40,31 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File:
graph,
file_node,
SPDX_NAMESPACE.licenseConcluded,
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
license_info_in_file = []
for _, _, license_info_from_files_node in graph.triples((file_node, SPDX_NAMESPACE.licenseInfoInFile, None)):
license_info_in_file.append(
get_correctly_typed_value(
logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace)
logger,
license_info_from_files_node,
lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
)
license_comment = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.licenseComments)
copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, file_node, SPDX_NAMESPACE.copyrightText)
file_contributors = []
for _, _, file_contributor in graph.triples((file_node, SPDX_NAMESPACE.fileContributor, None)):
for _, _, file_contributor in get_correctly_typed_triples(
logger, graph, file_node, SPDX_NAMESPACE.fileContributor, None
):
file_contributors.append(file_contributor.toPython())

notice_text = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.noticeText)
comment = parse_literal(logger, graph, file_node, RDFS.comment)
attribution_texts = []
for _, _, attribution_text_literal in graph.triples((file_node, SPDX_NAMESPACE.attributionText, None)):
for _, _, attribution_text_literal in get_correctly_typed_triples(
logger, graph, file_node, SPDX_NAMESPACE.attributionText, None
):
attribution_texts.append(attribution_text_literal.toPython())
raise_parsing_error_if_logger_has_messages(logger, "File")
file = construct_or_raise_parsing_error(
Expand Down
49 changes: 46 additions & 3 deletions src/spdx/parser/rdf/graph_parsing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#
# SPDX-License-Identifier: Apache-2.0
from enum import Enum
from typing import Any, Callable, Optional, Type
from typing import Any, Callable, Iterator, Optional, Tuple, Type, Union

from rdflib import Graph, URIRef
from rdflib import RDF, Graph, URIRef
from rdflib.exceptions import UniquenessError
from rdflib.namespace import NamespaceManager
from rdflib.term import Node
from rdflib.term import BNode, Literal, Node

from spdx.casing_tools import camel_case_to_snake_case
from spdx.model.spdx_no_assertion import SPDX_NO_ASSERTION_STRING, SpdxNoAssertion
Expand Down Expand Up @@ -102,3 +102,46 @@ def remove_prefix(string: str, prefix: str) -> str:
if string.startswith(prefix):
return string[len(prefix) :]
return string


def get_correctly_typed_triples(
    logger: Logger,
    graph: Graph,
    subject: Optional[Node] = None,
    predicate: Optional[Node] = None,
    _object: Optional[Node] = None,
) -> Iterator[Tuple[Union[BNode, URIRef], Node, Union[BNode, Literal, URIRef]]]:
    """Yield the triples of ``graph`` that match the given pattern, reporting unexpected node types.

    This is a thin wrapper around ``graph.triples((subject, predicate, _object))``. Its purpose is to
    give the calling code triples whose subject and object have the rdf types it expects. Nothing is
    converted or filtered: every triple is yielded unchanged, and a warning is appended to ``logger``
    when a subject or object has an unexpected type.

    Because this is a generator, the type checks (and therefore the warnings) happen lazily, while the
    caller iterates over the result.

    :param logger: collects one warning message per subject/object of unexpected type
    :param graph: the graph whose triples are queried
    :param subject: subject part of the pattern, handed to ``graph.triples`` as is
    :param predicate: predicate part of the pattern, handed to ``graph.triples`` as is
    :param _object: object part of the pattern, handed to ``graph.triples`` as is
    :return: an iterator over ``(subject, predicate, object)`` triples
    """
    for s, p, o in graph.triples((subject, predicate, _object)):
        if not isinstance(s, (BNode, URIRef)):
            logger.append(
                f"Warning: Subject {s} should be of type BNode or URIRef, but is {type(s).__name__}. "
                f"This might lead to a failure."
            )
        if not isinstance(o, (BNode, Literal, URIRef)):
            logger.append(
                f"Warning: Object {o} should be of type BNode, Literal or URIRef, but is {type(o).__name__}. "
                f"This might lead to a failure."
            )
        # the triple is passed on even after a warning; the caller decides how to handle it
        yield s, p, o


def get_value_from_graph(
    logger: Logger,
    graph: Graph,
    subject: Optional[Node] = None,
    predicate: Optional[Node] = RDF.value,
    _object: Optional[Node] = None,
    default: Optional[Any] = None,
    _any: Optional[bool] = True,
) -> Optional[Union[URIRef, Literal, BNode]]:
    """Return ``graph.value(...)`` for the given pattern, reporting an unexpected node type.

    This is a thin wrapper around ``graph.value()``. Its purpose is to give the calling code a value
    of an rdf type it expects. The value is never converted: it is returned unchanged, and a warning
    is appended to ``logger`` when a value found in the graph is not a ``URIRef``, ``Literal`` or
    ``BNode``.

    :param logger: collects the warning message if the value has an unexpected type
    :param graph: the graph that is queried
    :param subject: forwarded to ``graph.value`` as ``subject``
    :param predicate: forwarded to ``graph.value`` as ``predicate``
    :param _object: forwarded to ``graph.value`` as ``object``
    :param default: forwarded to ``graph.value`` as ``default``; may be any python object
    :param _any: forwarded to ``graph.value`` as ``any``
    :return: the result of ``graph.value``, which may be ``None`` or the given ``default``
    """
    value = graph.value(subject=subject, predicate=predicate, object=_object, default=default, any=_any)
    # The fallback is chosen by the caller and does not have to be an rdf node (e.g. default=True),
    # so it must not be reported as a wrongly typed node. The same holds for a missing value.
    if value is None or value is default:
        return value
    # Check every other value, including falsy ones, so that no wrongly typed node slips through.
    if not isinstance(value, (URIRef, Literal, BNode)):
        logger.append(
            f"Warning: Node {value} should be of type BNode, Literal or URIRef, but is {type(value).__name__}. "
            f"This might lead to a failure."
        )
    return value
26 changes: 17 additions & 9 deletions src/spdx/parser/rdf/license_expression_parser.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Union
from typing import Optional, Union

from license_expression import LicenseExpression, get_spdx_licensing
from rdflib import RDF, Graph
from rdflib.term import BNode, Identifier, Node, URIRef

from spdx.parser.rdf.graph_parsing_functions import remove_prefix
from spdx.parser.logger import Logger
from spdx.parser.rdf.graph_parsing_functions import get_value_from_graph, remove_prefix
from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE


def parse_license_expression(
license_expression_node: Union[URIRef, BNode, Node], graph: Graph, doc_namespace: str
license_expression_node: Union[URIRef, BNode, Node],
graph: Graph,
doc_namespace: str,
logger: Optional[Logger] = None,
) -> LicenseExpression:
if not logger:
logger = Logger()
spdx_licensing = get_spdx_licensing()
expression = ""
if license_expression_node.startswith(LICENSE_NAMESPACE):
Expand All @@ -27,28 +33,30 @@ def parse_license_expression(
if node_type == SPDX_NAMESPACE.ConjunctiveLicenseSet:
members = []
for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)):
members.append(parse_license_expression(member_node, graph, doc_namespace))
members.append(parse_license_expression(member_node, graph, doc_namespace, logger))
expression = " AND ".join([str(member) for member in members])
if node_type == SPDX_NAMESPACE.DisjunctiveLicenseSet:
members = []
for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)):
members.append(parse_license_expression(member_node, graph, doc_namespace))
members.append(parse_license_expression(member_node, graph, doc_namespace, logger))
expression = " OR ".join([str(member) for member in members])
if node_type == SPDX_NAMESPACE.WithExceptionOperator:
license_expression = parse_license_expression(
graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace
graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace, logger
)
exception = parse_license_exception(
graph.value(license_expression_node, SPDX_NAMESPACE.licenseException), graph
get_value_from_graph(logger, graph, license_expression_node, SPDX_NAMESPACE.licenseException),
graph,
logger,
)
expression = f"{license_expression} WITH {exception}"

return spdx_licensing.parse(expression)


def parse_license_exception(exception_node: Identifier, graph: Graph) -> str:
def parse_license_exception(exception_node: Identifier, graph: Graph, logger) -> str:
if exception_node.startswith(LICENSE_NAMESPACE):
exception = remove_prefix(exception_node, LICENSE_NAMESPACE)
else:
exception = graph.value(exception_node, SPDX_NAMESPACE.licenseExceptionId).toPython()
exception = get_value_from_graph(logger, graph, exception_node, SPDX_NAMESPACE.licenseExceptionId).toPython()
return exception
23 changes: 16 additions & 7 deletions src/spdx/parser/rdf/package_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional

from rdflib import DOAP, RDFS, Graph, URIRef
from rdflib.term import BNode

from spdx.datetime_conversions import datetime_from_str
from spdx.model.package import (
Expand All @@ -18,7 +19,9 @@
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
from spdx.parser.rdf.checksum_parser import parse_checksum
from spdx.parser.rdf.graph_parsing_functions import (
get_correctly_typed_triples,
get_correctly_typed_value,
get_value_from_graph,
parse_enum_value,
parse_literal,
parse_literal_or_no_assertion_or_none,
Expand All @@ -36,7 +39,7 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac
logger, graph, package_node, SPDX_NAMESPACE.downloadLocation
)
checksums = []
for _, _, checksum_node in graph.triples((package_node, SPDX_NAMESPACE.checksum, None)):
for _, _, checksum_node in get_correctly_typed_triples(logger, graph, package_node, SPDX_NAMESPACE.checksum):
checksums.append(parse_checksum(checksum_node, graph))

version_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.versionInfo)
Expand All @@ -57,28 +60,34 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac
)

external_package_refs = []
for _, _, external_package_ref_node in graph.triples((package_node, SPDX_NAMESPACE.externalRef, None)):
for _, _, external_package_ref_node in get_correctly_typed_triples(
logger, graph, package_node, SPDX_NAMESPACE.externalRef
):
external_package_refs.append(parse_external_package_ref(external_package_ref_node, graph, doc_namespace))
files_analyzed = bool(graph.value(package_node, SPDX_NAMESPACE.filesAnalyzed, default=True))
files_analyzed = bool(
get_value_from_graph(logger, graph, package_node, SPDX_NAMESPACE.filesAnalyzed, default=True)
)
license_concluded = parse_literal_or_no_assertion_or_none(
logger,
graph,
package_node,
SPDX_NAMESPACE.licenseConcluded,
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
license_declared = parse_literal_or_no_assertion_or_none(
logger,
graph,
package_node,
SPDX_NAMESPACE.licenseDeclared,
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
license_info_from_files = []
for _, _, license_info_from_files_node in graph.triples((package_node, SPDX_NAMESPACE.licenseInfoFromFiles, None)):
license_info_from_files.append(
get_correctly_typed_value(
logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace)
logger,
license_info_from_files_node,
lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
)
license_comment = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.licenseComments)
Expand Down Expand Up @@ -161,7 +170,7 @@ def parse_package_verification_code(
return package_verification_code


def parse_external_package_ref(external_package_ref_node: URIRef, graph: Graph, doc_namespace) -> ExternalPackageRef:
def parse_external_package_ref(external_package_ref_node: BNode, graph: Graph, doc_namespace) -> ExternalPackageRef:
logger = Logger()
ref_locator = parse_literal(logger, graph, external_package_ref_node, SPDX_NAMESPACE.referenceLocator)
ref_category = parse_literal(
Expand Down
9 changes: 6 additions & 3 deletions src/spdx/parser/rdf/rdf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from spdx.parser.rdf.creation_info_parser import parse_creation_info
from spdx.parser.rdf.extracted_licensing_info_parser import parse_extracted_licensing_info
from spdx.parser.rdf.file_parser import parse_file
from spdx.parser.rdf.graph_parsing_functions import get_correctly_typed_triples
from spdx.parser.rdf.package_parser import parse_package
from spdx.parser.rdf.relationship_parser import parse_implicit_relationship, parse_relationship
from spdx.parser.rdf.snippet_parser import parse_snippet
Expand Down Expand Up @@ -46,7 +47,7 @@ def translate_graph_to_document(graph: Graph) -> Document:
("snippets", (None, RDF.type, SPDX_NAMESPACE.Snippet), parse_snippet),
]:
elements = []
for element_node, _, _ in graph.triples(triple):
for element_node, _, _ in get_correctly_typed_triples(logger, graph, *triple):
try:
elements.append(parsing_method(element_node, graph, creation_info.document_namespace))
except SPDXParsingError as err:
Expand All @@ -69,7 +70,7 @@ def translate_graph_to_document(graph: Graph) -> Document:
((None, SPDX_NAMESPACE.hasFile, None), RelationshipType.CONTAINS),
((None, SPDX_NAMESPACE.describesPackage, None), RelationshipType.DESCRIBES),
]:
for parent_node, _, element_node in graph.triples(triple):
for parent_node, _, element_node in get_correctly_typed_triples(logger, graph, *triple):
try:
relationship = parse_implicit_relationship(
parent_node, relationship_type, element_node, graph, creation_info.document_namespace
Expand All @@ -81,7 +82,9 @@ def translate_graph_to_document(graph: Graph) -> Document:
logger.extend(err.get_messages())

extracted_licensing_infos = []
for _, _, extracted_licensing_info_node in graph.triples((None, SPDX_NAMESPACE.hasExtractedLicensingInfo, None)):
for _, _, extracted_licensing_info_node in get_correctly_typed_triples(
logger, graph, None, SPDX_NAMESPACE.hasExtractedLicensingInfo
):
try:
extracted_licensing_infos.append(
parse_extracted_licensing_info(extracted_licensing_info_node, graph, creation_info.document_namespace)
Expand Down
Loading

0 comments on commit af1dbfc

Please sign in to comment.