From 9d824c8066d4f1bcb8cb35501d7f72e7cc279662 Mon Sep 17 00:00:00 2001 From: Meret Behrens Date: Wed, 5 Apr 2023 09:07:06 +0200 Subject: [PATCH] add wrapper methods to ensure correct types in rdf parser Signed-off-by: Meret Behrens --- src/spdx/parser/rdf/annotation_parser.py | 4 +- src/spdx/parser/rdf/checksum_parser.py | 4 +- src/spdx/parser/rdf/creation_info_parser.py | 19 +++++-- .../rdf/extracted_licensing_info_parser.py | 10 +++- src/spdx/parser/rdf/file_parser.py | 17 +++++-- .../parser/rdf/graph_parsing_functions.py | 49 +++++++++++++++++-- .../parser/rdf/license_expression_parser.py | 26 ++++++---- src/spdx/parser/rdf/package_parser.py | 23 ++++++--- src/spdx/parser/rdf/rdf_parser.py | 9 ++-- src/spdx/parser/rdf/relationship_parser.py | 5 +- src/spdx/parser/rdf/snippet_parser.py | 28 +++++++---- .../spdx/parser/rdf/test_annotation_parser.py | 3 +- tests/spdx/parser/rdf/test_checksum_parser.py | 3 +- .../parser/rdf/test_creation_info_parser.py | 1 + tests/spdx/parser/rdf/test_file_parser.py | 3 +- tests/spdx/parser/rdf/test_package_parser.py | 4 +- .../parser/rdf/test_relationship_parser.py | 1 + tests/spdx/parser/rdf/test_snippet_parser.py | 15 ++++-- 18 files changed, 165 insertions(+), 59 deletions(-) diff --git a/src/spdx/parser/rdf/annotation_parser.py b/src/spdx/parser/rdf/annotation_parser.py index e75aa7b80..26544de3f 100644 --- a/src/spdx/parser/rdf/annotation_parser.py +++ b/src/spdx/parser/rdf/annotation_parser.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 -from rdflib import RDFS, Graph, URIRef +from rdflib import RDFS, BNode, Graph, URIRef from spdx.datetime_conversions import datetime_from_str from spdx.model.annotation import Annotation, AnnotationType @@ -12,7 +12,7 @@ from spdx.rdfschema.namespace import SPDX_NAMESPACE -def parse_annotation(annotation_node: URIRef, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation: +def parse_annotation(annotation_node: BNode, 
graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation: logger = Logger() spdx_id = parse_spdx_id(parent_node, doc_namespace, graph) annotator = parse_literal( diff --git a/src/spdx/parser/rdf/checksum_parser.py b/src/spdx/parser/rdf/checksum_parser.py index 456e709cb..d2bad6e7d 100644 --- a/src/spdx/parser/rdf/checksum_parser.py +++ b/src/spdx/parser/rdf/checksum_parser.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 -from rdflib import Graph, URIRef +from rdflib import BNode, Graph from spdx.model.checksum import Checksum, ChecksumAlgorithm from spdx.parser.error import SPDXParsingError @@ -11,7 +11,7 @@ from spdx.rdfschema.namespace import SPDX_NAMESPACE -def parse_checksum(parent_node: URIRef, graph: Graph) -> Checksum: +def parse_checksum(parent_node: BNode, graph: Graph) -> Checksum: logger = Logger() algorithm = parse_literal( logger, graph, parent_node, SPDX_NAMESPACE.algorithm, parsing_method=convert_rdf_to_algorithm diff --git a/src/spdx/parser/rdf/creation_info_parser.py b/src/spdx/parser/rdf/creation_info_parser.py index eeb1e1d31..3e11589fa 100644 --- a/src/spdx/parser/rdf/creation_info_parser.py +++ b/src/spdx/parser/rdf/creation_info_parser.py @@ -19,7 +19,12 @@ from spdx.parser.logger import Logger from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.rdf.checksum_parser import parse_checksum -from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_spdx_id, remove_prefix +from spdx.parser.rdf.graph_parsing_functions import ( + get_correctly_typed_triples, + parse_literal, + parse_spdx_id, + remove_prefix, +) from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE @@ -50,10 +55,14 @@ def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]: ) creator_comment = parse_literal(logger, graph, creation_info_node, RDFS.comment) creators = [] - for _, _, 
creator_literal in graph.triples((creation_info_node, SPDX_NAMESPACE.creator, None)): - creators.append(ActorParser.parse_actor(creator_literal)) + for _, _, creator_literal in get_correctly_typed_triples( + logger, graph, creation_info_node, SPDX_NAMESPACE.creator + ): + creators.append(ActorParser.parse_actor(creator_literal.toPython())) external_document_refs = [] - for _, _, external_document_node in graph.triples((doc_node, SPDX_NAMESPACE.externalDocumentRef, None)): + for _, _, external_document_node in get_correctly_typed_triples( + logger, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef + ): external_document_refs.append(parse_external_document_refs(external_document_node, graph, namespace)) raise_parsing_error_if_logger_has_messages(logger, "CreationInfo") @@ -93,7 +102,7 @@ def parse_namespace_and_spdx_id(graph: Graph) -> (str, str): ) sys.exit(1) - namespace, spdx_id = urldefrag(subject) + namespace, spdx_id = urldefrag(str(subject)) if not namespace: logging.error( diff --git a/src/spdx/parser/rdf/extracted_licensing_info_parser.py b/src/spdx/parser/rdf/extracted_licensing_info_parser.py index 6a713ddfd..eae155cc4 100644 --- a/src/spdx/parser/rdf/extracted_licensing_info_parser.py +++ b/src/spdx/parser/rdf/extracted_licensing_info_parser.py @@ -6,7 +6,11 @@ from spdx.model.extracted_licensing_info import ExtractedLicensingInfo from spdx.parser.logger import Logger from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages -from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_literal_or_no_assertion_or_none +from spdx.parser.rdf.graph_parsing_functions import ( + get_correctly_typed_triples, + parse_literal, + parse_literal_or_no_assertion_or_none, +) from spdx.rdfschema.namespace import SPDX_NAMESPACE @@ -28,7 +32,9 @@ def parse_extracted_licensing_info( logger, graph, extracted_licensing_info_node, SPDX_NAMESPACE.name ) cross_references = [] - for _, _, 
cross_reference_node in graph.triples((extracted_licensing_info_node, RDFS.seeAlso, None)): + for _, _, cross_reference_node in get_correctly_typed_triples( + logger, graph, extracted_licensing_info_node, RDFS.seeAlso + ): cross_references.append(cross_reference_node.toPython()) raise_parsing_error_if_logger_has_messages(logger, "ExtractedLicensingInfo") extracted_licensing_info = construct_or_raise_parsing_error( diff --git a/src/spdx/parser/rdf/file_parser.py b/src/spdx/parser/rdf/file_parser.py index 2dec391d4..649d29879 100644 --- a/src/spdx/parser/rdf/file_parser.py +++ b/src/spdx/parser/rdf/file_parser.py @@ -9,6 +9,7 @@ from spdx.parser.rdf.checksum_parser import parse_checksum from spdx.parser.rdf.graph_parsing_functions import ( apply_parsing_method_or_log_error, + get_correctly_typed_triples, get_correctly_typed_value, parse_enum_value, parse_literal, @@ -24,7 +25,7 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File: spdx_id = parse_spdx_id(file_node, doc_namespace, graph) name = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.fileName) checksums = [] - for _, _, checksum_node in graph.triples((file_node, SPDX_NAMESPACE.checksum, None)): + for _, _, checksum_node in get_correctly_typed_triples(logger, graph, file_node, SPDX_NAMESPACE.checksum): checksums.append(parse_checksum(checksum_node, graph)) file_types = [] @@ -39,25 +40,31 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File: graph, file_node, SPDX_NAMESPACE.licenseConcluded, - parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace), + parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger), ) license_info_in_file = [] for _, _, license_info_from_files_node in graph.triples((file_node, SPDX_NAMESPACE.licenseInfoInFile, None)): license_info_in_file.append( get_correctly_typed_value( - logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace) + logger, + 
license_info_from_files_node, + lambda x: parse_license_expression(x, graph, doc_namespace, logger), ) ) license_comment = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.licenseComments) copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, file_node, SPDX_NAMESPACE.copyrightText) file_contributors = [] - for _, _, file_contributor in graph.triples((file_node, SPDX_NAMESPACE.fileContributor, None)): + for _, _, file_contributor in get_correctly_typed_triples( + logger, graph, file_node, SPDX_NAMESPACE.fileContributor, None + ): file_contributors.append(file_contributor.toPython()) notice_text = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.noticeText) comment = parse_literal(logger, graph, file_node, RDFS.comment) attribution_texts = [] - for _, _, attribution_text_literal in graph.triples((file_node, SPDX_NAMESPACE.attributionText, None)): + for _, _, attribution_text_literal in get_correctly_typed_triples( + logger, graph, file_node, SPDX_NAMESPACE.attributionText, None + ): attribution_texts.append(attribution_text_literal.toPython()) raise_parsing_error_if_logger_has_messages(logger, "File") file = construct_or_raise_parsing_error( diff --git a/src/spdx/parser/rdf/graph_parsing_functions.py b/src/spdx/parser/rdf/graph_parsing_functions.py index 72b107c7f..15a0ff8cf 100644 --- a/src/spdx/parser/rdf/graph_parsing_functions.py +++ b/src/spdx/parser/rdf/graph_parsing_functions.py @@ -2,12 +2,12 @@ # # SPDX-License-Identifier: Apache-2.0 from enum import Enum -from typing import Any, Callable, Optional, Type +from typing import Any, Callable, Optional, Tuple, Type, Union -from rdflib import Graph, URIRef +from rdflib import RDF, Graph, URIRef from rdflib.exceptions import UniquenessError from rdflib.namespace import NamespaceManager -from rdflib.term import Node +from rdflib.term import BNode, Literal, Node from spdx.casing_tools import camel_case_to_snake_case from spdx.model.spdx_no_assertion import SPDX_NO_ASSERTION_STRING, 
SpdxNoAssertion @@ -102,3 +102,46 @@ def remove_prefix(string: str, prefix: str) -> str: if string.startswith(prefix): return string[len(prefix) :] return string + + +def get_correctly_typed_triples( + logger: Logger, + graph: Graph, + subject: Optional[Node] = None, + predicate: Optional[Node] = None, + _object: Optional[Node] = None, +) -> Tuple[Union[BNode, URIRef], Node, Union[BNode, Literal, URIRef]]: + # this is a helper method to check the rdf types returned by graph.triples(); it yields every triple + # unchanged and only logs a warning for types that are not compatible with the code that follows + for s, p, o in graph.triples((subject, predicate, _object)): + if not isinstance(s, (BNode, URIRef)): + logger.append( + f"Warning: Subject {s} should be of type BNode or URIRef, but is {type(s).__name__}. " + f"This might lead to a failure." + ) + if not isinstance(o, (BNode, Literal, URIRef)): + logger.append( + f"Warning: Object {o} should be of type BNode, Literal or URIRef, but is {type(o).__name__}. " + f"This might lead to a failure." + ) + yield s, p, o + + +def get_value_from_graph( + logger: Logger, + graph: Graph, + subject: Optional[Node] = None, + predicate: Optional[Node] = RDF.value, + _object: Optional[Node] = None, + default: Optional[Any] = None, + _any: Optional[bool] = True, +) -> Optional[Union[URIRef, Literal, BNode]]: + # this is a helper method to check the rdf type returned by graph.value(); the value is returned + # unchanged; a warning is logged only if it is truthy and its type is not compatible with the code that follows + value = graph.value(subject=subject, predicate=predicate, object=_object, default=default, any=_any) + if value and not isinstance(value, (URIRef, Literal, BNode)): + logger.append( + f"Warning: Node {value} should be of type BNode, Literal or URIRef, but is {type(value).__name__}. " + f"This might lead to a failure." 
+ ) + return value diff --git a/src/spdx/parser/rdf/license_expression_parser.py b/src/spdx/parser/rdf/license_expression_parser.py index 7f6017854..b6ccc3bd8 100644 --- a/src/spdx/parser/rdf/license_expression_parser.py +++ b/src/spdx/parser/rdf/license_expression_parser.py @@ -1,19 +1,25 @@ # SPDX-FileCopyrightText: 2023 spdx contributors # # SPDX-License-Identifier: Apache-2.0 -from typing import Union +from typing import Optional, Union from license_expression import LicenseExpression, get_spdx_licensing from rdflib import RDF, Graph from rdflib.term import BNode, Identifier, Node, URIRef -from spdx.parser.rdf.graph_parsing_functions import remove_prefix +from spdx.parser.logger import Logger +from spdx.parser.rdf.graph_parsing_functions import get_value_from_graph, remove_prefix from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE def parse_license_expression( - license_expression_node: Union[URIRef, BNode, Node], graph: Graph, doc_namespace: str + license_expression_node: Union[URIRef, BNode, Node], + graph: Graph, + doc_namespace: str, + logger: Optional[Logger] = None, ) -> LicenseExpression: + if not logger: + logger = Logger() spdx_licensing = get_spdx_licensing() expression = "" if license_expression_node.startswith(LICENSE_NAMESPACE): @@ -27,28 +33,30 @@ def parse_license_expression( if node_type == SPDX_NAMESPACE.ConjunctiveLicenseSet: members = [] for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)): - members.append(parse_license_expression(member_node, graph, doc_namespace)) + members.append(parse_license_expression(member_node, graph, doc_namespace, logger)) expression = " AND ".join([str(member) for member in members]) if node_type == SPDX_NAMESPACE.DisjunctiveLicenseSet: members = [] for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)): - members.append(parse_license_expression(member_node, graph, doc_namespace)) + 
members.append(parse_license_expression(member_node, graph, doc_namespace, logger)) expression = " OR ".join([str(member) for member in members]) if node_type == SPDX_NAMESPACE.WithExceptionOperator: license_expression = parse_license_expression( - graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace + graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace, logger ) exception = parse_license_exception( - graph.value(license_expression_node, SPDX_NAMESPACE.licenseException), graph + get_value_from_graph(logger, graph, license_expression_node, SPDX_NAMESPACE.licenseException), + graph, + logger, ) expression = f"{license_expression} WITH {exception}" return spdx_licensing.parse(expression) -def parse_license_exception(exception_node: Identifier, graph: Graph) -> str: +def parse_license_exception(exception_node: Identifier, graph: Graph, logger) -> str: if exception_node.startswith(LICENSE_NAMESPACE): exception = remove_prefix(exception_node, LICENSE_NAMESPACE) else: - exception = graph.value(exception_node, SPDX_NAMESPACE.licenseExceptionId).toPython() + exception = get_value_from_graph(logger, graph, exception_node, SPDX_NAMESPACE.licenseExceptionId).toPython() return exception diff --git a/src/spdx/parser/rdf/package_parser.py b/src/spdx/parser/rdf/package_parser.py index 866af16a4..e4ba4b02b 100644 --- a/src/spdx/parser/rdf/package_parser.py +++ b/src/spdx/parser/rdf/package_parser.py @@ -4,6 +4,7 @@ from typing import Optional from rdflib import DOAP, RDFS, Graph, URIRef +from rdflib.term import BNode from spdx.datetime_conversions import datetime_from_str from spdx.model.package import ( @@ -18,7 +19,9 @@ from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.rdf.checksum_parser import parse_checksum from spdx.parser.rdf.graph_parsing_functions import ( + get_correctly_typed_triples, get_correctly_typed_value, + 
get_value_from_graph, parse_enum_value, parse_literal, parse_literal_or_no_assertion_or_none, @@ -36,7 +39,7 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac logger, graph, package_node, SPDX_NAMESPACE.downloadLocation ) checksums = [] - for _, _, checksum_node in graph.triples((package_node, SPDX_NAMESPACE.checksum, None)): + for _, _, checksum_node in get_correctly_typed_triples(logger, graph, package_node, SPDX_NAMESPACE.checksum): checksums.append(parse_checksum(checksum_node, graph)) version_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.versionInfo) @@ -57,28 +60,34 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac ) external_package_refs = [] - for _, _, external_package_ref_node in graph.triples((package_node, SPDX_NAMESPACE.externalRef, None)): + for _, _, external_package_ref_node in get_correctly_typed_triples( + logger, graph, package_node, SPDX_NAMESPACE.externalRef + ): external_package_refs.append(parse_external_package_ref(external_package_ref_node, graph, doc_namespace)) - files_analyzed = bool(graph.value(package_node, SPDX_NAMESPACE.filesAnalyzed, default=True)) + files_analyzed = bool( + get_value_from_graph(logger, graph, package_node, SPDX_NAMESPACE.filesAnalyzed, default=True) + ) license_concluded = parse_literal_or_no_assertion_or_none( logger, graph, package_node, SPDX_NAMESPACE.licenseConcluded, - parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace), + parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger), ) license_declared = parse_literal_or_no_assertion_or_none( logger, graph, package_node, SPDX_NAMESPACE.licenseDeclared, - parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace), + parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger), ) license_info_from_files = [] for _, _, license_info_from_files_node in graph.triples((package_node, 
SPDX_NAMESPACE.licenseInfoFromFiles, None)): license_info_from_files.append( get_correctly_typed_value( - logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace) + logger, + license_info_from_files_node, + lambda x: parse_license_expression(x, graph, doc_namespace, logger), ) ) license_comment = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.licenseComments) @@ -161,7 +170,7 @@ def parse_package_verification_code( return package_verification_code -def parse_external_package_ref(external_package_ref_node: URIRef, graph: Graph, doc_namespace) -> ExternalPackageRef: +def parse_external_package_ref(external_package_ref_node: BNode, graph: Graph, doc_namespace) -> ExternalPackageRef: logger = Logger() ref_locator = parse_literal(logger, graph, external_package_ref_node, SPDX_NAMESPACE.referenceLocator) ref_category = parse_literal( diff --git a/src/spdx/parser/rdf/rdf_parser.py b/src/spdx/parser/rdf/rdf_parser.py index cbd77b255..e05de8b64 100644 --- a/src/spdx/parser/rdf/rdf_parser.py +++ b/src/spdx/parser/rdf/rdf_parser.py @@ -14,6 +14,7 @@ from spdx.parser.rdf.creation_info_parser import parse_creation_info from spdx.parser.rdf.extracted_licensing_info_parser import parse_extracted_licensing_info from spdx.parser.rdf.file_parser import parse_file +from spdx.parser.rdf.graph_parsing_functions import get_correctly_typed_triples from spdx.parser.rdf.package_parser import parse_package from spdx.parser.rdf.relationship_parser import parse_implicit_relationship, parse_relationship from spdx.parser.rdf.snippet_parser import parse_snippet @@ -46,7 +47,7 @@ def translate_graph_to_document(graph: Graph) -> Document: ("snippets", (None, RDF.type, SPDX_NAMESPACE.Snippet), parse_snippet), ]: elements = [] - for element_node, _, _ in graph.triples(triple): + for element_node, _, _ in get_correctly_typed_triples(logger, graph, *triple): try: elements.append(parsing_method(element_node, graph, creation_info.document_namespace)) 
except SPDXParsingError as err: @@ -69,7 +70,7 @@ def translate_graph_to_document(graph: Graph) -> Document: ((None, SPDX_NAMESPACE.hasFile, None), RelationshipType.CONTAINS), ((None, SPDX_NAMESPACE.describesPackage, None), RelationshipType.DESCRIBES), ]: - for parent_node, _, element_node in graph.triples(triple): + for parent_node, _, element_node in get_correctly_typed_triples(logger, graph, *triple): try: relationship = parse_implicit_relationship( parent_node, relationship_type, element_node, graph, creation_info.document_namespace @@ -81,7 +82,9 @@ def translate_graph_to_document(graph: Graph) -> Document: logger.extend(err.get_messages()) extracted_licensing_infos = [] - for _, _, extracted_licensing_info_node in graph.triples((None, SPDX_NAMESPACE.hasExtractedLicensingInfo, None)): + for _, _, extracted_licensing_info_node in get_correctly_typed_triples( + logger, graph, None, SPDX_NAMESPACE.hasExtractedLicensingInfo + ): try: extracted_licensing_infos.append( parse_extracted_licensing_info(extracted_licensing_info_node, graph, creation_info.document_namespace) diff --git a/src/spdx/parser/rdf/relationship_parser.py b/src/spdx/parser/rdf/relationship_parser.py index cdd61e5ee..d08ddf54d 100644 --- a/src/spdx/parser/rdf/relationship_parser.py +++ b/src/spdx/parser/rdf/relationship_parser.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 from rdflib import RDFS, Graph, URIRef +from rdflib.term import Node from spdx.model.relationship import Relationship, RelationshipType from spdx.parser.logger import Logger @@ -15,9 +16,7 @@ from spdx.rdfschema.namespace import SPDX_NAMESPACE -def parse_relationship( - relationship_node: URIRef, graph: Graph, parent_node: URIRef, doc_namespace: str -) -> Relationship: +def parse_relationship(relationship_node: Node, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Relationship: logger = Logger() spdx_element_id = parse_spdx_id(parent_node, doc_namespace, graph) diff --git 
a/src/spdx/parser/rdf/snippet_parser.py b/src/spdx/parser/rdf/snippet_parser.py index 446b0fc73..23f746be5 100644 --- a/src/spdx/parser/rdf/snippet_parser.py +++ b/src/spdx/parser/rdf/snippet_parser.py @@ -5,7 +5,7 @@ from rdflib import RDF, RDFS, Graph from rdflib.exceptions import UniquenessError -from rdflib.term import Node, URIRef +from rdflib.term import BNode, Node, URIRef from spdx.model.snippet import Snippet from spdx.parser.error import SPDXParsingError @@ -13,7 +13,9 @@ from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages from spdx.parser.rdf.graph_parsing_functions import ( apply_parsing_method_or_log_error, + get_correctly_typed_triples, get_correctly_typed_value, + get_value_from_graph, parse_literal, parse_literal_or_no_assertion_or_none, parse_spdx_id, @@ -25,7 +27,9 @@ def parse_snippet(snippet_node: URIRef, graph: Graph, doc_namespace: str) -> Snippet: logger = Logger() spdx_id = parse_spdx_id(snippet_node, doc_namespace, graph) - file_spdx_id_uri = graph.value(subject=snippet_node, predicate=SPDX_NAMESPACE.snippetFromFile) + file_spdx_id_uri = get_value_from_graph( + logger, graph, subject=snippet_node, predicate=SPDX_NAMESPACE.snippetFromFile + ) file_spdx_id = parse_spdx_id(file_spdx_id_uri, doc_namespace, graph) byte_range = None line_range = None @@ -40,13 +44,15 @@ def parse_snippet(snippet_node: URIRef, graph: Graph, doc_namespace: str) -> Sni graph, snippet_node, SPDX_NAMESPACE.licenseConcluded, - parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace), + parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger), ) license_info_in_snippet = [] for _, _, license_info_in_snippet_node in graph.triples((snippet_node, SPDX_NAMESPACE.licenseInfoInSnippet, None)): license_info_in_snippet.append( get_correctly_typed_value( - logger, license_info_in_snippet_node, lambda x: parse_license_expression(x, graph, doc_namespace) + logger, + 
license_info_in_snippet_node, + lambda x: parse_license_expression(x, graph, doc_namespace, logger), ) ) license_comment = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.licenseComments) @@ -54,7 +60,9 @@ def parse_snippet(snippet_node: URIRef, graph: Graph, doc_namespace: str) -> Sni comment = parse_literal(logger, graph, snippet_node, RDFS.comment) name = parse_literal(logger, graph, snippet_node, SPDX_NAMESPACE.name) attribution_texts = [] - for _, _, attribution_text_literal in graph.triples((snippet_node, SPDX_NAMESPACE.attributionText, None)): + for _, _, attribution_text_literal in get_correctly_typed_triples( + logger, graph, snippet_node, SPDX_NAMESPACE.attributionText, None + ): attribution_texts.append(attribution_text_literal.toPython()) raise_parsing_error_if_logger_has_messages(logger, "Snippet") @@ -96,7 +104,7 @@ def set_range_or_log_error( return byte_range, line_range -def parse_ranges(start_end_pointer: URIRef, graph: Graph) -> Dict[str, Tuple[int, int]]: +def parse_ranges(start_end_pointer: BNode, graph: Graph) -> Dict[str, Tuple[int, int]]: range_values = dict() start_pointer_type, start_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.startPointer, start_end_pointer) end_pointer_type, end_pointer_node = get_pointer_type(graph, POINTER_NAMESPACE.endPointer, start_end_pointer) @@ -110,14 +118,14 @@ def parse_ranges(start_end_pointer: URIRef, graph: Graph) -> Dict[str, Tuple[int return {str(start_pointer_type.fragment): (range_values["startPointer"], range_values["endPointer"])} -def get_pointer_type(graph: Graph, pointer: URIRef, start_end_pointer: URIRef) -> Tuple[URIRef, URIRef]: +def get_pointer_type(graph: Graph, pointer: URIRef, start_end_pointer: BNode) -> Tuple[URIRef, Node]: try: pointer_node = graph.value(start_end_pointer, pointer, any=False) except UniquenessError: raise SPDXParsingError([f"Multiple values for {pointer.fragment}"]) if not pointer_node: raise SPDXParsingError([f"Couldn't find pointer of type 
{pointer.fragment}."]) - pointer_type = graph.value(pointer_node, RDF.type) + pointer_type = get_value_from_graph(Logger(), graph, pointer_node, RDF.type) return pointer_type, pointer_node @@ -129,9 +137,9 @@ def get_pointer_type(graph: Graph, pointer: URIRef, start_end_pointer: URIRef) - def parse_range_value(graph: Graph, pointer_node: Node, predicate: URIRef) -> Optional[int]: try: - value = graph.value(pointer_node, predicate, any=False) + value = get_value_from_graph(Logger(), graph, pointer_node, predicate, _any=False) except UniquenessError: raise SPDXParsingError([f"Multiple values for {predicate.fragment} found."]) if value: - value = int(value) + value = int(value.toPython()) return value diff --git a/tests/spdx/parser/rdf/test_annotation_parser.py b/tests/spdx/parser/rdf/test_annotation_parser.py index ed9978394..c98c34675 100644 --- a/tests/spdx/parser/rdf/test_annotation_parser.py +++ b/tests/spdx/parser/rdf/test_annotation_parser.py @@ -4,7 +4,7 @@ import os from datetime import datetime -from rdflib import Graph, URIRef +from rdflib import BNode, Graph, URIRef from spdx.model.actor import Actor, ActorType from spdx.model.annotation import AnnotationType @@ -17,6 +17,7 @@ def test_parse_annotation(): doc_namespace = "https://some.namespace" file_node = URIRef(f"{doc_namespace}#SPDXRef-File") annotation_node = graph.value(subject=file_node, predicate=SPDX_NAMESPACE.annotation) + assert isinstance(annotation_node, BNode) annotation = parse_annotation(annotation_node, graph, file_node, doc_namespace) diff --git a/tests/spdx/parser/rdf/test_checksum_parser.py b/tests/spdx/parser/rdf/test_checksum_parser.py index 028b3f20c..575d1f3cc 100644 --- a/tests/spdx/parser/rdf/test_checksum_parser.py +++ b/tests/spdx/parser/rdf/test_checksum_parser.py @@ -4,7 +4,7 @@ import os import pytest -from rdflib import Graph, URIRef +from rdflib import BNode, Graph, URIRef from spdx.model.checksum import ChecksumAlgorithm from spdx.parser.error import SPDXParsingError @@ 
-17,6 +17,7 @@ def test_parse_checksum(): checksum_node = graph.value( subject=URIRef("https://some.namespace#DocumentRef-external"), predicate=SPDX_NAMESPACE.checksum ) + assert isinstance(checksum_node, BNode) checksum = parse_checksum(checksum_node, graph) diff --git a/tests/spdx/parser/rdf/test_creation_info_parser.py b/tests/spdx/parser/rdf/test_creation_info_parser.py index 04ac5e801..e5f17877c 100644 --- a/tests/spdx/parser/rdf/test_creation_info_parser.py +++ b/tests/spdx/parser/rdf/test_creation_info_parser.py @@ -82,6 +82,7 @@ def test_parse_external_document_refs(): external_doc_ref_node = graph.value( subject=URIRef(f"{doc_namespace}#SPDXRef-DOCUMENT"), predicate=SPDX_NAMESPACE.externalDocumentRef ) + assert isinstance(external_doc_ref_node, URIRef) external_document_ref = parse_external_document_refs(external_doc_ref_node, graph, doc_namespace) diff --git a/tests/spdx/parser/rdf/test_file_parser.py b/tests/spdx/parser/rdf/test_file_parser.py index 6e962bc3d..17b0edeef 100644 --- a/tests/spdx/parser/rdf/test_file_parser.py +++ b/tests/spdx/parser/rdf/test_file_parser.py @@ -5,7 +5,7 @@ from unittest import TestCase from license_expression import get_spdx_licensing -from rdflib import RDF, Graph +from rdflib import RDF, Graph, URIRef from spdx.model.checksum import Checksum, ChecksumAlgorithm from spdx.model.file import FileType @@ -18,6 +18,7 @@ def test_parse_file(): graph = Graph().parse(os.path.join(os.path.dirname(__file__), "data/file_to_test_rdf_parser.rdf.xml")) file_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.File) doc_namespace = "https://some.namespace" + assert isinstance(file_node, URIRef) file = parse_file(file_node, graph, doc_namespace) diff --git a/tests/spdx/parser/rdf/test_package_parser.py b/tests/spdx/parser/rdf/test_package_parser.py index dacdce56b..d48797a14 100644 --- a/tests/spdx/parser/rdf/test_package_parser.py +++ b/tests/spdx/parser/rdf/test_package_parser.py @@ -6,7 +6,7 @@ import pytest from 
license_expression import get_spdx_licensing -from rdflib import RDF, Graph, Literal +from rdflib import RDF, BNode, Graph, Literal, URIRef from spdx.model.actor import Actor, ActorType from spdx.model.checksum import Checksum, ChecksumAlgorithm @@ -21,6 +21,7 @@ def test_package_parser(): # we have two packages in the test file, graph.value() will return the first package package_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.Package) doc_namespace = "https://some.namespace" + assert isinstance(package_node, URIRef) package = parse_package(package_node, graph, doc_namespace) @@ -80,6 +81,7 @@ def test_external_package_ref_parser(download_location, category, locator, type, # in the test file we have two different external package refs depending on the package package_node = graph.value(predicate=SPDX_NAMESPACE.downloadLocation, object=Literal(download_location)) external_package_ref_node = graph.value(package_node, SPDX_NAMESPACE.externalRef) + assert isinstance(external_package_ref_node, BNode) external_package_ref = parse_external_package_ref(external_package_ref_node, graph, doc_namespace) diff --git a/tests/spdx/parser/rdf/test_relationship_parser.py b/tests/spdx/parser/rdf/test_relationship_parser.py index 53935f06d..cf2a8b7df 100644 --- a/tests/spdx/parser/rdf/test_relationship_parser.py +++ b/tests/spdx/parser/rdf/test_relationship_parser.py @@ -16,6 +16,7 @@ def test_relationship_parser(): parent_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.SpdxDocument) relationship_node = graph.value(subject=parent_node, predicate=SPDX_NAMESPACE.relationship) doc_namespace = "https://some.namespace" + assert isinstance(parent_node, URIRef) relationship = parse_relationship(relationship_node, graph, parent_node, doc_namespace) diff --git a/tests/spdx/parser/rdf/test_snippet_parser.py b/tests/spdx/parser/rdf/test_snippet_parser.py index 5c975798c..13dcf5fdc 100644 --- a/tests/spdx/parser/rdf/test_snippet_parser.py +++ 
b/tests/spdx/parser/rdf/test_snippet_parser.py @@ -6,7 +6,7 @@ import pytest from license_expression import get_spdx_licensing -from rdflib import RDF, BNode, Graph, Literal +from rdflib import RDF, BNode, Graph, Literal, URIRef from spdx.model.spdx_no_assertion import SpdxNoAssertion from spdx.parser.error import SPDXParsingError @@ -18,6 +18,7 @@ def test_parse_snippet(): graph = Graph().parse(os.path.join(os.path.dirname(__file__), "data/file_to_test_rdf_parser.rdf.xml")) snippet_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.Snippet) doc_namespace = "https://some.namespace" + assert isinstance(snippet_node, URIRef) snippet = parse_snippet(snippet_node, graph, doc_namespace) @@ -60,7 +61,9 @@ def test_parse_ranges(predicate_value_class_member): add_range_to_graph_helper(graph, predicate_value_class_member) - range_dict = parse_ranges(graph.value(predicate=RDF.type, object=POINTER_NAMESPACE.StartEndPointer), graph) + range_node = graph.value(predicate=RDF.type, object=POINTER_NAMESPACE.StartEndPointer) + assert isinstance(range_node, BNode) + range_dict = parse_ranges(range_node, graph) assert pointer_class.fragment in range_dict.keys() assert range_dict[pointer_class.fragment][0] == predicate_value_class_member[0][1] @@ -90,7 +93,9 @@ def test_parse_ranges_wrong_pair_of_pointer_classes(predicate_value_class_member add_range_to_graph_helper(graph, predicate_value_class_member) - range_dict = parse_ranges(graph.value(predicate=RDF.type, object=POINTER_NAMESPACE.StartEndPointer), graph) + range_node = graph.value(predicate=RDF.type, object=POINTER_NAMESPACE.StartEndPointer) + assert isinstance(range_node, BNode) + range_dict = parse_ranges(range_node, graph) assert pointer_class.fragment in range_dict.keys() assert range_dict[pointer_class.fragment][0] is None @@ -141,7 +146,9 @@ def test_parse_ranges_error(predicate_value_class_member, expected_message): add_range_to_graph_helper(graph, predicate_value_class_member) with 
pytest.raises(SPDXParsingError, match=expected_message): - parse_ranges(graph.value(predicate=RDF.type, object=POINTER_NAMESPACE.StartEndPointer), graph) + range_node = graph.value(predicate=RDF.type, object=POINTER_NAMESPACE.StartEndPointer) + assert isinstance(range_node, BNode) + parse_ranges(range_node, graph) def add_range_to_graph_helper(graph, predicate_value_class_member):