Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[issue-561] fix rdf parser #563

Merged
merged 2 commits into from
Apr 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/spdx/parser/rdf/annotation_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from rdflib import RDFS, Graph, URIRef
from rdflib import RDFS, BNode, Graph, URIRef

from spdx.datetime_conversions import datetime_from_str
from spdx.model.annotation import Annotation, AnnotationType
Expand All @@ -12,7 +12,7 @@
from spdx.rdfschema.namespace import SPDX_NAMESPACE


def parse_annotation(annotation_node: URIRef, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation:
def parse_annotation(annotation_node: BNode, graph: Graph, parent_node: URIRef, doc_namespace: str) -> Annotation:
logger = Logger()
spdx_id = parse_spdx_id(parent_node, doc_namespace, graph)
annotator = parse_literal(
Expand Down
4 changes: 2 additions & 2 deletions src/spdx/parser/rdf/checksum_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from rdflib import Graph, URIRef
from rdflib import BNode, Graph

from spdx.model.checksum import Checksum, ChecksumAlgorithm
from spdx.parser.error import SPDXParsingError
Expand All @@ -11,7 +11,7 @@
from spdx.rdfschema.namespace import SPDX_NAMESPACE


def parse_checksum(parent_node: URIRef, graph: Graph) -> Checksum:
def parse_checksum(parent_node: BNode, graph: Graph) -> Checksum:
logger = Logger()
algorithm = parse_literal(
logger, graph, parent_node, SPDX_NAMESPACE.algorithm, parsing_method=convert_rdf_to_algorithm
Expand Down
19 changes: 14 additions & 5 deletions src/spdx/parser/rdf/creation_info_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@
from spdx.parser.logger import Logger
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
from spdx.parser.rdf.checksum_parser import parse_checksum
from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_spdx_id, remove_prefix
from spdx.parser.rdf.graph_parsing_functions import (
get_correctly_typed_triples,
parse_literal,
parse_spdx_id,
remove_prefix,
)
from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE


Expand Down Expand Up @@ -50,10 +55,14 @@ def parse_creation_info(graph: Graph) -> Tuple[CreationInfo, URIRef]:
)
creator_comment = parse_literal(logger, graph, creation_info_node, RDFS.comment)
creators = []
for _, _, creator_literal in graph.triples((creation_info_node, SPDX_NAMESPACE.creator, None)):
creators.append(ActorParser.parse_actor(creator_literal))
for _, _, creator_literal in get_correctly_typed_triples(
logger, graph, creation_info_node, SPDX_NAMESPACE.creator
):
creators.append(ActorParser.parse_actor(creator_literal.toPython()))
external_document_refs = []
for _, _, external_document_node in graph.triples((doc_node, SPDX_NAMESPACE.externalDocumentRef, None)):
for _, _, external_document_node in get_correctly_typed_triples(
logger, graph, doc_node, SPDX_NAMESPACE.externalDocumentRef
):
external_document_refs.append(parse_external_document_refs(external_document_node, graph, namespace))

raise_parsing_error_if_logger_has_messages(logger, "CreationInfo")
Expand Down Expand Up @@ -93,7 +102,7 @@ def parse_namespace_and_spdx_id(graph: Graph) -> (str, str):
)
sys.exit(1)

namespace, spdx_id = urldefrag(subject)
namespace, spdx_id = urldefrag(str(subject))

if not namespace:
logging.error(
Expand Down
10 changes: 8 additions & 2 deletions src/spdx/parser/rdf/extracted_licensing_info_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
from spdx.model.extracted_licensing_info import ExtractedLicensingInfo
from spdx.parser.logger import Logger
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
from spdx.parser.rdf.graph_parsing_functions import parse_literal, parse_literal_or_no_assertion_or_none
from spdx.parser.rdf.graph_parsing_functions import (
get_correctly_typed_triples,
parse_literal,
parse_literal_or_no_assertion_or_none,
)
from spdx.rdfschema.namespace import SPDX_NAMESPACE


Expand All @@ -28,7 +32,9 @@ def parse_extracted_licensing_info(
logger, graph, extracted_licensing_info_node, SPDX_NAMESPACE.name
)
cross_references = []
for _, _, cross_reference_node in graph.triples((extracted_licensing_info_node, RDFS.seeAlso, None)):
for _, _, cross_reference_node in get_correctly_typed_triples(
logger, graph, extracted_licensing_info_node, RDFS.seeAlso
):
cross_references.append(cross_reference_node.toPython())
raise_parsing_error_if_logger_has_messages(logger, "ExtractedLicensingInfo")
extracted_licensing_info = construct_or_raise_parsing_error(
Expand Down
17 changes: 12 additions & 5 deletions src/spdx/parser/rdf/file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from spdx.parser.rdf.checksum_parser import parse_checksum
from spdx.parser.rdf.graph_parsing_functions import (
apply_parsing_method_or_log_error,
get_correctly_typed_triples,
get_correctly_typed_value,
parse_enum_value,
parse_literal,
Expand All @@ -24,7 +25,7 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File:
spdx_id = parse_spdx_id(file_node, doc_namespace, graph)
name = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.fileName)
checksums = []
for _, _, checksum_node in graph.triples((file_node, SPDX_NAMESPACE.checksum, None)):
for _, _, checksum_node in get_correctly_typed_triples(logger, graph, file_node, SPDX_NAMESPACE.checksum):
checksums.append(parse_checksum(checksum_node, graph))

file_types = []
Expand All @@ -39,25 +40,31 @@ def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File:
graph,
file_node,
SPDX_NAMESPACE.licenseConcluded,
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
license_info_in_file = []
for _, _, license_info_from_files_node in graph.triples((file_node, SPDX_NAMESPACE.licenseInfoInFile, None)):
license_info_in_file.append(
get_correctly_typed_value(
logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace)
logger,
license_info_from_files_node,
lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
)
license_comment = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.licenseComments)
copyright_text = parse_literal_or_no_assertion_or_none(logger, graph, file_node, SPDX_NAMESPACE.copyrightText)
file_contributors = []
for _, _, file_contributor in graph.triples((file_node, SPDX_NAMESPACE.fileContributor, None)):
for _, _, file_contributor in get_correctly_typed_triples(
logger, graph, file_node, SPDX_NAMESPACE.fileContributor, None
):
file_contributors.append(file_contributor.toPython())

notice_text = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.noticeText)
comment = parse_literal(logger, graph, file_node, RDFS.comment)
attribution_texts = []
for _, _, attribution_text_literal in graph.triples((file_node, SPDX_NAMESPACE.attributionText, None)):
for _, _, attribution_text_literal in get_correctly_typed_triples(
logger, graph, file_node, SPDX_NAMESPACE.attributionText, None
):
attribution_texts.append(attribution_text_literal.toPython())
raise_parsing_error_if_logger_has_messages(logger, "File")
file = construct_or_raise_parsing_error(
Expand Down
49 changes: 46 additions & 3 deletions src/spdx/parser/rdf/graph_parsing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#
# SPDX-License-Identifier: Apache-2.0
from enum import Enum
from typing import Any, Callable, Optional, Type
from typing import Any, Callable, Iterator, Optional, Tuple, Type, Union

from rdflib import Graph, URIRef
from rdflib import RDF, Graph, URIRef
from rdflib.exceptions import UniquenessError
from rdflib.namespace import NamespaceManager
from rdflib.term import Node
from rdflib.term import BNode, Literal, Node

from spdx.casing_tools import camel_case_to_snake_case
from spdx.model.spdx_no_assertion import SPDX_NO_ASSERTION_STRING, SpdxNoAssertion
Expand Down Expand Up @@ -102,3 +102,46 @@ def remove_prefix(string: str, prefix: str) -> str:
if string.startswith(prefix):
return string[len(prefix) :]
return string


def get_correctly_typed_triples(
    logger: Logger,
    graph: Graph,
    subject: Optional[Node] = None,
    predicate: Optional[Node] = None,
    _object: Optional[Node] = None,
) -> Iterator[Tuple[Union[BNode, URIRef], Node, Union[BNode, Literal, URIRef]]]:
    """Yield every triple of ``graph`` matching the pattern, flagging unexpected node types.

    This is a thin wrapper around ``graph.triples()``: nothing is converted or
    filtered. Each matching triple is yielded unchanged; the wrapper only checks
    that the subject and object have the node types the code consuming the
    triples is written for, and appends a warning to ``logger`` when they do not.

    Args:
        logger: Collector that receives one warning message per badly typed
            subject or object.
        graph: The graph to query.
        subject: Subject to match, or ``None`` to match any subject.
        predicate: Predicate to match, or ``None`` to match any predicate.
        _object: Object to match, or ``None`` to match any object.

    Yields:
        ``(subject, predicate, object)`` triples exactly as produced by
        ``graph.triples()``.
    """
    for s, p, o in graph.triples((subject, predicate, _object)):
        if not isinstance(s, (BNode, URIRef)):
            logger.append(
                f"Warning: Subject {s} should be of type BNode or URIRef, but is {type(s).__name__}. "
                f"This might lead to a failure."
            )
        if not isinstance(o, (BNode, Literal, URIRef)):
            logger.append(
                f"Warning: Object {o} should be of type BNode, Literal or URIRef, but is {type(o).__name__}. "
                f"This might lead to a failure."
            )
        # The triple is passed on even after a warning; the caller decides how to proceed.
        yield s, p, o


def get_value_from_graph(
    logger: Logger,
    graph: Graph,
    subject: Optional[Node] = None,
    predicate: Optional[Node] = RDF.value,
    _object: Optional[Node] = None,
    default: Optional[Any] = None,
    _any: Optional[bool] = True,
) -> Optional[Union[URIRef, Literal, BNode]]:
    """Look up a single value via ``graph.value()`` and flag unexpected node types.

    The arguments are forwarded unchanged to ``graph.value()``. The result is
    returned as-is; nothing is converted. If the lookup produced a value that is
    not a ``URIRef``, ``Literal`` or ``BNode``, a warning is appended to
    ``logger``.

    Args:
        logger: Collector that receives the warning message, if any.
        graph: The graph to query.
        subject: Forwarded to ``graph.value()`` as ``subject``.
        predicate: Forwarded to ``graph.value()`` as ``predicate``.
        _object: Forwarded to ``graph.value()`` as ``object``.
        default: Fallback forwarded to ``graph.value()`` as ``default``. It may
            be any Python object (for example a plain ``bool``) and is never
            reported as badly typed.
        _any: Forwarded to ``graph.value()`` as ``any``.

    Returns:
        Whatever ``graph.value()`` returned, including ``default`` or ``None``.
    """
    value = graph.value(subject=subject, predicate=predicate, object=_object, default=default, any=_any)
    # Only values that actually came out of the graph are type-checked. The
    # caller's fallback is an ordinary Python object, so warning about it would
    # report an error for a perfectly valid "nothing found" result.
    found_in_graph = value is not None and value is not default
    if found_in_graph and not isinstance(value, (URIRef, Literal, BNode)):
        logger.append(
            f"Warning: Node {value} should be of type BNode, Literal or URIRef, but is {type(value).__name__}. "
            f"This might lead to a failure."
        )
    return value
26 changes: 17 additions & 9 deletions src/spdx/parser/rdf/license_expression_parser.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Union
from typing import Optional, Union

from license_expression import LicenseExpression, get_spdx_licensing
from rdflib import RDF, Graph
from rdflib.term import BNode, Identifier, Node, URIRef

from spdx.parser.rdf.graph_parsing_functions import remove_prefix
from spdx.parser.logger import Logger
from spdx.parser.rdf.graph_parsing_functions import get_value_from_graph, remove_prefix
from spdx.rdfschema.namespace import LICENSE_NAMESPACE, SPDX_NAMESPACE


def parse_license_expression(
license_expression_node: Union[URIRef, BNode, Node], graph: Graph, doc_namespace: str
license_expression_node: Union[URIRef, BNode, Node],
graph: Graph,
doc_namespace: str,
logger: Optional[Logger] = None,
) -> LicenseExpression:
if not logger:
logger = Logger()
spdx_licensing = get_spdx_licensing()
expression = ""
if license_expression_node.startswith(LICENSE_NAMESPACE):
Expand All @@ -27,28 +33,30 @@ def parse_license_expression(
if node_type == SPDX_NAMESPACE.ConjunctiveLicenseSet:
members = []
for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)):
members.append(parse_license_expression(member_node, graph, doc_namespace))
members.append(parse_license_expression(member_node, graph, doc_namespace, logger))
expression = " AND ".join([str(member) for member in members])
if node_type == SPDX_NAMESPACE.DisjunctiveLicenseSet:
members = []
for _, _, member_node in graph.triples((license_expression_node, SPDX_NAMESPACE.member, None)):
members.append(parse_license_expression(member_node, graph, doc_namespace))
members.append(parse_license_expression(member_node, graph, doc_namespace, logger))
expression = " OR ".join([str(member) for member in members])
if node_type == SPDX_NAMESPACE.WithExceptionOperator:
license_expression = parse_license_expression(
graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace
graph.value(license_expression_node, SPDX_NAMESPACE.member), graph, doc_namespace, logger
)
exception = parse_license_exception(
graph.value(license_expression_node, SPDX_NAMESPACE.licenseException), graph
get_value_from_graph(logger, graph, license_expression_node, SPDX_NAMESPACE.licenseException),
graph,
logger,
)
expression = f"{license_expression} WITH {exception}"

return spdx_licensing.parse(expression)


def parse_license_exception(exception_node: Identifier, graph: Graph) -> str:
def parse_license_exception(exception_node: Identifier, graph: Graph, logger) -> str:
if exception_node.startswith(LICENSE_NAMESPACE):
exception = remove_prefix(exception_node, LICENSE_NAMESPACE)
else:
exception = graph.value(exception_node, SPDX_NAMESPACE.licenseExceptionId).toPython()
exception = get_value_from_graph(logger, graph, exception_node, SPDX_NAMESPACE.licenseExceptionId).toPython()
return exception
23 changes: 16 additions & 7 deletions src/spdx/parser/rdf/package_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional

from rdflib import DOAP, RDFS, Graph, URIRef
from rdflib.term import BNode

from spdx.datetime_conversions import datetime_from_str
from spdx.model.package import (
Expand All @@ -18,7 +19,9 @@
from spdx.parser.parsing_functions import construct_or_raise_parsing_error, raise_parsing_error_if_logger_has_messages
from spdx.parser.rdf.checksum_parser import parse_checksum
from spdx.parser.rdf.graph_parsing_functions import (
get_correctly_typed_triples,
get_correctly_typed_value,
get_value_from_graph,
parse_enum_value,
parse_literal,
parse_literal_or_no_assertion_or_none,
Expand All @@ -36,7 +39,7 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac
logger, graph, package_node, SPDX_NAMESPACE.downloadLocation
)
checksums = []
for _, _, checksum_node in graph.triples((package_node, SPDX_NAMESPACE.checksum, None)):
for _, _, checksum_node in get_correctly_typed_triples(logger, graph, package_node, SPDX_NAMESPACE.checksum):
checksums.append(parse_checksum(checksum_node, graph))

version_info = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.versionInfo)
Expand All @@ -57,28 +60,34 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac
)

external_package_refs = []
for _, _, external_package_ref_node in graph.triples((package_node, SPDX_NAMESPACE.externalRef, None)):
for _, _, external_package_ref_node in get_correctly_typed_triples(
logger, graph, package_node, SPDX_NAMESPACE.externalRef
):
external_package_refs.append(parse_external_package_ref(external_package_ref_node, graph, doc_namespace))
files_analyzed = bool(graph.value(package_node, SPDX_NAMESPACE.filesAnalyzed, default=True))
files_analyzed = bool(
get_value_from_graph(logger, graph, package_node, SPDX_NAMESPACE.filesAnalyzed, default=True)
)
license_concluded = parse_literal_or_no_assertion_or_none(
logger,
graph,
package_node,
SPDX_NAMESPACE.licenseConcluded,
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
license_declared = parse_literal_or_no_assertion_or_none(
logger,
graph,
package_node,
SPDX_NAMESPACE.licenseDeclared,
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace),
parsing_method=lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
license_info_from_files = []
for _, _, license_info_from_files_node in graph.triples((package_node, SPDX_NAMESPACE.licenseInfoFromFiles, None)):
license_info_from_files.append(
get_correctly_typed_value(
logger, license_info_from_files_node, lambda x: parse_license_expression(x, graph, doc_namespace)
logger,
license_info_from_files_node,
lambda x: parse_license_expression(x, graph, doc_namespace, logger),
)
)
license_comment = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.licenseComments)
Expand Down Expand Up @@ -161,7 +170,7 @@ def parse_package_verification_code(
return package_verification_code


def parse_external_package_ref(external_package_ref_node: URIRef, graph: Graph, doc_namespace) -> ExternalPackageRef:
def parse_external_package_ref(external_package_ref_node: BNode, graph: Graph, doc_namespace) -> ExternalPackageRef:
logger = Logger()
ref_locator = parse_literal(logger, graph, external_package_ref_node, SPDX_NAMESPACE.referenceLocator)
ref_category = parse_literal(
Expand Down
Loading