Skip to content

Commit

Permalink
[issue-650] raise ParsingError if elements don't have an SPDXID
Browse files Browse the repository at this point in the history
Signed-off-by: Meret Behrens <meret.behrens@tngtech.com>
  • Loading branch information
meretp committed May 15, 2023
1 parent 6a7e500 commit 0f41ee0
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 10 deletions.
6 changes: 4 additions & 2 deletions src/spdx_tools/spdx/parser/rdf/file_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from rdflib import RDFS, Graph, URIRef
from typing import Union

from rdflib import RDFS, BNode, Graph, URIRef

from spdx_tools.spdx.model import File, FileType
from spdx_tools.spdx.parser.logger import Logger
Expand All @@ -23,7 +25,7 @@
from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE


def parse_file(file_node: URIRef, graph: Graph, doc_namespace: str) -> File:
def parse_file(file_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> File:
logger = Logger()
spdx_id = parse_spdx_id(file_node, doc_namespace, graph)
name = parse_literal(logger, graph, file_node, SPDX_NAMESPACE.fileName)
Expand Down
4 changes: 2 additions & 2 deletions src/spdx_tools/spdx/parser/rdf/graph_parsing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ def parse_enum_value(enum_str: str, enum_class: Type[Enum], prefix: str) -> Enum
raise SPDXParsingError([f"Invalid value for {enum_class}: {enum_str}"])


def parse_spdx_id(resource: URIRef, doc_namespace: str, graph: Graph) -> Optional[str]:
if not resource:
def parse_spdx_id(resource: Union[URIRef, BNode], doc_namespace: str, graph: Graph) -> Optional[str]:
if not resource or isinstance(resource, BNode):
return None
if resource.startswith(f"{doc_namespace}#"):
return resource.fragment
Expand Down
5 changes: 2 additions & 3 deletions src/spdx_tools/spdx/parser/rdf/package_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Optional
from typing import Optional, Union

from rdflib import DOAP, RDFS, Graph, URIRef
from rdflib.term import BNode
Expand Down Expand Up @@ -34,7 +34,7 @@
from spdx_tools.spdx.rdfschema.namespace import REFERENCE_NAMESPACE, SPDX_NAMESPACE


def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Package:
def parse_package(package_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Package:
logger = Logger()
spdx_id = parse_spdx_id(package_node, doc_namespace, graph)
name = parse_literal(logger, graph, package_node, SPDX_NAMESPACE.name)
Expand Down Expand Up @@ -120,7 +120,6 @@ def parse_package(package_node: URIRef, graph: Graph, doc_namespace: str) -> Pac
valid_until_date = parse_literal(
logger, graph, package_node, SPDX_NAMESPACE.validUntilDate, parsing_method=datetime_from_str
)

raise_parsing_error_if_logger_has_messages(logger, "Package")
package = construct_or_raise_parsing_error(
Package,
Expand Down
4 changes: 2 additions & 2 deletions src/spdx_tools/spdx/parser/rdf/snippet_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SPDX-FileCopyrightText: 2023 spdx contributors
#
# SPDX-License-Identifier: Apache-2.0
from typing import Dict, Optional, Tuple
from typing import Dict, Optional, Tuple, Union

from rdflib import RDF, RDFS, Graph
from rdflib.exceptions import UniquenessError
Expand All @@ -27,7 +27,7 @@
from spdx_tools.spdx.rdfschema.namespace import POINTER_NAMESPACE, SPDX_NAMESPACE


def parse_snippet(snippet_node: URIRef, graph: Graph, doc_namespace: str) -> Snippet:
def parse_snippet(snippet_node: Union[URIRef, BNode], graph: Graph, doc_namespace: str) -> Snippet:
logger = Logger()
spdx_id = parse_spdx_id(snippet_node, doc_namespace, graph)
file_spdx_id_uri = get_value_from_graph(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<?xml version="1.0" encoding="UTF-8"?>
<rdf:RDF
xmlns:spdx="http://spdx.org/rdf/terms#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:ptr="http://www.w3.org/2009/pointers#"
>
<spdx:SpdxDocument rdf:about="https://some.namespace#SPDXRef-DOCUMENT">
<rdfs:comment>documentComment</rdfs:comment>
<spdx:name>documentName</spdx:name>
<spdx:dataLicense rdf:resource="http://spdx.org/licenses/CC0-1.0"/>
<spdx:creationInfo>
<spdx:CreationInfo rdf:nodeID="N80af30c2c55b44afb3a7cf4124ed9aff">
<spdx:licenseListVersion>3.19</spdx:licenseListVersion>
<spdx:created>2022-12-01T00:00:00Z</spdx:created>
</spdx:CreationInfo>
</spdx:creationInfo>
</spdx:SpdxDocument>
<spdx:Package>
<spdx:name>packageName</spdx:name>
<spdx:downloadLocation>http://differentdownload.com</spdx:downloadLocation>
</spdx:Package>
<spdx:File>
<spdx:checksum>
<spdx:Checksum rdf:nodeID="Nab58c45c3b81449e9d4fbcb81940ceee">
<spdx:algorithm rdf:resource="http://spdx.org/rdf/terms#checksumAlgorithm_sha1"/>
<spdx:checksumValue>71c4025dd9897b364f3ebbb42c484ff43d00791c</spdx:checksumValue>
</spdx:Checksum>
</spdx:checksum>
<spdx:fileName>./fileName.py</spdx:fileName>
</spdx:File>
<spdx:Snippet>
<spdx:range>
<ptr:StartEndPointer rdf:nodeID="N77e7b08564a0412a9413b90ff9b3ddd9">
<ptr:startPointer>
<ptr:ByteOffsetPointer rdf:nodeID="N89d93c1292de424f9aa75ee1e297ede9">
<ptr:offset rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">1</ptr:offset>
<ptr:reference rdf:resource="https://some.namespace#SPDXRef-File"/>
</ptr:ByteOffsetPointer>
</ptr:startPointer>
<ptr:endPointer>
<ptr:ByteOffsetPointer rdf:nodeID="N7099800c04534fcc8a998b551fee34ce">
<ptr:reference rdf:resource="https://some.namespace#SPDXRef-File"/>
<ptr:offset rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2</ptr:offset>
</ptr:ByteOffsetPointer>
</ptr:endPointer>
</ptr:StartEndPointer>
</spdx:range>
<spdx:snippetFromFile/>
</spdx:Snippet>
</rdf:RDF>


19 changes: 18 additions & 1 deletion tests/spdx/parser/rdf/test_file_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import os
from unittest import TestCase

import pytest
from license_expression import get_spdx_licensing
from rdflib import RDF, Graph, URIRef
from rdflib import RDF, BNode, Graph, URIRef

from spdx_tools.spdx.model import Checksum, ChecksumAlgorithm, FileType, SpdxNoAssertion
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.rdf.file_parser import parse_file
from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE

Expand Down Expand Up @@ -35,3 +37,18 @@ def test_parse_file():
assert file.license_comment == "licenseComment"
assert file.notice == "fileNotice"
assert file.attribution_texts == ["fileAttributionText"]


def test_parse_invalid_file():
    """Parsing a File element that has no SPDXID must raise an SPDXParsingError."""
    fixture_path = os.path.join(os.path.dirname(__file__), "data/invalid_documents/file_without_spdx_ids.xml")
    graph = Graph().parse(fixture_path)
    doc_namespace = "https://some.namespace"
    file_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.File)

    # Precondition of the scenario: the File element is a blank node, not a URI.
    assert isinstance(file_node, BNode)

    expected_messages = [
        "Error while constructing File: ['SetterError File: type of argument "
        '"spdx_id" must be str; got NoneType instead: None\']'
    ]

    with pytest.raises(SPDXParsingError) as raised:
        parse_file(file_node, graph, doc_namespace)

    assert raised.value.get_messages() == expected_messages
16 changes: 16 additions & 0 deletions tests/spdx/parser/rdf/test_package_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
PackageVerificationCode,
SpdxNoAssertion,
)
from spdx_tools.spdx.parser.error import SPDXParsingError
from spdx_tools.spdx.parser.rdf.package_parser import parse_external_package_ref, parse_package
from spdx_tools.spdx.rdfschema.namespace import SPDX_NAMESPACE

Expand Down Expand Up @@ -95,3 +96,18 @@ def test_external_package_ref_parser(download_location, category, locator, type,
assert external_package_ref.locator == locator
assert external_package_ref.reference_type == type
assert external_package_ref.comment == comment


def test_parse_invalid_package():
    """Parsing a Package element that has no SPDXID must raise an SPDXParsingError."""
    fixture_path = os.path.join(os.path.dirname(__file__), "data/invalid_documents/file_without_spdx_ids.xml")
    graph = Graph().parse(fixture_path)
    doc_namespace = "https://some.namespace"
    package_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.Package)

    # Precondition of the scenario: the Package element is a blank node, not a URI.
    assert isinstance(package_node, BNode)

    expected_messages = [
        "Error while constructing Package: ['SetterError Package: type of argument "
        '"spdx_id" must be str; got NoneType instead: None\']'
    ]

    with pytest.raises(SPDXParsingError) as raised:
        parse_package(package_node, graph, doc_namespace)

    assert raised.value.get_messages() == expected_messages
16 changes: 16 additions & 0 deletions tests/spdx/parser/rdf/test_snippet_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,19 @@ def add_range_to_graph_helper(graph, predicate_value_class_member):
graph.add((pointer_node, RDF.type, pointer_class))
graph.add((start_end_pointer, predicate, pointer_node))
graph.add((pointer_node, pointer_member, Literal(value)))


def test_parse_invalid_file():
    """Parsing a Snippet element that has no SPDXID must raise an SPDXParsingError.

    Two setter errors are expected in the message: one for ``spdx_id`` and one
    for ``file_spdx_id``.
    """
    # NOTE(review): the name says "file" but this test exercises parse_snippet;
    # looks like a copy-paste leftover - consider renaming to
    # test_parse_invalid_snippet.
    fixture_path = os.path.join(os.path.dirname(__file__), "data/invalid_documents/file_without_spdx_ids.xml")
    graph = Graph().parse(fixture_path)
    doc_namespace = "https://some.namespace"
    snippet_node = graph.value(predicate=RDF.type, object=SPDX_NAMESPACE.Snippet)

    # Precondition of the scenario: the Snippet element is a blank node, not a URI.
    assert isinstance(snippet_node, BNode)

    expected_messages = [
        "Error while constructing Snippet: ['SetterError Snippet: type of argument "
        "\"spdx_id\" must be str; got NoneType instead: None', 'SetterError Snippet: "
        'type of argument "file_spdx_id" must be str; got NoneType instead: None\']'
    ]

    with pytest.raises(SPDXParsingError) as raised:
        parse_snippet(snippet_node, graph, doc_namespace)

    assert raised.value.get_messages() == expected_messages

0 comments on commit 0f41ee0

Please sign in to comment.