diff --git a/README.md b/README.md index a56cc8f3e..3aa762980 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,11 @@ This library implements SPDX parsers, convertors, validators and handlers in Pyt # Features -* API to create and manipulate SPDX v2.2 and v2.3 documents. +* API to create and manipulate SPDX v2.2 and v2.3 documents * Parse, convert, create and validate SPDX files * supported formats: Tag/Value, RDF, JSON, YAML, XML +* visualize the structure of an SPDX document by creating an `AGraph`. Note: This is an optional feature and requires +additional installation of optional dependencies # Planned features @@ -78,6 +80,18 @@ instead of `bin`. * For help use `pyspdxtools --help` +3. **GRAPH GENERATION** (optional feature) + +* This feature generates a graph representing all elements in the SPDX document and their connections based on the provided + relationships. The graph can be rendered to a picture. Below is an example for the file `tests/data/formats/SPDXJSONExample-v2.3.spdx.json`: +![SPDXJSONExample-v2.3.spdx.png](assets/SPDXJSONExample-v2.3.spdx.png) +* Make sure you install the optional dependencies `networkx` and `pygraphviz`. To do so run `pip install ".[graph_generation]"`. +* Use `pyspdxtools -i INPUT_FILE --graph -o OUTPUT_FILE` where `OUTPUT_FILE` is an output file name with valid format for `pygraphviz` (check + the documentation [here](https://pygraphviz.github.io/documentation/stable/reference/agraph.html#pygraphviz.AGraph.draw)). +* If you are using a source distribution, try running + `pyspdxtools -i tests/data/formats/SPDXJSONExample-v2.3.spdx.json --graph -o SPDXJSONExample-v2.3.spdx.png` to generate + a png with an overview of the structure of the example file. + ## Library usage 1. **DATA MODEL** * The `src.spdx.model` package constitutes the internal SPDX v2.3 data model (v2.2 is a simply a subset of this). 
diff --git a/assets/SPDXJSONExample-v2.3.spdx.png b/assets/SPDXJSONExample-v2.3.spdx.png new file mode 100644 index 000000000..1d050a166 Binary files /dev/null and b/assets/SPDXJSONExample-v2.3.spdx.png differ diff --git a/pyproject.toml b/pyproject.toml index b09e2d28e..338339c31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dynamic = ["version"] [project.optional-dependencies] test = ["pytest"] code_style = ["isort", "black", "flake8"] +graph_generation = ["pygraphviz", "networkx"] [project.scripts] pyspdxtools = "spdx.clitools.pyspdxtools:main" diff --git a/src/spdx/clitools/pyspdxtools.py b/src/spdx/clitools/pyspdxtools.py index e5ee38166..e2461767d 100644 --- a/src/spdx/clitools/pyspdxtools.py +++ b/src/spdx/clitools/pyspdxtools.py @@ -18,6 +18,7 @@ import click +from spdx.graph_generation import export_graph_from_document from spdx.model.document import Document from spdx.parser.error import SPDXParsingError from spdx.parser.parse_anything import parse_file @@ -32,7 +33,8 @@ @click.option( "--outfile", "-o", - help="The file to write the converted document to (write a dash for output to stdout or omit for no conversion).", + help="The file to write the converted document to (write a dash for output to stdout or omit for no conversion). " + "If you add the option --graph to the command the generated graph will be written to this file.", ) @click.option( "--version", @@ -41,7 +43,15 @@ default=None, ) @click.option("--novalidation", is_flag=True, help="Don't validate the provided document.") -def main(infile: str, outfile: str, version: str, novalidation: bool): +@click.option( + "--graph", + is_flag=True, + default=False, + help="Generate a relationship graph from the input file. " + "The generated graph is saved to the file specified with --outfile. 
" + "Note: You need to install the optional dependencies 'networkx' and 'pygraphviz' for this feature.", +) +def main(infile: str, outfile: str, version: str, novalidation: bool, graph: bool): """ CLI-tool for validating SPDX documents and converting between RDF, TAG-VALUE, JSON, YAML and XML formats. Formats are determined by the file endings. @@ -50,9 +60,6 @@ def main(infile: str, outfile: str, version: str, novalidation: bool): try: document: Document = parse_file(infile) - if outfile == "-": - tagvalue_writer.write_document(document, sys.stdout) - if not novalidation: if not version: version = document.creation_info.spdx_version @@ -72,7 +79,20 @@ def main(infile: str, outfile: str, version: str, novalidation: bool): else: logging.info("The document is valid.") - if outfile and outfile != "-": + if outfile == "-": + tagvalue_writer.write_document(document, sys.stdout) + + elif graph: + try: + export_graph_from_document(document, outfile) + except ImportError: + logging.error( + "To be able to draw a relationship graph of the parsed document " + "you need to install 'networkx' and 'pygraphviz'. Run 'pip install \".[graph_generation]\"'." 
+ ) + sys.exit(1) + + elif outfile: write_file(document, outfile, validate=False) except NotImplementedError as err: diff --git a/src/spdx/document_utils.py b/src/spdx/document_utils.py index 73c603feb..a050ee5a2 100644 --- a/src/spdx/document_utils.py +++ b/src/spdx/document_utils.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: 2022 spdx contributors # # SPDX-License-Identifier: Apache-2.0 -from typing import List, Union +from typing import Dict, List, Union from spdx.model.document import Document from spdx.model.file import File @@ -17,9 +17,15 @@ def get_contained_spdx_element_ids(document: Document) -> List[str]: def get_element_from_spdx_id(document: Document, spdx_id: str) -> Union[Package, File, Snippet, None]: - elements = [file_ for file_ in document.files] - elements.extend([package_ for package_ in document.packages]) - elements.extend([snippet_ for snippet_ in document.snippets]) - for element in elements: - if element.spdx_id == spdx_id: - return element + contained_spdx_elements: Dict[str, Union[Package, File, Snippet]] = get_contained_spdx_elements(document) + if spdx_id not in contained_spdx_elements: + return None + return contained_spdx_elements[spdx_id] + + +def get_contained_spdx_elements(document: Document) -> Dict[str, Union[Package, File, Snippet]]: + contained_spdx_elements = {package.spdx_id: package for package in document.packages} + contained_spdx_elements.update({file.spdx_id: file for file in document.files}) + contained_spdx_elements.update({snippet.spdx_id: snippet for snippet in document.snippets}) + + return contained_spdx_elements diff --git a/src/spdx/graph_generation.py b/src/spdx/graph_generation.py new file mode 100644 index 000000000..40d673315 --- /dev/null +++ b/src/spdx/graph_generation.py @@ -0,0 +1,76 @@ +# SPDX-FileCopyrightText: 2023 spdx contributors +# +# SPDX-License-Identifier: Apache-2.0 +from typing import Dict, List, Union + +from spdx.model.file import File +from spdx.model.package import Package +from 
spdx.model.snippet import Snippet + +try: + from networkx import DiGraph +except ImportError: + DiGraph = None +from spdx.document_utils import get_contained_spdx_elements +from spdx.model.document import Document +from spdx.model.relationship import Relationship + + +def export_graph_from_document(document: Document, file_name: str) -> None: + from networkx.drawing import nx_agraph + + graph = generate_relationship_graph_from_spdx(document) + _color_nodes(graph) + attributes_graph = nx_agraph.to_agraph(graph) # convert to a pygraphviz graph + attributes_graph.draw(file_name, prog="dot") + + +def generate_relationship_graph_from_spdx(document: Document) -> DiGraph: + from networkx import DiGraph + + graph = DiGraph() + graph.add_node(document.creation_info.spdx_id, element=document.creation_info) + + contained_elements: Dict[str, Union[Package, File, Snippet]] = get_contained_spdx_elements(document) + contained_element_nodes = [(spdx_id, {"element": element}) for spdx_id, element in contained_elements.items()] + graph.add_nodes_from(contained_element_nodes) + + relationships_by_spdx_id: Dict[str, List[Relationship]] = dict() + for relationship in document.relationships: + relationships_by_spdx_id.setdefault(relationship.spdx_element_id, []).append(relationship) + + for spdx_id, relationships in relationships_by_spdx_id.items(): + if spdx_id not in graph.nodes(): + # this will add any external spdx_id to the graph where we have no further information about the element, + # to indicate that this node represents an element we add the attribute "element" + graph.add_node(spdx_id, element=None) + for relationship in relationships: + relationship_node_key = relationship.spdx_element_id + "_" + relationship.relationship_type.name + graph.add_node(relationship_node_key, comment=relationship.comment) + graph.add_edge(relationship.spdx_element_id, relationship_node_key) + # if the related spdx element is SpdxNone or SpdxNoAssertion we need a type conversion + 
related_spdx_element_id = str(relationship.related_spdx_element_id) + + if related_spdx_element_id not in graph.nodes(): + # this will add any external spdx_id to the graph where we have no further information about + # the element, to indicate that this node represents an element we add the attribute "element" + graph.add_node( + related_spdx_element_id, + element=None, + ) + graph.add_edge(relationship_node_key, related_spdx_element_id) + + return graph + + +def _color_nodes(graph: DiGraph) -> None: + for node in graph.nodes(): + if "_" in node: + # nodes representing a RelationshipType are concatenated with the spdx_element_id, + # to only see the RelationshipType when rendering the graph to a picture we add + # a label to these nodes + graph.add_node(node, color="lightgreen", label=node.split("_", 1)[-1]) + elif node == "SPDXRef-DOCUMENT": + graph.add_node(node, color="indianred2") + else: + graph.add_node(node, color="lightskyblue") diff --git a/tests/spdx/test_document_utils.py b/tests/spdx/test_document_utils.py index 873af1a1a..a81a39bd8 100644 --- a/tests/spdx/test_document_utils.py +++ b/tests/spdx/test_document_utils.py @@ -5,7 +5,7 @@ import pytest -from spdx.document_utils import get_contained_spdx_element_ids, get_element_from_spdx_id +from spdx.document_utils import get_contained_spdx_element_ids, get_contained_spdx_elements, get_element_from_spdx_id from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture, snippet_fixture @@ -26,3 +26,11 @@ def test_get_element_from_spdx_id(variables): assert get_element_from_spdx_id(document, file.spdx_id) == file assert get_element_from_spdx_id(document, snippet.spdx_id) == snippet assert get_element_from_spdx_id(document, "unknown_id") is None + + +def test_get_contained_spdx_elements(variables): + document, package, file, snippet = variables + contained_elements = get_contained_spdx_elements(document) + assert contained_elements[package.spdx_id] == package + assert 
contained_elements[file.spdx_id] == file + assert contained_elements[snippet.spdx_id] == snippet diff --git a/tests/spdx/test_graph_generation.py b/tests/spdx/test_graph_generation.py new file mode 100644 index 000000000..6c28d1d41 --- /dev/null +++ b/tests/spdx/test_graph_generation.py @@ -0,0 +1,155 @@ +# SPDX-FileCopyrightText: 2023 spdx contributors +# +# SPDX-License-Identifier: Apache-2.0 +from pathlib import Path +from typing import List +from unittest import TestCase + +import pytest + +from spdx.graph_generation import generate_relationship_graph_from_spdx +from spdx.model.document import Document +from spdx.model.relationship import Relationship, RelationshipType +from spdx.parser.parse_anything import parse_file +from tests.spdx.fixtures import document_fixture, file_fixture, package_fixture + +try: + import networkx # noqa: F401 + import pygraphviz # noqa: F401 +except ImportError: + pytest.skip("Skip this module as the tests need optional dependencies to run.", allow_module_level=True) + + +@pytest.mark.parametrize( + "file_name, nodes_count, edges_count, relationship_node_keys", + [ + ( + "SPDXJSONExample-v2.3.spdx.json", + 22, + 22, + ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"], + ), + ( + "SPDXJSONExample-v2.2.spdx.json", + 20, + 19, + ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"], + ), + ( + "SPDXRdfExample-v2.3.spdx.rdf.xml", + 22, + 22, + ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"], + ), + ( + "SPDXRdfExample-v2.2.spdx.rdf.xml", + 20, + 17, + ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"], + ), + ( + "SPDXTagExample-v2.3.spdx", + 22, + 22, + ["SPDXRef-Package_DYNAMIC_LINK", "SPDXRef-JenaLib_CONTAINS"], + ), + ], +) +def test_generate_graph_from_spdx( + file_name: str, + nodes_count: int, + edges_count: int, + relationship_node_keys: List[str], +) -> None: + document = parse_file(str(Path(__file__).resolve().parent.parent / "spdx" / "data" / "formats" / file_name)) + graph = 
generate_relationship_graph_from_spdx(document) + + assert document.creation_info.spdx_id in graph.nodes() + assert graph.number_of_nodes() == nodes_count + assert graph.number_of_edges() == edges_count + assert "SPDXRef-DOCUMENT_DESCRIBES" in graph.nodes() + for relationship_node_key in relationship_node_keys: + assert relationship_node_key in graph.nodes() + + +def test_complete_connected_graph() -> None: + document = _create_minimal_document() + + graph = generate_relationship_graph_from_spdx(document) + + TestCase().assertCountEqual( + graph.nodes(), + [ + "SPDXRef-DOCUMENT", + "SPDXRef-Package-A", + "SPDXRef-Package-B", + "SPDXRef-File", + "SPDXRef-DOCUMENT_DESCRIBES", + "SPDXRef-Package-A_CONTAINS", + "SPDXRef-Package-B_CONTAINS", + ], + ) + TestCase().assertCountEqual( + graph.edges(), + [ + ("SPDXRef-DOCUMENT", "SPDXRef-DOCUMENT_DESCRIBES"), + ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-A"), + ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-B"), + ("SPDXRef-Package-A", "SPDXRef-Package-A_CONTAINS"), + ("SPDXRef-Package-A_CONTAINS", "SPDXRef-File"), + ("SPDXRef-Package-B", "SPDXRef-Package-B_CONTAINS"), + ("SPDXRef-Package-B_CONTAINS", "SPDXRef-File"), + ], + ) + + +def test_complete_unconnected_graph() -> None: + document = _create_minimal_document() + document.packages += [package_fixture(spdx_id="SPDXRef-Package-C", name="Package without connection to document")] + + graph = generate_relationship_graph_from_spdx(document) + + TestCase().assertCountEqual( + graph.nodes(), + [ + "SPDXRef-DOCUMENT", + "SPDXRef-Package-A", + "SPDXRef-Package-B", + "SPDXRef-File", + "SPDXRef-DOCUMENT_DESCRIBES", + "SPDXRef-Package-A_CONTAINS", + "SPDXRef-Package-B_CONTAINS", + "SPDXRef-Package-C", + ], + ) + TestCase().assertCountEqual( + graph.edges(), + [ + ("SPDXRef-DOCUMENT", "SPDXRef-DOCUMENT_DESCRIBES"), + ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-A"), + ("SPDXRef-DOCUMENT_DESCRIBES", "SPDXRef-Package-B"), + ("SPDXRef-Package-A", "SPDXRef-Package-A_CONTAINS"), 
+ ("SPDXRef-Package-A_CONTAINS", "SPDXRef-File"), + ("SPDXRef-Package-B", "SPDXRef-Package-B_CONTAINS"), + ("SPDXRef-Package-B_CONTAINS", "SPDXRef-File"), + ], + ) + + +def _create_minimal_document() -> Document: + packages = [ + package_fixture(spdx_id="SPDXRef-Package-A", name="Package-A"), + package_fixture(spdx_id="SPDXRef-Package-B", name="Package-B"), + ] + files = [ + file_fixture(spdx_id="SPDXRef-File", name="File"), + ] + relationships = [ + Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-Package-A"), + Relationship("SPDXRef-DOCUMENT", RelationshipType.DESCRIBES, "SPDXRef-Package-B"), + Relationship("SPDXRef-Package-A", RelationshipType.CONTAINS, "SPDXRef-File"), + Relationship("SPDXRef-Package-B", RelationshipType.CONTAINS, "SPDXRef-File"), + ] + document = document_fixture(packages=packages, files=files, relationships=relationships, snippets=[]) + + return document