Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SBOMs generation for Windows artifacts #100

Merged
merged 11 commits into from
Apr 10, 2024
183 changes: 143 additions & 40 deletions sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@

"""

import argparse
import datetime
import hashlib
import io
import json
import os
import pathlib
import re
import subprocess
import sys
Expand Down Expand Up @@ -316,38 +318,20 @@ def create_pip_sbom_from_wheel(
)


def create_sbom_for_source_tarball(tarball_path: str):
"""Stitches together an SBOM for a source tarball"""
tarball_name = os.path.basename(tarball_path)

# Open the tarball with known compression settings.
if tarball_name.endswith(".tgz"):
tarball = tarfile.open(tarball_path, mode="r:gz")
elif tarball_name.endswith(".tar.xz"):
tarball = tarfile.open(tarball_path, mode="r:xz")
else:
raise ValueError(f"Unknown tarball format: '{tarball_name}'")
def create_cpython_sbom(
sbom_data: dict[str, typing.Any],
cpython_version: str,
artifact_path: str,
):
"""Creates the top-level SBOM metadata and the CPython SBOM package."""

# Parse the CPython version from the tarball.
# Calculate the download locations from the CPython version and tarball name.
cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1)
cpython_version_without_suffix = re.match(r"^([0-9.]+)", cpython_version).group(1)
tarball_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{tarball_name}"
artifact_name = os.path.basename(artifact_path)
artifact_download_location = f"https://www.python.org/ftp/python/{cpython_version_without_suffix}/{artifact_name}"

# Take a hash of the tarball
with open(tarball_path, mode="rb") as f:
tarball_checksum_sha256 = hashlib.sha256(f.read()).hexdigest()

# There should be an SBOM included in the tarball.
# If there's not we can't create an SBOM.
try:
sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json")
except KeyError:
raise ValueError(
"Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
) from None
sbom_bytes = tarball.extractfile(sbom_tarball_member).read()
sbom_data = json.loads(sbom_bytes)
# Take a hash of the artifact
with open(artifact_path, mode="rb") as f:
artifact_checksum_sha256 = hashlib.sha256(f.read()).hexdigest()

sbom_data.update({
"SPDXID": "SPDXRef-DOCUMENT",
Expand All @@ -356,7 +340,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
"dataLicense": "CC0-1.0",
# Naming done according to OpenSSF SBOM WG recommendations.
# See: https://github.com/ossf/sbom-everywhere/blob/main/reference/sbom_naming.md
"documentNamespace": f"{tarball_download_location}.spdx.json",
"documentNamespace": f"{artifact_download_location}.spdx.json",
"creationInfo": {
"created": (
datetime.datetime.now(tz=datetime.timezone.utc)
Expand All @@ -381,7 +365,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
"licenseConcluded": "PSF-2.0",
"originator": "Organization: Python Software Foundation",
"supplier": "Organization: Python Software Foundation",
"packageFileName": tarball_name,
"packageFileName": artifact_name,
"externalRefs": [
{
"referenceCategory": "SECURITY",
Expand All @@ -390,8 +374,8 @@ def create_sbom_for_source_tarball(tarball_path: str):
}
],
"primaryPackagePurpose": "SOURCE",
"downloadLocation": tarball_download_location,
"checksums": [{"algorithm": "SHA256", "checksumValue": tarball_checksum_sha256}],
"downloadLocation": artifact_download_location,
"checksums": [{"algorithm": "SHA256", "checksumValue": artifact_checksum_sha256}],
}

# The top-level CPython package depends on every vendored sub-package.
Expand All @@ -404,6 +388,37 @@ def create_sbom_for_source_tarball(tarball_path: str):

sbom_data["packages"].append(sbom_cpython_package)


def create_sbom_for_source_tarball(tarball_path: str):
"""Stitches together an SBOM for a source tarball"""
tarball_name = os.path.basename(tarball_path)

# Open the tarball with known compression settings.
if tarball_name.endswith(".tgz"):
tarball = tarfile.open(tarball_path, mode="r:gz")
elif tarball_name.endswith(".tar.xz"):
tarball = tarfile.open(tarball_path, mode="r:xz")
else:
raise ValueError(f"Unknown tarball format: '{tarball_name}'")

# Parse the CPython version from the tarball.
# Calculate the download locations from the CPython version and tarball name.
cpython_version = re.match(r"^Python-([0-9abrc.]+)\.t", tarball_name).group(1)

# There should be an SBOM included in the tarball.
# If there's not we can't create an SBOM.
try:
sbom_tarball_member = tarball.getmember(f"Python-{cpython_version}/Misc/sbom.spdx.json")
except KeyError:
raise ValueError(
"Tarball doesn't contain an SBOM at 'Misc/sbom.spdx.json'"
) from None
sbom_bytes = tarball.extractfile(sbom_tarball_member).read()
sbom_data = json.loads(sbom_bytes)

create_cpython_sbom(sbom_data, cpython_version=cpython_version, artifact_path=tarball_path)
sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython")

# Find the pip wheel in ensurepip in the tarball
for member in tarball.getmembers():
match = re.match(rf"^Python-{cpython_version}/Lib/ensurepip/_bundled/(pip-.*\.whl)$", member.name)
Expand Down Expand Up @@ -487,7 +502,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
)
sbom_data["relationships"].append(
{
"spdxElementId": sbom_cpython_package["SPDXID"],
"spdxElementId": sbom_cpython_package_spdx_id,
"relatedSpdxElement": sbom_file_spdx_id,
"relationshipType": "CONTAINS",
}
Expand All @@ -505,7 +520,7 @@ def create_sbom_for_source_tarball(tarball_path: str):
sbom_data["relationships"].append(
{
"spdxElementId": "SPDXRef-DOCUMENT",
"relatedSpdxElement": sbom_cpython_package["SPDXID"],
"relatedSpdxElement": sbom_cpython_package_spdx_id,
"relationshipType": "DESCRIBES",
}
)
Expand All @@ -519,16 +534,104 @@ def create_sbom_for_source_tarball(tarball_path: str):
# Calculate the 'packageVerificationCode' values for files in packages.
calculate_package_verification_codes(sbom_data)

# Normalize SBOM structures for reproducibility.
normalize_sbom_data(sbom_data)
return sbom_data


def create_sbom_for_windows_artifact(artifact_path, cpython_source_dir: str):
artifact_name = os.path.basename(artifact_path)
cpython_version = re.match(r"^python-([0-9abrc.]+)(?:-|\.exe|\.zip)", artifact_name).group(1)

if not cpython_source_dir:
raise ValueError("Must specify --cpython-source-dir for Windows artifacts")
cpython_source_dir = pathlib.Path(cpython_source_dir)

# Start with the CPython source SBOM as a base
with (cpython_source_dir / "Misc/externals.spdx.json").open() as f:
sbom_data = json.loads(f.read())

sbom_data["relationships"] = []
sbom_data["files"] = []

# Add all the packages from the source SBOM
# We want to skip the file information because
# the files aren't available in Windows artifacts.
with (cpython_source_dir / "Misc/sbom.spdx.json").open() as f:
source_sbom_data = json.loads(f.read())
for sbom_package in source_sbom_data["packages"]:
sbom_data["packages"].append(sbom_package)

create_cpython_sbom(
sbom_data,
cpython_version=cpython_version,
artifact_path=artifact_path
)
sbom_cpython_package_spdx_id = spdx_id("SPDXRef-PACKAGE-cpython")

# The Windows embed artifacts don't contain pip/ensurepip,
# but the MSI artifacts do. Add pip for MSI installers.
if artifact_name.endswith(".exe"):

# Find the pip wheel in ensurepip in the source code
for pathname in os.listdir(cpython_source_dir / "Lib/ensurepip/_bundled"):
if pathname.startswith("pip-") and pathname.endswith(".whl"):
pip_wheel_filename = pathname
pip_wheel_bytes = (cpython_source_dir / f"Lib/ensurepip/_bundled/{pathname}").read_bytes()
break
else:
raise ValueError("Could not find pip wheel in 'Lib/ensurepip/_bundled/...'")

create_pip_sbom_from_wheel(
sbom_data,
pip_wheel_filename=pip_wheel_filename,
pip_wheel_bytes=pip_wheel_bytes,
)

# Final relationship, this SBOM describes the CPython package.
sbom_data["relationships"].append(
{
"spdxElementId": "SPDXRef-DOCUMENT",
"relatedSpdxElement": sbom_cpython_package_spdx_id,
"relationshipType": "DESCRIBES",
}
)

# Apply the 'supplier' tag to every package since we're shipping
# the package in the artifact itself. Originator field is used for maintainers.
for sbom_package in sbom_data["packages"]:
sbom_package["supplier"] = "Organization: Python Software Foundation"
# Source packages have been compiled.
if sbom_package["primaryPackagePurpose"] == "SOURCE":
sbom_package["primaryPackagePurpose"] = "LIBRARY"

return sbom_data


def main() -> None:
tarball_path = sys.argv[1]
sbom_data = create_sbom_for_source_tarball(tarball_path)
print(json.dumps(sbom_data, indent=2, sort_keys=True))
parser = argparse.ArgumentParser()
parser.add_argument("--cpython-source-dir", default=None)
parser.add_argument("artifacts", nargs="+")
parsed_args = parser.parse_args(sys.argv[1:])

artifact_paths = parsed_args.artifacts
cpython_source_dir = parsed_args.cpython_source_dir

for artifact_path in artifact_paths:
# Windows MSI and Embed artifacts
if artifact_path.endswith(".exe") or artifact_path.endswith(".zip"):
sbom_data = create_sbom_for_windows_artifact(
artifact_path,
cpython_source_dir=cpython_source_dir
)
# Source artifacts
else:
sbom_data = create_sbom_for_source_tarball(artifact_path)

# Normalize SBOM data for reproducibility.
normalize_sbom_data(sbom_data)
with open(artifact_path + ".spdx.json", mode="w") as f:
f.truncate()
f.write(json.dumps(sbom_data, indent=2, sort_keys=True))


if __name__ == "__main__":
main()
Empty file added tests/fake-artifact.txt
Empty file.
81 changes: 80 additions & 1 deletion tests/test_sbom.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pathlib
import json
import random
import hashlib
import re
import unittest.mock

import pytest
Expand Down Expand Up @@ -65,7 +67,6 @@ def test_normalization():


def test_fetch_project_metadata_from_pypi(mocker):

mock_urlopen = mocker.patch("sbom.urlopen")
mock_urlopen.return_value = unittest.mock.Mock()

Expand Down Expand Up @@ -116,3 +117,81 @@ def test_fetch_project_metadata_from_pypi(mocker):

assert download_url == "https://files.pythonhosted.org/packages/.../pip-24.0.tar.gz"
assert checksum_sha256 == "ea9bd1a847e8c5774a5777bb398c19e80bcd4e2aa16a4b301b718fe6f593aba2"


def test_create_cpython_sbom():
sbom_data = {"packages": []}

artifact_path = str(pathlib.Path(__file__).parent / "fake-artifact.txt")
sbom.create_cpython_sbom(
sbom_data, cpython_version="3.13.0", artifact_path=artifact_path
)

assert re.fullmatch(
r"^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}Z$",
sbom_data["creationInfo"].pop("created")
)
assert re.fullmatch(
r"^Tool: ReleaseTools-[a-f0-9]+$",
sbom_data["creationInfo"]["creators"].pop(1)
)

assert sbom_data == {
"packages": [
{
"SPDXID": "SPDXRef-PACKAGE-cpython",
"name": "CPython",
"versionInfo": "3.13.0",
"licenseConcluded": "PSF-2.0",
"originator": "Organization: Python Software Foundation",
"supplier": "Organization: Python Software Foundation",
"packageFileName": "fake-artifact.txt",
"externalRefs": [
{
"referenceCategory": "SECURITY",
"referenceLocator": "cpe:2.3:a:python:python:3.13.0:*:*:*:*:*:*:*",
"referenceType": "cpe23Type",
}
],
"primaryPackagePurpose": "SOURCE",
"downloadLocation": "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt",
"checksums": [
{
"algorithm": "SHA256",
"checksumValue": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
}
],
}
],
"SPDXID": "SPDXRef-DOCUMENT",
"spdxVersion": "SPDX-2.3",
"name": "CPython SBOM",
"dataLicense": "CC0-1.0",
"documentNamespace": "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt.spdx.json",
"creationInfo": {
"creators": [
"Person: Python Release Managers",
],
"licenseListVersion": "3.22",
},
}


@pytest.mark.parametrize(
["cpython_version", "download_location"],
[
("3.13.0", "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt"),
("3.11.0a1", "https://www.python.org/ftp/python/3.11.0/fake-artifact.txt"),
("3.12.0b2", "https://www.python.org/ftp/python/3.12.0/fake-artifact.txt"),
("3.13.0rc3", "https://www.python.org/ftp/python/3.13.0/fake-artifact.txt"),
]
)
def test_create_cpython_sbom_pre_release_download_location(cpython_version, download_location):
sbom_data = {"packages": []}

artifact_path = str(pathlib.Path(__file__).parent / "fake-artifact.txt")
sbom.create_cpython_sbom(
sbom_data, cpython_version=cpython_version, artifact_path=artifact_path
)

assert sbom_data["packages"][0]["downloadLocation"] == download_location
23 changes: 23 additions & 0 deletions windows-release/msi-steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,29 @@ steps:
*.cab
*.exe

- powershell: >
& $(Python)
"$(Build.SourcesDirectory)\sbom.py"
"--cpython-source-dir=$(Build.SourcesDirectory)"
$(gci -r "$(Build.ArtifactStagingDirectory)\msi\**\python-*.exe")
workingDirectory: $(Build.BinariesDirectory)
displayName: 'Create SBOMs for binaries'

- task: CopyFiles@2
displayName: 'Layout Artifact: sbom'
inputs:
sourceFolder: $(Build.ArtifactStagingDirectory)\msi
targetFolder: $(Build.ArtifactStagingDirectory)\sbom
flatten: true
contents: |
**\*.spdx.json

- publish: '$(Build.ArtifactStagingDirectory)\msi'
artifact: msi
displayName: 'Publish MSI'

- task: PublishBuildArtifacts@1
displayName: 'Publish Artifact: sbom_msi_$(Name)'
inputs:
PathtoPublish: '$(Build.ArtifactStagingDirectory)\sbom'
ArtifactName: sbom_msi_$(Name)
Loading