diff --git a/README.md b/README.md index 7eca0f8..98723fb 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ Table of Contents * [Generating OmniBOR Docs with Bomtrace3](#Generating-OmniBOR-Docs-with-Bomtrace3) * [Generating OmniBOR Docs with Bomtrace2](#Generating-OmniBOR-Docs-with-Bomtrace2) * [Generating OmniBOR ADGs for Debian or RPM Packages with Bomtrace2](#Generating-OmniBOR-ADGs-for-Debian-or-RPM-Packages-with-Bomtrace2) +* [Generating SPDX Docs](#Generating-SPDX-Docs) * [Reducing Storage of Generated OmniBOR Docs](#Reducing-Storage-of-Generated-OmniBOR-Docs) * [Manipulating OmniBOR Artifact Tree with Grafting and Pruning](#Manipulating-OmniBOR-Artifact-Tree-with-Grafting-and-Pruning) * [Creating Index Database for Debian Source Packages](#Creating-Index-Database-for-Debian-Source-Packages) @@ -473,6 +474,43 @@ lrwxrwxrwx. 1 root root 98 Oct 17 05:28 sysstat-debugsource-11.7.3-6.el8.x86 [root@e8281323a4d6 rpm-src-dir]# ``` +Generating SPDX Docs +-------------------- + +Bomsh is able to generate SPDX SBOM documents for built Debian/RPM packages. +When rebuilding Debian/RPM packages using the bomsh_rebuild_deb.py or +bomsh_rebuild_rpm.py script, adding the --bomsh_spdx option will build the +SPDX documents. + +Also for the bomsh_rebuild_deb.py script, the --deb_build_script option can be used +to specify a script file to build the Debian packages, without specifying the +-f or --buildinfo_file option. Note that this script must copy the built +Debian packages and the source tarball files to the expected location for later +use by the bomsh_spdx_deb.py script to generate the SPDX documents. +An example bomsh-openosc-deb.sh script file has been provided to illustrate +such a Debian build script. 
+ + $ git clone URL-of-this-git-repo bomsh + $ wget https://vault.centos.org/8-stream/AppStream/Source/SPackages/sysstat-11.7.3-7.el8.src.rpm + $ bomsh/scripts/bomsh_rebuild_rpm.py -c alma+epel-8-x86_64 --docker_image_base almalinux:8 -s sysstat-11.7.3-7.el8.src.rpm -d bomsh/scripts/sample_sysstat_cvedb.json -o outdir --syft_sbom --bomsh_spdx --mock_option="--no-bootstrap-image --define 'packager BOMSH user $(id -un) at $(hostname)'" + $ grep -B1 -A3 CVElist outdir/bomsher_out/bomsh_logfiles/bomsh_search_jsonfile-details.json + $ + $ # the above should take only a few minutes, and the below may take tens of minutes + $ wget https://buildinfos.debian.net/buildinfo-pool/s/sysstat/sysstat_11.7.3-1_all-amd64-source.buildinfo + $ bomsh/scripts/bomsh_rebuild_deb.py -f sysstat_11.7.3-1_all-amd64-source.buildinfo -d bomsh/scripts/sample_sysstat_cvedb.json -o outdir2 --syft_sbom --bomsh_spdx --mmdebstrap_no_cleanup + $ grep -B1 -A3 CVElist outdir2/bomsher_out/bomsh_logfiles/bomsh_search_jsonfile-details.json + $ + $ # specify a Debian build script to build Debian packages + $ bomsh/scripts/bomsh_rebuild_deb.py --deb_build_script bomsh/scripts/bomsh-openosc-deb.sh -o openosc-outdir --syft_sbom --bomsh_spdx + $ ls -tl openosc-outdir/bomsher_out/bomsh_sbom/ + +Currently only SPDX v2.3 version SBOM documents are supported. + +Bomsh is able to generate SPDX documents for generic images too, like +ISO/OVA image files. This usually involves an image unbundler or unpacking +tool to unpack the image to individual binary files. The bomsh_spdx_image.py +script is created for this purpose. This feature is for advanced users only. + Reducing Storage of Generated OmniBOR Docs ----------------------------------------- diff --git a/scripts/bomsh-openosc-deb.sh b/scripts/bomsh-openosc-deb.sh new file mode 100755 index 0000000..fb7bca3 --- /dev/null +++ b/scripts/bomsh-openosc-deb.sh @@ -0,0 +1,28 @@ +#! 
/usr/bin/bash + +# This is an example Debian build script to illustrate the usage of +# the --deb_build_script option of the bomsh_rebuild_deb.py script. +# This shell script builds the OpenOSC Debian *.deb package files. +# It also copies the built *.deb files to the expected directory. +# It also copies the src tarball files to the expected directory. +# These copied *.deb and tarball files are later used by the +# bomsh_spdx_deb.py script to generate SPDX documents. + +# pwd should always be the /out/bomsher_out directory inside the docker container +git clone https://github.com/cisco/OpenOSC.git +cd OpenOSC +autoreconf -vfi +./configure +make deb +# also the workspace should not be deleted or cleaned after the build + +# must copy the generated *.deb files to the /out/bomsher_out/debs directory +cp ../*.deb ../debs/ + +# must copy the src tarball files to the /out/bomsher_out/bomsh_logfiles directory +# also need to generate the src tarball first +dpkg-source -b . +cp ../*.dsc ../bomsh_logfiles/ +cp ../*.tar.gz ../bomsh_logfiles/ + +# In the end, everything is ready for the bomsh_spdx_deb.py script diff --git a/scripts/bomsh_create_bom.py b/scripts/bomsh_create_bom.py index ca92822..2db15ad 100755 --- a/scripts/bomsh_create_bom.py +++ b/scripts/bomsh_create_bom.py @@ -812,6 +812,8 @@ def unbundle_package(pkgfile, destdir=''): cmd = "rm -rf " + destdir + " ; mkdir -p " + destdir + " ; dpkg-deb -xv " + pkgfile + " " + destdir + " || true" elif pkgfile[-4:] == ".tgz" or pkgfile[-7:] in (".tar.gz", ".tar.xz") or pkgfile[-8:] == ".tar.bz2": cmd = "rm -rf " + destdir + " ; mkdir -p " + destdir + " ; tar -xf " + pkgfile + " -C " + destdir + " || true" + elif pkgfile[-4:] == ".jar": + cmd = "rm -rf " + destdir + " ; mkdir -p " + destdir + " ; unzip " + pkgfile + " -d " + destdir + " || true" else: print("Unsupported package format in " + pkgfile + " file, skipping it.") return '' diff --git a/scripts/bomsh_index_ws.py b/scripts/bomsh_index_ws.py index d843038..03d0db1 100755 
--- a/scripts/bomsh_index_ws.py +++ b/scripts/bomsh_index_ws.py @@ -414,6 +414,8 @@ def get_installed_pkg_info(package): :param package: the installed package name returns a list of lines of the package info. ''' + if not package: + return [] if g_package_type == "deb": return get_installed_deb_pkg_info(package) return get_installed_rpm_pkg_info(package) @@ -655,13 +657,13 @@ def get_deb_source_control_file(): verbose("from buildinfo_file " + args.buildinfo_file + " we get dsc filename: " + dsc_filename) if dsc_filename: return get_dsc_source_control_file_with_filename(dsc_filename) - cmd = 'ls bomsh_logfiles/*.dsc || true' + cmd = 'ls bomsh_logfiles/*.dsc 2>/dev/null || true' output = get_shell_cmd_output(cmd) if output: return output.splitlines()[0] if not (g_chroot_dir and os.path.exists(g_chroot_dir)): return '' - cmd = 'ls ' + g_chroot_dir + '/*.dsc || true' + cmd = 'ls ' + g_chroot_dir + '/*.dsc 2>/dev/null || true' output = get_shell_cmd_output(cmd) if output: return output.splitlines()[0] @@ -688,13 +690,13 @@ def get_deb_source_tarball_files(): They are supposed to have been copied to bomsh_logfiles directory or g_chroot_dir. returns a list of tarball files. 
''' - cmd = 'ls bomsh_logfiles/*.tar.* || true' + cmd = 'ls bomsh_logfiles/*.tar.* 2>/dev/null || true' output = get_shell_cmd_output(cmd) if output: return output.splitlines() if not (g_chroot_dir and os.path.exists(g_chroot_dir)): return [] - cmd = 'ls ' + g_chroot_dir + '/*.tar.* || true' + cmd = 'ls ' + g_chroot_dir + '/*.tar.* 2>/dev/null || true' output = get_shell_cmd_output(cmd) if output: return output.splitlines() diff --git a/scripts/bomsh_rebuild_deb.py b/scripts/bomsh_rebuild_deb.py index 35d1140..8b6d289 100755 --- a/scripts/bomsh_rebuild_deb.py +++ b/scripts/bomsh_rebuild_deb.py @@ -175,21 +175,35 @@ def fix_broken_symlinks(bomsher_outdir): # Bomtrace/Bomsh debrebuild run to generate OmniBOR documents # if BASELINE_REBUILD is not empty, then it will not use bomtrace2 to run debrebuild, that is, the baseline run. # if SRC_TAR_DIR is not empty, then the python script must have copied tarball and .dsc file into the bomsher_in directory. -CMD if [ "${SRC_TAR_DIR}" ]; then srctardir_param="--srctardir=/out/bomsher_in" ; fi ; \\ +CMD mkdir -p /out/bomsher_out ; cd /out/bomsher_out ; mkdir -p bomsh_logfiles ; \\ if [ -z "${BASELINE_REBUILD}" ]; then bomtrace_cmd="/tmp/bomtrace2 -w /tmp/bomtrace_watched_programs -c /tmp/bomtrace.conf -o /tmp/bomsh_hook_strace_logfile " ; fi ; \\ - mkdir -p /out/bomsher_out ; cd /out/bomsher_out ; \\ - if [ "${MM_NO_CLEANUP}" ]; then cp /usr/bin/mmdebstrap /usr/bin/mmdebstrap.bak ; echo "Patching mmdebstrap for no-cleanup." 
; \\ - cp /usr/bin/mmdebstrap ./ ; patch -p0 < mmdebstrap_patch_file ; cp mmdebstrap /usr/bin/mmdebstrap ; fi ; \\ - echo $bomtrace_cmd debrebuild $srctardir_param --buildresult=./debs --builder=mmdebstrap /out/bomsher_in/$BUILDINFO_FILE ; \\ - # Run strace to collect artifact dependency fragments (ADF) for debrebuild ; \\ - $bomtrace_cmd debrebuild $srctardir_param --buildresult=./debs --builder=mmdebstrap /out/bomsher_in/$BUILDINFO_FILE ; \\ + if [ "${BUILDINFO_FILE}" ]; then \\ + chroot_param="--chroot_dir /tmp/bomsh-mmroot" ; \\ + if [ "${SRC_TAR_DIR}" ]; then srctardir_param="--srctardir=/out/bomsher_in" ; fi ; \\ + if [ "${MM_NO_CLEANUP}" ]; then cp /usr/bin/mmdebstrap /usr/bin/mmdebstrap.bak ; echo "Patching mmdebstrap for no-cleanup." ; \\ + cp /usr/bin/mmdebstrap ./ ; patch -p0 < mmdebstrap_patch_file ; cp mmdebstrap /usr/bin/mmdebstrap ; fi ; \\ + echo $bomtrace_cmd debrebuild $srctardir_param --buildresult=./debs --builder=mmdebstrap /out/bomsher_in/$BUILDINFO_FILE ; \\ + # Run strace to collect artifact dependency fragments (ADF) for debrebuild ; \\ + $bomtrace_cmd debrebuild $srctardir_param --buildresult=./debs --builder=mmdebstrap /out/bomsher_in/$BUILDINFO_FILE ; \\ + cp /tmp/bomsh-mmroot/etc/os-release bomsh_logfiles/mock-os-release ; \\ + elif [ "${BUILD_SCRIPT}" ]; then \\ + mkdir -p debs ; \\ + # Run strace to collect artifact dependency fragments (ADF) for debian build script ; \\ + echo $bomtrace_cmd /out/bomsher_in/$BUILD_SCRIPT >&2 ; \\ + $bomtrace_cmd /out/bomsher_in/$BUILD_SCRIPT ; \\ + cp /etc/os-release bomsh_logfiles/mock-os-release ; \\ + #cp /tmp/bomsh_hook_* ./ ; \\ + else \\ + echo "Nothing to build." 
>&2 ; \\ + fi ; \\ if [ "${BASELINE_REBUILD}" ]; then exit 0 ; fi ; \\ debfiles=`for i in debs/*.deb ; do echo -n $i, ; done | sed 's/.$//'` ; \\ rm -rf omnibor omnibor_dir ; mv .omnibor omnibor ; mkdir -p bomsh_logfiles ; cp -f /tmp/bomsh_hook_*logfile* bomsh_logfiles/ ; \\ - if [ "${MM_NO_CLEANUP}" ]; then index_db_param="--pkg_db_file /tmp/bomsh-index-pkg-db.json" ; \\ + # Indexing the build workspace if needed ; \\ + if [ "${MM_NO_CLEANUP}" ] || [ "${BUILD_SCRIPT}" ] ; then index_db_param="--pkg_db_file /tmp/bomsh-index-pkg-db.json" ; \\ # Create the package index database for prov_pkg metadata of source files ; \\ - /tmp/bomsh_index_ws.py --chroot_dir /tmp/bomsh-mmroot -p $debfiles -r /tmp/bomsh_hook_raw_logfile.sha1 --package_type deb ; \\ - cp /tmp/bomsh-mmroot/etc/os-release bomsh_logfiles/mock-os-release ; \\ + echo /out/bomsh_index_ws.py $chroot_param -p $debfiles -r /tmp/bomsh_hook_raw_logfile.sha1 --package_type deb ; \\ + /tmp/bomsh_index_ws.py $chroot_param -p $debfiles -r /tmp/bomsh_hook_raw_logfile.sha1 --package_type deb ; \\ cp /tmp/bomsh-index-* bomsh_logfiles ; fi ; \\ # Create the OmniBOR manifest document and metadata database ; \\ /tmp/bomsh_create_bom.py -b omnibor_dir -r /tmp/bomsh_hook_raw_logfile.sha1 $index_db_param ; \\ @@ -240,9 +254,16 @@ def run_docker(buildinfo_file, output_dir): bomsher_outdir = get_or_create_dir(os.path.join(output_dir, "bomsher_out")) # The bomsher_in dir is also the docker build work directory create_dockerfile(bomsher_indir) - os.system("cp -f " + buildinfo_file + " " + bomsher_indir) docker_cmd = 'docker run --cap-add MKNOD --cap-add SYS_ADMIN --cap-add=SYS_PTRACE -it --rm' - docker_cmd += ' -e BUILDINFO_FILE=' + os.path.basename(buildinfo_file) + if args.buildinfo_file: + os.system("cp -f " + args.buildinfo_file + " " + bomsher_indir) + docker_cmd += ' -e BUILDINFO_FILE=' + os.path.basename(args.buildinfo_file) + elif args.deb_build_script: + os.system("cp -f " + args.deb_build_script + " " + bomsher_indir) + 
docker_cmd += ' -e BUILD_SCRIPT=' + os.path.basename(args.deb_build_script) + else: + verbose("No idea how to build debian packages. Do nothing.") + return # Set appropriate parameters to run docker if args.src_tar_dir: tardir_base = os.path.basename(args.src_tar_dir) @@ -265,7 +286,7 @@ def run_docker(buildinfo_file, output_dir): if args.bomsh_spdx: # Generate SPDX SBOM document with the bomsh_spdx_rpm.py tool docker_cmd += ' -e BOMSH_SPDX=1' - docker_cmd += ' -v ' + output_dir + ':/out $(docker build -t bomsher-deb -q ' + bomsher_indir + ')' + docker_cmd += ' -v ' + output_dir + ':/out $(docker build -t bomsher-deb2 -q ' + bomsher_indir + ')' verbose("==== Here is the docker run command: " + docker_cmd, LEVEL_1) os.system(docker_cmd) fix_broken_symlinks(bomsher_outdir) @@ -293,6 +314,8 @@ def rtd_parse_options(): help = "Debian package's .buildinfo file generated from a previous reproducible build") parser.add_argument('--docker_image_base', help = "the base docker image to start with") + parser.add_argument('--deb_build_script', + help = "the script to build debian packages") parser.add_argument('-o', '--output_dir', help = "the output directory to store rebuilt .deb files and Bomsh/OmniBOR documents, the default is current dir") parser.add_argument('-d', '--cve_db_file', @@ -321,8 +344,8 @@ def rtd_parse_options(): # Parse the command line arguments args = parser.parse_args() - if not (args.buildinfo_file): - print ("Please specify the buildinfo file with -f option!") + if not (args.buildinfo_file or args.deb_build_script): + print ("Please specify the buildinfo file with -f option or the debian build script with --deb_build_script option!") print ("") parser.print_help() sys.exit() diff --git a/scripts/bomsh_spdx_deb.py b/scripts/bomsh_spdx_deb.py index 0e4893e..26d8892 100755 --- a/scripts/bomsh_spdx_deb.py +++ b/scripts/bomsh_spdx_deb.py @@ -124,6 +124,19 @@ DB_FN = "bomsh_search_jsonfile-details.json" BOM_MAPPING_FN = 
"bomsh_search_jsonfile-bom-mappings.json" +# +# Helper routines +######################### +def verbose(string, level=1): + """ + Prints information to stdout depending on the verbose level. + :param string: String to be printed + :param level: Unsigned Integer, listing the verbose level + """ + if args.verbose >= level: + # print to stdout + print(string) + def get_or_create_dir(destdir): """ Create a directory if it does not exist. otherwise, return it directly @@ -275,6 +288,9 @@ def parse_pkg_info(pkg_info_array): # Our return value pkg_info = dict() + if not pkg_info_array: + return pkg_info + # Set a description list to hold all the lines of the description desc_list = list() @@ -521,7 +537,6 @@ def analyze_files(rpm_file, unpack_dir): # and then there is the linux "file" command file_types=[get_spdx_file_type(f)], - file_types=[FileType.SOURCE], checksums=[ Checksum(ChecksumAlgorithm.SHA1, file_sha1), Checksum(ChecksumAlgorithm.SHA256, file_sha256), @@ -566,6 +581,8 @@ def build_basic_spdx_package(pkg, pkg_db, os_rel_data): return None pkg_data = parse_pkg_info(db_entry['pkg_info']) + if not pkg_data: + return None (pkg_name, pkg_ver, pkg_rel, pkg_arch) = deb_pkg_nvra(pkg_data) @@ -594,18 +611,26 @@ def spdx_add_src_pkg_dependency(spdx_doc, gitoid, sbom_db, pkg_db, os_rel_data, # If we currently don't have a package entry, add one. for pkg in pkg_list: + # If we ended up with a null package name then skip it. + if not pkg: + verbose("Skipping null package...") + continue # The summerization of the sbom detail output leaves the following string in the list # of packages. We don't want that in our output if pkg == "UNKNOWN_COMPONENT_VERSION": + verbose("Skipping unknown-component-version package...") continue # All files that were generated during a build will not have an origination package. 
# They are listed under the package name that starts with "GENERATED " # The input files used to generate that file will have an origination pkg and those will be captured elsewhere if pkg.startswith('DERIVED_PKG '): + verbose("Skipping derived package " + pkg) continue package = pkg_exists(spdx_doc, pkg) if not package: package = build_basic_spdx_package(pkg, pkg_db, os_rel_data) + if not package: # this is probably empty package "" + continue spdx_doc.packages += [package] # Then we add the the dependency relationship. diff --git a/scripts/bomsh_spdx_rpm.py b/scripts/bomsh_spdx_rpm.py index 024bd82..c2179ac 100755 --- a/scripts/bomsh_spdx_rpm.py +++ b/scripts/bomsh_spdx_rpm.py @@ -118,6 +118,19 @@ DB_FN = "bomsh_search_jsonfile-details.json" BOM_MAPPING_FN = "bomsh_search_jsonfile-bom-mappings.json" +# +# Helper routines +######################### +def verbose(string, level=1): + """ + Prints information to stdout depending on the verbose level. + :param string: String to be printed + :param level: Unsigned Integer, listing the verbose level + """ + if args.verbose >= level: + # print to stdout + print(string) + def get_or_create_dir(destdir): """ Create a directory if it does not exist. otherwise, return it directly @@ -557,14 +570,20 @@ def spdx_add_src_pkg_dependency(spdx_doc, gitoid, sbom_db, pkg_db, os_rel_data, # If we currently don't have a package entry, add one. for pkg in pkg_list: + # If we ended up with a null package name then skip it. + if not pkg: + verbose("Skipping null package...") + continue # The summerization of the sbom detail output leaves the following string in the list # of packages. We don't want that in our output if pkg == "UNKNOWN_COMPONENT_VERSION": + verbose("Skipping unknown-component-version package...") continue # All files that were generated during a build will not have an origination package. 
# They are listed under the package name that starts with "GENERATED " # The input files used to generate that file will have an origination pkg and those will be captured elsewhere if pkg.startswith('DERIVED_PKG '): + verbose("Skipping derived package " + pkg) continue package = pkg_exists(spdx_doc, pkg) if not package: