Skip to content

Commit

Permalink
Use generic package_getter for all distros #438
Browse files Browse the repository at this point in the history
    * Ensure both installed_file and codebase_resource have the same checksum field before comparing them

Signed-off-by: Jono Yang <jyang@nexb.com>
  • Loading branch information
JonoYang committed Jun 8, 2022
1 parent 093a52e commit c58771a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 15 deletions.
41 changes: 26 additions & 15 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from pathlib import Path

from container_inspector.image import Image
from packagedcode import plugin_package

from scanpipe import pipes
from scanpipe.pipes import rootfs
Expand Down Expand Up @@ -139,6 +140,15 @@ def create_codebase_resources(project, image):
)


def package_getter(root_dir, **kwargs):
    """
    Yield a (purl, package) pair for each installed package that
    `plugin_package.get_installed_packages` reports for `root_dir`.

    Extra keyword arguments are accepted but not used.
    """
    for installed in plugin_package.get_installed_packages(root_dir):
        yield installed.purl, installed


def scan_image_for_system_packages(project, image, detect_licenses=True):
"""
Given a `project` and an `image` - this scans the `image` layer by layer for
Expand All @@ -155,38 +165,39 @@ def scan_image_for_system_packages(project, image, detect_licenses=True):
if distro_id not in rootfs.PACKAGE_GETTER_BY_DISTRO:
raise rootfs.DistroNotSupported(f'Distro "{distro_id}" is not supported.')

package_getter = partial(
rootfs.PACKAGE_GETTER_BY_DISTRO[distro_id],
distro=distro_id,
detect_licenses=detect_licenses,
)

installed_packages = image.get_installed_packages(package_getter)

for i, (purl, package, layer) in enumerate(installed_packages):
logger.info(f"Creating package #{i}: {purl}")
created_package = pipes.update_or_create_package(project, package.to_dict())

installed_files = []
if hasattr(package, "installed_files"):
installed_files = package.installed_files
if hasattr(package, "resources"):
installed_files = package.resources

# We have no files for this installed package, we cannot go further.
if not installed_files:
logger.info(f" No installed_files for: {purl}")
continue

missing_resources = created_package.missing_resources[:]
modified_resources = created_package.modified_resources[:]

codebase_resources = project.codebaseresources.all()

for install_file in package.installed_files:
install_file_path = pipes.normalize_path(install_file.path)
layer_rootfs_path = posixpath.join(
layer.layer_id,
install_file_path.strip("/"),
)
for install_file in installed_files:
# TODO: Uncomment the following when the installed_file paths have their roots pre-stripped
# install_file_path = pipes.normalize_path(install_file.path)
# layer_rootfs_path = posixpath.join(
# layer.layer_id,
# install_file_path.strip("/"),
# )

# TODO: Remove the following when the installed_file paths have their roots pre-stripped
layer_rootfs_path = pipes.normalize_path(install_file.path)
leading_layer_id_segment = f"/{layer.layer_id}"
install_file_path = layer_rootfs_path.replace(leading_layer_id_segment, "")

logger.info(f" installed file rootfs_path: {install_file_path}")
logger.info(f" layer rootfs_path: {layer_rootfs_path}")
cbr_qs = codebase_resources.filter(
Expand Down
8 changes: 8 additions & 0 deletions scanpipe/pipes/rootfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,14 @@ def has_hash_diff(install_file, codebase_resource):
hash_types = ["sha512", "sha256", "sha1", "md5"]

for hash_type in hash_types:
# Find a suitable hash type that is present on both install_file and
# codebase_resource, skip otherwise.
if not (
hasattr(install_file, hash_type)
and hasattr(codebase_resource, hash_type)
):
continue

install_file_sum = getattr(install_file, hash_type)
codebase_resource_sum = getattr(codebase_resource, hash_type)
hashes_differ = all(
Expand Down

0 comments on commit c58771a

Please sign in to comment.