Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RPM mariner package detection support #3734

Merged
merged 1 commit into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ TAGS
Procfile
local.cfg
geckodriver.log
var
.metaflow
selenium
/dist/
Expand Down
5 changes: 4 additions & 1 deletion src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,10 @@

debian.DebianInstalledFilelistHandler,
debian.DebianInstalledMd5sumFilelistHandler,
debian.DebianInstalledStatusDatabaseHandler
debian.DebianInstalledStatusDatabaseHandler,

rpm.RpmLicenseFilesHandler,
rpm.RpmMarinerContainerManifestHandler
]

if on_linux:
Expand Down
7 changes: 7 additions & 0 deletions src/packagedcode/licensing.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ def logger_debug(*args):
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))


# Mapping of a license field name as reported for a scanned resource (key) to
# the name of the package attribute that receives that value (value).
RESOURCE_TO_PACKAGE_LICENSE_FIELDS = {
'detected_license_expression': 'declared_license_expression',
'detected_license_expression_spdx': 'declared_license_expression_spdx',
'license_detections': 'license_detections',
}


def add_referenced_license_matches_for_package(resource, codebase):
"""
Return an updated ``resource`` saving it in place, after adding new license
Expand Down
60 changes: 51 additions & 9 deletions src/packagedcode/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
from commoncode.datautils import String
from commoncode.fileutils import as_posixpath
from commoncode.resource import Resource
from license_expression import combine_expressions
from license_expression import Licensing

try:
from typecode import contenttype
except ImportError:
Expand Down Expand Up @@ -118,11 +121,8 @@
- IdentifiablePackageData: a base class for a Package-like class with a Package URL.
"""

SCANCODE_DEBUG_PACKAGE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
SCANCODE_DEBUG_PACKAGE_ASSEMBLY = os.environ.get('SCANCODE_DEBUG_PACKAGE_ASSEMBLY', False)

TRACE = SCANCODE_DEBUG_PACKAGE
TRACE_UPDATE = SCANCODE_DEBUG_PACKAGE_ASSEMBLY
# Debug tracing flags read from the environment. os.environ.get() returns the
# raw string when the variable is set, and these flags are tested for
# truthiness (e.g. ``if TRACE_UPDATE:``), so any non-empty value -- even "0" or
# "false" -- enables tracing.
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE', False)
TRACE_UPDATE = os.environ.get('SCANCODE_DEBUG_PACKAGE_ASSEMBLY', False)


def logger_debug(*args):
Expand Down Expand Up @@ -1618,6 +1618,8 @@ def update(
include_qualifiers=False,
include_subpath=False,
ignore_name_check=False,
default_relation='AND',
licensing=Licensing(),
):
"""
Update this Package with data from the ``package_data`` PackageData.
Expand Down Expand Up @@ -1670,16 +1672,16 @@ def update(
'file_references',
])

license_modified = False
for name, value in existing.items():
new_value = new_package_data.get(name)
if not new_value:
if TRACE_UPDATE: logger_debug(f' No new value: {name!r}: skipping')
continue

if TRACE_UPDATE:
logger_debug(f'update: {name!r}={value!r} with new_value: {new_value!r}')

if not new_value:
if TRACE_UPDATE: logger_debug(' No new value: skipping')
continue

if not value:
if TRACE_UPDATE: logger_debug(' set existing value to new')
setattr(self, name, new_value)
Expand All @@ -1694,6 +1696,18 @@ def update(
if name == 'extra_data':
value.update(new_value)

if 'license_detections' in name:
license_modified = True
license_keys = licensing.license_keys(
expression=new_package_data.get("declared_license_expression"),
unique=True,
)
if name == 'license_detections' and len(license_keys) > 1:
setattr(self, 'other_license_detections', new_value)
else:
merged = value + new_value
setattr(self, name, merged)

if name in list_fields:
if TRACE_UPDATE: logger_debug(' merge lists of values')
merged = merge_sequences(list1=value, list2=new_value)
Expand All @@ -1702,8 +1716,36 @@ def update(
elif TRACE_UPDATE and value != new_value:
if TRACE_UPDATE: logger_debug(' skipping update: no replace')

if license_modified:
self.refresh_license_expressions(default_relation=default_relation)

return True

def refresh_license_expressions(self, default_relation='AND'):
    """
    Recompute the license expression fields of this package from its current
    license detections.

    For each of ``license_detections`` and ``other_license_detections`` that
    is not empty, the ``license_expression`` of every detection is combined
    with ``default_relation`` and stored as a string in the matching
    expression field; the ``*_spdx`` field is then derived from that string
    with ``get_declared_license_expression_spdx``. Fields whose detections
    are empty are left untouched.
    """
    # (detections field, expression field, SPDX expression field)
    field_groups = (
        (
            'license_detections',
            'declared_license_expression',
            'declared_license_expression_spdx',
        ),
        (
            'other_license_detections',
            'other_license_expression',
            'other_license_expression_spdx',
        ),
    )

    for detections_field, expression_field, spdx_field in field_groups:
        detections = getattr(self, detections_field)
        if not detections:
            continue

        combined = combine_expressions(
            expressions=[item["license_expression"] for item in detections],
            relation=default_relation,
        )
        setattr(self, expression_field, str(combined))

        spdx_expression = get_declared_license_expression_spdx(
            declared_license_expression=getattr(self, expression_field),
        )
        setattr(self, spdx_field, spdx_expression)

def get_packages_files(self, codebase):
"""
Yield all the Resource of this package found in codebase.
Expand Down
167 changes: 166 additions & 1 deletion src/packagedcode/rpm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,25 @@
# See https://aboutcode.org for more information about nexB OSS projects.
#

import io
import os
import fnmatch
import logging
import sys
from collections import namedtuple
from pathlib import Path

from packagedcode import models
from packagedcode import nevra
from packagedcode.licensing import RESOURCE_TO_PACKAGE_LICENSE_FIELDS
from packagedcode.pyrpm import RPM
from packagedcode.rpm_installed import collect_installed_rpmdb_xmlish_from_rpmdb_loc
from packagedcode.rpm_installed import parse_rpm_xmlish
from packagedcode.utils import build_description
from packagedcode.utils import get_ancestor
from scancode.api import get_licenses

TRACE = False
# Debug tracing flag read from the environment. os.environ.get() returns the
# raw string when the variable is set, so any non-empty value enables tracing.
TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)


def logger_debug(*args):
Expand Down Expand Up @@ -374,6 +379,166 @@ def parse(cls, location, package_only=False):
yield models.PackageData.from_data(package_data, package_only)


class RpmMarinerContainerManifestHandler(models.DatafileHandler):
    """
    Handle the RPM package manifest stored at
    ``var/lib/rpmmanifest/container-manifest-2``.

    The manifest is read as one tab-separated record per line, with columns
    named by ``manifest_attributes``. ``parse()`` yields one PackageData per
    record and ``assemble()`` attaches the matching license files found under
    ``usr/share/licenses/<package name>/`` to the assembled Package.
    """
    datasource_id = 'rpm_mariner_manifest'
    # container-manifest-1 is more minimal and has the same data
    path_patterns = ('*var/lib/rpmmanifest/container-manifest-2',)
    default_package_type = 'rpm'
    default_package_namespace = 'mariner'
    description = 'RPM mariner distroless package manifest'
    documentation_url = 'https://github.com/microsoft/marinara/'

    # Column names of one manifest line, in order. The "n1".."n4" and
    # "checksum_algo" columns are discarded by clean_mariner_manifest_data().
    manifest_attributes = [
        "name",
        "version",
        "n1",
        "n2",
        "party",
        "n3",
        "n4",
        "arch",
        "checksum_algo",
        "filename"
    ]

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData for each non-blank line of the manifest file at
        ``location``. ``package_only`` is passed through to
        ``PackageData.from_data``.
        """
        # Read everything first so the file is closed before yielding.
        with io.open(location, encoding='utf-8') as data:
            lines = data.readlines()

        for line in lines:
            line = line.rstrip("\n")
            # A blank line (e.g. a trailing empty line) holds no record.
            if not line.strip():
                continue

            package_data = {
                "type": cls.default_package_type,
                "namespace": cls.default_package_namespace,
                "datasource_id": cls.datasource_id,
            }
            # zip() stops at the shortest input: a short line simply leaves
            # the trailing attributes unset.
            package_data.update(zip(cls.manifest_attributes, line.split("\t")))

            package_data = cls.clean_mariner_manifest_data(package_data)
            yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Yield a Package built from ``package_data`` followed by every license
        file Resource that was merged into it.

        The codebase is walked from the ancestor of ``resource`` that sits as
        many levels up as the manifest path has segments. Each Resource whose
        location matches ``*usr/share/licenses/<name>/COPYING*`` or
        ``*usr/share/licenses/<name>/LICENSE*`` has its package data merged
        into the Package with ``Package.update`` and is registered through
        ``package_adder``.
        """
        levels_up = len('var/lib/rpmmanifest/container-manifest-2'.split('/'))
        root_resource = get_ancestor(
            levels_up=levels_up,
            resource=resource,
            codebase=codebase,
        )
        package_name = package_data.name

        package = models.Package.from_package_data(
            package_data=package_data,
            datafile_path=resource.path,
        )
        package_uid = package.package_uid

        assemblable_paths = (
            f'*usr/share/licenses/{package_name}/COPYING*',
            f'*usr/share/licenses/{package_name}/LICENSE*',
        )

        resources = []
        for res in root_resource.walk(codebase):
            if TRACE:
                logger_debug(f' rpm: mariner assemble: root_walk: res: {res}')

            is_license_file = any(
                fnmatch.fnmatch(name=res.location, pat=pattern)
                for pattern in assemblable_paths
            )
            if not is_license_file:
                continue

            if TRACE:
                logger_debug(f' rpm: mariner assemble: pattern matched for: res: {res}')

            for pkgdt in res.package_data:
                # Use a distinct name: do not shadow the ``package_data``
                # argument of this method.
                license_package_data = models.PackageData.from_dict(pkgdt)
                if TRACE:
                    logger_debug(f' rpm: mariner assemble: package_data: {license_package_data.declared_license_expression}')

                package.update(
                    package_data=license_package_data,
                    datafile_path=res.path,
                    check_compatible=False,
                    replace=False,
                    include_version=False,
                    include_qualifiers=False,
                    include_subpath=False,
                )

            package_adder(package_uid, res, codebase)
            resources.append(res)

        yield package
        yield from resources

    @staticmethod
    def clean_mariner_manifest_data(package_data):
        """
        Return the ``package_data`` mapping, modified in place, reshaped from
        raw manifest columns into package fields:

        - the "n1".."n4" and "checksum_algo" columns are removed;
        - a non-empty "arch" becomes ``qualifiers={"arch": ...}``;
        - a non-empty "filename" becomes ``extra_data={"filename": ...}``;
        - a non-empty "party" becomes a single organization "owner" party
          under ``parties``.

        Missing columns are tolerated so that a short manifest line does not
        raise a KeyError.
        """
        ignore_attributes = ["n1", "n2", "n3", "n4", "checksum_algo"]
        for attribute in ignore_attributes:
            package_data.pop(attribute, None)

        if arch := package_data.pop("arch", None):
            package_data["qualifiers"] = {"arch": arch}

        if filename := package_data.pop("filename", None):
            package_data["extra_data"] = {"filename": filename}

        if party := package_data.pop("party", None):
            party_obj = models.Party(
                type=models.party_org,
                role="owner",
                name=party,
            )
            package_data["parties"] = [party_obj.to_dict()]

        return package_data


class RpmLicenseFilesHandler(models.NonAssemblableDatafileHandler):
    """
    Handle ``COPYING*`` and ``LICENSE*`` files stored under
    ``usr/share/licenses/<package name>/`` and report each as package data
    carrying the licenses detected in that file.
    """
    datasource_id = 'rpm_package_licenses'
    path_patterns = (
        '*usr/share/licenses/*/COPYING*',
        '*usr/share/licenses/*/LICENSE*',
    )
    default_package_type = 'rpm'
    default_package_namespace = 'mariner'
    description = 'RPM mariner distroless package license files'
    documentation_url = 'https://github.com/microsoft/marinara/'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield a single PackageData for the license file at ``location``.

        The package name is the directory that follows
        ``/usr/share/licenses/`` in ``location``. When ``package_only`` is
        true the bare PackageData is yielded without running license
        detection; otherwise the license fields listed in
        ``RESOURCE_TO_PACKAGE_LICENSE_FIELDS`` are filled from the
        ``get_licenses`` results for the file.
        """
        # The license files are in a directory which is the package name,
        # for example: "/usr/share/licenses/openssl/LICENSE"
        name = location.split('/usr/share/licenses/').pop().split('/')[0]
        package_data = models.PackageData(
            type=cls.default_package_type,
            namespace=cls.default_package_namespace,
            name=name,
            datasource_id=cls.datasource_id,
        )

        if package_only:
            # Stop here: falling through would run license detection anyway
            # and yield the same object a second time.
            yield package_data
            return

        resource_license_attributes = get_licenses(
            location=location,
            include_text=True,
            license_diagnostics=True,
            license_text_diagnostics=True,
        )
        for key, key_pkg in RESOURCE_TO_PACKAGE_LICENSE_FIELDS.items():
            setattr(package_data, key_pkg, resource_license_attributes.get(key))

        yield package_data


ALGO_BY_ID = {
None: 'md5',
0: 'md5',
Expand Down
22 changes: 22 additions & 0 deletions tests/packagedcode/data/chef/package.scan.expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,28 @@
}
],
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
},
{
"license_expression": "mit",
"license_expression_spdx": "MIT",
"matches": [
{
"license_expression": "mit",
"spdx_license_expression": "MIT",
"from_file": "package/metadata.json",
"start_line": 1,
"end_line": 1,
"matcher": "1-spdx-id",
"score": 100.0,
"matched_length": 1,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "spdx-license-identifier-mit-5da48780aba670b0860c46d899ed42a0f243ff06",
"rule_url": null,
"matched_text": "MIT"
}
],
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
}
],
"other_license_expression": null,
Expand Down
22 changes: 22 additions & 0 deletions tests/packagedcode/data/plugin/chef-package-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,28 @@
}
],
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
},
{
"license_expression": "mit",
"license_expression_spdx": "MIT",
"matches": [
{
"license_expression": "mit",
"spdx_license_expression": "MIT",
"from_file": "package/metadata.json",
"start_line": 1,
"end_line": 1,
"matcher": "1-spdx-id",
"score": 100.0,
"matched_length": 1,
"match_coverage": 100.0,
"rule_relevance": 100,
"rule_identifier": "spdx-license-identifier-mit-5da48780aba670b0860c46d899ed42a0f243ff06",
"rule_url": null,
"matched_text": "MIT"
}
],
"identifier": "mit-a822f434-d61f-f2b1-c792-8b8cb9e7b9bf"
}
],
"other_license_expression": null,
Expand Down
Loading
Loading