Skip to content

Commit

Permalink
Add AlpinePackages pipeline
Browse files Browse the repository at this point in the history
A pipeline that complements missing package data. It downloads the aports
repository and all its necessary branches (alpine versions), then
iterates over all alpine packages associated with the pipeline's
project. For each package it copies additional files from the aports
repository into the scan target directory, then downloads and extracts all
the source archives, performs a scan and saves its output to the package's
database entry.

Signed-off-by: Mateusz Perc <m.perc@samsung.com>
  • Loading branch information
quepop committed Aug 5, 2021
1 parent e574fa9 commit 26d89a6
Show file tree
Hide file tree
Showing 4 changed files with 181 additions and 0 deletions.
95 changes: 95 additions & 0 deletions scanpipe/pipelines/alpine_packages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from scanpipe.pipelines import Pipeline
from scanpipe.pipes.alpine import (
download_or_checkout_aports,
get_packages_from_db,
prepare_scan_dir,
extract_summary_fields,
)
from scanpipe.pipes.scancode import run_extractcode, run_scancode


class AlpinePackages(Pipeline):
    """
    A pipeline that fills in data missing from alpine packages.
    The information is taken from the aports repository and from the
    packages' source files, which are downloaded and extracted.
    """

    @classmethod
    def steps(cls):
        # Steps are listed in execution order: later steps read
        # `self.alpine_versions`, which the first step creates.
        pipeline_steps = (
            cls.create_alpine_versions_dict,
            cls.download_aports_repo,
            cls.complement_missing_packages_data,
        )
        return pipeline_steps

    # Options handed to every `run_scancode` call made by this pipeline.
    scancode_options = ["--copyright", "--summary"]

    def create_alpine_versions_dict(self):
        """
        Build `self.alpine_versions`: a dict mapping the image id of every
        alpine image recorded in the project's extra data to its alpine version.
        """
        versions_by_image_id = {}
        for image in self.project.extra_data["images"]:
            distro = image["distro"]
            if distro["identifier"] != "alpine":
                continue
            image_id = image["image_id"]
            versions_by_image_id[image_id] = distro["version_id"]
        self.alpine_versions = versions_by_image_id

    def download_aports_repo(self):
        """
        Go through the alpine version of every alpine image of this project.
        Download the matching aports repository branch for each of them.
        """
        for alpine_version in self.alpine_versions.values():
            download_or_checkout_aports(self.project.tmp_path, alpine_version)

    def complement_missing_packages_data(self):
        """
        Go through the packages returned by `get_packages_from_db` for this project.
        Checkout the aports repository at the package's alpine version and at the
        commit id found after "id=" in the package's `vcs_url`.
        Prepare the scan target directory, then extract the package's sources.
        Run scancode and collect the missing data (only copyrights for now).
        Store the collected data in the package's extra data.
        A package is skipped when the checkout or the preparation reports failure.
        """
        packages_to_scan = get_packages_from_db(self.project)
        for scan_target_path, scan_result_path, package in packages_to_scan:
            aports_ready = download_or_checkout_aports(
                self.project.tmp_path,
                self.alpine_versions[package.extra_data["image_id"]],
                package.vcs_url.split("id=")[1],
            )
            if not aports_ready:
                continue
            if not prepare_scan_dir(package.name, scan_target_path):
                continue

            target_location = str(scan_target_path)
            run_extractcode(target_location)
            run_scancode(
                target_location, str(scan_result_path), self.scancode_options
            )
            summary_data = extract_summary_fields(scan_result_path, ["copyrights"])
            package.update_extra_data(summary_data)
84 changes: 84 additions & 0 deletions scanpipe/pipes/alpine.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,91 @@
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

import json

from packagedcode import alpine
from fetchcode.vcs.git import fetch_via_git
from fetchcode import fetch
from shutil import copytree
from scanpipe.models import DiscoveredPackage

APORTS_URL = "https://gitlab.alpinelinux.org/alpine/aports.git"
APORTS_DIR_NAME = "aports"
APORTS_SUBDIRS = ["main", "non-free", "community", "testing", "unmaintained"]


def download_or_checkout_aports(aports_dir_path, alpine_version, commit_id=None):
    """
    Fetch the aports repository into the `APORTS_DIR_NAME` directory located
    under `aports_dir_path`, at the "<major>.<minor>-stable" branch derived
    from the first two components of `alpine_version`.
    If `commit_id` is provided, fetch again at that commit.
    Always return True for now: a failed checkout is not yet reported as False.
    #TODO Proper fetchcode patch required (extending #54)
    """
    checkout_location = str(aports_dir_path / APORTS_DIR_NAME)
    version_parts = alpine_version.split(".")
    stable_branch = f"{version_parts[0]}.{version_parts[1]}-stable"

    fetch_via_git(f"git+{APORTS_URL}@{stable_branch}", checkout_location)
    if commit_id:
        fetch_via_git(f"git+{APORTS_URL}@{commit_id}", checkout_location)
    return True


def get_packages_from_db(project):
    """
    Get alpine packages from the database that belong to a `project`.
    Yield only root and not yet scanned packages, each as a
    `(scan_target_path, scan_result_path, package)` tuple holding the paths
    needed when performing a scan.

    A package is yielded only when ALL of the following hold:
    - its type is "alpine";
    - it is a root package, i.e. its first source package is contained in
      its own purl;
    - no scan result file exists yet for it in the project's output directory.
    """
    for package in DiscoveredPackage.objects.project(project):
        scan_id = f"{package.name}_{package.version}"
        scan_target_path = project.tmp_path / scan_id
        scan_result_path = project.output_path / (scan_id + ".json")
        # The criteria must be combined with `and`: with `or`, non-alpine
        # packages and already scanned packages were yielded as well.
        # NOTE(review): a package without any `source_packages` is not
        # considered a root package here - confirm this is intended.
        if (
            package.type == "alpine"
            and (package.source_packages and package.source_packages[0] in package.purl)
            and not scan_result_path.exists()
        ):
            yield scan_target_path, scan_result_path, package


def prepare_scan_dir(package_name, scan_target_path, aports_dir_path=None):
    """
    Find package's aports path and if found execute the following steps:
    Copy all the files from that path into `scan_target_path`.
    Download all package's sources into `scan_target_path`.
    The default value of `aports_dir_path` is set to the parent of the
    `scan_target_path`.
    Return True once the first matching aports subdirectory has been
    processed, False when the package is not found in any of them.
    """
    if aports_dir_path is None:
        aports_dir_path = scan_target_path.parent

    for subdir_name in APORTS_SUBDIRS:
        apkbuild_dir = aports_dir_path / APORTS_DIR_NAME / subdir_name / package_name
        if not apkbuild_dir.exists():
            continue

        copytree(apkbuild_dir, scan_target_path)
        package_data = alpine.parse_apkbuild(scan_target_path / "APKBUILD").to_dict()
        # "extra_data" and "sources" may be missing or None: treat both cases
        # as "no sources" instead of failing with an AttributeError.
        sources = (package_data.get("extra_data") or {}).get("sources") or []
        for source in sources:
            # Sources without a url cannot be downloaded and are skipped.
            source_url = source.get("url")
            if source_url:
                fetch(source_url, scan_target_path)
        return True

    # The package was not found in any of the aports subdirectories.
    return False


def extract_summary_fields(scan_result_path, summary_fields):
    """
    Having a scancode result file, extract all the `summary_fields` values
    from its `summary` section.
    Return a dict mapping each of the `summary_fields` to the list of the
    non-empty "value" entries found for that field.
    A field that is missing from the summary (or a missing summary section)
    maps to an empty list.
    """
    # JSON is read as UTF-8 explicitly rather than relying on the locale's
    # default encoding, since the values may contain non-ASCII characters.
    with open(scan_result_path, encoding="utf-8") as scan_result:
        summary = json.load(scan_result).get("summary") or {}

    return {
        field: [
            entry["value"]
            for entry in summary.get(field) or []
            if entry.get("value")
        ]
        for field in summary_fields
    }


def package_getter(root_dir, **kwargs):
Expand Down
1 change: 1 addition & 0 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def scan_image_for_system_packages(project, image, detect_licenses=True):
for i, (purl, package, layer) in enumerate(installed_packages):
logger.info(f"Creating package #{i}: {purl}")
created_package = pipes.update_or_create_package(project, package.to_dict())
created_package.update_extra_data({"image_id": image.image_id})

# We have no files for this installed package, we cannot go further.
if not package.installed_files:
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
"root_filesystems = scanpipe.pipelines.root_filesystems:RootFS",
"scan_codebase = scanpipe.pipelines.scan_codebase:ScanCodebase",
"scan_package = scanpipe.pipelines.scan_package:ScanPackage",
"alpine_packages = scanpipe.pipelines.alpine_packages:AlpinePackages"
],
},
classifiers=[
Expand Down

0 comments on commit 26d89a6

Please sign in to comment.