Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 83 additions & 0 deletions minecode/collectors/cran.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import requests
from packageurl import PackageURL

from minecode import priority_router
from packagedb.models import PackageContentType

# Module-level logger for this collector. A StreamHandler is attached and the
# level is set to INFO when the module is imported, so the error messages
# logged by the functions below are emitted through that handler.
logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
logger.addHandler(handler)
logger.setLevel(logging.INFO)


def get_cran_package_json(name, timeout=30):
    """
    Return the contents of the JSON file of the package from CRAN DB API.
    Example: https://crandb.r-pkg.org/dplyr/all

    `name` is the CRAN package name interpolated into the request URL.
    `timeout` is the number of seconds handed to `requests.get`; without it
    a stalled connection would block the caller indefinitely.

    Return None (after logging the error) when the API answers with an HTTP
    error status. Only `requests.exceptions.HTTPError` is handled here: any
    other request failure, including a timeout, propagates to the caller.
    """
    url = f"https://crandb.r-pkg.org/{name}/all"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        # Explicit so the "no data" result is visible to readers and callers.
        return None


def map_cran_package(package_url, pipelines, priority=0):
    """
    Add a CRAN `package_url` to the PackageDB.

    Fetch the package metadata with `get_cran_package_json`, build packages
    from it with `build_packages`, merge or create each one in the database
    and add every resulting database package to the scan queue with the
    given `pipelines` and `priority`.

    Return an error message string when the metadata could not be fetched,
    when no package could be built for `package_url`, or when
    `merge_or_create_package` reports an error (processing stops at the
    first such error). Return None otherwise.
    """
    # Imported locally, as in the original code, rather than at module level.
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
    from minecode.miners.cran import build_packages

    package_json = get_cran_package_json(package_url.name)
    if not package_json:
        error = f"Package does not exist on CRAN: {package_url}"
        logger.error(error)
        return error

    error = None
    built_any = False
    for package in build_packages(package_json, package_url):
        built_any = True
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    if not built_any:
        # Nothing was yielded, e.g. the requested version is not listed in
        # the metadata. Report it instead of returning None as if the
        # package had been added.
        error = f"No package version found on CRAN for: {package_url}"
        logger.error(error)

    return error


@priority_router.route("pkg:cran/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a CRAN Package URL (PURL) string routed by the priority router.

    Optional keyword arguments read here:
    - `addon_pipelines`: extra pipelines appended to DEFAULT_PIPELINES.
    - `priority`: passed through to `map_cran_package` (defaults to 0).

    Return the error message from `map_cran_package` if there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    all_pipelines = DEFAULT_PIPELINES + extra_pipelines
    scan_priority = kwargs.get("priority", 0)

    failure = map_cran_package(
        PackageURL.from_string(purl_str),
        all_pipelines,
        scan_priority,
    )
    # A falsy result is normalised to None, as the original fall-through did.
    return failure if failure else None
54 changes: 54 additions & 0 deletions minecode/miners/cran.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,57 @@ def build_packages_from_html(metadata, uri=None, purl=None):
)
package.set_purl(purl)
yield package


def build_packages(metadata_dict, purl):
    """
    Yield ScannedPackage built from CRAN DB API metadata.

    One package is yielded per entry of the "versions" mapping of
    `metadata_dict`. When `purl` carries a version, only the entry whose key
    equals that version is used. The package name comes from the "Package"
    key of `metadata_dict`, falling back to `purl.name`.

    Expected `metadata_dict` layout:
        {
            "versions": {
                "1.0.0": { ... },
                "1.1.0": { ... }
            }
        }
    """
    wanted_version = purl.version
    name = metadata_dict.get("Package") or purl.name

    for version, info in metadata_dict.get("versions", {}).items():
        if wanted_version and wanted_version != version:
            continue

        description = info.get("Description")
        homepage_url = info.get("URL")
        license_statement = info.get("License")
        author = info.get("Author", "")

        # The whole "Author" value becomes a single author party.
        parties = [scan_models.Party(name=author, role="author")] if author else []

        package_data = {
            "datasource_id": "cran_pkginfo",
            "type": "cran",
            # CRAN tarball download URL
            "download_url": f"https://cran.r-project.org/src/contrib/{name}_{version}.tar.gz",
            "name": name,
            "version": version,
            "description": description,
            "homepage_url": homepage_url,
            "extracted_license_statement": [license_statement] if license_statement else [],
            "parties": parties,
        }

        package = scan_models.PackageData.from_data(package_data)
        # Replaces the datasource id that was passed to from_data() above.
        package.datasource_id = "cran_api_metadata"
        package.set_purl(purl)
        yield package
57 changes: 57 additions & 0 deletions minecode/tests/collectors/test_cran.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# purldb is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/purldb for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import json
import os

from django.test import TestCase as DjangoTestCase
from packageurl import PackageURL

import packagedb
from minecode.collectors import cran
from minecode.utils_test import JsonBasedTesting


class CranPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
    """
    Tests for the CRAN collector functions in `minecode.collectors.cran`.

    No mocking is set up in this class, so the collector functions are called
    as-is.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        """
        Load the "cran/dplyr.json" test file into `expected_json_contents`.
        """
        super().setUp()
        self.expected_json_loc = self.get_test_loc("cran/dplyr.json")
        with open(self.expected_json_loc) as fixture:
            self.expected_json_contents = json.load(fixture)

    def test_get_package_json(self):
        """
        Check that get_cran_package_json() returns a mapping with a
        "versions" key for the "dplyr" package.
        """
        metadata = cran.get_cran_package_json(name="dplyr")
        self.assertIn("versions", metadata)
        self.assertIn("dplyr", metadata.get("Package", "dplyr"))

    def test_map_cran_package(self):
        """
        Check that map_cran_package() stores exactly one Package with the
        expected PURL and download URL.
        """
        packages = packagedb.models.Package.objects
        self.assertEqual(0, packages.all().count())

        purl = PackageURL.from_string("pkg:cran/dplyr@1.1.0")
        cran.map_cran_package(purl, ("test_pipeline",))

        self.assertEqual(1, packages.all().count())

        stored = packages.all().first()
        self.assertEqual("pkg:cran/dplyr@1.1.0", stored.purl)
        self.assertEqual(
            "https://cran.r-project.org/src/contrib/dplyr_1.1.0.tar.gz",
            stored.download_url,
        )
Loading
Loading