diff --git a/README.rst b/README.rst index 538b5fd4..4a94f39a 100644 --- a/README.rst +++ b/README.rst @@ -42,6 +42,12 @@ Once the prerequisites have been installed, set up PurlDB with the following com make postgres make postgres_matchcodeio +Indexing some PURLs requires a GitHub API token. Please add your GitHub API token to the ``.env`` file +:: + + GH_TOKEN=your-github-api-token + + Once PurlDB and the database has been set up, run tests to ensure functionality: :: diff --git a/minecode/tests/test_generic.py b/minecode/tests/test_generic.py index dafe8be2..466fe436 100644 --- a/minecode/tests/test_generic.py +++ b/minecode/tests/test_generic.py @@ -60,12 +60,31 @@ def test_map_generic_package(self): self.assertEqual('1.0.0', package.version) self.assertEqual('http://example.com/test.tar.gz', package.download_url) - def test_process_request_dir_listed(self): + def test_map_fetchcode_supported_package(self): + package_count = Package.objects.all().count() + self.assertEqual(0, package_count) + + purl = PackageURL.from_string("pkg:generic/udhcp@0.9.1") + error_msg = generic.map_fetchcode_supported_package(purl) + + self.assertEqual('', error_msg) + package_count = Package.objects.all().count() + self.assertEqual(1, package_count) + + package = Package.objects.first() + self.assertEqual("udhcp", package.name) + self.assertEqual("0.9.1", package.version) + self.assertEqual( + "https://web.archive.org/web/20021209021312/http://udhcp.busybox.net/source//udhcp-0.9.1.tar.gz", + package.download_url, + ) + + def test_process_request_fetchcode_generic(self): package_count = Package.objects.all().count() self.assertEqual(0, package_count) purl = "pkg:generic/ipkg@0.99.33" - error_msg = generic.process_request_dir_listed(purl) + error_msg = generic.process_request_fetchcode_generic(purl) self.assertEqual(None, error_msg) package_count = Package.objects.all().count() diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py index 314a84e4..6011209f 100644 --- 
a/minecode/visitors/generic.py +++ b/minecode/visitors/generic.py @@ -91,9 +91,9 @@ def packagedata_from_dict(package_data): return PackageData.from_data(cleaned_package_data) -def map_directory_listed_package(package_url): +def map_fetchcode_supported_package(package_url): """ - Add a directory listed `package_url` to the PackageDB. + Add a `package_url` supported by fetchcode to the PackageDB. Return an error string if any errors are encountered during the process """ @@ -121,7 +121,7 @@ def map_directory_listed_package(package_url): return error -DIR_SUPPORTED_PURLS = [ +GENERIC_FETCHCODE_SUPPORTED_PURLS = [ "pkg:generic/busybox@.*", "pkg:generic/bzip2@.*", "pkg:generic/dnsmasq@.*", @@ -137,16 +137,26 @@ def map_directory_listed_package(package_url): "pkg:generic/samba@.*", "pkg:generic/syslinux@.*", "pkg:generic/toybox@.*", - "pkg:generic/uclibc@@.*", + "pkg:generic/uclibc@.*", "pkg:generic/uclibc-ng@.*", "pkg:generic/util-linux@.*", "pkg:generic/wpa_supplicant@.*", "pkg:generic/ipkg@.*", + "pkg:generic/linux@.*", + "pkg:generic/mtd-utils@.*", + "pkg:generic/barebox@.*", + "pkg:generic/e2fsprogs@.*", + "pkg:generic/udhcp@.*", + "pkg:generic/miniupnpc@.*", + "pkg:generic/miniupnpd@.*", + "pkg:generic/minissdpd@.*", + "pkg:generic/erofs-utils@.*", ] - -@priority_router.route(*DIR_SUPPORTED_PURLS) -def process_request_dir_listed(purl_str): +# Indexing some generic PURLs requires a GitHub API token. +# Please add your GitHub API token to the `.env` file, for example: `GH_TOKEN=your-github-api-token`. +@priority_router.route(*GENERIC_FETCHCODE_SUPPORTED_PURLS) +def process_request_fetchcode_generic(purl_str): """ Process `priority_resource_uri` containing a generic Package URL (PURL) supported by fetchcode. 
@@ -161,7 +171,7 @@ def process_request_dir_listed(purl_str): error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_directory_listed_package(package_url) + error_msg = map_fetchcode_supported_package(package_url) if error_msg: return error_msg diff --git a/minecode/visitors/github.py b/minecode/visitors/github.py index 5fa42193..522f8875 100644 --- a/minecode/visitors/github.py +++ b/minecode/visitors/github.py @@ -18,9 +18,11 @@ from github.Download import Download from packageurl import PackageURL +from minecode import priority_router from minecode import visit_router, seed from minecode.visitors import HttpJsonVisitor from minecode.visitors import URI +from minecode.visitors.generic import map_fetchcode_supported_package logger = logging.getLogger(__name__) @@ -179,3 +181,26 @@ def json_serial_date_obj(obj): """JSON serializer for date object""" if obj and isinstance(obj, (datetime, date)): return obj.isoformat() + + +# Indexing GitHub PURLs requires a GitHub API token. +# Please add your GitHub API token to the `.env` file, for example: `GH_TOKEN=your-github-api-token`. +@priority_router.route('pkg:github/.*') +def process_request_dir_listed(purl_str): + """ + Process `priority_resource_uri` containing a GitHub Package URL (PURL). + + This involves obtaining Package information for the PURL using + https://github.com/nexB/fetchcode and using it to create a new + PackageDB entry. The package is then added to the scan queue afterwards. 
+ """ + try: + package_url = PackageURL.from_string(purl_str) + except ValueError as e: + error = f"error occurred when parsing {purl_str}: {e}" + return error + + error_msg = map_fetchcode_supported_package(package_url) + + if error_msg: + return error_msg diff --git a/minecode/visitors/gnu.py b/minecode/visitors/gnu.py index 3e703a8f..8aec30cc 100644 --- a/minecode/visitors/gnu.py +++ b/minecode/visitors/gnu.py @@ -13,7 +13,7 @@ from packageurl import PackageURL from minecode import priority_router -from minecode.visitors.generic import map_directory_listed_package +from minecode.visitors.generic import map_fetchcode_supported_package logger = logging.getLogger(__name__) handler = logging.StreamHandler() @@ -35,7 +35,7 @@ def process_request(purl_str): if not package_url.version: return - error_msg = map_directory_listed_package(package_url) + error_msg = map_fetchcode_supported_package(package_url) if error_msg: return error_msg diff --git a/minecode/visitors/openssl.py b/minecode/visitors/openssl.py index 90a10b7e..390de77b 100644 --- a/minecode/visitors/openssl.py +++ b/minecode/visitors/openssl.py @@ -11,11 +11,13 @@ from commoncode import fileutils from packageurl import PackageURL +from minecode import priority_router from minecode import seed from minecode import visit_router from minecode.utils import is_int from minecode.visitors import HttpVisitor from minecode.visitors import URI +from minecode.visitors.generic import map_fetchcode_supported_package class OpenSSLSeed(seed.Seeder): @@ -88,3 +90,26 @@ def get_uris(self, content): yield URI(uri=url, source_uri=self.uri, package_url=package_url, date=date, file_name=file_name, size=size) else: yield URI(uri=url, source_uri=self.uri, date=date, size=size) + +# Indexing OpenSSL PURLs requires a GitHub API token. +# Please add your GitHub API token to the `.env` file, for example: `GH_TOKEN=your-github-api-token`. 
+@priority_router.route('pkg:openssl/openssl@.*') +def process_request_dir_listed(purl_str): + """ + Process `priority_resource_uri` containing an OpenSSL Package URL (PURL) + supported by fetchcode. + + This involves obtaining Package information for the PURL using + https://github.com/nexB/fetchcode and using it to create a new + PackageDB entry. The package is then added to the scan queue afterwards. + """ + try: + package_url = PackageURL.from_string(purl_str) + except ValueError as e: + error = f"error occurred when parsing {purl_str}: {e}" + return error + + error_msg = map_fetchcode_supported_package(package_url) + + if error_msg: + return error_msg \ No newline at end of file diff --git a/packagedb/api.py b/packagedb/api.py index d778a7a8..f1d46086 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -726,7 +726,7 @@ def _reindex_package(package, reindexed_packages): reindexed_packages = [] requeued_packages = [] - supported_ecosystems = ['maven', 'npm', 'deb'] + supported_ecosystems = ['maven', 'npm', 'deb', 'generic', 'gnu', 'openssl', 'github', 'conan'] unique_packages, unsupported_packages, unsupported_vers = get_resolved_packages(packages, supported_ecosystems) diff --git a/requirements.txt b/requirements.txt index 98e8e466..2822cc43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,7 +42,7 @@ extractcode==31.0.0 extractcode-7z==16.5.210531 extractcode-libarchive==3.5.1.210531 fasteners==0.19 -fetchcode==0.4.0 +fetchcode==0.5.1 fetchcode-container==1.2.3.210512 fingerprints==1.2.3 fontawesomefree==6.5.1 diff --git a/setup.cfg b/setup.cfg index c247e990..ece63023 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,7 +45,7 @@ install_requires = djangorestframework == 3.15.0 django-filter == 24.1 drf-spectacular == 0.26.5 - fetchcode == 0.4.0 + fetchcode == 0.5.1 gunicorn == 21.2.0 ftputil == 5.0.4 jawa == 2.2.0