From 90b40b1cb63ba9b9612ea7a285fa9e6c0d9ef191 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 29 Mar 2024 15:56:02 +0530 Subject: [PATCH 1/7] Support indexing of more generic PURLs - pkg:generic/linux - pkg:generic/mtd-utils - pkg:generic/barebox - pkg:generic/e2fsprogs - pkg:generic/udhcp - pkg:generic/miniupnpc - pkg:generic/miniupnpd - pkg:generic/minissdpd - pkg:generic/erofs-utils Signed-off-by: Keshav Priyadarshi --- minecode/tests/test_generic.py | 4 ++-- minecode/visitors/generic.py | 23 ++++++++++++++++------- minecode/visitors/gnu.py | 4 ++-- requirements.txt | 2 +- setup.cfg | 2 +- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/minecode/tests/test_generic.py b/minecode/tests/test_generic.py index dafe8be2..425837ff 100644 --- a/minecode/tests/test_generic.py +++ b/minecode/tests/test_generic.py @@ -60,12 +60,12 @@ def test_map_generic_package(self): self.assertEqual('1.0.0', package.version) self.assertEqual('http://example.com/test.tar.gz', package.download_url) - def test_process_request_dir_listed(self): + def test_process_request_fetchcode_generic(self): package_count = Package.objects.all().count() self.assertEqual(0, package_count) purl = "pkg:generic/ipkg@0.99.33" - error_msg = generic.process_request_dir_listed(purl) + error_msg = generic.process_request_fetchcode_generic(purl) self.assertEqual(None, error_msg) package_count = Package.objects.all().count() diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py index 314a84e4..a72de655 100644 --- a/minecode/visitors/generic.py +++ b/minecode/visitors/generic.py @@ -91,9 +91,9 @@ def packagedata_from_dict(package_data): return PackageData.from_data(cleaned_package_data) -def map_directory_listed_package(package_url): +def map_fetchcode_supported_package(package_url): """ - Add a directory listed `package_url` to the PackageDB. + Add a `package_url` supported by fetchcode to the PackageDB. 
Return an error string if any errors are encountered during the process """ @@ -121,7 +121,7 @@ def map_directory_listed_package(package_url): return error -DIR_SUPPORTED_PURLS = [ +GENERIC_FETCHCODE_SUPPORTED_PURLS = [ "pkg:generic/busybox@.*", "pkg:generic/bzip2@.*", "pkg:generic/dnsmasq@.*", @@ -137,16 +137,25 @@ def map_directory_listed_package(package_url): "pkg:generic/samba@.*", "pkg:generic/syslinux@.*", "pkg:generic/toybox@.*", - "pkg:generic/uclibc@@.*", + "pkg:generic/uclibc@.*", "pkg:generic/uclibc-ng@.*", "pkg:generic/util-linux@.*", "pkg:generic/wpa_supplicant@.*", "pkg:generic/ipkg@.*", + "pkg:generic/linux@.*", + "pkg:generic/mtd-utils@.*", + "pkg:generic/barebox@.*", + "pkg:generic/e2fsprogs@.*", + "pkg:generic/udhcp@.*", + "pkg:generic/miniupnpc@.*", + "pkg:generic/miniupnpd@.*", + "pkg:generic/minissdpd@.*", + "pkg:generic/erofs-utils@.*", ] -@priority_router.route(*DIR_SUPPORTED_PURLS) -def process_request_dir_listed(purl_str): +@priority_router.route(*GENERIC_FETCHCODE_SUPPORTED_PURLS) +def process_request_fetchcode_generic(purl_str): """ Process `priority_resource_uri` containing a generic Package URL (PURL) supported by fetchcode. 
@@ -161,7 +170,7 @@ def process_request_dir_listed(purl_str): error = f"error occurred when parsing {purl_str}: {e}" return error - error_msg = map_directory_listed_package(package_url) + error_msg = map_fetchcode_supported_package(package_url) if error_msg: return error_msg diff --git a/minecode/visitors/gnu.py b/minecode/visitors/gnu.py index 3e703a8f..8aec30cc 100644 --- a/minecode/visitors/gnu.py +++ b/minecode/visitors/gnu.py @@ -13,7 +13,7 @@ from packageurl import PackageURL from minecode import priority_router -from minecode.visitors.generic import map_directory_listed_package +from minecode.visitors.generic import map_fetchcode_supported_package logger = logging.getLogger(__name__) handler = logging.StreamHandler() @@ -35,7 +35,7 @@ def process_request(purl_str): if not package_url.version: return - error_msg = map_directory_listed_package(package_url) + error_msg = map_fetchcode_supported_package(package_url) if error_msg: return error_msg diff --git a/requirements.txt b/requirements.txt index 98e8e466..6a98407d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,7 +42,7 @@ extractcode==31.0.0 extractcode-7z==16.5.210531 extractcode-libarchive==3.5.1.210531 fasteners==0.19 -fetchcode==0.4.0 +fetchcode==0.5.0 fetchcode-container==1.2.3.210512 fingerprints==1.2.3 fontawesomefree==6.5.1 diff --git a/setup.cfg b/setup.cfg index c247e990..15f0c7bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,7 +45,7 @@ install_requires = djangorestframework == 3.15.0 django-filter == 24.1 drf-spectacular == 0.26.5 - fetchcode == 0.4.0 + fetchcode == 0.5.0 gunicorn == 21.2.0 ftputil == 5.0.4 jawa == 2.2.0 From 565c07201b037c5243cdbae79c0860d925447361 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 29 Mar 2024 15:57:07 +0530 Subject: [PATCH 2/7] Support indexing of OpenSSL package Signed-off-by: Keshav Priyadarshi --- minecode/visitors/openssl.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/minecode/visitors/openssl.py 
b/minecode/visitors/openssl.py index 90a10b7e..8b93c864 100644 --- a/minecode/visitors/openssl.py +++ b/minecode/visitors/openssl.py @@ -11,11 +11,13 @@ from commoncode import fileutils from packageurl import PackageURL +from minecode import priority_router from minecode import seed from minecode import visit_router from minecode.utils import is_int from minecode.visitors import HttpVisitor from minecode.visitors import URI +from minecode.visitors.generic import map_fetchcode_supported_package class OpenSSLSeed(seed.Seeder): @@ -88,3 +90,25 @@ def get_uris(self, content): yield URI(uri=url, source_uri=self.uri, package_url=package_url, date=date, file_name=file_name, size=size) else: yield URI(uri=url, source_uri=self.uri, date=date, size=size) + + +@priority_router.route('pkg:openssl/openssl@.*') +def process_request_dir_listed(purl_str): + """ + Process `priority_resource_uri` containing an OpenSSL Package URL (PURL) + supported by fetchcode. + + This involves obtaining Package information for the PURL using + https://github.com/nexB/fetchcode and using it to create a new + PackageDB entry. The package is then added to the scan queue afterwards. 
+ """ + try: + package_url = PackageURL.from_string(purl_str) + except ValueError as e: + error = f"error occurred when parsing {purl_str}: {e}" + return error + + error_msg = map_fetchcode_supported_package(package_url) + + if error_msg: + return error_msg \ No newline at end of file From e5fa997e6209b711ced4ffe1836c97eb98c008b4 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 29 Mar 2024 16:06:23 +0530 Subject: [PATCH 3/7] Support indexing of GitHub PURLs Signed-off-by: Keshav Priyadarshi --- minecode/visitors/github.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/minecode/visitors/github.py b/minecode/visitors/github.py index 5fa42193..4e2a92db 100644 --- a/minecode/visitors/github.py +++ b/minecode/visitors/github.py @@ -18,9 +18,11 @@ from github.Download import Download from packageurl import PackageURL +from minecode import priority_router from minecode import visit_router, seed from minecode.visitors import HttpJsonVisitor from minecode.visitors import URI +from minecode.visitors.generic import map_fetchcode_supported_package logger = logging.getLogger(__name__) @@ -179,3 +181,24 @@ def json_serial_date_obj(obj): """JSON serializer for date object""" if obj and isinstance(obj, (datetime, date)): return obj.isoformat() + + +@priority_router.route('pkg:github/.*') +def process_request_dir_listed(purl_str): + """ + Process `priority_resource_uri` containing a GitHub Package URL (PURL). + + This involves obtaining Package information for the PURL using + https://github.com/nexB/fetchcode and using it to create a new + PackageDB entry. The package is then added to the scan queue afterwards. 
+ """ + try: + package_url = PackageURL.from_string(purl_str) + except ValueError as e: + error = f"error occurred when parsing {purl_str}: {e}" + return error + + error_msg = map_fetchcode_supported_package(package_url) + + if error_msg: + return error_msg From 5b79afd2788ccaa9a8f5c7de7d1ccc0ac5d9a5be Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 29 Mar 2024 16:32:59 +0530 Subject: [PATCH 4/7] Add test for map_fetchcode_supported_package Signed-off-by: Keshav Priyadarshi --- minecode/tests/test_generic.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/minecode/tests/test_generic.py b/minecode/tests/test_generic.py index 425837ff..466fe436 100644 --- a/minecode/tests/test_generic.py +++ b/minecode/tests/test_generic.py @@ -60,6 +60,25 @@ def test_map_generic_package(self): self.assertEqual('1.0.0', package.version) self.assertEqual('http://example.com/test.tar.gz', package.download_url) + def test_map_fetchcode_supported_package(self): + package_count = Package.objects.all().count() + self.assertEqual(0, package_count) + + purl = PackageURL.from_string("pkg:generic/udhcp@0.9.1") + error_msg = generic.map_fetchcode_supported_package(purl) + + self.assertEqual('', error_msg) + package_count = Package.objects.all().count() + self.assertEqual(1, package_count) + + package = Package.objects.first() + self.assertEqual("udhcp", package.name) + self.assertEqual("0.9.1", package.version) + self.assertEqual( + "https://web.archive.org/web/20021209021312/http://udhcp.busybox.net/source//udhcp-0.9.1.tar.gz", + package.download_url, + ) + def test_process_request_fetchcode_generic(self): package_count = Package.objects.all().count() self.assertEqual(0, package_count) From 0e8b6a50f39e604ad57ee91e10cca7a0150b157a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 29 Mar 2024 16:40:12 +0530 Subject: [PATCH 5/7] Update supported ecosystem Signed-off-by: Keshav Priyadarshi --- packagedb/api.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/packagedb/api.py b/packagedb/api.py index d778a7a8..f1d46086 100644 --- a/packagedb/api.py +++ b/packagedb/api.py @@ -726,7 +726,7 @@ def _reindex_package(package, reindexed_packages): reindexed_packages = [] requeued_packages = [] - supported_ecosystems = ['maven', 'npm', 'deb'] + supported_ecosystems = ['maven', 'npm', 'deb', 'generic', 'gnu', 'openssl', 'github', 'conan'] unique_packages, unsupported_packages, unsupported_vers = get_resolved_packages(packages, supported_ecosystems) From a4d5d984364af1ba3f845cb28564cc1818897f00 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 29 Mar 2024 20:37:42 +0530 Subject: [PATCH 6/7] Add instruction to provide GitHub API key Signed-off-by: Keshav Priyadarshi --- minecode/visitors/generic.py | 3 ++- minecode/visitors/github.py | 2 ++ minecode/visitors/openssl.py | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/minecode/visitors/generic.py b/minecode/visitors/generic.py index a72de655..6011209f 100644 --- a/minecode/visitors/generic.py +++ b/minecode/visitors/generic.py @@ -153,7 +153,8 @@ def map_fetchcode_supported_package(package_url): "pkg:generic/erofs-utils@.*", ] - +# Indexing some generic PURLs requires a GitHub API token. +# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route(*GENERIC_FETCHCODE_SUPPORTED_PURLS) def process_request_fetchcode_generic(purl_str): """ diff --git a/minecode/visitors/github.py b/minecode/visitors/github.py index 4e2a92db..522f8875 100644 --- a/minecode/visitors/github.py +++ b/minecode/visitors/github.py @@ -183,6 +183,8 @@ def json_serial_date_obj(obj): return obj.isoformat() +# Indexing GitHub PURLs requires a GitHub API token. +# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. 
@priority_router.route('pkg:github/.*') def process_request_dir_listed(purl_str): """ diff --git a/minecode/visitors/openssl.py b/minecode/visitors/openssl.py index 8b93c864..390de77b 100644 --- a/minecode/visitors/openssl.py +++ b/minecode/visitors/openssl.py @@ -91,7 +91,8 @@ def get_uris(self, content): else: yield URI(uri=url, source_uri=self.uri, date=date, size=size) - +# Indexing OpenSSL PURLs requires a GitHub API token. +# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`. @priority_router.route('pkg:openssl/openssl@.*') def process_request_dir_listed(purl_str): """ From 707c61f9668a3a78173d87a3b60c23853072b96a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 1 Apr 2024 22:39:15 +0530 Subject: [PATCH 7/7] Update README to provide GitHub API key in `.env` file Signed-off-by: Keshav Priyadarshi --- README.rst | 6 ++++++ requirements.txt | 2 +- setup.cfg | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 538b5fd4..4a94f39a 100644 --- a/README.rst +++ b/README.rst @@ -42,6 +42,12 @@ Once the prerequisites have been installed, set up PurlDB with the following com make postgres make postgres_matchcodeio +Indexing some PURLs requires a GitHub API token. 
Please add your GitHub API key to the ``.env`` file +:: + + GH_TOKEN=your-github-api + + Once PurlDB and the database has been set up, run tests to ensure functionality: :: diff --git a/requirements.txt b/requirements.txt index 6a98407d..2822cc43 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,7 +42,7 @@ extractcode==31.0.0 extractcode-7z==16.5.210531 extractcode-libarchive==3.5.1.210531 fasteners==0.19 -fetchcode==0.5.0 +fetchcode==0.5.1 fetchcode-container==1.2.3.210512 fingerprints==1.2.3 fontawesomefree==6.5.1 diff --git a/setup.cfg b/setup.cfg index 15f0c7bf..ece63023 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,7 +45,7 @@ install_requires = djangorestframework == 3.15.0 django-filter == 24.1 drf-spectacular == 0.26.5 - fetchcode == 0.5.0 + fetchcode == 0.5.1 gunicorn == 21.2.0 ftputil == 5.0.4 jawa == 2.2.0