Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

purl2sym: Support indexing of Batch3, Batch4 and Batch5 packages #360

Merged
merged 7 commits into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ Once the prerequisites have been installed, set up PurlDB with the following com
make postgres
make postgres_matchcodeio

Indexing some PURLs requires a GitHub API token. Please add your GitHub API token to the `.env` file
::

GH_TOKEN=your-github-api


Once PurlDB and the database have been set up, run tests to ensure functionality:
::

Expand Down
23 changes: 21 additions & 2 deletions minecode/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,31 @@ def test_map_generic_package(self):
self.assertEqual('1.0.0', package.version)
self.assertEqual('http://example.com/test.tar.gz', package.download_url)

def test_process_request_dir_listed(self):
def test_map_fetchcode_supported_package(self):
    """
    Check that mapping a fetchcode-supported generic PURL creates exactly
    one Package with the expected name, version and download URL, and that
    the mapper reports an empty error string.
    """
    # The Package table must start out empty.
    self.assertEqual(0, Package.objects.all().count())

    udhcp_purl = PackageURL.from_string("pkg:generic/udhcp@0.9.1")
    reported_error = generic.map_fetchcode_supported_package(udhcp_purl)

    # An empty string means no error was reported.
    self.assertEqual('', reported_error)
    self.assertEqual(1, Package.objects.all().count())

    expected_download_url = (
        "https://web.archive.org/web/20021209021312/http://udhcp.busybox.net/source//udhcp-0.9.1.tar.gz"
    )
    created_package = Package.objects.first()
    self.assertEqual("udhcp", created_package.name)
    self.assertEqual("0.9.1", created_package.version)
    self.assertEqual(expected_download_url, created_package.download_url)

def test_process_request_fetchcode_generic(self):
package_count = Package.objects.all().count()
self.assertEqual(0, package_count)

purl = "pkg:generic/ipkg@0.99.33"
error_msg = generic.process_request_dir_listed(purl)
error_msg = generic.process_request_fetchcode_generic(purl)

self.assertEqual(None, error_msg)
package_count = Package.objects.all().count()
Expand Down
26 changes: 18 additions & 8 deletions minecode/visitors/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ def packagedata_from_dict(package_data):
return PackageData.from_data(cleaned_package_data)


def map_directory_listed_package(package_url):
def map_fetchcode_supported_package(package_url):
"""
Add a directory listed `package_url` to the PackageDB.
Add a `package_url` supported by fetchcode to the PackageDB.

Return an error string if any errors are encountered during the process
"""
Expand Down Expand Up @@ -121,7 +121,7 @@ def map_directory_listed_package(package_url):
return error


DIR_SUPPORTED_PURLS = [
GENERIC_FETCHCODE_SUPPORTED_PURLS = [
"pkg:generic/busybox@.*",
"pkg:generic/bzip2@.*",
"pkg:generic/dnsmasq@.*",
Expand All @@ -137,16 +137,26 @@ def map_directory_listed_package(package_url):
"pkg:generic/samba@.*",
"pkg:generic/syslinux@.*",
"pkg:generic/toybox@.*",
"pkg:generic/uclibc@@.*",
"pkg:generic/uclibc@.*",
"pkg:generic/uclibc-ng@.*",
"pkg:generic/util-linux@.*",
"pkg:generic/wpa_supplicant@.*",
"pkg:generic/ipkg@.*",
"pkg:generic/linux@.*",
"pkg:generic/mtd-utils@.*",
"pkg:generic/barebox@.*",
"pkg:generic/e2fsprogs@.*",
"pkg:generic/udhcp@.*",
"pkg:generic/miniupnpc@.*",
"pkg:generic/miniupnpd@.*",
"pkg:generic/minissdpd@.*",
"pkg:generic/erofs-utils@.*",
]


@priority_router.route(*DIR_SUPPORTED_PURLS)
def process_request_dir_listed(purl_str):
# Indexing some generic PURLs requires a GitHub API token.
JonoYang marked this conversation as resolved.
Show resolved Hide resolved
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route(*GENERIC_FETCHCODE_SUPPORTED_PURLS)
def process_request_fetchcode_generic(purl_str):
"""
Process `priority_resource_uri` containing a generic Package URL (PURL)
supported by fetchcode.
Expand All @@ -161,7 +171,7 @@ def process_request_dir_listed(purl_str):
error = f"error occurred when parsing {purl_str}: {e}"
return error

error_msg = map_directory_listed_package(package_url)
error_msg = map_fetchcode_supported_package(package_url)

if error_msg:
return error_msg
25 changes: 25 additions & 0 deletions minecode/visitors/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
from github.Download import Download
from packageurl import PackageURL

from minecode import priority_router
from minecode import visit_router, seed
from minecode.visitors import HttpJsonVisitor
from minecode.visitors import URI
from minecode.visitors.generic import map_fetchcode_supported_package


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -179,3 +181,26 @@ def json_serial_date_obj(obj):
"""JSON serializer for date object"""
if obj and isinstance(obj, (datetime, date)):
return obj.isoformat()


# Indexing GitHub PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route('pkg:github/.*')
def process_request_dir_listed(purl_str):
    """
    Handle a priority request for a GitHub Package URL (PURL) string.

    `purl_str` is parsed into a PackageURL and handed to
    `map_fetchcode_supported_package`. Per the original notes, that helper
    obtains Package information using https://github.com/nexB/fetchcode,
    creates a new PackageDB entry, and the package is then added to the
    scan queue (none of which is visible in this function itself).

    Return an error string if `purl_str` cannot be parsed or if the helper
    reports an error; otherwise return None.
    """
    # NOTE(review): the name looks carried over from the directory-listed
    # generic handler; it is kept unchanged so existing references still work.
    try:
        parsed_purl = PackageURL.from_string(purl_str)
    except ValueError as exc:
        return f"error occurred when parsing {purl_str}: {exc}"

    # A falsy helper result (such as an empty string) is reported as None.
    return map_fetchcode_supported_package(parsed_purl) or None
4 changes: 2 additions & 2 deletions minecode/visitors/gnu.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from packageurl import PackageURL

from minecode import priority_router
from minecode.visitors.generic import map_directory_listed_package
from minecode.visitors.generic import map_fetchcode_supported_package

logger = logging.getLogger(__name__)
handler = logging.StreamHandler()
Expand All @@ -35,7 +35,7 @@ def process_request(purl_str):
if not package_url.version:
return

error_msg = map_directory_listed_package(package_url)
error_msg = map_fetchcode_supported_package(package_url)

if error_msg:
return error_msg
25 changes: 25 additions & 0 deletions minecode/visitors/openssl.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@
from commoncode import fileutils
from packageurl import PackageURL

from minecode import priority_router
from minecode import seed
from minecode import visit_router
from minecode.utils import is_int
from minecode.visitors import HttpVisitor
from minecode.visitors import URI
from minecode.visitors.generic import map_fetchcode_supported_package


class OpenSSLSeed(seed.Seeder):
Expand Down Expand Up @@ -88,3 +90,26 @@ def get_uris(self, content):
yield URI(uri=url, source_uri=self.uri, package_url=package_url, date=date, file_name=file_name, size=size)
else:
yield URI(uri=url, source_uri=self.uri, date=date, size=size)

# Indexing OpenSSL PURLs requires a GitHub API token.
# Please add your GitHub API key to the `.env` file, for example: `GH_TOKEN=your-github-api`.
@priority_router.route('pkg:openssl/openssl@.*')
def process_request_dir_listed(purl_str):
    """
    Handle a priority request for an OpenSSL Package URL (PURL) string
    supported by fetchcode.

    `purl_str` is parsed into a PackageURL and handed to
    `map_fetchcode_supported_package`. Per the original notes, that helper
    obtains Package information using https://github.com/nexB/fetchcode,
    creates a new PackageDB entry, and the package is then added to the
    scan queue (none of which is visible in this function itself).

    Return an error string if `purl_str` cannot be parsed or if the helper
    reports an error; otherwise return None.
    """
    # NOTE(review): the name looks carried over from the directory-listed
    # generic handler; it is kept unchanged so existing references still work.
    try:
        parsed_purl = PackageURL.from_string(purl_str)
    except ValueError as exc:
        return f"error occurred when parsing {purl_str}: {exc}"

    # A falsy helper result (such as an empty string) is reported as None.
    return map_fetchcode_supported_package(parsed_purl) or None
2 changes: 1 addition & 1 deletion packagedb/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@ def _reindex_package(package, reindexed_packages):
reindexed_packages = []
requeued_packages = []

supported_ecosystems = ['maven', 'npm', 'deb']
supported_ecosystems = ['maven', 'npm', 'deb', 'generic', 'gnu', 'openssl', 'github', 'conan']

unique_packages, unsupported_packages, unsupported_vers = get_resolved_packages(packages, supported_ecosystems)

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ extractcode==31.0.0
extractcode-7z==16.5.210531
extractcode-libarchive==3.5.1.210531
fasteners==0.19
fetchcode==0.4.0
fetchcode==0.5.1
fetchcode-container==1.2.3.210512
fingerprints==1.2.3
fontawesomefree==6.5.1
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ install_requires =
djangorestframework == 3.15.0
django-filter == 24.1
drf-spectacular == 0.26.5
fetchcode == 0.4.0
fetchcode == 0.5.1
gunicorn == 21.2.0
ftputil == 5.0.4
jawa == 2.2.0
Expand Down
Loading