-
Notifications
You must be signed in to change notification settings - Fork 18
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add cocoapods support to package.py #119
Changes from all commits
3768d2e
801e740
3b01c28
536f5ed
1f5ff88
666e594
9b3a113
82f2a1e
1eb1bc3
5226943
550c654
e1d9073
3b38ed8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,16 +32,19 @@ | |
from fetchcode.package_util import GitHubSource | ||
from fetchcode.package_util import MiniupnpPackagesGitHubSource | ||
from fetchcode.package_util import OpenSSLGitHubSource | ||
from fetchcode.package_util import construct_cocoapods_package | ||
from fetchcode.package_util import get_cocoapod_tags | ||
from fetchcode.packagedcode_models import Package | ||
from fetchcode.utils import get_hashed_path | ||
from fetchcode.utils import get_response | ||
|
||
router = Router() | ||
|
||
|
||
def info(url): | ||
""" | ||
Return data according to the `url` string | ||
`url` string can be purl too | ||
Return package metadata for a URL or PURL. | ||
Return None if there is no URL, or the URL or PURL is not supported. | ||
""" | ||
if url: | ||
try: | ||
|
@@ -83,13 +86,7 @@ def get_cargo_data_from_purl(purl): | |
crate = response.get("crate") or {} | ||
homepage_url = crate.get("homepage") | ||
code_view_url = crate.get("repository") | ||
yield Package( | ||
homepage_url=homepage_url, | ||
api_url=api_url, | ||
code_view_url=code_view_url, | ||
download_url=download_url, | ||
**purl.to_dict(), | ||
) | ||
|
||
versions = response.get("versions", []) | ||
for version in versions: | ||
version_purl = PackageURL(type=purl.type, name=name, version=version.get("num")) | ||
|
@@ -100,6 +97,9 @@ def get_cargo_data_from_purl(purl): | |
download_url = None | ||
declared_license = version.get("license") | ||
|
||
if purl.version and version_purl.version != purl.version: | ||
continue | ||
|
||
yield Package( | ||
homepage_url=homepage_url, | ||
api_url=api_url, | ||
|
@@ -109,6 +109,9 @@ def get_cargo_data_from_purl(purl): | |
**version_purl.to_dict(), | ||
) | ||
|
||
if purl.version: | ||
break | ||
|
||
|
||
@router.route("pkg:npm/.*") | ||
def get_npm_data_from_purl(purl): | ||
|
@@ -120,39 +123,30 @@ def get_npm_data_from_purl(purl): | |
name = purl.name | ||
version = purl.version | ||
api_url = f"{base_path}/{name}" | ||
|
||
response = get_response(api_url) | ||
vcs_data = response.get("repository") or {} | ||
bugs = response.get("bugs") or {} | ||
|
||
download_url = f"{base_path}/{name}/-/{name}-{version}.tgz" if version else None | ||
vcs_url = vcs_data.get("url") | ||
bug_tracking_url = bugs.get("url") | ||
license = response.get("license") | ||
homepage_url = response.get("homepage") | ||
|
||
yield Package( | ||
homepage_url=homepage_url, | ||
api_url=api_url, | ||
vcs_url=vcs_url, | ||
bug_tracking_url=bug_tracking_url, | ||
download_url=download_url, | ||
declared_license=license, | ||
**purl.to_dict(), | ||
) | ||
|
||
versions = response.get("versions", []) | ||
tags = [] | ||
for num in versions: | ||
version = versions[num] | ||
version_purl = PackageURL(type=purl.type, name=name, version=version.get("version")) | ||
repository = version.get("repository") or {} | ||
bugs = response.get("bugs") or {} | ||
dist = version.get("dist") or {} | ||
licenses = version.get("licenses") or [{}] | ||
vcs_url = repository.get("url") | ||
download_url = dist.get("tarball") | ||
bug_tracking_url = bugs.get("url") | ||
declared_license = licenses[0].get("type") | ||
declared_license = license | ||
|
||
if purl.version and version_purl.version != purl.version: | ||
continue | ||
|
||
yield Package( | ||
homepage_url=homepage_url, | ||
|
@@ -164,6 +158,9 @@ def get_npm_data_from_purl(purl): | |
**version_purl.to_dict(), | ||
) | ||
|
||
if purl.version: | ||
break | ||
|
||
|
||
@router.route("pkg:pypi/.*") | ||
def get_pypi_data_from_purl(purl): | ||
|
@@ -172,6 +169,7 @@ def get_pypi_data_from_purl(purl): | |
""" | ||
purl = PackageURL.from_string(purl) | ||
name = purl.name | ||
|
||
base_path = "https://pypi.org/pypi" | ||
api_url = f"{base_path}/{name}/json" | ||
response = get_response(api_url) | ||
|
@@ -182,19 +180,14 @@ def get_pypi_data_from_purl(purl): | |
project_urls = info.get("project_urls") or {} | ||
code_view_url = get_pypi_codeview_url(project_urls) | ||
bug_tracking_url = get_pypi_bugtracker_url(project_urls) | ||
yield Package( | ||
homepage_url=homepage_url, | ||
api_url=api_url, | ||
bug_tracking_url=bug_tracking_url, | ||
code_view_url=code_view_url, | ||
declared_license=license, | ||
**purl.to_dict(), | ||
) | ||
|
||
for num in releases: | ||
version_purl = PackageURL(type=purl.type, name=name, version=num) | ||
release = releases.get(num) or [{}] | ||
release = release[0] | ||
download_url = release.get("url") | ||
if purl.version and version_purl.version != purl.version: | ||
continue | ||
yield Package( | ||
homepage_url=homepage_url, | ||
api_url=api_url, | ||
|
@@ -205,6 +198,9 @@ def get_pypi_data_from_purl(purl): | |
**version_purl.to_dict(), | ||
) | ||
|
||
if purl.version: | ||
break | ||
|
||
|
||
@router.route("pkg:github/.*") | ||
def get_github_data_from_purl(purl): | ||
|
@@ -291,24 +287,24 @@ def get_bitbucket_data_from_purl(purl): | |
bitbucket_url = "https://bitbucket.org" | ||
bug_tracking_url = f"{bitbucket_url}/{namespace}/{name}/issues" | ||
code_view_url = f"{bitbucket_url}/{namespace}/{name}" | ||
yield Package( | ||
api_url=api_url, | ||
bug_tracking_url=bug_tracking_url, | ||
code_view_url=code_view_url, | ||
**purl.to_dict(), | ||
) | ||
|
||
links = response.get("links") or {} | ||
tags_url = links.get("tags") or {} | ||
tags_url = tags_url.get("href") | ||
if not tags_url: | ||
return [] | ||
tags_data = get_response(tags_url) | ||
tags = tags_data.get("values") or {} | ||
|
||
for tag in tags: | ||
version = tag.get("name") or "" | ||
version_purl = PackageURL(type=purl.type, namespace=namespace, name=name, version=version) | ||
download_url = f"{base_path}/{namespace}/{name}/downloads/{name}-{version}.tar.gz" | ||
code_view_url = f"{bitbucket_url}/{namespace}/{name}/src/{version}" | ||
|
||
if purl.version and version_purl.version != purl.version: | ||
continue | ||
|
||
yield Package( | ||
api_url=api_url, | ||
bug_tracking_url=bug_tracking_url, | ||
|
@@ -317,6 +313,9 @@ def get_bitbucket_data_from_purl(purl): | |
**version_purl.to_dict(), | ||
) | ||
|
||
if purl.version: | ||
break | ||
|
||
|
||
@router.route("pkg:rubygems/.*") | ||
def get_rubygems_data_from_purl(purl): | ||
|
@@ -325,22 +324,38 @@ def get_rubygems_data_from_purl(purl): | |
""" | ||
purl = PackageURL.from_string(purl) | ||
name = purl.name | ||
api_url = f"https://rubygems.org/api/v1/gems/{name}.json" | ||
response = get_response(api_url) | ||
declared_license = response.get("licenses") or None | ||
homepage_url = response.get("homepage_uri") | ||
code_view_url = response.get("source_code_uri") | ||
bug_tracking_url = response.get("bug_tracker_uri") | ||
download_url = response.get("gem_uri") | ||
yield Package( | ||
homepage_url=homepage_url, | ||
api_url=api_url, | ||
bug_tracking_url=bug_tracking_url, | ||
code_view_url=code_view_url, | ||
declared_license=declared_license, | ||
download_url=download_url, | ||
**purl.to_dict(), | ||
) | ||
all_versions_url = f"https://rubygems.org/api/v1/versions/{name}.json" | ||
all_versions = get_response(all_versions_url) | ||
|
||
for vers in all_versions: | ||
version_purl = PackageURL(type=purl.type, name=name, version=vers.get("number")) | ||
|
||
if purl.version and version_purl.version != purl.version: | ||
continue | ||
|
||
number = vers.get("number") | ||
version_api = f"https://rubygems.org/api/v2/rubygems/{name}/versions/{number}.json" | ||
version_api_response = get_response(version_api) | ||
declared_license = version_api_response.get("licenses") or None | ||
homepage_url = version_api_response.get("homepage_uri") | ||
code_view_url = version_api_response.get("source_code_uri") | ||
bug_tracking_url = version_api_response.get("bug_tracker_uri") | ||
download_url = version_api_response.get("gem_uri") | ||
repository_homepage_url = version_api_response.get("project_uri") | ||
|
||
yield Package( | ||
homepage_url=homepage_url, | ||
api_url=version_api, | ||
bug_tracking_url=bug_tracking_url, | ||
code_view_url=code_view_url, | ||
declared_license=declared_license, | ||
download_url=download_url, | ||
repository_homepage_url=repository_homepage_url, | ||
**version_purl.to_dict(), | ||
) | ||
|
||
if purl.version: | ||
break | ||
|
||
|
||
@router.route("pkg:gnu/.*") | ||
|
@@ -354,6 +369,45 @@ def get_gnu_data_from_purl(purl): | |
yield from extract_packages_from_listing(purl, source_archive_url, version_regex, []) | ||
|
||
|
||
@router.route("pkg:cocoapods/.*") | ||
def get_cocoapods_data_from_purl(purl): | ||
purl = PackageURL.from_string(purl) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Put this in a try/except block, given that the input may not be a valid PURL.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Thank you @TG1999. I'm in the midst of refactoring but will add this to the updated code. One note: there are nearly a dozen other uses of that same syntax by other supported PURL types in …
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
@TG1999 On second thought, purldb-toolkit's …
|
||
name = purl.name | ||
cocoapods_org_url = f"https://cocoapods.org/pods/{name}" | ||
api = "https://cdn.cocoapods.org" | ||
hashed_path = get_hashed_path(name) | ||
hashed_path_underscore = hashed_path.replace("/", "_") | ||
file_prefix = "all_pods_versions_" | ||
spec = f"{api}/{file_prefix}{hashed_path_underscore}.txt" | ||
data_list = get_cocoapod_tags(spec, name) | ||
|
||
for tag in data_list: | ||
version_purl = PackageURL(type=purl.type, name=name, version=tag) | ||
if purl.version and version_purl.version != purl.version: | ||
continue | ||
|
||
gh_repo_owner = None | ||
gh_repo_name = name | ||
podspec_api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{name}/{tag}/{name}.podspec.json" | ||
podspec_api_response = get_response(podspec_api_url) | ||
podspec_homepage = podspec_api_response.get("homepage") | ||
|
||
if podspec_homepage.startswith("https://github.com/"): | ||
podspec_homepage_remove_gh_prefix = podspec_homepage.replace("https://github.com/", "") | ||
podspec_homepage_split = podspec_homepage_remove_gh_prefix.split("/") | ||
gh_repo_owner = podspec_homepage_split[0] | ||
gh_repo_name = podspec_homepage_split[-1] | ||
|
||
tag_pkg = construct_cocoapods_package( | ||
version_purl, name, hashed_path, cocoapods_org_url, gh_repo_owner, gh_repo_name, tag | ||
) | ||
|
||
yield tag_pkg | ||
|
||
if purl.version: | ||
break | ||
|
||
|
||
@dataclasses.dataclass | ||
class DirectoryListedSource: | ||
source_url: str = dataclasses.field( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@johnmhoran after refactoring `get_cocoapods_data_from_purl` into multiple functions, please put those functions in `package_util.py` and only keep the top-level `get_cocoapods_data_from_purl` function in the `package.py` file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @keshav-space -- I was wondering about that, given how the other existing, relatively short `@router.route()` functions in `package.py` have related functions in both `package_util.py` and `utils.py`. I've already added a handful of utilities to `utils.py` for cocoapods support (siblings of existing utilities, but these do not throw exceptions because that stops the purlcli `metadata` command, which we don't want to do) and will do as you suggest with the now 4 additional functions for cocoapods created by my almost-finished refactoring. And then I have 3 or 4 mock tests to create.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@keshav-space Moving these related functions to `package_util.py` raises one question: in order to facilitate the collection and sharing of cocoapods data from a number of different sources, I've created a dictionary at the top of `package.py` which all functions can access. When I move some functions to `package_util.py`, will continued access be as simple as importing that dictionary from `package.py` into `package_util.py`? That's my plan atm.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@keshav-space I am having trouble importing and accessing in `package_util.py` the `logger` I've defined and use widely in my `package.py` code. I'll dig into this soon, but meanwhile, do you have any guidance on how to share a logging function -- this prints to screen and to the "errors"/"warnings" keys in the JSON output. I now import in `package_util.py` with `from fetchcode.package import logger` but get this error running `metadata`:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@johnmhoran please don't share the same logger across different files. Define a new logger for `package_util.py` and avoid any circular dependencies, i.e. don't import anything from `package.py` in `package_util.py`. The error above is due to a circular dependency.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @keshav-space. I've defined the logger in each of `package.py` and `package_util.py` (configured in `get_cocoapods_data_from_purl()`), and have defined the `pod_summary` dictionary in `package_util.py` and import it into `package.py` (`pod_summary` is shared among functions in both files), and everything seems to still work as desired. 👍