Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cocoapods support to package.py #119

Merged
merged 13 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 107 additions & 53 deletions src/fetchcode/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,19 @@
from fetchcode.package_util import GitHubSource
from fetchcode.package_util import MiniupnpPackagesGitHubSource
from fetchcode.package_util import OpenSSLGitHubSource
from fetchcode.package_util import construct_cocoapods_package
from fetchcode.package_util import get_cocoapod_tags
from fetchcode.packagedcode_models import Package
from fetchcode.utils import get_hashed_path
from fetchcode.utils import get_response

router = Router()


def info(url):
"""
Return data according to the `url` string
`url` string can be purl too
Return package metadata for a URL or PURL.
Return None if there is no URL, or the URL or PURL is not supported.
"""
if url:
try:
Expand Down Expand Up @@ -83,13 +86,7 @@ def get_cargo_data_from_purl(purl):
crate = response.get("crate") or {}
homepage_url = crate.get("homepage")
code_view_url = crate.get("repository")
yield Package(
homepage_url=homepage_url,
api_url=api_url,
code_view_url=code_view_url,
download_url=download_url,
**purl.to_dict(),
)

versions = response.get("versions", [])
for version in versions:
version_purl = PackageURL(type=purl.type, name=name, version=version.get("num"))
Expand All @@ -100,6 +97,9 @@ def get_cargo_data_from_purl(purl):
download_url = None
declared_license = version.get("license")

if purl.version and version_purl.version != purl.version:
continue

yield Package(
homepage_url=homepage_url,
api_url=api_url,
Expand All @@ -109,6 +109,9 @@ def get_cargo_data_from_purl(purl):
**version_purl.to_dict(),
)

if purl.version:
break


@router.route("pkg:npm/.*")
def get_npm_data_from_purl(purl):
Expand All @@ -120,39 +123,30 @@ def get_npm_data_from_purl(purl):
name = purl.name
version = purl.version
api_url = f"{base_path}/{name}"

response = get_response(api_url)
vcs_data = response.get("repository") or {}
bugs = response.get("bugs") or {}

download_url = f"{base_path}/{name}/-/{name}-{version}.tgz" if version else None
vcs_url = vcs_data.get("url")
bug_tracking_url = bugs.get("url")
license = response.get("license")
homepage_url = response.get("homepage")

yield Package(
homepage_url=homepage_url,
api_url=api_url,
vcs_url=vcs_url,
bug_tracking_url=bug_tracking_url,
download_url=download_url,
declared_license=license,
**purl.to_dict(),
)

versions = response.get("versions", [])
tags = []
for num in versions:
version = versions[num]
version_purl = PackageURL(type=purl.type, name=name, version=version.get("version"))
repository = version.get("repository") or {}
bugs = response.get("bugs") or {}
dist = version.get("dist") or {}
licenses = version.get("licenses") or [{}]
vcs_url = repository.get("url")
download_url = dist.get("tarball")
bug_tracking_url = bugs.get("url")
declared_license = licenses[0].get("type")
declared_license = license

if purl.version and version_purl.version != purl.version:
continue

yield Package(
homepage_url=homepage_url,
Expand All @@ -164,6 +158,9 @@ def get_npm_data_from_purl(purl):
**version_purl.to_dict(),
)

if purl.version:
break


@router.route("pkg:pypi/.*")
def get_pypi_data_from_purl(purl):
Expand All @@ -172,6 +169,7 @@ def get_pypi_data_from_purl(purl):
"""
purl = PackageURL.from_string(purl)
name = purl.name

base_path = "https://pypi.org/pypi"
api_url = f"{base_path}/{name}/json"
response = get_response(api_url)
Expand All @@ -182,19 +180,14 @@ def get_pypi_data_from_purl(purl):
project_urls = info.get("project_urls") or {}
code_view_url = get_pypi_codeview_url(project_urls)
bug_tracking_url = get_pypi_bugtracker_url(project_urls)
yield Package(
homepage_url=homepage_url,
api_url=api_url,
bug_tracking_url=bug_tracking_url,
code_view_url=code_view_url,
declared_license=license,
**purl.to_dict(),
)

for num in releases:
version_purl = PackageURL(type=purl.type, name=name, version=num)
release = releases.get(num) or [{}]
release = release[0]
download_url = release.get("url")
if purl.version and version_purl.version != purl.version:
continue
yield Package(
homepage_url=homepage_url,
api_url=api_url,
Expand All @@ -205,6 +198,9 @@ def get_pypi_data_from_purl(purl):
**version_purl.to_dict(),
)

if purl.version:
break


@router.route("pkg:github/.*")
def get_github_data_from_purl(purl):
Expand Down Expand Up @@ -291,24 +287,24 @@ def get_bitbucket_data_from_purl(purl):
bitbucket_url = "https://bitbucket.org"
bug_tracking_url = f"{bitbucket_url}/{namespace}/{name}/issues"
code_view_url = f"{bitbucket_url}/{namespace}/{name}"
yield Package(
api_url=api_url,
bug_tracking_url=bug_tracking_url,
code_view_url=code_view_url,
**purl.to_dict(),
)

links = response.get("links") or {}
tags_url = links.get("tags") or {}
tags_url = tags_url.get("href")
if not tags_url:
return []
tags_data = get_response(tags_url)
tags = tags_data.get("values") or {}

for tag in tags:
version = tag.get("name") or ""
version_purl = PackageURL(type=purl.type, namespace=namespace, name=name, version=version)
download_url = f"{base_path}/{namespace}/{name}/downloads/{name}-{version}.tar.gz"
code_view_url = f"{bitbucket_url}/{namespace}/{name}/src/{version}"

if purl.version and version_purl.version != purl.version:
continue

yield Package(
api_url=api_url,
bug_tracking_url=bug_tracking_url,
Expand All @@ -317,6 +313,9 @@ def get_bitbucket_data_from_purl(purl):
**version_purl.to_dict(),
)

if purl.version:
break


@router.route("pkg:rubygems/.*")
def get_rubygems_data_from_purl(purl):
Expand All @@ -325,22 +324,38 @@ def get_rubygems_data_from_purl(purl):
"""
purl = PackageURL.from_string(purl)
name = purl.name
api_url = f"https://rubygems.org/api/v1/gems/{name}.json"
response = get_response(api_url)
declared_license = response.get("licenses") or None
homepage_url = response.get("homepage_uri")
code_view_url = response.get("source_code_uri")
bug_tracking_url = response.get("bug_tracker_uri")
download_url = response.get("gem_uri")
yield Package(
homepage_url=homepage_url,
api_url=api_url,
bug_tracking_url=bug_tracking_url,
code_view_url=code_view_url,
declared_license=declared_license,
download_url=download_url,
**purl.to_dict(),
)
all_versions_url = f"https://rubygems.org/api/v1/versions/{name}.json"
all_versions = get_response(all_versions_url)

for vers in all_versions:
version_purl = PackageURL(type=purl.type, name=name, version=vers.get("number"))

if purl.version and version_purl.version != purl.version:
continue

number = vers.get("number")
version_api = f"https://rubygems.org/api/v2/rubygems/{name}/versions/{number}.json"
version_api_response = get_response(version_api)
declared_license = version_api_response.get("licenses") or None
homepage_url = version_api_response.get("homepage_uri")
code_view_url = version_api_response.get("source_code_uri")
bug_tracking_url = version_api_response.get("bug_tracker_uri")
download_url = version_api_response.get("gem_uri")
repository_homepage_url = version_api_response.get("project_uri")

yield Package(
homepage_url=homepage_url,
api_url=version_api,
bug_tracking_url=bug_tracking_url,
code_view_url=code_view_url,
declared_license=declared_license,
download_url=download_url,
repository_homepage_url=repository_homepage_url,
**version_purl.to_dict(),
)

if purl.version:
break


@router.route("pkg:gnu/.*")
Expand All @@ -354,6 +369,45 @@ def get_gnu_data_from_purl(purl):
yield from extract_packages_from_listing(purl, source_archive_url, version_regex, [])


@router.route("pkg:cocoapods/.*")
def get_cocoapods_data_from_purl(purl):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@johnmhoran after refactoring get_cocoapods_data_from_purl into multiple functions, please put those functions in package_util.py and only keep the top-level get_cocoapods_data_from_purl function in package.py file

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @keshav-space -- I was wondering about that, given how the other existing, relatively short @router.route() functions in package.py have related functions in both package_util.py and utils.py. I've already added a handful of utilities to utils.py for cocoapods support (siblings of existing utilities, but these do not throw exceptions because that stops the purlcli metadata command, which we don't want to do) and will do as you suggest with the now 4 additional functions for cocoapods created by my almost-finished refactoring. And then I have 3 or 4 mock tests to create.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@keshav-space Moving these related functions to package_util.py raises one question: in order to facilitate the collection and sharing of cocoapods data from a number of different sources, I've created a dictionary at the top of package.py which all functions can access. When I move some functions to package_util.py, will continued access be as simple as importing that dictionary from package.py into package_util.py? That's my plan atm.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@keshav-space I am having trouble importing and accessing in package_util.py the logger I've defined and use widely in my package.py code. I'll dig into this soon, but meanwhile, do you have any guidance on how to share a logging function -- this prints to screen and to the "errors"/"warnings" keys in the JSON output. I now import in package_util.py with from fetchcode.package import logger but get this error running metadata:

(venv) Wed May 01, 2024 08:33 AM  /home/jmh/dev/nexb/purldb jmh (365-update-cocoapods-pypi-support)
$ python -m purldb_toolkit.purlcli metadata --purl pkg:cocoapods/BoringSSL@10.0.6 --output -
Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/jmh/dev/nexb/purldb/purldb-toolkit/src/purldb_toolkit/purlcli.py", line 19, in <module>
    from fetchcode.package import info
  File "/home/jmh/dev/nexb/fetchcode/src/fetchcode/package.py", line 32, in <module>
    from fetchcode.package_util import GITHUB_SOURCE_BY_PACKAGE
  File "/home/jmh/dev/nexb/fetchcode/src/fetchcode/package_util.py", line 25, in <module>
    from fetchcode.package import logger
ImportError: cannot import name 'logger' from partially initialized module 'fetchcode.package' (most likely due to a circular import) (/home/jmh/dev/nexb/fetchcode/src/fetchcode/package.py)

(venv) Wed May 01, 2024 08:48 AM  /home/jmh/dev/nexb/purldb jmh (365-update-cocoapods-pypi-support)
$

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@johnmhoran please don't share the same logger across different files. Define a new logger for package_util.py and avoid any circular dependencies i.e. don't import anything from package.py in package_util.py. The error above is due to a circular dependency.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @keshav-space . I've defined the logger in each of package.py and package_util.py (configured in get_cocoapods_data_from_purl()), and have defined the pod_summary dictionary in package_util.py and import it into package.py (pod_summary is shared among functions in both files), and everything seems to still work as desired. 👍

purl = PackageURL.from_string(purl)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put this in a try/except block, given that the input may not be a valid PURL.

Copy link
Member Author

@johnmhoran johnmhoran Apr 29, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you @TG1999 . I'm in the midst of refactoring but will add this to the updated code. One note: there are nearly a dozen other uses of that same syntax by other supported PURL types in package.py and none uses a try/except (but perhaps should?).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TG1999 On second thought, purldb-toolkit's purlcli.py already handles invalid PURL inputs by checking the validate endpoint (including for the metadata command, which is the command that calls the fetchcode package.py info() function) and prints a warning to the output JSON warnings list, so I don't think a try/except is needed in the package.py cocoapods code. E.g.,

(venv) Mon Apr 29, 2024 12:25 PM  /home/jmh/dev/nexb/purldb jmh (365-update-cocoapods-pypi-support)
$ python -m purldb_toolkit.purlcli metadata --purl pkg:cocoapods/# --output -
{
    "headers": [
        {
            "tool_name": "purlcli",
            "tool_version": "0.2.0",
            "options": {
                "command": "metadata",
                "--purl": [
                    "pkg:cocoapods/#"
                ],
                "--file": null,
                "--output": "<stdout>"
            },
            "purls": [
                "pkg:cocoapods/#"
            ],
            "errors": [],
            "warnings": [
                "'pkg:cocoapods/#' not valid"
            ]
        }
    ],
    "packages": []
}
(venv) Mon Apr 29, 2024 12:29 PM  /home/jmh/dev/nexb/purldb jmh (365-update-cocoapods-pypi-support)
$

# Build the URL of the CocoaPods CDN text file that lists the versions of the
# pods sharing this pod's hashed path, then read this pod's version tags.
name = purl.name
cocoapods_org_url = f"https://cocoapods.org/pods/{name}"
api = "https://cdn.cocoapods.org"
# get_hashed_path() comes from fetchcode.utils; its result is used both as a
# "/"-separated path in the Specs repo URL below and, with "/" replaced by "_",
# as the suffix of the CDN file name.
hashed_path = get_hashed_path(name)
hashed_path_underscore = hashed_path.replace("/", "_")
file_prefix = "all_pods_versions_"
spec = f"{api}/{file_prefix}{hashed_path_underscore}.txt"
# NOTE(review): get_cocoapod_tags() returns None when the fetch fails or the
# pod is not listed; iterating None in the loop below would raise TypeError.
data_list = get_cocoapod_tags(spec, name)

# Yield one Package per version tag; when the input PURL carries a version,
# only the matching tag is processed.
for tag in data_list:
version_purl = PackageURL(type=purl.type, name=name, version=tag)
if purl.version and version_purl.version != purl.version:
continue

# Defaults used when the podspec homepage is not a GitHub URL.
gh_repo_owner = None
gh_repo_name = name
# NOTE(review): construct_cocoapods_package() builds and fetches this same
# podspec URL again, so each tag costs two requests for the same document.
podspec_api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{name}/{tag}/{name}.podspec.json"
podspec_api_response = get_response(podspec_api_url)
podspec_homepage = podspec_api_response.get("homepage")

# Derive the GitHub owner/repo from a homepage such as
# "https://github.com/<owner>/<repo>": first and last path segments.
# NOTE(review): if the podspec has no "homepage" key, .get() returns None and
# .startswith() raises AttributeError -- confirm whether that can happen.
if podspec_homepage.startswith("https://github.com/"):
podspec_homepage_remove_gh_prefix = podspec_homepage.replace("https://github.com/", "")
podspec_homepage_split = podspec_homepage_remove_gh_prefix.split("/")
gh_repo_owner = podspec_homepage_split[0]
gh_repo_name = podspec_homepage_split[-1]

tag_pkg = construct_cocoapods_package(
version_purl, name, hashed_path, cocoapods_org_url, gh_repo_owner, gh_repo_name, tag
)

yield tag_pkg

# A versioned PURL matches at most one tag, so stop after yielding it.
if purl.version:
break


@dataclasses.dataclass
class DirectoryListedSource:
source_url: str = dataclasses.field(
Expand Down
98 changes: 98 additions & 0 deletions src/fetchcode/package_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,101 @@ def get_package_info(cls, gh_purl, package_name):
# Since there will be no new releases of ipkg, it's better to
# store them in a dictionary rather than fetching them every time.
IPKG_RELEASES = json.loads((DATA / "ipkg_releases.json").read_text(encoding="UTF-8"))


def get_cocoapod_tags(spec, name):
    """
    Return the list of version tags listed for the pod `name` in the text
    file at the `spec` URL.
    Return None if the file cannot be fetched or the pod is not listed.

    Each line of the file is expected to look like ``<pod name>/<tag>/<tag>...``;
    the tags of the line whose first segment equals `name` exactly are returned.
    """
    try:
        response = utils.get_text_response(spec)
    except Exception:
        # Best effort: a failed fetch means "no tags known", not a crash.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return None

    if not isinstance(response, str):
        return None

    for line in response.strip().splitlines():
        pod_name, *versions = line.strip().split("/")
        # Exact match on the first segment, so that a pod whose name is a
        # prefix of another pod's name never picks up that other pod's row.
        if pod_name == name:
            return versions
    return None


# Build and return a Package for a single CocoaPods pod version, combining
# GitHub repository API metadata (when a repo owner and name are given) with
# the pod's podspec JSON from the CocoaPods/Specs repository.
#
# Arguments, as used below:
#   purl: object providing to_dict(), expanded into the Package keyword
#       arguments (presumably a PackageURL -- confirm against the caller).
#   name: pod name, used in the podspec URL.
#   hashed_path: path segment under "Specs/" in the podspec URL.
#   cocoapods_org_url: stored as the Package repository_homepage_url.
#   gh_repo_owner, gh_repo_name: GitHub repository coordinates; may be falsy.
#   tag: version string used in the podspec URL and assigned to Package.version.
def construct_cocoapods_package(
purl, name, hashed_path, cocoapods_org_url, gh_repo_owner, gh_repo_name, tag
):
# NOTE(review): self-assignment, has no effect.
name = name
homepage_url = None
vcs_url = None
github_url = None
bug_tracking_url = None
code_view_url = None
license_data = None
declared_license = None
primary_language = None

# GitHub repository metadata is only looked up when both owner and repo name
# are known.
if gh_repo_owner and gh_repo_name:
base_path = "https://api.github.com/repos"
api_url = f"{base_path}/{gh_repo_owner}/{gh_repo_name}"
# NOTE(review): the GET is issued before the HEAD request whose status code
# decides whether the GET response is used.
gh_repo_api_response = utils.get_github_rest(api_url)
gh_repo_api_head_request = utils.make_head_request(api_url)
gh_repo_api_status_code = gh_repo_api_head_request.status_code

if gh_repo_api_status_code == 200:
# NOTE(review): this homepage_url is reassigned further down from the podspec
# "homepage" value.
homepage_url = gh_repo_api_response.get("homepage")
vcs_url = gh_repo_api_response.get("git_url")
license_data = gh_repo_api_response.get("license") or {}
declared_license = license_data.get("spdx_id")
primary_language = gh_repo_api_response.get("language")

github_url = "https://github.com"
bug_tracking_url = f"{github_url}/{gh_repo_owner}/{gh_repo_name}/issues"
code_view_url = f"{github_url}/{gh_repo_owner}/{gh_repo_name}"

# Fetch the podspec JSON for this pod version from the CocoaPods/Specs repo.
podspec_api_url = f"https://raw.githubusercontent.com/CocoaPods/Specs/master/Specs/{hashed_path}/{name}/{tag}/{name}.podspec.json"
podspec_api_response = utils.get_response(podspec_api_url)
homepage_url = podspec_api_response.get("homepage")

# The podspec "license" value is used as-is as the fallback declared license
# when GitHub did not provide one.
# NOTE(review): both branches of the isinstance check assign the same value.
lic = podspec_api_response.get("license")
extracted_license_statement = None
if isinstance(lic, dict):
extracted_license_statement = lic
else:
extracted_license_statement = lic
if not declared_license:
declared_license = extracted_license_statement

# Derive download_url and vcs_url from the podspec "source" entry, which is
# handled here as either a dict (with optional "git", "http" and "tag" keys)
# or a plain string.
source = podspec_api_response.get("source")
download_url = None
if isinstance(source, dict):
git_url = source.get("git", "")
http_url = source.get("http", "")
# An explicit "http" source is used directly as the download URL.
if http_url:
download_url = http_url
if git_url and not http_url:
# For a "https://github.com/....git" URL, strip the ".git" suffix and point
# at the tag archive tarball.
if git_url.endswith(".git") and git_url.startswith("https://github.com/"):
gh_path = git_url[:-4]
github_tag = source.get("tag")
# A "v"-prefixed source tag replaces `tag`; this also changes the value
# assigned to purl_pkg.version at the end of this function.
if github_tag and github_tag.startswith("v"):
tag = github_tag
download_url = f"{gh_path}/archive/refs/tags/{tag}.tar.gz"
vcs_url = git_url
elif git_url:
vcs_url = git_url
elif isinstance(source, str):
# A string source only fills vcs_url when nothing set it earlier.
if not vcs_url:
vcs_url = source

purl_pkg = Package(
homepage_url=homepage_url,
api_url=podspec_api_url,
bug_tracking_url=bug_tracking_url,
code_view_url=code_view_url,
download_url=download_url,
declared_license=declared_license,
primary_language=primary_language,
repository_homepage_url=cocoapods_org_url,
vcs_url=vcs_url,
**purl.to_dict(),
)
# Overrides the version taken from purl.to_dict() with `tag`, which may have
# been replaced above by the podspec source tag.
purl_pkg.version = tag

return purl_pkg
Loading
Loading