Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update cocoapods podfile.lock parser #3827

Merged
merged 1 commit into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ v33.0.0 (next next, roadmap)
from nuget lockfile `packages.lock.json`.
See https://github.com/nexB/scancode-toolkit/pull/3825

- Add support for parsing resolved packages and dependency relationships
from cocoapods lockfile `Podfile.lock`.
See https://github.com/nexB/scancode-toolkit/pull/3827

v32.2.0 - 2024-06-19
----------------------

Expand Down
285 changes: 255 additions & 30 deletions src/packagedcode/cocoapods.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

from packagedcode import models
from packagedcode import spec
from packagedcode import utils
from packagedcode.utils import get_base_purl
from packagedcode.utils import build_description

"""
Handle cocoapods packages manifests for macOS and iOS
Expand Down Expand Up @@ -232,7 +233,7 @@ def parse(cls, location, package_only=False):
extracted_license_statement = podspec.get('license')
summary = podspec.get('summary')
description = podspec.get('description')
description = utils.build_description(
description = build_description(
summary=summary,
description=description,
)
Expand Down Expand Up @@ -292,6 +293,96 @@ class PodfileLockHandler(BasePodHandler):
default_primary_language = 'Objective-C'
description = 'Cocoapods Podfile.lock'
documentation_url = 'https://guides.cocoapods.org/using/the-podfile.html'
is_lockfile = True

@classmethod
def get_pods_dependency_with_resolved_package(
    cls,
    dependency_data,
    main_pod,
    dependencies_for_resolved=None,
):
    """
    Return a DependentPackage object with its resolved package and
    dependencies from the `main_pod` string, with additional data
    populated from the `dependency_data` mappings.

    `dependency_data` is an object exposing the `checksum_by_base_purl`,
    `direct_dependency_purls`, `spec_by_base_purl` and
    `external_sources_by_base_purl` lookups keyed by base purl.
    `dependencies_for_resolved` is the optional list of dependency
    mappings to attach to the resolved package; it defaults to a new
    empty list.
    """
    # Use a fresh list for each call: a mutable `[]` default would be a
    # single list object shared by every call that omits this argument.
    if dependencies_for_resolved is None:
        dependencies_for_resolved = []

    purl, xreq = parse_dep_requirements(main_pod)
    # the lookups in `dependency_data` are keyed by base purl
    base_purl = get_base_purl(purl.to_string())

    resolved_package_mapping = dict(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        primary_language=cls.default_primary_language,
        namespace=purl.namespace,
        name=purl.name,
        version=purl.version,
        dependencies=dependencies_for_resolved,
        is_virtual=True,
    )
    resolved_package = models.PackageData.from_data(resolved_package_mapping)

    checksum = dependency_data.checksum_by_base_purl.get(base_purl)
    if checksum:
        resolved_package.sha1 = checksum

    is_direct = base_purl in dependency_data.direct_dependency_purls

    spec_repo = dependency_data.spec_by_base_purl.get(base_purl)
    if spec_repo:
        resolved_package.extra_data["spec_repo"] = spec_repo

    external_source = dependency_data.external_sources_by_base_purl.get(base_purl)
    if external_source:
        resolved_package.extra_data["external_source"] = external_source

    return models.DependentPackage(
        purl=purl.to_string(),
        # FIXME: why dev?
        scope='requires',
        extracted_requirement=xreq,
        is_runtime=False,
        is_optional=True,
        is_resolved=True,
        is_direct=is_direct,
        resolved_package=resolved_package,
    )

@classmethod
def get_dependencies_for_resolved_package(cls, dependency_data, dep_pods):
    """
    Return a list of dependency mappings, one for each pod string in
    `dep_pods`, for a cocoapods resolved package.

    Each purl is pinned to the version recorded for its base purl in
    `dependency_data.versions_by_base_purl` when there is one, and that
    version is also used as the extracted requirement when the pod
    string has no requirement of its own.
    """

    def build_dependency_mapping(pod_entry):
        pod_purl, requirement = parse_dep_requirements(pod_entry)
        lookup_key = get_base_purl(pod_purl.to_string())

        resolved_version = dependency_data.versions_by_base_purl.get(lookup_key)
        if resolved_version:
            # rebuild the purl with the resolved version filled in
            purl_fields = pod_purl.to_dict()
            purl_fields["version"] = resolved_version
            pod_purl = PackageURL(**purl_fields)

        # fall back to the resolved version when the entry has no requirement
        requirement = requirement if requirement else resolved_version

        dependent_package = models.DependentPackage(
            purl=pod_purl.to_string(),
            # FIXME: why dev?
            scope='requires',
            extracted_requirement=requirement,
            is_runtime=False,
            is_optional=True,
            is_resolved=True,
            is_direct=True,
        )
        return dependent_package.to_dict()

    return [build_dependency_mapping(pod_entry) for pod_entry in dep_pods]

@classmethod
def parse(cls, location, package_only=False):
Expand All @@ -301,52 +392,145 @@ def parse(cls, location, package_only=False):
with open(location) as pfl:
data = saneyaml.load(pfl)

pods = data['PODS']
dependency_data = PodfileLockDataByPurl.collect_dependencies_data_by_purl(
data=data,
package_type=cls.default_package_type,
)

dependencies = []

pods = data.get('PODS') or []
for pod in pods:
# dependencies with mappings have direct dependencies
if isinstance(pod, dict):
for main_pod, _dep_pods in pod.items():

purl, xreq = parse_dep_requirements(main_pod)

dependencies.append(
models.DependentPackage(
purl=str(purl),
# FIXME: why dev?
scope='requires',
extracted_requirement=xreq,
is_runtime=False,
is_optional=True,
is_resolved=True,
)
for main_pod, dep_pods in pod.items():
dependencies_for_resolved = cls.get_dependencies_for_resolved_package(
dependency_data=dependency_data,
dep_pods=dep_pods,
)
dependency = cls.get_pods_dependency_with_resolved_package(
dependency_data=dependency_data,
main_pod=main_pod,
dependencies_for_resolved=dependencies_for_resolved,
)
dependencies.append(dependency)

# These packages have no direct dependencies
elif isinstance(pod, str):

purl, xreq = parse_dep_requirements(pod)

dependencies.append(
models.DependentPackage(
purl=str(purl),
# FIXME: why dev?
scope='requires',
extracted_requirement=xreq,
is_runtime=False,
is_optional=True,
is_resolved=True,
)
dependency = cls.get_pods_dependency_with_resolved_package(
dependency_data, pod,
)
dependencies.append(dependency)

podfile_checksum = data.get('PODFILE CHECKSUM')
cocoapods_version = data.get('COCOAPODS')
extra_data = {
'cocoapods': cocoapods_version,
'podfile_checksum': podfile_checksum,
}

package_data = dict(
datasource_id=cls.datasource_id,
type=cls.default_package_type,
primary_language=cls.default_primary_language,
dependencies=dependencies,
extra_data=extra_data,
)
yield models.PackageData.from_data(package_data, package_only)


class PodfileLockDataByPurl:
    """
    A Podfile.lock lockfile contains information about its cocoapods
    dependencies in multiple parallel sections keyed by package name.

    These are:
    - PODS : Dependency graph with resolved package versions, dependency
      relationships and dependency requirements
    - DEPENDENCIES : list of direct dependencies
    - SPEC REPOS : location of spec repo having the package metadata podspec
    - SPEC CHECKSUMS : sha1 checksums of the package
    - CHECKOUT OPTIONS : the version control system info for the package with exact commit
    - EXTERNAL SOURCES : External source for a package, locally, or in a external vcs repo

    Additionally the resolved package version for dependencies are also only
    present in the top-level, but not in the dependency relationships.

    This class parses this information and stores it in mappings by purl.
    """

    def __init__(self):
        # These containers are created per instance. As class-level
        # attributes they would be shared by every instance, so data
        # collected from one Podfile.lock would leak into the next one.

        # {base purl: version string found in the top-level PODS entries}
        self.versions_by_base_purl = {}
        # base purls of the entries listed under DEPENDENCIES
        self.direct_dependency_purls = []
        # {base purl: spec repo listed under SPEC REPOS}
        self.spec_by_base_purl = {}
        # {base purl: checksum listed under SPEC CHECKSUMS}
        self.checksum_by_base_purl = {}
        # {base purl: processed CHECKOUT OPTIONS or EXTERNAL SOURCES entry}
        self.external_sources_by_base_purl = {}

    @classmethod
    def collect_dependencies_data_by_purl(cls, data, package_type):
        """
        Return a new PodfileLockDataByPurl populated with cocoapods
        dependency information by purl, from the `data` mapping of a
        loaded Podfile.lock. `package_type` is the purl type used to build
        the keys for the CHECKOUT OPTIONS and EXTERNAL SOURCES sections.
        Missing or empty sections are skipped.
        """
        dep_data = cls()

        # collect versions of all dependencies
        pods = data.get('PODS') or []
        for pod in pods:
            if isinstance(pod, dict):
                # a mapping is a pod (the key) with its own dependencies
                # (the value); only the pod itself carries a version here
                main_pods = list(pod)
            elif isinstance(pod, str):
                main_pods = [pod]
            else:
                continue

            for main_pod in main_pods:
                purl, xreq = parse_dep_requirements(main_pod)
                base_purl = get_base_purl(purl.to_string())
                dep_data.versions_by_base_purl[base_purl] = xreq

        direct_dependencies = data.get('DEPENDENCIES') or []
        for direct_dep in direct_dependencies:
            purl, _xreq = parse_dep_requirements(direct_dep)
            base_purl = get_base_purl(purl.to_string())
            dep_data.direct_dependency_purls.append(base_purl)

        spec_repos = data.get('SPEC REPOS') or {}
        for spec_repo, packages in spec_repos.items():
            for package in packages:
                purl, _xreq = parse_dep_requirements(package)
                base_purl = get_base_purl(purl.to_string())
                dep_data.spec_by_base_purl[base_purl] = spec_repo

        checksums = data.get('SPEC CHECKSUMS') or {}
        for name, checksum in checksums.items():
            purl, _xreq = parse_dep_requirements(name)
            base_purl = get_base_purl(purl.to_string())
            dep_data.checksum_by_base_purl[base_purl] = checksum

        checkout_options = data.get('CHECKOUT OPTIONS') or {}
        for name, source in checkout_options.items():
            processed_source = process_external_source(source)
            base_purl = PackageURL(
                type=package_type,
                name=name,
            ).to_string()
            dep_data.external_sources_by_base_purl[base_purl] = processed_source

        external_sources = data.get('EXTERNAL SOURCES') or {}
        for name, source in external_sources.items():
            base_purl = PackageURL(
                type=package_type,
                name=name,
            ).to_string()

            # `CHECKOUT OPTIONS` is more verbose than `EXTERNAL SOURCES`
            if base_purl in dep_data.external_sources_by_base_purl:
                continue
            processed_source = process_external_source(source)
            dep_data.external_sources_by_base_purl[base_purl] = processed_source

        return dep_data


class PodspecJsonHandler(models.DatafileHandler):
datasource_id = 'cocoapods_podspec_json'
path_patterns = ('*.podspec.json',)
Expand Down Expand Up @@ -566,3 +750,44 @@ def parse_dep_requirements(dep):
version=version,
)
return purl, requirement


def process_external_source(source_mapping):
    """
    Return a path or URL string processed from the `source_mapping` of a
    dependency with an external source.

    A mapping with a single entry returns the string of its only value.
    A mapping with exactly two entries, one being a `:git` URL string and
    the other a `:commit` or a `:branch`, returns a
    `<repo URL>/tree/<commit or branch>` URL. Any other mapping returns
    the string of the whole mapping.

    Some examples:

    boost:
        :podspec: "../node_modules/react-native/third-party-podspecs/boost.podspec"
    Pulley:
        :branch: master
        :git: https://github.com/artsy/Pulley.git
    SnapKit:
        :branch: xcode102
        :git: "git@github.com:alanzeino/SnapKit.git"
    SwiftyJSON:
        :commit: af76cf3ef710b6ca5f8c05f3a31307d44a3c5828
        :git: https://github.com/SwiftyJSON/SwiftyJSON/
    tipsi-stripe:
        :path: "../node_modules/tipsi-stripe"
    """

    # this could be either `:path`, `:podspec` or `:git`
    if len(source_mapping) == 1:
        return str(next(iter(source_mapping.values())))

    # this is a link to a git repository
    git_url = source_mapping.get(':git')
    if len(source_mapping) == 2 and isinstance(git_url, str):
        repo_url = git_url.rstrip('/')

        # Only strip a trailing `.git`: removing every `.git` substring
        # would corrupt names such as `foo.github.io`.
        if repo_url.endswith('.git'):
            repo_url = repo_url[:-len('.git')]

        # Convert the scp-like `git@host:owner/repo` form. The `:` that
        # separates the host from the path must become a `/`, otherwise
        # the result reads as an https URL with an invalid port.
        if repo_url.startswith('git@'):
            host, _, path = repo_url[len('git@'):].partition(':')
            repo_url = f"https://{host}/{path}" if path else f"https://{host}"

        repo_url = repo_url.rstrip('/')

        if ':commit' in source_mapping:
            commit = source_mapping.get(':commit')
            return f"{repo_url}/tree/{commit}"
        elif ':branch' in source_mapping:
            branch = source_mapping.get(':branch')
            return f"{repo_url}/tree/{branch}"

    # In all other cases
    return str(source_mapping)
Loading
Loading