Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Populate for packages field correctly #2929 #2939

Merged
merged 6 commits into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 73 additions & 28 deletions src/packagedcode/npm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from packagedcode import models
from packagedcode.utils import combine_expressions
from packagedcode.utils import normalize_vcs_url
from packagedcode.utils import yield_dependencies_from_package_data
from packagedcode.utils import yield_dependencies_from_package_resource
import saneyaml

"""
Expand All @@ -40,31 +42,84 @@ class BaseNpmHandler(models.DatafileHandler):

@classmethod
def assemble(cls, package_data, resource, codebase):
"""
If ``resource``, or one of its siblings, is a package.json file, use it
to create and yield the package, the package dependencies, and the
package resources.

When reporting the resources of a package, we walk the codebase, skipping
the node_modules directory, assign resources to the package and yield
resources.

For each lock file, assign dependencies to package instances and yield dependencies.

If there is no package.json, we do not have a package instance. In this
case, we yield each of the dependencies in each lock file.
"""
datafile_name_patterns = (
'package.json',
'package-lock.json',
'.package-lock.json',
'npm-shrinkwrap.json',
'yarn.lock',
)

if resource.has_parent():
dir_resource=resource.parent(codebase)
else:
dir_resource=resource

for assembled in cls.assemble_from_many_datafiles(
datafile_name_patterns=datafile_name_patterns,
directory=dir_resource,
codebase=codebase,
):
if isinstance(assembled, models.Package):
cls.assign_package_to_resources(
package=assembled,
resource=resource,
codebase=codebase,
package_resource = None
if resource.name == 'package.json':
package_resource = resource
elif resource.name in datafile_name_patterns:
if resource.has_parent():
siblings = resource.siblings(codebase)
package_resource = [r for r in siblings if r.name == 'package.json']
if package_resource:
package_resource = package_resource[0]

if package_resource:
# do we have enough to create a package?
if package_data.purl:
package = models.Package.from_package_data(
package_data=package_data,
datafile_path=package_resource.path,
)
yield assembled
package_uid = package.package_uid

if not package.license_expression:
package.license_expression = compute_normalized_license(package.declared_license)

root = resource.parent(codebase)
if root:
for npm_res in cls.walk_npm(resource=root, codebase=codebase):
if package_uid not in npm_res.for_packages:
npm_res.for_packages.append(package_uid)
npm_res.save(codebase)
yield npm_res
elif codebase.has_single_resource:
if package_uid not in package_resource.for_packages:
package_resource.for_packages.append(package_uid)
package_resource.save(codebase)
yield package_resource

yield package
else:
# we have no package, so deps are not for a specific package uid
package_uid = None

# in all cases yield possible dependencies
yield from yield_dependencies_from_package_data(package_data, package_resource.path, package_uid)

# we yield this as we do not want this further processed
yield package_resource

for sibling in package_resource.siblings(codebase):
if sibling.name in datafile_name_patterns:
yield from yield_dependencies_from_package_resource(sibling, package_uid)

if package_uid not in sibling.for_packages:
sibling.for_packages.append(package_uid)
sibling.save(codebase)
yield sibling
else:
# we do not have a package.json
yield from yield_dependencies_from_package_resource(resource)

@classmethod
def walk_npm(cls, resource, codebase, depth=0):
Expand All @@ -84,19 +139,9 @@ def walk_npm(cls, resource, codebase, depth=0):

if child.is_dir:
depth += 1
for subchild in cls.walk_skip(child, codebase, depth=depth):
for subchild in cls.walk_npm(child, codebase, depth=depth):
yield subchild

# TODO: this MUST BE USED
@classmethod
def assign_package_to_resources(cls, package, resource, codebase):
    """
    Yield the Resources of an npm Package, ignoring nested node_modules.

    The walk starts at the parent of ``resource`` and is delegated to
    ``cls.walk_npm``. Nothing is yielded when ``resource`` has no parent.
    """
    parent_dir = resource.parent(codebase)
    if not parent_dir:
        return
    yield from cls.walk_npm(resource=parent_dir, codebase=codebase)


def get_urls(namespace, name, version):
return dict(
Expand Down
116 changes: 110 additions & 6 deletions src/packagedcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
import re
import sys
import zipfile
from configparser import ConfigParser
from io import StringIO
from pathlib import Path

import dparse2
Expand All @@ -30,6 +32,8 @@
from packagedcode import models
from packagedcode.utils import build_description
from packagedcode.utils import combine_expressions
from packagedcode.utils import yield_dependencies_from_package_data
from packagedcode.utils import yield_dependencies_from_package_resource

# FIXME: we always want to use the external library rather than the built-in for now
import importlib_metadata
Expand Down Expand Up @@ -135,12 +139,55 @@ def assemble(cls, package_data, resource, codebase):
'Pipfile',
) + PipRequirementsFileHandler.path_patterns

parent = resource.parent(codebase)
yield from cls.assemble_from_many_datafiles(
datafile_name_patterns=datafile_name_patterns,
directory=parent,
codebase=codebase,
)
package_resource = None
if resource.name in datafile_name_patterns:
package_resource = resource

if package_resource:
# do we have enough to create a package?
if package_data.purl:
package = models.Package.from_package_data(
package_data=package_data,
datafile_path=package_resource.path,
)
package_uid = package.package_uid

if not package.license_expression:
package.license_expression = compute_normalized_license(package.declared_license)

root = package_resource.parent(codebase)
if root:
for py_res in root.walk(codebase):
if py_res.is_dir:
continue
if package_uid not in py_res.for_packages:
py_res.for_packages.append(package_uid)
py_res.save(codebase)
yield py_res
elif codebase.has_single_resource:
if package_uid not in package_resource.for_packages:
package_resource.for_packages.append(package_uid)
package_resource.save(codebase)
yield package_resource
yield package
else:
# we have no package, so deps are not for a specific package uid
package_uid = None

# in all cases yield possible dependencies
yield from yield_dependencies_from_package_data(package_data, package_resource.path, package_uid)
yield package_resource

for sibling in package_resource.siblings(codebase):
if sibling.name in datafile_name_patterns:
yield from yield_dependencies_from_package_resource(sibling, package_uid)

if package_uid not in sibling.for_packages:
sibling.for_packages.append(package_uid)
sibling.save(codebase)
yield sibling
else:
yield from yield_dependencies_from_package_resource(resource)

@classmethod
def assign_package_to_resources(cls, package, resource, codebase):
Expand Down Expand Up @@ -547,6 +594,63 @@ class SetupCfgHandler(BaseExtractedPythonLayout):
description = 'Python setup.cfg'
documentation_url = 'https://peps.python.org/pep-0390/'

@classmethod
def parse(cls, location):
    """
    Yield a PackageData built from the setup.cfg file at ``location``.

    The ``name``, ``version``, ``license``, ``url``, ``author`` and
    ``author_email`` options are read from the ``[metadata]`` section when
    they are present and non-empty. Dependencies are collected with
    ``parse_with_dparse2``.

    Nothing is yielded when ``get_dparse2_supported_file_name`` returns a
    falsy value for the file name of ``location``.
    """
    file_name = fileutils.file_name(location)

    parser = ConfigParser()
    with open(location) as f:
        # read_file() is the supported replacement for readfp(), which was
        # deprecated in Python 3.2 and removed in Python 3.12.
        parser.read_file(f)

    wanted_options = (
        'name',
        'version',
        'license',
        'url',
        'author',
        'author_email',
    )

    metadata = {}
    if parser.has_section('metadata'):
        section = parser['metadata']
        for name in wanted_options:
            value = section.get(name)
            # skip options that are missing or empty
            if value:
                metadata[name] = value

    parties = []
    author = metadata.get('author')
    if author:
        parties = [
            models.Party(
                type=models.party_person,
                name=author,
                role='author',
                email=metadata.get('author_email'),
            )
        ]

    dependency_type = get_dparse2_supported_file_name(file_name)
    if not dependency_type:
        return

    dependencies = parse_with_dparse2(
        location=location,
        file_name=dependency_type,
    )
    # NOTE(review): the ``license`` option is collected above but is not
    # passed to PackageData here -- confirm whether that is intentional.
    yield models.PackageData(
        datasource_id=cls.datasource_id,
        type=cls.default_package_type,
        name=metadata.get('name'),
        version=metadata.get('version'),
        parties=parties,
        homepage_url=metadata.get('url'),
        primary_language=cls.default_primary_language,
        dependencies=dependencies,
    )


class PipfileHandler(BaseDependencyFileHandler):
datasource_id = 'pipfile'
Expand Down
23 changes: 23 additions & 0 deletions src/packagedcode/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,3 +184,26 @@ def find_root_resource(path, resource, codebase):
return resource


def yield_dependencies_from_package_data(package_data, datafile_path, package_uid):
    """
    Yield a Dependency for each entry of ``package_data.dependencies``.

    ``datafile_path`` and ``package_uid`` are passed through unchanged to
    ``models.Dependency.from_dependent_packages``, together with the
    ``datasource_id`` of ``package_data``. Nothing is yielded when
    ``package_data`` has no dependencies.
    """
    # imported here rather than at module level, as in the original code
    from packagedcode import models

    declared = package_data.dependencies
    if not declared:
        return

    yield from models.Dependency.from_dependent_packages(
        dependent_packages=declared,
        datafile_path=datafile_path,
        datasource_id=package_data.datasource_id,
        package_uid=package_uid,
    )


def yield_dependencies_from_package_resource(resource, package_uid=None):
    """
    Yield a Dependency for each dependency of each package data mapping
    found in ``resource.package_data``.

    Each mapping is converted with ``models.PackageData.from_dict`` and its
    dependencies are reported with ``resource.path`` as the datafile path
    and with the provided ``package_uid``.
    """
    # imported here rather than at module level, as in the original code
    from packagedcode import models

    for mapping in resource.package_data:
        yield from yield_dependencies_from_package_data(
            package_data=models.PackageData.from_dict(mapping),
            datafile_path=resource.path,
            package_uid=package_uid,
        )
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,15 @@
"output_format_version": "2.0.0",
"message": null,
"errors": [],
"warnings": [],
"extra_data": {
"system_environment": {
"operating_system": "linux",
"cpu_architecture": "64",
"platform": "Linux-5.4.0-107-generic-x86_64-with-Ubuntu-18.04-bionic",
"platform_version": "#121~18.04.1-Ubuntu SMP Thu Mar 24 17:21:33 UTC 2022",
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
},
"spdx_license_list_version": "3.16",
"files_count": 1
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,15 @@
"output_format_version": "2.0.0",
"message": null,
"errors": [],
"warnings": [],
"extra_data": {
"system_environment": {
"operating_system": "linux",
"cpu_architecture": "64",
"platform": "Linux-5.4.0-107-generic-x86_64-with-Ubuntu-18.04-bionic",
"platform_version": "#121~18.04.1-Ubuntu SMP Thu Mar 24 17:21:33 UTC 2022",
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
},
"spdx_license_list_version": "3.16",
"files_count": 2
}
Expand Down Expand Up @@ -314,7 +322,9 @@
],
"percentage_of_license_text": 100.0,
"package_data": [],
"for_packages": [],
"for_packages": [
"pkg:npm/npm@2.13.5?uuid=fixed-uid-done-for-testing-5642512d1758"
],
"scan_errors": []
},
{
Expand Down
7 changes: 7 additions & 0 deletions tests/packagedcode/data/about/aboutfiles.expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
"errors": [],
"warnings": [],
"extra_data": {
"system_environment": {
"operating_system": "linux",
"cpu_architecture": "64",
"platform": "Linux-5.4.0-107-generic-x86_64-with-Ubuntu-18.04-bionic",
"platform_version": "#121~18.04.1-Ubuntu SMP Thu Mar 24 17:21:33 UTC 2022",
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
},
"spdx_license_list_version": "3.16",
"files_count": 3
}
Expand Down
7 changes: 7 additions & 0 deletions tests/packagedcode/data/bower/scan-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
"errors": [],
"warnings": [],
"extra_data": {
"system_environment": {
"operating_system": "linux",
"cpu_architecture": "64",
"platform": "Linux-5.4.0-107-generic-x86_64-with-Ubuntu-18.04-bionic",
"platform_version": "#121~18.04.1-Ubuntu SMP Thu Mar 24 17:21:33 UTC 2022",
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
},
"spdx_license_list_version": "3.16",
"files_count": 1
}
Expand Down
7 changes: 7 additions & 0 deletions tests/packagedcode/data/build/bazel/end2end-expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
"errors": [],
"warnings": [],
"extra_data": {
"system_environment": {
"operating_system": "linux",
"cpu_architecture": "64",
"platform": "Linux-5.4.0-107-generic-x86_64-with-Ubuntu-18.04-bionic",
"platform_version": "#121~18.04.1-Ubuntu SMP Thu Mar 24 17:21:33 UTC 2022",
"python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]"
},
"spdx_license_list_version": "3.16",
"files_count": 6
}
Expand Down
Loading