Skip to content

Commit

Permalink
Merge pull request #2718 from nexB/2717-bug-summary
Browse files Browse the repository at this point in the history
Return package details in summary #2717
  • Loading branch information
pombredanne authored Sep 26, 2021
2 parents 1bddb92 + d37bb16 commit 152abda
Show file tree
Hide file tree
Showing 43 changed files with 6,659 additions and 560 deletions.
20 changes: 19 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,30 @@ Outputs:



30.1.0 - 2021-09-25
--------------------

This is a bug fix release for these bugs:

- https://github.com/nexB/scancode-toolkit/issues/2717

We now return the package details in the summaries again, as before.

There is also a minor API change: we no longer return a count of empty ("null")
values in the summaries for licenses, copyrights, etc.


Thank you to:

- Thomas Druez @tdruez



30.0.1 - 2021-09-24
--------------------

This is a minor bug fix release for these bugs:

- https://github.com/nexB/scancode-toolkit/issues/2713
- https://github.com/nexB/commoncode/issues/31
- https://github.com/nexB/scancode-toolkit/issues/2713

We now correctly work with all supported Click versions.
Expand Down
2 changes: 1 addition & 1 deletion setup-mini.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = scancode-toolkit-mini
version = 30.0.1
version = 30.1.0
license = Apache-2.0 AND CC-BY-4.0 AND LicenseRef-scancode-other-permissive AND LicenseRef-scancode-other-copyleft

description = ScanCode is a tool to scan code for license, copyright, package and their documented dependencies and other interesting facts. scancode-toolkit-mini is a special build that does not come with pre-built binary dependencies by default. These are instead installed separately or with the extra_requires scancode-toolkit-mini[full]
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = scancode-toolkit
version = 30.0.1
version = 30.1.0
license = Apache-2.0 AND CC-BY-4.0 AND LicenseRef-scancode-other-permissive AND LicenseRef-scancode-other-copyleft

description = ScanCode is a tool to scan code for license, copyright, package and their documented dependencies and other interesting facts.
Expand Down
2 changes: 1 addition & 1 deletion src/scancode_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def _create_dir(location):

# in case package is not installed or we do not have setuptools/pkg_resources
# on hand fall back to this version
__version__ = '30.0.1'
__version__ = '30.1.0'

# used to warn user when the version is out of date
__release_date__ = datetime.datetime(2021, 9, 24)
Expand Down
135 changes: 44 additions & 91 deletions src/summarycode/summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,49 +40,7 @@ def logger_debug(*args):
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))

"""
top_level:
- license_expressions:
- count: 1
value: gpl-2.0
- holders:
- count: 1
value: RedHat Inc.
by_facet:
facet: core
- license_expressions:
- count: 10
value: gpl-2.0 or bsd-new
- count: 2
value: mit
- programming_language:
- count: 10
value: java
- holders:
- count: 10
value: RedHat Inc.
facet: dev
- license_expressions:
- count: 23
value: gpl-2.0
- holders:
- count: 20
value: RedHat Inc.
- count: 10
value: none
- programming_languages:
- count: 34
value: java
all:
- license_expressions:
- count: 10
value: gpl-2.0 or bsd-new
- programming_language:
- count: 10
value: java
- holders:
- count: 10
value: RedHat Inc.
Create summarized scan data.
"""


Expand Down Expand Up @@ -205,8 +163,10 @@ def license_summarizer(resource, children, keep_details=False):
child_summaries = get_resource_summary(child, key=LIC_EXP, as_attribute=keep_details) or []
for child_summary in child_summaries:
# TODO: review this: this feels rather weird
values = [child_summary['value']] * child_summary['count']
license_expressions.extend(values)
child_sum_val = child_summary.get('value')
if child_sum_val:
values = [child_sum_val] * child_summary['count']
license_expressions.extend(values)

# summarize proper
licenses_counter = summarize_licenses(license_expressions)
Expand Down Expand Up @@ -245,8 +205,10 @@ def language_summarizer(resource, children, keep_details=False):
for child in children:
child_summaries = get_resource_summary(child, key=PROG_LANG, as_attribute=keep_details) or []
for child_summary in child_summaries:
values = [child_summary['value']] * child_summary['count']
languages.extend(values)
child_sum_val = child_summary.get('value')
if child_sum_val:
values = [child_sum_val] * child_summary['count']
languages.extend(values)

# summarize proper
languages_counter = summarize_languages(languages)
Expand All @@ -264,11 +226,23 @@ def summarize_languages(languages):
return Counter(languages)


SUMMARIZABLE_ATTRS = set([
'license_expressions',
'copyrights',
'holders',
'authors',
'programming_language',
# 'packages',
])


def summarize_values(values, attribute):
"""
Given a list of `values` for a given `attribute`, return a mapping of
{value: count of occurrences} using a summarization specific to the attribute.
"""
if attribute not in SUMMARIZABLE_ATTRS:
return {}
from summarycode.copyright_summary import summarize_holders
from summarycode.copyright_summary import summarize_copyrights

Expand All @@ -278,7 +252,6 @@ def summarize_values(values, attribute):
holders=summarize_holders,
authors=summarize_holders,
programming_language=summarize_languages,
packages=summarize_packages,
)
return value_summarizers_by_attr[attribute](values)

Expand Down Expand Up @@ -317,23 +290,14 @@ def summarize_codebase_key_files(codebase, **kwargs):
"""
Summarize codebase key files.
"""
summarizable_attributes = codebase.attributes.summary.keys()
if TRACE: logger_debug('summarizable_attributes:', summarizable_attributes)

# TODO: we cannot summarize packages with "key files for now
really_summarizable_attributes = set([
'license_expressions',
'copyrights',
'holders',
'authors',
'programming_language',
# 'packages',
])
summarizable_attributes = [k for k in summarizable_attributes
if k in really_summarizable_attributes]
summarizables = codebase.attributes.summary.keys()
if TRACE: logger_debug('summarizables:', summarizables)

# TODO: we cannot summarize packages with "key files" for now
summarizables = [k for k in summarizables if k in SUMMARIZABLE_ATTRS]

# create one counter for each summarized attribute
summarizable_values_by_key = dict([(key, []) for key in summarizable_attributes])
summarizable_values_by_key = dict([(key, []) for key in summarizables])

# filter to get only key files
key_files = (res for res in codebase.walk(topdown=True)
Expand All @@ -347,10 +311,14 @@ def summarize_codebase_key_files(codebase, **kwargs):
res_summaries = get_resource_summary(resource, key=key, as_attribute=False) or []
for summary in res_summaries:
# each summary is a mapping with value/count: we transform back to values
values.extend([summary['value']] * summary['count'])
sum_value = summary.get('value')
if sum_value:
values.extend([sum_value] * summary['count'])

summary_counters = []
for key, values in summarizable_values_by_key.items():
if key not in SUMMARIZABLE_ATTRS:
continue
summarized = summarize_values(values, key)
summary_counters.append((key, summarized))

Expand Down Expand Up @@ -394,13 +362,13 @@ def summarize_codebase_by_facet(codebase, **kwargs):
"""
from summarycode import facet as facet_module

summarizable_attributes = codebase.attributes.summary.keys()
summarizable = codebase.attributes.summary.keys()
if TRACE:
logger_debug('summarize_codebase_by_facet for attributes:', summarizable_attributes)
logger_debug('summarize_codebase_by_facet for attributes:', summarizable)

# create one group of by-facet values lists for each summarized attribute
summarizable_values_by_key_by_facet = dict([
(facet, dict([(key, []) for key in summarizable_attributes]))
(facet, dict([(key, []) for key in summarizable]))
for facet in facet_module.FACETS
])

Expand All @@ -417,7 +385,9 @@ def summarize_codebase_by_facet(codebase, **kwargs):
res_summaries = get_resource_summary(resource, key=key, as_attribute=False) or []
for summary in res_summaries:
# each summary is a mapping with value/count: we transform back to discrete values
values.extend([summary['value']] * summary['count'])
sum_value = summary.get('value')
if sum_value:
values.extend([sum_value] * summary['count'])

final_summaries = []
for facet, summarizable_values_by_key in summarizable_values_by_key_by_facet.items():
Expand Down Expand Up @@ -480,28 +450,11 @@ def package_summarizer(resource, children, keep_details=False):
logger_debug('package_summarizer: for:', resource,
'packages are:', packs)

package_urls = []
for package in packages:
purl = package.get('purl')
if purl:
package_urls.append(purl)
# Collect direct children packages summary
for child in children:
child_summaries = get_resource_summary(child, key='packages', as_attribute=False) or []
packages.extend(child_summaries)

# summarize proper
packages_counter = summarize_packages(package_urls)
summarized = sorted_counter(packages_counter)
set_resource_summary(
resource=resource,
key='packages',
value=summarized,
as_attribute=keep_details,
)

return summarized


def summarize_packages(package_urls):
"""
Given a list of package urls, return a mapping of {expression: count
of occurrences}
"""
return Counter(package_urls)
set_resource_summary(resource, key='packages', value=packages, as_attribute=False)
return packages
1 change: 1 addition & 0 deletions tests/summarycode/data/classify/cli.expected.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"--json-pp": "<file>"
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"output_format_version": "1.0.0",
"message": null,
"errors": [],
"extra_data": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"--summary": true
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"output_format_version": "1.0.0",
"message": null,
"errors": [],
"extra_data": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"--summary": true
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"output_format_version": "1.0.0",
"message": null,
"errors": [],
"extra_data": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"--summary-with-details": true
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"output_format_version": "1.0.0",
"message": null,
"errors": [],
"extra_data": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"--summary-with-details": true
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"output_format_version": "1.0.0",
"message": null,
"errors": [],
"extra_data": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"--summary-key-files": true
},
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
"output_format_version": "1.0.0",
"message": null,
"errors": [],
"extra_data": {
Expand Down Expand Up @@ -148,10 +149,6 @@
"value": "Java",
"count": 7
},
{
"value": null,
"count": 2
},
{
"value": "C#",
"count": 2
Expand All @@ -163,30 +160,10 @@
]
},
"summary_of_key_files": {
"copyrights": [
{
"value": null,
"count": 1
}
],
"holders": [
{
"value": null,
"count": 1
}
],
"authors": [
{
"value": null,
"count": 1
}
],
"programming_language": [
{
"value": null,
"count": 1
}
]
"copyrights": [],
"holders": [],
"authors": [],
"programming_language": []
},
"files": [
{
Expand Down
Loading

0 comments on commit 152abda

Please sign in to comment.