From 513e276ff9541222432e4a7f1511bc0d9fa7b055 Mon Sep 17 00:00:00 2001 From: Kevin Ji Date: Tue, 14 Jun 2022 17:59:44 -0400 Subject: [PATCH] Add support for external licenses in scans #480 This adds `-dir` / `--additional_directories` as a command line option for license detection. The option may be given multiple times and lets users specify paths to directories of licenses and rules that they would like to use during license detection but do not want to add to the ScanCode database of licenses. Each additional directory is expected to contain a `licenses` subdirectory and a `rules` subdirectory. The new option is declared in `licensedcode/plugin_license.py` and is passed as a parameter to `scancode/api.py`. In this approach, the licenses and rules contained in these additional directories are combined with the existing licenses and rules in the ScanCode database to produce a single index. The code for this is found in `licensedcode/cache.py`, and the helper functions for loading these licenses and rules are found in `licensedcode/models.py`. This commit also adds unit tests in `tests/licensedcode/test_plugin_license.py` that verify that license detection succeeds with one and with several additional directories. Setting up these and future tests involves creating a new directory, `tests/licensedcode/data/example_external_licenses`, that contains the sample external licenses used in the unit tests. 
Signed-off-by: Kevin Ji --- src/licensedcode/cache.py | 100 +++++++++++++-- src/licensedcode/models.py | 63 +++++++++ src/licensedcode/plugin_license.py | 15 ++- src/scancode/api.py | 4 +- .../example1/licenses/example1.LICENSE | 1 + .../example1/licenses/example1.yml | 5 + .../example1/rules/example1_1.RULE | 2 + .../example1/rules/example1_1.yml | 2 + .../example2/licenses/example2.LICENSE | 7 + .../example2/licenses/example2.yml | 5 + .../example2/rules/example2.RULE | 1 + .../example2/rules/example2.yml | 2 + .../external_licenses/scan.expected.json | 81 ++++++++++++ .../external_licenses/scan/license.txt | 6 + .../scan_multiple.expected.json | 120 ++++++++++++++++++ tests/licensedcode/test_plugin_license.py | 38 ++++++ tests/scancode/data/help/help.txt | 39 +++--- 17 files changed, 462 insertions(+), 29 deletions(-) create mode 100644 tests/licensedcode/data/example_external_licenses/example1/licenses/example1.LICENSE create mode 100644 tests/licensedcode/data/example_external_licenses/example1/licenses/example1.yml create mode 100644 tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.RULE create mode 100644 tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.yml create mode 100644 tests/licensedcode/data/example_external_licenses/example2/licenses/example2.LICENSE create mode 100644 tests/licensedcode/data/example_external_licenses/example2/licenses/example2.yml create mode 100644 tests/licensedcode/data/example_external_licenses/example2/rules/example2.RULE create mode 100644 tests/licensedcode/data/example_external_licenses/example2/rules/example2.yml create mode 100644 tests/licensedcode/data/plugin_license/external_licenses/scan.expected.json create mode 100644 tests/licensedcode/data/plugin_license/external_licenses/scan/license.txt create mode 100644 tests/licensedcode/data/plugin_license/external_licenses/scan_multiple.expected.json diff --git a/src/licensedcode/cache.py b/src/licensedcode/cache.py index 
9b244728143..f515cb79ed0 100644 --- a/src/licensedcode/cache.py +++ b/src/licensedcode/cache.py @@ -35,6 +35,7 @@ LICENSE_INDEX_FILENAME = 'index_cache' LICENSE_LOCKFILE_NAME = 'scancode_license_index_lockfile' LICENSE_CHECKSUM_FILE = 'scancode_license_index_tree_checksums' +CACHED_DIRECTORIES_FILENAME = 'cached_directories' @attr.s(slots=True) @@ -58,6 +59,7 @@ def load_or_build( timeout=LICENSE_INDEX_LOCK_TIMEOUT, licenses_data_dir=None, rules_data_dir=None, + additional_directories=None, ): """ Load or build and save and return a LicenseCache object. @@ -66,7 +68,8 @@ def load_or_build( On the side, we load cached or build license db, SPDX symbols and other license-related data structures. - - If the cache exists, it is returned unless corrupted or ``force`` is True. + - If the cache exists, it is returned unless corrupted, ``force`` is True, or if we pass in additional + directories containing licenses that are not present in the existing cache. - If the cache does not exist, a new index is built and cached. - If ``index_all_languages`` is True, include texts in all languages when building the license index. 
Otherwise, only include the English license \ @@ -75,12 +78,17 @@ def load_or_build( idx_cache_dir = os.path.join(licensedcode_cache_dir, LICENSE_INDEX_DIR) create_dir(idx_cache_dir) cache_file = os.path.join(idx_cache_dir, LICENSE_INDEX_FILENAME) + cached_directories_file = os.path.join(idx_cache_dir, CACHED_DIRECTORIES_FILENAME) has_cache = os.path.exists(cache_file) and os.path.getsize(cache_file) # bypass build if cache exists if has_cache and not force: try: + # save the list of additional directories included in the cache, or None if the cache does not + # include any additional directories + with open(cached_directories_file, 'wb') as file: + pickle.dump(additional_directories, file, protocol=PICKLE_PROTOCOL) return load_cache_file(cache_file) except Exception as e: # work around some rare Windows quirks @@ -92,6 +100,8 @@ def load_or_build( from licensedcode.models import licenses_data_dir as ldd from licensedcode.models import rules_data_dir as rdd from licensedcode.models import load_licenses + from licensedcode.models import load_licenses_from_multiple_dirs + from licensedcode.models import get_license_dirs from scancode import lockfile licenses_data_dir = licenses_data_dir or ldd @@ -106,13 +116,21 @@ def load_or_build( # Here, the cache is either stale or non-existing: we need to # rebuild all cached data (e.g. 
mostly the index) and cache it - licenses_db = load_licenses(licenses_data_dir=licenses_data_dir) + if additional_directories: + additional_license_dirs = get_license_dirs(additional_dirs=additional_directories) + combined_directories = [licenses_data_dir] + additional_license_dirs + licenses_db = load_licenses_from_multiple_dirs(license_directories=combined_directories) + else: + licenses_db = load_licenses(licenses_data_dir=licenses_data_dir) + # create a single merged index containing license data from licenses_data_dir + # and data from additional directories index = build_index( licenses_db=licenses_db, licenses_data_dir=licenses_data_dir, rules_data_dir=rules_data_dir, index_all_languages=index_all_languages, + additional_directories=additional_directories, ) spdx_symbols = build_spdx_symbols(licenses_db=licenses_db) @@ -131,6 +149,11 @@ def load_or_build( with open(cache_file, 'wb') as fn: pickle.dump(license_cache, fn, protocol=PICKLE_PROTOCOL) + # save the list of additional directories included in the cache, or None if the cache does not + # include any additional directories + with open(cached_directories_file, 'wb') as file: + pickle.dump(additional_directories, file, protocol=PICKLE_PROTOCOL) + return license_cache except lockfile.LockTimeout: @@ -143,27 +166,50 @@ def build_index( licenses_data_dir=None, rules_data_dir=None, index_all_languages=False, + additional_directories=None, ): """ Return an index built from rules and licenses directories If ``index_all_languages`` is True, include texts and rules in all languages. Otherwise, only include the English license texts and rules (the default) + If ``additional_directories`` is not None, we will include licenses and rules + from these additional directories in the returned index. 
""" from licensedcode.index import LicenseIndex + from licensedcode.models import get_license_dirs + from licensedcode.models import get_rule_dirs from licensedcode.models import get_rules + from licensedcode.models import get_rules_from_multiple_dirs from licensedcode.models import get_all_spdx_key_tokens from licensedcode.models import get_license_tokens from licensedcode.models import licenses_data_dir as ldd from licensedcode.models import rules_data_dir as rdd from licensedcode.models import load_licenses + from licensedcode.models import load_licenses_from_multiple_dirs from licensedcode.legalese import common_license_words licenses_data_dir = licenses_data_dir or ldd rules_data_dir = rules_data_dir or rdd - licenses_db = licenses_db or load_licenses(licenses_data_dir=licenses_data_dir) - rules = get_rules(licenses_db=licenses_db, rules_data_dir=rules_data_dir) + if not licenses_db: + if additional_directories: + # combine the licenses in these additional directories with the licenses in the original DB + additional_license_dirs = get_license_dirs(additional_dirs=additional_directories) + combined_license_directories = [licenses_data_dir] + additional_license_dirs + # generate a single combined license db with all licenses + licenses_db = load_licenses_from_multiple_dirs(license_dirs=combined_license_directories) + else: + licenses_db = load_licenses(licenses_data_dir=licenses_data_dir) + + if additional_directories: + # if we have additional directories, extract the rules from them + additional_rule_dirs = get_rule_dirs(additional_dirs=additional_directories) + # then combine the rules in these additional directories with the rules in the original rules directory + combined_rule_directories = [rules_data_dir] + additional_rule_dirs + rules = get_rules_from_multiple_dirs(licenses_db=licenses_db, rule_directories=combined_rule_directories) + else: + rules = get_rules(licenses_db=licenses_db, rules_data_dir=rules_data_dir) legalese = common_license_words 
spdx_tokens = set(get_all_spdx_key_tokens(licenses_db)) @@ -299,7 +345,7 @@ def build_unknown_spdx_symbol(licenses_db=None): return LicenseSymbolLike(licenses_db['unknown-spdx']) -def get_cache(force=False, index_all_languages=False): +def get_cache(force=False, index_all_languages=False, additional_directories=None): """ Return a LicenseCache either rebuilt, cached or loaded from disk. @@ -307,16 +353,18 @@ def get_cache(force=False, index_all_languages=False): building the license index. Otherwise, only include the English license \ texts and rules (the default) """ - populate_cache(force=force, index_all_languages=index_all_languages) + populate_cache(force=force, index_all_languages=index_all_languages, additional_directories=additional_directories) global _LICENSE_CACHE return _LICENSE_CACHE -def populate_cache(force=False, index_all_languages=False): +def populate_cache(force=False, index_all_languages=False, additional_directories=None): """ Load or build and cache a LicenseCache. Return None. """ global _LICENSE_CACHE + if need_cache_rebuild(additional_directories): + force = True if force or not _LICENSE_CACHE: _LICENSE_CACHE = LicenseCache.load_or_build( licensedcode_cache_dir=licensedcode_cache_dir, @@ -325,9 +373,39 @@ def populate_cache(force=False, index_all_languages=False): index_all_languages=index_all_languages, # used for testing only timeout=LICENSE_INDEX_LOCK_TIMEOUT, + additional_directories=additional_directories, ) +def need_cache_rebuild(additional_directories): + """ + Return true if we need to rebuild the index cache. 
+ """ + idx_cache_dir = os.path.join(licensedcode_cache_dir, LICENSE_INDEX_DIR) + cached_directories_file = os.path.join(idx_cache_dir, CACHED_DIRECTORIES_FILENAME) + + has_cached_directories = os.path.exists(cached_directories_file) + should_rebuild_cache = False + + if has_cached_directories: + # if we have cached additional directories of licenses, check if those licenses are equal to the additional + # directories passed in + with open(cached_directories_file, 'rb') as file: + # it's possible that pickle.load(file) results in None + cached_additional_directories = pickle.load(file) or set() + + # we need to rebuild the cache if the list of additional directories we passed in is not a subset of + # the set of additional directories currently included in the index cache + should_rebuild_cache = additional_directories is not None \ + and not set(additional_directories).issubset(cached_additional_directories) + else: + # otherwise, we don't have a file of cached directories. If there are additional directories passed in, + # we know we need to make a new cache file. + if additional_directories: + should_rebuild_cache = True + return should_rebuild_cache + + def load_cache_file(cache_file): """ Return a LicenseCache loaded from ``cache_file``. @@ -346,11 +424,15 @@ def load_cache_file(cache_file): raise Exception(msg) from e -def get_index(force=False, index_all_languages=False): +def get_index(force=False, index_all_languages=False, additional_directories=None): """ Return and eventually build and cache a LicenseIndex. 
""" - return get_cache(force=force, index_all_languages=index_all_languages).index + return get_cache( + force=force, + index_all_languages=index_all_languages, + additional_directories=additional_directories + ).index get_cached_index = get_index diff --git a/src/licensedcode/models.py b/src/licensedcode/models.py index c681a43828a..f2a0635d98b 100644 --- a/src/licensedcode/models.py +++ b/src/licensedcode/models.py @@ -20,6 +20,7 @@ from os.path import dirname from os.path import exists from os.path import join +from pathlib import Path import attr import saneyaml @@ -772,6 +773,68 @@ def get_rules( return chain(licenses_as_rules, rules) +def get_license_dirs( + additional_dirs, +): + """ + Takes in a list of additional directories specified during license detection + and produces a list of all the subdirectories containing license files. + """ + # convert to absolute path in case user passes in a relative path, which messes up building rules from licenses + return [f"{str(Path(path).absolute())}/licenses" for path in additional_dirs] + + +def get_rule_dirs( + additional_dirs, +): + """ + Takes in a list of additional directories specified during license detection + and produces a list of all the subdirectories containing rule files. + """ + return [f"{str(Path(path).absolute())}/rules" for path in additional_dirs] + + +def load_licenses_from_multiple_dirs( + license_directories, + with_deprecated=False, +): + """ + Takes in a list of directories containing additional licenses to use in + license detection and combines all the licenses into the same mapping. 
+ """ + combined_licenses = {} + for license_dir in license_directories: + licenses = load_licenses(licenses_data_dir=license_dir, with_deprecated=False) + # this syntax for merging is described here: https://stackoverflow.com/a/26853961 + combined_licenses = {**combined_licenses, **licenses} + return combined_licenses + + +def get_rules_from_multiple_dirs( + licenses_db, + rule_directories, +): + """ + Takes in a license database, which is a mapping from key->License objects, + and a list of all directories containing rules to use in license detection. + Combines all rules together into the same data structure and validates them. + """ + if rule_directories: + combined_rules = [] + for rules_dir in rule_directories: + r = list(load_rules( + rules_data_dir=rules_dir, + )) + combined_rules.append(r) + # flatten lists of rules into a single iterable + rules = list(chain.from_iterable(combined_rules)) + validate_rules(rules, licenses_db) + licenses_as_rules = build_rules_from_licenses(licenses_db) + return chain(licenses_as_rules, rules) + else: + return get_rules(licenses_db=licenses_db, rules_data_dir=rules_data_dir) + + class InvalidRule(Exception): pass diff --git a/src/licensedcode/plugin_license.py b/src/licensedcode/plugin_license.py index 8658ffbc356..ff1ba3fc6d8 100644 --- a/src/licensedcode/plugin_license.py +++ b/src/licensedcode/plugin_license.py @@ -18,6 +18,7 @@ from commoncode.resource import clean_path from plugincode.scan import ScanPlugin from plugincode.scan import scan_impl +import click from scancode.api import SCANCODE_LICENSEDB_URL @@ -139,6 +140,15 @@ class LicenseScanner(ScanPlugin): help_group=SCAN_OPTIONS_GROUP, ), + PluggableCommandLineOption( + ('-dir', '--additional_directories'), + required_options=['license'], + multiple=True, + type=click.Path(exists=True, readable=True, path_type=str), + help='Include additional directories for license detection.', + help_group=SCAN_OPTIONS_GROUP, + ), + PluggableCommandLineOption( 
('--reindex-licenses',), is_flag=True, is_eager=True, @@ -167,7 +177,8 @@ def setup(self, **kwargs): loaded index. """ from licensedcode.cache import populate_cache - populate_cache() + additional_directories = kwargs.get('additional_directories') + populate_cache(additional_directories=additional_directories) def get_scanner( self, @@ -176,6 +187,7 @@ def get_scanner( license_text_diagnostics=False, license_url_template=SCANCODE_LICENSEDB_URL, unknown_licenses=False, + additional_directories=None, **kwargs ): @@ -186,6 +198,7 @@ def get_scanner( license_text_diagnostics=license_text_diagnostics, license_url_template=license_url_template, unknown_licenses=unknown_licenses, + additional_directories=additional_directories, ) def process_codebase(self, codebase, unknown_licenses, **kwargs): diff --git a/src/scancode/api.py b/src/scancode/api.py index 1f3333c6a4b..1acd26ef174 100644 --- a/src/scancode/api.py +++ b/src/scancode/api.py @@ -142,6 +142,7 @@ def get_licenses( license_url_template=SCANCODE_LICENSEDB_URL, unknown_licenses=False, deadline=sys.maxsize, + additional_directories=None, **kwargs, ): """ @@ -168,7 +169,7 @@ def get_licenses( from licensedcode import cache from licensedcode.spans import Span - idx = cache.get_index() + idx = cache.get_index(additional_directories=additional_directories) detected_licenses = [] detected_expressions = [] @@ -252,6 +253,7 @@ def _licenses_data_from_match( result['homepage_url'] = lic.homepage_url result['text_url'] = lic.text_urls[0] if lic.text_urls else '' result['reference_url'] = license_url_template.format(lic.key) + # TODO: change this in the case of a private license? 
result['scancode_text_url'] = SCANCODE_LICENSE_TEXT_URL.format(lic.key) result['scancode_data_url'] = SCANCODE_LICENSE_DATA_URL.format(lic.key) diff --git a/tests/licensedcode/data/example_external_licenses/example1/licenses/example1.LICENSE b/tests/licensedcode/data/example_external_licenses/example1/licenses/example1.LICENSE new file mode 100644 index 00000000000..8fe2a4b5ad1 --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example1/licenses/example1.LICENSE @@ -0,0 +1 @@ +The quick brown fox jumps over the lazy dog. \ No newline at end of file diff --git a/tests/licensedcode/data/example_external_licenses/example1/licenses/example1.yml b/tests/licensedcode/data/example_external_licenses/example1/licenses/example1.yml new file mode 100644 index 00000000000..d7d1ea640ec --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example1/licenses/example1.yml @@ -0,0 +1,5 @@ +key: example1 +short_name: Example External License 1 +name: Example External License 1 +category: Permissive +owner: NexB \ No newline at end of file diff --git a/tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.RULE b/tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.RULE new file mode 100644 index 00000000000..ef512c3e024 --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.RULE @@ -0,0 +1,2 @@ +The quick brown fox jumps over the lazy dog. +The quick brown fox jumps over the lazy dog. 
\ No newline at end of file diff --git a/tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.yml b/tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.yml new file mode 100644 index 00000000000..96535e6c24b --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example1/rules/example1_1.yml @@ -0,0 +1,2 @@ +license_expression: example1 +is_license_text: yes \ No newline at end of file diff --git a/tests/licensedcode/data/example_external_licenses/example2/licenses/example2.LICENSE b/tests/licensedcode/data/example_external_licenses/example2/licenses/example2.LICENSE new file mode 100644 index 00000000000..4abca0c149f --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example2/licenses/example2.LICENSE @@ -0,0 +1,7 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, +sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. +Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi +ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit +in voluptate velit esse cillum dolore eu fugiat nulla pariatur. +Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia +deserunt mollit anim id est laborum. 
diff --git a/tests/licensedcode/data/example_external_licenses/example2/licenses/example2.yml b/tests/licensedcode/data/example_external_licenses/example2/licenses/example2.yml new file mode 100644 index 00000000000..d255bd8d70e --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example2/licenses/example2.yml @@ -0,0 +1,5 @@ +key: example2 +short_name: Example External License 2 +name: Example External License 2 +category: Permissive +owner: NexB \ No newline at end of file diff --git a/tests/licensedcode/data/example_external_licenses/example2/rules/example2.RULE b/tests/licensedcode/data/example_external_licenses/example2/rules/example2.RULE new file mode 100644 index 00000000000..c27a1f5e008 --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example2/rules/example2.RULE @@ -0,0 +1 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit diff --git a/tests/licensedcode/data/example_external_licenses/example2/rules/example2.yml b/tests/licensedcode/data/example_external_licenses/example2/rules/example2.yml new file mode 100644 index 00000000000..0a6aebb4284 --- /dev/null +++ b/tests/licensedcode/data/example_external_licenses/example2/rules/example2.yml @@ -0,0 +1,2 @@ +license_expression: example2 +is_license_text: yes \ No newline at end of file diff --git a/tests/licensedcode/data/plugin_license/external_licenses/scan.expected.json b/tests/licensedcode/data/plugin_license/external_licenses/scan.expected.json new file mode 100644 index 00000000000..6226a744fa7 --- /dev/null +++ b/tests/licensedcode/data/plugin_license/external_licenses/scan.expected.json @@ -0,0 +1,81 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "options": { + "input": "", + "-dir": "", + "--json": "", + "--license": true, + "--strip-root": true + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. 
No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "output_format_version": "2.0.0", + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic", + "platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022", + "python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]" + }, + "spdx_license_list_version": "3.16", + "files_count": 2 + } + } + ], + "files": [ + { + "path": "license.txt", + "type": "file", + "licenses": [ + { + "key": "example1", + "score": 100.0, + "name": "Example External License 1", + "short_name": "Example External License 1", + "category": "Permissive", + "is_exception": false, + "is_unknown": false, + "owner": "NexB", + "homepage_url": "", + "text_url": "", + "reference_url": "https://scancode-licensedb.aboutcode.org/example1", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/example1.LICENSE", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/example1.yml", + "spdx_license_key": "", + "spdx_url": "", + "start_line": 1, + "end_line": 1, + "matched_rule": { + "identifier": "example1.LICENSE", + "license_expression": "example1", + "licenses": [ + "example1" + ], + "referenced_filenames": [], + "is_license_text": true, + "is_license_notice": false, + "is_license_reference": false, + "is_license_tag": false, + "is_license_intro": false, + "has_unknown": false, + "matcher": "2-aho", + "rule_length": 9, + "matched_length": 9, + "match_coverage": 100.0, + "rule_relevance": 100 + } + } + ], + "license_expressions": [ + 
"example1" + ], + "percentage_of_license_text": 10.98, + "scan_errors": [] + } + ] +} diff --git a/tests/licensedcode/data/plugin_license/external_licenses/scan/license.txt b/tests/licensedcode/data/plugin_license/external_licenses/scan/license.txt new file mode 100644 index 00000000000..0eacf6b73a6 --- /dev/null +++ b/tests/licensedcode/data/plugin_license/external_licenses/scan/license.txt @@ -0,0 +1,6 @@ +The quick brown fox jumps over the lazy dog. Lorem ipsum dolor sit amet, consectetur adipiscing elit, +sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud +exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit +in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, +sunt in culpa qui officia deserunt mollit anim id est laborum. +This is a test license. diff --git a/tests/licensedcode/data/plugin_license/external_licenses/scan_multiple.expected.json b/tests/licensedcode/data/plugin_license/external_licenses/scan_multiple.expected.json new file mode 100644 index 00000000000..22af1c6ed68 --- /dev/null +++ b/tests/licensedcode/data/plugin_license/external_licenses/scan_multiple.expected.json @@ -0,0 +1,120 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "options": { + "input": "", + "-dir": "", + "--json": "", + "--license": true, + "--strip-root": true + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. 
and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "output_format_version": "2.0.0", + "message": null, + "errors": [], + "warnings": [], + "extra_data": { + "system_environment": { + "operating_system": "linux", + "cpu_architecture": "64", + "platform": "Linux-5.4.0-109-generic-x86_64-with-Ubuntu-18.04-bionic", + "platform_version": "#123~18.04.1-Ubuntu SMP Fri Apr 8 09:48:52 UTC 2022", + "python_version": "3.6.9 (default, Mar 15 2022, 13:55:28) \n[GCC 8.4.0]" + }, + "spdx_license_list_version": "3.16", + "files_count": 2 + } + } + ], + "files": [ + { + "path": "license.txt", + "type": "file", + "licenses": [ + { + "key": "example1", + "score": 100.0, + "name": "Example External License 1", + "short_name": "Example External License 1", + "category": "Permissive", + "is_exception": false, + "is_unknown": false, + "owner": "NexB", + "homepage_url": "", + "text_url": "", + "reference_url": "https://scancode-licensedb.aboutcode.org/example1", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/example1.LICENSE", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/example1.yml", + "spdx_license_key": "", + "spdx_url": "", + "start_line": 1, + "end_line": 1, + "matched_rule": { + "identifier": "example1.LICENSE", + "license_expression": "example1", + "licenses": [ + "example1" + ], + "referenced_filenames": [], + "is_license_text": true, + "is_license_notice": false, + "is_license_reference": false, + "is_license_tag": false, + "is_license_intro": false, + "has_unknown": false, + "matcher": "2-aho", + "rule_length": 9, + "matched_length": 9, + "match_coverage": 100.0, + "rule_relevance": 100 + } + }, + { + "key": "example2", + "score": 100.0, + "name": "Example External License 2", + "short_name": "Example External License 2", + "category": "Permissive", + "is_exception": false, + "is_unknown": false, + "owner": 
"NexB", + "homepage_url": "", + "text_url": "", + "reference_url": "https://scancode-licensedb.aboutcode.org/example2", + "scancode_text_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/example2.LICENSE", + "scancode_data_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/example2.yml", + "spdx_license_key": "", + "spdx_url": "", + "start_line": 1, + "end_line": 5, + "matched_rule": { + "identifier": "example2.LICENSE", + "license_expression": "example2", + "licenses": [ + "example2" + ], + "referenced_filenames": [], + "is_license_text": true, + "is_license_notice": false, + "is_license_reference": false, + "is_license_tag": false, + "is_license_intro": false, + "has_unknown": false, + "matcher": "2-aho", + "rule_length": 69, + "matched_length": 69, + "match_coverage": 100.0, + "rule_relevance": 100 + } + } + ], + "license_expressions": [ + "example1", + "example2" + ], + "percentage_of_license_text": 95.12, + "scan_errors": [] + } + ] +} diff --git a/tests/licensedcode/test_plugin_license.py b/tests/licensedcode/test_plugin_license.py index 0d5d20a469a..5eada5c99d9 100644 --- a/tests/licensedcode/test_plugin_license.py +++ b/tests/licensedcode/test_plugin_license.py @@ -242,6 +242,44 @@ def test_reindex_licenses_works(): run_scan_click(['--reindex-licenses']) +@pytest.mark.scanslow +def test_detection_with_single_external_license_directory(): + test_dir = test_env.get_test_loc('plugin_license/external_licenses/scan', copy=True) + example1_dir = test_env.get_test_loc('example_external_licenses/example1') + result_file = test_env.get_temp_file('json') + args = [ + '--license', + '--strip-root', + '--verbose', + '-dir', example1_dir, + '--json', result_file, + test_dir, + ] + run_scan_click(args) + test_loc = test_env.get_test_loc('plugin_license/external_licenses/scan.expected.json') + check_json_scan(test_loc, result_file, regen=REGEN_TEST_FIXTURES) + + +@pytest.mark.scanslow +def 
test_detection_with_multiple_external_license_directories(): + test_dir = test_env.get_test_loc('plugin_license/external_licenses/scan', copy=True) + example1_dir = test_env.get_test_loc('example_external_licenses/example1') + example2_dir = test_env.get_test_loc('example_external_licenses/example2') + result_file = test_env.get_temp_file('json') + args = [ + '--license', + '--strip-root', + '--verbose', + '-dir', example1_dir, + '-dir', example2_dir, + '--json', result_file, + test_dir, + ] + run_scan_click(args) + test_loc = test_env.get_test_loc('plugin_license/external_licenses/scan_multiple.expected.json') + check_json_scan(test_loc, result_file, regen=REGEN_TEST_FIXTURES) + + @pytest.mark.scanslow def test_scan_license_with_url_template(): test_dir = test_env.get_test_loc('plugin_license/license_url', copy=True) diff --git a/tests/scancode/data/help/help.txt b/tests/scancode/data/help/help.txt index 190dc310455..67afe570595 100644 --- a/tests/scancode/data/help/help.txt +++ b/tests/scancode/data/help/help.txt @@ -21,24 +21,27 @@ Options: -u, --url Scan for urls. scan options: - --license-score INTEGER Do not return license matches with a score lower - than this score. A number between 0 and 100. - [default: 0] - --license-text Include the detected licenses matched text. - --license-text-diagnostics In the matched license text, include diagnostic - highlights surrounding with square brackets [] - words that are not matched. - --license-url-template TEXT Set the template URL used for the license - reference URLs. Curly braces ({}) are replaced by - the license key. [default: https://scancode- - licensedb.aboutcode.org/{}] - --max-email INT Report only up to INT emails found in a file. Use - 0 for no limit. [default: 50] - --max-url INT Report only up to INT urls found in a file. Use 0 - for no limit. [default: 50] - --unknown-licenses [EXPERIMENTAL] Detect unknown licenses and follow - license references such as "See license in file - COPYING". 
+ --license-score INTEGER Do not return license matches with a score lower + than this score. A number between 0 and 100. + [default: 0] + --license-text Include the detected licenses matched text. + --license-text-diagnostics In the matched license text, include diagnostic + highlights surrounding with square brackets [] + words that are not matched. + --license-url-template TEXT Set the template URL used for the license + reference URLs. Curly braces ({}) are replaced + by the license key. [default: + https://scancode-licensedb.aboutcode.org/{}] + --max-email INT Report only up to INT emails found in a file. + Use 0 for no limit. [default: 50] + --max-url INT Report only up to INT urls found in a file. + Use 0 for no limit. [default: 50] + --unknown-licenses [EXPERIMENTAL] Detect unknown licenses and + follow license references such as "See license + in file COPYING". + -dir, --additional_directories PATH + Include additional directories for license + detection. output formats: --json FILE Write scan output as compact JSON to FILE.