def use_referenced_license_expression(referenced_license_expression, license_detection, licensing=None):
    """
    Return True if the ``referenced_license_expression`` (the license
    expression detected in a file referenced by ``license_detection``) should
    be used for this ``license_detection``, return False if the detection
    should be left as-is.

    ``license_detection`` must provide an ``is_unknown`` flag and a
    ``license_expression`` attribute. ``licensing`` is the object used to
    extract license keys from an expression through its
    ``license_keys(expression=...)`` method; a new ``Licensing()`` is created
    when it is not provided.

    Return False when there is no referenced expression or no detection, as
    there is nothing to use in this case.
    """
    if not referenced_license_expression or not license_detection:
        return False

    # A reference is always followed to resolve an unknown license.
    if license_detection.is_unknown:
        return True

    # The referenced file says exactly the same thing as the detection.
    if referenced_license_expression == license_detection.license_expression:
        return True

    # Build the default lazily rather than once at function definition time.
    if licensing is None:
        licensing = Licensing()

    # key in the detection -> key that must not come only from the reference
    dependent_license_keys = {
        "lgpl": "gpl",
    }

    license_keys_with_or_later = frozenset(["gpl", "lgpl", "agpl"])

    # Above this number of keys the referenced expression is not used.
    max_referenced_license_keys = 5

    license_keys = set(
        licensing.license_keys(expression=license_detection.license_expression)
    )
    referenced_license_keys = set(
        licensing.license_keys(expression=referenced_license_expression)
    )

    # The expressions are known to differ at this point (see the equality
    # check above): same keys combined differently means the referenced
    # expression is not used.
    if license_keys == referenced_license_keys:
        return False

    for primary_key, dependent_key in dependent_license_keys.items():
        dependent_key_only_in_referenced = (
            dependent_key in referenced_license_keys
            and dependent_key not in license_keys
        )
        if primary_key in license_keys and dependent_key_only_in_referenced:
            return False

    all_license_keys_special = all(
        key in license_keys_with_or_later for key in license_keys
    )
    all_referenced_license_keys_special = all(
        key in license_keys_with_or_later for key in referenced_license_keys
    )

    # Both sides only use keys from the "or later" families and the key sets
    # differ: use the reference, regardless of the size cutoff below.
    if all_license_keys_special and all_referenced_license_keys_special:
        return True

    if len(referenced_license_keys) > max_referenced_license_keys:
        return False

    return True
from licensedcode.spans import Span from licensedcode import query @@ -133,6 +134,11 @@ def add_referenced_license_matches_for_package(resource, codebase): analysis=DetectionCategory.PACKAGE_UNKNOWN_FILE_REFERENCE_LOCAL.value, post_scan=True, ) + if not use_referenced_license_expression( + referenced_license_expression=license_expression, + license_detection=license_detection_object, + ): + continue license_expression_spdx = build_spdx_license_expression( license_expression=str(license_expression), licensing=get_cache().licensing, @@ -258,6 +264,11 @@ def add_referenced_license_detection_from_package(resource, codebase): analysis=analysis, post_scan=True, ) + if not use_referenced_license_expression( + referenced_license_expression=license_expression, + license_detection=license_detection_object, + ): + continue license_expression_spdx = build_spdx_license_expression( license_expression=str(license_expression), licensing=get_cache().licensing,