diff --git a/src/packagedcode/__init__.py b/src/packagedcode/__init__.py index 5f1f6f8bac3..2d775ffe8fb 100644 --- a/src/packagedcode/__init__.py +++ b/src/packagedcode/__init__.py @@ -12,6 +12,7 @@ from packagedcode import about from packagedcode import bower from packagedcode import build +from packagedcode import build_gradle from packagedcode import cargo from packagedcode import chef from packagedcode import debian @@ -107,7 +108,8 @@ msi.MsiInstallerPackage, windows.MicrosoftUpdateManifest, pubspec.PubspecYaml, - pubspec.PubspecLock + pubspec.PubspecLock, + build_gradle.BuildGradle, ] PACKAGE_MANIFESTS_BY_TYPE = { diff --git a/src/packagedcode/build.py b/src/packagedcode/build.py index 3733a4fa2d9..099ef043ce5 100644 --- a/src/packagedcode/build.py +++ b/src/packagedcode/build.py @@ -14,7 +14,6 @@ import attr -from commoncode import filetype from commoncode import fileutils from packagedcode import models from packagedcode.utils import combine_expressions diff --git a/src/packagedcode/build_gradle.py b/src/packagedcode/build_gradle.py new file mode 100644 index 00000000000..d970e9ca144 --- /dev/null +++ b/src/packagedcode/build_gradle.py @@ -0,0 +1,305 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
def remove_quotes(string):
    """
    Return `string` with one pair of matching surrounding quotes removed.

    The quotes are stripped only when `string` starts and ends with the same
    quote character, either a single quote or a double quote. In every other
    case (no quotes, mismatched quotes, a lone quote character or an empty
    string) `string` is returned unchanged.
    """
    # The length check keeps a lone quote character from being treated as
    # both the opening and the closing quote of an empty string.
    is_quoted = (
        len(string) >= 2
        and string[0] == string[-1]
        and string[0] in ('"', "'")
    )
    if is_quoted:
        return string[1:-1]
    return string
name_label_to_dep_field_name = { + 'group:': 'namespace', + 'name:': 'name', + 'version:': 'version' + } + dependency = {} + last_key = None + for child_node in tree_node.leaves(): + value = child_node.value + if child_node.label == 'NAME-LABEL': + last_key = name_label_to_dep_field_name.get(value, '') + if is_literal_string(child_node.label): + if last_key: + dependency[last_key] = remove_quotes(value) + if in_nested_dependency and scope: + dependency['scope'] = scope + dependencies.append(dependency) + + if tree_node.label == 'DEPENDENCY-2': + dependency = {} + for child_node in tree_node.leaves(): + if child_node.label == 'NAME': + dependency['scope'] = child_node.value + if is_literal_string(child_node.label): + value = child_node.value + value = remove_quotes(value) + + namespace = '' + name = '' + version = '' + split_value = value.split(':') + split_value_length = len(split_value) + if split_value_length == 4: + # We are assuming `value` is in the form of "namespace:name:version:module" + # We are currently not reporting down to the module level + namespace, name, version, _ = split_value + if split_value_length == 3: + # We are assuming `value` is in the form of "namespace:name:version" + namespace, name, version = split_value + if split_value_length == 2: + # We are assuming `value` is in the form of "namespace:name" + namespace, name = split_value + + dependency['namespace'] = namespace + dependency['name'] = name + dependency['version'] = version + dependencies.append(dependency) + + if tree_node.label == 'DEPENDENCY-3': + dependency = {} + for child_node in tree_node.leaves(): + if child_node.label == 'NAME': + dependency['scope'] = child_node.value + if is_literal_string(child_node.label): + value = child_node.value + value = remove_quotes(value) + # We are assuming `value` is in the form of "namespace:name:version" + split_dependency_string = value.split(':') + if len(split_dependency_string) != 3: + break + namespace, name, version = 
split_dependency_string + dependency['namespace'] = namespace + dependency['name'] = name + dependency['version'] = version + dependencies.append(dependency) + + # TODO: See if you can refactor logic with DEPENDENCY-1 + if tree_node.label == 'DEPENDENCY-4': + dependency = {} + last_key = None + for child_node in tree_node.leaves(): + if child_node.label == 'NAME': + dependency['scope'] = child_node.value + if child_node.label == 'NAME-LABEL': + value = child_node.value + if value == 'group:': + last_key = 'namespace' + if value == 'name:': + last_key = 'name' + if value == 'version:': + last_key = 'version' + if is_literal_string(child_node.label): + dependency[last_key] = remove_quotes(child_node.value) + dependencies.append(dependency) + + if tree_node.label == 'DEPENDENCY-5': + dependency = {} + for child_node in tree_node.leaves(): + if child_node.label == 'NAME': + dependency['scope'] = child_node.value + if child_node.label == 'NAME-ATTRIBUTE': + dependency['name'] = child_node.value + dependencies.append(dependency) + return dependencies + + +def get_dependencies(build_gradle_location): + parse_tree = get_parse_tree(build_gradle_location) + # Parse `parse_tree` for dependencies and print them + return get_dependencies_from_parse_tree(parse_tree) + + +def build_package(cls, dependencies): + package_dependencies = [] + for dependency in dependencies: + # Ignore collected dependencies that do not have a name + name = dependency.get('name', '') + if not name: + continue + + namespace = dependency.get('namespace', '') + version = dependency.get('version', '') + scope = dependency.get('scope', '') + is_runtime = True + is_optional = False + if 'test' in scope.lower(): + is_runtime = False + is_optional = True + + package_dependencies.append( + models.DependentPackage( + purl=PackageURL( + type='build.gradle', + namespace=namespace, + name=name, + version=version + ).to_string(), + scope=scope, + requirement=version, + is_runtime=is_runtime, + 
is_optional=is_optional, + ) + ) + + yield cls( + dependencies=package_dependencies, + ) + + +@attr.s() +class BuildGradle(BaseBuildManifestPackage, models.PackageManifest): + file_patterns = ('build.gradle',) + extensions = ('.gradle',) + # TODO: Not sure what the default type should be, change this to something + # more appropriate later + default_type = 'build.gradle' + + @classmethod + def recognize(cls, location): + if not cls.is_manifest(location): + return + dependencies = get_dependencies(location) + return build_package(cls, dependencies) diff --git a/tests/packagedcode/data/build_gradle/build.gradle b/tests/packagedcode/data/build_gradle/build.gradle new file mode 100644 index 00000000000..2400322bf6c --- /dev/null +++ b/tests/packagedcode/data/build_gradle/build.gradle @@ -0,0 +1,16 @@ +dependencies { + // DEPENDENCY-5 + api dependencies.lombok + + // DEPENDENCY-4 + api group: 'com.google', name: 'guava', version: '1.0' + + // DEPENDENCY-4 + usageDependencies group: 'org.apache', name: 'commons', version: '1.0' + + // DEPENDENCY-1 + runtimeOnly( + [group: 'org.jacoco', name: 'org.jacoco.ant', version: '0.7.4.201502262128', classifier: "nodeps", transitive: false], + [group: 'org.jacoco', name: 'org.jacoco.agent', version: '0.7.4.201502262128', classifier: "runtime", transitive: false] + ) +} diff --git a/tests/packagedcode/data/build_gradle/end2end-expected.json b/tests/packagedcode/data/build_gradle/end2end-expected.json new file mode 100644 index 00000000000..85d0570a128 --- /dev/null +++ b/tests/packagedcode/data/build_gradle/end2end-expected.json @@ -0,0 +1,170 @@ +{ + "headers": [ + { + "tool_name": "scancode-toolkit", + "options": { + "input": "", + "--json-pp": "", + "--package": true + }, + "notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. 
Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.", + "output_format_version": "2.0.0", + "message": null, + "errors": [], + "extra_data": { + "spdx_license_list_version": "3.15", + "OUTDATED": "WARNING: Outdated ScanCode Toolkit version! You are using an outdated version of ScanCode Toolkit: 30.1.0 released on: 2021-09-24. A new version is available with important improvements including bug and security fixes, updated license, copyright and package detection, and improved scanning accuracy. Please download and install the latest version of ScanCode. Visit https://github.com/nexB/scancode-toolkit/releases for details.", + "files_count": 1 + } + } + ], + "packages": [ + { + "type": "build.gradle", + "namespace": null, + "name": null, + "version": null, + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": null, + "notice_text": null, + "root_path": null, + "dependencies": [ + { + "purl": "pkg:build.gradle/com.google/guava@1.0", + "requirement": "1.0", + "scope": "api", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:build.gradle/org.apache/commons@1.0", + "requirement": "1.0", + "scope": "usageDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:build.gradle/org.jacoco.ant@0.7.4.201502262128", + "requirement": "0.7.4.201502262128", + "scope": "", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": 
"pkg:build.gradle/org.jacoco.agent@0.7.4.201502262128", + "requirement": "0.7.4.201502262128", + "scope": "", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + } + ], + "contains_source_code": null, + "source_packages": [], + "extra_data": {}, + "purl": null, + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null + } + ], + "files": [ + { + "path": "build.gradle", + "type": "file", + "package_manifests": [ + { + "type": "build.gradle", + "namespace": null, + "name": null, + "version": null, + "qualifiers": {}, + "subpath": null, + "primary_language": null, + "description": null, + "release_date": null, + "parties": [], + "keywords": [], + "homepage_url": null, + "download_url": null, + "size": null, + "sha1": null, + "md5": null, + "sha256": null, + "sha512": null, + "bug_tracking_url": null, + "code_view_url": null, + "vcs_url": null, + "copyright": null, + "license_expression": null, + "declared_license": null, + "notice_text": null, + "root_path": null, + "dependencies": [ + { + "purl": "pkg:build.gradle/com.google/guava@1.0", + "requirement": "1.0", + "scope": "api", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:build.gradle/org.apache/commons@1.0", + "requirement": "1.0", + "scope": "usageDependencies", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:build.gradle/org.jacoco.ant@0.7.4.201502262128", + "requirement": "0.7.4.201502262128", + "scope": "", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + }, + { + "purl": "pkg:build.gradle/org.jacoco.agent@0.7.4.201502262128", + "requirement": "0.7.4.201502262128", + "scope": "", + "is_runtime": true, + "is_optional": false, + "is_resolved": false + } + ], + "contains_source_code": null, + "source_packages": [], + "extra_data": {}, + "purl": null, + "repository_homepage_url": null, + "repository_download_url": null, + "api_data_url": null + } + 
], + "scan_errors": [] + } + ] +} \ No newline at end of file diff --git a/tests/packagedcode/data/plugin/help.txt b/tests/packagedcode/data/plugin/help.txt index 769e14ac3ca..2640191a612 100644 --- a/tests/packagedcode/data/plugin/help.txt +++ b/tests/packagedcode/data/plugin/help.txt @@ -49,6 +49,12 @@ Package: buck class: packagedcode.build:BuckPackage file_patterns: BUCK +-------------------------------------------- +Package: build.gradle + class: packagedcode.build_gradle:BuildGradle + file_patterns: build.gradle + extensions: .gradle + -------------------------------------------- Package: cab class: packagedcode.models:CabPackage diff --git a/tests/packagedcode/test_build_gradle.py b/tests/packagedcode/test_build_gradle.py new file mode 100644 index 00000000000..22adc555dfc --- /dev/null +++ b/tests/packagedcode/test_build_gradle.py @@ -0,0 +1,88 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
def compare_package_results(expected, result):
    """
    Assert that the packages in `result` equal the packages in `expected`.

    `expected` and `result` are iterables of objects exposing a `to_dict()`
    method. Each object is converted to its dict form and the `root_path`
    entry is dropped before comparing, since the result will always have a
    different `root_path` than the expected result.

    Raise an AssertionError when the two normalized lists differ.
    """
    def comparable(packages):
        # Build the list of package dicts with `root_path` removed. A default
        # is passed to pop() so that a dict without that key does not raise.
        mappings = []
        for package in packages:
            mapping = package.to_dict()
            mapping.pop('root_path', None)
            mappings.append(mapping)
        return mappings

    # `result` is consumed first, as it may be a one-shot generator.
    result_packages = comparable(result)
    expected_packages = comparable(expected)
    assert result_packages == expected_packages