Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Detect dependencies from build.gradle files #2822

Merged
merged 2 commits into from
Mar 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/packagedcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from packagedcode import about
from packagedcode import bower
from packagedcode import build
from packagedcode import build_gradle
from packagedcode import cargo
from packagedcode import chef
from packagedcode import debian
Expand Down Expand Up @@ -107,7 +108,8 @@
msi.MsiInstallerPackage,
windows.MicrosoftUpdateManifest,
pubspec.PubspecYaml,
pubspec.PubspecLock
pubspec.PubspecLock,
build_gradle.BuildGradle,
]

PACKAGE_MANIFESTS_BY_TYPE = {
Expand Down
1 change: 0 additions & 1 deletion src/packagedcode/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

import attr

from commoncode import filetype
from commoncode import fileutils
from packagedcode import models
from packagedcode.utils import combine_expressions
Expand Down
305 changes: 305 additions & 0 deletions src/packagedcode/build_gradle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging

from packageurl import PackageURL
from pygmars import Token
from pygmars.parse import Parser
from pygments import lex
from pygments.lexers import GroovyLexer
import attr

from packagedcode import models
from packagedcode.build import BaseBuildManifestPackage


# Debug switch: set to True to send this module's DEBUG-level log records
# to stdout.
TRACE = False

logger = logging.getLogger(__name__)

if TRACE:
    # `sys` is only needed for the debug stream, so it is imported on demand.
    import sys

    logging.basicConfig(stream=sys.stdout)
    logger.setLevel(logging.DEBUG)


# Pygmars grammar handed to `Parser` in `get_parse_tree()`.
#
# Each rule groups a sequence of token labels under a new label:
# - LIT-STRING groups a single- or double-quoted string literal token.
# - PACKAGE-IDENTIFIER groups an operator, a NAME-LABEL and a LIT-STRING
#   (presumably one `, name: 'value'` style entry -- confirm against the
#   labels Pygmars derives from the Pygments Groovy token types).
# - DEPENDENCY-1 to DEPENDENCY-5 and NESTED-DEPENDENCY-1 are the labels that
#   `get_dependencies_from_parse_tree()` dispatches on to collect dependencies.
grammar = """
LIT-STRING: {<LITERAL-STRING-SINGLE|LITERAL-STRING-DOUBLE>}

PACKAGE-IDENTIFIER: {<OPERATOR> <TEXT>? <NAME-LABEL> <TEXT>? <LIT-STRING>}
DEPENDENCY-1: {<PACKAGE-IDENTIFIER>{3} <OPERATOR>}

DEPENDENCY-2: {<NAME> <TEXT> <LIT-STRING> <TEXT>}

DEPENDENCY-3: {<NAME> <TEXT>? <OPERATOR> <LIT-STRING> <OPERATOR>}

DEPENDENCY-4: {<NAME> <TEXT> <NAME-LABEL> <TEXT> <LIT-STRING> <PACKAGE-IDENTIFIER> <PACKAGE-IDENTIFIER> <OPERATOR>? <TEXT>}

DEPENDENCY-5: {<NAME> <TEXT> <NAME> <OPERATOR> <NAME-ATTRIBUTE>}

NESTED-DEPENDENCY-1: {<NAME> <OPERATOR> <DEPENDENCY-1>+ }
"""


def get_tokens(contents):
    """
    Yield an (index, token type, value) tuple for each token that the Pygments
    GroovyLexer produces from the `contents` string.

    The index is the running count of tokens starting at zero, not a character
    offset in `contents`.
    """
    index = 0
    for token_type, text in lex(contents, GroovyLexer()):
        yield index, token_type, text
        index += 1


def get_pygmar_tokens(contents):
    """
    Yield Pygmars Tokens built from the Groovy tokens of the `contents` string.

    Every token labelled 'NAME' whose value is `dependencies` is relabelled
    'DEPENDENCIES-START' so that the start of a dependencies block can be
    spotted when walking the parse tree. Note that the relabelling applies to
    any such token, not only to the one that opens the block.
    """
    for pygmars_token in Token.from_pygments_tokens(get_tokens(contents)):
        is_dependencies_keyword = (
            pygmars_token.label == 'NAME'
            and pygmars_token.value == 'dependencies'
        )
        if is_dependencies_keyword:
            pygmars_token.label = 'DEPENDENCIES-START'
        yield pygmars_token


def get_parse_tree(build_gradle_location):
    """
    Return a Pygmars parse tree built from the text of the file at
    `build_gradle_location` using the module-level `grammar`.
    """
    with open(build_gradle_location) as build_gradle_file:
        text = build_gradle_file.read()

    gradle_parser = Parser(grammar, trace=0)
    pygmars_tokens = list(get_pygmar_tokens(text))
    return gradle_parser.parse(pygmars_tokens)


def is_literal_string(string):
    """
    Return True if the `string` token label is one of the two string literal
    labels: 'LITERAL-STRING-SINGLE' or 'LITERAL-STRING-DOUBLE'.
    """
    return string in ('LITERAL-STRING-SINGLE', 'LITERAL-STRING-DOUBLE')


def remove_quotes(string):
    """
    Return `string` without its starting and ending quotes.

    The quotes are removed only when `string` starts and ends with the same
    quote character, either `'` or `"`. Any other `string` is returned as-is,
    including the empty string, a lone quote character and a string with
    mismatched quotes.
    """
    for quote in ('"', "'"):
        # At least two characters are needed to hold both an opening and a
        # closing quote.
        if len(string) >= 2 and string.startswith(quote) and string.endswith(quote):
            return string[1:-1]
    return string


# Maps the labels of the `group: 'x', name: 'y', version: 'z'` notation to the
# dependency field that the following string literal populates.
_NAME_LABEL_TO_DEPENDENCY_FIELD = {
    'group:': 'namespace',
    'name:': 'name',
    'version:': 'version',
}


def _get_labelled_fields(leaves):
    """
    Return a mapping of dependency fields collected from `leaves`, an iterable
    of tokens where a NAME-LABEL token announces the field that the next
    string literal token belongs to.

    A string literal that follows an unrecognized label, or no label at all,
    is ignored rather than stored under a wrong or `None` key.
    """
    fields = {}
    current_field = None
    for leaf in leaves:
        if leaf.label == 'NAME-LABEL':
            current_field = _NAME_LABEL_TO_DEPENDENCY_FIELD.get(leaf.value)
        elif is_literal_string(leaf.label) and current_field:
            fields[current_field] = remove_quotes(leaf.value)
    return fields


def get_dependencies_from_parse_tree(parse_tree):
    """
    Return a list of dependency mappings collected from the nodes of
    `parse_tree` that are found inside the dependencies block.

    Collection starts after the first node labelled 'DEPENDENCIES-START' and
    stops when the curly brackets opened after it are balanced again. Each
    mapping may have the `namespace`, `name`, `version` and `scope` keys,
    depending on which grammar rule matched; none of them is guaranteed to be
    present.
    """
    dependencies = []
    in_dependency_block = False
    brackets_counter = 0
    first_bracket_seen = False
    in_nested_dependency = False
    nested_dependency_parenthesis_counter = 0
    first_parenthesis_seen = False
    # Scope of the NESTED-DEPENDENCY-1 being processed, if any. It is applied
    # to the top-level DEPENDENCY-1 nodes seen before the parenthesis closes.
    scope = None

    for tree_node in parse_tree:
        label = tree_node.label

        if label == 'DEPENDENCIES-START':
            in_dependency_block = True
            continue

        if not in_dependency_block:
            continue

        if label == 'OPERATOR':
            if tree_node.value == '{':
                first_bracket_seen = True
                brackets_counter += 1
            elif tree_node.value == '}':
                brackets_counter -= 1

            # The dependencies block is closed: nothing else to collect.
            if brackets_counter == 0 and first_bracket_seen:
                break

        if label == 'NESTED-DEPENDENCY-1':
            in_nested_dependency = True
            scope = None
            leaves = list(tree_node.leaves())
            for leaf in leaves:
                if leaf.label == 'NAME':
                    scope = leaf.value
                elif leaf.label == 'OPERATOR' and leaf.value == '(':
                    first_parenthesis_seen = True
                    nested_dependency_parenthesis_counter += 1

            dependency = _get_labelled_fields(leaves)
            if scope:
                dependency['scope'] = scope
            dependencies.append(dependency)

        if in_nested_dependency:
            # The closing parenthesis is a top-level token, not a leaf of the
            # NESTED-DEPENDENCY-1 node, hence it is tracked here.
            if label == 'OPERATOR' and tree_node.value == ')':
                nested_dependency_parenthesis_counter -= 1

            if nested_dependency_parenthesis_counter == 0 and first_parenthesis_seen:
                in_nested_dependency = False
                scope = None

        if label == 'DEPENDENCY-1':
            dependency = _get_labelled_fields(tree_node.leaves())
            if in_nested_dependency and scope:
                dependency['scope'] = scope
            dependencies.append(dependency)

        if label == 'DEPENDENCY-2':
            dependency = {}
            for leaf in tree_node.leaves():
                if leaf.label == 'NAME':
                    dependency['scope'] = leaf.value
                if is_literal_string(leaf.label):
                    namespace = ''
                    name = ''
                    version = ''
                    split_value = remove_quotes(leaf.value).split(':')
                    split_value_length = len(split_value)
                    if split_value_length == 4:
                        # We are assuming the value is in the form of "namespace:name:version:module"
                        # We are currently not reporting down to the module level
                        namespace, name, version, _ = split_value
                    elif split_value_length == 3:
                        # We are assuming the value is in the form of "namespace:name:version"
                        namespace, name, version = split_value
                    elif split_value_length == 2:
                        # We are assuming the value is in the form of "namespace:name"
                        namespace, name = split_value

                    dependency['namespace'] = namespace
                    dependency['name'] = name
                    dependency['version'] = version
            dependencies.append(dependency)

        if label == 'DEPENDENCY-3':
            dependency = {}
            for leaf in tree_node.leaves():
                if leaf.label == 'NAME':
                    dependency['scope'] = leaf.value
                if is_literal_string(leaf.label):
                    # We are assuming the value is in the form of "namespace:name:version"
                    split_dependency_string = remove_quotes(leaf.value).split(':')
                    if len(split_dependency_string) != 3:
                        # The name-less mapping is still appended below and
                        # left for the caller to discard.
                        break
                    namespace, name, version = split_dependency_string
                    dependency['namespace'] = namespace
                    dependency['name'] = name
                    dependency['version'] = version
            dependencies.append(dependency)

        if label == 'DEPENDENCY-4':
            leaves = list(tree_node.leaves())
            dependency = {}
            for leaf in leaves:
                if leaf.label == 'NAME':
                    dependency['scope'] = leaf.value
            dependency.update(_get_labelled_fields(leaves))
            dependencies.append(dependency)

        if label == 'DEPENDENCY-5':
            dependency = {}
            for leaf in tree_node.leaves():
                # NOTE(review): the rule has two NAME tokens, so the scope ends
                # up being the value of the last one -- confirm this is the
                # intended scope.
                if leaf.label == 'NAME':
                    dependency['scope'] = leaf.value
                if leaf.label == 'NAME-ATTRIBUTE':
                    dependency['name'] = leaf.value
            dependencies.append(dependency)

    return dependencies


def get_dependencies(build_gradle_location):
    """
    Return the list of dependency mappings collected from the parse tree of
    the file at `build_gradle_location`.
    """
    return get_dependencies_from_parse_tree(
        get_parse_tree(build_gradle_location)
    )


def build_package(cls, dependencies):
    """
    Yield a single `cls` object whose `dependencies` are the DependentPackage
    created from the `dependencies` list of mappings.

    Mappings without a `name` are skipped. A dependency whose scope contains
    "test" (in any letter case) is marked as optional and not runtime; any
    other dependency is marked as runtime and not optional.
    """
    dependent_packages = []
    for entry in dependencies:
        entry_name = entry.get('name', '')
        if not entry_name:
            # Ignore collected dependencies that do not have a name
            continue

        entry_namespace = entry.get('namespace', '')
        entry_version = entry.get('version', '')
        entry_scope = entry.get('scope', '')
        is_test_scope = 'test' in entry_scope.lower()

        purl = PackageURL(
            type='build.gradle',
            namespace=entry_namespace,
            name=entry_name,
            version=entry_version,
        ).to_string()

        dependent_package = models.DependentPackage(
            purl=purl,
            scope=entry_scope,
            requirement=entry_version,
            is_runtime=not is_test_scope,
            is_optional=is_test_scope,
        )
        dependent_packages.append(dependent_package)

    yield cls(dependencies=dependent_packages)


@attr.s()
class BuildGradle(BaseBuildManifestPackage, models.PackageManifest):
    """
    Package manifest for `build.gradle` files, reporting the dependencies
    declared in their dependencies block.
    """
    file_patterns = ('build.gradle',)
    extensions = ('.gradle',)
    # TODO: Not sure what the default type should be, change this to something
    # more appropriate later
    default_type = 'build.gradle'

    @classmethod
    def recognize(cls, location):
        """
        Return an iterable of packages built from the dependencies found in
        the file at `location`, or None if `location` is not a manifest
        handled by this class.
        """
        if cls.is_manifest(location):
            return build_package(cls, get_dependencies(location))
        return None
16 changes: 16 additions & 0 deletions tests/packagedcode/data/build_gradle/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Sample dependencies block used as test data for the build.gradle
// dependency detection. The rule names in the comments below presumably refer
// to the grammar rules of src/packagedcode/build_gradle.py that each
// declaration is meant to exercise -- TODO confirm against the expected results.
dependencies {
// DEPENDENCY-5
api dependencies.lombok

// DEPENDENCY-4
api group: 'com.google', name: 'guava', version: '1.0'

// DEPENDENCY-4
usageDependencies group: 'org.apache', name: 'commons', version: '1.0'

// DEPENDENCY-1
// (list entries wrapped in a parenthesized call, see also NESTED-DEPENDENCY-1)
runtimeOnly(
[group: 'org.jacoco', name: 'org.jacoco.ant', version: '0.7.4.201502262128', classifier: "nodeps", transitive: false],
[group: 'org.jacoco', name: 'org.jacoco.agent', version: '0.7.4.201502262128', classifier: "runtime", transitive: false]
)
}
Loading