From 72b023b78327732017bd0a14f4f11e16a503b691 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Wed, 13 Nov 2024 10:25:19 +1000 Subject: [PATCH 1/4] fix: block terminal prompts in find source Signed-off-by: Ben Selwyn-Smith --- src/macaron/repo_finder/repo_finder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/macaron/repo_finder/repo_finder.py b/src/macaron/repo_finder/repo_finder.py index 1a58fc575..b0b3cc317 100644 --- a/src/macaron/repo_finder/repo_finder.py +++ b/src/macaron/repo_finder/repo_finder.py @@ -235,7 +235,9 @@ def get_tags_via_git_remote(repo: str) -> dict[str, str] | None: """ tags = {} try: - tag_data = git.cmd.Git().ls_remote("--tags", repo) + command = git.cmd.Git() + command.update_environment(GIT_TERMINAL_PROMPT="") + tag_data = command.ls_remote("--tags", repo) except git.exc.GitCommandError as error: logger.debug("Failed to retrieve tags: %s", error) return None From 9c55252fd49510126e43bf709beb5508bff1f904 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Fri, 15 Nov 2024 11:10:29 +1000 Subject: [PATCH 2/4] chore: use command line git for ls-remote; disable stdin; add integration test for bad repository Signed-off-by: Ben Selwyn-Smith --- src/macaron/repo_finder/repo_finder.py | 22 +++++----- src/macaron/slsa_analyzer/git_url.py | 40 +++++++++++++++++++ .../cases/find_source_fail/test.yaml | 20 ++++++++++ 3 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 tests/integration/cases/find_source_fail/test.yaml diff --git a/src/macaron/repo_finder/repo_finder.py b/src/macaron/repo_finder/repo_finder.py index b0b3cc317..bb74f0c78 100644 --- a/src/macaron/repo_finder/repo_finder.py +++ b/src/macaron/repo_finder/repo_finder.py @@ -36,7 +36,6 @@ import os from urllib.parse import ParseResult, urlunparse -import git from packageurl import PackageURL from macaron.config.defaults import defaults @@ -47,7 +46,7 @@ from macaron.repo_finder.repo_finder_deps_dev import DepsDevRepoFinder from macaron.repo_finder.repo_finder_java import JavaRepoFinder from macaron.repo_finder.repo_utils import generate_report, prepare_repo -from macaron.slsa_analyzer.git_url import GIT_REPOS_DIR +from macaron.slsa_analyzer.git_url import GIT_REPOS_DIR, list_remote_references logger: logging.Logger = logging.getLogger(__name__) @@ -170,11 +169,11 @@ def find_source(purl_string: str, input_repo: str | None) -> bool: # Disable other loggers for cleaner output. logging.getLogger("macaron.slsa_analyzer.analyzer").disabled = True - logging.getLogger("macaron.slsa_analyzer.git_url").disabled = True if defaults.getboolean("repofinder", "find_source_should_clone"): logger.debug("Preparing repo: %s", found_repo) repo_dir = os.path.join(global_config.output_path, GIT_REPOS_DIR) + logging.getLogger("macaron.slsa_analyzer.git_url").disabled = True git_obj = prepare_repo( repo_dir, found_repo, @@ -233,14 +232,17 @@ def get_tags_via_git_remote(repo: str) -> dict[str, str] | None: dict[str] A dictionary of tags mapped to their commits, or None if the operation failed.. """ - tags = {} - try: - command = git.cmd.Git() - command.update_environment(GIT_TERMINAL_PROMPT="") - tag_data = command.ls_remote("--tags", repo) - except git.exc.GitCommandError as error: - logger.debug("Failed to retrieve tags: %s", error) + tag_data = list_remote_references(["--tags"], repo) + if not tag_data: return None + tags = {} + # try: + # command = git.cmd.Git() + # command.update_environment(GIT_TERMINAL_PROMPT="") + # tag_data = command.ls_remote("--tags", repo) + # except git.exc.GitCommandError as error: + # logger.debug("Failed to retrieve tags: %s", error) + # return None for tag_line in tag_data.splitlines(): tag_line = tag_line.strip() diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index e34c113da..59335f489 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -19,6 +19,7 @@ from pydriller.git import Git from macaron.config.defaults import defaults +from macaron.config.global_config import global_config from macaron.environment_variables import get_patched_env from macaron.errors import CloneError @@ -376,6 +377,45 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None: return Repo(path=clone_dir) +def list_remote_references(arguments: list[str], repo: str) -> str | None: + """Retrieve references from a remote repository using Git's ``ls-remote``. + + Parameters + ---------- + arguments: list[str] + The arguments to pass into the command. + repo: str + The repository to run the command on. + + Returns + ------- + str + The result of the command. + """ + try: + result = subprocess.run( # nosec B603 + args=["git", "ls-remote"] + arguments + [repo], + capture_output=True, + # By setting stdin to /dev/null and using a new session, we prevent all possible user input prompts. + stdin=subprocess.DEVNULL, + start_new_session=True, + cwd=global_config.output_path, + check=False, + ) + except (subprocess.CalledProcessError, OSError): + return None + + if result.returncode != 0: + error_string = result.stderr.decode("utf-8").strip() + if error_string.startswith("fatal: could not read Username"): + # Occurs when a repository cannot be accesses either because it does not exist, or it requires a login + # that is blocked. + logger.error("Could not access repository: %s", repo) + return None + + return str(result.stdout) + + def resolve_local_path(start_dir: str, local_path: str) -> str: """Resolve the local path and check if it's within a directory. diff --git a/tests/integration/cases/find_source_fail/test.yaml b/tests/integration/cases/find_source_fail/test.yaml new file mode 100644 index 000000000..ed998cc6c --- /dev/null +++ b/tests/integration/cases/find_source_fail/test.yaml @@ -0,0 +1,20 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Analyzing the find source command on the a non-existent repository. + +tags: +- macaron-python-package +- macaron-docker-image + +steps: +- name: Run macaron find source on private repository + kind: find-source + options: + command_args: + - -purl + - pkg:maven/com.example/example@1.0.0 + - -rp + - https://github.com/oracle/hopefully-this-repository-will-never-exist-0 + expect_fail: true From 2c13eb8147696a00c421b3720650b456d709a1cd Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Fri, 15 Nov 2024 12:47:38 +1000 Subject: [PATCH 3/4] chore: decode output of command Signed-off-by: Ben Selwyn-Smith --- src/macaron/slsa_analyzer/git_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index 59335f489..f07520837 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -413,7 +413,7 @@ def list_remote_references(arguments: list[str], repo: str) -> str | None: logger.error("Could not access repository: %s", repo) return None - return str(result.stdout) + return result.stdout.decode("utf-8") def resolve_local_path(start_dir: str, local_path: str) -> str: From 18399794ea1e5a5179664ef2951c40f1854141d7 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Fri, 15 Nov 2024 16:20:28 +1000 Subject: [PATCH 4/4] chore: addressed PR feedback Signed-off-by: Ben Selwyn-Smith --- src/macaron/repo_finder/repo_finder.py | 7 ------- src/macaron/slsa_analyzer/git_url.py | 4 +++- tests/integration/cases/find_source_fail/test.yaml | 2 +- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/macaron/repo_finder/repo_finder.py b/src/macaron/repo_finder/repo_finder.py index bb74f0c78..d9b4df1e5 100644 --- a/src/macaron/repo_finder/repo_finder.py +++ b/src/macaron/repo_finder/repo_finder.py @@ -236,13 +236,6 @@ def get_tags_via_git_remote(repo: str) -> dict[str, str] | None: if not tag_data: return None tags = {} - # try: - # command = git.cmd.Git() - # command.update_environment(GIT_TERMINAL_PROMPT="") - # tag_data = command.ls_remote("--tags", repo) - # except git.exc.GitCommandError as error: - # logger.debug("Failed to retrieve tags: %s", error) - # return None for tag_line in tag_data.splitlines(): tag_line = tag_line.strip() diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py index f07520837..a516186ac 100644 --- a/src/macaron/slsa_analyzer/git_url.py +++ b/src/macaron/slsa_analyzer/git_url.py @@ -408,9 +408,11 @@ def list_remote_references(arguments: list[str], repo: str) -> str | None: if result.returncode != 0: error_string = result.stderr.decode("utf-8").strip() if error_string.startswith("fatal: could not read Username"): - # Occurs when a repository cannot be accesses either because it does not exist, or it requires a login + # Occurs when a repository cannot be accessed either because it does not exist, or it requires a login # that is blocked. logger.error("Could not access repository: %s", repo) + else: + logger.error("Failed to retrieve remote references from repo: %s", repo) return None return result.stdout.decode("utf-8") diff --git a/tests/integration/cases/find_source_fail/test.yaml b/tests/integration/cases/find_source_fail/test.yaml index ed998cc6c..2ecd88fc1 100644 --- a/tests/integration/cases/find_source_fail/test.yaml +++ b/tests/integration/cases/find_source_fail/test.yaml @@ -2,7 +2,7 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. description: | - Analyzing the find source command on the a non-existent repository. + Analyzing the find source command on a non-existent repository. tags: - macaron-python-package