Skip to content

Commit 12d8593

Browse files
author
Trong Nhan Mai
authored
feat: obtain Java and Python artifacts from .m2 or Python virtual environment from input (#864)
Signed-off-by: Trong Nhan Mai <trong.nhan.mai@oracle.com>
1 parent f1f281f commit 12d8593

File tree

15 files changed

+917
-159
lines changed

15 files changed

+917
-159
lines changed

docs/source/pages/cli_usage/command_analyze.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Usage
2424
[-d DIGEST] [-pe PROVENANCE_EXPECTATION]
2525
[--skip-deps] [--deps-depth DEPS_DEPTH] [-g TEMPLATE_PATH]
2626
[--python-venv PYTHON_VENV]
27+
[--local-maven-repo LOCAL_MAVEN_REPO]
2728
2829
-------
2930
Options
@@ -79,6 +80,10 @@ Options
7980

8081
The path to the Python virtual environment of the target software component.
8182

83+
.. option:: --local-maven-repo LOCAL_MAVEN_REPO
84+
85+
The path to the local ``.m2`` directory. If this option is not used, Macaron will use the default location at ``$HOME/.m2``.
86+
8287
-----------
8388
Environment
8489
-----------

scripts/release_scripts/run_macaron.sh

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,14 +328,18 @@ if [[ $command == "analyze" ]]; then
328328
python_venv_path="$2"
329329
shift
330330
;;
331+
--local-maven-repo)
332+
local_maven_repo="$2"
333+
shift
334+
;;
331335
*)
332336
rest_command+=("$1")
333337
;;
334338
esac
335339
shift
336340
done
337341
elif [[ $command == "verify-policy" ]]; then
338-
while [[ $# -gt 0 ]]; do
342+
while [[ $# -gt 0 ]]; do
339343
case $1 in
340344
-d|--database)
341345
arg_database="$2"
@@ -351,6 +355,15 @@ elif [[ $command == "verify-policy" ]]; then
351355
esac
352356
shift
353357
done
358+
elif [[ $command == "dump-defaults" ]]; then
359+
while [[ $# -gt 0 ]]; do
360+
case $1 in
361+
*)
362+
rest_command+=("$1")
363+
;;
364+
esac
365+
shift
366+
done
354367
fi
355368

356369
# MACARON entrypoint - Main argvs
@@ -455,6 +468,47 @@ if [[ -n "${python_venv_path:-}" ]]; then
455468
mount_dir_ro "--python-venv" "$python_venv_path" "$python_venv_in_container"
456469
fi
457470

471+
# Mount the local Maven repo from the
472+
# host file system into the container's
473+
# ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly.
474+
if [[ -n "${local_maven_repo:-}" ]]; then
475+
local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly"
476+
argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly")
477+
478+
mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container"
479+
else
480+
# Mounting default local maven repo only
481+
# when the user doesn't provide --local-maven-repo AND `analyze` command is used.
482+
if [[ "$command" == "analyze" ]]; then
483+
# We mount the host's $HOME/.m2 into the container's
484+
# ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists.
485+
if [[ -d "$HOME/.m2" ]]; then
486+
local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly"
487+
argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly")
488+
489+
mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container"
490+
# If the host's $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly
491+
# into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. And then provide
492+
# ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly into the --local-maven-repo
493+
# flag.
494+
# This is because:
495+
# - By default if --local-maven-repo is not used, Macaron uses $HOME/.m2 of the current
496+
# environment as the local maven repo.
497+
# - If --local-maven-repo is not set when Macaron is running in the Docker container, it will try to
498+
# use $HOME/.m2 WITHIN the container. This is not desirable as this $HOME/.m2 is being used
499+
# by the cyclonedx plugins for dependency resolution, which requires read write. We treat the local
500+
# maven repo as a read only directory, hence they cannot share.
501+
else
502+
local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly"
503+
output_local_maven_repo="${output}/analyze_local_maven_repo_readonly"
504+
mkdir -p "$output_local_maven_repo"
505+
argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly")
506+
507+
mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container"
508+
fi
509+
fi
510+
fi
511+
458512
# MACARON entrypoint - verify-policy command argvs
459513
# This is for macaron verify-policy command.
460514
# Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/macaron.db

src/macaron/__main__.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,29 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None
7474
sys.exit(os.EX_OSFILE)
7575
global_config.load_python_venv(analyzer_single_args.python_venv)
7676

77+
# Set local maven repo path.
78+
if analyzer_single_args.local_maven_repo is None:
79+
# Load the default user local .m2 directory.
80+
# Exit on error if $HOME is not set or empty.
81+
home_dir = os.getenv("HOME")
82+
if not home_dir:
83+
logger.critical("Environment variable HOME is not set.")
84+
sys.exit(os.EX_USAGE)
85+
86+
local_maven_repo = os.path.join(home_dir, ".m2")
87+
if not os.path.isdir(local_maven_repo):
88+
logger.debug("The default local Maven repo at %s does not exist. Ignore ...")
89+
global_config.local_maven_repo = None
90+
91+
global_config.local_maven_repo = local_maven_repo
92+
else:
93+
user_provided_local_maven_repo = analyzer_single_args.local_maven_repo
94+
if not os.path.isdir(user_provided_local_maven_repo):
95+
logger.error("The user provided local Maven repo at %s is not valid.", user_provided_local_maven_repo)
96+
sys.exit(os.EX_USAGE)
97+
98+
global_config.local_maven_repo = user_provided_local_maven_repo
99+
77100
analyzer = Analyzer(global_config.output_path, global_config.build_log_path)
78101

79102
# Initiate reporters.
@@ -453,6 +476,14 @@ def main(argv: list[str] | None = None) -> None:
453476
),
454477
)
455478

479+
single_analyze_parser.add_argument(
480+
"--local-maven-repo",
481+
required=False,
482+
help=(
483+
"The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2"
484+
),
485+
)
486+
456487
# Dump the default values.
457488
sub_parser.add_parser(name="dump-defaults", description="Dumps the defaults.ini file to the output directory.")
458489

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
2+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
3+
4+
"""This module declares types and utilities for handling local artifacts."""
5+
6+
import fnmatch
7+
import glob
8+
import os
9+
10+
from packageurl import PackageURL
11+
12+
from macaron.artifact.maven import construct_maven_repository_path
13+
from macaron.errors import LocalArtifactFinderError
14+
15+
16+
def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None:
    """Build the glob pattern(s) that locate the local Maven artifact directory of ``maven_purl``.

    The returned pattern(s) are relative to a `<...>/.m2/repository` directory.

    Parameters
    ----------
    maven_purl : PackageURL
        A maven type PackageURL instance.

    Returns
    -------
    list[str] | None
        A list of glob patterns, or None if the PURL is not of type ``maven`` or if it
        has no namespace (group ID) or no version.

    Examples
    --------
    >>> from packageurl import PackageURL
    >>> purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0")
    >>> construct_local_artifact_dirs_glob_pattern_maven_purl(purl)
    ['com/oracle/macaron/macaron/0.13.0']
    """
    # A pattern can only be produced for a maven PURL that carries both the group ID
    # (the PURL namespace) and the version.
    is_maven = maven_purl.type == "maven"
    if not is_maven or maven_purl.namespace is None or maven_purl.version is None:
        return None

    repo_relative_dir = construct_maven_repository_path(
        maven_purl.namespace,
        maven_purl.name,
        maven_purl.version,
    )
    return [repo_relative_dir]
49+
50+
51+
def construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None:
    """Return a list of glob pattern(s) representing directories that contain the artifacts in a Python virtual environment.

    The glob pattern(s) can be used to search in `<...>/<python_venv>/lib/python3.x/site-packages`
    directory.

    Patterns are always generated for the name exactly as it appears in the PURL. In addition,
    when the name contains ``-``, ``_`` or ``.``, a second set of patterns is generated for the
    escaped form in which every run of those characters is replaced by a single ``_``. This is the
    form the wheel specification uses for the ``.dist-info`` and ``.data`` directory names, so a
    PURL name such as ``typing-extensions`` also yields ``typing_extensions-<version>.dist-info``.

    Parameters
    ----------
    pypi_purl : PackageURL
        A pypi type PackageURL instance.

    Returns
    -------
    list[str] | None
        A list of glob patterns, or None if the PURL is not of type ``pypi`` or has no version.

    Examples
    --------
    >>> from packageurl import PackageURL
    >>> purl = PackageURL.from_string("pkg:pypi/django@1.11.1")
    >>> construct_local_artifact_dirs_glob_pattern_pypi_purl(purl)
    ['django', 'django-1.11.1.dist-info', 'django-1.11.1.data']
    """
    # Imported locally so that this function does not depend on changes to the module-level imports.
    import re  # pylint: disable=import-outside-toplevel

    if pypi_purl.type != "pypi":
        return None

    name = pypi_purl.name
    version = pypi_purl.version

    if version is None:
        return None

    # The raw PURL name always comes first so that the existing results keep their position.
    candidate_names = [name]

    # Escape the distribution name the way wheel file names (and therefore the extracted
    # ``.dist-info``/``.data`` directories) do. References:
    # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode
    escaped_name = re.sub(r"[-_.]+", "_", name)
    if escaped_name != name:
        candidate_names.append(escaped_name)

    # These patterns are from the content of a wheel file, which are extracted into the site-packages
    # directory. References:
    # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents
    glob_patterns = []
    for candidate in candidate_names:
        glob_patterns.append(candidate)
        glob_patterns.append(f"{candidate}-{version}.dist-info")
        glob_patterns.append(f"{candidate}-{version}.data")

    return glob_patterns
92+
93+
94+
def find_artifact_dirs_from_local_maven_repo(
95+
local_maven_repo: str,
96+
glob_patterns: list[str],
97+
) -> list[str]:
98+
"""Find directories that contains maven artifacts within a maven local repository.
99+
100+
``local_maven_repo`` should be in format `<...>/.m2/repository`.
101+
102+
Parameters
103+
----------
104+
local_maven_repo: str
105+
The path to the directory to find artifacts.
106+
glob_patterns: list[str]
107+
The list of glob patterns that matches to artifact directory names.
108+
109+
Returns
110+
-------
111+
list[str]
112+
The list of paths to artifact directories in the form of ``venv_site_package_path``/path/to/artifact_dir
113+
If no artifact directory is found, this list will be empty.
114+
115+
Raises
116+
------
117+
LocalArtifactFinderError
118+
If ``local_maven_repo`` doesn't exist.
119+
"""
120+
if not os.path.isdir(local_maven_repo):
121+
raise LocalArtifactFinderError(f"{local_maven_repo} doesn't exist.")
122+
123+
artifact_paths = []
124+
for pattern in glob_patterns:
125+
found_paths = glob.glob(
126+
root_dir=local_maven_repo,
127+
pathname=pattern,
128+
)
129+
130+
for found_path in found_paths:
131+
full_path = os.path.join(local_maven_repo, found_path)
132+
if os.path.isdir(full_path):
133+
artifact_paths.append(full_path)
134+
135+
return artifact_paths
136+
137+
138+
def find_artifact_dirs_from_python_venv(
    venv_site_package_path: str,
    glob_patterns: list[str],
) -> list[str]:
    """Find directories within a python virtual environment.

    For packages in the virtual environment, we will treat their name case-insensitively.
    https://packaging.python.org/en/latest/specifications/name-normalization/

    ``venv_site_package_path`` should be in format `<...>/lib/python3.*/site-packages/`.

    Parameters
    ----------
    venv_site_package_path: str
        The path to the site-packages directory to find artifacts.
    glob_patterns: list[str]
        The list of glob patterns that match artifact directory names.

    Returns
    -------
    list[str]
        The list of paths to artifact directories in the form of ``venv_site_package_path``/artifact_dir
        Only direct sub-directories are considered, and each matching directory is listed once even
        if it matches several patterns.
        If no artifact directory is found, this list will be empty.

    Raises
    ------
    LocalArtifactFinderError
        If ``venv_site_package_path`` doesn't exist or if we cannot view the sub-directory of it.
    """
    if not os.path.isdir(venv_site_package_path):
        raise LocalArtifactFinderError(f"{venv_site_package_path} doesn't exist.")

    try:
        venv_path_entries = os.listdir(venv_site_package_path)
    except (NotADirectoryError, PermissionError, FileNotFoundError) as error:
        error_msg = f"Cannot view the sub-directory of venv {venv_site_package_path}"
        raise LocalArtifactFinderError(error_msg) from error

    # Lowercase the patterns once up front instead of once per directory entry.
    lowered_patterns = [pattern.lower() for pattern in glob_patterns]

    artifact_paths = []
    for entry in venv_path_entries:
        entry_path = os.path.join(venv_site_package_path, entry)
        if not os.path.isdir(entry_path):
            continue

        # ``any`` stops at the first matching pattern, so a directory that matches several
        # patterns is still reported only once.
        lowered_entry = entry.lower()
        if any(fnmatch.fnmatch(lowered_entry, pattern) for pattern in lowered_patterns):
            artifact_paths.append(entry_path)

    return artifact_paths
191+
192+
193+
def get_local_artifact_dirs(
    purl: PackageURL,
    local_artifact_repo_path: str,
) -> list[str]:
    """Locate the directories in ``local_artifact_repo_path`` that hold the local artifacts of ``purl``.

    Each returned path (as a string) has the format
    ``local_artifact_repo_path``/path/to/artifact_dir

    Only directories are returned, never paths to individual artifact files. It is the
    responsibility of the caller to inspect each directory to obtain the required artifact.

    We assume that ``local_artifact_repo_path`` exists.

    Parameters
    ----------
    purl : PackageURL
        The purl we want to find local artifacts for.
    local_artifact_repo_path : str
        The local artifact repo directory.

    Returns
    -------
    list[str]
        The list contains the artifact directory paths. It will be empty if no artifact can be found.

    Raises
    ------
    LocalArtifactFinderError
        If the PURL type is not supported, if no search pattern can be generated for the PURL,
        or if an error happens when looking for local artifacts.
    """
    if purl.type == "maven":
        maven_patterns = construct_local_artifact_dirs_glob_pattern_maven_purl(purl)
        if maven_patterns:
            return find_artifact_dirs_from_local_maven_repo(
                local_maven_repo=local_artifact_repo_path,
                glob_patterns=maven_patterns,
            )
        raise LocalArtifactFinderError(f"Cannot generate maven artifact patterns for {purl}")

    if purl.type == "pypi":
        pypi_patterns = construct_local_artifact_dirs_glob_pattern_pypi_purl(purl)
        if pypi_patterns:
            return find_artifact_dirs_from_python_venv(
                venv_site_package_path=local_artifact_repo_path,
                glob_patterns=pypi_patterns,
            )
        raise LocalArtifactFinderError(f"Cannot generate Python package patterns for {purl}")

    # Only the maven and pypi ecosystems are handled by this function.
    raise LocalArtifactFinderError(f"Unsupported PURL type {purl.type}")

0 commit comments

Comments
 (0)