Skip to content

Commit 621fad7

Browse files
authored
chore: improve commit finder and input handling (#741)
Signed-off-by: Ben Selwyn-Smith <benselwynsmith@googlemail.com>
1 parent 51a1aad commit 621fad7

File tree

4 files changed

+87
-5
lines changed

4 files changed

+87
-5
lines changed

scripts/dev_scripts/integration_tests.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,9 @@ echo "apache/maven: Analyzing using a CycloneDx SBOM file of a software componen
262262
echo -e "----------------------------------------------------------------------------------\n"
263263
SBOM_FILE=$WORKSPACE/tests/dependency_analyzer/cyclonedx/resources/private_mirror_apache_maven.json
264264
DEP_EXPECTED=$WORKSPACE/tests/dependency_analyzer/expected_results/private_mirror_apache_maven.json
265-
DEP_RESULT=$WORKSPACE/output/reports/private_domain_com/apache/maven/dependencies.json
265+
DEP_RESULT=$WORKSPACE/output/reports/private-domain_com/apache/maven/dependencies.json
266266

267-
$RUN_MACARON analyze -purl pkg:private_domain.com/apache/maven -sbom "$SBOM_FILE" || log_fail
267+
$RUN_MACARON analyze -purl pkg:private-domain.com/apache/maven -sbom "$SBOM_FILE" || log_fail
268268

269269
check_or_update_expected_output $COMPARE_DEPS $DEP_RESULT $DEP_EXPECTED || log_fail
270270

src/macaron/repo_finder/commit_finder.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@
101101
hex_only_pattern = re.compile("^[0-9a-f]+$", flags=re.IGNORECASE)
102102
numeric_only_pattern = re.compile("^[0-9]+$")
103103
versioned_string = re.compile("^([a-z]+)(0*)([1-9]+[0-9]*)$", flags=re.IGNORECASE) # e.g. RC1, M5, etc.
104+
multiple_zero_pattern = re.compile("^0+$")
104105

105106

106107
class AbstractPurlType(Enum):
@@ -307,6 +308,7 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
307308
if not version:
308309
return None, []
309310

311+
# Escape input to prevent it being treated as regex.
310312
name = re.escape(name)
311313

312314
# The version is split on non-alphanumeric characters to separate the version parts from the non-version parts.
@@ -328,8 +330,8 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
328330

329331
this_version_pattern = ""
330332
has_non_numeric_suffix = False
331-
# Detect versions that end with a zero, so the zero can be made optional.
332-
has_trailing_zero = len(split) > 2 and split[-1] == "0"
333+
# Detect versions that end with a zero number (0, 00, 000, etc.), so that part can be made optional.
334+
has_trailing_zero = len(split) > 2 and multiple_zero_pattern.match(split[-1])
333335
for count, part in enumerate(parts):
334336
numeric_only = numeric_only_pattern.match(part)
335337

@@ -344,7 +346,7 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
344346
# This part will be made optional in the regex if it matches the correct requirements:
345347
# - There is more than one version part, e.g. 1.2 (2), 1.2.3 (3)
346348
# - AND either of:
347-
# - This is the last version part and it has a trailing zero, e.g. 10
349+
# - This is the last version part, and it consists only of zeros, e.g. 0 or 00
348350
# - OR has_non_numeric_suffix is True (See its comments above for more details)
349351
optional = len(split) > 1 and ((count == len(split) - 1 and has_trailing_zero) or has_non_numeric_suffix)
350352

@@ -356,6 +358,10 @@ def _build_version_pattern(name: str, version: str) -> tuple[Pattern | None, lis
356358
elif count > 1:
357359
this_version_pattern = this_version_pattern + INFIX_3
358360

361+
if numeric_only:
362+
# Allow for any number of preceding zeros when the part is numeric only. E.g. 000 + 1, 0 + 20
363+
this_version_pattern = this_version_pattern + "0*"
364+
359365
# Add the current part to the pattern.
360366
this_version_pattern = this_version_pattern + part
361367

src/macaron/slsa_analyzer/analyzer.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""This module handles cloning and analyzing a Git repo."""
55
import logging
66
import os
7+
import re
78
import sys
89
from datetime import datetime, timezone
910
from pathlib import Path
@@ -304,6 +305,9 @@ def run_single(
304305
repo_id = config.get_value("id")
305306
try:
306307
parsed_purl = Analyzer.parse_purl(config)
308+
# Validate PURL type as per https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst
309+
if parsed_purl and not re.match(r"^[a-z.+-][a-z0-9.+-]*$", parsed_purl.type):
310+
raise InvalidPURLError(f"Invalid purl type: {parsed_purl.type}")
307311
except InvalidPURLError as error:
308312
logger.error(error)
309313
return Record(

tests/e2e/repo_finder/resources/tags.json

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303726,5 +303726,77 @@
303726303726
"comment": ""
303727303727
}
303728303728
]
303729+
},
303730+
{
303731+
"tags": [
303732+
"2015.04.28",
303733+
"2015.09.06",
303734+
"2015.09.06.1",
303735+
"2015.09.06.2",
303736+
"2015.11.20",
303737+
"2015.11.20.1",
303738+
"2016.02.28",
303739+
"2016.08.02",
303740+
"2016.08.08",
303741+
"2016.08.31",
303742+
"2016.09.26",
303743+
"2017.01.23",
303744+
"2017.04.17",
303745+
"2017.07.27",
303746+
"2017.07.27.1",
303747+
"2017.11.05",
303748+
"2018.01.18",
303749+
"2018.04.16",
303750+
"2018.08.13",
303751+
"2018.08.24",
303752+
"2018.10.15",
303753+
"2018.11.29",
303754+
"2019.03.09",
303755+
"2019.06.16",
303756+
"2019.09.11",
303757+
"2019.11.28",
303758+
"2020.04.05",
303759+
"2020.04.05.1",
303760+
"2020.04.05.2",
303761+
"2020.06.20",
303762+
"2020.11.08",
303763+
"2020.12.05",
303764+
"2021.05.30",
303765+
"2021.10.08",
303766+
"2022.05.18",
303767+
"2022.05.18.1",
303768+
"2022.06.15",
303769+
"2022.06.15.1",
303770+
"2022.06.15.2",
303771+
"2022.09.14",
303772+
"2022.09.24",
303773+
"2022.12.07",
303774+
"2023.05.07",
303775+
"2023.07.22",
303776+
"2023.11.17",
303777+
"2024.02.02",
303778+
"v1.0.0",
303779+
"v1.0.1"
303780+
],
303781+
"artifacts": [
303782+
{
303783+
"purl": "pkg:pypi/certifi@2024.2.2",
303784+
"repo": "https://github.com/certifi/python-certifi",
303785+
"match": "2024.02.02",
303786+
"comment": ""
303787+
},
303788+
{
303789+
"purl": "pkg:pypi/certifi@2022.5.18",
303790+
"repo": "https://github.com/certifi/python-certifi",
303791+
"match": "2022.05.18",
303792+
"comment": ""
303793+
},
303794+
{
303795+
"purl": "pkg:pypi/certifi@1.0.1",
303796+
"repo": "https://github.com/certifi/python-certifi",
303797+
"match": "v1.0.1",
303798+
"comment": ""
303799+
}
303800+
]
303729303801
}
303730303802
]

0 commit comments

Comments
 (0)