Skip to content

Commit

Permalink
Merge pull request #164 from nexB/support-custom-repo-url
Browse files Browse the repository at this point in the history
Add support for custom maven repository URLs
  • Loading branch information
JonoYang authored Aug 8, 2023
2 parents c775297 + 3b2493b commit cd5a1c5
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 11 deletions.
13 changes: 13 additions & 0 deletions minecode/tests/test_maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,6 +726,19 @@ def test_map_maven_package(self):
expected_purl_str = 'pkg:maven/classworlds/classworlds@1.1'
self.assertEqual(expected_purl_str, package.purl)

def test_map_maven_package_custom_repo_url(self):
    """
    Map a maven purl that carries a `repository_url` qualifier and check
    that the single Package created has a download_url rooted at that
    custom repository URL.
    """
    packages = packagedb.models.Package.objects

    # No Package exists before the mapping runs.
    self.assertEqual(0, packages.all().count())

    purl = PackageURL.from_string(
        "pkg:maven/org.eclipse.core/runtime@20070801?repository_url=https://packages.atlassian.com/mvn/maven-atlassian-external/"
    )
    maven_visitor.map_maven_package(purl, packagedb.models.PackageContentType.BINARY)

    # Exactly one Package exists after the mapping.
    self.assertEqual(1, packages.all().count())

    mapped_package = packages.all().first()
    # NOTE(review): the `//` in the expected URL presumably comes from the
    # trailing slash of the repository_url qualifier -- confirm against
    # the URL-building helper.
    self.assertEqual(
        'https://packages.atlassian.com/mvn/maven-atlassian-external//org/eclipse/core/runtime/20070801/runtime-20070801.jar',
        mapped_package.download_url,
    )


def test_process_request(self):
purl_str = 'pkg:maven/org.apache.twill/twill-core@0.12.0'
download_url = 'https://repo1.maven.org/maven2/org/apache/twill/twill-core/0.12.0/twill-core-0.12.0.jar'
Expand Down
38 changes: 27 additions & 11 deletions minecode/visitors/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@
logger.setLevel(logging.DEBUG)


MAVEN_BASE_URL = 'https://repo1.maven.org/maven2'


class GzipFileWithTrailing(gzip.GzipFile):
"""
A subclass of gzip.GzipFile supporting files with trailing garbage. Ignore
Expand Down Expand Up @@ -104,7 +107,7 @@ def get_seeds(self):
# also has npm mirrors: https://maven-eu.nuxeo.org/nexus/#view-repositories;npmjs~browsestorage


def get_pom_text(namespace, name, version, qualifiers={}):
def get_pom_text(namespace, name, version, qualifiers={}, base_url=MAVEN_BASE_URL):
"""
Return the contents of the POM file of the package described by the purl
field arguments in a string.
Expand All @@ -116,7 +119,8 @@ def get_pom_text(namespace, name, version, qualifiers={}):
namespace=namespace,
name=name,
version=version,
qualifiers=qualifiers
qualifiers=qualifiers,
base_url=base_url,
)
# Get and parse POM info
pom_url = urls['api_data_url']
Expand Down Expand Up @@ -151,7 +155,7 @@ def get_package_sha1(package):
return sha1


def fetch_parent(pom_text):
def fetch_parent(pom_text, base_url=MAVEN_BASE_URL):
"""
Return the parent pom text of `pom_text`, or None if `pom_text` has no parent.
"""
Expand All @@ -171,20 +175,21 @@ def fetch_parent(pom_text):
namespace=parent_namespace,
name=parent_name,
version=parent_version,
qualifiers={}
qualifiers={},
base_url=base_url,
)
return parent_pom_text


def get_ancestry(pom_text):
def get_ancestry(pom_text, base_url=MAVEN_BASE_URL):
"""
Return a list of pom text of the ancestors of `pom_text`. The list is ordered
from oldest ancestor to newest. The list is empty if there is no parent pom.
"""
ancestors = []
has_parent = True
while has_parent:
parent_pom_text = fetch_parent(pom_text)
parent_pom_text = fetch_parent(pom_text=pom_text, base_url=base_url)
if not parent_pom_text:
has_parent = False
else:
Expand All @@ -193,7 +198,7 @@ def get_ancestry(pom_text):
return reversed(ancestors)


def get_merged_ancestor_package_from_maven_package(package):
def get_merged_ancestor_package_from_maven_package(package, base_url=MAVEN_BASE_URL):
"""
Merge package details of a package with its ancestor pom
and return the merged package.
Expand All @@ -205,6 +210,7 @@ def get_merged_ancestor_package_from_maven_package(package):
namespace=package.namespace,
version=package.version,
qualifiers=package.qualifiers,
base_url=base_url,
)
merged_package = merge_ancestors(
ancestor_pom_texts=get_ancestry(pom_text),
Expand Down Expand Up @@ -279,11 +285,17 @@ def map_maven_package(package_url, package_content):
db_package = None
error = ''

if "repository_url" in package_url.qualifiers:
base_url = package_url.qualifiers["repository_url"]
else:
base_url = MAVEN_BASE_URL

pom_text = get_pom_text(
namespace=package_url.namespace,
name=package_url.name,
version=package_url.version,
qualifiers=package_url.qualifiers
qualifiers=package_url.qualifiers,
base_url=base_url,
)
if not pom_text:
msg = f'Package does not exist on maven: {package_url}'
Expand All @@ -295,18 +307,22 @@ def map_maven_package(package_url, package_content):
'maven_pom',
'maven',
'Java',
text=pom_text
text=pom_text,
base_url=base_url,
)
ancestor_pom_texts = get_ancestry(pom_text)
ancestor_pom_texts = get_ancestry(pom_text=pom_text, base_url=base_url)
package = merge_ancestors(
ancestor_pom_texts=ancestor_pom_texts,
package=package
)


urls = get_urls(
namespace=package_url.namespace,
name=package_url.name,
version=package_url.version,
qualifiers=package_url.qualifiers
qualifiers=package_url.qualifiers,
base_url=base_url,
)
# In the case of looking up a maven package with qualifiers of
# `classifiers=sources`, the purl of the package created from the pom does
Expand Down

0 comments on commit cd5a1c5

Please sign in to comment.