diff --git a/news/5898.bugfix.rst b/news/5898.bugfix.rst
new file mode 100644
index 0000000000..0b9c5c264a
--- /dev/null
+++ b/news/5898.bugfix.rst
@@ -0,0 +1 @@
+Handle more variations in private index html to improve hash collection.
diff --git a/pipenv/project.py b/pipenv/project.py
index ca173fbcaf..7a8978f028 100644
--- a/pipenv/project.py
+++ b/pipenv/project.py
@@ -292,17 +292,16 @@ def get_hashes_from_pypi(self, ireq, source):
             return None
 
     def get_hashes_from_remote_index_urls(self, ireq, source):
-        pkg_url = f"{source['url']}/{ireq.name}/"
+        normalized_name = normalize_name(ireq.name)
+        url_name = normalized_name.replace(".", "-")
+        pkg_url = f"{source['url']}/{url_name}/"
         session = self.get_requests_session_for_source(source)
+
         try:
             collected_hashes = set()
-            # Grab the hashes from the new warehouse API.
             response = session.get(pkg_url, timeout=10)
-            # Create an instance of the parser
             parser = PackageIndexHTMLParser()
-            # Feed the HTML to the parser
             parser.feed(response.text)
-            # Extract hrefs
             hrefs = parser.urls
 
             version = ""
@@ -310,19 +309,45 @@ def get_hashes_from_remote_index_urls(self, ireq, source):
                 spec = next(iter(s for s in ireq.specifier), None)
                 if spec:
                     version = spec.version
+
+            # We'll check if the href looks like a version-specific page (i.e., ends with '/')
             for package_url in hrefs:
-                if version in parse.unquote(package_url):
+                parsed_url = parse.urlparse(package_url)
+                if version in parsed_url.path and parsed_url.path.endswith("/"):
+                    # This might be a version-specific page. Fetch and parse it
+                    version_url = urljoin(pkg_url, package_url)
+                    version_response = session.get(version_url, timeout=10)
+                    version_parser = PackageIndexHTMLParser()
+                    version_parser.feed(version_response.text)
+                    version_hrefs = version_parser.urls
+
+                    # Process these new hrefs as potential wheels
+                    for v_package_url in version_hrefs:
+                        url_params = parse.urlparse(v_package_url).fragment
+                        params_dict = parse.parse_qs(url_params)
+                        if params_dict.get(FAVORITE_HASH):
+                            collected_hashes.add(params_dict[FAVORITE_HASH][0])
+                        else:  # Fallback to downloading the file to obtain hash
+                            v_package_full_url = urljoin(version_url, v_package_url)
+                            link = Link(v_package_full_url)
+                            file_hash = self.get_file_hash(session, link)
+                            if file_hash:
+                                collected_hashes.add(file_hash)
+                elif version in parse.unquote(package_url):
+                    # Process the current href as a potential wheel from the main page
                     url_params = parse.urlparse(package_url).fragment
                     params_dict = parse.parse_qs(url_params)
                     if params_dict.get(FAVORITE_HASH):
                         collected_hashes.add(params_dict[FAVORITE_HASH][0])
                     else:  # Fallback to downloading the file to obtain hash
-                        package_url = urljoin(source["url"], package_url)
-                        link = Link(package_url)
+                        package_full_url = urljoin(pkg_url, package_url)
+                        link = Link(package_full_url)
                         file_hash = self.get_file_hash(session, link)
                         if file_hash:
                             collected_hashes.add(file_hash)
+
             return self.prepend_hash_types(collected_hashes, FAVORITE_HASH)
+
         except (ValueError, KeyError, ConnectionError):
             if self.s.is_verbose():
                 click.echo(
@@ -1198,8 +1223,12 @@ def add_pipfile_entry_to_pipfile(self, name, normalized_name, entry, category=No
         return newly_added, category, normalized_name
 
     def src_name_from_url(self, index_url):
-        name, _, tld_guess = urllib.parse.urlsplit(index_url).netloc.rpartition(".")
-        src_name = name.replace(".", "")
+        location = urllib.parse.urlsplit(index_url).netloc
+        if "." in location:
+            name, _, tld_guess = location.rpartition(".")
+        else:
+            name = location
+        src_name = name.replace(".", "").replace(":", "")
         try:
             self.get_source(name=src_name)
         except SourceNotFound:
@@ -1221,7 +1250,7 @@ def add_index_to_pipfile(self, index, verify_ssl=True):
             with contextlib.suppress(SourceNotFound):
                 source = self.get_source(name=index)
 
-        if source is not None:
+        if source is not None and source.get("name"):
            return source["name"]
         source = {"url": index, "verify_ssl": verify_ssl}
         source["name"] = self.src_name_from_url(index)
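
For context on the hash-collection change above: both branches pull the preferred hash out of the URL fragment (e.g. "#sha256=...") that PEP 503 simple indexes may attach to file links, and only fall back to downloading the file when no fragment is present. A minimal standalone sketch of that fragment parsing, using only the standard library (the sample hrefs and the hard-coded "sha256" stand-in for FAVORITE_HASH are illustrative, not taken from the patch):

from urllib.parse import parse_qs, urlparse

FAVORITE_HASH = "sha256"  # illustrative stand-in for pipenv's preferred hash name

hrefs = [
    "demo_pkg-1.0-py3-none-any.whl#sha256=0a1b2c3d",  # hash available in the fragment
    "demo_pkg-1.0.tar.gz",  # no fragment: the real code downloads and hashes the file
]

collected_hashes = set()
for href in hrefs:
    # parse_qs on the fragment yields {"sha256": ["0a1b2c3d"]} when a hash is present
    params_dict = parse_qs(urlparse(href).fragment)
    if params_dict.get(FAVORITE_HASH):
        collected_hashes.add(params_dict[FAVORITE_HASH][0])

print(collected_hashes)  # -> {'0a1b2c3d'}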