rearrange explanatory comments
cosmicexplorer committed Aug 9, 2023
1 parent 4cb96d0 commit 4a03cc8
Showing 1 changed file with 31 additions and 16 deletions.
47 changes: 31 additions & 16 deletions src/pip/_internal/network/lazy_wheel.py
@@ -40,22 +40,24 @@ def dist_from_wheel_url(name: str, url: str, session: Session) -> BaseDistribution:
    is raised.
    """
    try:
        # After context manager exit, wheel.name will point to a deleted file path.
        # Add `delete_backing_file=False` to disable this for debugging.
        with LazyHTTPFile(url, session) as lazy_file:
            with indent_log():
                logger.debug("begin prefetching for %s", name)
                lazy_file.prefetch_contiguous_dist_info(name)
                logger.debug("done prefetching for %s", name)
            lazy_file.prefetch_contiguous_dist_info(name)

            # For read-only ZIP files, ZipFile only needs methods read,
            # seek, seekable and tell, not the whole IO protocol.
            wheel = MemoryWheel(lazy_file.name, lazy_file)
            # After context manager exit, wheel.name is an invalid file by intention.
            return get_wheel_distribution(wheel, canonicalize_name(name))
    except (BadZipFile, UnsupportedWheel):
        raise InvalidWheel(url, name)
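
For orientation, a minimal sketch of how this helper is driven (not part of this diff): it assumes pip's internal PipSession and a placeholder wheel URL, and these internal APIs can change between pip releases.

    from pip._internal.network.lazy_wheel import dist_from_wheel_url
    from pip._internal.network.session import PipSession

    session = PipSession()
    # Placeholder URL: any wheel served by a host that honors HTTP range requests.
    url = "https://example.invalid/packages/example-1.0-py3-none-any.whl"
    dist = dist_from_wheel_url("example", url, session)
    print(dist.canonical_name, dist.version)  # metadata obtained without a full download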


class ReadOnlyIOWrapper(BinaryIO):
"""Implement read-side ``BinaryIO`` methods wrapping an inner ``BinaryIO``.
For read-only ZIP files, ``ZipFile`` only needs read, seek, seekable, and tell.
``LazyHTTPFile`` subclasses this and therefore must only implement its lazy read().
"""

_file: BinaryIO

def __init__(self, inner: BinaryIO) -> None:
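
As a standalone illustration of the docstring's claim above (not pip's implementation), an object exposing only read, seek, seekable, and tell is enough for ZipFile to read an archive:

    import io
    from zipfile import ZipFile

    class MinimalReadOnly:
        """Read-side file object offering only read, seek, seekable, and tell."""

        def __init__(self, inner: io.BytesIO) -> None:
            self._file = inner

        def read(self, size: int = -1) -> bytes:
            return self._file.read(size)

        def seek(self, offset: int, whence: int = 0) -> int:
            return self._file.seek(offset, whence)

        def tell(self) -> int:
            return self._file.tell()

        def seekable(self) -> bool:
            return True

    # Build a tiny in-memory archive and read it back through the minimal wrapper.
    buf = io.BytesIO()
    with ZipFile(buf, "w") as zf:
        zf.writestr("example-1.0.dist-info/METADATA", "Name: example\nVersion: 1.0\n")
    with ZipFile(MinimalReadOnly(io.BytesIO(buf.getvalue()))) as zf:
        print(zf.read("example-1.0.dist-info/METADATA").decode())
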
@@ -160,8 +162,8 @@ def __next__(self) -> bytes:
# guess at how the server will interpret caching directives for range requests (we
# especially don't want the server to return 200 OK with the entire file contents).
# TODO: consider If-Match (etag) and handling 304 File Modified, here or in the
# unpack_url() method.
_UNCACHED_HEADERS = {**HEADERS, 'Cache-Control': 'no-cache'}
# unpack_url() method for non-lazy downloads.
_UNCACHED_HEADERS = {**HEADERS, "Cache-Control": "no-cache"}
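
To make concrete what these headers guard against (a hedged sketch, not code from this module, reusing the _UNCACHED_HEADERS dict defined just above; the URL is a placeholder and any requests-compatible session would do): a ranged GET sent with the uncached headers should come back as 206 Partial Content, never as a cached 200 OK carrying the whole wheel.

    import requests

    session = requests.Session()
    url = "https://example.invalid/packages/example-1.0-py3-none-any.whl"  # placeholder
    headers = {**_UNCACHED_HEADERS, "Range": "bytes=-8000"}  # suffix range: last ~8 kB
    response = session.get(url, headers=headers)
    # 206 means the server honored the range request; a 200 response here would mean
    # the entire file came back, defeating the point of the lazy download.
    assert response.status_code == 206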


class LazyHTTPFile(ReadOnlyIOWrapper):
@@ -182,9 +184,9 @@ def __init__(
        url: str,
        session: Session,
        initial_chunk_size: int = _DEFAULT_INITIAL_FETCH,
        delete_backing_file: bool = True,
    ) -> None:
        # Add delete=False and print the file's `.name` to debug invalid virtual zips.
        super().__init__(cast(BinaryIO, NamedTemporaryFile()))
        super().__init__(cast(BinaryIO, NamedTemporaryFile(delete=delete_backing_file)))

        self._request_count = 0
        self._session = session
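
A small aside on the new delete_backing_file knob (an illustration, not pip's code): it is forwarded to NamedTemporaryFile(delete=...), so passing False keeps the partially materialized wheel on disk after the context manager exits, which is what enables the debugging workflow mentioned in the comments earlier in this diff.

    import os
    from tempfile import NamedTemporaryFile

    # delete=True (the default) removes the file as soon as it is closed; delete=False
    # leaves it behind so the virtual zip can be inspected, e.g. with `unzip -l`.
    with NamedTemporaryFile(delete=False) as tmp:
        tmp.write(b"PK...")  # stand-in for the lazily downloaded wheel bytes
        kept_path = tmp.name
    print(kept_path, os.path.exists(kept_path))  # still on disk after exit
    os.unlink(kept_path)  # manual cleanup once debugging is done
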
@@ -387,16 +389,27 @@ def _download(self, start: int, end: int) -> None:
            self._file.write(chunk)

    def prefetch_contiguous_dist_info(self, name: str) -> None:
        """
        Read contents of entire dist-info section of wheel.
        """Read contents of entire dist-info section of wheel.
        pip will read every entry in this directory when generating a dist from a wheel,
        so prepopulating the file contents avoids waiting for multiple range requests.
        We know pip will read every entry in this directory when generating a dist from
        a wheel, so prepopulating the file contents avoids waiting for further
        range requests.
        """
        # Clarify in debug output which requests were sent during __init__, which during
        # the prefetch, and which during the dist metadata generation.
        with indent_log():
            logger.debug("begin prefetching dist-info for %s", name)
            self._prefetch_contiguous_dist_info(name)
            logger.debug("done prefetching dist-info for %s", name)

    def _prefetch_contiguous_dist_info(self, name: str) -> None:
        dist_info_prefix = re.compile(r"^[^/]*\.dist-info/")
        start: int | None = None
        end: int | None = None

        # This may perform further requests if __init__() did not pull in the entire
        # central directory at the end of the file (although _DEFAULT_INITIAL_FETCH
        # should be set large enough to avoid this).
        zf = ZipFile(self)

        for info in zf.infolist():
@@ -405,12 +418,14 @@ def prefetch_contiguous_dist_info(self, name: str) -> None:
                    start = info.header_offset
                continue
            else:
                # The last .dist-info/ entry may be before the end of the file if the
                # wheel's entries are sorted lexicographically (which is unusual).
                if not dist_info_prefix.search(info.filename):
                    end = info.header_offset
                    break
        if start is None:
            raise UnsupportedWheel(
                f"no {dist_info_prefix} directory found for {name} in {self.name}"
                f"no {dist_info_prefix!r} directory found for {name} in {self.name}"
            )
        # If the last entries of the zip are the .dist-info/ dir (as usual), then give
        # us everything until the start of the central directory.
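
The same range computation can be tried against a wheel that is already on disk (a self-contained sketch mirroring the loop above, not pip's code; the filename is a placeholder): start is the header offset of the first .dist-info/ entry, and end is the offset of the first entry after that run, falling back to the start of the zip's central directory when the run extends to the end of the archive.

    import re
    from zipfile import ZipFile

    dist_info_prefix = re.compile(r"^[^/]*\.dist-info/")
    start = end = None
    with ZipFile("example-1.0-py3-none-any.whl") as zf:  # placeholder path
        for info in zf.infolist():
            if start is None:
                if dist_info_prefix.search(info.filename):
                    start = info.header_offset
            elif not dist_info_prefix.search(info.filename):
                end = info.header_offset
                break
        if start is not None and end is None:
            end = zf.start_dir  # .dist-info/ entries run to the end of the archive
    print(f"dist-info spans bytes [{start}, {end})")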
