Skip to content

Commit

Permalink
Implement heuristic to get non-ASCII ZIP entries
Browse files Browse the repository at this point in the history
  • Loading branch information
uranusjr committed Aug 2, 2020
1 parent 89d8cba commit fb0c51c
Showing 1 changed file with 26 additions and 2 deletions.
28 changes: 26 additions & 2 deletions src/pip/_internal/operations/install/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,29 @@ def __init__(self, src_record_path, dest_path, zip_file):
self._zip_file = zip_file
self.changed = False

def _getinfo(self):
    # type: () -> zipfile.ZipInfo
    """Return the ZipInfo of the archive member named by src_record_path.

    On Python 3 the path is looked up directly. On Python 2 the archive
    member names are byte strings in an encoding that cannot be queried,
    so the encoding is guessed (see the heuristic below).

    Raises KeyError if no matching member is found in the archive.
    """
    if not PY2:
        return self._zip_file.getinfo(self.src_record_path)

    # Python 2 does not expose a way to detect a ZIP's encoding, so we
    # "guess" with the heuristics below:
    # 1. Try encoding the path with UTF-8.
    # 2. Check the matching info's flags for language encoding (bit 11).
    # 3. If the flag is set, assume UTF-8 is correct.
    # 4. If any of the above steps fails, fall back to getting an info
    #    with CP437 (matching Python 3).
    try:
        utf8_name = self.src_record_path.encode("utf-8")
        info = self._zip_file.getinfo(utf8_name)
    except (UnicodeEncodeError, KeyError):
        # Either the path cannot be expressed in UTF-8, or the archive has
        # no member under the UTF-8 name. A missing member must not abort
        # the lookup: the entry may be stored under its CP437 name.
        pass
    else:
        if info.flag_bits & 0x800:
            return info

    cp437_name = self.src_record_path.encode("cp437")
    return self._zip_file.getinfo(cp437_name)

def save(self):
# type: () -> None
# directory creation is lazy and after file filtering
Expand All @@ -439,11 +462,12 @@ def save(self):
if os.path.exists(self.dest_path):
os.unlink(self.dest_path)

with self._zip_file.open(self.src_record_path) as f:
zipinfo = self._getinfo()

with self._zip_file.open(zipinfo) as f:
with open(self.dest_path, "wb") as dest:
shutil.copyfileobj(f, dest)

zipinfo = self._zip_file.getinfo(self.src_record_path)
if zip_item_is_executable(zipinfo):
set_extracted_file_to_default_mode_plus_executable(self.dest_path)

Expand Down

0 comments on commit fb0c51c

Please sign in to comment.