pypa · cjerdonek · Jan 24, 2019 · Jan 24, 2019 · Jan 24, 2019
diff --git a/news/6165.bugfix b/news/6165.bugfix
@@ -0,0 +1 @@
+Allow ``RECORD`` lines with more than three elements, and display a warning.
diff --git a/src/pip/_internal/wheel.py b/src/pip/_internal/wheel.py
@@ -55,7 +55,7 @@
     from pip._internal.cache import WheelCache  # noqa: F401
     from pip._internal.pep425tags import Pep425Tag  # noqa: F401
 
-    InstalledCSVRow = Tuple[str, Union[str, Text], str]
+    InstalledCSVRow = Tuple[str, ...]
 
 
 VERSION_COMPATIBLE = (1, 0)
@@ -64,6 +64,10 @@
 logger = logging.getLogger(__name__)
 
 
+def normpath(src, p):
+    return os.path.relpath(src, p).replace(os.path.sep, '/')
+
+
 def rehash(path, blocksize=1 << 20):
     # type: (str, int) -> Tuple[str, str]
     """Return (hash, length) for path using hashlib.sha256()"""
@@ -255,6 +259,35 @@ def sorted_outrows(outrows):
     return sorted(outrows, key=lambda row: tuple(str(x) for x in row))
 
 
+def get_csv_rows_for_installed(
+    old_csv_rows,  # type: Iterable[List[str]]
+    installed,  # type: Dict[str, str]
+    changed,  # type: set
+    generated,  # type: List[str]
+    lib_dir,  # type: str
+):
+    # type: (...) -> List[InstalledCSVRow]
+    installed_rows = []  # type: List[InstalledCSVRow]
+    for row in old_csv_rows:
+        if len(row) > 3:
+            logger.warning(
+                'RECORD line has more than three elements: {}'.format(row)
+            )
+        fpath = row[0]
+        fpath = installed.pop(fpath, fpath)
+        if fpath in changed:
+            digest, length = rehash(fpath)
+            row[1] = digest
+            row[2] = length
+        installed_rows.append(tuple(row))
+    for f in generated:
+        digest, length = rehash(f)
+        installed_rows.append((normpath(f, lib_dir), digest, str(length)))
+    for f in installed:
+        installed_rows.append((installed[f], '', ''))
+    return installed_rows
+
+
 def move_wheel_files(
     name,  # type: str
     req,  # type: Requirement
@@ -305,9 +338,6 @@ def move_wheel_files(
                 compileall.compile_dir(source, force=True, quiet=True)
         logger.debug(stdout.getvalue())
 
-    def normpath(src, p):
-        return os.path.relpath(src, p).replace(os.path.sep, '/')
-
     def record_installed(srcfile, destfile, modified=False):
         """Map archive RECORD paths to installation RECORD paths."""
         oldpath = normpath(srcfile, wheeldir)
@@ -559,28 +589,16 @@ def _get_script_text(entry):
     shutil.move(temp_installer, installer)
     generated.append(installer)
 
-    def get_csv_rows_for_installed(old_csv_rows):
-        # type: (Iterable[List[str]]) -> List[InstalledCSVRow]
-        installed_rows = []  # type: List[InstalledCSVRow]
-        for fpath, digest, length in old_csv_rows:
-            fpath = installed.pop(fpath, fpath)
-            if fpath in changed:
-                digest, length = rehash(fpath)
-            installed_rows.append((fpath, digest, str(length)))
-        for f in generated:
-            digest, length = rehash(f)
-            installed_rows.append((normpath(f, lib_dir), digest, str(length)))
-        for f in installed:
-            installed_rows.append((installed[f], '', ''))
-        return installed_rows
-
     # Record details of all files installed
     record = os.path.join(info_dir[0], 'RECORD')
     temp_record = os.path.join(info_dir[0], 'RECORD.pip')
     with open_for_csv(record, 'r') as record_in:
         with open_for_csv(temp_record, 'w+') as record_out:
             reader = csv.reader(record_in)
-            outrows = get_csv_rows_for_installed(reader)
+            outrows = get_csv_rows_for_installed(
+                reader, installed=installed, changed=changed,
+                generated=generated, lib_dir=lib_dir,
+            )
             writer = csv.writer(record_out)
             # Sort to simplify testing.
             for row in sorted_outrows(outrows):

diff --git a/tests/unit/test_wheel.py b/tests/unit/test_wheel.py
@@ -1,6 +1,8 @@
 """Tests for wheel binary packages and .dist-info."""
+import csv
 import logging
 import os
+import textwrap
 
 import pytest
 from mock import Mock, patch
@@ -76,6 +78,63 @@ def test_sorted_outrows(outrows, expected):
     assert actual == expected
 
 
+def call_get_csv_rows_for_installed(tmpdir, text):
+    path = tmpdir.join('temp.txt')
+    path.write(text)
+
+    installed = {}
+    changed = set()
+    generated = []
+    lib_dir = '/lib/dir'
+
+    with wheel.open_for_csv(path, 'r') as f:
+        reader = csv.reader(f)
+        outrows = wheel.get_csv_rows_for_installed(
+            reader, installed=installed, changed=changed,
+            generated=generated, lib_dir=lib_dir,
+        )
+    return outrows
+
+
+def test_get_csv_rows_for_installed(tmpdir, caplog):
+    text = textwrap.dedent("""\
+    a,b,c
+    d,e,f
+    """)
+    outrows = call_get_csv_rows_for_installed(tmpdir, text)
+
+    expected = [
+        ('a', 'b', 'c'),
+        ('d', 'e', 'f'),
+    ]
+    assert outrows == expected
+    # Check there were no warnings.
+    assert len(caplog.records) == 0
+
+
+def test_get_csv_rows_for_installed__long_lines(tmpdir, caplog):
+    text = textwrap.dedent("""\
+    a,b,c,d
+    e,f,g
+    h,i,j,k
+    """)
+    outrows = call_get_csv_rows_for_installed(tmpdir, text)
+
+    expected = [
+        ('a', 'b', 'c', 'd'),
+        ('e', 'f', 'g'),
+        ('h', 'i', 'j', 'k'),
+    ]
+    assert outrows == expected
+
+    messages = [rec.message for rec in caplog.records]
+    expected = [
+        "RECORD line has more than three elements: ['a', 'b', 'c', 'd']",
+        "RECORD line has more than three elements: ['h', 'i', 'j', 'k']"
+    ]
+    assert messages == expected
+
+
 def test_wheel_version(tmpdir, data):
     future_wheel = 'futurewheel-1.9-py2.py3-none-any.whl'
     broken_wheel = 'brokenwheel-1.0-py2.py3-none-any.whl'
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
	1	+Allow ``RECORD`` lines with more than three elements, and display a warning.