diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index a6d38eab99977..62c511c0edf55 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -299,10 +299,10 @@ def _exclude_implicit_index(self, alldata): # error: Cannot determine type of 'index_col' offset = len(self.index_col) # type: ignore[has-type] - if self._col_indices is not None and len(names) != len(self._col_indices): - names = [names[i] for i in sorted(self._col_indices)] - - return {name: alldata[i + offset] for i, name in enumerate(names)}, names + len_alldata = len(alldata) + return { + name: alldata[i + offset] for i, name in enumerate(names) if i < len_alldata + }, names # legacy def get_chunk(self, size=None): @@ -473,7 +473,12 @@ def _infer_columns(self): self._handle_usecols(columns, names) else: num_original_columns = len(names) - columns = [names] + if self._col_indices is not None and len(names) != len( + self._col_indices + ): + columns = [[names[i] for i in sorted(self._col_indices)]] + else: + columns = [names] else: columns = self._handle_usecols(columns, columns[0]) else: diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 27bad29550d82..371b8bea7def2 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -373,3 +373,18 @@ def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols): result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols) expected = DataFrame({"A": [1, 5], "C": [3, 7]}) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("names", [None, ["a", "b"]]) +def test_usecols_indices_out_of_bounds(all_parsers, names): + # GH#25623 + parser = all_parsers + data = """ +a,b +1,2 + """ + result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0) + expected = DataFrame({"a": [1], "b": [None]}) + if names is None and parser.engine == "python": + expected = DataFrame({"a": [1]}) + tm.assert_frame_equal(result, expected)