Skip to content

ENH: Replace skiprows with skip_rows to begin standardizing underscore usage in keyword arguments #22587

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions asv_bench/benchmarks/io/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ class ReadCSVSkipRows(BaseIO):
goal_time = 0.2
fname = '__test__.csv'
params = [None, 10000]
param_names = ['skiprows']
param_names = ['skip_rows']

def setup(self, skiprows):
def setup(self, skip_rows):
N = 20000
index = tm.makeStringIndex(N)
df = DataFrame({'float1': np.random.randn(N),
Expand All @@ -100,8 +100,8 @@ def setup(self, skiprows):
index=index)
df.to_csv(self.fname)

def time_skipprows(self, skiprows):
read_csv(self.fname, skiprows=skiprows)
def time_skipprows(self, skip_rows):
read_csv(self.fname, skip_rows=skip_rows)


class ReadUint64Integers(StringIORewind):
Expand Down
2 changes: 1 addition & 1 deletion doc/source/cookbook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1034,7 +1034,7 @@ Option 1: pass rows explicitly to skip rows

.. ipython:: python

pd.read_csv(StringIO(data), sep=';', skiprows=[11,12],
pd.read_csv(StringIO(data), sep=';', skip_rows=[11,12],
index_col=0, parse_dates=True, header=10)

Option 2: read column names and then data
Expand Down
20 changes: 10 additions & 10 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ false_values : list, default ``None``
Values to consider as ``False``.
skipinitialspace : boolean, default ``False``
Skip spaces after delimiter.
skiprows : list-like or integer, default ``None``
skip_rows : list-like or integer, default ``None``
Line numbers to skip (0-indexed) or number of lines to skip (int) at the start
of the file.

Expand All @@ -197,7 +197,7 @@ skiprows : list-like or integer, default ``None``

data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3'
pd.read_csv(StringIO(data))
pd.read_csv(StringIO(data), skiprows=lambda x: x % 2 != 0)
pd.read_csv(StringIO(data), skip_rows=lambda x: x % 2 != 0)

skipfooter : int, default ``0``
Number of lines at bottom of file to skip (unsupported with engine='c').
Expand Down Expand Up @@ -326,7 +326,7 @@ comment : str, default ``None``
Indicates remainder of line should not be parsed. If found at the beginning of
a line, the line will be ignored altogether. This parameter must be a single
character. Like empty lines (as long as ``skip_blank_lines=True``), fully
commented lines are ignored by the parameter `header` but not by `skiprows`.
commented lines are ignored by the parameter `header` but not by `skip_rows`.
For example, if ``comment='#'``, parsing '#empty\\na,b,c\\n1,2,3' with
`header=0` will result in 'a,b,c' being treated as the header.
encoding : str, default ``None``
Expand Down Expand Up @@ -651,24 +651,24 @@ If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines:

The presence of ignored lines might create ambiguities involving line numbers;
the parameter ``header`` uses row numbers (ignoring commented/empty
lines), while ``skiprows`` uses line numbers (including commented/empty lines):
lines), while ``skip_rows`` uses line numbers (including commented/empty lines):

.. ipython:: python

data = '#comment\na,b,c\nA,B,C\n1,2,3'
pd.read_csv(StringIO(data), comment='#', header=1)
data = 'A,B,C\n#comment\na,b,c\n1,2,3'
pd.read_csv(StringIO(data), comment='#', skiprows=2)
pd.read_csv(StringIO(data), comment='#', skip_rows=2)

If both ``header`` and ``skiprows`` are specified, ``header`` will be
relative to the end of ``skiprows``. For example:
If both ``header`` and ``skip_rows`` are specified, ``header`` will be
relative to the end of ``skip_rows``. For example:

.. ipython:: python

data = '# empty\n# second empty line\n# third empty' \
'line\nX,Y,Z\n1,2,3\nA,B,C\n1,2.,4.\n5.,NaN,10.0'
print(data)
pd.read_csv(StringIO(data), comment='#', skiprows=4, header=1)
pd.read_csv(StringIO(data), comment='#', skip_rows=4, header=1)

.. _io.comments:

Expand Down Expand Up @@ -2373,14 +2373,14 @@ Specify a number of rows to skip:

.. code-block:: python

dfs = pd.read_html(url, skiprows=0)
dfs = pd.read_html(url, skip_rows=0)

Specify a number of rows to skip using a list (``xrange`` (Python 2 only) works
as well):

.. code-block:: python

dfs = pd.read_html(url, skiprows=range(2))
dfs = pd.read_html(url, skip_rows=range(2))

Specify an HTML attribute:

Expand Down
18 changes: 9 additions & 9 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ cdef class TextReader:
object header, orig_header, names, header_start, header_end
object index_col
object low_memory
object skiprows
object skip_rows
object dtype
object encoding
object compression
Expand Down Expand Up @@ -348,7 +348,7 @@ cdef class TextReader:
false_values=None,
allow_leading_cols=True,
low_memory=False,
skiprows=None,
skip_rows=None,
skipfooter=0,
verbose=False,
mangle_dupe_cols=True,
Expand Down Expand Up @@ -436,8 +436,8 @@ cdef class TextReader:
self.parser.error_bad_lines = int(error_bad_lines)
self.parser.warn_bad_lines = int(warn_bad_lines)

self.skiprows = skiprows
if skiprows is not None:
self.skip_rows = skip_rows
if skip_rows is not None:
self._make_skiprow_set()

self.skipfooter = skipfooter
Expand Down Expand Up @@ -605,13 +605,13 @@ cdef class TextReader:
self.parser.quotechar = ord(quote_char)

cdef _make_skiprow_set(self):
if isinstance(self.skiprows, (int, np.integer)):
parser_set_skipfirstnrows(self.parser, self.skiprows)
elif not callable(self.skiprows):
for i in self.skiprows:
if isinstance(self.skip_rows, (int, np.integer)):
parser_set_skipfirstnrows(self.parser, self.skip_rows)
elif not callable(self.skip_rows):
for i in self.skip_rows:
parser_add_skiprow(self.parser, i)
else:
self.parser.skipfunc = <PyObject *> self.skiprows
self.parser.skipfunc = <PyObject *> self.skip_rows

cdef _setup_parser_source(self, source):
cdef:
Expand Down
18 changes: 9 additions & 9 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@

.. versionadded:: 0.19.0

skiprows : list-like
skip_rows : list-like
Rows to skip at the beginning (0-indexed)
nrows : int, default None
Number of rows to parse
Expand Down Expand Up @@ -295,7 +295,7 @@ def read_excel(io,
converters=None,
true_values=None,
false_values=None,
skiprows=None,
skip_rows=None,
nrows=None,
na_values=None,
parse_dates=False,
Expand Down Expand Up @@ -330,7 +330,7 @@ def read_excel(io,
converters=converters,
true_values=true_values,
false_values=false_values,
skiprows=skiprows,
skip_rows=skip_rows,
nrows=nrows,
na_values=na_values,
parse_dates=parse_dates,
Expand Down Expand Up @@ -422,7 +422,7 @@ def parse(self,
converters=None,
true_values=None,
false_values=None,
skiprows=None,
skip_rows=None,
nrows=None,
na_values=None,
parse_dates=False,
Expand Down Expand Up @@ -457,7 +457,7 @@ def parse(self,
converters=converters,
true_values=true_values,
false_values=false_values,
skiprows=skiprows,
skip_rows=skip_rows,
nrows=nrows,
na_values=na_values,
parse_dates=parse_dates,
Expand Down Expand Up @@ -511,7 +511,7 @@ def _parse_excel(self,
dtype=None,
true_values=None,
false_values=None,
skiprows=None,
skip_rows=None,
nrows=None,
na_values=None,
verbose=False,
Expand Down Expand Up @@ -649,8 +649,8 @@ def _parse_cell(cell_contents, cell_typ):
header_names = []
control_row = [True] * len(data[0])
for row in header:
if is_integer(skiprows):
row += skiprows
if is_integer(skip_rows):
row += skip_rows

data[row], control_row = _fill_mi_header(
data[row], control_row)
Expand Down Expand Up @@ -687,7 +687,7 @@ def _parse_cell(cell_contents, cell_typ):
dtype=dtype,
true_values=true_values,
false_values=false_values,
skiprows=skiprows,
skip_rows=skip_rows,
nrows=nrows,
na_values=na_values,
parse_dates=parse_dates,
Expand Down
36 changes: 20 additions & 16 deletions pandas/io/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,32 +85,36 @@ def _remove_whitespace(s, regex=_RE_WHITESPACE):
return regex.sub(' ', s.strip())


def _get_skiprows(skiprows):
def _get_skiprows(skip_rows):
"""Get an iterator given an integer, slice or container.

Parameters
----------
skiprows : int, slice, container
skip_rows : int, slice, container
The iterator to use to skip rows; can also be a slice.

Raises
------
TypeError
* If `skiprows` is not a slice, integer, or Container
* If `skip_rows` is not a slice, integer, or Container

Returns
-------
it : iterable
A proper iterator to use to skip rows of a DataFrame.
"""
if isinstance(skiprows, slice):
return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1)
elif isinstance(skiprows, numbers.Integral) or is_list_like(skiprows):
return skiprows
elif skiprows is None:
if isinstance(skip_rows, slice):
return lrange(
skip_rows.start or 0,
skip_rows.stop,
skip_rows.step or 1
)
elif isinstance(skip_rows, numbers.Integral) or is_list_like(skip_rows):
return skip_rows
elif skip_rows is None:
return 0
raise TypeError('%r is not a valid type for skipping rows' %
type(skiprows).__name__)
type(skip_rows).__name__)


def _read(obj):
Expand Down Expand Up @@ -779,7 +783,7 @@ def _expand_elements(body):
def _data_to_frame(**kwargs):
head, body, foot = kwargs.pop('data')
header = kwargs.pop('header')
kwargs['skiprows'] = _get_skiprows(kwargs['skiprows'])
kwargs['skip_rows'] = _get_skiprows(kwargs['skip_rows'])
if head:
body = head + body

Expand Down Expand Up @@ -922,7 +926,7 @@ def _parse(flavor, io, match, attrs, encoding, displayed_only, **kwargs):


def read_html(io, match='.+', flavor=None, header=None, index_col=None,
skiprows=None, attrs=None, parse_dates=False,
skip_rows=None, attrs=None, parse_dates=False,
tupleize_cols=None, thousands=',', encoding=None,
decimal='.', converters=None, na_values=None,
keep_default_na=True, displayed_only=True):
Expand Down Expand Up @@ -956,7 +960,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
index_col : int or list-like or None, optional
The column (or list of columns) to use to create the index.

skiprows : int or list-like or slice or None, optional
skip_rows : int or list-like or slice or None, optional
0-based. Number of rows to skip after parsing the column integer. If a
sequence of integers or a slice is given, will skip the rows indexed by
that sequence. Note that a single element sequence means 'skip the nth
Expand Down Expand Up @@ -1060,7 +1064,7 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
.. versionadded:: 0.21.0

Similar to :func:`~pandas.read_csv` the `header` argument is applied
**after** `skiprows` is applied.
**after** `skip_rows` is applied.

This function will *always* return a list of :class:`DataFrame` *or*
it will fail, e.g., it will *not* return an empty list.
Expand All @@ -1077,13 +1081,13 @@ def read_html(io, match='.+', flavor=None, header=None, index_col=None,
_importers()

# Type check here. We don't want to parse only to fail because of an
# invalid value of an integer skiprows.
if isinstance(skiprows, numbers.Integral) and skiprows < 0:
# invalid value of an integer skip_rows.
if isinstance(skip_rows, numbers.Integral) and skip_rows < 0:
raise ValueError('cannot skip rows starting from the end of the '
'data (you passed a negative value)')
_validate_header_arg(header)
return _parse(flavor=flavor, io=io, match=match, header=header,
index_col=index_col, skiprows=skiprows,
index_col=index_col, skip_rows=skip_rows,
parse_dates=parse_dates, tupleize_cols=tupleize_cols,
thousands=thousands, attrs=attrs, encoding=encoding,
decimal=decimal, converters=converters, na_values=na_values,
Expand Down
Loading