From 8325caf4a56ea5f2158205551d1cf66ce5776262 Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Thu, 25 Oct 2018 13:05:34 +0200
Subject: [PATCH 01/11] Add flake8-rst

Added flake8-rst as development dependency
Added flake8-rst configuration
Added checks to code_checks.sh

Signed-off-by: Fabian Haase
---
 ci/code_checks.sh                | 16 +++++++++++++++-
 ci/environment-dev.yaml          |  1 +
 ci/requirements-optional-pip.txt |  2 +-
 ci/requirements_dev.txt          |  1 +
 setup.cfg                        |  6 ++++++
 5 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 00f17d5c91537..7334cc1dd6ae7 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -15,7 +15,7 @@
 #   $ ./ci/code_checks.sh doctests    # run doctests
 
 echo "inside $0"
-[[ $LINT ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; }
+[[ $LINT || $1 ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; }
 [[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unkown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; }
 
 source activate pandas
@@ -44,6 +44,17 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    echo "flake8-rst --version"
+    flake8-rst --version
+
+    MSG='Linting code-blocks in .py docstrings' ; echo $MSG
+    flake8-rst pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
+    MSG='Linting code-blocks in .rst documentation' ; echo $MSG
+    flake8-rst doc --filename=*.rst
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     # readability/casting: Warnings about C casting instead of C++ casting
     # runtime/int: Warnings about using C number types instead of C++ ones
     # build/include_subdir: Warnings about prefacing included header files with directory
@@ -56,6 +67,9 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    echo "isort --version-number"
+    isort --version-number
+
     # Imports - Check formatting using isort see setup.cfg for settings
     MSG='Check import format using isort ' ; echo $MSG
     isort --recursive --check-only pandas
diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml
index 3e69b1f725b24..2718c1cd582b6 100644
--- a/ci/environment-dev.yaml
+++ b/ci/environment-dev.yaml
@@ -7,6 +7,7 @@ dependencies:
   - NumPy
   - flake8
   - flake8-comprehensions
+  - flake8-rst
   - hypothesis>=3.58.0
   - isort
   - moto
diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt
index ebe0c4ca88ee6..bdee3567fa2ca 100644
--- a/ci/requirements-optional-pip.txt
+++ b/ci/requirements-optional-pip.txt
@@ -17,7 +17,7 @@ numexpr>=2.6.1
 openpyxl==2.5.5
 pyarrow
 pymysql
-tables
+pytables>=3.4.2
 pytest-cov
 pytest-xdist
 s3fs
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
index 6a8b8d64d943b..a1cb20c265974 100644
--- a/ci/requirements_dev.txt
+++ b/ci/requirements_dev.txt
@@ -4,6 +4,7 @@ Cython>=0.28.2
 NumPy
 flake8
 flake8-comprehensions
+flake8-rst
 hypothesis>=3.58.0
 isort
 moto
diff --git a/setup.cfg b/setup.cfg
index 84f19e56ad3bc..ef77c0b0f3ae5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -29,6 +29,12 @@ exclude =
     doc/temp/*.py,
     .eggs/*.py,
     versioneer.py
+    .tox
+
+[flake8-rst]
+bootstrap =
+    import pandas as pd
+    import numpy as np
 
 [yapf]
 based_on_style = pep8

From 226bc53907f62fdc46f5dcf1f7d2e7abac6fa8b7 Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Fri, 26 Oct 2018 09:07:56 +0200
Subject: [PATCH 02/11] Fixed PEP8 issues in docstrings

Signed-off-by: Fabian Haase
---
 doc/source/enhancingperf.rst | 88 +++++++++++++++++++-----------------
 doc/source/reshaping.rst     |  8 ++--
 doc/source/timeseries.rst    |  5 +-
 pandas/core/missing.py       |  7 +--
 setup.cfg                    |  2 +
 5 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst
index 8f8a9fe3e50e0..364e8bd9a1242 100644
--- a/doc/source/enhancingperf.rst
+++ b/doc/source/enhancingperf.rst
@@ -298,7 +298,7 @@ advanced Cython techniques:
 Even faster, with the caveat that a bug in our Cython code (an off-by-one error,
 for example) might cause a segfault because memory access isn't checked.
-For more about ``boundscheck`` and ``wraparound``, see the Cython docs on
+For more about ``boundscheck`` and ``wraparound``, see the Cython docs on
 `compiler directives `__.
 
 .. _enhancingperf.numba:
@@ -323,7 +323,7 @@ Numba works by generating optimized machine code using the LLVM compiler infrast
 Jit
 ~~~
 
-We demonstrate how to use Numba to just-in-time compile our code. We simply
+We demonstrate how to use Numba to just-in-time compile our code. We simply
 take the plain Python code from above and annotate with the ``@jit`` decorator.
 
 .. code-block:: python
@@ -332,35 +332,38 @@ take the plain Python code from above and annotate with the ``@jit`` decorator.
 
     @numba.jit
     def f_plain(x):
-       return x * (x - 1)
+        return x * (x - 1)
+
 
     @numba.jit
     def integrate_f_numba(a, b, N):
-       s = 0
-       dx = (b - a) / N
-       for i in range(N):
-           s += f_plain(a + i * dx)
-       return s * dx
+        s = 0
+        dx = (b - a) / N
+        for i in range(N):
+            s += f_plain(a + i * dx)
+        return s * dx
+
 
     @numba.jit
     def apply_integrate_f_numba(col_a, col_b, col_N):
-       n = len(col_N)
-       result = np.empty(n, dtype='float64')
-       assert len(col_a) == len(col_b) == n
-       for i in range(n):
-           result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i])
-       return result
+        n = len(col_N)
+        result = np.empty(n, dtype='float64')
+        assert len(col_a) == len(col_b) == n
+        for i in range(n):
+            result[i] = integrate_f_numba(col_a[i], col_b[i], col_N[i])
+        return result
+
 
     def compute_numba(df):
-       result = apply_integrate_f_numba(df['a'].values, df['b'].values, df['N'].values)
-       return pd.Series(result, index=df.index, name='result')
+        result = apply_integrate_f_numba(df['a'].values, df['b'].values,
+                                         df['N'].values)
+        return pd.Series(result, index=df.index, name='result')
 
-Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a nicer interface by passing/returning pandas objects.
+Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a
+nicer interface by passing/returning pandas objects.
 
-.. code-block:: ipython
-
-   In [4]: %timeit compute_numba(df)
-   1000 loops, best of 3: 798 us per loop
+>>> %timeit compute_numba(df)
+1000 loops, best of 3: 798 us per loop
 
 In this example, using Numba was faster than Cython.
@@ -376,24 +379,25 @@ Consider the following toy example of doubling each observation:
 
     import numba
 
     def double_every_value_nonumba(x):
-       return x*2
+        return x * 2
+
 
     @numba.vectorize
     def double_every_value_withnumba(x):
-       return x*2
+        return x * 2
 
-   # Custom function without numba
-   In [5]: %timeit df['col1_doubled'] = df.a.apply(double_every_value_nonumba)
-   1000 loops, best of 3: 797 us per loop
+>>> # Custom function without numba
+>>> %timeit df['col1_doubled'] = df.a.apply(double_every_value_nonumba)
+1000 loops, best of 3: 797 us per loop
 
-   # Standard implementation (faster than a custom function)
-   In [6]: %timeit df['col1_doubled'] = df.a*2
-   1000 loops, best of 3: 233 us per loop
+>>> # Standard implementation (faster than a custom function)
+>>> %timeit df['col1_doubled'] = df.a*2
+1000 loops, best of 3: 233 us per loop
 
-   # Custom function with numba
-   In [7]: %timeit df['col1_doubled'] = double_every_value_withnumba(df.a.values)
-   1000 loops, best of 3: 145 us per loop
+>>> # Custom function with numba
+>>> %timeit df['col1_doubled'] = double_every_value_withnumba(df.a.values)
+1000 loops, best of 3: 145 us per loop
 
 Caveats
 ~~~~~~~
@@ -402,18 +406,18 @@ Caveats
 Numba will execute on any function, but can only accelerate certain classes of
 functions.
 
-Numba is best at accelerating functions that apply numerical functions to NumPy
-arrays. When passed a function that only uses operations it knows how to
+Numba is best at accelerating functions that apply numerical functions to NumPy
+arrays. When passed a function that only uses operations it knows how to
 accelerate, it will execute in ``nopython`` mode.
 
-If Numba is passed a function that includes something it doesn't know how to
-work with -- a category that currently includes sets, lists, dictionaries, or
-string functions -- it will revert to ``object mode``. In ``object mode``,
-Numba will execute but your code will not speed up significantly. If you would
-prefer that Numba throw an error if it cannot compile a function in a way that
-speeds up your code, pass Numba the argument
-``nopython=True`` (e.g. ``@numba.jit(nopython=True)``). For more on
-troubleshooting Numba modes, see the `Numba troubleshooting page
+If Numba is passed a function that includes something it doesn't know how to
+work with -- a category that currently includes sets, lists, dictionaries, or
+string functions -- it will revert to ``object mode``. In ``object mode``,
+Numba will execute but your code will not speed up significantly. If you would
+prefer that Numba throw an error if it cannot compile a function in a way that
+speeds up your code, pass Numba the argument
+``nopython=True`` (e.g. ``@numba.jit(nopython=True)``). For more on
+troubleshooting Numba modes, see the `Numba troubleshooting page
 `__.
 
 Read more in the `Numba docs `__.
diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
index 7d9925d800441..e29bb3d699d66 100644
--- a/doc/source/reshaping.rst
+++ b/doc/source/reshaping.rst
@@ -48,10 +48,12 @@ For the curious here is how the above ``DataFrame`` was created:
    import pandas.util.testing as tm; tm.N = 3
 
    def unpivot(frame):
        N, K = frame.shape
-       data = {'value' : frame.values.ravel('F'),
-               'variable' : np.asarray(frame.columns).repeat(N),
-               'date' : np.tile(np.asarray(frame.index), K)}
+       data = {'value': frame.values.ravel('F'),
+               'variable': np.asarray(frame.columns).repeat(N),
+               'date': np.tile(np.asarray(frame.index), K)}
        return pd.DataFrame(data, columns=['date', 'variable', 'value'])
+
+
    df = unpivot(tm.makeTimeDataFrame())
 
 To select out everything for variable ``A`` we could do:
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index 85b0abe421eb2..eeadb2d1fbc5f 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -898,7 +898,7 @@ custom date increment logic, such as adding business days:
 .. code-block:: python
 
     class BDay(DateOffset):
-       """DateOffset increments between business days"""
+        """DateOffset increments between business days"""
         def apply(self, other):
             ...
@@ -2133,7 +2133,8 @@ To convert from an ``int64`` based YYYYMMDD representation.
     s
 
     def conv(x):
-       return pd.Period(year = x // 10000, month = x//100 % 100, day = x%100, freq='D')
+        return pd.Period(year=x // 10000, month=x // 100 % 100,
+                         day=x % 100, freq='D')
 
     s.apply(conv)
     s.apply(conv)[2]
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index b2daec327d618..222873cd7f81a 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -760,9 +760,10 @@ def _interp_limit(invalid, fw_limit, bw_limit):
 
     .. code-block:: python
 
-       for x in np.where(invalid)[0]:
-           if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
-               yield x
+        def _interp_limit(invalid, fw_limit, bw_limit):
+            for x in np.where(invalid)[0]:
+                if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
+                    yield x
     """
     # handle forward first; the backward direction is the same except
     # 1. operate on the reversed array
diff --git a/setup.cfg b/setup.cfg
index ef77c0b0f3ae5..5a700fbb387cd 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,6 +35,8 @@ exclude =
 bootstrap =
     import pandas as pd
     import numpy as np
+ignore =
+    F821,  # undefined name
 
 [yapf]
 based_on_style = pep8

From 472df59cc35f2edee8240e4a768929efdbd71ce6 Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Fri, 26 Oct 2018 20:55:40 +0200
Subject: [PATCH 03/11] Fixing PEP8 issues within code-blocks

---
 doc/source/10min.rst                  | 56 +++++++++++++--------------
 doc/source/comparison_with_sas.rst    | 17 ++++----
 doc/source/contributing_docstring.rst | 18 ++++++---
 doc/source/gotchas.rst                | 36 +++++++----------
 doc/source/groupby.rst                | 12 +++---
 doc/source/timeseries.rst             |  2 +-
 setup.cfg                             |  6 +--
 7 files changed, 71 insertions(+), 76 deletions(-)

diff --git a/doc/source/10min.rst b/doc/source/10min.rst
index fbbe94a72c71e..b5938a24ce6c5 100644
--- a/doc/source/10min.rst
+++ b/doc/source/10min.rst
@@ -45,7 +45,7 @@ a default integer index:
 
 .. ipython:: python
 
-   s = pd.Series([1,3,5,np.nan,6,8])
+   s = pd.Series([1, 3, 5, np.nan, 6, 8])
    s
 
 Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index
@@ -62,12 +62,12 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
 
 .. ipython:: python
 
-   df2 = pd.DataFrame({ 'A' : 1.,
-                        'B' : pd.Timestamp('20130102'),
-                        'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
-                        'D' : np.array([3] * 4,dtype='int32'),
-                        'E' : pd.Categorical(["test","train","test","train"]),
-                        'F' : 'foo' })
+   df2 = pd.DataFrame({'A': 1.,
+                       'B': pd.Timestamp('20130102'),
+                       'C': pd.Series(1, index=list(range(4)),dtype='float32'),
+                       'D': np.array([3] * 4, dtype='int32'),
+                       'E': pd.Categorical(["test", "train", "test", "train"]),
+                       'F': 'foo'})
    df2
 
 The columns of the resulting ``DataFrame`` have different
@@ -283,9 +283,9 @@ Using the :func:`~Series.isin` method for filtering:
 .. ipython:: python
 
    df2 = df.copy()
-   df2['E'] = ['one', 'one','two','three','four','three']
+   df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
    df2
-   df2[df2['E'].isin(['two','four'])]
+   df2[df2['E'].isin(['two', 'four'])]
 
 Setting
 ~~~~~~~
@@ -295,7 +295,7 @@ by the indexes.
 
 .. ipython:: python
 
-   s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
+   s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
    s1
    df['F'] = s1
@@ -394,7 +394,7 @@ In addition, pandas automatically broadcasts along the specified dimension.
 
 .. ipython:: python
 
-   s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
+   s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
    s
    df.sub(s, axis='index')
@@ -492,7 +492,7 @@ section.
 
 .. ipython:: python
 
-   df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
+   df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
    df
    s = df.iloc[3]
    df.append(s, ignore_index=True)
@@ -512,12 +512,12 @@ See the :ref:`Grouping section `.
 
 .. ipython:: python
 
-   df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
-                             'foo', 'bar', 'foo', 'foo'],
-                      'B' : ['one', 'one', 'two', 'three',
-                             'two', 'two', 'one', 'three'],
-                      'C' : np.random.randn(8),
-                      'D' : np.random.randn(8)})
+   df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+                            'foo', 'bar', 'foo', 'foo'],
+                      'B': ['one', 'one', 'two', 'three',
+                            'two', 'two', 'one', 'three'],
+                      'C': np.random.randn(8),
+                      'D': np.random.randn(8)})
    df
 
 Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting
@@ -532,7 +532,7 @@ apply the ``sum`` function.
 
 .. ipython:: python
 
-   df.groupby(['A','B']).sum()
+   df.groupby(['A', 'B']).sum()
 
 Reshaping
 ---------
@@ -578,11 +578,11 @@ See the section on :ref:`Pivot Tables `.
 
 .. ipython:: python
 
-   df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
-                      'B' : ['A', 'B', 'C'] * 4,
-                      'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
-                      'D' : np.random.randn(12),
-                      'E' : np.random.randn(12)})
+   df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
+                      'B': ['A', 'B', 'C'] * 4,
+                      'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
+                      'D': np.random.randn(12),
+                      'E': np.random.randn(12)})
    df
 
 We can produce pivot tables from this data very easily:
@@ -653,7 +653,7 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the
 
 .. ipython:: python
 
-   df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
+   df = pd.DataFrame({"id":[1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
 
 Convert the raw grades to a categorical data type.
@@ -753,13 +753,13 @@ Writing to a HDF5 Store.
 
 .. ipython:: python
 
-   df.to_hdf('foo.h5','df')
+   df.to_hdf('foo.h5', 'df')
 
 Reading from a HDF5 Store.
 
 .. ipython:: python
 
-   pd.read_hdf('foo.h5','df')
+   pd.read_hdf('foo.h5', 'df')
 
 .. ipython:: python
    :suppress:
 
   os.remove('foo.h5')
@@ -796,7 +796,7 @@ If you are attempting to perform an operation you might see an exception like:
 .. code-block:: python
 
     >>> if pd.Series([False, True, False]):
-       print("I was true")
+    ...     print("I was true")
     Traceback
         ...
     ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst
index 0354ad473544b..e1ab3b9ee6b29 100644
--- a/doc/source/comparison_with_sas.rst
+++ b/doc/source/comparison_with_sas.rst
@@ -298,8 +298,8 @@ see the :ref:`timeseries documentation` for more details.
 .. ipython:: python
    :suppress:
 
-   tips = tips.drop(['date1','date2','date1_year',
-                     'date2_month','date1_next','months_between'], axis=1)
+   tips = tips.drop(['date1', 'date2', 'date1_year', 'date2_month',
+                     'date1_next', 'months_between'], axis=1)
 
 Selection of Columns
 ~~~~~~~~~~~~~~~~~~~~
@@ -744,12 +744,9 @@ XPORT is a relatively limited format and the parsing of it is not as
 optimized as some of the other pandas readers. An alternative way
 to interop data between SAS and pandas is to serialize to csv.
 
-.. code-block:: python
-
-   # version 0.17, 10M rows
-
-   In [8]: %time df = pd.read_sas('big.xpt')
-   Wall time: 14.6 s
+>>> # version 0.17, 10M rows
+>>> %time df = pd.read_sas('big.xpt')
+Wall time: 14.6 s
 
-   In [9]: %time df = pd.read_csv('big.csv')
-   Wall time: 4.86 s
+>>> %time df = pd.read_csv('big.csv')
+Wall time: 4.86 s
diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst
index afb554aeffbc3..3b9e859f59978 100644
--- a/doc/source/contributing_docstring.rst
+++ b/doc/source/contributing_docstring.rst
@@ -189,7 +189,7 @@ infinitive verb.
 
 .. code-block:: python
 
-    def astype(dtype):
+    def astype(dtype):  # noqa: F811
         """
         Casts Series type.
 
@@ -197,7 +197,8 @@ infinitive verb.
         """
         pass
 
-    def astype(dtype):
+
+    def astype(dtype):  # noqa: F811
         """
         Method to cast Series type.
 
@@ -205,7 +206,8 @@ infinitive verb.
         """
         pass
 
-    def astype(dtype):
+
+    def astype(dtype):  # noqa: F811
         """
         Cast Series type
 
@@ -213,7 +215,8 @@ infinitive verb.
         """
         pass
 
-    def astype(dtype):
+
+    def astype(dtype):  # noqa: F811
         """
         Cast Series type from its current type to the new type defined in
         the parameter dtype.
@@ -624,6 +627,7 @@ A simple example could be:
 .. code-block:: python
 
     class Series:
+
         def head(self, n=5):
             """
             Return the first elements of the Series.
@@ -684,9 +688,8 @@ shown:
     import numpy as np
     import pandas as pd
 
-
 Any other module used in the examples must be explicitly imported, one per line (as
-recommended in `PEP-8 `_)
+recommended in :pep:`8#imports`)
 and avoiding aliases. Avoid excessive imports, but if needed, imports from
 the standard library go first, followed by third-party libraries (like
 matplotlib).
@@ -720,6 +723,7 @@ positional arguments ``head(3)``.
 .. code-block:: python
 
     class Series:
+
         def mean(self):
             """
             Compute the mean of the input.
@@ -946,12 +950,14 @@ substitute the children's class names in this docstring.
         """Apply my function to %(klass)s."""
         ...
 
+
     class ChildA(Parent):
         @Substitution(klass="ChildA")
         @Appender(Parent.my_function.__doc__)
         def my_function(self):
             ...
 
+
     class ChildB(Parent):
         @Substitution(klass="ChildB")
         @Appender(Parent.my_function.__doc__)
diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index 79e312ca12833..82a5cf4d8645c 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -96,40 +96,32 @@ something to a ``bool``.
 This happens in an ``if``-statement or when using the boolean operations:
 ``and``, ``or``, and ``not``. It is not clear what the result of the following
 code should be:
 
-.. code-block:: python
-
-    >>> if pd.Series([False, True, False]):
-        ...
+>>> if pd.Series([False, True, False]):
+...     print("I was true")
 
 Should it be ``True`` because it's not zero-length, or ``False`` because there
 are ``False`` values? It is unclear, so instead, pandas raises a ``ValueError``:
 
-.. code-block:: python
-
-    >>> if pd.Series([False, True, False]):
-        print("I was true")
-    Traceback
-        ...
-    ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
+>>> if pd.Series([False, True, False]):
+...     print("I was true")
+Traceback
+    ...
+ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
 
 You need to explicitly choose what you want to do with the ``DataFrame``, e.g.
 use :meth:`~DataFrame.any`, :meth:`~DataFrame.all` or :meth:`~DataFrame.empty`.
 Alternatively, you might want to compare if the pandas object is ``None``:
 
-.. code-block:: python
-
-    >>> if pd.Series([False, True, False]) is not None:
-        print("I was not None")
-    >>> I was not None
+>>> if pd.Series([False, True, False]) is not None:
+...     print("I was not None")
+I was not None
 
 Below is how to check if any of the values are ``True``:
 
-.. code-block:: python
-
-    >>> if pd.Series([False, True, False]).any():
-        print("I am any")
-    >>> I am any
+>>> if pd.Series([False, True, False]).any():
+...     print("I am any")
+I am any
 
 To evaluate single-element pandas objects in a boolean context, use the method
 :meth:`~DataFrame.bool`:
@@ -316,7 +308,7 @@ Occasionally you may have to deal with data that were created on a machine with
 a different byte order than the one on which you are running Python. A common
 symptom of this issue is an error like:
 
-.. code-block:: python
+.. code-block:: console
 
     Traceback
         ...
diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index fecc336049a40..a1e6a272d8ad2 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -79,12 +79,12 @@ pandas objects can be split on any of their axes. The abstract definition of
 grouping is to provide a mapping of labels to group names. To create a GroupBy
 object (more on what the GroupBy object is later), you may do the following:
 
-.. code-block:: ipython
+.. code-block:: python
 
-   # default is axis=0
-   >>> grouped = obj.groupby(key)
-   >>> grouped = obj.groupby(key, axis=1)
-   >>> grouped = obj.groupby([key1, key2])
+    # default is axis=0
+    >>> grouped = obj.groupby(key)
+    >>> grouped = obj.groupby(key, axis=1)
+    >>> grouped = obj.groupby([key1, key2])
 
 The mapping can be specified many different ways:
@@ -1272,7 +1272,7 @@ arbitrary function, for example:
 
 .. code-block:: python
 
-   (df.groupby(['Store', 'Product']).pipe(report_func)
+   df.groupby(['Store', 'Product']).pipe(report_func)
 
 where ``report_func`` takes a GroupBy object and creates a report
 from that.
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index eeadb2d1fbc5f..55a614bdbf34a 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -898,7 +898,7 @@ custom date increment logic, such as adding business days:
 .. code-block:: python
 
     class BDay(DateOffset):
-        """DateOffset increments between business days"""
+        """DateOffset increments between business days"""
         def apply(self, other):
             ...
diff --git a/setup.cfg b/setup.cfg
index 5a700fbb387cd..9b3d3db2c1b16 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -32,11 +32,11 @@ exclude =
     .tox
 
 [flake8-rst]
-bootstrap =
-    import pandas as pd
-    import numpy as np
 ignore =
     F821,  # undefined name
+    F401,  # imported but unused
+    W391,  # blank line at end of file [Seems to be a bug (v0.4.1)]
+
 
 [yapf]
 based_on_style = pep8

From 11a363267f4fd0bf402e2065a5061d6069101c4f Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Sat, 27 Oct 2018 10:18:56 +0200
Subject: [PATCH 04/11] Fixing PEP8 issues

Signed-off-by: Fabian Haase
---
 doc/source/contributing.rst  | 20 +++++++----
 doc/source/enhancingperf.rst |  2 ++
 doc/source/io.rst            | 65 +++++++++++++++++++++++++-----------
 3 files changed, 61 insertions(+), 26 deletions(-)

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index f898ef54e4101..406546aa930c2 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -744,7 +744,7 @@ Transitioning to ``pytest``
 .. code-block:: python
 
     class TestReallyCoolFeature(object):
-        ....
+        ....  # noqa: E999
 
 Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer testing
 framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this:
@@ -752,7 +752,7 @@ framework that will facilitate testing and developing. Thus, instead of writing
 .. code-block:: python
 
     def test_really_cool_feature():
-        ....
+        ....  # noqa: E999
 
 Using ``pytest``
 ~~~~~~~~~~~~~~~~
@@ -777,25 +777,32 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place
     import pandas as pd
     from pandas.util import testing as tm
 
+
     @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64'])
     def test_dtypes(dtype):
         assert str(np.dtype(dtype)) == dtype
 
-    @pytest.mark.parametrize('dtype', ['float32',
-                             pytest.param('int16', marks=pytest.mark.skip),
-                             pytest.param('int32',
-                             marks=pytest.mark.xfail(reason='to show how it works'))])
+
+    @pytest.mark.parametrize('dtype',
+                             ['float32',
+                              pytest.param('int16', marks=pytest.mark.skip),
+                              pytest.param('int32', marks=pytest.mark.xfail(
+                                  reason='example'))
+                              ])
     def test_mark(dtype):
         assert str(np.dtype(dtype)) == 'float32'
 
+
     @pytest.fixture
     def series():
         return pd.Series([1, 2, 3])
 
+
     @pytest.fixture(params=['int8', 'int16', 'int32', 'int64'])
     def dtype(request):
         return request.param
 
+
     def test_series(series, dtype):
         result = series.astype(dtype)
         assert result.dtype == dtype
@@ -864,6 +871,7 @@ for details `_.
         st.lists(any_json_value), st.dictionaries(st.text(), any_json_value)
     ))
 
+
     @given(value=any_json_value)
     def test_json_roundtrip(value):
         result = json.loads(json.dumps(value))
diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst
index 364e8bd9a1242..d4f17f0778ac8 100644
--- a/doc/source/enhancingperf.rst
+++ b/doc/source/enhancingperf.rst
@@ -330,6 +330,7 @@ take the plain Python code from above and annotate with the ``@jit`` decorator.
 
     import numba
 
+
     @numba.jit
     def f_plain(x):
         return x * (x - 1)
@@ -378,6 +379,7 @@ Consider the following toy example of doubling each observation:
 
     import numba
 
+
     def double_every_value_nonumba(x):
         return x * 2
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 56da4dbea8706..4d79930407aaf 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1834,8 +1834,7 @@ For example:
 
 .. code-block:: python
 
-    DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json()  # raises
-
+    >>> DataFrame([1.0, 2.0, complex(1.0, 2.0)]).to_json()  # raises
     RuntimeError: Unhandled numpy dtype 15
 
 can be dealt with by specifying a simple ``default_handler``:
@@ -2411,8 +2410,8 @@ columns to strings.
 .. code-block:: python
 
    url_mcc = 'https://en.wikipedia.org/wiki/Mobile_country_code'
-   dfs = pd.read_html(url_mcc, match='Telekom Albania', header=0, converters={'MNC':
-   str})
+   dfs = pd.read_html(url_mcc, match='Telekom Albania', header=0,
+                      converters={'MNC': str})
 
 .. versionadded:: 0.19
@@ -2724,7 +2723,8 @@ different parameters:
     data = {}
     # For when Sheet1's format differs from Sheet2
     with pd.ExcelFile('path_to_file.xls') as xls:
-        data['Sheet1'] = pd.read_excel(xls, 'Sheet1', index_col=None, na_values=['NA'])
+        data['Sheet1'] = pd.read_excel(xls, 'Sheet1', index_col=None,
+                                       na_values=['NA'])
         data['Sheet2'] = pd.read_excel(xls, 'Sheet2', index_col=1)
 
 Note that if the same parsing parameters are used for all sheets, a list
@@ -2735,11 +2735,14 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc
     # using the ExcelFile class
     data = {}
     with pd.ExcelFile('path_to_file.xls') as xls:
-        data['Sheet1'] = read_excel(xls, 'Sheet1', index_col=None, na_values=['NA'])
-        data['Sheet2'] = read_excel(xls, 'Sheet2', index_col=None, na_values=['NA'])
+        data['Sheet1'] = read_excel(xls, 'Sheet1', index_col=None,
+                                    na_values=['NA'])
+        data['Sheet2'] = read_excel(xls, 'Sheet2', index_col=None,
+                                    na_values=['NA'])
 
     # equivalent using the read_excel function
-    data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA'])
+    data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'],
+                      index_col=None, na_values=['NA'])
 
 .. _io.excel.specifying_sheets:
@@ -2899,7 +2902,10 @@ missing data to recover integer dtype:
 
 .. code-block:: python
 
-   cfun = lambda x: int(x) if x else -1
+   def cfun(x):
+       return int(x) if x else -1
+
+
    read_excel('path_to_file.xls', 'Sheet1', converters={'MyInts': cfun})
 
 dtype Specifications
@@ -3040,7 +3046,7 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are:
    writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter')
 
    # Or via pandas configuration.
-   from pandas import options
+   from pandas import options  # noqa: E402
    options.io.excel.xlsx.writer = 'xlsxwriter'
 
    df.to_excel('path_to_file.xlsx', sheet_name='Sheet1')
@@ -3067,7 +3073,7 @@ which takes the contents of the clipboard buffer and passes them to the
 ``read_csv`` method. For instance, you can copy the following
 text to the clipboard (CTRL-C on many operating systems):
 
-.. code-block:: python
+.. code-block:: console
 
      A B C
    x 1 4 p
@@ -3476,9 +3482,9 @@ This format is specified by default when using ``put`` or ``to_hdf`` or by ``for
 
    .. code-block:: python
 
-      pd.DataFrame(randn(10, 2)).to_hdf('test_fixed.h5', 'df')
+      >>> pd.DataFrame(randn(10, 2)).to_hdf('test_fixed.h5', 'df')
 
-      pd.read_hdf('test_fixed.h5', 'df', where='index>5')
+      >>> pd.read_hdf('test_fixed.h5', 'df', where='index>5')
       TypeError: cannot pass a where specification when reading a fixed format.
                  this store must be selected in its entirety
@@ -3576,12 +3582,11 @@ will yield a tuple for each group key along with the relative keys of its conten
 
   .. code-block:: python
 
-      In [8]: store.foo.bar.bah
+      >>> store.foo.bar.bah
       AttributeError: 'HDFStore' object has no attribute 'foo'
 
       # you can directly access the actual PyTables node but using the root node
-      In [9]: store.root.foo.bar.bah
-      Out[9]:
+      >>> store.root.foo.bar.bah
       /foo/bar/bah (Group) ''
         children := ['block0_items' (Array), 'block0_values' (Array), 'axis0' (Array), 'axis1' (Array)]
@@ -3735,7 +3740,7 @@ The right-hand side of the sub-expression (after a comparison operator) can be:
    .. code-block:: python
 
       string = "HolyMoly'"
-      store.select('df',  'index == %s' % string)
+      store.select('df', 'index == %s' % string)
 
    The latter will **not** work and will raise a ``SyntaxError``.Note that
    there's a single quote followed by a double quote in the ``string``
@@ -3941,7 +3946,7 @@ The default is 50,000 rows returned in a chunk.
 
    .. code-block:: python
 
-      for df in pd.read_hdf('store.h5','df', chunksize=3):
+      for df in pd.read_hdf('store.h5', 'df', chunksize=3):
           print(df)
 
 Note, that the chunksize keyword applies to the **source** rows. So if you
@@ -4841,7 +4846,8 @@ to pass to :func:`pandas.to_datetime`:
 .. code-block:: python
 
    pd.read_sql_table('data', engine, parse_dates={'Date': '%Y-%m-%d'})
-   pd.read_sql_table('data', engine, parse_dates={'Date': {'format': '%Y-%m-%d %H:%M:%S'}})
+   pd.read_sql_table('data', engine,
+                     parse_dates={'Date': {'format': '%Y-%m-%d %H:%M:%S'}})
 
 You can check if a table exists using :func:`~pandas.io.sql.has_table`
@@ -5349,6 +5355,7 @@ And here's the code:
    sz = 1000000
    df = pd.DataFrame({'A': randn(sz), 'B': [1] * sz})
 
+
    def test_sql_write(df):
        if os.path.exists('test.sql'):
            os.remove('test.sql')
@@ -5356,55 +5363,73 @@ And here's the code:
        df.to_sql(name='test_table', con=sql_db)
        sql_db.close()
 
+
    def test_sql_read():
        sql_db = sqlite3.connect('test.sql')
        pd.read_sql_query("select * from test_table", sql_db)
        sql_db.close()
 
+
    def test_hdf_fixed_write(df):
        df.to_hdf('test_fixed.hdf', 'test', mode='w')
 
+
    def test_hdf_fixed_read():
        pd.read_hdf('test_fixed.hdf', 'test')
 
+
    def test_hdf_fixed_write_compress(df):
        df.to_hdf('test_fixed_compress.hdf', 'test', mode='w', complib='blosc')
 
+
    def test_hdf_fixed_read_compress():
        pd.read_hdf('test_fixed_compress.hdf', 'test')
 
+
    def test_hdf_table_write(df):
        df.to_hdf('test_table.hdf', 'test', mode='w', format='table')
 
+
    def test_hdf_table_read():
        pd.read_hdf('test_table.hdf', 'test')
 
+
    def test_hdf_table_write_compress(df):
-       df.to_hdf('test_table_compress.hdf', 'test', mode='w', complib='blosc', format='table')
+       df.to_hdf('test_table_compress.hdf', 'test', mode='w',
+                 complib='blosc', format='table')
 
+
    def test_hdf_table_read_compress():
        pd.read_hdf('test_table_compress.hdf', 'test')
 
+
    def test_csv_write(df):
        df.to_csv('test.csv', mode='w')
 
+
    def test_csv_read():
        pd.read_csv('test.csv', index_col=0)
 
+
    def test_feather_write(df):
        df.to_feather('test.feather')
 
+
    def test_feather_read():
        pd.read_feather('test.feather')
 
+
    def test_pickle_write(df):
        df.to_pickle('test.pkl')
 
+
    def test_pickle_read():
        pd.read_pickle('test.pkl')
 
+
    def test_pickle_write_compress(df):
        df.to_pickle('test.pkl.compress', compression='xz')
 
+
    def test_pickle_read_compress():
        pd.read_pickle('test.pkl.compress', compression='xz')

From 7fd215d432f44f7a13941e36f7a846ef10a4696a Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Sat, 27 Oct 2018 11:09:04 +0200
Subject: [PATCH 05/11] Fixing PEP8 issues

cookbook.rst:971 -> removed the "python" language from the
".. code-block::" directive to disable the flake8-rst checks there,
since %timeit is invalid Python syntax and "# noqa: E999" has no
effect.
Signed-off-by: Fabian Haase
---
 doc/source/cookbook.rst     | 24 +++++++++++-------------
 doc/source/extending.rst    |  7 ++++++-
 doc/source/indexing.rst     | 14 +++++++-------
 doc/source/missing_data.rst |  5 ++---
 4 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index be8457fc14a4f..db73bd4be5a8f 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -968,14 +968,11 @@ Parsing date components in multi-columns
 
 Parsing date components in multi-columns is faster with a format
 
-.. code-block:: python
-
-   In [30]: i = pd.date_range('20000101',periods=10000)
-
-   In [31]: df = pd.DataFrame(dict(year = i.year, month = i.month, day = i.day))
+.. code-block::
 
-   In [32]: df.head()
-   Out[32]:
+   >>> i = pd.date_range('20000101', periods=10000)
+   >>> df = pd.DataFrame({year: i.year, month: i.month, day: i.day})
+   >>> df.head()
       day  month  year
    0    1      1  2000
    1    2      1  2000
   2    3      1  2000
    3    4      1  2000
    4    5      1  2000
 
-   In [33]: %timeit pd.to_datetime(df.year*10000+df.month*100+df.day,format='%Y%m%d')
+   >>> %timeit pd.to_datetime(df.year * 10000 + df.month * 100 + df.day,
+   ...                        format='%Y%m%d')
    100 loops, best of 3: 7.08 ms per loop
 
    # simulate combinging into a string, then parsing
-   In [34]: ds = df.apply(lambda x: "%04d%02d%02d" % (x['year'],x['month'],x['day']),axis=1)
-
-   In [35]: ds.head()
-   Out[35]:
+   >>> ds = df.apply(lambda x: "%04d%02d%02d" %
+   ...               (x['year'], x['month'], x['day']), axis=1)
+   >>> ds.head()
    0    20000101
    1    20000102
    2    20000103
    3    20000104
    4    20000105
    dtype: object
 
-   In [36]: %timeit pd.to_datetime(ds)
+   >>> %timeit pd.to_datetime(ds)
    1 loops, best of 3: 488 ms per loop
 
 Skip row between header and data
@@ -1255,6 +1252,7 @@ The `method` argument within `DataFrame.corr` can accept a callable in addition
     ...
     ...     return cov_ab / std_a / std_b
     ...
+    ...
     >>> df = pd.DataFrame(np.random.normal(size=(100, 3)))
     ...
    >>> df.corr(method=distcorr)
diff --git a/doc/source/extending.rst b/doc/source/extending.rst
index ab940384594bc..4f3f030718c60 100644
--- a/doc/source/extending.rst
+++ b/doc/source/extending.rst
@@ -157,6 +157,7 @@ your ``MyExtensionArray`` class, as follows:
     class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin):
         pass
 
+
     MyExtensionArray._add_arithmetic_ops()
     MyExtensionArray._add_comparison_ops()
@@ -189,6 +190,7 @@ To use a test, subclass it:
 
     from pandas.tests.extension import base
 
+
     class TestConstructors(base.BaseConstructorsTests):
         pass
@@ -261,6 +263,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame
         def _constructor_expanddim(self):
             return SubclassedDataFrame
 
+
     class SubclassedDataFrame(DataFrame):
 
         @property
@@ -281,7 +284,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame
     >>> type(to_framed)
 
-    >>> df = SubclassedDataFrame({'A', [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
+    >>> df = SubclassedDataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': [7, 8, 9]})
     >>> df
        A  B  C
     0  1  4  7
@@ -297,6 +300,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame
     0  1  4
     1  2  5
     2  3  6
+
     >>> type(sliced1)
 
@@ -306,6 +310,7 @@ Below example shows how to define ``SubclassedSeries`` and ``SubclassedDataFrame
     1    2
     2    3
     Name: A, dtype: int64
+
     >>> type(sliced2)
 
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index 1c63acce6e3fa..5740ab5fa6921 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -537,10 +537,10 @@ A list of indexers where any element is out of bounds will raise an
 
 .. code-block:: python
 
-   dfl.iloc[[4, 5, 6]]
+   >>> dfl.iloc[[4, 5, 6]]
    IndexError: positional indexers are out-of-bounds
 
-   dfl.iloc[:, 4]
+   >>> dfl.iloc[:, 4]
    IndexError: single positional indexer is out-of-bounds
 
 .. _indexing.callable:
@@ -1794,7 +1794,7 @@ interpreter executes this code:
 
 .. code-block:: python
 
-   dfmi.loc[:,('one','second')] = value
+   dfmi.loc[:, ('one', 'second')] = value
    # becomes
    dfmi.loc.__setitem__((slice(None), ('one', 'second')), value)
@@ -1827,10 +1827,10 @@ that you've done this:
 .. code-block:: python
 
    def do_something(df):
-      foo = df[['bar', 'baz']]  # Is foo a view? A copy? Nobody knows!
-      # ... many lines here ...
-      foo['quux'] = value       # We don't know whether this will modify df or not!
-      return foo
+       foo = df[['bar', 'baz']]  # Is foo a view? A copy? Nobody knows!
+       # ... many lines here ...
+       foo['quux'] = value  # We don't know whether this will modify df or not!
+       return foo
 
 Yikes!
diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst
index e4b5578af15f0..636418a6c89fd 100644
--- a/doc/source/missing_data.rst
+++ b/doc/source/missing_data.rst
@@ -696,9 +696,8 @@ You can also operate on the DataFrame in place:
 
 .. code-block:: python
 
-   s = pd.Series([True, False, True])
-   s.replace({'a string': 'new value', True: False})  # raises
-
+   >>> s = pd.Series([True, False, True])
+   >>> s.replace({'a string': 'new value', True: False})  # raises
    TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
 
 will raise a ``TypeError`` because one of the ``dict`` keys is not of the

From d1be499e2d4d278b1733420205ec51a8bfc3b89b Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Sat, 27 Oct 2018 11:30:42 +0200
Subject: [PATCH 06/11] Fixing PEP8 issues

Signed-off-by: Fabian Haase
---
 doc/source/advanced.rst  | 22 ++++++++++------------
 doc/source/basics.rst    | 23 ++++++++---------------
 doc/source/dsintro.rst   |  7 +++----
 doc/source/reshaping.rst |  6 +++++-
 4 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 835c4cc9d4ab3..818f7d38650a0 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -318,13 +318,13 @@ As usual, **both sides** of the slicers are included as this is label indexing.
 
    .. code-block:: python
 
-      df.loc[(slice('A1','A3'),.....), :]
+      df.loc[(slice('A1', 'A3'), ...), :]  # noqa: E999
 
   You should **not** do this:
 
   .. code-block:: python
 
-      df.loc[(slice('A1','A3'),.....)]
+      df.loc[(slice('A1', 'A3'), ...)]  # noqa: E999
 
 .. ipython:: python
@@ -740,15 +740,13 @@ values **not** in the categories, similarly to how you can reindex **any** panda
 
    .. code-block:: python
 
-      In [9]: df3 = pd.DataFrame({'A' : np.arange(6),
-                                  'B' : pd.Series(list('aabbca')).astype('category')})
+      >>> df3 = pd.DataFrame({'A': np.arange(6),
+      ...                     'B': pd.Series(list('aabbca')).astype('category')})
+      >>> df3 = df3.set_index('B')
+      >>> df3.index
+      CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category')
 
-      In [11]: df3 = df3.set_index('B')
-
-      In [11]: df3.index
-      Out[11]: CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category')
-
-      In [12]: pd.concat([df2, df3]
+      >>> pd.concat([df2, df3])
       TypeError: categories must match existing categories when appending
 
 .. _indexing.rangeindex:
@@ -1033,11 +1031,11 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
 .. code-block:: python
 
    # 0 is not in the index
-   In [9]: df.loc[0:4, :]
+   >>> df.loc[0:4, :]
    KeyError: 0
 
    # 3 is not a unique label
-   In [11]: df.loc[2:3, :]
+   >>> df.loc[2:3, :]
    KeyError: 'Cannot get right slice bound for non-unique label: 3'
 
 ``Index.is_monotonic_increasing`` and ``Index.is_monotonic_decreasing`` only check that
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 6eeb97349100a..31916524f8715 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -302,23 +302,17 @@ To evaluate single-element pandas objects in a boolean context, use the method
 
 .. warning::
 
-   You might be tempted to do the following:
+   Using a DataFrame as a condition will raise errors,
+   as you are trying to compare multiple values:
 
    .. code-block:: python
 
        >>> if df:
-          ...
-
-   Or
-
-   .. code-block:: python
-
-       >>> df and df2
-
-   These will both raise errors, as you are trying to compare multiple values.
-
-   .. code-block:: python
+       ...     do_something()
+       ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
 
+       >>> if df and df2:
+       ...     do_something()
        ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
 
 See :ref:`gotchas` for a more detailed discussion.
@@ -732,9 +726,8 @@ with the equivalent
 
 .. code-block:: python
 
    >>> (df.pipe(h)
-          .pipe(g, arg1=1)
-          .pipe(f, arg2=2, arg3=3)
-       )
+   ...    .pipe(g, arg1=1)
+   ...    .pipe(f, arg2=2, arg3=3))
 
 Pandas encourages the second style, which is known as method chaining.
 ``pipe`` makes it easy to use your own or another library's functions
diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst
index efa52a6f7cfe2..3bbd2b1a77743 100644
--- a/doc/source/dsintro.rst
+++ b/doc/source/dsintro.rst
@@ -566,13 +566,12 @@ To write code compatible with all versions of Python, split the assignment in tw
   .. code-block:: python
 
      >>> dependent = pd.DataFrame({"A": [1, 1, 1]})
-     >>> dependent.assign(A=lambda x: x["A"] + 1,
-                          B=lambda x: x["A"] + 2)
+     >>> dependent.assign(A=lambda x: x["A"] + 1, B=lambda x: x["A"] + 2)
 
   For Python 3.5 and earlier the expression creating ``B`` refers to the
   "old" value of ``A``, ``[1, 1, 1]``. The output is then
 
-  .. code-block:: python
+  .. code-block:: console
 
         A  B
      0  2  3
@@ -582,7 +581,7 @@ To write code compatible with all versions of Python, split the assignment in tw
   For Python 3.6 and later, the expression creating ``A`` refers to the
   "new" value of ``A``, ``[2, 2, 2]``, which results in
 
-  .. code-block:: python
+  .. code-block:: console
 
         A  B
      0  2  4
diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
index e29bb3d699d66..6163b6f2ae89a 100644
--- a/doc/source/reshaping.rst
+++ b/doc/source/reshaping.rst
@@ -45,7 +45,11 @@ For the curious here is how the above ``DataFrame`` was created:
 
 .. code-block:: python
 
-   import pandas.util.testing as tm; tm.N = 3
+   import pandas.util.testing as tm
+
+   tm.N = 3
+
+
    def unpivot(frame):
        N, K = frame.shape
        data = {'value': frame.values.ravel('F'),

From acea30ad446f01067a169ea51674c0c480d8de43 Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Sat, 27 Oct 2018 12:18:46 +0200
Subject: [PATCH 07/11] Fixing PEP8 issues, re-adding F401 detection

Signed-off-by: Fabian Haase
---
 doc/source/contributing_docstring.rst | 4 ++--
 doc/source/io.rst                     | 1 -
 pandas/core/accessor.py               | 5 +++--
 setup.cfg                             | 1 -
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst
index 3b9e859f59978..ade7120f71d63 100644
--- a/doc/source/contributing_docstring.rst
+++ b/doc/source/contributing_docstring.rst
@@ -685,8 +685,8 @@ shown:
 
 .. code-block:: python
 
-    import numpy as np
-    import pandas as pd
+    import numpy as np  # noqa: F401
+    import pandas as pd  # noqa: F401
 
 Any other module used in the examples must be explicitly imported, one per line (as
 recommended in :pep:`8#imports`)
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 4d79930407aaf..2b826cd4c9243 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -5350,7 +5350,6 @@ And here's the code:
    import pandas as pd
    import sqlite3
    from numpy.random import randn
-   from pandas.io import sql
 
    sz = 1000000
    df = pd.DataFrame({'A': randn(sz), 'B': [1] * sz})
diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py
index eab529584d1fb..fd04df5c168ce 100644
--- a/pandas/core/accessor.py
+++ b/pandas/core/accessor.py
@@ -201,8 +201,9 @@ def decorator(accessor):
 
     .. code-block:: python
 
-        def __init__(self, pandas_object):
-
+        def __init__(self, pandas_object):  # noqa: E999
+            ...
+
     For consistency with pandas methods, you should raise an ``AttributeError``
     if the data passed to your accessor has an incorrect dtype.
diff --git a/setup.cfg b/setup.cfg index 9b3d3db2c1b16..814ee92e343d4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,7 +34,6 @@ exclude = [flake8-rst] ignore = F821, # undefined name - F401, # imported but unused W391, # blank line at end of file [Seems to be a bug (v0.4.1)] From 77df97f78c7807e479fd7189741a471b2830c6df Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Sat, 27 Oct 2018 14:35:16 +0200 Subject: [PATCH 08/11] Fix travis build adding flake8-rst to travis-36.yaml reverting changes to requirements-optional-pip.txt pytables seems to be tables on PyPI [running ./scripts/convert_deps.py previously changed it] Signed-off-by: Fabian Haase --- ci/requirements-optional-pip.txt | 2 +- ci/travis-36.yaml | 1 + pandas/core/accessor.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/requirements-optional-pip.txt b/ci/requirements-optional-pip.txt index bdee3567fa2ca..ebe0c4ca88ee6 100644 --- a/ci/requirements-optional-pip.txt +++ b/ci/requirements-optional-pip.txt @@ -17,7 +17,7 @@ numexpr>=2.6.1 openpyxl==2.5.5 pyarrow pymysql -pytables>=3.4.2 +tables pytest-cov pytest-xdist s3fs diff --git a/ci/travis-36.yaml b/ci/travis-36.yaml index 7aa27beacf976..89f2aa2251383 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -10,6 +10,7 @@ dependencies: - feather-format - flake8>=3.5 - flake8-comprehensions + - flake8-rst - gcsfs - geopandas - html5lib diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 59f21ddfb3d3f..6694737737562 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -206,7 +206,7 @@ def decorator(accessor): def __init__(self, pandas_object): # noqa: E999 ... - + For consistency with pandas methods, you should raise an ``AttributeError`` if the data passed to your accessor has an incorrect dtype. From 67cf0431311bab67072ad0744a01d71388182311 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Wed, 31 Oct 2018 18:58:34 +0100 Subject: [PATCH 09/11] revert back to minimal changes in doc folder Signed-off-by: Fabian Haase --- doc/source/advanced.rst | 63 +++++++++++++++++++++++++----- doc/source/basics.rst | 37 +++++++++++++----- doc/source/comparison_with_sas.rst | 17 ++++---- doc/source/contributing.rst | 10 ++--- doc/source/cookbook.rst | 23 ++++++----- doc/source/enhancingperf.rst | 19 ++++----- doc/source/gotchas.rst | 34 ++++++++++------ doc/source/groupby.rst | 8 ++-- doc/source/io.rst | 10 ++--- setup.cfg | 1 - 10 files changed, 148 insertions(+), 74 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 818f7d38650a0..24c117a534209 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -503,6 +503,47 @@ method, allowing you to permute the hierarchical index levels in one step: df[:5].reorder_levels([1,0], axis=0) +.. _advanced.index_names: + +Renaming names of an ``Index`` or ``MultiIndex`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The :meth:`~DataFrame.rename` method is used to rename the labels of a +``MultiIndex``, and is typically used to rename the columns of a ``DataFrame``. +The ``columns`` argument of ``rename`` allows a dictionary to be specified +that includes only the columns you wish to rename. + +.. ipython:: python + + df.rename(columns={0: "col0", 1: "col1"}) + +This method can also be used to rename specific labels of the main index +of the ``DataFrame``. + +.. ipython:: python + + df.rename(index={"one" : "two", "y" : "z"}) + +The :meth:`~DataFrame.rename_axis` method is used to rename the name of a +``Index`` or ``MultiIndex``. 
In particular, the names of the levels of a +``MultiIndex`` can be specified, which is useful if ``reset_index()`` is later +used to move the values from the ``MultiIndex`` to a column. + +.. ipython:: python + + df.rename_axis(index=['abc', 'def']) + +Note that the columns of a ``DataFrame`` are an index, so that using +``rename_axis`` with the ``columns`` argument will change the name of that +index. + +.. ipython:: python + + df.rename_axis(columns="Cols").columns + +Both ``rename`` and ``rename_axis`` support specifying a dictionary, +``Series`` or a mapping function to map labels/names to new values. + Sorting a ``MultiIndex`` ------------------------ @@ -738,15 +779,17 @@ values **not** in the categories, similarly to how you can reindex **any** panda Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories or a ``TypeError`` will be raised. - .. code-block:: python + .. code-block:: ipython + + In [9]: df3 = pd.DataFrame({'A' : np.arange(6), + 'B' : pd.Series(list('aabbca')).astype('category')}) - >>> df3 = pd.DataFrame({'A': np.arange(6), - ... 'B': pd.Series(list('aabbca')).astype('category')}) - >>> df3 = df3.set_index('B') - >>> df3.index - CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category') + In [11]: df3 = df3.set_index('B') - >>> pd.concat([df2, df3]) + In [11]: df3.index + Out[11]: CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category') + + In [12]: pd.concat([df2, df3] TypeError: categories must match existing categories when appending .. _indexing.rangeindex: @@ -1028,14 +1071,14 @@ On the other hand, if the index is not monotonic, then both slice bounds must be # OK because 2 and 4 are in the index df.loc[2:4, :] -.. code-block:: python +.. code-block:: ipython # 0 is not in the index - >>> df.loc[0:4, :] + In [9]: df.loc[0:4, :] KeyError: 0 # 3 is not a unique label - >>> df.loc[2:3, :] + In [11]: df.loc[2:3, :] KeyError: 'Cannot get right slice bound for non-unique label: 3' ``Index.is_monotonic_increasing`` and ``Index.is_monotonic_decreasing`` only check that diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 31916524f8715..40913c4f0f39c 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -302,17 +302,23 @@ To evaluate single-element pandas objects in a boolean context, use the method .. warning:: - Using a DataFrame as a condition will raise errors, - as you are trying to compare multiple values: + You might be tempted to do the following: .. code-block:: python - >>> if df: - ... do_something() - ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). + >>> if df: # noqa: E999 + ... + + Or + + .. code-block:: python + + >>> df and df2 # noqa: E999 + + These will both raise errors, as you are trying to compare multiple values. + + .. code-block:: console - >>> if df and df2: - ... do_something() ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). See :ref:`gotchas` for a more detailed discussion. @@ -1459,8 +1465,21 @@ for altering the ``Series.name`` attribute. .. _basics.rename_axis: -The Panel class has a related :meth:`~Panel.rename_axis` class which can rename -any of its three axes. +.. versionadded:: 0.24.0 + +The methods :meth:`~DataFrame.rename_axis` and :meth:`~Series.rename_axis` +allow specific names of a `MultiIndex` to be changed (as opposed to the +labels). + +.. 
ipython:: python + + df = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], + 'y': [10, 20, 30, 40, 50, 60]}, + index=pd.MultiIndex.from_product([['a', 'b', 'c'], [1, 2]], + names=['let', 'num'])) + df + df.rename_axis(index={'let': 'abc'}) + df.rename_axis(index=str.upper) .. _basics.iteration: diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index e1ab3b9ee6b29..144ed6123cf90 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -298,8 +298,8 @@ see the :ref:`timeseries documentation` for more details. .. ipython:: python :suppress: - tips = tips.drop(['date1', 'date2', 'date1_year', 'date2_month', - 'date1_next', 'months_between'], axis=1) + tips = tips.drop(['date1','date2','date1_year', + 'date2_month','date1_next','months_between'], axis=1) Selection of Columns ~~~~~~~~~~~~~~~~~~~~ @@ -744,9 +744,12 @@ XPORT is a relatively limited format and the parsing of it is not as optimized as some of the other pandas readers. An alternative way to interop data between SAS and pandas is to serialize to csv. ->>> # version 0.17, 10M rows ->>> %time df = pd.read_sas('big.xpt') -Wall time: 14.6 s +.. code-block:: ipython ->>> %time df = pd.read_csv('big.csv') -Wall time: 4.86 s + # version 0.17, 10M rows + + In [8]: %time df = pd.read_sas('big.xpt') + Wall time: 14.6 s + + In [9]: %time df = pd.read_csv('big.csv') + Wall time: 4.86 s diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 0ef428655e7c9..4a8f618af9e7e 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -783,12 +783,10 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', - ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='example')) - ]) + @pytest.mark.parametrize( + 'dtype', ['float32', pytest.param('int16', marks=pytest.mark.skip), + pytest.param('int32', marks=pytest.mark.xfail( + reason='to show how it works'))]) def test_mark(dtype): assert str(np.dtype(dtype)) == 'float32' diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index db73bd4be5a8f..097abe20373e2 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -968,11 +968,14 @@ Parsing date components in multi-columns Parsing date components in multi-columns is faster with a format -.. code-block:: +.. code-block:: ipython - >>> i = pd.date_range('20000101', periods=10000) - >>> df = pd.DataFrame({year: i.year, month: i.month, day: i.day}) - >>> df.head() + In [30]: i = pd.date_range('20000101',periods=10000) + + In [31]: df = pd.DataFrame(dict(year = i.year, month = i.month, day = i.day)) + + In [32]: df.head() + Out[32]: day month year 0 1 1 2000 1 2 1 2000 @@ -980,14 +983,14 @@ Parsing date components in multi-columns is faster with a format 3 4 1 2000 4 5 1 2000 - >>> %timeit pd.to_datetime(df.year * 10000 + df.month * 100 + df.day, - ... format='%Y%m%d') + In [33]: %timeit pd.to_datetime(df.year*10000+df.month*100+df.day,format='%Y%m%d') 100 loops, best of 3: 7.08 ms per loop # simulate combinging into a string, then parsing - >>> ds = df.apply(lambda x: "%04d%02d%02d" % - ... 
(x['year'], x['month'], x['day']), axis=1) - >>> ds.head() + In [34]: ds = df.apply(lambda x: "%04d%02d%02d" % (x['year'],x['month'],x['day']),axis=1) + + In [35]: ds.head() + Out[35]: 0 20000101 1 20000102 2 20000103 @@ -995,7 +998,7 @@ Parsing date components in multi-columns is faster with a format 4 20000105 dtype: object - >>> %timeit pd.to_datetime(ds) + In [36]: %timeit pd.to_datetime(ds) 1 loops, best of 3: 488 ms per loop Skip row between header and data diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index d4f17f0778ac8..471f23e7af4e6 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -388,18 +388,19 @@ Consider the following toy example of doubling each observation: def double_every_value_withnumba(x): return x * 2 +.. code-block:: ipython ->>> # Custom function without numba ->>> %timeit df['col1_doubled'] = df.a.apply(double_every_value_nonumba) -1000 loops, best of 3: 797 us per loop + # Custom function without numba + In [5]: %timeit df['col1_doubled'] = df.a.apply(double_every_value_nonumba) + 1000 loops, best of 3: 797 us per loop ->>> # Standard implementation (faster than a custom function) ->>> %timeit df['col1_doubled'] = df.a*2 -1000 loops, best of 3: 233 us per loop + # Standard implementation (faster than a custom function) + In [6]: %timeit df['col1_doubled'] = df.a*2 + 1000 loops, best of 3: 233 us per loop ->>> # Custom function with numba ->>> %timeit df['col1_doubled'] = double_every_value_withnumba(df.a.values) -1000 loops, best of 3: 145 us per loop + # Custom function with numba + In [7]: %timeit df['col1_doubled'] = double_every_value_withnumba(df.a.values) + 1000 loops, best of 3: 145 us per loop Caveats ~~~~~~~ diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 82a5cf4d8645c..6c22be0d91b24 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -96,32 +96,40 @@ something to a ``bool``. This happens in an ``if``-statement or when using the boolean operations: ``and``, ``or``, and ``not``. It is not clear what the result of the following code should be: ->>> if pd.Series([False, True, False]): -... print("I was true") +.. code-block:: python + + >>> if pd.Series([False, True, False]): # noqa: E999 + ... Should it be ``True`` because it's not zero-length, or ``False`` because there are ``False`` values? It is unclear, so instead, pandas raises a ``ValueError``: ->>> if pd.Series([False, True, False]): -... print("I was true") -Traceback - ... -ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). +.. code-block:: python + + >>> if pd.Series([False, True, False]): + ... print("I was true") + Traceback + ... + ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all(). You need to explicitly choose what you want to do with the ``DataFrame``, e.g. use :meth:`~DataFrame.any`, :meth:`~DataFrame.all` or :meth:`~DataFrame.empty`. Alternatively, you might want to compare if the pandas object is ``None``: ->>> if pd.Series([False, True, False]) is not None: -... print("I was not None") -I was not None +.. code-block:: python + + >>> if pd.Series([False, True, False]) is not None: + ... print("I was not None") + I was not None Below is how to check if any of the values are ``True``: ->>> if pd.Series([False, True, False]).any(): -... print("I am any") -I am any +.. code-block:: python + + >>> if pd.Series([False, True, False]).any(): + ... 
+    ...     print("I am any")
+    I am any
 
 To evaluate single-element pandas objects in a boolean context, use the method
 :meth:`~DataFrame.bool`:
diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst
index a1e6a272d8ad2..d50877a602c95 100644
--- a/doc/source/groupby.rst
+++ b/doc/source/groupby.rst
@@ -81,10 +81,10 @@ object (more on what the GroupBy object is later), you may do the following:
 
 .. code-block:: python
 
-    # default is axis=0
-    >>> grouped = obj.groupby(key)
-    >>> grouped = obj.groupby(key, axis=1)
-    >>> grouped = obj.groupby([key1, key2])
+   # default is axis=0
+   >>> grouped = obj.groupby(key)
+   >>> grouped = obj.groupby(key, axis=1)
+   >>> grouped = obj.groupby([key1, key2])
 
 The mapping can be specified many different ways:
 
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 2b826cd4c9243..0db9af31bba6b 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -3483,7 +3483,6 @@ This format is specified by default when using ``put`` or ``to_hdf`` or by ``format='fixed'`` or ``format='f'``.
 .. code-block:: python
 
    >>> pd.DataFrame(randn(10, 2)).to_hdf('test_fixed.h5', 'df')
-
    >>> pd.read_hdf('test_fixed.h5', 'df', where='index>5')
    TypeError: cannot pass a where specification when reading a fixed format.
               this store must be selected in its entirety
@@ -3580,13 +3579,14 @@ will yield a tuple for each group key along with the relative keys of its contents.
 
   Hierarchical keys cannot be retrieved as dotted (attribute) access as described above for items stored under the root node.
 
-  .. code-block:: python
+  .. code-block:: ipython
 
-      >>> store.foo.bar.bah
+      In [8]: store.foo.bar.bah
       AttributeError: 'HDFStore' object has no attribute 'foo'
 
-      # you can directly access the actual PyTables node by using the root node
-      >>> store.root.foo.bar.bah
+      # you can directly access the actual PyTables node by using the root node
+      In [9]: store.root.foo.bar.bah
+      Out[9]:
       /foo/bar/bah (Group) ''
         children := ['block0_items' (Array), 'block0_values' (Array), 'axis0' (Array), 'axis1' (Array)]
 
@@ -3737,7 +3737,7 @@ The right-hand side of the sub-expression (after a comparison operator) can be:
 
    instead of this
 
-   .. code-block:: python
+   .. code-block:: ipython
 
       string = "HolyMoly'"
      store.select('df', 'index == %s' % string)
diff --git a/setup.cfg b/setup.cfg
index c73ce3de2891b..d306596a54700 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -28,7 +28,6 @@ exclude =
     doc/temp/*.py,
    .eggs/*.py,
     versioneer.py
-    .tox
 
 [flake8-rst]
 ignore =

From 948f176007bf7244a64ecd41b9f50b01e8784dd4 Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Thu, 1 Nov 2018 11:25:12 +0100
Subject: [PATCH 10/11] Cleanup

Signed-off-by: Fabian Haase
---
 doc/source/basics.rst                 | 4 ++--
 doc/source/contributing_docstring.rst | 2 +-
 doc/source/enhancingperf.rst          | 6 ++++--
 doc/source/gotchas.rst                | 2 +-
 doc/source/missing_data.rst           | 2 +-
 5 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 40913c4f0f39c..518226dddd2b3 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -313,11 +313,11 @@ To evaluate single-element pandas objects in a boolean context, use the method
 
     .. code-block:: python
 
-       >>> df and df2  # noqa: E999
+       >>> df and df2
 
     These will both raise errors, as you are trying to compare multiple values.
 
-    .. code-block:: console
+    .. code-block:: python-traceback
 
        ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
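The gotchas and basics hunks above all revolve around the same trap, so a concrete illustration may help while reviewing. The following snippet is not part of any patch in this series; it is a minimal, self-contained sketch of the explicit checks the rewritten docs recommend, assuming only a pandas installation:

.. code-block:: python

   import pandas as pd

   s = pd.Series([False, True, False])

   # Explicit reductions replace the ambiguous ``if s: ...``
   s.any()    # True  -> at least one element is True
   s.all()    # False -> not every element is True
   s.empty    # False -> the Series has elements

   # The ambiguous form raises, exactly as the docs above show
   try:
       bool(s)
   except ValueError as err:
       print(err)  # truth value of a Series is ambiguous

Single-element objects additionally offer ``.bool()`` and ``.item()``, which the basics.rst section touched by this patch covers.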
diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst
index ade7120f71d63..dcaf339c0228b 100644
--- a/doc/source/contributing_docstring.rst
+++ b/doc/source/contributing_docstring.rst
@@ -189,7 +189,7 @@ infinitive verb.
 
     .. code-block:: python
 
-    def astype(dtype):  # noqa: F811
+    def astype(dtype):
         """
         Casts Series type.
 
diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst
index 471f23e7af4e6..2ca8a2b7ac0f8 100644
--- a/doc/source/enhancingperf.rst
+++ b/doc/source/enhancingperf.rst
@@ -363,8 +363,10 @@ take the plain Python code from above and annotate with the ``@jit`` decorator.
 Note that we directly pass NumPy arrays to the Numba function. ``compute_numba`` is just a wrapper that provides a
 nicer interface by passing/returning pandas objects.
 
->>> %timeit compute_numba(df)
-1000 loops, best of 3: 798 us per loop
+.. code-block:: ipython
+
+   In [4]: %timeit compute_numba(df)
+   1000 loops, best of 3: 798 us per loop
 
 In this example, using Numba was faster than Cython.
 
diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst
index 6c22be0d91b24..0eb2a4eed8581 100644
--- a/doc/source/gotchas.rst
+++ b/doc/source/gotchas.rst
@@ -316,7 +316,7 @@ Occasionally you may have to deal with data that were created on a machine with
 a different byte order than the one on which you are running Python. A common
 symptom of this issue is an error like:
 
-.. code-block:: console
+.. code-block:: python-traceback
 
     Traceback
         ...
diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst
index 636418a6c89fd..4864637691607 100644
--- a/doc/source/missing_data.rst
+++ b/doc/source/missing_data.rst
@@ -727,7 +727,7 @@ rules introduced in the table below.
    :header: "data type", "Cast to"
    :widths: 40, 40
 
-   integer, float
+   integer, float
    boolean, object
    float, no cast
    object, no cast

From c01e22d3ad5d8476e920bae101b53955d821818c Mon Sep 17 00:00:00 2001
From: Fabian Haase
Date: Thu, 1 Nov 2018 11:27:04 +0100
Subject: [PATCH 11/11] Remove optional noqa comments

Signed-off-by: Fabian Haase
---
 doc/source/contributing.rst           | 4 ++--
 doc/source/contributing_docstring.rst | 9 ++++++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 4a8f618af9e7e..aa20199c95ca7 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -744,7 +744,7 @@ Transitioning to ``pytest``
 
 .. code-block:: python
 
     class TestReallyCoolFeature(object):
-        ....  # noqa: E999
+        pass
 
 Going forward, we are moving to a more *functional* style using the `pytest <http://docs.pytest.org/en/latest/>`__ framework, which offers a richer
 testing framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this:
@@ -752,7 +752,7 @@ framework that will facilitate testing and developing. Thus, instead of writing
 .. code-block:: python
 
     def test_really_cool_feature():
-        ....  # noqa: E999
+        pass
 
 Using ``pytest``
 ~~~~~~~~~~~~~~~~
diff --git a/doc/source/contributing_docstring.rst b/doc/source/contributing_docstring.rst
index dcaf339c0228b..b7b42abd611e3 100644
--- a/doc/source/contributing_docstring.rst
+++ b/doc/source/contributing_docstring.rst
@@ -197,8 +197,9 @@ infinitive verb.
         """
         pass
 
+.. code-block:: python
 
-    def astype(dtype):  # noqa: F811
+    def astype(dtype):
         """
         Method to cast Series type.
 
@@ -206,8 +207,9 @@ infinitive verb.
         """
         pass
 
+.. code-block:: python
 
-    def astype(dtype):  # noqa: F811
+    def astype(dtype):
         """
         Cast Series type
 
@@ -215,8 +217,9 @@
""" pass +.. code-block:: python - def astype(dtype): # noqa: F811 + def astype(dtype): """ Cast Series type from its current type to the new type defined in the parameter dtype.