Skip to content

Commit d77b823

Browse files
committed
Merge remote-tracking branch 'upstream/master' into jorisvandenbossche-ea-array-protocol
2 parents 9506fef + 58a59bd commit d77b823

File tree

300 files changed

+3737
-3294
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

300 files changed

+3737
-3294
lines changed

Diff for: ci/code_checks.sh

+11-1
Original file line number | Diff line number | Diff line change
@@ -44,6 +44,13 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
4444
flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
4545
RET=$(($RET + $?)) ; echo $MSG "DONE"
4646

47+
echo "flake8-rst --version"
48+
flake8-rst --version
49+
50+
MSG='Linting code-blocks in .rst documentation' ; echo $MSG
51+
flake8-rst doc/source --filename=*.rst
52+
RET=$(($RET + $?)) ; echo $MSG "DONE"
53+
4754
# Check that cython casting is of the form `<type>obj` as opposed to `<type> obj`;
4855
# it doesn't make a difference, but we want to be internally consistent.
4956
# Note: this grep pattern is (intended to be) equivalent to the python
@@ -64,6 +71,9 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
6471
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
6572
RET=$(($RET + $?)) ; echo $MSG "DONE"
6673

74+
echo "isort --version-number"
75+
isort --version-number
76+
6777
# Imports - Check formatting using isort see setup.cfg for settings
6878
MSG='Check import format using isort ' ; echo $MSG
6979
isort --recursive --check-only pandas
@@ -141,7 +151,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
141151

142152
MSG='Doctests generic.py' ; echo $MSG
143153
pytest -q --doctest-modules pandas/core/generic.py \
144-
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs"
154+
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -to_json -transpose -values -xs"
145155
RET=$(($RET + $?)) ; echo $MSG "DONE"
146156

147157
MSG='Doctests top-level reshaping functions' ; echo $MSG

Diff for: ci/deps/travis-36.yaml

+1
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ dependencies:
99
- fastparquet
1010
- flake8>=3.5
1111
- flake8-comprehensions
12+
- flake8-rst
1213
- gcsfs
1314
- geopandas
1415
- html5lib

Diff for: ci/environment-dev.yaml

+1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ dependencies:
77
- NumPy
88
- flake8
99
- flake8-comprehensions
10+
- flake8-rst
1011
- hypothesis>=3.58.0
1112
- isort
1213
- moto

Diff for: ci/requirements_dev.txt

+1
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ Cython>=0.28.2
44
NumPy
55
flake8
66
flake8-comprehensions
7+
flake8-rst
78
hypothesis>=3.58.0
89
isort
910
moto

Diff for: doc/source/10min.rst

+28-28
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ a default integer index:
4545

4646
.. ipython:: python
4747
48-
s = pd.Series([1,3,5,np.nan,6,8])
48+
s = pd.Series([1, 3, 5, np.nan, 6, 8])
4949
s
5050
5151
Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index
@@ -62,12 +62,12 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s
6262

6363
.. ipython:: python
6464
65-
df2 = pd.DataFrame({ 'A' : 1.,
66-
'B' : pd.Timestamp('20130102'),
67-
'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
68-
'D' : np.array([3] * 4,dtype='int32'),
69-
'E' : pd.Categorical(["test","train","test","train"]),
70-
'F' : 'foo' })
65+
df2 = pd.DataFrame({'A': 1.,
66+
'B': pd.Timestamp('20130102'),
67+
'C': pd.Series(1, index=list(range(4)),dtype='float32'),
68+
'D': np.array([3] * 4, dtype='int32'),
69+
'E': pd.Categorical(["test", "train", "test", "train"]),
70+
'F': 'foo'})
7171
df2
7272
7373
The columns of the resulting ``DataFrame`` have different
@@ -283,9 +283,9 @@ Using the :func:`~Series.isin` method for filtering:
283283
.. ipython:: python
284284
285285
df2 = df.copy()
286-
df2['E'] = ['one', 'one','two','three','four','three']
286+
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
287287
df2
288-
df2[df2['E'].isin(['two','four'])]
288+
df2[df2['E'].isin(['two', 'four'])]
289289
290290
Setting
291291
~~~~~~~
@@ -295,7 +295,7 @@ by the indexes.
295295

296296
.. ipython:: python
297297
298-
s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
298+
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
299299
s1
300300
df['F'] = s1
301301
@@ -394,7 +394,7 @@ In addition, pandas automatically broadcasts along the specified dimension.
394394

395395
.. ipython:: python
396396
397-
s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
397+
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
398398
s
399399
df.sub(s, axis='index')
400400
@@ -492,7 +492,7 @@ section.
492492

493493
.. ipython:: python
494494
495-
df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
495+
df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
496496
df
497497
s = df.iloc[3]
498498
df.append(s, ignore_index=True)
@@ -512,12 +512,12 @@ See the :ref:`Grouping section <groupby>`.
512512

513513
.. ipython:: python
514514
515-
df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
516-
'foo', 'bar', 'foo', 'foo'],
517-
'B' : ['one', 'one', 'two', 'three',
518-
'two', 'two', 'one', 'three'],
519-
'C' : np.random.randn(8),
520-
'D' : np.random.randn(8)})
515+
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
516+
'foo', 'bar', 'foo', 'foo'],
517+
'B': ['one', 'one', 'two', 'three',
518+
'two', 'two', 'one', 'three'],
519+
'C': np.random.randn(8),
520+
'D': np.random.randn(8)})
521521
df
522522
523523
Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting
@@ -532,7 +532,7 @@ apply the ``sum`` function.
532532

533533
.. ipython:: python
534534
535-
df.groupby(['A','B']).sum()
535+
df.groupby(['A', 'B']).sum()
536536
537537
Reshaping
538538
---------
@@ -578,11 +578,11 @@ See the section on :ref:`Pivot Tables <reshaping.pivot>`.
578578

579579
.. ipython:: python
580580
581-
df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
582-
'B' : ['A', 'B', 'C'] * 4,
583-
'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
584-
'D' : np.random.randn(12),
585-
'E' : np.random.randn(12)})
581+
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
582+
'B': ['A', 'B', 'C'] * 4,
583+
'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
584+
'D': np.random.randn(12),
585+
'E': np.random.randn(12)})
586586
df
587587
588588
We can produce pivot tables from this data very easily:
@@ -653,7 +653,7 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the
653653

654654
.. ipython:: python
655655
656-
df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
656+
df = pd.DataFrame({"id":[1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
657657
658658
Convert the raw grades to a categorical data type.
659659

@@ -753,13 +753,13 @@ Writing to a HDF5 Store.
753753

754754
.. ipython:: python
755755
756-
df.to_hdf('foo.h5','df')
756+
df.to_hdf('foo.h5', 'df')
757757
758758
Reading from a HDF5 Store.
759759

760760
.. ipython:: python
761761
762-
pd.read_hdf('foo.h5','df')
762+
pd.read_hdf('foo.h5', 'df')
763763
764764
.. ipython:: python
765765
:suppress:
@@ -796,7 +796,7 @@ If you are attempting to perform an operation you might see an exception like:
796796
.. code-block:: python
797797
798798
>>> if pd.Series([False, True, False]):
799-
print("I was true")
799+
... print("I was true")
800800
Traceback
801801
...
802802
ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().

Diff for: doc/source/advanced.rst

+5-5
Original file line number | Diff line number | Diff line change
@@ -318,13 +318,13 @@ As usual, **both sides** of the slicers are included as this is label indexing.
318318

319319
.. code-block:: python
320320
321-
df.loc[(slice('A1','A3'),.....), :]
321+
df.loc[(slice('A1', 'A3'), ...), :] # noqa: E999
322322
323323
  You should **not** do this:
324324

325325
.. code-block:: python
326326
327-
df.loc[(slice('A1','A3'),.....)]
327+
df.loc[(slice('A1', 'A3'), ...)] # noqa: E999
328328
329329
.. ipython:: python
330330
@@ -532,7 +532,7 @@ used to move the values from the ``MultiIndex`` to a column.
532532
.. ipython:: python
533533
534534
df.rename_axis(index=['abc', 'def'])
535-
535+
536536
Note that the columns of a ``DataFrame`` are an index, so that using
537537
``rename_axis`` with the ``columns`` argument will change the name of that
538538
index.
@@ -779,7 +779,7 @@ values **not** in the categories, similarly to how you can reindex **any** panda
779779
Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories
780780
or a ``TypeError`` will be raised.
781781

782-
.. code-block:: python
782+
.. code-block:: ipython
783783
784784
In [9]: df3 = pd.DataFrame({'A' : np.arange(6),
785785
'B' : pd.Series(list('aabbca')).astype('category')})
@@ -1071,7 +1071,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
10711071
# OK because 2 and 4 are in the index
10721072
df.loc[2:4, :]
10731073
1074-
.. code-block:: python
1074+
.. code-block:: ipython
10751075
10761076
# 0 is not in the index
10771077
In [9]: df.loc[0:4, :]

Diff for: doc/source/basics.rst

+5-6
Original file line number | Diff line number | Diff line change
@@ -306,8 +306,8 @@ To evaluate single-element pandas objects in a boolean context, use the method
306306

307307
.. code-block:: python
308308
309-
>>> if df:
310-
...
309+
>>> if df: # noqa: E999
310+
...
311311
312312
Or
313313

@@ -317,7 +317,7 @@ To evaluate single-element pandas objects in a boolean context, use the method
317317
318318
These will both raise errors, as you are trying to compare multiple values.
319319

320-
.. code-block:: python
320+
.. code-block:: python-traceback
321321
322322
ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
323323
@@ -732,9 +732,8 @@ with the equivalent
732732
.. code-block:: python
733733
734734
>>> (df.pipe(h)
735-
.pipe(g, arg1=1)
736-
.pipe(f, arg2=2, arg3=3)
737-
)
735+
... .pipe(g, arg1=1)
736+
... .pipe(f, arg2=2, arg3=3))
738737
739738
Pandas encourages the second style, which is known as method chaining.
740739
``pipe`` makes it easy to use your own or another library's functions

Diff for: doc/source/comparison_with_sas.rst

+1-1
Original file line number | Diff line number | Diff line change
@@ -744,7 +744,7 @@ XPORT is a relatively limited format and the parsing of it is not as
744744
optimized as some of the other pandas readers. An alternative way
745745
to interop data between SAS and pandas is to serialize to csv.
746746

747-
.. code-block:: python
747+
.. code-block:: ipython
748748
749749
# version 0.17, 10M rows
750750

Diff for: doc/source/contributing.rst

+12-6
Original file line number | Diff line number | Diff line change
@@ -792,15 +792,15 @@ Transitioning to ``pytest``
792792
.. code-block:: python
793793
794794
class TestReallyCoolFeature(object):
795-
....
795+
pass
796796
797797
Going forward, we are moving to a more *functional* style using the `pytest <http://docs.pytest.org/en/latest/>`__ framework, which offers a richer testing
798798
framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this:
799799

800800
.. code-block:: python
801801
802802
def test_really_cool_feature():
803-
....
803+
pass
804804
805805
Using ``pytest``
806806
~~~~~~~~~~~~~~~~
@@ -825,25 +825,30 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place
825825
import pandas as pd
826826
from pandas.util import testing as tm
827827
828+
828829
@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64'])
829830
def test_dtypes(dtype):
830831
assert str(np.dtype(dtype)) == dtype
831832
832-
@pytest.mark.parametrize('dtype', ['float32',
833-
pytest.param('int16', marks=pytest.mark.skip),
834-
pytest.param('int32',
835-
marks=pytest.mark.xfail(reason='to show how it works'))])
833+
834+
@pytest.mark.parametrize(
835+
'dtype', ['float32', pytest.param('int16', marks=pytest.mark.skip),
836+
pytest.param('int32', marks=pytest.mark.xfail(
837+
reason='to show how it works'))])
836838
def test_mark(dtype):
837839
assert str(np.dtype(dtype)) == 'float32'
838840
841+
839842
@pytest.fixture
840843
def series():
841844
return pd.Series([1, 2, 3])
842845
846+
843847
@pytest.fixture(params=['int8', 'int16', 'int32', 'int64'])
844848
def dtype(request):
845849
return request.param
846850
851+
847852
def test_series(series, dtype):
848853
result = series.astype(dtype)
849854
assert result.dtype == dtype
@@ -912,6 +917,7 @@ for details <https://hypothesis.readthedocs.io/en/latest/index.html>`_.
912917
st.lists(any_json_value), st.dictionaries(st.text(), any_json_value)
913918
))
914919
920+
915921
@given(value=any_json_value)
916922
def test_json_roundtrip(value):
917923
result = json.loads(json.dumps(value))

0 commit comments

Comments
 (0)