Skip to content

Commit 1ae595f

Browse files
authored
Merge branch 'master' into get_loc-nan
2 parents 9cf08c1 + 3992126 commit 1ae595f

File tree

521 files changed

+19982
-17080
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

521 files changed

+19982
-17080
lines changed

.travis.yml

+11-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ matrix:
3030
- python: 3.5
3131

3232
include:
33+
- dist: bionic
34+
# 18.04
35+
python: 3.8.0
36+
env:
37+
- JOB="3.8-dev" PATTERN="(not slow and not network)"
38+
3339
- dist: trusty
3440
env:
3541
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
@@ -71,24 +77,27 @@ before_install:
7177
# This overrides travis and tells it to look nowhere.
7278
- export BOTO_CONFIG=/dev/null
7379

80+
7481
install:
7582
- echo "install start"
7683
- ci/prep_cython_cache.sh
7784
- ci/setup_env.sh
7885
- ci/submit_cython_cache.sh
7986
- echo "install done"
8087

88+
8189
before_script:
8290
# display server (for clipboard functionality) needs to be started here,
8391
# does not work if done in install:setup_env.sh (GH-26103)
8492
- export DISPLAY=":99.0"
8593
- echo "sh -e /etc/init.d/xvfb start"
86-
- sh -e /etc/init.d/xvfb start
94+
- if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi
8795
- sleep 3
8896

8997
script:
9098
- echo "script start"
91-
- source activate pandas-dev
99+
- echo "$JOB"
100+
- if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
92101
- ci/run_tests.sh
93102

94103
after_script:

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<div align="center">
2-
<img src="https://github.com/pandas-dev/pandas/blob/master/doc/logo/pandas_logo.png"><br>
2+
<img src="https://dev.pandas.io/static/img/pandas.svg"><br>
33
</div>
44

55
-----------------
@@ -190,7 +190,7 @@ or for installing in [development mode](https://pip.pypa.io/en/latest/reference/
190190

191191

192192
```sh
193-
python -m pip install --no-build-isolation -e .
193+
python -m pip install -e . --no-build-isolation --no-use-pep517
194194
```
195195

196196
If you have `make`, you can also use `make develop` to run the same command.

asv_bench/benchmarks/categoricals.py

+14
Original file line numberDiff line numberDiff line change
@@ -282,4 +282,18 @@ def time_sort_values(self):
282282
self.index.sort_values(ascending=False)
283283

284284

285+
class SearchSorted:
286+
def setup(self):
287+
N = 10 ** 5
288+
self.ci = tm.makeCategoricalIndex(N).sort_values()
289+
self.c = self.ci.values
290+
self.key = self.ci.categories[1]
291+
292+
def time_categorical_index_contains(self):
293+
self.ci.searchsorted(self.key)
294+
295+
def time_categorical_contains(self):
296+
self.c.searchsorted(self.key)
297+
298+
285299
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/ctors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class SeriesConstructors:
6767
def setup(self, data_fmt, with_index, dtype):
6868
if data_fmt in (gen_of_str, gen_of_tuples) and with_index:
6969
raise NotImplementedError(
70-
"Series constructors do not support " "using generators with indexes"
70+
"Series constructors do not support using generators with indexes"
7171
)
7272
N = 10 ** 4
7373
if dtype == "float":

asv_bench/benchmarks/eval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def time_add(self, engine, threads):
2727

2828
def time_and(self, engine, threads):
2929
pd.eval(
30-
"(self.df > 0) & (self.df2 > 0) & " "(self.df3 > 0) & (self.df4 > 0)",
30+
"(self.df > 0) & (self.df2 > 0) & (self.df3 > 0) & (self.df4 > 0)",
3131
engine=engine,
3232
)
3333

asv_bench/benchmarks/io/hdf.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def time_write_store_table_dc(self):
8888

8989
def time_query_store_table_wide(self):
9090
self.store.select(
91-
"table_wide", where="index > self.start_wide and " "index < self.stop_wide"
91+
"table_wide", where="index > self.start_wide and index < self.stop_wide"
9292
)
9393

9494
def time_query_store_table(self):
95-
self.store.select("table", where="index > self.start and " "index < self.stop")
95+
self.store.select("table", where="index > self.start and index < self.stop")
9696

9797
def time_store_repr(self):
9898
repr(self.store)

asv_bench/benchmarks/join_merge.py

+46-13
Original file line numberDiff line numberDiff line change
@@ -273,10 +273,10 @@ def time_merge_ordered(self):
273273

274274

275275
class MergeAsof:
276-
params = [["backward", "forward", "nearest"]]
277-
param_names = ["direction"]
276+
params = [["backward", "forward", "nearest"], [None, 5]]
277+
param_names = ["direction", "tolerance"]
278278

279-
def setup(self, direction):
279+
def setup(self, direction, tolerance):
280280
one_count = 200000
281281
two_count = 1000000
282282

@@ -303,6 +303,9 @@ def setup(self, direction):
303303
df1["time32"] = np.int32(df1.time)
304304
df2["time32"] = np.int32(df2.time)
305305

306+
df1["timeu64"] = np.uint64(df1.time)
307+
df2["timeu64"] = np.uint64(df2.time)
308+
306309
self.df1a = df1[["time", "value1"]]
307310
self.df2a = df2[["time", "value2"]]
308311
self.df1b = df1[["time", "key", "value1"]]
@@ -313,22 +316,52 @@ def setup(self, direction):
313316
self.df2d = df2[["time32", "value2"]]
314317
self.df1e = df1[["time", "key", "key2", "value1"]]
315318
self.df2e = df2[["time", "key", "key2", "value2"]]
319+
self.df1f = df1[["timeu64", "value1"]]
320+
self.df2f = df2[["timeu64", "value2"]]
321+
322+
def time_on_int(self, direction, tolerance):
323+
merge_asof(
324+
self.df1a, self.df2a, on="time", direction=direction, tolerance=tolerance
325+
)
316326

317-
def time_on_int(self, direction):
318-
merge_asof(self.df1a, self.df2a, on="time", direction=direction)
327+
def time_on_int32(self, direction, tolerance):
328+
merge_asof(
329+
self.df1d, self.df2d, on="time32", direction=direction, tolerance=tolerance
330+
)
319331

320-
def time_on_int32(self, direction):
321-
merge_asof(self.df1d, self.df2d, on="time32", direction=direction)
332+
def time_on_uint64(self, direction, tolerance):
333+
merge_asof(
334+
self.df1f, self.df2f, on="timeu64", direction=direction, tolerance=tolerance
335+
)
322336

323-
def time_by_object(self, direction):
324-
merge_asof(self.df1b, self.df2b, on="time", by="key", direction=direction)
337+
def time_by_object(self, direction, tolerance):
338+
merge_asof(
339+
self.df1b,
340+
self.df2b,
341+
on="time",
342+
by="key",
343+
direction=direction,
344+
tolerance=tolerance,
345+
)
325346

326-
def time_by_int(self, direction):
327-
merge_asof(self.df1c, self.df2c, on="time", by="key2", direction=direction)
347+
def time_by_int(self, direction, tolerance):
348+
merge_asof(
349+
self.df1c,
350+
self.df2c,
351+
on="time",
352+
by="key2",
353+
direction=direction,
354+
tolerance=tolerance,
355+
)
328356

329-
def time_multiby(self, direction):
357+
def time_multiby(self, direction, tolerance):
330358
merge_asof(
331-
self.df1e, self.df2e, on="time", by=["key", "key2"], direction=direction
359+
self.df1e,
360+
self.df2e,
361+
on="time",
362+
by=["key", "key2"],
363+
direction=direction,
364+
tolerance=tolerance,
332365
)
333366

334367

asv_bench/benchmarks/offset.py

-49
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
1-
from datetime import datetime
21
import warnings
32

4-
import numpy as np
5-
63
import pandas as pd
74

85
try:
@@ -54,24 +51,6 @@ def time_apply_index(self, offset):
5451
offset.apply_index(self.rng)
5552

5653

57-
class OnOffset:
58-
59-
params = offsets
60-
param_names = ["offset"]
61-
62-
def setup(self, offset):
63-
self.dates = [
64-
datetime(2016, m, d)
65-
for m in [10, 11, 12]
66-
for d in [1, 2, 3, 28, 29, 30, 31]
67-
if not (m == 11 and d == 31)
68-
]
69-
70-
def time_on_offset(self, offset):
71-
for date in self.dates:
72-
offset.onOffset(date)
73-
74-
7554
class OffsetSeriesArithmetic:
7655

7756
params = offsets
@@ -99,31 +78,3 @@ def setup(self, offset):
9978
def time_add_offset(self, offset):
10079
with warnings.catch_warnings(record=True):
10180
self.data + offset
102-
103-
104-
class OffestDatetimeArithmetic:
105-
106-
params = offsets
107-
param_names = ["offset"]
108-
109-
def setup(self, offset):
110-
self.date = datetime(2011, 1, 1)
111-
self.dt64 = np.datetime64("2011-01-01 09:00Z")
112-
113-
def time_apply(self, offset):
114-
offset.apply(self.date)
115-
116-
def time_apply_np_dt64(self, offset):
117-
offset.apply(self.dt64)
118-
119-
def time_add(self, offset):
120-
self.date + offset
121-
122-
def time_add_10(self, offset):
123-
self.date + (10 * offset)
124-
125-
def time_subtract(self, offset):
126-
self.date - offset
127-
128-
def time_subtract_10(self, offset):
129-
self.date - (10 * offset)

asv_bench/benchmarks/period.py

+4-63
Original file line numberDiff line numberDiff line change
@@ -1,71 +1,12 @@
1+
"""
2+
Period benchmarks with non-tslibs dependencies. See
3+
benchmarks.tslibs.period for benchmarks that rely only on tslibs.
4+
"""
15
from pandas import DataFrame, Period, PeriodIndex, Series, date_range, period_range
26

37
from pandas.tseries.frequencies import to_offset
48

59

6-
class PeriodProperties:
7-
8-
params = (
9-
["M", "min"],
10-
[
11-
"year",
12-
"month",
13-
"day",
14-
"hour",
15-
"minute",
16-
"second",
17-
"is_leap_year",
18-
"quarter",
19-
"qyear",
20-
"week",
21-
"daysinmonth",
22-
"dayofweek",
23-
"dayofyear",
24-
"start_time",
25-
"end_time",
26-
],
27-
)
28-
param_names = ["freq", "attr"]
29-
30-
def setup(self, freq, attr):
31-
self.per = Period("2012-06-01", freq=freq)
32-
33-
def time_property(self, freq, attr):
34-
getattr(self.per, attr)
35-
36-
37-
class PeriodUnaryMethods:
38-
39-
params = ["M", "min"]
40-
param_names = ["freq"]
41-
42-
def setup(self, freq):
43-
self.per = Period("2012-06-01", freq=freq)
44-
45-
def time_to_timestamp(self, freq):
46-
self.per.to_timestamp()
47-
48-
def time_now(self, freq):
49-
self.per.now(freq)
50-
51-
def time_asfreq(self, freq):
52-
self.per.asfreq("A")
53-
54-
55-
class PeriodConstructor:
56-
params = [["D"], [True, False]]
57-
param_names = ["freq", "is_offset"]
58-
59-
def setup(self, freq, is_offset):
60-
if is_offset:
61-
self.freq = to_offset(freq)
62-
else:
63-
self.freq = freq
64-
65-
def time_period_constructor(self, freq, is_offset):
66-
Period("2012-06-01", freq=freq)
67-
68-
6910
class PeriodIndexConstructor:
7011

7112
params = [["D"], [True, False]]

asv_bench/benchmarks/rolling.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ def peakmem_rolling(self, constructor, window, dtype, method):
2828
class Apply:
2929
params = (
3030
["DataFrame", "Series"],
31-
[10, 1000],
31+
[3, 300],
3232
["int", "float"],
3333
[sum, np.sum, lambda x: np.sum(x) + 5],
3434
[True, False],
3535
)
36-
param_names = ["contructor", "window", "dtype", "function", "raw"]
36+
param_names = ["constructor", "window", "dtype", "function", "raw"]
3737

3838
def setup(self, constructor, window, dtype, function, raw):
39-
N = 10 ** 5
39+
N = 10 ** 3
4040
arr = (100 * np.random.random(N)).astype(dtype)
4141
self.roll = getattr(pd, constructor)(arr).rolling(window)
4242

0 commit comments

Comments
 (0)