Skip to content

Commit 8da9632

Browse files
Merging from master
2 parents 6a9c229 + 090957c commit 8da9632

File tree

85 files changed

+675
-843
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

85 files changed

+675
-843
lines changed

doc/source/user_guide/text.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,9 @@ l. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
8787

8888
.. ipython:: python
8989
90-
s.astype(object).str.count("a")
91-
s.astype(object).dropna().str.count("a")
90+
s2 = pd.Series(["a", None, "b"], dtype="object")
91+
s2.str.count("a")
92+
s2.dropna().str.count("a")
9293
9394
When NA values are present, the output dtype is float64. Similarly for
9495
methods returning boolean values.

doc/source/whatsnew/v1.0.0.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ Other enhancements
227227
- Added new writer for exporting Stata dta files in version 118, ``StataWriter118``. This format supports exporting strings containing Unicode characters (:issue:`23573`)
228228
- :meth:`Series.map` now accepts ``collections.abc.Mapping`` subclasses as a mapper (:issue:`29733`)
229229
- The ``pandas.datetime`` class is now deprecated. Import from ``datetime`` instead (:issue:`30296`)
230+
- :meth:`Timestamp.fromisocalendar` is now compatible with Python 3.8 and above (:issue:`28115`)
230231

231232

232233

@@ -836,6 +837,7 @@ Datetimelike
836837
- Bug in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` where inplace addition and subtraction did not actually operate inplace (:issue:`24115`)
837838
- Bug in :func:`pandas.to_datetime` when called with ``Series`` storing ``IntegerArray`` raising ``TypeError`` instead of returning ``Series`` (:issue:`30050`)
838839
- Bug in :func:`date_range` with custom business hours as ``freq`` and given number of ``periods`` (:issue:`30593`)
840+
- Bug in :class:`PeriodIndex` comparisons incorrectly casting integers to :class:`Period` objects, inconsistent with the :class:`Period` comparison behavior (:issue:`30722`)
839841

840842
Timedelta
841843
^^^^^^^^^
@@ -885,6 +887,7 @@ Interval
885887

886888
- Bug in :meth:`IntervalIndex.get_indexer` where a :class:`Categorical` or :class:`CategoricalIndex` ``target`` would incorrectly raise a ``TypeError`` (:issue:`30063`)
887889
- Bug in ``pandas.core.dtypes.cast.infer_dtype_from_scalar`` where passing ``pandas_dtype=True`` did not infer :class:`IntervalDtype` (:issue:`30337`)
890+
- Bug in :class:`Series` constructor where constructing a ``Series`` from a ``list`` of :class:`Interval` objects resulted in ``object`` dtype instead of :class:`IntervalDtype` (:issue:`23563`)
888891
- Bug in :class:`IntervalDtype` where the ``kind`` attribute was incorrectly set as ``None`` instead of ``"O"`` (:issue:`30568`)
889892
- Bug in :class:`IntervalIndex`, :class:`~arrays.IntervalArray`, and :class:`Series` with interval data where equality comparisons were incorrect (:issue:`24112`)
890893

@@ -1001,7 +1004,7 @@ Reshaping
10011004
Sparse
10021005
^^^^^^
10031006
- Bug in :class:`SparseDataFrame` arithmetic operations incorrectly casting inputs to float (:issue:`28107`)
1004-
-
1007+
- Bug in ``DataFrame.sparse`` returning a ``Series`` when there was a column named ``sparse`` rather than the accessor (:issue:`30758`)
10051008
-
10061009

10071010
ExtensionArray

pandas/_libs/interval.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ cdef class Interval(IntervalMixin):
326326
def __hash__(self):
327327
return hash((self.left, self.right, self.closed))
328328

329-
def __contains__(self, key):
329+
def __contains__(self, key) -> bool:
330330
if _interval_like(key):
331331
raise TypeError("__contains__ not defined for two intervals")
332332
return ((self.left < key if self.open_left else self.left <= key) and

pandas/_libs/tslib.pyx

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
120120
elif box == "datetime":
121121
func_create = create_datetime_from_ts
122122
else:
123-
raise ValueError("box must be one of 'datetime', 'date', 'time' or"
124-
" 'timestamp'")
123+
raise ValueError("box must be one of 'datetime', 'date', 'time' or 'timestamp'")
125124

126125
if is_utc(tz) or tz is None:
127126
for i in range(n):

pandas/_libs/tslibs/nattype.pyx

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ from cpython.object cimport (
55
from cpython.datetime cimport (datetime,
66
PyDateTime_Check, PyDelta_Check,
77
PyDateTime_IMPORT)
8+
9+
from cpython.version cimport PY_MINOR_VERSION
10+
811
PyDateTime_IMPORT
912

1013
import numpy as np
@@ -19,6 +22,7 @@ from pandas._libs.tslibs.util cimport (
1922
get_nat, is_integer_object, is_float_object, is_datetime64_object,
2023
is_timedelta64_object)
2124

25+
2226
# ----------------------------------------------------------------------
2327
# Constants
2428
nat_strings = {'NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN'}
@@ -427,6 +431,10 @@ class NaTType(_NaT):
427431
tzname = _make_error_func('tzname', datetime)
428432
utcoffset = _make_error_func('utcoffset', datetime)
429433

434+
# "fromisocalendar" was introduced in 3.8
435+
if PY_MINOR_VERSION >= 8:
436+
fromisocalendar = _make_error_func('fromisocalendar', datetime)
437+
430438
# ----------------------------------------------------------------------
431439
# The remaining methods have docstrings copy/pasted from the analogous
432440
# Timestamp methods.

pandas/_libs/tslibs/strptime.pyx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,8 +278,8 @@ def array_strptime(object[:] values, object fmt,
278278
"the ISO year directive '%G' and a weekday "
279279
"directive '%A', '%a', '%w', or '%u'.")
280280
else:
281-
raise ValueError("ISO week directive '%V' is incompatible with"
282-
" the year directive '%Y'. Use the ISO year "
281+
raise ValueError("ISO week directive '%V' is incompatible with "
282+
"the year directive '%Y'. Use the ISO year "
283283
"'%G' instead.")
284284

285285
# If we know the wk of the year and what day of that wk, we can figure

pandas/_libs/tslibs/timestamps.pyx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -814,9 +814,9 @@ default 'raise'
814814
'shift_backward')
815815
if nonexistent not in nonexistent_options and not isinstance(
816816
nonexistent, timedelta):
817-
raise ValueError("The nonexistent argument must be one of 'raise',"
818-
" 'NaT', 'shift_forward', 'shift_backward' or"
819-
" a timedelta object")
817+
raise ValueError("The nonexistent argument must be one of 'raise', "
818+
"'NaT', 'shift_forward', 'shift_backward' or "
819+
"a timedelta object")
820820

821821
if self.tzinfo is None:
822822
# tz naive, localize

pandas/_testing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1287,7 +1287,7 @@ def assert_frame_equal(
12871287
>>> assert_frame_equal(df1, df2)
12881288
Traceback (most recent call last):
12891289
...
1290-
AssertionError: Attributes of DataFrame.iloc[:, 1] are different
1290+
AssertionError: Attributes of DataFrame.iloc[:, 1] (column name="b") are different
12911291
12921292
Attribute "dtype" are different
12931293
[left]: int64
@@ -1366,7 +1366,7 @@ def assert_frame_equal(
13661366
check_names=check_names,
13671367
check_datetimelike_compat=check_datetimelike_compat,
13681368
check_categorical=check_categorical,
1369-
obj=f"{obj}.iloc[:, {i}]",
1369+
obj=f'{obj}.iloc[:, {i}] (column name="{col}")',
13701370
)
13711371

13721372

pandas/core/arrays/categorical.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
)
4040
from pandas.core.dtypes.dtypes import CategoricalDtype
4141
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
42-
from pandas.core.dtypes.inference import is_hashable
42+
from pandas.core.dtypes.inference import is_array_like, is_hashable
4343
from pandas.core.dtypes.missing import isna, notna
4444

4545
from pandas.core import ops
@@ -1874,7 +1874,7 @@ def __iter__(self):
18741874
"""
18751875
return iter(self._internal_get_values().tolist())
18761876

1877-
def __contains__(self, key):
1877+
def __contains__(self, key) -> bool:
18781878
"""
18791879
Returns True if `key` is in this Categorical.
18801880
"""
@@ -1884,7 +1884,7 @@ def __contains__(self, key):
18841884

18851885
return contains(self, key, container=self._codes)
18861886

1887-
def _tidy_repr(self, max_vals=10, footer=True):
1887+
def _tidy_repr(self, max_vals=10, footer=True) -> str:
18881888
""" a short repr displaying only max_vals and an optional (but default
18891889
footer)
18901890
"""
@@ -1921,7 +1921,7 @@ def _repr_categories(self):
19211921
category_strs = [x.strip() for x in category_strs]
19221922
return category_strs
19231923

1924-
def _repr_categories_info(self):
1924+
def _repr_categories_info(self) -> str:
19251925
"""
19261926
Returns a string representation of the footer.
19271927
"""
@@ -1951,11 +1951,11 @@ def _repr_categories_info(self):
19511951
# replace to simple save space by
19521952
return levheader + "[" + levstring.replace(" < ... < ", " ... ") + "]"
19531953

1954-
def _repr_footer(self):
1954+
def _repr_footer(self) -> str:
19551955
info = self._repr_categories_info()
19561956
return f"Length: {len(self)}\n{info}"
19571957

1958-
def _get_repr(self, length=True, na_rep="NaN", footer=True):
1958+
def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str:
19591959
from pandas.io.formats import format as fmt
19601960

19611961
formatter = fmt.CategoricalFormatter(
@@ -1998,7 +1998,10 @@ def __getitem__(self, key):
19981998
else:
19991999
return self.categories[i]
20002000

2001-
elif com.is_bool_indexer(key):
2001+
if is_list_like(key) and not is_array_like(key):
2002+
key = np.asarray(key)
2003+
2004+
if com.is_bool_indexer(key):
20022005
key = check_bool_array_indexer(self, key)
20032006

20042007
return self._constructor(

pandas/core/arrays/datetimelike.py

Lines changed: 98 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
1212
from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64
1313
from pandas._typing import DatetimeLikeScalar
14+
from pandas.compat import set_function_name
1415
from pandas.compat.numpy import function as nv
1516
from pandas.errors import AbstractMethodError, NullFrequencyError, PerformanceWarning
1617
from pandas.util._decorators import Appender, Substitution
@@ -37,19 +38,94 @@
3738
from pandas.core.dtypes.inference import is_array_like
3839
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
3940

40-
from pandas.core import missing, nanops
41+
from pandas.core import missing, nanops, ops
4142
from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
4243
import pandas.core.common as com
4344
from pandas.core.indexers import check_bool_array_indexer
4445
from pandas.core.ops.common import unpack_zerodim_and_defer
45-
from pandas.core.ops.invalid import make_invalid_op
46+
from pandas.core.ops.invalid import invalid_comparison, make_invalid_op
4647

4748
from pandas.tseries import frequencies
4849
from pandas.tseries.offsets import DateOffset, Tick
4950

5051
from .base import ExtensionArray, ExtensionOpsMixin
5152

5253

54+
def _datetimelike_array_cmp(cls, op):
    """
    Create the comparison method for ``op`` on a datetime-like array class.

    Timestamp/Timedelta/Period-like operands are converted to boxed
    scalars/arrays and then compared through their ``i8`` views.

    Parameters
    ----------
    cls : type
        Class passed to ``set_function_name`` when naming the method.
    op : callable
        Comparison operator; ``op.__name__`` determines the dunder name.

    Returns
    -------
    callable
        The wrapped comparison method, named ``__<op.__name__>__``.
    """
    opname = f"__{op.__name__}__"
    # Value written wherever a missing entry takes part in the comparison:
    # True only for "!=", False for every other operator.
    nat_result = opname == "__ne__"

    @unpack_zerodim_and_defer(opname)
    def wrapper(self, other):
        if isinstance(other, str):
            try:
                # GH#18435 strings get a pass from tzawareness compat
                other = self._scalar_from_string(other)
            except ValueError:
                # could not be parsed as Timestamp/Timedelta/Period
                return invalid_comparison(self, other, op)

        if isinstance(other, self._recognized_scalars) or other is NaT:
            # Scalar operand: box it, validate it, compare on the i8 values.
            other = self._scalar_type(other)
            self._check_compatible_with(other)

            scalar_i8 = self._unbox_scalar(other)
            result = op(self.view("i8"), scalar_i8)
            if isna(other):
                result.fill(nat_result)
        else:
            # Anything else must be a list-like of matching length.
            if not is_list_like(other):
                return invalid_comparison(self, other, op)
            if len(other) != len(self):
                raise ValueError("Lengths must match")

            if isinstance(other, list):
                # TODO: could use pd.Index to do inference?
                other = np.array(other)

            if not isinstance(other, (np.ndarray, type(self))):
                return invalid_comparison(self, other, op)

            if is_object_dtype(other):
                # comp_method_OBJECT_ARRAY is required here; a plain numpy
                # comparison would fail to raise when comparing tz-aware
                # and tz-naive values.
                with np.errstate(all="ignore"):
                    result = ops.comp_method_OBJECT_ARRAY(
                        op, self.astype(object), other
                    )
                other_na = isna(other)
            else:
                if not type(self)._is_recognized_dtype(other.dtype):
                    return invalid_comparison(self, other, op)

                # For PeriodDType this casting is unnecessary
                other = type(self)._from_sequence(other)
                self._check_compatible_with(other)

                result = op(self.view("i8"), other.view("i8"))
                other_na = other._isnan

            if other_na.any():
                result[other_na] = nat_result

        if self._hasnans:
            result[self._isnan] = nat_result

        return result

    return set_function_name(wrapper, opname, cls)
127+
128+
53129
class AttributesMixin:
54130
_data: np.ndarray
55131

@@ -109,7 +185,7 @@ def _unbox_scalar(self, value: Union[Period, Timestamp, Timedelta, NaTType]) ->
109185
raise AbstractMethodError(self)
110186

111187
def _check_compatible_with(
112-
self, other: Union[Period, Timestamp, Timedelta, NaTType]
188+
self, other: Union[Period, Timestamp, Timedelta, NaTType], setitem: bool = False
113189
) -> None:
114190
"""
115191
Verify that `self` and `other` are compatible.
@@ -123,6 +199,9 @@ def _check_compatible_with(
123199
Parameters
124200
----------
125201
other
202+
setitem : bool, default False
203+
For __setitem__ we may have stricter compatibility restrictions than
204+
for comparisons.
126205
127206
Raises
128207
------
@@ -500,10 +579,10 @@ def __setitem__(
500579
return
501580

502581
value = type(self)._from_sequence(value, dtype=self.dtype)
503-
self._check_compatible_with(value)
582+
self._check_compatible_with(value, setitem=True)
504583
value = value.asi8
505584
elif isinstance(value, self._scalar_type):
506-
self._check_compatible_with(value)
585+
self._check_compatible_with(value, setitem=True)
507586
value = self._unbox_scalar(value)
508587
elif is_valid_nat_for_dtype(value, self.dtype):
509588
value = iNaT
@@ -588,7 +667,17 @@ def _validate_fill_value(self, fill_value):
588667
------
589668
ValueError
590669
"""
591-
raise AbstractMethodError(self)
670+
if isna(fill_value):
671+
fill_value = iNaT
672+
elif isinstance(fill_value, self._recognized_scalars):
673+
self._check_compatible_with(fill_value)
674+
fill_value = self._scalar_type(fill_value)
675+
fill_value = self._unbox_scalar(fill_value)
676+
else:
677+
raise ValueError(
678+
f"'fill_value' should be a {self._scalar_type}. Got '{fill_value}'."
679+
)
680+
return fill_value
592681

593682
def take(self, indices, allow_fill=False, fill_value=None):
594683
if allow_fill:
@@ -921,6 +1010,7 @@ def _is_unique(self):
9211010

9221011
# ------------------------------------------------------------------
9231012
# Arithmetic Methods
1013+
_create_comparison_method = classmethod(_datetimelike_array_cmp)
9241014

9251015
# pow is invalid for all three subclasses; TimedeltaArray will override
9261016
# the multiplication and division ops
@@ -1472,6 +1562,8 @@ def mean(self, skipna=True):
14721562
return self._box_func(result)
14731563

14741564

1565+
DatetimeLikeArrayMixin._add_comparison_ops()
1566+
14751567
# -------------------------------------------------------------------
14761568
# Shared Constructor Helpers
14771569

0 commit comments

Comments
 (0)