pandas-dev · MichaelTiemannOSC · Aug 25, 2023 · Aug 25, 2023 · Aug 29, 2023 · Aug 29, 2023
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -351,6 +351,7 @@ Styler
 
 Other
 ^^^^^
+- Add complex128 to the types of numerical data we test across the test suite (:issue:`54761`)
 - Bug in :func:`cut` incorrectly allowing cutting of timezone-aware datetimes with timezone-naive bins (:issue:`54964`)
 
 .. ***DO NOT USE THIS SECTION***

diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py
@@ -100,6 +100,11 @@ def _astype_nansafe(
  elif np.issubdtype(arr.dtype, np.floating) and dtype.kind in "iu":
  return _astype_float_to_int_nansafe(arr, dtype, copy)
 
+ elif np.issubdtype(arr.dtype, np.complexfloating) and is_object_dtype(dtype):
+ res = arr.astype(dtype, copy=copy)
+ res[np.isnan(arr)] = np.nan
+ return res
+
  elif arr.dtype == object:
  # if we have a datetime/timedelta array of objects
  # then coerce to datetime64[ns] and use DatetimeArray.astype

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -1003,11 +1003,25 @@ def nanvar(
  # cancellation errors and relatively accurate for small numbers of
  # observations.
  #
- # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
- avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
+ # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ if values.dtype.kind == "c":
+ avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count
+ else:
+ avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
  if axis is not None:
  avg = np.expand_dims(avg, axis)
- sqr = _ensure_numeric((avg - values) ** 2)
+ # For complex input, see also
+ # https://numpy.org/doc/stable/reference/generated/numpy.nanvar.html#numpy-nanvar
+ # The variance of complex numbers is built from the squared magnitude
+ # (real**2 + imag**2) of each deviation, so the result is real-valued
+ if values.dtype.kind == "c":
+ deltas = _ensure_numeric(avg - values)
+ avg_re = np.real(deltas)
+ avg_im = np.imag(deltas)
+ sqr = avg_re**2 + avg_im**2
+ else:
+ sqr = _ensure_numeric((avg - values) ** 2)
+
  if mask is not None:
  np.putmask(sqr, mask, 0)
  result = sqr.sum(axis=axis, dtype=np.float64) / d

diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -977,7 +977,7 @@ def test_frame_operators_none_to_nan(self):
  df = pd.DataFrame({"a": ["a", None, "b"]})
  tm.assert_frame_equal(df + df, pd.DataFrame({"a": ["aa", np.nan, "bb"]}))
 
- @pytest.mark.parametrize("dtype", ("float", "int64"))
+ @pytest.mark.parametrize("dtype", ("float", "int64", "complex128"))
  def test_frame_operators_empty_like(self, dtype):
  # Test for issue #10181
  frames = [
@@ -1059,7 +1059,7 @@ def test_series_divmod_zero(self):
 class TestUFuncCompat:
  # TODO: add more dtypes
  @pytest.mark.parametrize("holder", [Index, RangeIndex, Series])
- @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
+ @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
  def test_ufunc_compat(self, holder, dtype):
  box = Series if holder is Series else Index
 
@@ -1075,45 +1075,75 @@ def test_ufunc_compat(self, holder, dtype):
 
  # TODO: add more dtypes
  @pytest.mark.parametrize("holder", [Index, Series])
- @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64])
+ @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64, np.complex128])
  def test_ufunc_coercions(self, holder, dtype):
  idx = holder([1, 2, 3, 4, 5], dtype=dtype, name="x")
  box = Series if holder is Series else Index
 
  result = np.sqrt(idx)
- assert result.dtype == "f8" and isinstance(result, box)
- exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=np.float64)), name="x")
+ if result.dtype.kind == "c":
+ assert result.dtype == dtype and isinstance(result, box)
+ exp_dtype = dtype
+ else:
+ assert result.dtype == "f8" and isinstance(result, box)
+ exp_dtype = np.float64
+ exp = Index(np.sqrt(np.array([1, 2, 3, 4, 5], dtype=exp_dtype)), name="x")
  exp = tm.box_expected(exp, box)
  tm.assert_equal(result, exp)
 
  result = np.divide(idx, 2.0)
- assert result.dtype == "f8" and isinstance(result, box)
- exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
+ if result.dtype.kind == "c":
+ assert result.dtype == dtype and isinstance(result, box)
+ exp_dtype = dtype
+ else:
+ assert result.dtype == "f8" and isinstance(result, box)
+ exp_dtype = np.float64
+ exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
  exp = tm.box_expected(exp, box)
  tm.assert_equal(result, exp)
 
  # _evaluate_numeric_binop
  result = idx + 2.0
- assert result.dtype == "f8" and isinstance(result, box)
- exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=np.float64, name="x")
+ if result.dtype.kind == "c":
+ assert result.dtype == dtype and isinstance(result, box)
+ exp_dtype = dtype
+ else:
+ assert result.dtype == "f8" and isinstance(result, box)
+ exp_dtype = np.float64
+ exp = Index([3.0, 4.0, 5.0, 6.0, 7.0], dtype=exp_dtype, name="x")
  exp = tm.box_expected(exp, box)
  tm.assert_equal(result, exp)
 
  result = idx - 2.0
- assert result.dtype == "f8" and isinstance(result, box)
- exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=np.float64, name="x")
+ if result.dtype.kind == "c":
+ assert result.dtype == dtype and isinstance(result, box)
+ exp_dtype = dtype
+ else:
+ assert result.dtype == "f8" and isinstance(result, box)
+ exp_dtype = np.float64
+ exp = Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype=exp_dtype, name="x")
  exp = tm.box_expected(exp, box)
  tm.assert_equal(result, exp)
 
  result = idx * 1.0
- assert result.dtype == "f8" and isinstance(result, box)
- exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=np.float64, name="x")
+ if result.dtype.kind == "c":
+ assert result.dtype == dtype and isinstance(result, box)
+ exp_dtype = dtype
+ else:
+ assert result.dtype == "f8" and isinstance(result, box)
+ exp_dtype = np.float64
+ exp = Index([1.0, 2.0, 3.0, 4.0, 5.0], dtype=exp_dtype, name="x")
  exp = tm.box_expected(exp, box)
  tm.assert_equal(result, exp)
 
  result = idx / 2.0
- assert result.dtype == "f8" and isinstance(result, box)
- exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=np.float64, name="x")
+ if result.dtype.kind == "c":
+ assert result.dtype == dtype and isinstance(result, box)
+ exp_dtype = dtype
+ else:
+ assert result.dtype == "f8" and isinstance(result, box)
+ exp_dtype = np.float64
+ exp = Index([0.5, 1.0, 1.5, 2.0, 2.5], dtype=exp_dtype, name="x")
  exp = tm.box_expected(exp, box)
  tm.assert_equal(result, exp)
 
@@ -1367,7 +1397,7 @@ def test_numeric_compat2_floordiv(self, idx, div, expected):
  # __floordiv__
  tm.assert_index_equal(idx // div, expected, exact=True)
 
- @pytest.mark.parametrize("dtype", [np.int64, np.float64])
+ @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.complex128])
  @pytest.mark.parametrize("delta", [1, 0, -1])
  def test_addsub_arithmetic(self, dtype, delta):
  # GH#8142

diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py
@@ -8,6 +8,7 @@
 
 from pandas.core.dtypes.common import (
  is_bool_dtype,
+ is_complex_dtype,
  is_integer_dtype,
 )
 
@@ -272,6 +273,9 @@ def get_reduction_result_dtype(dtype):
  data = data.astype("Float64")
  if method == "mean":
  tm.assert_extension_array_equal(result, data)
+ elif is_complex_dtype(data) and method in ["std", "var"]:
+ # std and var produce real-only results
+ tm.assert_extension_array_equal(result, data - data, check_dtype=False)
  else:
  tm.assert_extension_array_equal(result, data - data)
 

diff --git a/pandas/tests/extension/base/io.py b/pandas/tests/extension/base/io.py
@@ -9,7 +9,13 @@
 
 class BaseParsingTests:
  @pytest.mark.parametrize("engine", ["c", "python"])
- def test_EA_types(self, engine, data):
+ def test_EA_types(self, engine, data, request):
+ if engine == "c" and data.dtype.kind == "c":
+ request.node.add_marker(
+ pytest.mark.xfail(
+ reason=f"engine '{engine}' cannot parse the dtype {data.dtype.name}"
+ )
+ )
  df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
  csv_output = df.to_csv(index=False, na_rep=np.nan)
  result = pd.read_csv(

diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
@@ -19,7 +19,7 @@ class BaseOpsUtil:
  divmod_exc: type[Exception] | None = TypeError
 
  def _get_expected_exception(
- self, op_name: str, obj, other
+ self, op_name: str, obj, other, request
  ) -> type[Exception] | None:
  # Find the Exception, if any we expect to raise calling
  # obj.__op_name__(other)
@@ -54,8 +54,8 @@ def get_op_from_name(self, op_name: str):
  # case that still requires overriding _check_op or _combine, please let
  # us know at github.com/pandas-dev/pandas/issues
  @final
- def check_opname(self, ser: pd.Series, op_name: str, other):
- exc = self._get_expected_exception(op_name, ser, other)
+ def check_opname(self, ser: pd.Series, op_name: str, other, request):
+ exc = self._get_expected_exception(op_name, ser, other, request)
  op = self.get_op_from_name(op_name)
 
  self._check_op(ser, op, other, op_name, exc)
@@ -91,12 +91,12 @@ def _check_op(
 
  # see comment on check_opname
  @final
- def _check_divmod_op(self, ser: pd.Series, op, other):
+ def _check_divmod_op(self, ser: pd.Series, op, other, request):
  # check that divmod behavior matches behavior of floordiv+mod
  if op is divmod:
- exc = self._get_expected_exception("__divmod__", ser, other)
+ exc = self._get_expected_exception("__divmod__", ser, other, request)
  else:
- exc = self._get_expected_exception("__rdivmod__", ser, other)
+ exc = self._get_expected_exception("__rdivmod__", ser, other, request)
  if exc is None:
  result_div, result_mod = op(ser, other)
  if op is divmod:
@@ -128,53 +128,53 @@ class BaseArithmeticOpsTests(BaseOpsUtil):
  series_array_exc: type[Exception] | None = TypeError
  divmod_exc: type[Exception] | None = TypeError
 
- def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
+ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request):
  # series & scalar
  if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype):
  pytest.skip("Skip testing Python string formatting")
 
  op_name = all_arithmetic_operators
  ser = pd.Series(data)
- self.check_opname(ser, op_name, ser.iloc[0])
+ self.check_opname(ser, op_name, ser.iloc[0], request)
 
- def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
+ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request):
  # frame & scalar
  if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype):
  pytest.skip("Skip testing Python string formatting")
 
  op_name = all_arithmetic_operators
  df = pd.DataFrame({"A": data})
- self.check_opname(df, op_name, data[0])
+ self.check_opname(df, op_name, data[0], request)
 
- def test_arith_series_with_array(self, data, all_arithmetic_operators):
+ def test_arith_series_with_array(self, data, all_arithmetic_operators, request):
  # ndarray & other series
  op_name = all_arithmetic_operators
  ser = pd.Series(data)
- self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)))
+ self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), request)
 
- def test_divmod(self, data):
+ def test_divmod(self, data, request):
  ser = pd.Series(data)
- self._check_divmod_op(ser, divmod, 1)
- self._check_divmod_op(1, ops.rdivmod, ser)
+ self._check_divmod_op(ser, divmod, 1, request)
+ self._check_divmod_op(1, ops.rdivmod, ser, request)
 
- def test_divmod_series_array(self, data, data_for_twos):
+ def test_divmod_series_array(self, data, data_for_twos, request):
  ser = pd.Series(data)
- self._check_divmod_op(ser, divmod, data)
+ self._check_divmod_op(ser, divmod, data, request)
 
  other = data_for_twos
- self._check_divmod_op(other, ops.rdivmod, ser)
+ self._check_divmod_op(other, ops.rdivmod, ser, request)
 
  other = pd.Series(other)
- self._check_divmod_op(other, ops.rdivmod, ser)
+ self._check_divmod_op(other, ops.rdivmod, ser, request)
 
- def test_add_series_with_extension_array(self, data):
+ def test_add_series_with_extension_array(self, data, request):
  # Check adding an ExtensionArray to a Series of the same dtype matches
  # the behavior of adding the arrays directly and then wrapping in a
  # Series.
 
  ser = pd.Series(data)
 
- exc = self._get_expected_exception("__add__", ser, data)
+ exc = self._get_expected_exception("__add__", ser, data, request)
  if exc is not None:
  with pytest.raises(exc):
  ser + data

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -337,7 +337,8 @@ def test_setitem_slice_array(self, data):
 
  def test_setitem_scalar_key_sequence_raise(self, data):
  arr = data[:5].copy()
- with pytest.raises(ValueError):
+ # complex128 data raises TypeError; other numeric types raise ValueError
+ with pytest.raises((ValueError, TypeError)):
  arr[0] = arr[[0, 1]]
 
  def test_setitem_preserves_views(self, data):
@@ -438,7 +439,7 @@ def test_setitem_invalid(self, data, invalid_scalar):
  data[:] = invalid_scalar
 
  def test_setitem_2d_values(self, data):
- # GH50085
+ # GH54445
  original = data.copy()
  df = pd.DataFrame({"a": data, "b": data})
  df.loc[[0, 1], :] = df.loc[[1, 0], :].values