diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 98b91bf4a152c..27d413783931a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -994,6 +994,7 @@ Numeric - Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`) - Bug in :meth:`Series.dot` returning ``object`` dtype for :class:`ArrowDtype` and nullable-dtype data (:issue:`61375`) - Bug in :meth:`Series.std` and :meth:`Series.var` when using complex-valued data (:issue:`61645`) +- Bug in :meth:`Series.var` incorrectly computing variance for complex arrays by discarding the imaginary part during mean calculation (:issue:`62421`) - Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`) Conversion diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 2c3b70f7efd2e..362deb82c074c 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1013,7 +1013,11 @@ def nanvar( # observations. # # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count + if values.dtype.kind == "c": + # For complex numbers, preserve the dtype to avoid discarding imaginary part + avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count + else: + avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count if axis is not None: avg = np.expand_dims(avg, axis) if values.dtype.kind == "c": diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index db27572b9da26..5fc80ffabc2ef 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -787,6 +787,27 @@ def test_var_complex_array(self): assert ser.var(ddof=1) == 1.0 assert ser.std(ddof=1) == 1.0 + @pytest.mark.parametrize( + "values,ddof,expected", + [ + ([1 + 2j, 2 + 3j, 3 + 4j], 1, 2.0), + ([1 + 2j, 2 + 3j, 3 + 4j], 0, 4 / 3), + ([1 + 2j, 2 + 3j, 3 + 4j, np.nan + 0j], 1, 2.0), + ], + ) + def test_var_complex_values(self, values, ddof, expected): + # GH#62421 + ser = Series(values, dtype=np.complex128) + result = ser.var(ddof=ddof) + tm.assert_almost_equal(result, expected) + + def test_var_complex_dtype_preserved(self): + # GH#62421 + ser = Series([1 + 2j, 2 + 3j, 3 + 4j], dtype=np.complex128) + mean = ser.mean() + assert isinstance(mean, complex) + assert mean == 2 + 3j + @pytest.mark.parametrize("dtype", ("m8[ns]", "M8[ns]", "M8[ns, UTC]")) def test_empty_timeseries_reductions_return_nat(self, dtype, skipna): # covers GH#11245