Skip to content

Commit f42afd3

Browse files
committed
Fix variance calculation for complex numbers by preserving dtype
1 parent c8213d1 commit f42afd3

File tree

3 files changed

+25
-1
lines changed

3 files changed

+25
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,7 @@ Numeric
993993
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)
994994
- Bug in :meth:`Series.dot` returning ``object`` dtype for :class:`ArrowDtype` and nullable-dtype data (:issue:`61375`)
995995
- Bug in :meth:`Series.std` and :meth:`Series.var` when using complex-valued data (:issue:`61645`)
996+
- Bug in :meth:`Series.var` returning an incorrect variance for complex-valued data because the imaginary part was discarded when calculating the mean (:issue:`62421`)
996997
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
997998

998999
Conversion

pandas/core/nanops.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,11 @@ def nanvar(
10131013
# observations.
10141014
#
10151015
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
1016-
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
1016+
if values.dtype.kind == "c":
1017+
# Sum complex input in its own dtype; float64 would discard the imaginary part
1018+
avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count
1019+
else:
1020+
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
10171021
if axis is not None:
10181022
avg = np.expand_dims(avg, axis)
10191023
if values.dtype.kind == "c":

pandas/tests/reductions/test_reductions.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,25 @@ def test_var_complex_array(self):
787787
assert ser.var(ddof=1) == 1.0
788788
assert ser.std(ddof=1) == 1.0
789789

790+
ser2 = Series([1 + 2j, 2 + 3j, 3 + 4j], dtype=np.complex128)
791+
expected_var = 2.0
792+
tm.assert_almost_equal(ser2.var(ddof=1), expected_var)
793+
tm.assert_almost_equal(
794+
ser2.var(ddof=1), np.var([1 + 2j, 2 + 3j, 3 + 4j], ddof=1)
795+
)
796+
797+
# Test with NaN
798+
ser3 = Series([1 + 2j, 2 + 3j, 3 + 4j, np.nan + 0j], dtype=np.complex128)
799+
tm.assert_almost_equal(ser3.var(ddof=1), expected_var)
800+
801+
# Test other ddof values
802+
tm.assert_almost_equal(ser2.var(ddof=0), 4 / 3)
803+
804+
# Test that imaginary part is preserved in mean calculation
805+
mean = ser2.mean()
806+
assert isinstance(mean, complex)
807+
assert mean == 2 + 3j
808+
790809
@pytest.mark.parametrize("dtype", ("m8[ns]", "M8[ns]", "M8[ns, UTC]"))
791810
def test_empty_timeseries_reductions_return_nat(self, dtype, skipna):
792811
# covers GH#11245

0 commit comments

Comments
 (0)