Skip to content

Commit 890d79b

Browse files
committed
Fix variance calculation for complex numbers by preserving dtype
1 parent c8213d1 commit 890d79b

File tree

3 files changed

+27
-1
lines changed

3 files changed

+27
-1
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -993,6 +993,7 @@ Numeric
993993
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)
994994
- Bug in :meth:`Series.dot` returning ``object`` dtype for :class:`ArrowDtype` and nullable-dtype data (:issue:`61375`)
995995
- Bug in :meth:`Series.std` and :meth:`Series.var` when using complex-valued data (:issue:`61645`)
996+
- Bug in :meth:`Series.var` returning an incorrect variance for complex-valued data because the imaginary part was discarded when computing the mean (:issue:`62421`)
996997
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)
997998

998999
Conversion

pandas/core/nanops.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1013,7 +1013,11 @@ def nanvar(
10131013
# observations.
10141014
#
10151015
# See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
1016-
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
1016+
if values.dtype.kind == "c":
1017+
# For complex numbers, preserve the dtype to avoid discarding the imaginary part
1018+
avg = _ensure_numeric(values.sum(axis=axis, dtype=values.dtype)) / count
1019+
else:
1020+
avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
10171021
if axis is not None:
10181022
avg = np.expand_dims(avg, axis)
10191023
if values.dtype.kind == "c":

pandas/tests/reductions/test_reductions.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,27 @@ def test_var_complex_array(self):
787787
assert ser.var(ddof=1) == 1.0
788788
assert ser.std(ddof=1) == 1.0
789789

790+
@pytest.mark.parametrize(
791+
"values,ddof,expected",
792+
[
793+
([1 + 2j, 2 + 3j, 3 + 4j], 1, 2.0),
794+
([1 + 2j, 2 + 3j, 3 + 4j], 0, 4 / 3),
795+
([1 + 2j, 2 + 3j, 3 + 4j, np.nan + 0j], 1, 2.0),
796+
],
797+
)
798+
def test_var_complex_values(self, values, ddof, expected):
799+
# GH#62421
800+
ser = Series(values, dtype=np.complex128)
801+
result = ser.var(ddof=ddof)
802+
tm.assert_almost_equal(result, expected)
803+
804+
def test_var_complex_dtype_preserved(self):
805+
# GH#62421
806+
ser = Series([1 + 2j, 2 + 3j, 3 + 4j], dtype=np.complex128)
807+
mean = ser.mean()
808+
assert isinstance(mean, complex)
809+
assert mean == 2 + 3j
810+
790811
@pytest.mark.parametrize("dtype", ("m8[ns]", "M8[ns]", "M8[ns, UTC]"))
791812
def test_empty_timeseries_reductions_return_nat(self, dtype, skipna):
792813
# covers GH#11245

0 commit comments

Comments
 (0)