Skip to content

BUG: FloatingArray(float16data) #44715

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,8 @@ ExtensionArray
- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
- Bug in :func:`array` incorrectly raising when passed an ``ndarray`` with ``float16`` dtype (:issue:`44715`)
- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
- Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
-

Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,10 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
"values should be floating numpy array. Use "
"the 'pd.array' function instead"
)
if values.dtype == np.float16:
# If we don't raise here, then accessing self.dtype would raise
raise TypeError("FloatingArray does not support np.float16 dtype.")

super().__init__(values, mask, copy=copy)

@classmethod
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,10 @@ def reconstruct(x):
return IntegerArray(x, m)
elif is_float_dtype(x.dtype):
m = mask.copy()
if x.dtype == np.float16:
# reached in e.g. np.sqrt on BooleanArray
# we don't support float16
x = x.astype(np.float32)
return FloatingArray(x, m)
else:
x[mask] = np.nan
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,12 @@ def array(
elif inferred_dtype == "integer":
return IntegerArray._from_sequence(data, copy=copy)

elif inferred_dtype in ("floating", "mixed-integer-float"):
elif (
inferred_dtype in ("floating", "mixed-integer-float")
and getattr(data, "dtype", None) != np.float16
):
# GH#44715 Exclude np.float16 because FloatingArray does not support it;
# we will fall back to PandasArray.
return FloatingArray._from_sequence(data, copy=copy)

elif inferred_dtype == "boolean":
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/arrays/boolean/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,17 @@ def test_ufuncs_unary(ufunc):
tm.assert_series_equal(result, expected)


def test_ufunc_numeric():
# Check that applying np.sqrt to a nullable boolean array gives a Float32
# masked array.
# np.sqrt on np.bool returns float16, which we upcast to Float32
# because we do not have a Float16 extension dtype
arr = pd.array([True, False, None], dtype="boolean")

res = np.sqrt(arr)

# the missing entry is expected to stay missing in the result
expected = pd.array([1, 0, None], dtype="Float32")
tm.assert_extension_array_equal(res, expected)


@pytest.mark.parametrize("values", [[True, False], [True, None]])
def test_ufunc_reduce_raises(values):
a = pd.array(values, dtype="boolean")
Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/arrays/floating/test_construction.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import locale

import numpy as np
import pytest

from pandas.compat import (
is_platform_windows,
np_version_under1p19,
)

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import FloatingArray
Expand Down Expand Up @@ -40,6 +47,33 @@ def test_floating_array_constructor():
FloatingArray(values)


def test_floating_array_disallows_float16(request):
# GH#44715
# Check that float16 is rejected both by the FloatingArray constructor
# and via the "Float16" dtype string passed to pd.array.
arr = np.array([1, 2], dtype=np.float16)
mask = np.array([False, False])

# Direct construction from float16 values must raise a TypeError
msg = "FloatingArray does not support np.float16 dtype"
with pytest.raises(TypeError, match=msg):
FloatingArray(arr, mask)

if not np_version_under1p19:
# Troubleshooting aid: confirm that numpy's own lower-casing helper maps
# "Float16" to "float16"; on failure the assert message shows the value.
# https://github.com/numpy/numpy/issues/20512#issuecomment-985807740
lowered = np.core._type_aliases.english_lower("Float16")
assert lowered == "float16", lowered

if np_version_under1p19 or (
locale.getlocale()[0] != "en_US" and not is_platform_windows()
):
# Mark the test as an expected failure in these environments.
# the locale condition may need to be refined; this fails on
# the CI in the ZH_CN build
mark = pytest.mark.xfail(reason="numpy does not raise on np.dtype('Float16')")
request.node.add_marker(mark)

# The "Float16" string alias is expected to be rejected with a TypeError
with pytest.raises(TypeError, match="data type 'Float16' not understood"):
pd.array([1.0, 2.0], dtype="Float16")


def test_floating_array_constructor_copy():
values = np.array([1, 2, 3, 4], dtype="float64")
mask = np.array([False, False, False, True], dtype="bool")
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@
# String alias passes through to NumPy
([1, 2], "float32", PandasArray(np.array([1, 2], dtype="float32"))),
([1, 2], "int64", PandasArray(np.array([1, 2], dtype=np.int64))),
# GH#44715 FloatingArray does not support float16, so fall back to PandasArray
(
np.array([1, 2], dtype=np.float16),
None,
PandasArray(np.array([1, 2], dtype=np.float16)),
),
# idempotency with e.g. pd.array(pd.array([1, 2], dtype="int64"))
(
PandasArray(np.array([1, 2], dtype=np.int32)),
Expand Down