Fix upcasting with python builtin numbers and numpy 2 #8946

Merged Jun 11, 2024 · 52 commits · Changes from 35 commits

Commits
f3c2c93
Fix upcasting with python builtin numbers and numpy 2
djhoese Apr 15, 2024
2c8a607
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 15, 2024
fcbd821
Remove old commented code
djhoese Apr 15, 2024
3f7670b
Try updating result_type for array API
dcherian Apr 18, 2024
6115dd8
xfail
dcherian Apr 18, 2024
e3493b0
Revert "Try updating result_type for array API"
dcherian Apr 18, 2024
e27f572
Merge branch 'main' into bugfix-scalar-arr-casting
dcherian Apr 18, 2024
5b4384f
Determine python scalar dtype from array arguments
djhoese Apr 28, 2024
33d48e4
Merge branch 'main' into bugfix-scalar-arr-casting
keewis May 22, 2024
712c244
extend `get_array_namespace` to get the same namespace for multiple v…
keewis May 23, 2024
c486f8e
reflow error message
keewis May 23, 2024
6a55378
allow weakly dtyped data as a separate argument to `result_type`
keewis May 23, 2024
c38c07f
allow passing a `dtype` argument to `duck_array_ops.asarray`
keewis May 23, 2024
7b39cbe
rewrite `as_shared_dtype`
keewis May 23, 2024
85d986f
fall back to the `astype` method if `xp.astype` doesn't exist
keewis May 25, 2024
a89cba8
determine the smallest possible dtype and call `xp.result_type` again
keewis May 25, 2024
62900d6
map python types to dtype names
keewis May 25, 2024
3e2855d
apply the custom rules after both dtype combinations
keewis May 25, 2024
66fe0c5
add tests for passing scalars to `result_type`
keewis May 29, 2024
476cb11
apply the custom casting rules to scalar types + explicit dtypes
keewis May 29, 2024
4da4771
go back to the simple dtypes
keewis May 29, 2024
e193e90
comment on the fallback: if we don't have explicit dtypes, use the de…
keewis May 29, 2024
cda1ad0
Merge branch 'main' into bugfix-scalar-arr-casting
keewis May 29, 2024
47becb8
ignore the typing
keewis May 29, 2024
03974b1
don't support `datetime` scalars since that would be a new feature
keewis Jun 5, 2024
92b35b3
make sure passing only scalars also works
keewis Jun 5, 2024
109468a
move the fallback version of `result_type` into a separate function
keewis Jun 5, 2024
efb0f84
ignore the custom `inf` / `ninf` / `na` objects
keewis Jun 5, 2024
7b0d478
more temporary fixes
keewis Jun 5, 2024
aa2d372
Merge branch 'main' into bugfix-scalar-arr-casting
keewis Jun 5, 2024
463d0d6
remove debug print statements
keewis Jun 5, 2024
d33c742
change the representation for inf / ninf for boolean and nonnumeric a…
keewis Jun 7, 2024
51f15fa
pass arrays instead of relying on implicit casting using `asarray`
keewis Jun 7, 2024
2391cfb
refactor the implementation of `result_type`
keewis Jun 7, 2024
aaced1d
Merge branch 'main' into bugfix-scalar-arr-casting
keewis Jun 7, 2024
4e86aa2
remove obsolete comment
keewis Jun 7, 2024
c28233c
proper placement of the `result_type` comment
keewis Jun 7, 2024
0977707
back to using a list instead of a tuple
keewis Jun 7, 2024
57b4eec
expect the platform-specific default dtype for integers
keewis Jun 8, 2024
96e75c6
move `_future_array_api_result_type` to `npcompat`
keewis Jun 10, 2024
afa495e
rename `is_scalar_type` to `is_weak_scalar_type`
keewis Jun 10, 2024
1dc9d46
move the dispatch to `_future_array_api_result_type` to `npcompat`
keewis Jun 10, 2024
b405916
support passing *only* weak dtypes to `_future_array_api_result_type`
keewis Jun 10, 2024
66233a1
don't `xfail` the Array API `where` test
keewis Jun 10, 2024
445195b
determine the array namespace if not passed and no `cupy` arrays present
keewis Jun 10, 2024
b2a6379
comment on what to do with `npcompat.result_type`
keewis Jun 10, 2024
79d4a26
Merge branch 'main' into bugfix-scalar-arr-casting
keewis Jun 10, 2024
ab284f7
move the `result_type` compat code to `array_api_compat`
keewis Jun 10, 2024
cf11228
also update the comment
keewis Jun 10, 2024
8dbacbc
mention the Array API issue on python scalars in a comment
keewis Jun 10, 2024
d0e08d8
Merge branch 'main' into bugfix-scalar-arr-casting
keewis Jun 10, 2024
4c094c1
Merge branch 'main' into bugfix-scalar-arr-casting
keewis Jun 11, 2024
65 changes: 51 additions & 14 deletions xarray/core/dtypes.py
@@ -131,7 +131,10 @@ def get_pos_infinity(dtype, max_for_int=False):
if isdtype(dtype, "complex floating"):
return np.inf + 1j * np.inf

return INF
if isdtype(dtype, "bool"):
return True

return np.array(INF, dtype=object)


def get_neg_infinity(dtype, min_for_int=False):
@@ -159,7 +162,10 @@ def get_neg_infinity(dtype, min_for_int=False):
if isdtype(dtype, "complex floating"):
return -np.inf - 1j * np.inf

return NINF
if isdtype(dtype, "bool"):
return False

return np.array(NINF, dtype=object)


def is_datetime_like(dtype) -> bool:
@@ -209,8 +215,41 @@ def isdtype(dtype, kind: str | tuple[str, ...], xp=None) -> bool:
return xp.isdtype(dtype, kind)


def is_scalar_type(t):
return isinstance(t, (bool, int, float, complex, str, bytes))


def _future_array_api_result_type(*arrays_and_dtypes, xp):
strongly_dtyped = [t for t in arrays_and_dtypes if not is_scalar_type(t)]
weakly_dtyped = [t for t in arrays_and_dtypes if is_scalar_type(t)]

dtype = xp.result_type(*strongly_dtyped)
if not weakly_dtyped:
return dtype

possible_dtypes = {
complex: "complex64",
float: "float32",
int: "int8",
bool: "bool",
str: "str",
bytes: "bytes",
}
dtypes = [possible_dtypes.get(type(x), "object") for x in weakly_dtyped]

return xp.result_type(dtype, *dtypes)


def preprocess_scalar_types(t):
if isinstance(t, (str, bytes)):
return type(t)
else:
return t


def result_type(
*arrays_and_dtypes: np.typing.ArrayLike | np.typing.DTypeLike,
xp=None,
) -> np.dtype:
"""Like np.result_type, but with type promotion rules matching pandas.

@@ -229,24 +268,22 @@
"""
from xarray.core.duck_array_ops import get_array_namespace

# TODO(shoyer): consider moving this logic into get_array_namespace()
# or another helper function.
namespaces = {get_array_namespace(t) for t in arrays_and_dtypes}
non_numpy = namespaces - {np}
if non_numpy:
[xp] = non_numpy
else:
xp = np

types = {xp.result_type(t) for t in arrays_and_dtypes}
if xp is None:
xp = get_array_namespace(arrays_and_dtypes)

types = {xp.result_type(preprocess_scalar_types(t)) for t in arrays_and_dtypes}
if any(isinstance(t, np.dtype) for t in types):
# only check if there's numpy dtypes – the array API does not
# define the types we're checking for
for left, right in PROMOTE_TO_OBJECT:
if any(np.issubdtype(t, left) for t in types) and any(
np.issubdtype(t, right) for t in types
):
return xp.dtype(object)
return np.dtype(object)

return xp.result_type(*arrays_and_dtypes)
if xp is np or any(
isinstance(getattr(t, "dtype", t), np.dtype) for t in arrays_and_dtypes
):
return xp.result_type(*map(preprocess_scalar_types, arrays_and_dtypes))
else:
return _future_array_api_result_type(*arrays_and_dtypes, xp=xp)
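
To illustrate the new behavior, here is a minimal sketch of how the updated dtypes.result_type treats weakly dtyped Python scalars (using the internal xarray.core.dtypes module as modified in this PR; the expected results are taken from the tests below):

import numpy as np
from xarray.core import dtypes

# A Python int stays "weak", so the array dtype wins instead of NumPy 2's
# int64 promotion.
assert dtypes.result_type(np.arange(3, dtype="int8"), 1) == np.dtype("int8")

# np.nan combined with a float32 array keeps float32.
assert dtypes.result_type(np.arange(3, dtype="float32"), np.nan) == np.dtype("float32")

# Mixing strings with numeric values still promotes to object, following
# xarray's pandas-style rules.
assert dtypes.result_type(np.array(["a", "b"]), np.nan) == np.dtype(object)
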
52 changes: 40 additions & 12 deletions xarray/core/duck_array_ops.py
@@ -55,11 +55,26 @@
dask_available = module_available("dask")


def get_array_namespace(x):
if hasattr(x, "__array_namespace__"):
return x.__array_namespace__()
def get_array_namespace(*values):
def _get_array_namespace(x):
if hasattr(x, "__array_namespace__"):
return x.__array_namespace__()
else:
return np

namespaces = {_get_array_namespace(t) for t in values}
non_numpy = namespaces - {np}

if len(non_numpy) > 1:
raise TypeError(
"cannot deal with more than one type supporting the array API at the same time"
)
elif non_numpy:
[xp] = non_numpy
else:
return np
xp = np

return xp


def einsum(*args, **kwargs):
@@ -224,8 +239,16 @@ def astype(data, dtype, **kwargs):
return data.astype(dtype, **kwargs)


def asarray(data, xp=np):
return data if is_duck_array(data) else xp.asarray(data)
def asarray(data, xp=np, dtype=None):
converted = data if is_duck_array(data) else xp.asarray(data)

if dtype is None or converted.dtype == dtype:
return converted

if xp is np or not hasattr(xp, "astype"):
return converted.astype(dtype)
else:
return xp.astype(converted, dtype)


def as_shared_dtype(scalars_or_arrays, xp=np):
@@ -239,23 +262,28 @@ def as_shared_dtype(scalars_or_arrays, xp=np):
):
return scalars_or_arrays
raise ValueError(
f"Cannot cast arrays to shared type, found array types {[x.dtype for x in scalars_or_arrays]}"
"Cannot cast arrays to shared type, found"
f" array types {[x.dtype for x in scalars_or_arrays]}"
)

# Avoid calling array_type("cupy") repeatedly in the any check
array_type_cupy = array_type("cupy")
if any(isinstance(x, array_type_cupy) for x in scalars_or_arrays):
import cupy as cp

arrays = [asarray(x, xp=cp) for x in scalars_or_arrays]
else:
arrays = [asarray(x, xp=xp) for x in scalars_or_arrays]
xp = cp

# split into weakly and strongly dtyped
# TODO (keewis): remove this and the `weakly_dtyped` parameter to
# result_type once we can require a version of the Array API that allows
# passing scalars to `xp.result_type`
dtype = dtypes.result_type(*scalars_or_arrays, xp=xp)

# Pass arrays directly instead of dtypes to result_type so scalars
# get handled properly.
# Note that result_type() safely gets the dtype from dask arrays without
# evaluating them.
out_type = dtypes.result_type(*arrays)
return [astype(x, out_type, copy=False) for x in arrays]
return tuple(asarray(x, dtype=dtype, xp=xp) for x in scalars_or_arrays)


def broadcast_to(array, shape):
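
As a rough sketch of how the reworked helpers fit together (internal API, assuming the state of this branch): get_array_namespace now accepts multiple values, asarray can cast while converting, and as_shared_dtype combines the two so Python scalars no longer upcast arrays:

import numpy as np
from xarray.core import duck_array_ops

# All arguments are plain numpy arrays or scalars, so the shared namespace is numpy.
xp = duck_array_ops.get_array_namespace(np.arange(3), 1.0)
assert xp is np

# The scalar 1 is cast to the array's dtype instead of promoting to int64.
a, b = duck_array_ops.as_shared_dtype([np.arange(3, dtype="int8"), 1])
assert a.dtype == b.dtype == np.dtype("int8")
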
3 changes: 3 additions & 0 deletions xarray/tests/test_array_api.py
@@ -137,6 +137,9 @@ def test_unstack(arrays: tuple[xr.DataArray, xr.DataArray]) -> None:
assert_equal(actual, expected)


@pytest.mark.xfail(
reason="We don't know how to handle python scalars with array API arrays. This is strictly prohibited by the array API"
)
def test_where() -> None:
np_arr = xr.DataArray(np.array([1, 0]), dims="x")
xp_arr = xr.DataArray(xp.asarray([1, 0]), dims="x")
4 changes: 2 additions & 2 deletions xarray/tests/test_dataarray.py
@@ -3004,7 +3004,7 @@ def test_fillna(self) -> None:
expected = b.copy()
assert_identical(expected, actual)

actual = a.fillna(range(4))
actual = a.fillna(np.arange(4))
assert_identical(expected, actual)

actual = a.fillna(b[:3])
@@ -3017,7 +3017,7 @@ def test_fillna(self) -> None:
a.fillna({0: 0})

with pytest.raises(ValueError, match=r"broadcast"):
a.fillna([1, 2])
a.fillna(np.array([1, 2]))

def test_align(self) -> None:
array = DataArray(
2 changes: 1 addition & 1 deletion xarray/tests/test_dataset.py
@@ -5209,7 +5209,7 @@ def test_fillna(self) -> None:
actual6 = ds.fillna(expected)
assert_identical(expected, actual6)

actual7 = ds.fillna(range(4))
actual7 = ds.fillna(np.arange(4))
assert_identical(expected, actual7)

actual8 = ds.fillna(b[:3])
20 changes: 17 additions & 3 deletions xarray/tests/test_dtypes.py
@@ -35,9 +35,23 @@ def test_result_type(args, expected) -> None:
assert actual == expected


def test_result_type_scalar() -> None:
actual = dtypes.result_type(np.arange(3, dtype=np.float32), np.nan)
assert actual == np.float32
@pytest.mark.parametrize(
["values", "expected"],
(
([np.arange(3, dtype="float32"), np.nan], np.float32),
([np.arange(3, dtype="int8"), 1], np.int8),
([np.array(["a", "b"], dtype=str), np.nan], object),
([np.array([b"a", b"b"], dtype=bytes), True], object),
([np.array([b"a", b"b"], dtype=bytes), "c"], object),
([np.array(["a", "b"], dtype=str), "c"], np.dtype(str)),
([np.array(["a", "b"], dtype=str), None], object),
([0, 1], np.int64),
),
)
def test_result_type_scalars(values, expected) -> None:
actual = dtypes.result_type(*values)

assert np.issubdtype(actual, expected)


def test_result_type_dask_array() -> None:
4 changes: 2 additions & 2 deletions xarray/tests/test_duck_array_ops.py
@@ -157,7 +157,7 @@ def test_count(self):
assert 1 == count(np.datetime64("2000-01-01"))

def test_where_type_promotion(self):
result = where([True, False], [1, 2], ["a", "b"])
result = where(np.array([True, False]), np.array([1, 2]), np.array(["a", "b"]))
assert_array_equal(result, np.array([1, "b"], dtype=object))

result = where([True, False], np.array([1, 2], np.float32), np.nan)
@@ -214,7 +214,7 @@ def test_stack_type_promotion(self):
assert_array_equal(result, np.array([1, "b"], dtype=object))

def test_concatenate_type_promotion(self):
result = concatenate([[1], ["b"]])
result = concatenate([np.array([1]), np.array(["b"])])
assert_array_equal(result, np.array([1, "b"], dtype=object))

@pytest.mark.filterwarnings("error")