Skip to content

Commit

Permalink
BUG: Fix astype from float32 to string (#36464) (#36519)
Browse files Browse the repository at this point in the history
Co-authored-by: Daniel Saxton <2658661+dsaxton@users.noreply.github.com>
  • Loading branch information
jorisvandenbossche and dsaxton authored Sep 21, 2020
1 parent d05a9ca commit 4d5ff7e
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Bug fixes
- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`)
- Bug in :meth:`DataFrame.sort_values` raising an ``AttributeError`` when sorting on a key that casts column to categorical dtype (:issue:`36383`)
- Bug in :meth:`DataFrame.stack` raising a ``ValueError`` when stacking :class:`MultiIndex` columns based on position when the levels had duplicate names (:issue:`36353`)
- Bug in :meth:`Series.astype` showing too much precision when casting from ``np.float32`` to string dtype (:issue:`36451`)
- Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` when using ``NaN`` and a row length above 1,000,000 (:issue:`22205`)

.. ---------------------------------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -650,11 +650,12 @@ cpdef ndarray[object] ensure_string_array(
Py_ssize_t i = 0, n = len(arr)

result = np.asarray(arr, dtype="object")

if copy and result is arr:
result = result.copy()

for i in range(n):
val = result[i]
val = arr[i]
if not checknull(val):
result[i] = str(val)
else:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,11 +199,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
if dtype:
assert dtype == "string"

result = np.asarray(scalars, dtype="object")

# convert non-na-likes to str, and nan-likes to StringDtype.na_value
result = lib.ensure_string_array(
result, na_value=StringDtype.na_value, copy=copy
scalars, na_value=StringDtype.na_value, copy=copy
)

return cls(result)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,3 +336,12 @@ def test_memory_usage():
series = pd.Series(["a", "b", "c"], dtype="string")

assert 0 < series.nbytes <= series.memory_usage() < series.memory_usage(deep=True)


@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_astype_from_float_dtype(dtype):
    """Check that a float Series holding 0.1 casts to "string" dtype as "0.1".

    Regression test for
    https://github.com/pandas-dev/pandas/issues/36451
    """
    float_ser = pd.Series([0.1], dtype=dtype)
    # The comparison is against a "string"-dtype Series, so both the text
    # ("0.1") and the resulting dtype are checked.
    tm.assert_series_equal(
        float_ser.astype("string"),
        pd.Series(["0.1"], dtype="string"),
    )
9 changes: 9 additions & 0 deletions pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest

from pandas import Interval, Series, Timestamp, date_range
Expand Down Expand Up @@ -46,3 +47,11 @@ def test_astype_ignores_errors_for_extension_dtypes(self, values, errors):
msg = "(Cannot cast)|(could not convert)"
with pytest.raises((ValueError, TypeError), match=msg):
values.astype(float, errors=errors)

@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
def test_astype_from_float_to_str(self, dtype):
# https://github.com/pandas-dev/pandas/issues/36451
s = Series([0.1], dtype=dtype)
result = s.astype(str)
expected = Series(["0.1"])
tm.assert_series_equal(result, expected)

0 comments on commit 4d5ff7e

Please sign in to comment.