diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index da48d10e9ef58..ba29a17057d02 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1443,6 +1443,7 @@ Reshaping - Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`) - Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`). - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) +- Bug in :class:`Series` construction when passing no data and ``dtype=str`` (:issue:`22477`) .. _whatsnew_0240.bug_fixes.sparse: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c7c6f89eb13a4..3c5f8830441f7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,7 +6,7 @@ from pandas._libs import lib, tslib, tslibs from pandas._libs.tslibs import OutOfBoundsDatetime, Period, iNaT -from pandas.compat import PY3, string_types, text_type +from pandas.compat import PY3, string_types, text_type, to_str from .common import ( _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, _string_dtypes, @@ -1216,11 +1216,16 @@ def construct_1d_arraylike_from_scalar(value, length, dtype): if not isinstance(dtype, (np.dtype, type(np.dtype))): dtype = dtype.dtype - # coerce if we have nan for an integer dtype - # GH 22858: only cast to float if an index - # (passed here as length) is specified if length and is_integer_dtype(dtype) and isna(value): - dtype = np.float64 + # coerce if we have nan for an integer dtype + dtype = np.dtype('float64') + elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): + # we need to coerce to object dtype + # to allow numpy to take our string as a scalar value + dtype = object + if not isna(value): + value = to_str(value) + subarr = np.empty(length, dtype=dtype) 
subarr.fill(value) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a01266870b8fc..33177ac452414 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -419,7 +419,7 @@ def is_datetime64_dtype(arr_or_dtype): return False try: tipo = _get_dtype_type(arr_or_dtype) - except TypeError: + except (TypeError, UnicodeEncodeError): return False return issubclass(tipo, np.datetime64) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ce0cf0d5c089e..f5a445e2cca9a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -134,6 +134,17 @@ def test_constructor_no_data_index_order(self): result = pd.Series(index=['b', 'a', 'c']) assert result.index.tolist() == ['b', 'a', 'c'] + def test_constructor_no_data_string_type(self): + # GH 22477 + result = pd.Series(index=[1], dtype=str) + assert np.isnan(result.iloc[0]) + + @pytest.mark.parametrize('item', ['entry', 'ѐ', 13]) + def test_constructor_string_element_string_type(self, item): + # GH 22477 + result = pd.Series(item, index=[1], dtype=str) + assert result.iloc[0] == str(item) + def test_constructor_dtype_str_na_values(self, string_dtype): # https://github.com/pandas-dev/pandas/issues/21083 ser = Series(['x', None], dtype=string_dtype)