REF (string dtype): rename using_pyarrow_string_dtype to using_string_dtype (pandas-dev#59320)
WillAyd · Oct 3, 2024 · a86ea93 · a86ea93
1 parent 5778049
commit a86ea93
Show file tree

Hide file tree

Showing 45 changed files with 120 additions and 174 deletions.
diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
@@ -52,6 +52,6 @@ def using_nullable_dtypes() -> bool:
     return _mode_options["nullable_dtypes"]
 
 
-def using_pyarrow_string_dtype() -> bool:
+def using_string_dtype() -> bool:
     _mode_options = _global_config["future"]
     return _mode_options["infer_string"]
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -37,7 +37,7 @@ from cython cimport (
     floating,
 )
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs.missing import check_na_tuples_nonequal
 
@@ -2725,7 +2725,7 @@ def maybe_convert_objects(ndarray[object] objects,
         seen.object_ = True
 
     elif seen.str_:
-        if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
+        if using_string_dtype() and is_string_array(objects, skipna=True):
             from pandas.core.arrays.string_ import StringDtype
 
             dtype = StringDtype(storage="pyarrow_numpy")

diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -19,7 +19,7 @@
 import numpy as np
 from numpy import ma
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs import lib
 from pandas._libs.tslibs import (
@@ -566,11 +566,7 @@ def sanitize_array(
     if not is_list_like(data):
         if index is None:
             raise ValueError("index must be specified when data is not list-like")
-        if (
-            isinstance(data, str)
-            and using_pyarrow_string_dtype()
-            and original_dtype is None
-        ):
+        if isinstance(data, str) and using_string_dtype() and original_dtype is None:
             from pandas.core.arrays.string_ import StringDtype
 
             dtype = StringDtype("pyarrow_numpy")
@@ -604,14 +600,10 @@ def sanitize_array(
             subarr = data
             if data.dtype == object:
                 subarr = maybe_infer_to_datetimelike(data)
-                if (
-                    object_index
-                    and using_pyarrow_string_dtype()
-                    and is_string_dtype(subarr)
-                ):
+                if object_index and using_string_dtype() and is_string_dtype(subarr):
                     # Avoid inference when string option is set
                     subarr = data
-            elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
+            elif data.dtype.kind == "U" and using_string_dtype():
                 from pandas.core.arrays.string_ import StringDtype
 
                 dtype = StringDtype(storage="pyarrow_numpy")

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -18,7 +18,7 @@
 
 import numpy as np
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs import (
     Interval,
@@ -798,7 +798,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
         # coming out as np.str_!
 
         dtype = _dtype_obj
-        if using_pyarrow_string_dtype():
+        if using_string_dtype():
             from pandas.core.arrays.string_ import StringDtype
 
             dtype = StringDtype(storage="pyarrow_numpy")

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -13,7 +13,7 @@
 import numpy as np
 from numpy import ma
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs import lib
 
@@ -375,7 +375,7 @@ def ndarray_to_mgr(
             bp = BlockPlacement(slice(len(columns)))
             nb = new_block_2d(values, placement=bp, refs=refs)
             block_values = [nb]
-    elif dtype is None and values.dtype.kind == "U" and using_pyarrow_string_dtype():
+    elif dtype is None and values.dtype.kind == "U" and using_string_dtype():
         dtype = StringDtype(storage="pyarrow_numpy")
 
         obj_columns = list(values)

diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
@@ -6,7 +6,7 @@
     Any,
 )
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
@@ -120,7 +120,7 @@ def read_feather(
     with get_handle(
         path, "rb", storage_options=storage_options, is_text=False
     ) as handles:
-        if dtype_backend is lib.no_default and not using_pyarrow_string_dtype():
+        if dtype_backend is lib.no_default and not using_string_dtype():
             return feather.read_feather(
                 handles.handle, columns=columns, use_threads=bool(use_threads)
             )
@@ -137,7 +137,7 @@ def read_feather(
         elif dtype_backend == "pyarrow":
             return pa_table.to_pandas(types_mapper=pd.ArrowDtype)
 
-        elif using_pyarrow_string_dtype():
+        elif using_string_dtype():
             return pa_table.to_pandas(types_mapper=arrow_string_types_mapper())
         else:
             raise NotImplementedError
diff --git a/pandas/io/orc.py b/pandas/io/orc.py
@@ -9,7 +9,7 @@
     Literal,
 )
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
@@ -127,7 +127,7 @@ def read_orc(
             df = pa_table.to_pandas(types_mapper=mapping.get)
         return df
     else:
-        if using_pyarrow_string_dtype():
+        if using_string_dtype():
             types_mapper = arrow_string_types_mapper()
         else:
             types_mapper = None

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -12,7 +12,7 @@
 import warnings
 from warnings import catch_warnings
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 from pandas._config.config import _get_option
 
 from pandas._libs import lib
@@ -257,7 +257,7 @@ def read(
             to_pandas_kwargs["types_mapper"] = mapping.get
         elif dtype_backend == "pyarrow":
             to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]
-        elif using_pyarrow_string_dtype():
+        elif using_string_dtype():
             to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()
 
         manager = _get_option("mode.data_manager", silent=True)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -3,7 +3,7 @@
 from typing import TYPE_CHECKING
 import warnings
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
@@ -295,7 +295,7 @@ def read(self) -> DataFrame:
             dtype_mapping = _arrow_dtype_mapping()
             dtype_mapping[pa.null()] = pd.Int64Dtype()
             frame = table.to_pandas(types_mapper=dtype_mapping.get)
-        elif using_pyarrow_string_dtype():
+        elif using_string_dtype():
             frame = table.to_pandas(types_mapper=arrow_string_types_mapper())
 
         else:

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -31,7 +31,7 @@
     config,
     get_option,
     using_copy_on_write,
-    using_pyarrow_string_dtype,
+    using_string_dtype,
 )
 
 from pandas._libs import (
@@ -3224,7 +3224,7 @@ def read(
         index = self.read_index("index", start=start, stop=stop)
         values = self.read_array("values", start=start, stop=stop)
         result = Series(values, index=index, name=self.name, copy=False)
-        if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
+        if using_string_dtype() and is_string_array(values, skipna=True):
             result = result.astype("string[pyarrow_numpy]")
         return result
 
@@ -3293,7 +3293,7 @@ def read(
 
             columns = items[items.get_indexer(blk_items)]
             df = DataFrame(values.T, columns=columns, index=axes[1], copy=False)
-            if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
+            if using_string_dtype() and is_string_array(values, skipna=True):
                 df = df.astype("string[pyarrow_numpy]")
             dfs.append(df)
 
@@ -4679,9 +4679,9 @@ def read(
             else:
                 # Categorical
                 df = DataFrame._from_arrays([values], columns=cols_, index=index_)
-            if not (using_pyarrow_string_dtype() and values.dtype.kind == "O"):
+            if not (using_string_dtype() and values.dtype.kind == "O"):
                 assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
-            if using_pyarrow_string_dtype() and is_string_array(
+            if using_string_dtype() and is_string_array(
                 values,  # type: ignore[arg-type]
                 skipna=True,
             ):

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -32,7 +32,7 @@
 
 import numpy as np
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
@@ -2215,7 +2215,7 @@ def read_table(
             from pandas.io._util import _arrow_dtype_mapping
 
             mapping = _arrow_dtype_mapping().get
-        elif using_pyarrow_string_dtype():
+        elif using_string_dtype():
             from pandas.io._util import arrow_string_types_mapper
 
             arrow_string_types_mapper()

diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py
@@ -8,7 +8,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 import pandas.util._test_decorators as td
 
@@ -303,7 +303,7 @@ def test_iadd_string(self):
         index += "_x"
         assert "a_x" in index
 
-    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="add doesn't work")
+    @pytest.mark.xfail(using_string_dtype(), reason="add doesn't work")
     def test_add(self):
         index = pd.Index([str(i) for i in range(10)])
         expected = pd.Index(index.values * 2)

diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas.core.dtypes.common import (
     is_float_dtype,
@@ -449,7 +449,7 @@ def test_constructor_str_unknown(self):
         with pytest.raises(ValueError, match="Unknown dtype"):
             Categorical([1, 2], dtype="foo")
 
-    @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="Can't be NumPy strings")
+    @pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
     def test_constructor_np_strs(self):
         # GH#31499 Hashtable.map_locations needs to work on np.str_ objects
         cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])

diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas import (
     Categorical,
@@ -78,7 +78,7 @@ def test_print_none_width(self):
             assert exp == repr(a)
 
     @pytest.mark.skipif(
-        using_pyarrow_string_dtype(),
+        using_string_dtype(),
         reason="Change once infer_string is set to True by default",
     )
     def test_unicode_print(self):

diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas.compat import PYPY
 
@@ -83,7 +83,7 @@ def test_ndarray_compat_properties(index_or_series_obj):
 
 
 @pytest.mark.skipif(
-    PYPY or using_pyarrow_string_dtype(),
+    PYPY or using_string_dtype(),
     reason="not relevant for PyPy doesn't work properly for arrow strings",
 )
 def test_memory_usage(index_or_series_memory_obj):

diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 import pandas as pd
 import pandas._testing as tm
@@ -100,7 +100,7 @@ def test_nunique_null(null_obj, index_or_series_obj):
 
 
 @pytest.mark.single_cpu
-@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
+@pytest.mark.xfail(using_string_dtype(), reason="decoding fails")
 def test_unique_bad_unicode(index_or_series):
     # regression test for #34550
     uval = "\ud83d"  # smiley emoji

diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
@@ -5,7 +5,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas.core.dtypes.common import is_string_dtype
 
@@ -37,7 +37,7 @@ def _get_expected_exception(
         else:
             result = self.frame_scalar_exc
 
-        if using_pyarrow_string_dtype() and result is not None:
+        if using_string_dtype() and result is not None:
             import pyarrow as pa
 
             result = (  # type: ignore[assignment]

diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
@@ -18,7 +18,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 import pandas as pd
 from pandas import Categorical
@@ -103,7 +103,7 @@ def test_contains(self, data, data_missing):
                 continue
             assert na_value_obj not in data
             # this section suffers from super method
-            if not using_pyarrow_string_dtype():
+            if not using_string_dtype():
                 assert na_value_obj in data_missing
 
     def test_empty(self, dtype):

diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_string_dtype
+from pandas._config import using_string_dtype
 
 from pandas import (
     DataFrame,
@@ -44,9 +44,7 @@ def test_constructor_single_row(self):
         )
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.skipif(
-        using_pyarrow_string_dtype(), reason="columns inferring logic broken"
-    )
+    @pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken")
     def test_constructor_list_of_series(self):
         data = [
             OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),