Skip to content

Commit

Permalink
REF (string dtype): rename using_pyarrow_string_dtype to using_string_dtype (pandas-dev#59320)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Oct 3, 2024
1 parent 5778049 commit a86ea93
Show file tree
Hide file tree
Showing 45 changed files with 120 additions and 174 deletions.
2 changes: 1 addition & 1 deletion pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ def using_nullable_dtypes() -> bool:
return _mode_options["nullable_dtypes"]


def using_pyarrow_string_dtype() -> bool:
def using_string_dtype() -> bool:
_mode_options = _global_config["future"]
return _mode_options["infer_string"]
4 changes: 2 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ from cython cimport (
floating,
)

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs.missing import check_na_tuples_nonequal

Expand Down Expand Up @@ -2725,7 +2725,7 @@ def maybe_convert_objects(ndarray[object] objects,
seen.object_ = True

elif seen.str_:
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
if using_string_dtype() and is_string_array(objects, skipna=True):
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype(storage="pyarrow_numpy")
Expand Down
16 changes: 4 additions & 12 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
from numpy import ma

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas._libs.tslibs import (
Expand Down Expand Up @@ -566,11 +566,7 @@ def sanitize_array(
if not is_list_like(data):
if index is None:
raise ValueError("index must be specified when data is not list-like")
if (
isinstance(data, str)
and using_pyarrow_string_dtype()
and original_dtype is None
):
if isinstance(data, str) and using_string_dtype() and original_dtype is None:
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype("pyarrow_numpy")
Expand Down Expand Up @@ -604,14 +600,10 @@ def sanitize_array(
subarr = data
if data.dtype == object:
subarr = maybe_infer_to_datetimelike(data)
if (
object_index
and using_pyarrow_string_dtype()
and is_string_dtype(subarr)
):
if object_index and using_string_dtype() and is_string_dtype(subarr):
# Avoid inference when string option is set
subarr = data
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
elif data.dtype.kind == "U" and using_string_dtype():
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype(storage="pyarrow_numpy")
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import numpy as np

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import (
Interval,
Expand Down Expand Up @@ -798,7 +798,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
# coming out as np.str_!

dtype = _dtype_obj
if using_pyarrow_string_dtype():
if using_string_dtype():
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype(storage="pyarrow_numpy")
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import numpy as np
from numpy import ma

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib

Expand Down Expand Up @@ -375,7 +375,7 @@ def ndarray_to_mgr(
bp = BlockPlacement(slice(len(columns)))
nb = new_block_2d(values, placement=bp, refs=refs)
block_values = [nb]
elif dtype is None and values.dtype.kind == "U" and using_pyarrow_string_dtype():
elif dtype is None and values.dtype.kind == "U" and using_string_dtype():
dtype = StringDtype(storage="pyarrow_numpy")

obj_columns = list(values)
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Any,
)

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -120,7 +120,7 @@ def read_feather(
with get_handle(
path, "rb", storage_options=storage_options, is_text=False
) as handles:
if dtype_backend is lib.no_default and not using_pyarrow_string_dtype():
if dtype_backend is lib.no_default and not using_string_dtype():
return feather.read_feather(
handles.handle, columns=columns, use_threads=bool(use_threads)
)
Expand All @@ -137,7 +137,7 @@ def read_feather(
elif dtype_backend == "pyarrow":
return pa_table.to_pandas(types_mapper=pd.ArrowDtype)

elif using_pyarrow_string_dtype():
elif using_string_dtype():
return pa_table.to_pandas(types_mapper=arrow_string_types_mapper())
else:
raise NotImplementedError
4 changes: 2 additions & 2 deletions pandas/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
Literal,
)

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -127,7 +127,7 @@ def read_orc(
df = pa_table.to_pandas(types_mapper=mapping.get)
return df
else:
if using_pyarrow_string_dtype():
if using_string_dtype():
types_mapper = arrow_string_types_mapper()
else:
types_mapper = None
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import warnings
from warnings import catch_warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype
from pandas._config.config import _get_option

from pandas._libs import lib
Expand Down Expand Up @@ -257,7 +257,7 @@ def read(
to_pandas_kwargs["types_mapper"] = mapping.get
elif dtype_backend == "pyarrow":
to_pandas_kwargs["types_mapper"] = pd.ArrowDtype # type: ignore[assignment]
elif using_pyarrow_string_dtype():
elif using_string_dtype():
to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()

manager = _get_option("mode.data_manager", silent=True)
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import TYPE_CHECKING
import warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -295,7 +295,7 @@ def read(self) -> DataFrame:
dtype_mapping = _arrow_dtype_mapping()
dtype_mapping[pa.null()] = pd.Int64Dtype()
frame = table.to_pandas(types_mapper=dtype_mapping.get)
elif using_pyarrow_string_dtype():
elif using_string_dtype():
frame = table.to_pandas(types_mapper=arrow_string_types_mapper())

else:
Expand Down
10 changes: 5 additions & 5 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
config,
get_option,
using_copy_on_write,
using_pyarrow_string_dtype,
using_string_dtype,
)

from pandas._libs import (
Expand Down Expand Up @@ -3224,7 +3224,7 @@ def read(
index = self.read_index("index", start=start, stop=stop)
values = self.read_array("values", start=start, stop=stop)
result = Series(values, index=index, name=self.name, copy=False)
if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
if using_string_dtype() and is_string_array(values, skipna=True):
result = result.astype("string[pyarrow_numpy]")
return result

Expand Down Expand Up @@ -3293,7 +3293,7 @@ def read(

columns = items[items.get_indexer(blk_items)]
df = DataFrame(values.T, columns=columns, index=axes[1], copy=False)
if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
if using_string_dtype() and is_string_array(values, skipna=True):
df = df.astype("string[pyarrow_numpy]")
dfs.append(df)

Expand Down Expand Up @@ -4679,9 +4679,9 @@ def read(
else:
# Categorical
df = DataFrame._from_arrays([values], columns=cols_, index=index_)
if not (using_pyarrow_string_dtype() and values.dtype.kind == "O"):
if not (using_string_dtype() and values.dtype.kind == "O"):
assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
if using_pyarrow_string_dtype() and is_string_array(
if using_string_dtype() and is_string_array(
values, # type: ignore[arg-type]
skipna=True,
):
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

import numpy as np

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -2215,7 +2215,7 @@ def read_table(
from pandas.io._util import _arrow_dtype_mapping

mapping = _arrow_dtype_mapping().get
elif using_pyarrow_string_dtype():
elif using_string_dtype():
from pandas.io._util import arrow_string_types_mapper

arrow_string_types_mapper()
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arithmetic/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

Expand Down Expand Up @@ -303,7 +303,7 @@ def test_iadd_string(self):
index += "_x"
assert "a_x" in index

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="add doesn't work")
@pytest.mark.xfail(using_string_dtype(), reason="add doesn't work")
def test_add(self):
index = pd.Index([str(i) for i in range(10)])
expected = pd.Index(index.values * 2)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas.core.dtypes.common import (
is_float_dtype,
Expand Down Expand Up @@ -449,7 +449,7 @@ def test_constructor_str_unknown(self):
with pytest.raises(ValueError, match="Unknown dtype"):
Categorical([1, 2], dtype="foo")

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="Can't be NumPy strings")
@pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
def test_constructor_np_strs(self):
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/categorical/test_repr.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas import (
Categorical,
Expand Down Expand Up @@ -78,7 +78,7 @@ def test_print_none_width(self):
assert exp == repr(a)

@pytest.mark.skipif(
using_pyarrow_string_dtype(),
using_string_dtype(),
reason="Change once infer_string is set to True by default",
)
def test_unicode_print(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas.compat import PYPY

Expand Down Expand Up @@ -83,7 +83,7 @@ def test_ndarray_compat_properties(index_or_series_obj):


@pytest.mark.skipif(
PYPY or using_pyarrow_string_dtype(),
PYPY or using_string_dtype(),
reason="not relevant for PyPy doesn't work properly for arrow strings",
)
def test_memory_usage(index_or_series_memory_obj):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/base/test_unique.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
Expand Down Expand Up @@ -100,7 +100,7 @@ def test_nunique_null(null_obj, index_or_series_obj):


@pytest.mark.single_cpu
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
@pytest.mark.xfail(using_string_dtype(), reason="decoding fails")
def test_unique_bad_unicode(index_or_series):
# regression test for #34550
uval = "\ud83d" # smiley emoji
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas.core.dtypes.common import is_string_dtype

Expand Down Expand Up @@ -37,7 +37,7 @@ def _get_expected_exception(
else:
result = self.frame_scalar_exc

if using_pyarrow_string_dtype() and result is not None:
if using_string_dtype() and result is not None:
import pyarrow as pa

result = ( # type: ignore[assignment]
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

import pandas as pd
from pandas import Categorical
Expand Down Expand Up @@ -103,7 +103,7 @@ def test_contains(self, data, data_missing):
continue
assert na_value_obj not in data
# this section suffers from super method
if not using_pyarrow_string_dtype():
if not using_string_dtype():
assert na_value_obj in data_missing

def test_empty(self, dtype):
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Expand Down Expand Up @@ -44,9 +44,7 @@ def test_constructor_single_row(self):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.skipif(
using_pyarrow_string_dtype(), reason="columns inferring logic broken"
)
@pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken")
def test_constructor_list_of_series(self):
data = [
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
Expand Down
Loading

0 comments on commit a86ea93

Please sign in to comment.