Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF (string dtype): rename using_pyarrow_string_dtype to using_string_dtype #59320

Merged
merged 1 commit into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@
from pandas._config.display import detect_console_encoding


def using_pyarrow_string_dtype() -> bool:
def using_string_dtype() -> bool:
_mode_options = _global_config["future"]
return _mode_options["infer_string"]
4 changes: 2 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ from cython cimport (
floating,
)

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs.missing import check_na_tuples_nonequal

Expand Down Expand Up @@ -2699,7 +2699,7 @@ def maybe_convert_objects(ndarray[object] objects,
seen.object_ = True

elif seen.str_:
if using_pyarrow_string_dtype() and is_string_array(objects, skipna=True):
if using_string_dtype() and is_string_array(objects, skipna=True):
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype(storage="pyarrow_numpy")
Expand Down
10 changes: 3 additions & 7 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import numpy as np
from numpy import ma

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas._libs.tslibs import (
Expand Down Expand Up @@ -571,11 +571,7 @@ def sanitize_array(
if not is_list_like(data):
if index is None:
raise ValueError("index must be specified when data is not list-like")
if (
isinstance(data, str)
and using_pyarrow_string_dtype()
and original_dtype is None
):
if isinstance(data, str) and using_string_dtype() and original_dtype is None:
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype("pyarrow_numpy")
Expand Down Expand Up @@ -609,7 +605,7 @@ def sanitize_array(
subarr = data
if data.dtype == object and infer_object:
subarr = maybe_infer_to_datetimelike(data)
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
elif data.dtype.kind == "U" and using_string_dtype():
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype(storage="pyarrow_numpy")
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import numpy as np

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import (
Interval,
Expand Down Expand Up @@ -798,7 +798,7 @@ def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
# coming out as np.str_!

dtype = _dtype_obj
if using_pyarrow_string_dtype():
if using_string_dtype():
from pandas.core.arrays.string_ import StringDtype

dtype = StringDtype(storage="pyarrow_numpy")
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import numpy as np
from numpy import ma

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib

Expand Down Expand Up @@ -301,7 +301,7 @@ def ndarray_to_mgr(
bp = BlockPlacement(slice(len(columns)))
nb = new_block_2d(values, placement=bp, refs=refs)
block_values = [nb]
elif dtype is None and values.dtype.kind == "U" and using_pyarrow_string_dtype():
elif dtype is None and values.dtype.kind == "U" and using_string_dtype():
dtype = StringDtype(storage="pyarrow_numpy")

obj_columns = list(values)
Expand Down
6 changes: 3 additions & 3 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
)
import warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -131,7 +131,7 @@ def read_feather(
with get_handle(
path, "rb", storage_options=storage_options, is_text=False
) as handles:
if dtype_backend is lib.no_default and not using_pyarrow_string_dtype():
if dtype_backend is lib.no_default and not using_string_dtype():
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
Expand All @@ -155,7 +155,7 @@ def read_feather(
elif dtype_backend == "pyarrow":
return pa_table.to_pandas(types_mapper=pd.ArrowDtype)

elif using_pyarrow_string_dtype():
elif using_string_dtype():
return pa_table.to_pandas(types_mapper=arrow_string_types_mapper())
else:
raise NotImplementedError
4 changes: 2 additions & 2 deletions pandas/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
Literal,
)

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -136,7 +136,7 @@ def read_orc(
df = pa_table.to_pandas(types_mapper=mapping.get)
return df
else:
if using_pyarrow_string_dtype():
if using_string_dtype():
types_mapper = arrow_string_types_mapper()
else:
types_mapper = None
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
filterwarnings,
)

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -257,7 +257,7 @@ def read(
to_pandas_kwargs["types_mapper"] = mapping.get
elif dtype_backend == "pyarrow":
to_pandas_kwargs["types_mapper"] = pd.ArrowDtype # type: ignore[assignment]
elif using_pyarrow_string_dtype():
elif using_string_dtype():
to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper()

path_or_handle, handles, filesystem = _get_path_or_handle(
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import TYPE_CHECKING
import warnings

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -301,7 +301,7 @@ def read(self) -> DataFrame:
dtype_mapping = _arrow_dtype_mapping()
dtype_mapping[pa.null()] = pd.Int64Dtype()
frame = table.to_pandas(types_mapper=dtype_mapping.get)
elif using_pyarrow_string_dtype():
elif using_string_dtype():
frame = table.to_pandas(types_mapper=arrow_string_types_mapper())

else:
Expand Down
10 changes: 5 additions & 5 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from pandas._config import (
config,
get_option,
using_pyarrow_string_dtype,
using_string_dtype,
)

from pandas._libs import (
Expand Down Expand Up @@ -3294,7 +3294,7 @@ def read(
index = self.read_index("index", start=start, stop=stop)
values = self.read_array("values", start=start, stop=stop)
result = Series(values, index=index, name=self.name, copy=False)
if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
if using_string_dtype() and is_string_array(values, skipna=True):
result = result.astype("string[pyarrow_numpy]")
return result

Expand Down Expand Up @@ -3363,7 +3363,7 @@ def read(

columns = items[items.get_indexer(blk_items)]
df = DataFrame(values.T, columns=columns, index=axes[1], copy=False)
if using_pyarrow_string_dtype() and is_string_array(values, skipna=True):
if using_string_dtype() and is_string_array(values, skipna=True):
df = df.astype("string[pyarrow_numpy]")
dfs.append(df)

Expand Down Expand Up @@ -4735,9 +4735,9 @@ def read(
else:
# Categorical
df = DataFrame._from_arrays([values], columns=cols_, index=index_)
if not (using_pyarrow_string_dtype() and values.dtype.kind == "O"):
if not (using_string_dtype() and values.dtype.kind == "O"):
assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
if using_pyarrow_string_dtype() and is_string_array(
if using_string_dtype() and is_string_array(
values, # type: ignore[arg-type]
skipna=True,
):
Expand Down
4 changes: 2 additions & 2 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

import numpy as np

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas._libs import lib
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -2197,7 +2197,7 @@ def read_table(
from pandas.io._util import _arrow_dtype_mapping

mapping = _arrow_dtype_mapping().get
elif using_pyarrow_string_dtype():
elif using_string_dtype():
from pandas.io._util import arrow_string_types_mapper

arrow_string_types_mapper()
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arithmetic/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

import pandas.util._test_decorators as td

Expand Down Expand Up @@ -303,7 +303,7 @@ def test_iadd_string(self):
index += "_x"
assert "a_x" in index

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="add doesn't work")
@pytest.mark.xfail(using_string_dtype(), reason="add doesn't work")
def test_add(self):
index = pd.Index([str(i) for i in range(10)])
expected = pd.Index(index.values * 2)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas.core.dtypes.common import (
is_float_dtype,
Expand Down Expand Up @@ -442,7 +442,7 @@ def test_constructor_str_unknown(self):
with pytest.raises(ValueError, match="Unknown dtype"):
Categorical([1, 2], dtype="foo")

@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="Can't be NumPy strings")
@pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
def test_constructor_np_strs(self):
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/arrays/categorical/test_repr.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas import (
Categorical,
Expand Down Expand Up @@ -78,7 +78,7 @@ def test_print_none_width(self):
assert exp == repr(a)

@pytest.mark.skipif(
using_pyarrow_string_dtype(),
using_string_dtype(),
reason="Change once infer_string is set to True by default",
)
def test_unicode_print(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/base/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas.compat import PYPY

Expand Down Expand Up @@ -82,7 +82,7 @@ def test_ndarray_compat_properties(index_or_series_obj):


@pytest.mark.skipif(
PYPY or using_pyarrow_string_dtype(),
PYPY or using_string_dtype(),
reason="not relevant for PyPy doesn't work properly for arrow strings",
)
def test_memory_usage(index_or_series_memory_obj):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/base/test_unique.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
Expand Down Expand Up @@ -100,7 +100,7 @@ def test_nunique_null(null_obj, index_or_series_obj):


@pytest.mark.single_cpu
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="decoding fails")
@pytest.mark.xfail(using_string_dtype(), reason="decoding fails")
def test_unique_bad_unicode(index_or_series):
# regression test for #34550
uval = "\ud83d" # smiley emoji
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/base/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas.core.dtypes.common import is_string_dtype

Expand Down Expand Up @@ -37,7 +37,7 @@ def _get_expected_exception(
else:
result = self.frame_scalar_exc

if using_pyarrow_string_dtype() and result is not None:
if using_string_dtype() and result is not None:
import pyarrow as pa

result = ( # type: ignore[assignment]
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

import pandas as pd
from pandas import Categorical
Expand Down Expand Up @@ -99,7 +99,7 @@ def test_contains(self, data, data_missing):
continue
assert na_value_obj not in data
# this section suffers from super method
if not using_pyarrow_string_dtype():
if not using_string_dtype():
assert na_value_obj in data_missing

def test_empty(self, dtype):
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/frame/constructors/test_from_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Expand Down Expand Up @@ -44,9 +44,7 @@ def test_constructor_single_row(self):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.skipif(
using_pyarrow_string_dtype(), reason="columns inferring logic broken"
)
@pytest.mark.skipif(using_string_dtype(), reason="columns inferring logic broken")
def test_constructor_list_of_series(self):
data = [
OrderedDict([["a", 1.5], ["b", 3.0], ["c", 4.0]]),
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import pytest

from pandas._config import using_pyarrow_string_dtype
from pandas._config import using_string_dtype

from pandas.compat import is_platform_little_endian

Expand Down Expand Up @@ -58,7 +58,7 @@ def test_from_records_with_datetimes(self):
tm.assert_frame_equal(result, expected)

@pytest.mark.skipif(
using_pyarrow_string_dtype(), reason="dtype checking logic doesn't work"
using_string_dtype(), reason="dtype checking logic doesn't work"
)
def test_from_records_sequencelike(self):
df = DataFrame(
Expand Down
Loading
Loading