7575
7676from pandas .io .formats import printing
7777
78+ if HAS_PYARROW :
79+ import pyarrow as pa
80+ import pyarrow .compute as pc
81+
7882if TYPE_CHECKING :
7983 from collections .abc import MutableMapping
8084
@@ -128,10 +132,10 @@ class StringDtype(StorageExtensionDtype):
128132 Examples
129133 --------
130134 >>> pd.StringDtype()
131- <StringDtype(storage='python', na_value=<NA>)>
132-
133- >>> pd.StringDtype(storage="pyarrow")
134135 <StringDtype(na_value=<NA>)>
136+
137+ >>> pd.StringDtype(storage="python")
138+ <StringDtype(storage='python', na_value=<NA>)>
135139 """
136140
137141 @property
@@ -156,16 +160,11 @@ def __init__(
156160 ) -> None :
157161 # infer defaults
158162 if storage is None :
159- if na_value is not libmissing .NA :
160- storage = get_option ("mode.string_storage" )
161- if storage == "auto" :
162- if HAS_PYARROW :
163- storage = "pyarrow"
164- else :
165- storage = "python"
166- else :
167- storage = get_option ("mode.string_storage" )
168- if storage == "auto" :
163+ storage = get_option ("mode.string_storage" )
164+ if storage == "auto" :
165+ if HAS_PYARROW :
166+ storage = "pyarrow"
167+ else :
169168 storage = "python"
170169
171170 if storage == "pyarrow_numpy" :
@@ -343,7 +342,15 @@ def __from_arrow__(
343342 Construct StringArray from pyarrow Array/ChunkedArray.
344343 """
345344 if self .storage == "pyarrow" :
346- from pandas .core .arrays .string_arrow import ArrowStringArray
345+ from pandas .core .arrays .string_arrow import (
346+ ArrowStringArray ,
347+ _chk_pyarrow_available ,
348+ )
349+
350+ _chk_pyarrow_available ()
351+
352+ if not pa .types .is_large_string (array .type ):
353+ array = pc .cast (array , pa .large_string ())
347354
348355 return ArrowStringArray (array , dtype = self )
349356
@@ -612,7 +619,7 @@ class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc]
612619 Examples
613620 --------
614621 >>> pd.array(["This is", "some text", None, "data."], dtype="string")
615- <StringArray>
622+ <ArrowStringArray>
616623 ['This is', 'some text', <NA>, 'data.']
617624 Length: 4, dtype: string
618625
@@ -624,15 +631,15 @@ class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc]
624631 ['1', 1]
625632 Length: 2, dtype: object
626633 >>> pd.array(["1", 1], dtype="string")
627- <StringArray>
634+ <ArrowStringArray>
628635 ['1', '1']
629636 Length: 2, dtype: string
630637
631638 However, instantiating StringArrays directly with non-strings will raise an error.
632639
633640 For comparison methods, `StringArray` returns a :class:`pandas.BooleanArray`:
634641
635- >>> pd.array(["a", None, "c"], dtype="string") == "a"
642+ >>> pd.array(["a", None, "c"], dtype="string[python]") == "a"
636643 <BooleanArray>
637644 [True, <NA>, False]
638645 Length: 3, dtype: boolean
0 commit comments