99import numpy as np
1010from numpy .random import randint
1111
12- from pandas .compat import range , u
12+ from pandas .compat import range , u , PY3
1313import pandas .compat as compat
1414from pandas import Index , Series , DataFrame , isna , MultiIndex , notna , concat
1515
@@ -118,6 +118,55 @@ def any_string_method(request):
118118 return request .param
119119
120120
# Parametrization table for the fixture defined right below; it is a
# subset of the full set from pandas/conftest.py.
# Each entry is (inferred-dtype label, sample values). The label attached to
# the text and bytes literals depends on whether we run under PY3.
_any_allowed_skipna_inferred_dtype = [
    ('string', ['a', np.nan, 'c']),
    ('string' if PY3 else 'unicode', [u('a'), np.nan, u('c')]),
    ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']),
    ('empty', [np.nan, np.nan, np.nan]),
    ('empty', []),
    ('mixed-integer', ['a', np.nan, 2]),
]
# transpose the table: the first column (the labels) is used as pytest ids
ids, _ = zip(*_any_allowed_skipna_inferred_dtype)
131+
132+
@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for (inferred) dtypes allowed in StringMethods.__init__

    The (inferred) types covered by the parametrization are:
    * 'string'
    * 'unicode' (if PY2)
    * 'empty'
    * 'bytes' (if PY3)
    * 'mixed-integer'

    Note that 'mixed' has no entry in `_any_allowed_skipna_inferred_dtype`
    and is therefore not exercised by this fixture.

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> import pandas._libs.lib as lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    """
    inferred_dtype, values = request.param
    # force object dtype so numpy does not cast the sample values
    values = np.array(values, dtype=object)

    # correctness of inference tested in tests/dtypes/test_inference.py
    return inferred_dtype, values
168+
169+
121170class TestStringMethods (object ):
122171
123172 def test_api (self ):
@@ -126,11 +175,103 @@ def test_api(self):
126175 assert Series .str is strings .StringMethods
127176 assert isinstance (Series (['' ]).str , strings .StringMethods )
128177
129- # GH 9184
130- invalid = Series ([1 ])
131- with pytest .raises (AttributeError , match = "only use .str accessor" ):
132- invalid .str
133- assert not hasattr (invalid , 'str' )
@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype):
    """
    Check whether the `.str` accessor is available for each combination of
    container (`box`), explicit `dtype` and inferred dtype of the values.

    Inferred dtypes listed in `types_passing_constructor` must yield a
    `strings.StringMethods` instance; every other one must raise
    AttributeError on access. Known-broken combinations are xfailed first.
    """
    # one (label, values) pair out of the parametrized fixture
    inferred_dtype, values = any_skipna_inferred_dtype

    # build the container up front; explicit dtype to avoid casting
    t = box(values, dtype=dtype)

    # inferred dtypes for which the accessor is expected to be created
    types_passing_constructor = ['string', 'unicode', 'empty',
                                 'bytes', 'mixed', 'mixed-integer']

    is_index = box == Index
    is_series = box == Series
    is_object = dtype == object
    is_category = dtype == 'category'

    # TODO: get rid of these xfails
    if is_category and inferred_dtype in ['period', 'interval']:
        pytest.xfail(reason='Conversion to numpy array fails because '
                     'the ._values-attribute is not a numpy array for '
                     'PeriodArray/IntervalArray; see GH 23553')
    if is_index and inferred_dtype in ['empty', 'bytes']:
        pytest.xfail(reason='Raising too restrictively; '
                     'solved by GH 23167')
    if is_index and is_object and inferred_dtype in ['boolean', 'date',
                                                     'time']:
        pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                     'solved by GH 23167')

    # NOTE(review): `and` binds tighter than `or`, so only the object-dtype
    # clause is restricted to Series; the category clause applies to every
    # box. Confirm whether that is the intended grouping.
    series_object_rejected = (
        is_series and is_object
        and inferred_dtype not in types_passing_constructor)
    category_rejected = (
        is_category and inferred_dtype in ['decimal', 'boolean', 'time'])
    if series_object_rejected or category_rejected:
        pytest.xfail(reason='Not raising correctly; solved by GH 23167')

    if inferred_dtype not in types_passing_constructor:
        # GH 9184, GH 23011, GH 23163
        with pytest.raises(AttributeError, match='Can only use .str '
                           'accessor with string values.*'):
            t.str
        assert not hasattr(t, 'str')
    else:
        # GH 6106
        assert isinstance(t.str, strings.StringMethods)
217+
@pytest.mark.parametrize('dtype', [object, 'category'])
@pytest.mark.parametrize('box', [Series, Index])
def test_api_per_method(self, box, dtype,
                        any_allowed_skipna_inferred_dtype,
                        any_string_method):
    """
    Call every string method on every allowed inferred dtype.

    This does not verify the results of the methods; it only checks that a
    method runs on the (inferred) dtypes it accepts and raises TypeError on
    the remaining ones. Known-broken combinations are xfailed before the
    container is built.
    """
    # one instance of each parametrized fixture
    inferred_dtype, values = any_allowed_skipna_inferred_dtype
    method_name, args, kwargs = any_string_method

    # the only methods expected to accept values inferred as 'bytes'
    bytes_methods = ['encode', 'decode', 'len']
    on_index = box == Index

    # TODO: get rid of these xfails
    if inferred_dtype == 'bytes' and method_name not in bytes_methods:
        pytest.xfail(reason='Not raising for "bytes", see GH 23011;'
                     'Also: malformed method names, see GH 23551; '
                     'solved by GH 23167')
    if method_name == 'cat' and inferred_dtype in ['mixed',
                                                   'mixed-integer']:
        pytest.xfail(reason='Bad error message; should raise better; '
                     'solved by GH 23167')
    if on_index and inferred_dtype in ['empty', 'bytes']:
        pytest.xfail(reason='Raising too restrictively; '
                     'solved by GH 23167')
    if (on_index and dtype == object
            and inferred_dtype in ['boolean', 'date', 'time']):
        pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                     'solved by GH 23167')
    if on_index and dtype == 'category':
        pytest.xfail(reason='Broken methods on CategoricalIndex; '
                     'see GH 23556')

    # explicit dtype to avoid casting
    t = box(values, dtype=dtype)
    method = getattr(t.str, method_name)

    allowed_types = ['string', 'unicode', 'empty']
    if method_name in bytes_methods:
        allowed_types.append('bytes')
    # as of v0.23.4, all methods except 'cat' are very lenient with the
    # allowed data types, just returning NaN for entries that error.
    # This could be changed with an 'errors'-kwarg to the `str`-accessor,
    # see discussion in GH 13877
    if method_name != 'cat':
        allowed_types.extend(['mixed', 'mixed-integer'])

    if inferred_dtype not in allowed_types:
        # GH 23011, GH 23163
        template = ('Cannot use .str.{name} with values of inferred dtype '
                    '{inferred_dtype!r}.')
        msg = template.format(name=method_name,
                              inferred_dtype=inferred_dtype)
        with pytest.raises(TypeError, match=msg):
            method(*args, **kwargs)
    else:
        method(*args, **kwargs)  # works!
134275
135276 def test_api_for_categorical (self , any_string_method ):
136277 # https://github.com/pandas-dev/pandas/issues/10661
0 commit comments