@@ -2385,6 +2385,11 @@ def extract(
23852385 2 NaN
23862386 dtype: object
23872387 """
2388+ from pandas import (
2389+ DataFrame ,
2390+ array as pd_array ,
2391+ )
2392+
23882393 if not isinstance (expand , bool ):
23892394 raise ValueError ("expand must be True or False" )
23902395
@@ -2396,7 +2401,40 @@ def extract(
23962401 raise ValueError ("only one regex group is supported with Index" )
23972402
23982403 # TODO: dispatch
2399- return str_extract (self , pat , flags , expand = expand )
2404+
2405+ obj = self ._data
2406+ result_dtype = _result_dtype (obj )
2407+
2408+ returns_df = regex .groups > 1 or expand
2409+
2410+ if returns_df :
2411+ name = None
2412+ columns = _get_group_names (regex )
2413+
2414+ if obj .array .size == 0 :
2415+ result = DataFrame (columns = columns , dtype = result_dtype )
2416+
2417+ else :
2418+ result_list = _str_extract (
2419+ obj .array , pat , flags = flags , expand = returns_df
2420+ )
2421+
2422+ result_index : Index | None
2423+ if isinstance (obj , ABCSeries ):
2424+ result_index = obj .index
2425+ else :
2426+ result_index = None
2427+
2428+ result = DataFrame (
2429+ result_list , columns = columns , index = result_index , dtype = result_dtype
2430+ )
2431+
2432+ else :
2433+ name = _get_single_group_name (regex )
2434+ result_arr = _str_extract (obj .array , pat , flags = flags , expand = returns_df )
2435+ # not dispatching, so we have to reconstruct here.
2436+ result = pd_array (result_arr , dtype = result_dtype )
2437+ return self ._wrap_result (result , name = name )
24002438
24012439 @forbid_nonstring_types (["bytes" ])
24022440 def extractall (self , pat , flags = 0 ):
@@ -3110,45 +3148,6 @@ def f(x):
31103148 return np .array ([f (val )[0 ] for val in np .asarray (arr )], dtype = object )
31113149
31123150
def str_extract(accessor: StringMethods, pat: str, flags: int = 0, expand: bool = True):
    """
    Extract the capture groups of ``pat`` from the data behind ``accessor``.

    Parameters
    ----------
    accessor : StringMethods
        Accessor whose ``_data`` attribute holds the object to search and
        whose ``_wrap_result`` method is used to build the return value.
    pat : str
        Regular expression pattern; compiled here with ``flags`` to inspect
        its capture groups.
    flags : int, default 0
        Flags forwarded to ``re.compile`` and to ``_str_extract``.
    expand : bool, default True
        When true, or when the pattern has more than one capture group, the
        extracted values are assembled into a DataFrame.

    Returns
    -------
    The value produced by ``accessor._wrap_result``: wrapping a DataFrame
    (with ``name=None``) on the multi-group / ``expand`` path, or wrapping an
    array built with ``pandas.array`` (named after the single group)
    otherwise.
    """
    from pandas import (
        DataFrame,
        array as pd_array,
    )

    data = accessor._data
    out_dtype = _result_dtype(data)
    compiled = re.compile(pat, flags=flags)
    as_frame = compiled.groups > 1 or expand

    if not as_frame:
        # Single capture group without expansion: a one-dimensional result.
        group_name = _get_single_group_name(compiled)
        extracted = _str_extract(data.array, pat, flags=flags, expand=as_frame)
        # not dispatching, so we have to reconstruct here.
        wrapped_input = pd_array(extracted, dtype=out_dtype)
        return accessor._wrap_result(wrapped_input, name=group_name)

    column_labels = _get_group_names(compiled)

    if data.array.size == 0:
        # Nothing to match against: build an empty frame that still carries
        # the group-derived columns and the result dtype.
        frame = DataFrame(columns=column_labels, dtype=out_dtype)
    else:
        rows = _str_extract(data.array, pat, flags=flags, expand=as_frame)
        # Only a Series contributes its index; anything else gets the
        # DataFrame default.
        row_index = data.index if isinstance(data, ABCSeries) else None
        frame = DataFrame(
            rows, columns=column_labels, index=row_index, dtype=out_dtype
        )

    return accessor._wrap_result(frame, name=None)
3151-
31523151def str_extractall (arr , pat , flags = 0 ):
31533152 regex = re .compile (pat , flags = flags )
31543153 # the regex must contain capture groups.
0 commit comments