From 3037f915c52c56a25f3cf0181e5453f0359adc3e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 16 Mar 2022 10:48:23 +0000 Subject: [PATCH 1/3] REGR: only convert at end for Block.replace_list --- pandas/core/internals/blocks.py | 79 ++++++++++----------------------- 1 file changed, 24 insertions(+), 55 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 69f66973d0954..85c965df441b6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -555,6 +555,8 @@ def replace( to_replace, value, inplace: bool = False, + # conversion is done at the end if we're called from replace_list + convert: bool = True, # mask may be pre-computed if we're called from replace_list mask: npt.NDArray[np.bool_] | None = None, ) -> list[Block]: @@ -573,6 +575,7 @@ def replace( if isinstance(values, Categorical): # TODO: avoid special-casing blk = self if inplace else self.copy() + # TODO: should we pass convert here? moot point if special casing avoided. blk.values._replace(to_replace=to_replace, value=value, inplace=True) return [blk] @@ -593,7 +596,7 @@ def replace( elif self._can_hold_element(value): blk = self if inplace else self.copy() putmask_inplace(blk.values, mask, value) - if not (self.is_object and value is None): + if convert and not (self.is_object and value is None): # if the user *explicitly* gave None, we keep None, otherwise # may downcast to NaN blocks = blk.convert(numeric=False, copy=False) @@ -607,6 +610,7 @@ def replace( to_replace=to_replace, value=value, inplace=True, + convert=convert, mask=mask, ) @@ -620,6 +624,7 @@ def replace( to_replace=to_replace, value=value, inplace=True, + convert=convert, mask=mask[i : i + 1], ) ) @@ -631,8 +636,10 @@ def _replace_regex( to_replace, value, inplace: bool = False, + # conversion is done at the end if we're called from replace_list convert: bool = True, - mask=None, + # mask may be pre-computed if we're called from replace_list + mask: npt.NDArray[np.bool_] | None = None, ) -> list[Block]: """ Replace elements by the given value. @@ -665,7 +672,10 @@ def _replace_regex( replace_regex(new_values, rx, value, mask) block = self.make_block(new_values) - return block.convert(numeric=False, copy=False) + if convert: + return block.convert(numeric=False, copy=False) + else: + return [block] @final def replace_list( @@ -708,13 +718,14 @@ def replace_list( masks = [extract_bool_array(x) for x in masks] # type: ignore[arg-type] rb = [self if inplace else self.copy()] - for i, (src, dest) in enumerate(pairs): + for i, (to_replace, value) in enumerate(pairs): + use_regex = should_use_regex(regex, to_replace) convert = i == src_len # only convert once at the end new_rb: list[Block] = [] - # GH-39338: _replace_coerce can split a block into - # single-column blocks, so track the index so we know - # where to index into the mask + # GH-39338: Block.replace and Block._replace_regex can split a block + # into single-column blocks, so track the index so we know where to + # index into the mask for blk_num, blk in enumerate(rb): if len(rb) == 1: m = masks[i] @@ -723,12 +734,13 @@ def replace_list( assert not isinstance(mib, bool) m = mib[blk_num : blk_num + 1] - result = blk._replace_coerce( - to_replace=src, - value=dest, - mask=m, + func = blk._replace_regex if use_regex else blk.replace + result = func( + to_replace=to_replace, + value=value, inplace=inplace, - regex=regex, + convert=False, + mask=m, ) if convert and blk.is_object and not all(x is None for x in dest_list): # GH#44498 avoid unwanted cast-back @@ -739,49 +751,6 @@ def replace_list( rb = new_rb return rb - @final - def _replace_coerce( - self, - to_replace, - value, - mask: npt.NDArray[np.bool_], - inplace: bool = True, - regex: bool = False, - ) -> list[Block]: - """ - Replace value corresponding to the given boolean array with another - value. - - Parameters - ---------- - to_replace : object or pattern - Scalar to replace or regular expression to match. - value : object - Replacement object. - mask : np.ndarray[bool] - True indicate corresponding element is ignored. - inplace : bool, default True - Perform inplace modification. - regex : bool, default False - If true, perform regular expression substitution. - - Returns - ------- - List[Block] - """ - if should_use_regex(regex, to_replace): - return self._replace_regex( - to_replace, - value, - inplace=inplace, - convert=False, - mask=mask, - ) - else: - return self.replace( - to_replace=to_replace, value=value, inplace=inplace, mask=mask - ) - # --------------------------------------------------------------------- # 2D Methods - Shared by NumpyBlock and NDArrayBackedExtensionBlock # but not ExtensionBlock From 72efb3918d2823008cfe365574fca07942562ed0 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 20 Mar 2022 11:41:13 +0000 Subject: [PATCH 2/3] redo call to _replace_coerce --- pandas/core/internals/blocks.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 102a228575e95..cc2b994a9871e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -718,13 +718,12 @@ def replace_list( rb = [self if inplace else self.copy()] for i, (to_replace, value) in enumerate(pairs): - use_regex = should_use_regex(regex, to_replace) convert = i == src_len # only convert once at the end new_rb: list[Block] = [] - # GH-39338: Block.replace and Block._replace_regex can split a block - # into single-column blocks, so track the index so we know where to - # index into the mask + # GH-39338: _replace_coerce can split a block into + # single-column blocks, so track the index so we know + # where to index into the mask for blk_num, blk in enumerate(rb): if len(rb) == 1: m = masks[i] @@ -733,13 +732,12 @@ def replace_list( assert not isinstance(mib, bool) m = mib[blk_num : blk_num + 1] - func = blk._replace_regex if use_regex else blk.replace - result = func( + result = blk._replace_coerce( to_replace=to_replace, value=value, - inplace=inplace, - convert=False, mask=m, + inplace=inplace, + regex=regex, ) if convert and blk.is_object and not all(x is None for x in dest_list): # GH#44498 avoid unwanted cast-back From 563be85da8ae59701d6a13a4518a9ca4e41e8587 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 20 Mar 2022 12:05:31 +0000 Subject: [PATCH 3/3] add convert=False to call to self.replace --- pandas/core/internals/blocks.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index cc2b994a9871e..0dda83f4e9c3f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -795,7 +795,11 @@ def _replace_coerce( putmask_inplace(nb.values, mask, value) return [nb] return self.replace( - to_replace=to_replace, value=value, inplace=inplace, mask=mask + to_replace=to_replace, + value=value, + inplace=inplace, + convert=False, + mask=mask, ) # ---------------------------------------------------------------------