-
Notifications
You must be signed in to change notification settings - Fork 902
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Migrate string replace.pxd to pylibcudf (#15839)
xref #15162 Change replace.pxd to use pylibcudf APIs. Authors: - Thomas Li (https://github.com/lithomas1) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: #15839
- Loading branch information
Showing
10 changed files
with
362 additions
and
79 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
strings | ||
======= | ||
|
||
.. toctree:: | ||
:maxdepth: 1 | ||
|
||
replace |
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/replace.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
======= | ||
replace | ||
======= | ||
|
||
.. automodule:: cudf._lib.pylibcudf.strings.replace | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . cimport capitalize, case, char_types, find | ||
from . cimport capitalize, case, char_types, find, replace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from . import capitalize, case, char_types, find | ||
from . import capitalize, case, char_types, find, replace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from cudf._lib.pylibcudf.column cimport Column | ||
from cudf._lib.pylibcudf.libcudf.types cimport size_type | ||
from cudf._lib.pylibcudf.scalar cimport Scalar | ||
|
||
|
||
# Declarations of the string replace functions; a default written as `*` | ||
# has its actual value given in the implementation (.pyx) signature. | ||
cpdef Column replace( | ||
Column input, | ||
Scalar target, | ||
Scalar repl, | ||
size_type maxrepl = * | ||
) | ||
cpdef Column replace_multiple( | ||
Column input, | ||
Column target, | ||
Column repl, | ||
size_type maxrepl = * | ||
) | ||
cpdef Column replace_slice( | ||
Column input, | ||
Scalar repl = *, | ||
size_type start = *, | ||
size_type stop = * | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.pylibcudf.column cimport Column | ||
from cudf._lib.pylibcudf.libcudf.column.column cimport column | ||
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar | ||
from cudf._lib.pylibcudf.libcudf.scalar.scalar_factories cimport ( | ||
make_string_scalar as cpp_make_string_scalar, | ||
) | ||
from cudf._lib.pylibcudf.libcudf.strings.replace cimport ( | ||
replace as cpp_replace, | ||
replace_multiple as cpp_replace_multiple, | ||
replace_slice as cpp_replace_slice, | ||
) | ||
from cudf._lib.pylibcudf.libcudf.types cimport size_type | ||
from cudf._lib.pylibcudf.scalar cimport Scalar | ||
|
||
|
||
cpdef Column replace(
    Column input,
    Scalar target,
    Scalar repl,
    size_type maxrepl = -1
):
    """Replace occurrences of ``target`` in each string with ``repl``.

    Null string entries will return null output string entries.

    For details, see :cpp:func:`replace`.

    Parameters
    ----------
    input : Column
        The input strings.
    target : Scalar
        String to search for in each string.
    repl : Scalar
        String to replace target with.
    maxrepl : size_type, default -1
        Maximum times to replace if target appears multiple times in the
        input string. Default of -1 specifies to replace all occurrences
        of target in each string.

    Returns
    -------
    pylibcudf.Column
        New string column with target replaced.
    """
    # Reinterpret the underlying libcudf scalars as string scalars. The cast
    # is unchecked. NOTE(review): callers presumably must pass string
    # scalars -- confirm.
    cdef const string_scalar* c_target = <string_scalar*>(target.c_obj.get())
    cdef const string_scalar* c_repl = <string_scalar*>(repl.c_obj.get())
    cdef unique_ptr[column] c_output

    # The libcudf call runs with the GIL released.
    with nogil:
        c_output = move(
            cpp_replace(input.view(), c_target[0], c_repl[0], maxrepl)
        )

    return Column.from_libcudf(move(c_output))
|
||
|
||
cpdef Column replace_multiple(
    Column input,
    Column target,
    Column repl,
    size_type maxrepl = -1
):
    """Replace each target string found in the input strings with the
    replacement string at the corresponding index of ``repl``.

    Null string entries will return null output string entries.

    For details, see :cpp:func:`replace_multiple`.

    Parameters
    ----------
    input : Column
        The input strings.
    target : Column
        Column containing strings to search for in the input column.
    repl : Column
        Column containing strings to replace target with.
        Each target, when found, will be replaced by the value at the
        corresponding index in the repl Column.

        Must be of the same length as target.
    maxrepl : size_type, default -1
        Not forwarded to the libcudf call made by this function.
        NOTE(review): this parameter appears to be unused -- confirm
        whether it should be supported or dropped.

    Returns
    -------
    pylibcudf.Column
        New string column with target replaced.
    """
    cdef unique_ptr[column] c_output

    # The libcudf call runs with the GIL released; only the three column
    # views are passed through.
    with nogil:
        c_output = move(
            cpp_replace_multiple(input.view(), target.view(), repl.view())
        )

    return Column.from_libcudf(move(c_output))
|
||
|
||
cpdef Column replace_slice(
    Column input,
    # TODO: default scalar values
    # https://github.com/rapidsai/cudf/issues/15505
    Scalar repl = None,
    size_type start = 0,
    size_type stop = -1
):
    """Replace the [start, stop) character range of each string with ``repl``.

    Null string entries will return null output string entries.

    This function can be used to insert a string into a specific position
    by specifying the same position value for start and stop.
    The repl string can be appended to each string by specifying -1
    for both start and stop.

    For details, see :cpp:func:`replace_slice`.

    Parameters
    ----------
    input : Column
        The input strings.
    repl : Scalar, default None
        String scalar to place in the [start, stop) range.
        When None, an empty string scalar is used.
    start : size_type, default 0
        Start position where repl will be added.
    stop : size_type, default -1
        End position (exclusive) to use for replacement.

    Returns
    -------
    pylibcudf.Column
        New string column
    """
    cdef unique_ptr[column] c_output
    cdef const string_scalar* c_repl

    # No replacement supplied: substitute an empty string scalar.
    if repl is None:
        repl = Scalar.from_libcudf(
            cpp_make_string_scalar("".encode())
        )

    # Unchecked reinterpretation of the underlying scalar as a string scalar.
    # NOTE(review): a non-None repl presumably must be a string scalar --
    # confirm.
    c_repl = <string_scalar*>(repl.c_obj.get())

    # The libcudf call runs with the GIL released.
    with nogil:
        c_output = move(
            cpp_replace_slice(input.view(), c_repl[0], start, stop)
        )

    return Column.from_libcudf(move(c_output))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.