Skip to content

Commit

Permalink
Deprecate current libcudf nvtext minhash functions
Browse files (browse the repository at this point in the history)
  • Loading branch information
davidwendt committed Oct 23, 2024
1 parent e7653a7 commit dc29a76
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 9 deletions.
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,8 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary
ConfigureBench(TEXT_BENCH text/ngrams.cpp text/subword.cpp)

ConfigureNVBench(
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp
text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp text/word_minhash.cpp
TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/normalize.cpp
text/replace.cpp text/tokenize.cpp text/vocab.cpp
)

# ##################################################################################################
Expand Down
24 changes: 18 additions & 6 deletions cpp/include/nvtext/minhash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ namespace CUDF_EXPORT nvtext {
*
* This function uses MurmurHash3_x86_32 for the hash algorithm.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if the width < 2
*
* @param input Strings column to compute minhash
Expand All @@ -51,7 +53,7 @@ namespace CUDF_EXPORT nvtext {
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Minhash values for each string in input
*/
std::unique_ptr<cudf::column> minhash(
[[deprecated]] std::unique_ptr<cudf::column> minhash(
cudf::strings_column_view const& input,
cudf::numeric_scalar<uint32_t> seed = 0,
cudf::size_type width = 4,
Expand All @@ -71,6 +73,8 @@ std::unique_ptr<cudf::column> minhash(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12 - to be replaced in a future release
*
* @throw std::invalid_argument if the width < 2
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
Expand All @@ -83,7 +87,7 @@ std::unique_ptr<cudf::column> minhash(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> minhash(
[[deprecated]] std::unique_ptr<cudf::column> minhash(
cudf::strings_column_view const& input,
cudf::device_span<uint32_t const> seeds,
cudf::size_type width = 4,
Expand All @@ -102,6 +106,8 @@ std::unique_ptr<cudf::column> minhash(
* The hash function returns 2 uint64 values but only the first value
* is used with the minhash calculation.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if the width < 2
*
* @param input Strings column to compute minhash
Expand All @@ -112,7 +118,7 @@ std::unique_ptr<cudf::column> minhash(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return Minhash values as UINT64 for each string in input
*/
std::unique_ptr<cudf::column> minhash64(
[[deprecated]] std::unique_ptr<cudf::column> minhash64(
cudf::strings_column_view const& input,
cudf::numeric_scalar<uint64_t> seed = 0,
cudf::size_type width = 4,
Expand All @@ -132,6 +138,8 @@ std::unique_ptr<cudf::column> minhash64(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12 - to be replaced in a future release
*
* @throw std::invalid_argument if the width < 2
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
Expand All @@ -144,7 +152,7 @@ std::unique_ptr<cudf::column> minhash64(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> minhash64(
[[deprecated]] std::unique_ptr<cudf::column> minhash64(
cudf::strings_column_view const& input,
cudf::device_span<uint64_t const> seeds,
cudf::size_type width = 4,
Expand All @@ -164,6 +172,8 @@ std::unique_ptr<cudf::column> minhash64(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
*
Expand All @@ -173,7 +183,7 @@ std::unique_ptr<cudf::column> minhash64(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> word_minhash(
[[deprecated]] std::unique_ptr<cudf::column> word_minhash(
cudf::lists_column_view const& input,
cudf::device_span<uint32_t const> seeds,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
Expand All @@ -193,6 +203,8 @@ std::unique_ptr<cudf::column> word_minhash(
*
* Any null row entries result in corresponding null output rows.
*
* @deprecated Deprecated in 24.12
*
* @throw std::invalid_argument if seeds is empty
* @throw std::overflow_error if `seeds.size() * input.size()` exceeds the column size limit
*
Expand All @@ -202,7 +214,7 @@ std::unique_ptr<cudf::column> word_minhash(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return List column of minhash values for each string per seed
*/
std::unique_ptr<cudf::column> word_minhash64(
[[deprecated]] std::unique_ptr<cudf::column> word_minhash64(
cudf::lists_column_view const& input,
cudf::device_span<uint64_t const> seeds,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
Expand Down
1 change: 0 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,6 @@ ConfigureTest(
text/bpe_tests.cpp
text/edit_distance_tests.cpp
text/jaccard_tests.cpp
text/minhash_tests.cpp
text/ngrams_tests.cpp
text/ngrams_tokenize_tests.cpp
text/normalize_tests.cpp
Expand Down

0 comments on commit dc29a76

Please sign in to comment.