diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index aab0a9b2d49..5fd68bfb26c 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -502,6 +502,7 @@ add_library(
   src/reductions/product.cu
   src/reductions/reductions.cpp
   src/reductions/scan/rank_scan.cu
+  src/reductions/scan/ewm.cu
   src/reductions/scan/scan.cpp
   src/reductions/scan/scan_exclusive.cu
   src/reductions/scan/scan_inclusive.cu
diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index 67705863d41..4bfef9767ca 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -85,8 +85,7 @@ static cudf::string_scalar create_random_input(int32_t num_chars,
 
   // extract the chars from the returned strings column.
   auto input_column_contents = input_column->release();
-  auto chars_column_contents = input_column_contents.children[1]->release();
-  auto chars_buffer          = chars_column_contents.data.release();
+  auto chars_buffer          = input_column_contents.data.release();
 
   // turn the chars in to a string scalar.
   return cudf::string_scalar(std::move(*chars_buffer));
@@ -218,7 +217,7 @@ NVBENCH_BENCH_TYPES(bench_multibyte_split,
 NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list))
   .set_name("multibyte_split_source")
   .set_min_samples(4)
-  .add_int64_axis("strip_delimiters", {1})
+  .add_int64_axis("strip_delimiters", {0, 1})
   .add_int64_axis("delim_size", {1})
   .add_int64_axis("delim_percent", {1})
   .add_int64_power_of_two_axis("size_approx", {15, 30})
diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index d458c831f19..3c1023017be 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -103,6 +103,7 @@ class aggregation {
     NUNIQUE,         ///< count number of unique elements
     NTH_ELEMENT,     ///< get the nth element
     ROW_NUMBER,      ///< get row-number of current index (relative to rolling window)
+    EWMA,            ///< get exponential weighted moving average at current index
     RANK,            ///< get rank of current index
     COLLECT_LIST,    ///< collect values into a list
     COLLECT_SET,     ///< collect values into a list without duplicate entries
@@ -250,6 +251,8 @@ class segmented_reduce_aggregation : public virtual aggregation {
 enum class udf_type : bool { CUDA, PTX };
 /// Type of correlation method.
 enum class correlation_type : int32_t { PEARSON, KENDALL, SPEARMAN };
+/// Assumption made about the history preceding the first value of an EWM input sequence
+enum class ewm_history : int32_t { INFINITE, FINITE };
 
 /// Factory to create a SUM aggregation
 /// @return A SUM aggregation object
@@ -411,6 +414,42 @@ std::unique_ptr<Base> make_nth_element_aggregation(
 template <typename Base = aggregation>
 std::unique_ptr<Base> make_row_number_aggregation();
 
+/**
+ * @brief Factory to create an EWMA aggregation
+ *
+ * `EWMA` returns a non-nullable column with the same type as the input,
+ * whose values are the exponentially weighted moving average of the input
+ * sequence. Let these values be known as the y_i.
+ *
+ * EWMA aggregations are parameterized by a center of mass (`com`) which
+ * affects the contribution of the previous values (y_0 ... y_{i-1}) in
+ * computing the y_i.
+ *
+ * EWMA aggregations are also parameterized by a history `cudf::ewm_history`.
+ * Special considerations have to be given to the mathematical treatment of
+ * the first value of the input sequence. There are two approaches to this,
+ * one which considers the first value of the sequence to be the exponentially
+ * weighted moving average of some infinite history of data, and one which
+ * takes the first value to be the only datapoint known. These assumptions
+ * lead to two different formulas for the y_i. `ewm_history` selects which.
+ *
+ * EWMA aggregations have special null handling. Nulls have two effects. The
+ * first is to propagate forward the last valid value as far as it has been
+ * computed. This could be thought of as the nulls not affecting the average
+ * in any way. The second effect changes the way the y_i are computed. Since
+ * a moving average is conceptually designed to weight contributing values by
+ * their recency, nulls ought to count as valid periods even though they do
+ * not change the average. For example, if the input sequence is {1, NULL, 3}
+ * then when computing y_2 one should weigh y_0 as if it occurs two periods
+ * before y_2 rather than just one.
+ *
+ * @param center_of_mass The center of mass (`com`) described above
+ * @param history Which assumption to make about the first value of the input
+ * @return An EWMA aggregation object
+ */
+template <typename Base = aggregation>
+std::unique_ptr<Base> make_ewma_aggregation(double const center_of_mass, ewm_history history);
+
 /**
  * @brief Factory to create a RANK aggregation
  *
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index edee83783b8..843414817e3 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -76,6 +76,8 @@ class simple_aggregations_collector {  // Declares the interface for the simple
                                                           class nth_element_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class row_number_aggregation const& agg);
+  virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
+                                                          class ewma_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(data_type col_type,
                                                           class rank_aggregation const& agg);
   virtual std::vector<std::unique_ptr<aggregation>> visit(
@@ -141,6 +143,7 @@ class aggregation_finalizer {  // Declares the interface for the finalizer
   virtual void visit(class correlation_aggregation const& agg);
   virtual void visit(class tdigest_aggregation const& agg);
   virtual void visit(class merge_tdigest_aggregation const& agg);
+  virtual void visit(class ewma_aggregation const& agg);
 };
 
 /**
@@ -667,6 +670,40 @@ class row_number_aggregation final : public rolling_aggregation {
   void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
 };
 
+/**
+ * @brief Scan aggregation describing an exponentially weighted moving average (EWMA)
+ */
+class ewma_aggregation final : public scan_aggregation {
+ public:
+  double const center_of_mass;  ///< center of mass the average is parameterized by
+  cudf::ewm_history history;    ///< assumption made about the first input value
+
+  ewma_aggregation(double const com, cudf::ewm_history hist)
+    : aggregation{EWMA}, center_of_mass{com}, history{hist}
+  {
+  }
+
+  std::unique_ptr<aggregation> clone() const override
+  {
+    return std::make_unique<ewma_aggregation>(*this);
+  }
+
+  std::vector<std::unique_ptr<aggregation>> get_simple_aggregations(
+    data_type col_type, simple_aggregations_collector& collector) const override
+  {
+    return collector.visit(col_type, *this);
+  }
+
+  bool is_equal(aggregation const& _other) const override
+  {
+    if (not aggregation::is_equal(_other)) { return false; }
+    auto const& rhs = dynamic_cast<ewma_aggregation const&>(_other);
+    return history == rhs.history and center_of_mass == rhs.center_of_mass;
+  }
+
+  void finalize(aggregation_finalizer& finalizer) const override { finalizer.visit(*this); }
+};
+
 /**
  * @brief Derived class for specifying a rank aggregation
  */
@@ -1336,6 +1373,11 @@ struct target_type_impl<Source, aggregation::ROW_NUMBER> {
   using type = size_type;
 };
 
+template <typename Source>
+struct target_type_impl<Source, aggregation::EWMA> {
+  using type = double;  // EWMA always targets double, independent of Source
+};
+
 // Always use size_type accumulator for RANK
 template <typename Source>
 struct target_type_impl<Source, aggregation::RANK> {
@@ -1536,6 +1578,8 @@ CUDF_HOST_DEVICE inline decltype(auto) aggregation_dispatcher(aggregation::Kind
       return f.template operator()<aggregation::TDIGEST>(std::forward<Ts>(args)...);
     case aggregation::MERGE_TDIGEST:
       return f.template operator()<aggregation::MERGE_TDIGEST>(std::forward<Ts>(args)...);
+    case aggregation::EWMA:
+      return f.template operator()<aggregation::EWMA>(std::forward<Ts>(args)...);
     default: {
 #ifndef __CUDA_ARCH__
       CUDF_FAIL("Unsupported aggregation.");
diff --git a/cpp/src/aggregation/aggregation.cpp b/cpp/src/aggregation/aggregation.cpp
index adee9147740..5422304c5cb 100644
--- a/cpp/src/aggregation/aggregation.cpp
+++ b/cpp/src/aggregation/aggregation.cpp
@@ -154,6 +154,12 @@ std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   return visit(col_type, static_cast<aggregation const&>(agg));
 }
 
+std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
+  data_type col_type, ewma_aggregation const& agg)
+{
+  return visit(col_type, static_cast<aggregation const&>(agg));  // use the base-class overload
+}
+
 std::vector<std::unique_ptr<aggregation>> simple_aggregations_collector::visit(
   data_type col_type, rank_aggregation const& agg)
 {
@@ -333,6 +339,11 @@ void aggregation_finalizer::visit(row_number_aggregation const& agg)
   visit(static_cast<aggregation const&>(agg));
 }
 
+void aggregation_finalizer::visit(ewma_aggregation const& agg)
+{
+  visit(static_cast<aggregation const&>(agg));  // use the base-class overload
+}
+
 void aggregation_finalizer::visit(rank_aggregation const& agg)
 {
   visit(static_cast<aggregation const&>(agg));
@@ -665,6 +676,17 @@ std::unique_ptr<Base> make_row_number_aggregation()
 template std::unique_ptr<aggregation> make_row_number_aggregation<aggregation>();
 template std::unique_ptr<rolling_aggregation> make_row_number_aggregation<rolling_aggregation>();
 
+/// Factory to create an EWMA aggregation from a center of mass and a history assumption
+template <typename Base>
+std::unique_ptr<Base> make_ewma_aggregation(double const center_of_mass, cudf::ewm_history history)
+{
+  return std::make_unique<detail::ewma_aggregation>(center_of_mass, history);
+}
+template std::unique_ptr<aggregation> make_ewma_aggregation<aggregation>(
+  double const center_of_mass, cudf::ewm_history history);
+template std::unique_ptr<scan_aggregation> make_ewma_aggregation<scan_aggregation>(
+  double const center_of_mass, cudf::ewm_history history);
+
 /// Factory to create a RANK aggregation
 template <typename Base>
 std::unique_ptr<Base> make_rank_aggregation(rank_method method,
diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index ca15b532d07..bed4dbc5a66 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -296,19 +296,6 @@ size_t column_size(column_view const& column, rmm::cuda_stream_view stream)
   CUDF_FAIL("Unexpected compound type");
 }
 
-// checks to see if the given column has a fixed size.  This doesn't
-// check every row, so assumes string and list columns are not fixed, even
-// if each row is the same width.
-// TODO: update this if FIXED_LEN_BYTE_ARRAY is ever supported for writes.
-bool is_col_fixed_width(column_view const& column)
-{
-  if (column.type().id() == type_id::STRUCT) {
-    return std::all_of(column.child_begin(), column.child_end(), is_col_fixed_width);
-  }
-
-  return is_fixed_width(column.type());
-}
-
 /**
  * @brief Extends SchemaElement to add members required in constructing parquet_column_view
  *
@@ -946,6 +933,15 @@ struct parquet_column_view {
     return schema_node.converted_type.value_or(UNKNOWN);
   }
 
+  // Reports whether this column has a fixed-width data type. Individual values are not
+  // inspected, so string and list columns are treated as not fixed-width even when every
+  // value happens to have the same size.
+  [[nodiscard]] bool is_fixed_width() const
+  {
+    // lists (non-zero repetition level) and strings (BYTE_ARRAY) are not fixed width
+    return not(max_rep_level() != 0 or physical_type() == Type::BYTE_ARRAY);
+  }
+
   std::vector<std::string> const& get_path_in_schema() { return path_in_schema; }
 
   // LIST related member functions
@@ -1764,7 +1760,7 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta,
     // unbalanced in final page sizes, so using 4 which seems to be a good
     // compromise at smoothing things out without getting fragment sizes too small.
     auto frag_size_fn = [&](auto const& col, size_t col_size) {
-      int const target_frags_per_page = is_col_fixed_width(col) ? 1 : 4;
+      int const target_frags_per_page = col.is_fixed_width() ? 1 : 4;
       auto const avg_len =
         target_frags_per_page * util::div_rounding_up_safe<size_t>(col_size, input.num_rows());
       if (avg_len > 0) {
@@ -1775,8 +1771,8 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta,
       }
     };
 
-    std::transform(single_streams_table.begin(),
-                   single_streams_table.end(),
+    std::transform(parquet_columns.begin(),
+                   parquet_columns.end(),
                    column_sizes.begin(),
                    column_frag_size.begin(),
                    frag_size_fn);
diff --git a/cpp/src/io/text/byte_range_info.cpp b/cpp/src/io/text/byte_range_info.cpp
index 290e0451839..6a7836ed4e1 100644
--- a/cpp/src/io/text/byte_range_info.cpp
+++ b/cpp/src/io/text/byte_range_info.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ std::vector<byte_range_info> create_byte_range_infos_consecutive(int64_t total_b
   auto range_size = util::div_rounding_up_safe(total_bytes, range_count);
   auto ranges     = std::vector<byte_range_info>();
 
-  ranges.reserve(range_size);
+  ranges.reserve(range_count);
 
   for (int64_t i = 0; i < range_count; i++) {
     auto offset = i * range_size;
diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp
index 596ca3458c8..58faa0ebfe4 100644
--- a/cpp/src/io/text/data_chunk_source_factories.cpp
+++ b/cpp/src/io/text/data_chunk_source_factories.cpp
@@ -120,7 +120,11 @@ class istream_data_chunk_reader : public data_chunk_reader {
   {
   }
 
-  void skip_bytes(std::size_t size) override { _datastream->ignore(size); };
+  void skip_bytes(std::size_t size) override
+  {
+    // seek, don't read: reported 20% faster than _datastream->ignore(size) for large files
+    _datastream->seekg(static_cast<std::ifstream::off_type>(size), std::ios_base::cur);
+  }
 
   std::unique_ptr<device_data_chunk> get_next_chunk(std::size_t read_size,
                                                     rmm::cuda_stream_view stream) override
@@ -265,7 +269,7 @@ class file_data_chunk_source : public data_chunk_source {
   [[nodiscard]] std::unique_ptr<data_chunk_reader> create_reader() const override
   {
     return std::make_unique<istream_data_chunk_reader>(
-      std::make_unique<std::ifstream>(_filename, std::ifstream::in));
+      std::make_unique<std::ifstream>(_filename, std::ifstream::in | std::ifstream::binary));
   }
 
  private:
diff --git a/cpp/src/reductions/scan/ewm.cu b/cpp/src/reductions/scan/ewm.cu
new file mode 100644
index 00000000000..3fa2de450ad
--- /dev/null
+++ b/cpp/src/reductions/scan/ewm.cu
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scan.cuh"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/utilities/type_dispatcher.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <cuda/functional>
+#include <thrust/scan.h>
+#include <thrust/transform_scan.h>
+
+namespace cudf {
+namespace detail {
+
+template <typename T>
+using pair_type = thrust::pair<T, T>;
+
+/**
+ * @brief Binary operator that composes two steps (a, b) of the first-order linear
+ * recurrence y_i = a_i * y_{i-1} + b_i, so an inclusive scan over the pairs solves it.
+ * See G. E. Blelloch. Prefix sums and their applications. Technical Report
+ * CMU-CS-90-190, Nov. 1990. S. 1.4
+ * for details
+ */
+template <typename T>
+class recurrence_functor {
+ public:
+  __device__ pair_type<T> operator()(pair_type<T> lhs, pair_type<T> rhs)
+  {
+    return {lhs.first * rhs.first, lhs.second * rhs.first + rhs.second};
+  }
+};
+
+template <typename T>
+struct ewma_functor_base {
+  T beta;                                 ///< decay applied to the running average per row
+  pair_type<T> const IDENTITY{1.0, 0.0};  ///< pair leaving the running scan value unchanged
+};
+
+template <typename T, bool is_numerator>
+struct ewma_adjust_nulls_functor : public ewma_functor_base<T> {
+  __device__ pair_type<T> operator()(thrust::tuple<bool, int, T> const data)
+  {
+    // `term` is mutable: the denominator scan replaces the input value with 1
+    auto [is_valid, nullcnt, term] = data;
+    if (not is_valid) { return this->IDENTITY; }
+    if constexpr (not is_numerator) { term = 1; }
+
+    // The value is non-null, but each null preceding it adds one more
+    // power of beta to the first element of the pair
+    T const beta = this->beta;
+    return {beta * ((nullcnt != 0) ? pow(beta, nullcnt) : 1), term};
+  }
+};
+
+template <typename T, bool is_numerator>
+struct ewma_adjust_no_nulls_functor : public ewma_functor_base<T> {
+  __device__ pair_type<T> operator()(T const data)
+  {
+    T const decay = this->beta;
+    if constexpr (not is_numerator) {
+      return {decay, 1.0};  // denominator scan: every row contributes a weight of 1
+    } else {
+      return {decay, data};  // numerator scan: the row contributes its value
+    }
+  }
+};
+
+template <typename T>
+struct ewma_noadjust_nulls_functor : public ewma_functor_base<T> {
+  /*
+    When nulls are present a denominator has to be computed explicitly. The
+    recurrence is y_i = (1 - alpha) y_{i-1} + alpha x_i (alpha == 1 - beta), whose
+    implicit "denominator" is the sum of the weights: alpha + (1 - alpha) == 1.
+    If a null is encountered, the "previous" value is downweighted by a further
+    factor of (1 - alpha) for each missing value. For example with a single null:
+    data = {x_0, NULL, x_2},
+    y_2 = ((1 - alpha)**2 x_0 + alpha x_2) / (alpha + (1 - alpha)**2)
+
+    As such, the pairs must be updated before scanning, like the adjusted case,
+    to properly downweight the previous values. Here we also need to compute the
+    normalization factor and divide both elements of the pair by it.
+  */
+  __device__ pair_type<T> operator()(thrust::tuple<T, size_type, bool, size_type> const data)
+  {
+    auto const [input, index, valid, nullcnt] = data;
+    T const beta                              = this->beta;
+
+    // the first row seeds the recurrence with its raw value
+    if (index == 0) { return {beta, input}; }
+    // a null row leaves the running average untouched
+    if (!valid) { return this->IDENTITY; }
+    // preceding value is valid, return normal pair
+    if (nullcnt == 0) { return {beta, (1.0 - beta) * input}; }
+    // one or more preceding values is null, adjust by how many
+    T const factor = (1.0 - beta) + pow(beta, nullcnt + 1);
+    return {(beta * (pow(beta, nullcnt)) / factor), ((1.0 - beta) * input) / factor};
+  }
+};
+
+template <typename T>
+struct ewma_noadjust_no_nulls_functor : public ewma_functor_base<T> {
+  // Maps (x_i, i) to the recurrence pair of the non-adjusted EWMA when there are no nulls
+  __device__ pair_type<T> operator()(thrust::tuple<T, size_type> const data)
+  {
+    auto const [input, index] = data;
+    T const beta              = this->beta;
+    // row 0 seeds the recurrence with the raw value: y_0 = x_0
+    if (index == 0) { return {beta, input}; }
+    // every later row contributes y_i = beta * y_{i-1} + (1 - beta) * x_i
+    return {beta, (1.0 - beta) * input};
+  }
+};
+
+/**
+ * @brief Return an array whose values y_i are the number of null entries
+ * in between the last valid entry of the input and the current index.
+ * Example: {1, NULL, 3, 4, NULL, NULL, 7}
+ *       -> {0, 0,    1, 0, 0,    1,    2}
+ */
+rmm::device_uvector<cudf::size_type> null_roll_up(column_view const& input,
+                                                  rmm::cuda_stream_view stream)
+{
+  rmm::device_uvector<cudf::size_type> output(input.size(), stream);
+  if (input.size() == 0) { return output; }  // nothing to count; avoids an inverted scan range
+  auto device_view = column_device_view::create(input);
+  auto invalid_it  = thrust::make_transform_iterator(
+    cudf::detail::make_validity_iterator(*device_view),
+    cuda::proclaim_return_type<int>([] __device__(int valid) -> int { return 1 - valid; }));
+  output.set_element_to_zero_async(0, stream);  // row 0 has no predecessor and the scan skips it
+  // valid mask {1, 0, 1, 0, 0, 1} leads to output array {0, 0, 1, 0, 1, 2}
+  thrust::inclusive_scan_by_key(rmm::exec_policy(stream),
+                                invalid_it,
+                                invalid_it + input.size() - 1,
+                                invalid_it,
+                                std::next(output.begin()));
+  return output;
+}
+
+template <typename T>
+rmm::device_uvector<T> compute_ewma_adjust(column_view const& input,
+                                           T const beta,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::device_async_resource_ref mr)
+{
+  rmm::device_uvector<T> output(input.size(), stream, mr);  // returned, so allocated from `mr`
+  rmm::device_uvector<pair_type<T>> pairs(input.size(), stream);
+  // Adjusted EWMA: scan the numerators into `output`, the denominators into `pairs`, then divide
+  if (input.has_nulls()) {
+    rmm::device_uvector<cudf::size_type> nullcnt = null_roll_up(input, stream);
+    auto device_view                             = column_device_view::create(input);
+    auto valid_it = cudf::detail::make_validity_iterator(*device_view);
+    auto data =
+      thrust::make_zip_iterator(thrust::make_tuple(valid_it, nullcnt.begin(), input.begin<T>()));
+    // numerators: scan the (decay, x_i) pairs and keep the second element of each result
+    thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+                                     data,
+                                     data + input.size(),
+                                     pairs.begin(),
+                                     ewma_adjust_nulls_functor<T, true>{beta},
+                                     recurrence_functor<T>{});
+    thrust::transform(rmm::exec_policy(stream),
+                      pairs.begin(),
+                      pairs.end(),
+                      output.begin(),
+                      [] __device__(pair_type<T> pair) -> T { return pair.second; });
+    // denominators: the same scan with every valid value replaced by 1
+    thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+                                     data,
+                                     data + input.size(),
+                                     pairs.begin(),
+                                     ewma_adjust_nulls_functor<T, false>{beta},
+                                     recurrence_functor<T>{});
+
+  } else {
+    thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+                                     input.begin<T>(),
+                                     input.end<T>(),
+                                     pairs.begin(),
+                                     ewma_adjust_no_nulls_functor<T, true>{beta},
+                                     recurrence_functor<T>{});
+    thrust::transform(rmm::exec_policy(stream),
+                      pairs.begin(),
+                      pairs.end(),
+                      output.begin(),
+                      [] __device__(pair_type<T> pair) -> T { return pair.second; });
+    auto itr = thrust::make_counting_iterator<size_type>(0);
+    // denominators: the functor ignores its argument here, so a counting iterator suffices
+    thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+                                     itr,
+                                     itr + input.size(),
+                                     pairs.begin(),
+                                     ewma_adjust_no_nulls_functor<T, false>{beta},
+                                     recurrence_functor<T>{});
+  }
+  // y_i = numerator_i / denominator_i
+  thrust::transform(
+    rmm::exec_policy(stream),
+    pairs.begin(),
+    pairs.end(),
+    output.begin(),
+    output.begin(),
+    [] __device__(pair_type<T> pair, T numerator) -> T { return numerator / pair.second; });
+
+  return output;
+}
+
+/**
+ * @brief Non-adjusted EWMA: solves y_0 = x_0, y_i = beta * y_{i-1} + (1 - beta) * x_i with a
+ * single scan. Null rows repeat the previous result; the weights of rows that follow a run
+ * of nulls are renormalized inside ewma_noadjust_nulls_functor.
+ */
+template <typename T>
+rmm::device_uvector<T> compute_ewma_noadjust(column_view const& input,
+                                             T const beta,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::device_async_resource_ref mr)
+{
+  // `output` is handed back to the caller, so it is the only allocation made from `mr`
+  rmm::device_uvector<T> output(input.size(), stream, mr);
+  rmm::device_uvector<pair_type<T>> pairs(input.size(), stream);
+
+  // denominators are all 1 and do not need to be computed
+  // pairs are all (beta, (1 - beta) x_i) except for the first one
+  if (!input.has_nulls()) {
+    auto data = thrust::make_zip_iterator(
+      thrust::make_tuple(input.begin<T>(), thrust::make_counting_iterator<size_type>(0)));
+    thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+                                     data,
+                                     data + input.size(),
+                                     pairs.begin(),
+                                     ewma_noadjust_no_nulls_functor<T>{beta},
+                                     recurrence_functor<T>{});
+
+  } else {
+    // the per-row null run lengths are only needed (and only allocated) on this path
+    rmm::device_uvector<cudf::size_type> nullcnt = null_roll_up(input, stream);
+    auto device_view                             = column_device_view::create(input);
+    auto valid_it                                = detail::make_validity_iterator(*device_view);
+
+    auto data = thrust::make_zip_iterator(thrust::make_tuple(
+      input.begin<T>(), thrust::make_counting_iterator<size_type>(0), valid_it, nullcnt.begin()));
+
+    thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+                                     data,
+                                     data + input.size(),
+                                     pairs.begin(),
+                                     ewma_noadjust_nulls_functor<T>{beta},
+                                     recurrence_functor<T>());
+  }
+
+  // the second element of each scanned pair is the result y_i
+  thrust::transform(rmm::exec_policy(stream),
+                    pairs.begin(),
+                    pairs.end(),
+                    output.begin(),
+                    [] __device__(pair_type<T> pair) -> T { return pair.second; });
+  return output;
+}
+
+struct ewma_functor {
+  template <typename T, CUDF_ENABLE_IF(!std::is_floating_point_v<T>)>
+  std::unique_ptr<column> operator()(scan_aggregation const& agg,
+                                     column_view const& input,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::device_async_resource_ref mr)
+  {
+    CUDF_FAIL("Unsupported type for EWMA.");
+  }
+  // Floating-point input: INFINITE history takes the adjusted path, FINITE the non-adjusted one
+  template <typename T, CUDF_ENABLE_IF(std::is_floating_point_v<T>)>
+  std::unique_ptr<column> operator()(scan_aggregation const& agg,
+                                     column_view const& input,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::device_async_resource_ref mr)
+  {
+    auto const& ewma_agg      = dynamic_cast<ewma_aggregation const&>(agg);
+    auto const history        = ewma_agg.history;
+    auto const center_of_mass = ewma_agg.center_of_mass;
+    // (the reference cast throws std::bad_cast on a mismatched `agg` instead of yielding null)
+    // center of mass is easier for the user, but the recurrences are
+    // better expressed in terms of the derived parameter `beta`
+    T const beta = center_of_mass / (center_of_mass + 1.0);
+
+    auto result = [&]() {
+      if (history == cudf::ewm_history::INFINITE) {
+        return compute_ewma_adjust(input, beta, stream, mr);
+      } else {
+        return compute_ewma_noadjust(input, beta, stream, mr);
+      }
+    }();
+    return std::make_unique<column>(cudf::data_type(cudf::type_to_id<T>()),
+                                    input.size(),
+                                    result.release(),
+                                    rmm::device_buffer{},
+                                    0);
+  }
+};
+
+std::unique_ptr<column> exponentially_weighted_moving_average(column_view const& input,
+                                                              scan_aggregation const& agg,
+                                                              rmm::cuda_stream_view stream,
+                                                              rmm::device_async_resource_ref mr)
+{
+  return type_dispatcher(input.type(), ewma_functor{}, agg, input, stream, mr);  // per input type
+}
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/src/reductions/scan/scan.cuh b/cpp/src/reductions/scan/scan.cuh
index aeb9e516cd4..6c237741ac3 100644
--- a/cpp/src/reductions/scan/scan.cuh
+++ b/cpp/src/reductions/scan/scan.cuh
@@ -36,6 +36,12 @@ std::pair<rmm::device_buffer, size_type> mask_scan(column_view const& input_view
                                                    rmm::cuda_stream_view stream,
                                                    rmm::device_async_resource_ref mr);
 
+// EWMA of `input`; `agg` must be an ewma_aggregation, only floating-point input is supported
+std::unique_ptr<column> exponentially_weighted_moving_average(column_view const& input,
+                                                              scan_aggregation const& agg,
+                                                              rmm::cuda_stream_view stream,
+                                                              rmm::device_async_resource_ref mr);
+
 template <template <typename> typename DispatchFn>
 std::unique_ptr<column> scan_agg_dispatch(column_view const& input,
                                           scan_aggregation const& agg,
@@ -59,6 +65,7 @@ std::unique_ptr<column> scan_agg_dispatch(column_view const& input,
       if (is_fixed_point(input.type())) CUDF_FAIL("decimal32/64/128 cannot support product scan");
       return type_dispatcher<dispatch_storage_type>(
         input.type(), DispatchFn<DeviceProduct>(), input, output_mask, stream, mr);
+    case aggregation::EWMA: return exponentially_weighted_moving_average(input, agg, stream, mr);
     default: CUDF_FAIL("Unsupported aggregation operator for scan");
   }
 }
diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu
index ad2eaa6a471..7c02a8d1b99 100644
--- a/cpp/src/reductions/scan/scan_inclusive.cu
+++ b/cpp/src/reductions/scan/scan_inclusive.cu
@@ -182,7 +182,8 @@ std::unique_ptr<column> scan_inclusive(column_view const& input,
 
   auto output = scan_agg_dispatch<scan_dispatcher>(
     input, agg, static_cast<bitmask_type*>(mask.data()), stream, mr);
-  output->set_null_mask(std::move(mask), null_count);
+  // Use the null mask produced by the op for EWM
+  if (agg.kind != aggregation::EWMA) { output->set_null_mask(std::move(mask), null_count); }
 
   // If the input is a structs column, we also need to push down nulls from the parent output column
   // into the children columns.
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index eda470d2309..9f14455f42d 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -205,6 +205,7 @@ ConfigureTest(
 ConfigureTest(
   REDUCTIONS_TEST
   reductions/collect_ops_tests.cpp
+  reductions/ewm_tests.cpp
   reductions/rank_tests.cpp
   reductions/reduction_tests.cpp
   reductions/scan_tests.cpp
diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp
index ef1d09e5652..6b350c137d0 100644
--- a/cpp/tests/ast/transform_tests.cpp
+++ b/cpp/tests/ast/transform_tests.cpp
@@ -65,6 +65,22 @@ TEST_F(TransformTest, ColumnReference)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);
 }
 
+TEST_F(TransformTest, BasicAdditionDoubleCast)
+{
+  std::vector<__int128_t> decimal_values{10, 7, 20, 0};
+  auto doubles  = column_wrapper<double>{3, 20, 1, 50};
+  auto decimals = cudf::test::fixed_point_column_wrapper<__int128_t>(
+    decimal_values.begin(), decimal_values.end(), numeric::scale_type{0});
+  auto input    = cudf::table_view{{doubles, decimals}};
+  auto lhs      = cudf::ast::column_reference(0);
+  auto rhs      = cudf::ast::column_reference(1);
+  auto rhs_f64  = cudf::ast::operation(cudf::ast::ast_operator::CAST_TO_FLOAT64, rhs);
+  auto sum_expr = cudf::ast::operation(cudf::ast::ast_operator::ADD, lhs, rhs_f64);
+  auto result   = cudf::compute_column(input, sum_expr);
+  auto expected = column_wrapper<double>{13, 27, 21, 50};
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view(), verbosity);
+}
+
 TEST_F(TransformTest, Literal)
 {
   auto c_0   = column_wrapper<int32_t>{3, 20, 1, 50};
diff --git a/cpp/tests/reductions/ewm_tests.cpp b/cpp/tests/reductions/ewm_tests.cpp
new file mode 100644
index 00000000000..09cec688509
--- /dev/null
+++ b/cpp/tests/reductions/ewm_tests.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scan_tests.hpp"
+
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+
+#include <cudf/detail/aggregation/aggregation.hpp>
+#include <cudf/reduction.hpp>
+
+template <typename T>
+struct TypedEwmScanTest : BaseScanTest<T> {
+  void test_ungrouped_ewma_scan(cudf::column_view const& input,
+                                cudf::column_view const& expect_vals,
+                                cudf::scan_aggregation const& agg,
+                                cudf::null_policy null_handling)
+  {
+    auto const result = cudf::scan(input, agg, cudf::scan_type::INCLUSIVE, null_handling);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expect_vals, result->view());
+  }
+};
+
+TYPED_TEST_SUITE(TypedEwmScanTest, cudf::test::FloatingPointTypes);
+
+TYPED_TEST(TypedEwmScanTest, Ewm)
+{
+  // Null-free input with center of mass 0.5, checked for both history modes.
+  using expected_column = cudf::test::fixed_width_column_wrapper<TypeParam>;
+
+  auto const values = make_vector<TypeParam>({1.0, 2.0, 3.0, 4.0, 5.0});
+  auto input        = this->make_column(values);
+
+  // Expected result for ewm_history::INFINITE.
+  auto const adjusted = expected_column{
+    {1.0, 1.75, 2.61538461538461497469, 3.54999999999999982236, 4.52066115702479365268}};
+  // Expected result for ewm_history::FINITE.
+  auto const unadjusted = expected_column{{1.0,
+                                           1.66666666666666651864,
+                                           2.55555555555555535818,
+                                           3.51851851851851815667,
+                                           4.50617283950617242283}};
+
+  auto const infinite_history =
+    cudf::make_ewma_aggregation<cudf::scan_aggregation>(0.5, cudf::ewm_history::INFINITE);
+  auto const finite_history =
+    cudf::make_ewma_aggregation<cudf::scan_aggregation>(0.5, cudf::ewm_history::FINITE);
+
+  this->test_ungrouped_ewma_scan(*input, adjusted, *infinite_history, cudf::null_policy::INCLUDE);
+  this->test_ungrouped_ewma_scan(*input, unadjusted, *finite_history, cudf::null_policy::INCLUDE);
+}
+
+TYPED_TEST(TypedEwmScanTest, EwmWithNulls)
+{
+  // Validity is 0 at rows 1, 3 and 4; the expected output repeats the prior value there.
+  using expected_column = cudf::test::fixed_width_column_wrapper<TypeParam>;
+
+  auto const values   = make_vector<TypeParam>({1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0});
+  auto const validity = thrust::host_vector<bool>(std::vector<bool>{1, 0, 1, 0, 0, 1, 1});
+  auto input          = this->make_column(values, validity);
+
+  // Expected result for ewm_history::INFINITE.
+  auto const adjusted = expected_column{{1.0,
+                                         1.0,
+                                         2.79999999999999982236,
+                                         2.79999999999999982236,
+                                         2.79999999999999982236,
+                                         5.87351778656126466416,
+                                         6.70977596741344139986}};
+
+  // Expected result for ewm_history::FINITE.
+  auto const unadjusted = expected_column{{1.0,
+                                           1.0,
+                                           2.71428571428571441260,
+                                           2.71428571428571441260,
+                                           2.71428571428571441260,
+                                           5.82706766917293172980,
+                                           6.60902255639097724327}};
+
+  auto const infinite_history =
+    cudf::make_ewma_aggregation<cudf::scan_aggregation>(0.5, cudf::ewm_history::INFINITE);
+  auto const finite_history =
+    cudf::make_ewma_aggregation<cudf::scan_aggregation>(0.5, cudf::ewm_history::FINITE);
+
+  this->test_ungrouped_ewma_scan(*input, adjusted, *infinite_history, cudf::null_policy::INCLUDE);
+  this->test_ungrouped_ewma_scan(*input, unadjusted, *finite_history, cudf::null_policy::INCLUDE);
+}
diff --git a/docs/cudf/source/user_guide/api_docs/dataframe.rst b/docs/cudf/source/user_guide/api_docs/dataframe.rst
index 70e4bd060ca..02fd9f7b396 100644
--- a/docs/cudf/source/user_guide/api_docs/dataframe.rst
+++ b/docs/cudf/source/user_guide/api_docs/dataframe.rst
@@ -137,6 +137,7 @@ Computations / descriptive stats
    DataFrame.describe
    DataFrame.diff
    DataFrame.eval
+   DataFrame.ewm
    DataFrame.kurt
    DataFrame.kurtosis
    DataFrame.max
diff --git a/docs/cudf/source/user_guide/api_docs/general_utilities.rst b/docs/cudf/source/user_guide/api_docs/general_utilities.rst
index d9c53c3fbbd..8d0edc0b100 100644
--- a/docs/cudf/source/user_guide/api_docs/general_utilities.rst
+++ b/docs/cudf/source/user_guide/api_docs/general_utilities.rst
@@ -8,6 +8,8 @@ Testing functions
    :toctree: api/
 
    cudf.testing.testing.assert_column_equal
+   cudf.testing.testing.assert_eq
    cudf.testing.testing.assert_frame_equal
    cudf.testing.testing.assert_index_equal
+   cudf.testing.testing.assert_neq
    cudf.testing.testing.assert_series_equal
diff --git a/docs/cudf/source/user_guide/api_docs/series.rst b/docs/cudf/source/user_guide/api_docs/series.rst
index 5dc87a97337..48a7dc8ff87 100644
--- a/docs/cudf/source/user_guide/api_docs/series.rst
+++ b/docs/cudf/source/user_guide/api_docs/series.rst
@@ -138,6 +138,7 @@ Computations / descriptive stats
    Series.describe
    Series.diff
    Series.digitize
+   Series.ewm
    Series.factorize
    Series.kurt
    Series.max
diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py
index f8f674fecec..d90f3ea1aca 100644
--- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py
+++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_csv.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import sys
 from io import StringIO
@@ -13,7 +13,7 @@
     compare_content,
     run_test,
 )
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pythonfuzz(data_handle=CSVReader)
diff --git a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py
index 2f5e6204f7c..69e9437be93 100644
--- a/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py
+++ b/python/cudf/cudf/_fuzz_testing/tests/fuzz_test_json.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import io
 import sys
@@ -9,7 +9,7 @@
 from cudf._fuzz_testing.json import JSONReader, JSONWriter
 from cudf._fuzz_testing.main import pythonfuzz
 from cudf._fuzz_testing.utils import ALL_POSSIBLE_VALUES, run_test
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pythonfuzz(data_handle=JSONReader)
diff --git a/python/cudf/cudf/_fuzz_testing/utils.py b/python/cudf/cudf/_fuzz_testing/utils.py
index d685174f3c2..e6dfe2eae62 100644
--- a/python/cudf/cudf/_fuzz_testing/utils.py
+++ b/python/cudf/cudf/_fuzz_testing/utils.py
@@ -8,7 +8,7 @@
 import pyarrow as pa
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 from cudf.utils.dtypes import (
     pandas_dtypes_to_np_dtypes,
     pyarrow_dtypes_to_pandas_dtypes,
diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 11f801ba772..1616c24eec2 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -58,6 +58,14 @@ class Aggregation:
             if dropna else pylibcudf.types.NullPolicy.INCLUDE
         ))
 
+    @classmethod
+    def ewma(cls, com=1.0, adjust=True):
+        # adjust=True -> INFINITE history, otherwise FINITE.
+        history = pylibcudf.aggregation.EWMHistory
+        return cls(pylibcudf.aggregation.ewma(
+            com, history.INFINITE if adjust else history.FINITE
+        ))
+
     @classmethod
     def size(cls):
         return cls(pylibcudf.aggregation.count(pylibcudf.types.NullPolicy.INCLUDE))
diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index 0b0bbdb2589..c706351a683 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -8,7 +8,7 @@ from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 cimport cudf._lib.pylibcudf.libcudf.types as libcudf_types
-from cudf._lib.io.datasource cimport Datasource, NativeFileDatasource
+from cudf._lib.pylibcudf.io.datasource cimport Datasource, NativeFileDatasource
 from cudf._lib.pylibcudf.libcudf.types cimport data_type
 from cudf._lib.types cimport dtype_to_data_type
 
diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt
index 2408fa1c12f..620229a1275 100644
--- a/python/cudf/cudf/_lib/io/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/io/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources datasource.pyx utils.pyx)
+set(cython_sources utils.pyx)
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx
index 3c14ec46122..1d7c56888d9 100644
--- a/python/cudf/cudf/_lib/io/utils.pyx
+++ b/python/cudf/cudf/_lib/io/utils.pyx
@@ -8,7 +8,7 @@ from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 from cudf._lib.column cimport Column
-from cudf._lib.io.datasource cimport Datasource
+from cudf._lib.pylibcudf.io.datasource cimport Datasource
 from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
 from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
 from cudf._lib.pylibcudf.libcudf.io.types cimport (
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index d3e6053ef4b..9609e3131b4 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -23,12 +23,12 @@ except ImportError:
 
 cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
 from cudf._lib.column cimport Column
-from cudf._lib.io.datasource cimport NativeFileDatasource
 from cudf._lib.io.utils cimport (
     make_sink_info,
     make_source_info,
     update_column_struct_field_names,
 )
+from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource
 from cudf._lib.pylibcudf.libcudf.io.data_sink cimport data_sink
 from cudf._lib.pylibcudf.libcudf.io.orc cimport (
     chunked_orc_writer_options,
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index f6f9cfa9a7c..7914ed7e9d9 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -37,12 +37,12 @@ cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types
 cimport cudf._lib.pylibcudf.libcudf.types as cudf_types
 from cudf._lib.column cimport Column
 from cudf._lib.expressions cimport Expression
-from cudf._lib.io.datasource cimport NativeFileDatasource
 from cudf._lib.io.utils cimport (
     make_sinks_info,
     make_source_info,
     update_struct_field_names,
 )
+from cudf._lib.pylibcudf.io.datasource cimport NativeFileDatasource
 from cudf._lib.pylibcudf.libcudf.expressions cimport expression
 from cudf._lib.pylibcudf.libcudf.io.parquet cimport (
     chunked_parquet_reader as cpp_chunked_parquet_reader,
diff --git a/python/cudf/cudf/_lib/pylibcudf/aggregation.pxd b/python/cudf/cudf/_lib/pylibcudf/aggregation.pxd
index 8526728656b..0981d0e855a 100644
--- a/python/cudf/cudf/_lib/pylibcudf/aggregation.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/aggregation.pxd
@@ -6,6 +6,7 @@ from cudf._lib.pylibcudf.libcudf.aggregation cimport (
     Kind as kind_t,
     aggregation,
     correlation_type,
+    ewm_history,
     groupby_aggregation,
     groupby_scan_aggregation,
     rank_method,
@@ -80,6 +81,8 @@ cpdef Aggregation argmax()
 
 cpdef Aggregation argmin()
 
+cpdef Aggregation ewma(float center_of_mass, ewm_history history)
+
 cpdef Aggregation nunique(null_policy null_handling = *)
 
 cpdef Aggregation nth_element(size_type n, null_policy null_handling = *)
diff --git a/python/cudf/cudf/_lib/pylibcudf/aggregation.pyx b/python/cudf/cudf/_lib/pylibcudf/aggregation.pyx
index 7bb64e32a1b..eed2f6de585 100644
--- a/python/cudf/cudf/_lib/pylibcudf/aggregation.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/aggregation.pyx
@@ -8,6 +8,7 @@ from libcpp.utility cimport move
 from cudf._lib.pylibcudf.libcudf.aggregation cimport (
     aggregation,
     correlation_type,
+    ewm_history,
     groupby_aggregation,
     groupby_scan_aggregation,
     make_all_aggregation,
@@ -19,6 +20,7 @@ from cudf._lib.pylibcudf.libcudf.aggregation cimport (
     make_correlation_aggregation,
     make_count_aggregation,
     make_covariance_aggregation,
+    make_ewma_aggregation,
     make_max_aggregation,
     make_mean_aggregation,
     make_median_aggregation,
@@ -52,6 +54,8 @@ from cudf._lib.pylibcudf.libcudf.types cimport (
 from cudf._lib.pylibcudf.libcudf.aggregation import Kind  # no-cython-lint
 from cudf._lib.pylibcudf.libcudf.aggregation import \
     correlation_type as CorrelationType  # no-cython-lint
+from cudf._lib.pylibcudf.libcudf.aggregation import \
+    ewm_history as EWMHistory  # no-cython-lint
 from cudf._lib.pylibcudf.libcudf.aggregation import \
     rank_method as RankMethod  # no-cython-lint
 from cudf._lib.pylibcudf.libcudf.aggregation import \
@@ -202,6 +206,28 @@ cpdef Aggregation max():
     return Aggregation.from_libcudf(move(make_max_aggregation[aggregation]()))
 
 
+cpdef Aggregation ewma(float center_of_mass, ewm_history history):
+    """Create an EWMA aggregation.
+
+    For details, see :cpp:func:`make_ewma_aggregation`.
+
+    Parameters
+    ----------
+    center_of_mass : float
+        Decay as a center of mass. NOTE(review): C float, libcudf uses double.
+    history : ewm_history
+        Whether to treat the history as infinite or finite.
+
+    Returns
+    -------
+    Aggregation
+        The EWMA aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_ewma_aggregation[aggregation](center_of_mass, history))
+    )
+
+
 cpdef Aggregation count(null_policy null_handling = null_policy.EXCLUDE):
     """Create a count aggregation.
 
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt
index 2cfec101bab..32f0f5543e4 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/io/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources avro.pyx types.pyx)
+set(cython_sources avro.pyx datasource.pyx types.pyx)
 
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
@@ -21,5 +21,5 @@ rapids_cython_create_modules(
   LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_io_ ASSOCIATED_TARGETS cudf
 )
 
-set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_types)
+set(targets_using_arrow_headers pylibcudf_io_avro pylibcudf_io_datasource pylibcudf_io_types)
 link_to_pyarrow_headers("${targets_using_arrow_headers}")
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd
index 250292746c1..cfd6d2cd281 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/io/__init__.pxd
@@ -1,4 +1,4 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from . cimport avro, types
+from . cimport avro, datasource, types
 from .types cimport SourceInfo, TableWithMetadata
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/__init__.py b/python/cudf/cudf/_lib/pylibcudf/io/__init__.py
index 5242c741911..a54ba1834dc 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/io/__init__.py
@@ -1,4 +1,4 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from . import avro, types
+from . import avro, datasource, types
 from .types import SourceInfo, TableWithMetadata
diff --git a/python/cudf/cudf/_lib/io/datasource.pxd b/python/cudf/cudf/_lib/pylibcudf/io/datasource.pxd
similarity index 100%
rename from python/cudf/cudf/_lib/io/datasource.pxd
rename to python/cudf/cudf/_lib/pylibcudf/io/datasource.pxd
diff --git a/python/cudf/cudf/_lib/io/datasource.pyx b/python/cudf/cudf/_lib/pylibcudf/io/datasource.pyx
similarity index 100%
rename from python/cudf/cudf/_lib/io/datasource.pyx
rename to python/cudf/cudf/_lib/pylibcudf/io/datasource.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx
index cd777232b33..ab3375da662 100644
--- a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx
+++ b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx
@@ -4,6 +4,8 @@ from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
+from cudf._lib.pylibcudf.io.datasource cimport Datasource
+from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
 from cudf._lib.pylibcudf.libcudf.io.types cimport (
     host_buffer,
     source_info,
@@ -56,9 +58,8 @@ cdef class SourceInfo:
 
     Parameters
     ----------
-    sources : List[Union[str, os.PathLike, bytes, io.BytesIO]]
-        A homogeneous list of sources (this can be a string filename,
-        an os.PathLike, bytes, or an io.BytesIO) to read from.
+    sources : List[Union[str, os.PathLike, bytes, io.BytesIO, Datasource]]
+        A homogeneous list of sources to read from.
 
         Mixing different types of sources will raise a `ValueError`.
     """
@@ -68,6 +69,7 @@ cdef class SourceInfo:
             raise ValueError("Need to pass at least one source")
 
         cdef vector[string] c_files
+        cdef vector[datasource*] c_datasources
 
         if isinstance(sources[0], (os.PathLike, str)):
             c_files.reserve(len(sources))
@@ -84,6 +86,13 @@ cdef class SourceInfo:
 
             self.c_obj = move(source_info(c_files))
             return
+        elif isinstance(sources[0], Datasource):
+            for csrc in sources:
+                if not isinstance(csrc, Datasource):
+                    raise ValueError("All sources must be of the same type!")
+                c_datasources.push_back((<Datasource>csrc).get_datasource())
+            self.c_obj = move(source_info(c_datasources))
+            return
 
         # TODO: host_buffer is deprecated API, use host_span instead
         cdef vector[host_buffer] c_host_buffers
@@ -106,5 +115,11 @@ cdef class SourceInfo:
                 c_buffer = bio.getbuffer()  # check if empty?
                 c_host_buffers.push_back(host_buffer(<char*>&c_buffer[0],
                                                      c_buffer.shape[0]))
+        else:
+            raise ValueError("Sources must be a list of str/paths, "
+                             "bytes, io.BytesIO, or a Datasource")
+
+        if empty_buffer is True:
+            c_host_buffers.push_back(host_buffer(<char*>NULL, 0))
 
-        self.c_obj = source_info(c_host_buffers)
+        self.c_obj = move(source_info(c_host_buffers))
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pxd
index 8c14bc45723..fe04db52094 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/aggregation.pxd
@@ -79,6 +79,10 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         KENDALL
         SPEARMAN
 
+    cpdef enum class ewm_history(int32_t):
+        INFINITE
+        FINITE
+
     cpdef enum class rank_method(int32_t):
         FIRST
         AVERAGE
@@ -143,6 +147,10 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         string user_defined_aggregator,
         data_type output_type) except +
 
+    cdef unique_ptr[T] make_ewma_aggregation[T](
+        double com, ewm_history adjust
+    ) except +
+
     cdef unique_ptr[T] make_correlation_aggregation[T](
         correlation_type type, size_type min_periods) except +
 
diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py
index 269fcf3e37f..29cb9d7bd12 100644
--- a/python/cudf/cudf/core/_internals/timezones.py
+++ b/python/cudf/cudf/core/_internals/timezones.py
@@ -1,21 +1,50 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
 from __future__ import annotations
 
+import datetime
 import os
 import zoneinfo
 from functools import lru_cache
 from typing import TYPE_CHECKING, Literal
 
 import numpy as np
+import pandas as pd
 
+import cudf
 from cudf._lib.timezone import make_timezone_transition_table
-from cudf.core.column.column import as_column
 
 if TYPE_CHECKING:
     from cudf.core.column.datetime import DatetimeColumn
     from cudf.core.column.timedelta import TimeDeltaColumn
 
 
+def get_compatible_timezone(dtype: pd.DatetimeTZDtype) -> pd.DatetimeTZDtype:
+    """Return dtype with its tz as a zoneinfo.ZoneInfo, if convertible."""
+    tz = dtype.tz
+    if isinstance(tz, zoneinfo.ZoneInfo):
+        return dtype
+    if cudf.get_option("mode.pandas_compatible"):
+        raise NotImplementedError(
+            f"{tz} must be a zoneinfo.ZoneInfo object in pandas_compatible mode."
+        )
+    elif (tzname := getattr(tz, "zone", None)) is not None:
+        # pytz-like: the zone name is exposed as ``tz.zone``
+        key = tzname
+    elif (tz_file := getattr(tz, "_filename", None)) is not None:
+        # dateutil-like: derive the key from the tz file path
+        key = tz_file.split("zoneinfo/")[-1]
+    elif isinstance(tz, datetime.tzinfo):
+        # Only tzinfos whose reported name is UTC (any case) are accepted
+        name = tz.tzname(datetime.datetime.now())
+        if not (isinstance(name, str) and name.lower() == "utc"):
+            raise NotImplementedError(f"cudf does not support {tz}")
+        key = "UTC"  # canonical spelling; ZoneInfo keys can be case-sensitive
+    else:
+        raise NotImplementedError(f"cudf does not support {tz}")
+    new_tz = zoneinfo.ZoneInfo(key)
+    return pd.DatetimeTZDtype(dtype.unit, new_tz)
+
+
 @lru_cache(maxsize=20)
 def get_tz_data(zone_name: str) -> tuple[DatetimeColumn, TimeDeltaColumn]:
     """
@@ -87,6 +116,8 @@ def _read_tzfile_as_columns(
     )
 
     if not transition_times_and_offsets:
+        from cudf.core.column.column import as_column
+
         # this happens for UTC-like zones
         min_date = np.int64(np.iinfo("int64").min + 1).astype("M8[s]")
         return (as_column([min_date]), as_column([np.timedelta64(0, "s")]))
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index f538180805b..231af30c06d 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -1068,51 +1068,34 @@ def notnull(self) -> ColumnBase:
 
         return result
 
-    def fillna(
-        self,
-        fill_value: Any = None,
-        method: str | None = None,
-    ) -> Self:
-        """
-        Fill null values with *fill_value*
-        """
-        if fill_value is not None:
-            fill_is_scalar = np.isscalar(fill_value)
-
-            if fill_is_scalar:
-                if fill_value == _DEFAULT_CATEGORICAL_VALUE:
-                    fill_value = self.codes.dtype.type(fill_value)
-                else:
-                    try:
-                        fill_value = self._encode(fill_value)
-                        fill_value = self.codes.dtype.type(fill_value)
-                    except ValueError as err:
-                        err_msg = "fill value must be in categories"
-                        raise ValueError(err_msg) from err
+    def _validate_fillna_value(
+        self, fill_value: ScalarLike | ColumnLike
+    ) -> cudf.Scalar | ColumnBase:
+        """Align fill_value for .fillna based on column type."""
+        if cudf.api.types.is_scalar(fill_value):
+            if fill_value != _DEFAULT_CATEGORICAL_VALUE:
+                try:
+                    fill_value = self._encode(fill_value)
+                except ValueError as err:
+                    raise ValueError(
+                        f"{fill_value=} must be in categories"
+                    ) from err
+            return cudf.Scalar(fill_value, dtype=self.codes.dtype)
+        else:
+            fill_value = column.as_column(fill_value, nan_as_null=False)
+            if isinstance(fill_value.dtype, CategoricalDtype):
+                if self.dtype != fill_value.dtype:
+                    raise TypeError(
+                        "Cannot set a categorical with another without identical categories"
+                    )
             else:
-                fill_value = column.as_column(fill_value, nan_as_null=False)
-                if isinstance(fill_value, CategoricalColumn):
-                    if self.dtype != fill_value.dtype:
-                        raise TypeError(
-                            "Cannot set a Categorical with another, "
-                            "without identical categories"
-                        )
-                # TODO: only required if fill_value has a subset of the
-                # categories:
-                fill_value = fill_value._set_categories(
-                    self.categories,
-                    is_unique=True,
-                )
-                fill_value = column.as_column(fill_value.codes).astype(
-                    self.codes.dtype
+                raise TypeError(
+                    "Cannot set a categorical with non-categorical data"
                 )
-
-        # Validation of `fill_value` will have to be performed
-        # before returning self.
-        if not self.nullable:
-            return self
-
-        return super().fillna(fill_value, method=method)
+            fill_value = fill_value._set_categories(
+                self.categories,
+            )
+            return fill_value.codes.astype(self.codes.dtype)
 
     def indices_of(
         self, value: ScalarLike
@@ -1372,11 +1355,13 @@ def _set_categories(
         if not (is_unique or new_cats.is_unique):
             new_cats = cudf.Series(new_cats)._column.unique()
 
+        if cur_cats.equals(new_cats, check_dtypes=True):
+            # TODO: Internal usages don't always need a copy; add a copy keyword
+            # as_ordered shallow copies
+            return self.copy().as_ordered(ordered=ordered)
+
         cur_codes = self.codes
-        max_cat_size = (
-            len(cur_cats) if len(cur_cats) > len(new_cats) else len(new_cats)
-        )
-        out_code_dtype = min_unsigned_type(max_cat_size)
+        out_code_dtype = min_unsigned_type(max(len(cur_cats), len(new_cats)))
 
         cur_order = column.as_column(range(len(cur_codes)))
         old_codes = column.as_column(
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index c4e715aeb45..dfcdfbb9d91 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -47,6 +47,7 @@
     is_string_dtype,
 )
 from cudf.core._compat import PANDAS_GE_210
+from cudf.core._internals.timezones import get_compatible_timezone
 from cudf.core.abc import Serializable
 from cudf.core.buffer import (
     Buffer,
@@ -665,15 +666,32 @@ def _check_scatter_key_length(
                 f"{num_keys}"
             )
 
+    def _validate_fillna_value(
+        self, fill_value: ScalarLike | ColumnLike
+    ) -> cudf.Scalar | ColumnBase:
+        """Coerce fill_value to a Scalar of self.dtype or to a column."""
+        if not is_scalar(fill_value):
+            return as_column(fill_value)
+        return cudf.Scalar(fill_value, dtype=self.dtype)
+
     def fillna(
         self,
-        fill_value: Any = None,
-        method: str | None = None,
+        fill_value: ScalarLike | ColumnLike,
+        method: Literal["ffill", "bfill", None] = None,
     ) -> Self:
         """Fill null values with ``value``.
 
         Returns a copy with null filled.
         """
+        if not self.has_nulls(include_nan=True):
+            return self.copy()
+        elif method is None:
+            if is_scalar(fill_value) and libcudf.scalar._is_null_host_scalar(
+                fill_value
+            ):
+                return self.copy()
+            else:
+                fill_value = self._validate_fillna_value(fill_value)
         return libcudf.replace.replace_nulls(
             input_col=self.nans_to_nulls(),
             replacement=fill_value,
@@ -1854,6 +1872,21 @@ def as_column(
             arbitrary.dtype,
             (pd.CategoricalDtype, pd.IntervalDtype, pd.DatetimeTZDtype),
         ):
+            if isinstance(arbitrary.dtype, pd.DatetimeTZDtype):
+                new_tz = get_compatible_timezone(arbitrary.dtype)
+                arbitrary = arbitrary.astype(new_tz)
+            if isinstance(arbitrary.dtype, pd.CategoricalDtype) and isinstance(
+                arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype
+            ):
+                new_tz = get_compatible_timezone(
+                    arbitrary.dtype.categories.dtype
+                )
+                new_cats = arbitrary.dtype.categories.astype(new_tz)
+                new_dtype = pd.CategoricalDtype(
+                    categories=new_cats, ordered=arbitrary.dtype.ordered
+                )
+                arbitrary = arbitrary.astype(new_dtype)
+
             return as_column(
                 pa.array(arbitrary, from_pandas=True),
                 nan_as_null=nan_as_null,
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 9ac761b6be1..121076b69ce 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -8,19 +8,23 @@
 import locale
 import re
 from locale import nl_langinfo
-from typing import TYPE_CHECKING, Any, Literal, Sequence, cast
+from typing import TYPE_CHECKING, Literal, Sequence, cast
 
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from typing_extensions import Self
 
 import cudf
 from cudf import _lib as libcudf
 from cudf._lib.labeling import label_bins
 from cudf._lib.search import search_sorted
-from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
+from cudf.api.types import is_datetime64_dtype, is_timedelta64_dtype
 from cudf.core._compat import PANDAS_GE_220
+from cudf.core._internals.timezones import (
+    check_ambiguous_and_nonexistent,
+    get_compatible_timezone,
+    get_tz_data,
+)
 from cudf.core.column import ColumnBase, as_column, column, string
 from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
 from cudf.utils.dtypes import _get_base_dtype
@@ -282,8 +286,6 @@ def __contains__(self, item: ScalarLike) -> bool:
 
     @functools.cached_property
     def time_unit(self) -> str:
-        if isinstance(self.dtype, pd.DatetimeTZDtype):
-            return self.dtype.unit
         return np.datetime_data(self.dtype)[0]
 
     @property
@@ -638,22 +640,6 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
         else:
             return result_col
 
-    def fillna(
-        self,
-        fill_value: Any = None,
-        method: str | None = None,
-    ) -> Self:
-        if fill_value is not None:
-            if cudf.utils.utils._isnat(fill_value):
-                return self.copy(deep=True)
-            if is_scalar(fill_value):
-                if not isinstance(fill_value, cudf.Scalar):
-                    fill_value = cudf.Scalar(fill_value, dtype=self.dtype)
-            else:
-                fill_value = column.as_column(fill_value, nan_as_null=False)
-
-        return super().fillna(fill_value, method)
-
     def indices_of(
         self, value: ScalarLike
     ) -> cudf.core.column.NumericalColumn:
@@ -725,8 +711,6 @@ def _find_ambiguous_and_nonexistent(
         transitions occur in the time zone database for the given timezone.
         If no transitions occur, the tuple `(False, False)` is returned.
         """
-        from cudf.core._internals.timezones import get_tz_data
-
         transition_times, offsets = get_tz_data(zone_name)
         offsets = offsets.astype(f"timedelta64[{self.time_unit}]")  # type: ignore[assignment]
 
@@ -785,26 +769,22 @@ def tz_localize(
         ambiguous: Literal["NaT"] = "NaT",
         nonexistent: Literal["NaT"] = "NaT",
     ):
-        from cudf.core._internals.timezones import (
-            check_ambiguous_and_nonexistent,
-            get_tz_data,
-        )
-
         if tz is None:
             return self.copy()
         ambiguous, nonexistent = check_ambiguous_and_nonexistent(
             ambiguous, nonexistent
         )
-        dtype = pd.DatetimeTZDtype(self.time_unit, tz)
+        dtype = get_compatible_timezone(pd.DatetimeTZDtype(self.time_unit, tz))
+        tzname = dtype.tz.key
         ambiguous_col, nonexistent_col = self._find_ambiguous_and_nonexistent(
-            tz
+            tzname
         )
         localized = self._scatter_by_column(
             self.isnull() | (ambiguous_col | nonexistent_col),
             cudf.Scalar(cudf.NaT, dtype=self.dtype),
         )
 
-        transition_times, offsets = get_tz_data(tz)
+        transition_times, offsets = get_tz_data(tzname)
         transition_times_local = (transition_times + offsets).astype(
             localized.dtype
         )
@@ -845,7 +825,7 @@ def __init__(
             offset=offset,
             null_count=null_count,
         )
-        self._dtype = dtype
+        self._dtype = get_compatible_timezone(dtype)
 
     def to_pandas(
         self,
@@ -865,6 +845,10 @@ def to_arrow(self):
             self._local_time.to_arrow(), str(self.dtype.tz)
         )
 
+    @functools.cached_property
+    def time_unit(self) -> str:
+        return self.dtype.unit
+
     @property
     def _utc_time(self):
         """Return UTC time as naive timestamps."""
@@ -880,8 +864,6 @@ def _utc_time(self):
     @property
     def _local_time(self):
         """Return the local time as naive timestamps."""
-        from cudf.core._internals.timezones import get_tz_data
-
         transition_times, offsets = get_tz_data(str(self.dtype.tz))
         transition_times = transition_times.astype(_get_base_dtype(self.dtype))
         indices = search_sorted([transition_times], [self], "right") - 1
@@ -911,10 +893,6 @@ def __repr__(self):
         )
 
     def tz_localize(self, tz: str | None, ambiguous="NaT", nonexistent="NaT"):
-        from cudf.core._internals.timezones import (
-            check_ambiguous_and_nonexistent,
-        )
-
         if tz is None:
             return self._local_time
         ambiguous, nonexistent = check_ambiguous_and_nonexistent(
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index e9d9b4933e5..d66908b5f94 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -4,12 +4,11 @@
 
 import warnings
 from decimal import Decimal
-from typing import TYPE_CHECKING, Any, Sequence, cast
+from typing import TYPE_CHECKING, Sequence, cast
 
 import cupy as cp
 import numpy as np
 import pyarrow as pa
-from typing_extensions import Self
 
 import cudf
 from cudf import _lib as libcudf
@@ -31,7 +30,7 @@
 from .numerical_base import NumericalBaseColumn
 
 if TYPE_CHECKING:
-    from cudf._typing import ColumnBinaryOperand, Dtype
+    from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
 
 
 class DecimalBaseColumn(NumericalBaseColumn):
@@ -135,30 +134,31 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
 
         return result
 
-    def fillna(
-        self,
-        fill_value: Any = None,
-        method: str | None = None,
-    ) -> Self:
-        """Fill null values with ``value``.
-
-        Returns a copy with null filled.
-        """
+    def _validate_fillna_value(
+        self, fill_value: ScalarLike | ColumnLike
+    ) -> cudf.Scalar | ColumnBase:
+        """Align fill_value for .fillna based on column type.
+
+        ``int`` and ``Decimal`` scalars become a ``cudf.Scalar`` of this
+        column's dtype; decimal and integer columns are cast to it.
+
+        Raises
+        ------
+        TypeError
+            For any other ``fill_value``.
+        """
         if isinstance(fill_value, (int, Decimal)):
-            fill_value = cudf.Scalar(fill_value, dtype=self.dtype)
-        elif (
-            isinstance(fill_value, DecimalBaseColumn)
-            or isinstance(fill_value, cudf.core.column.NumericalColumn)
-            and is_integer_dtype(fill_value.dtype)
+            return cudf.Scalar(fill_value, dtype=self.dtype)
+        elif isinstance(fill_value, ColumnBase) and (
+            # Check fill_value's dtype: self is a decimal column, so testing
+            # self.dtype would let every column through.
+            isinstance(fill_value.dtype, DecimalDtype)
+            or fill_value.dtype.kind in "iu"
         ):
-            fill_value = fill_value.astype(self.dtype)
-        else:
-            raise TypeError(
-                "Decimal columns only support using fillna with decimal and "
-                "integer values"
-            )
-
-        return super().fillna(fill_value, method=method)
+            return fill_value.astype(self.dtype)
+        raise TypeError(
+            "Decimal columns only support using fillna with decimal and "
+            "integer values"
+        )
 
     def normalize_binop_value(self, other):
         if isinstance(other, ColumnBase):
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 098cf43421b..76c64e1aea0 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -532,57 +532,26 @@ def find_and_replace(
             replaced, df._data["old"], df._data["new"]
         )
 
-    def fillna(
-        self,
-        fill_value: Any = None,
-        method: str | None = None,
-    ) -> Self:
-        """
-        Fill null values with *fill_value*
-        """
-        col = self.nans_to_nulls()
-
-        if col.null_count == 0:
-            return col
-
-        if method is not None:
-            return super().fillna(fill_value, method)
-
-        if fill_value is None:
-            raise ValueError("Must specify either 'fill_value' or 'method'")
-
-        if (
-            isinstance(fill_value, cudf.Scalar)
-            and fill_value.dtype == col.dtype
-        ):
-            return super().fillna(fill_value, method)
-
-        if np.isscalar(fill_value):
-            # cast safely to the same dtype as self
-            fill_value_casted = col.dtype.type(fill_value)
-            if not np.isnan(fill_value) and (fill_value_casted != fill_value):
+    def _validate_fillna_value(
+        self, fill_value: ScalarLike | ColumnLike
+    ) -> cudf.Scalar | ColumnBase:
+        """Align fill_value for .fillna based on column type."""
+        if is_scalar(fill_value):
+            cudf_obj = cudf.Scalar(fill_value)
+            if not as_column(cudf_obj).can_cast_safely(self.dtype):
                 raise TypeError(
                     f"Cannot safely cast non-equivalent "
-                    f"{type(fill_value).__name__} to {col.dtype.name}"
+                    f"{type(fill_value).__name__} to {self.dtype.name}"
                 )
-            fill_value = cudf.Scalar(fill_value_casted)
         else:
-            fill_value = column.as_column(fill_value, nan_as_null=False)
-            if is_integer_dtype(col.dtype):
-                # cast safely to the same dtype as self
-                if fill_value.dtype != col.dtype:
-                    new_fill_value = fill_value.astype(col.dtype)
-                    if not (new_fill_value == fill_value).all():
-                        raise TypeError(
-                            f"Cannot safely cast non-equivalent "
-                            f"{fill_value.dtype.type.__name__} to "
-                            f"{col.dtype.type.__name__}"
-                        )
-                    fill_value = new_fill_value
-            else:
-                fill_value = fill_value.astype(col.dtype)
-
-        return super().fillna(fill_value, method)
+            cudf_obj = as_column(fill_value, nan_as_null=False)
+            if not cudf_obj.can_cast_safely(self.dtype):  # type: ignore[attr-defined]
+                raise TypeError(
+                    f"Cannot safely cast non-equivalent "
+                    f"{cudf_obj.dtype.type.__name__} to "
+                    f"{self.dtype.type.__name__}"
+                )
+        return cudf_obj.astype(self.dtype)
 
     def can_cast_safely(self, to_dtype: DtypeObj) -> bool:
         """
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 2451a9cc0af..936cd1eccb0 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5,12 +5,11 @@
 import re
 import warnings
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Sequence, cast, overload
+from typing import TYPE_CHECKING, Sequence, cast, overload
 
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from typing_extensions import Self
 
 import cudf
 import cudf.api.types
@@ -5838,21 +5837,6 @@ def find_and_replace(
             res = self
         return libcudf.replace.replace(res, df._data["old"], df._data["new"])
 
-    def fillna(
-        self,
-        fill_value: Any = None,
-        method: str | None = None,
-    ) -> Self:
-        if fill_value is not None:
-            if not is_scalar(fill_value):
-                fill_value = column.as_column(fill_value, dtype=self.dtype)
-            elif cudf._lib.scalar._is_null_host_scalar(fill_value):
-                # Trying to fill <NA> with <NA> value? Return copy.
-                return self.copy(deep=True)
-            else:
-                fill_value = cudf.Scalar(fill_value, dtype=self.dtype)
-        return super().fillna(fill_value, method=method)
-
     def normalize_binop_value(self, other) -> column.ColumnBase | cudf.Scalar:
         if (
             isinstance(other, (column.ColumnBase, cudf.Scalar))
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 26b449f1863..8f41bcb6422 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -4,12 +4,11 @@
 
 import datetime
 import functools
-from typing import TYPE_CHECKING, Any, Sequence, cast
+from typing import TYPE_CHECKING, Sequence, cast
 
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from typing_extensions import Self
 
 import cudf
 from cudf import _lib as libcudf
@@ -252,22 +251,6 @@ def normalize_binop_value(self, other) -> ColumnBinaryOperand:
     def time_unit(self) -> str:
         return np.datetime_data(self.dtype)[0]
 
-    def fillna(
-        self,
-        fill_value: Any = None,
-        method: str | None = None,
-    ) -> Self:
-        if fill_value is not None:
-            if cudf.utils.utils._isnat(fill_value):
-                return self.copy(deep=True)
-            if is_scalar(fill_value):
-                fill_value = cudf.Scalar(fill_value)
-                dtype = self.dtype
-                fill_value = fill_value.astype(dtype)
-            else:
-                fill_value = column.as_column(fill_value, nan_as_null=False)
-        return super().fillna(fill_value, method)
-
     def as_numerical_column(
         self, dtype: Dtype
     ) -> "cudf.core.column.NumericalColumn":
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 76bb9d2a8ed..f0d8157011d 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -2980,6 +2980,32 @@ def set_index(
         df.index = idx
         return df if not inplace else None
 
+    @_cudf_nvtx_annotate
+    def fillna(
+        self, value=None, method=None, axis=None, inplace=False, limit=None
+    ):  # noqa: D102
+        if isinstance(value, (pd.Series, pd.DataFrame)):
+            value = cudf.from_pandas(value)
+        if isinstance(value, cudf.Series):
+            # Align value.index to self.columns
+            value = value.reindex(self._column_names)
+        elif isinstance(value, cudf.DataFrame):
+            if not self.index.equals(value.index):
+                # Align value.index to self.index
+                value = value.reindex(self.index)
+            value = dict(value.items())
+        elif isinstance(value, abc.Mapping):
+            # Align Series entries to self.index; leave other entries as-is
+            value = {
+                key: fill.reindex(self.index)
+                if isinstance(fill, cudf.Series)
+                else fill
+                for key, fill in value.items()
+            }
+        return super().fillna(
+            value=value, method=method, axis=axis, inplace=inplace, limit=limit
+        )
+
     @_cudf_nvtx_annotate
     def where(self, cond, other=None, inplace=False):
         from cudf.core._internals.where import (
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 38bff3946d6..8ca71180c00 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import copy
 import operator
 import pickle
 import warnings
@@ -20,6 +19,7 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf.api.types import is_dtype_equal, is_scalar
+from cudf.core._compat import PANDAS_LT_300
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column import (
     ColumnBase,
@@ -38,7 +38,7 @@
 if TYPE_CHECKING:
     from types import ModuleType
 
-    from cudf._typing import Dtype
+    from cudf._typing import Dtype, ScalarLike
 
 
 # TODO: It looks like Frame is missing a declaration of `copy`, need to add
@@ -613,8 +613,8 @@ def where(self, cond, other=None, inplace: bool = False) -> Self | None:
     @_cudf_nvtx_annotate
     def fillna(
         self,
-        value=None,
-        method: Literal["ffill", "bfill", "pad", "backfill"] | None = None,
+        value: None | ScalarLike | cudf.Series = None,
+        method: Literal["ffill", "bfill", "pad", "backfill", None] = None,
         axis=None,
         inplace: bool = False,
         limit=None,
@@ -725,6 +725,16 @@ def fillna(
             raise ValueError("Cannot specify both 'value' and 'method'.")
 
         if method:
+            # Do not remove until pandas 3.0 support is added.
+            assert (
+                PANDAS_LT_300
+            ), "Need to drop after pandas-3.0 support is added."
+            warnings.warn(
+                f"{type(self).__name__}.fillna with 'method' is "
+                "deprecated and will raise in a future version. "
+                "Use obj.ffill() or obj.bfill() instead.",
+                FutureWarning,
+            )
             if method not in {"ffill", "bfill", "pad", "backfill"}:
                 raise NotImplementedError(
                     f"Fill method {method} is not supported"
@@ -734,57 +744,24 @@ def fillna(
             elif method == "backfill":
                 method = "bfill"
 
-        # TODO: This logic should be handled in different subclasses since
-        # different Frames support different types of values.
-        if isinstance(value, cudf.Series):
-            value = value.reindex(self._data.names)
-        elif isinstance(value, cudf.DataFrame):
-            if not self.index.equals(value.index):  # type: ignore[attr-defined]
-                value = value.reindex(self.index)  # type: ignore[attr-defined]
-            else:
-                value = value
-        elif not isinstance(value, abc.Mapping):
-            value = {name: copy.deepcopy(value) for name in self._data.names}
-        else:
-            value = {
-                key: value.reindex(self.index)  # type: ignore[attr-defined]
-                if isinstance(value, cudf.Series)
-                else value
-                for key, value in value.items()
-            }
-
-        filled_data = {}
-        for col_name, col in self._data.items():
-            if col_name in value and method is None:
-                replace_val = value[col_name]
-            else:
-                replace_val = None
-            should_fill = (
-                (
-                    col_name in value
-                    and col.has_nulls(include_nan=True)
-                    and not libcudf.scalar._is_null_host_scalar(replace_val)
-                )
-                or method is not None
-                or (
-                    isinstance(col, cudf.core.column.CategoricalColumn)
-                    and not libcudf.scalar._is_null_host_scalar(replace_val)
-                )
+        if is_scalar(value):
+            value = {name: value for name in self._column_names}
+        elif not isinstance(value, (abc.Mapping, cudf.Series)):
+            raise TypeError(
+                f'"value" parameter must be a scalar, dict '
+                f"or Series, but you passed a "
+                f'"{type(value).__name__}"'
             )
-            if should_fill:
-                filled_data[col_name] = col.fillna(replace_val, method)
-            else:
-                filled_data[col_name] = col.copy(deep=True)
+
+        filled_columns = [
+            col.fillna(value[name], method) if name in value else col.copy()
+            for name, col in self._data.items()
+        ]
 
         return self._mimic_inplace(
-            self._from_data(
-                data=ColumnAccessor(
-                    data=filled_data,
-                    multiindex=self._data.multiindex,
-                    level_names=self._data.level_names,
-                    rangeindex=self._data.rangeindex,
-                    label_dtype=self._data.label_dtype,
-                    verify=False,
+            self._from_data_like_self(
+                self._data._from_columns_like_self(
+                    filled_columns, verify=False
                 )
             ),
             inplace=inplace,
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index f1b74adefed..280a6e92eab 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -52,7 +52,7 @@
     _post_process_output_col,
     _return_arr_from_dtype,
 )
-from cudf.core.window import Rolling
+from cudf.core.window import ExponentialMovingWindow, Rolling
 from cudf.utils import docutils, ioutils
 from cudf.utils._numba import _CUDFNumbaConfig
 from cudf.utils.docutils import copy_docstring
@@ -1853,6 +1853,32 @@ def rolling(
             win_type=win_type,
         )
 
+    @copy_docstring(ExponentialMovingWindow)
+    def ewm(
+        self,
+        com: float | None = None,
+        span: float | None = None,
+        halflife: float | None = None,
+        alpha: float | None = None,
+        min_periods: int | None = 0,
+        adjust: bool = True,
+        ignore_na: bool = False,
+        axis: int = 0,
+        times: str | np.ndarray | None = None,
+    ):
+        return ExponentialMovingWindow(
+            self,
+            com=com,
+            span=span,
+            halflife=halflife,
+            alpha=alpha,
+            min_periods=min_periods,
+            adjust=adjust,
+            ignore_na=ignore_na,
+            axis=axis,
+            times=times,
+        )
+
     @_cudf_nvtx_annotate
     def nans_to_nulls(self):
         """
@@ -2701,11 +2727,24 @@ def sort_index(
             if ignore_index:
                 out = out.reset_index(drop=True)
         else:
-            labels = sorted(self._data.names, reverse=not ascending)
-            out = self[labels]
+            labels = sorted(self._column_names, reverse=not ascending)
+            result_columns = (self._data[label] for label in labels)
             if ignore_index:
-                out._data.rangeindex = True
-                out._data.names = list(range(self._num_columns))
+                ca = ColumnAccessor(
+                    dict(enumerate(result_columns)),
+                    rangeindex=True,
+                    verify=False,
+                )
+            else:
+                ca = ColumnAccessor(
+                    dict(zip(labels, result_columns)),
+                    rangeindex=self._data.rangeindex,
+                    multiindex=self._data.multiindex,
+                    level_names=self._data.level_names,
+                    label_dtype=self._data.label_dtype,
+                    verify=False,
+                )
+            out = self._from_data_like_self(ca)
 
         return self._mimic_inplace(out, inplace=inplace)
 
@@ -3178,29 +3217,6 @@ def _split(self, splits, keep_index=True):
             for i in range(len(splits) + 1)
         ]
 
-    @_cudf_nvtx_annotate
-    def fillna(
-        self, value=None, method=None, axis=None, inplace=False, limit=None
-    ):  # noqa: D102
-        if method is not None:
-            # Do not remove until pandas 3.0 support is added.
-            assert (
-                PANDAS_LT_300
-            ), "Need to drop after pandas-3.0 support is added."
-            warnings.warn(
-                f"{type(self).__name__}.fillna with 'method' is "
-                "deprecated and will raise in a future version. "
-                "Use obj.ffill() or obj.bfill() instead.",
-                FutureWarning,
-            )
-        old_index = self.index
-        ret = super().fillna(value, method, axis, inplace, limit)
-        if inplace:
-            self.index = old_index
-        else:
-            ret.index = old_index
-        return ret
-
     @_cudf_nvtx_annotate
     def bfill(self, value=None, axis=None, inplace=None, limit=None):
         """
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index c0716d7709a..15ad0813601 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1797,20 +1797,12 @@ def fillna(
     ):
         if isinstance(value, pd.Series):
             value = Series.from_pandas(value)
-
-        if not (is_scalar(value) or isinstance(value, (abc.Mapping, Series))):
-            raise TypeError(
-                f'"value" parameter must be a scalar, dict '
-                f"or Series, but you passed a "
-                f'"{type(value).__name__}"'
-            )
-
-        if isinstance(value, (abc.Mapping, Series)):
+        elif isinstance(value, abc.Mapping):
             value = Series(value)
+        if isinstance(value, cudf.Series):
             if not self.index.equals(value.index):
                 value = value.reindex(self.index)
-            value = value._column
-
+            value = {self.name: value._column}
         return super().fillna(
             value=value, method=method, axis=axis, inplace=inplace, limit=limit
         )
diff --git a/python/cudf/cudf/core/window/__init__.py b/python/cudf/cudf/core/window/__init__.py
index 8ea3eb0179b..23522588d33 100644
--- a/python/cudf/cudf/core/window/__init__.py
+++ b/python/cudf/cudf/core/window/__init__.py
@@ -1,3 +1,3 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION
-
+# Copyright (c) 2019-2024, NVIDIA CORPORATION
+from cudf.core.window.ewm import ExponentialMovingWindow
 from cudf.core.window.rolling import Rolling
diff --git a/python/cudf/cudf/core/window/ewm.py b/python/cudf/cudf/core/window/ewm.py
new file mode 100644
index 00000000000..21693e106bd
--- /dev/null
+++ b/python/cudf/cudf/core/window/ewm.py
@@ -0,0 +1,215 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import numpy as np
+
+from cudf._lib.reduce import scan
+from cudf.api.types import is_numeric_dtype
+from cudf.core.window.rolling import _RollingBase
+
+
+class ExponentialMovingWindow(_RollingBase):
+    r"""
+    Provide exponential weighted (EW) functions.
+    Available EW functions: ``mean()``
+    Exactly one parameter: ``com``, ``span``, ``halflife``, or ``alpha``
+    must be provided.
+
+    Parameters
+    ----------
+    com : float, optional
+        Specify decay in terms of center of mass,
+        :math:`\alpha = 1 / (1 + com)`, for :math:`com \geq 0`.
+    span : float, optional
+        Specify decay in terms of span,
+        :math:`\alpha = 2 / (span + 1)`, for :math:`span \geq 1`.
+    halflife : float, str, timedelta, optional
+        Specify decay in terms of half-life,
+        :math:`\alpha = 1 - \exp\left(-\ln(2) / halflife\right)`, for
+        :math:`halflife > 0`.
+    alpha : float, optional
+        Specify smoothing factor :math:`\alpha` directly,
+        :math:`0 < \alpha \leq 1`.
+    min_periods : int, default 0
+        Not Supported
+    adjust : bool, default True
+        Controls assumptions about the first value in the sequence. See
+        https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.ewm.html
+        for details.
+    ignore_na : bool, default False
+        Not Supported
+    axis : {0, 1}, default 0
+        Not Supported
+    times : str, np.ndarray, Series, default None
+        Not Supported
+
+    Returns
+    -------
+    ``ExponentialMovingWindow`` object
+
+    Notes
+    -----
+    cuDF input data may contain both nulls and nan values. For the purposes
+    of this method, they are taken to have the same meaning, meaning nulls
+    in cuDF will affect the result the same way that nan values would using
+    the equivalent pandas method.
+
+    .. pandas-compat::
+        **cudf.core.window.ExponentialMovingWindow**
+
+        The parameters ``min_periods``, ``ignore_na``, ``axis``, and ``times``
+        are not yet supported. Behavior is defined only for data that begins
+        with a valid (non-null) element.
+
+        Currently, only ``mean`` is a supported method.
+
+    Examples
+    --------
+    >>> df = cudf.DataFrame({'B': [0, 1, 2, cudf.NA, 4]})
+    >>> df
+          B
+    0     0
+    1     1
+    2     2
+    3  <NA>
+    4     4
+    >>> df.ewm(com=0.5).mean()
+              B
+    0  0.000000
+    1  0.750000
+    2  1.615385
+    3  1.615385
+    4  3.670213
+
+    >>> df.ewm(com=0.5, adjust=False).mean()
+              B
+    0  0.000000
+    1  0.666667
+    2  1.555556
+    3  1.555556
+    4  3.650794
+    """
+
+    def __init__(
+        self,
+        obj,
+        com: float | None = None,
+        span: float | None = None,
+        halflife: float | None = None,
+        alpha: float | None = None,
+        min_periods: int | None = 0,
+        adjust: bool = True,
+        ignore_na: bool = False,
+        axis: int = 0,
+        times: str | np.ndarray | None = None,
+    ):
+        # Test `times` by identity: it may be array-like, and comparing an
+        # array inside a tuple raises ValueError instead of the error below.
+        scalar_defaults = (min_periods, ignore_na, axis) == (0, False, 0)
+        if times is not None or not scalar_defaults:
+            raise NotImplementedError(
+                "The parameters `min_periods`, `ignore_na`, "
+                "`axis`, and `times` are not yet supported."
+            )
+
+        self.obj = obj
+        self.adjust = adjust
+        self.com = get_center_of_mass(com, span, halflife, alpha)
+
+    def mean(self):
+        """
+        Calculate the ewm (exponential weighted moment) mean.
+        """
+        return self._apply_agg("ewma")
+
+    def var(self, bias):
+        raise NotImplementedError("ewmvar not yet supported.")
+
+    def std(self, bias):
+        raise NotImplementedError("ewmstd not yet supported.")
+
+    def corr(self, other):
+        raise NotImplementedError("ewmcorr not yet supported.")
+
+    def cov(self, other):
+        raise NotImplementedError("ewmcov not yet supported.")
+
+    def _apply_agg_series(self, sr, agg_name):
+        """
+        Apply the ``agg_name`` scan to the single column backing ``sr``.
+        """
+        if not is_numeric_dtype(sr.dtype):
+            raise TypeError("No numeric types to aggregate")
+
+        # libcudf ewm has special casing for nulls only
+        # and come what may with nans. It treats those nulls like
+        # pandas does nans in the same positions mathematically.
+        # as such we need to convert the nans to nulls before
+        # passing them in.
+        to_libcudf_column = sr._column.astype("float64").nans_to_nulls()
+
+        # Rebuild from ``sr`` rather than ``self.obj``: when ``self.obj`` is a
+        # DataFrame, ``sr`` is one of its columns and the single scanned
+        # column must not be paired with the labels of the whole frame.
+        return sr._from_data_like_self(
+            sr._data._from_columns_like_self(
+                [
+                    scan(
+                        agg_name,
+                        to_libcudf_column,
+                        True,
+                        com=self.com,
+                        adjust=self.adjust,
+                    )
+                ]
+            )
+        )
+
+
+def get_center_of_mass(
+    comass: float | None,
+    span: float | None,
+    halflife: float | None,
+    alpha: float | None,
+) -> float:
+    """
+    Convert whichever decay parameter was given into a center of mass.
+
+    Raises ``ValueError`` if none or more than one of the parameters is
+    given, or if the given parameter is outside its valid domain.
+    """
+    valid_count = count_not_none(comass, span, halflife, alpha)
+    if valid_count > 1:
+        raise ValueError(
+            "comass, span, halflife, and alpha are mutually exclusive"
+        )
+
+    # Convert to center of mass; domain checks ensure 0 < alpha <= 1
+    if comass is not None:
+        if comass < 0:
+            raise ValueError("comass must satisfy: comass >= 0")
+    elif span is not None:
+        if span < 1:
+            raise ValueError("span must satisfy: span >= 1")
+        comass = (span - 1) / 2
+    elif halflife is not None:
+        if halflife <= 0:
+            raise ValueError("halflife must satisfy: halflife > 0")
+        decay = 1 - np.exp(np.log(0.5) / halflife)
+        comass = 1 / decay - 1
+    elif alpha is not None:
+        if alpha <= 0 or alpha > 1:
+            raise ValueError("alpha must satisfy: 0 < alpha <= 1")
+        comass = (1 - alpha) / alpha
+    else:
+        raise ValueError("Must pass one of comass, span, halflife, or alpha")
+
+    return float(comass)
+
+
+def count_not_none(*args) -> int:
+    """
+    Returns the count of arguments that are not None.
+    """
+    return sum(x is not None for x in args)
diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py
index 7d140a1ffa5..29391c68471 100644
--- a/python/cudf/cudf/core/window/rolling.py
+++ b/python/cudf/cudf/core/window/rolling.py
@@ -14,7 +14,27 @@
 from cudf.utils.utils import GetAttrGetItemMixin
 
 
-class Rolling(GetAttrGetItemMixin, Reducible):
+class _RollingBase:
+    """
+    Contains methods common to all kinds of rolling
+    """
+
+    def _apply_agg_dataframe(self, df, agg_name):
+        result_df = cudf.DataFrame({})
+        for i, col_name in enumerate(df.columns):
+            result_col = self._apply_agg_series(df[col_name], agg_name)
+            result_df.insert(i, col_name, result_col)
+        result_df.index = df.index
+        return result_df
+
+    def _apply_agg(self, agg_name):
+        if isinstance(self.obj, cudf.Series):
+            return self._apply_agg_series(self.obj, agg_name)
+        else:
+            return self._apply_agg_dataframe(self.obj, agg_name)
+
+
+class Rolling(GetAttrGetItemMixin, _RollingBase, Reducible):
     """
     Rolling window calculations.
 
diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py
index 698dd946022..0ba432d6d0e 100644
--- a/python/cudf/cudf/pandas/_wrappers/pandas.py
+++ b/python/cudf/cudf/pandas/_wrappers/pandas.py
@@ -789,7 +789,7 @@ def Index__new__(cls, *args, **kwargs):
 
 ExponentialMovingWindow = make_intermediate_proxy_type(
     "ExponentialMovingWindow",
-    _Unusable,
+    cudf.core.window.ewm.ExponentialMovingWindow,
     pd.core.window.ewm.ExponentialMovingWindow,
 )
 
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index 1540c6850e7..dfb729cae6b 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -17,7 +17,7 @@
 import numpy as np
 
 from ..options import _env_get_bool
-from ..testing._utils import assert_eq
+from ..testing import assert_eq
 from .annotation import nvtx
 
 
diff --git a/python/cudf/cudf/pylibcudf_tests/test_source_info.py b/python/cudf/cudf/pylibcudf_tests/test_source_info.py
index 71a3ecbcc30..019321b7259 100644
--- a/python/cudf/cudf/pylibcudf_tests/test_source_info.py
+++ b/python/cudf/cudf/pylibcudf_tests/test_source_info.py
@@ -2,13 +2,21 @@
 
 import io
 
+import pyarrow as pa
 import pytest
 
 import cudf._lib.pylibcudf as plc
+from cudf._lib.pylibcudf.io.datasource import NativeFileDatasource
 
 
 @pytest.mark.parametrize(
-    "source", ["a.txt", b"hello world", io.BytesIO(b"hello world")]
+    "source",
+    [
+        "a.txt",
+        b"hello world",
+        io.BytesIO(b"hello world"),
+        NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
+    ],
 )
 def test_source_info_ctor(source, tmp_path):
     if isinstance(source, str):
@@ -28,6 +36,10 @@ def test_source_info_ctor(source, tmp_path):
         ["a.txt", "a.txt"],
         [b"hello world", b"hello there"],
         [io.BytesIO(b"hello world"), io.BytesIO(b"hello there")],
+        [
+            NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
+            NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
+        ],
     ],
 )
 def test_source_info_ctor_multiple(sources, tmp_path):
@@ -54,6 +66,11 @@ def test_source_info_ctor_multiple(sources, tmp_path):
             io.BytesIO(b"hello there"),
             b"hello world",
         ],
+        [
+            NativeFileDatasource(pa.PythonFile(io.BytesIO(), mode="r")),
+            "awef.txt",
+            b"hello world",
+        ],
     ],
 )
 def test_source_info_ctor_mixing_invalid(sources, tmp_path):
@@ -67,3 +84,8 @@ def test_source_info_ctor_mixing_invalid(sources, tmp_path):
             sources[i] = str(file)
     with pytest.raises(ValueError):
         plc.io.SourceInfo(sources)
+
+
+def test_source_info_invalid():
+    with pytest.raises(ValueError):  # an int source is expected to be refused
+        plc.io.SourceInfo([123])
diff --git a/python/cudf/cudf/testing/__init__.py b/python/cudf/cudf/testing/__init__.py
index 1843344bc81..4e92b43b9f9 100644
--- a/python/cudf/cudf/testing/__init__.py
+++ b/python/cudf/cudf/testing/__init__.py
@@ -1,7 +1,9 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from cudf.testing.testing import (
+    assert_eq,
     assert_frame_equal,
     assert_index_equal,
+    assert_neq,
     assert_series_equal,
 )
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index e067d15af4c..a6a2d4eea00 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -2,12 +2,10 @@
 
 import itertools
 import string
-import warnings
 from collections import abc
 from contextlib import contextmanager
 from decimal import Decimal
 
-import cupy
 import numpy as np
 import pandas as pd
 import pytest
@@ -15,7 +13,6 @@
 from numba.core.typing.templates import AbstractTemplate
 from numba.cuda.cudadecl import registry as cuda_decl_registry
 from numba.cuda.cudaimpl import lower as cuda_lower
-from pandas import testing as tm
 
 import cudf
 from cudf._lib.null_mask import bitmask_allocation_size_bytes
@@ -113,81 +110,6 @@ def count_zero(arr):
     return np.count_nonzero(arr == 0)
 
 
-def assert_eq(left, right, **kwargs):
-    """Assert that two cudf-like things are equivalent
-
-    This equality test works for pandas/cudf dataframes/series/indexes/scalars
-    in the same way, and so makes it easier to perform parametrized testing
-    without switching between assert_frame_equal/assert_series_equal/...
-    functions.
-    """
-    # dtypes that we support but Pandas doesn't will convert to
-    # `object`. Check equality before that happens:
-    if kwargs.get("check_dtype", True):
-        if hasattr(left, "dtype") and hasattr(right, "dtype"):
-            if isinstance(
-                left.dtype, cudf.core.dtypes._BaseDtype
-            ) and not isinstance(
-                left.dtype, cudf.CategoricalDtype
-            ):  # leave categorical comparison to Pandas
-                assert_eq(left.dtype, right.dtype)
-
-    if hasattr(left, "to_pandas"):
-        left = left.to_pandas()
-    if hasattr(right, "to_pandas"):
-        right = right.to_pandas()
-    if isinstance(left, cupy.ndarray):
-        left = cupy.asnumpy(left)
-    if isinstance(right, cupy.ndarray):
-        right = cupy.asnumpy(right)
-
-    if isinstance(left, (pd.DataFrame, pd.Series, pd.Index)):
-        # TODO: A warning is emitted from the function
-        # pandas.testing.assert_[series, frame, index]_equal for some inputs:
-        # "DeprecationWarning: elementwise comparison failed; this will raise
-        # an error in the future."
-        # or "FutureWarning: elementwise ..."
-        # This warning comes from a call from pandas to numpy. It is ignored
-        # here because it cannot be fixed within cudf.
-        with warnings.catch_warnings():
-            warnings.simplefilter(
-                "ignore", (DeprecationWarning, FutureWarning)
-            )
-            if isinstance(left, pd.DataFrame):
-                tm.assert_frame_equal(left, right, **kwargs)
-            elif isinstance(left, pd.Series):
-                tm.assert_series_equal(left, right, **kwargs)
-            else:
-                tm.assert_index_equal(left, right, **kwargs)
-
-    elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
-        if np.issubdtype(left.dtype, np.floating) and np.issubdtype(
-            right.dtype, np.floating
-        ):
-            assert np.allclose(left, right, equal_nan=True)
-        else:
-            assert np.array_equal(left, right)
-    else:
-        # Use the overloaded __eq__ of the operands
-        if left == right:
-            return True
-        elif any(np.issubdtype(type(x), np.floating) for x in (left, right)):
-            np.testing.assert_almost_equal(left, right)
-        else:
-            np.testing.assert_equal(left, right)
-    return True
-
-
-def assert_neq(left, right, **kwargs):
-    __tracebackhide__ = True
-    try:
-        assert_eq(left, right, **kwargs)
-    except AssertionError:
-        pass
-    else:
-        raise AssertionError
-
-
 def assert_exceptions_equal(
     lfunc,
     rfunc,
diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py
index dffbbe92fc1..e56c8d867cb 100644
--- a/python/cudf/cudf/testing/testing.py
+++ b/python/cudf/cudf/testing/testing.py
@@ -2,9 +2,12 @@
 
 from __future__ import annotations
 
+import warnings
+
 import cupy as cp
 import numpy as np
 import pandas as pd
+from pandas import testing as tm
 
 import cudf
 from cudf._lib.unary import is_nan
@@ -708,3 +711,100 @@ def assert_frame_equal(
             atol=atol,
             obj=f'Column name="{col}"',
         )
+
+
+def assert_eq(left, right, **kwargs):
+    """Assert that two cudf-like things are equivalent
+
+    Parameters
+    ----------
+    left
+        Object to compare
+    right
+        Object to compare
+    kwargs
+        Keyword arguments forwarded unchanged to the ``pandas.testing``
+        ``assert_frame_equal``/``assert_series_equal``/``assert_index_equal``
+        helpers. ``check_dtype`` (default True) also gates the dtype pre-check.
+
+    Notes
+    -----
+    This equality test works for pandas/cudf dataframes/series/indexes/scalars
+    in the same way, and so makes it easier to perform parametrized testing
+    without switching between assert_frame_equal/assert_series_equal/...
+    functions.
+
+    Raises
+    ------
+    AssertionError
+        If the two objects do not compare equal.
+    """
+    # dtypes that we support but Pandas doesn't will convert to
+    # `object`. Check equality before that happens:
+    if kwargs.get("check_dtype", True):
+        if hasattr(left, "dtype") and hasattr(right, "dtype"):
+            if isinstance(
+                left.dtype, cudf.core.dtypes._BaseDtype
+            ) and not isinstance(
+                left.dtype, cudf.CategoricalDtype
+            ):  # leave categorical comparison to Pandas
+                assert_eq(left.dtype, right.dtype)
+
+    if hasattr(left, "to_pandas"):
+        left = left.to_pandas()
+    if hasattr(right, "to_pandas"):
+        right = right.to_pandas()
+    if isinstance(left, cp.ndarray):
+        left = cp.asnumpy(left)
+    if isinstance(right, cp.ndarray):
+        right = cp.asnumpy(right)
+
+    if isinstance(left, (pd.DataFrame, pd.Series, pd.Index)):
+        # TODO: A warning is emitted from the function
+        # pandas.testing.assert_[series, frame, index]_equal for some inputs:
+        # "DeprecationWarning: elementwise comparison failed; this will raise
+        # an error in the future."
+        # or "FutureWarning: elementwise ..."
+        # This warning comes from a call from pandas to numpy. It is ignored
+        # here because it cannot be fixed within cudf.
+        with warnings.catch_warnings():
+            warnings.simplefilter(
+                "ignore", (DeprecationWarning, FutureWarning)
+            )
+            if isinstance(left, pd.DataFrame):
+                tm.assert_frame_equal(left, right, **kwargs)
+            elif isinstance(left, pd.Series):
+                tm.assert_series_equal(left, right, **kwargs)
+            else:
+                tm.assert_index_equal(left, right, **kwargs)
+
+    elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
+        if np.issubdtype(left.dtype, np.floating) and np.issubdtype(
+            right.dtype, np.floating
+        ):
+            assert np.allclose(left, right, equal_nan=True)
+        else:
+            assert np.array_equal(left, right)
+    else:
+        # Use the overloaded __eq__ of the operands
+        if left == right:
+            return True
+        elif any(np.issubdtype(type(x), np.floating) for x in (left, right)):
+            np.testing.assert_almost_equal(left, right)
+        else:
+            np.testing.assert_equal(left, right)
+    return True
+
+
+def assert_neq(left, right, **kwargs):
+    """Assert that two cudf-like things do not compare equal.
+
+    Succeeds exactly when :func:`assert_eq` raises ``AssertionError``.
+    """
+    __tracebackhide__ = True
+    try:
+        assert_eq(left, right, **kwargs)
+    except AssertionError:
+        # The operands differ, which is what the caller asserted.
+        return
+    raise AssertionError
diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py
index 30d8f1c8422..437bc4cba67 100644
--- a/python/cudf/cudf/tests/conftest.py
+++ b/python/cudf/cudf/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 import itertools
 import os
@@ -11,7 +11,7 @@
 import rmm  # noqa: F401
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 _CURRENT_DIRECTORY = str(pathlib.Path(__file__).resolve().parent)
 
diff --git a/python/cudf/cudf/tests/dataframe/test_conversion.py b/python/cudf/cudf/tests/dataframe/test_conversion.py
index fa7e5ec1d4c..d1de7245634 100644
--- a/python/cudf/cudf/tests/dataframe/test_conversion.py
+++ b/python/cudf/cudf/tests/dataframe/test_conversion.py
@@ -1,9 +1,9 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 import pandas as pd
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_convert_dtypes():
diff --git a/python/cudf/cudf/tests/dataframe/test_io_serialization.py b/python/cudf/cudf/tests/dataframe/test_io_serialization.py
index ad81609470c..57948afe1d8 100644
--- a/python/cudf/cudf/tests/dataframe/test_io_serialization.py
+++ b/python/cudf/cudf/tests/dataframe/test_io_serialization.py
@@ -8,7 +8,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/groupby/test_computation.py b/python/cudf/cudf/tests/groupby/test_computation.py
index 04c56ef7462..630fcdc4dce 100644
--- a/python/cudf/cudf/tests/groupby/test_computation.py
+++ b/python/cudf/cudf/tests/groupby/test_computation.py
@@ -1,9 +1,9 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 import pandas as pd
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
diff --git a/python/cudf/cudf/tests/groupby/test_groupby_obj.py b/python/cudf/cudf/tests/groupby/test_groupby_obj.py
index 04b483e08dc..ab2b16d263c 100644
--- a/python/cudf/cudf/tests/groupby/test_groupby_obj.py
+++ b/python/cudf/cudf/tests/groupby/test_groupby_obj.py
@@ -2,7 +2,7 @@
 from numpy.testing import assert_array_equal
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_groupby_14955():
diff --git a/python/cudf/cudf/tests/groupby/test_indexing.py b/python/cudf/cudf/tests/groupby/test_indexing.py
index 57e8bc1c2d8..43b6183fca5 100644
--- a/python/cudf/cudf/tests/groupby/test_indexing.py
+++ b/python/cudf/cudf/tests/groupby/test_indexing.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_rank_return_type_compatible_mode():
diff --git a/python/cudf/cudf/tests/groupby/test_transform.py b/python/cudf/cudf/tests/groupby/test_transform.py
index 78d7fbfd879..f7138036ddf 100644
--- a/python/cudf/cudf/tests/groupby/test_transform.py
+++ b/python/cudf/cudf/tests/groupby/test_transform.py
@@ -4,7 +4,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.fixture(params=[False, True], ids=["no-null-keys", "null-keys"])
diff --git a/python/cudf/cudf/tests/indexes/datetime/test_indexing.py b/python/cudf/cudf/tests/indexes/datetime/test_indexing.py
index f2c2d9a263b..4c0ce2ed191 100644
--- a/python/cudf/cudf/tests/indexes/datetime/test_indexing.py
+++ b/python/cudf/cudf/tests/indexes/datetime/test_indexing.py
@@ -1,19 +1,17 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+import zoneinfo
 
 import pandas as pd
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_slice_datetimetz_index():
+    tz = zoneinfo.ZoneInfo("US/Eastern")
     data = ["2001-01-01", "2001-01-02", None, None, "2001-01-03"]
-    pidx = pd.DatetimeIndex(data, dtype="datetime64[ns]").tz_localize(
-        "US/Eastern"
-    )
-    idx = cudf.DatetimeIndex(data, dtype="datetime64[ns]").tz_localize(
-        "US/Eastern"
-    )
+    pidx = pd.DatetimeIndex(data, dtype="datetime64[ns]").tz_localize(tz)
+    idx = cudf.DatetimeIndex(data, dtype="datetime64[ns]").tz_localize(tz)
     expected = pidx[1:4]
     got = idx[1:4]
     assert_eq(expected, got)
diff --git a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py b/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py
index b28ef131025..7cc629270b1 100644
--- a/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py
+++ b/python/cudf/cudf/tests/indexes/datetime/test_time_specific.py
@@ -1,29 +1,28 @@
 # Copyright (c) 2022-2024, NVIDIA CORPORATION.
+import zoneinfo
+
 import pandas as pd
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_tz_localize():
+    tz = zoneinfo.ZoneInfo("America/New_York")
     pidx = pd.date_range("2001-01-01", "2001-01-02", freq="1s")
     pidx = pidx.astype("<M8[ns]")
     idx = cudf.from_pandas(pidx)
     assert pidx.dtype == idx.dtype
-    assert_eq(
-        pidx.tz_localize("America/New_York"),
-        idx.tz_localize("America/New_York"),
-    )
+    assert_eq(pidx.tz_localize(tz), idx.tz_localize(tz))
 
 
 def test_tz_convert():
+    tz = zoneinfo.ZoneInfo("America/New_York")
     pidx = pd.date_range("2023-01-01", periods=3, freq="h")
     idx = cudf.from_pandas(pidx)
     pidx = pidx.tz_localize("UTC")
     idx = idx.tz_localize("UTC")
-    assert_eq(
-        pidx.tz_convert("America/New_York"), idx.tz_convert("America/New_York")
-    )
+    assert_eq(pidx.tz_convert(tz), idx.tz_convert(tz))
 
 
 def test_delocalize_naive():
diff --git a/python/cudf/cudf/tests/indexes/test_interval.py b/python/cudf/cudf/tests/indexes/test_interval.py
index d59041e32d5..87b76ab7609 100644
--- a/python/cudf/cudf/tests/indexes/test_interval.py
+++ b/python/cudf/cudf/tests/indexes/test_interval.py
@@ -7,7 +7,7 @@
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.core.index import IntervalIndex, interval_range
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_interval_constructor_default_closed():
diff --git a/python/cudf/cudf/tests/input_output/test_text.py b/python/cudf/cudf/tests/input_output/test_text.py
index acba13bb5b0..e9406d080d4 100644
--- a/python/cudf/cudf/tests/input_output/test_text.py
+++ b/python/cudf/cudf/tests/input_output/test_text.py
@@ -1,11 +1,11 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 from io import StringIO
 
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.fixture(scope="module")
diff --git a/python/cudf/cudf/tests/series/test_conversion.py b/python/cudf/cudf/tests/series/test_conversion.py
index 43ac35e41a6..e1dd359e1ba 100644
--- a/python/cudf/cudf/tests/series/test_conversion.py
+++ b/python/cudf/cudf/tests/series/test_conversion.py
@@ -1,9 +1,9 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 import pandas as pd
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py
index 58ffc610c3c..cea86a5499e 100644
--- a/python/cudf/cudf/tests/series/test_datetimelike.py
+++ b/python/cudf/cudf/tests/series/test_datetimelike.py
@@ -1,13 +1,15 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
+import datetime
 import os
+import zoneinfo
 
 import pandas as pd
 import pytest
 
 import cudf
 from cudf import date_range
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def _get_all_zones():
@@ -70,7 +72,7 @@ def test_localize_ambiguous(request, unit, zone_name):
         dtype=f"datetime64[{unit}]",
     )
     expect = s.to_pandas().dt.tz_localize(
-        zone_name, ambiguous="NaT", nonexistent="NaT"
+        zoneinfo.ZoneInfo(zone_name), ambiguous="NaT", nonexistent="NaT"
     )
     got = s.dt.tz_localize(zone_name)
     assert_eq(expect, got)
@@ -96,7 +98,7 @@ def test_localize_nonexistent(request, unit, zone_name):
         dtype=f"datetime64[{unit}]",
     )
     expect = s.to_pandas().dt.tz_localize(
-        zone_name, ambiguous="NaT", nonexistent="NaT"
+        zoneinfo.ZoneInfo(zone_name), ambiguous="NaT", nonexistent="NaT"
     )
     got = s.dt.tz_localize(zone_name)
     assert_eq(expect, got)
@@ -130,6 +132,9 @@ def test_delocalize_naive():
     "to_tz", ["Europe/London", "America/Chicago", "UTC", None]
 )
 def test_convert(from_tz, to_tz):
+    from_tz = zoneinfo.ZoneInfo(from_tz)
+    if to_tz is not None:
+        to_tz = zoneinfo.ZoneInfo(to_tz)
     ps = pd.Series(pd.date_range("2023-01-01", periods=3, freq="h"))
     gs = cudf.from_pandas(ps)
     ps = ps.dt.tz_localize(from_tz)
@@ -169,6 +174,8 @@ def test_convert_from_naive():
     ],
 )
 def test_convert_edge_cases(data, original_timezone, target_timezone):
+    original_timezone = zoneinfo.ZoneInfo(original_timezone)
+    target_timezone = zoneinfo.ZoneInfo(target_timezone)
     ps = pd.Series(data, dtype="datetime64[s]").dt.tz_localize(
         original_timezone
     )
@@ -229,10 +236,33 @@ def test_tz_convert_naive_typeerror():
     "klass", ["Series", "DatetimeIndex", "Index", "CategoricalIndex"]
 )
 def test_from_pandas_obj_tz_aware(klass):
-    tz_aware_data = [
-        pd.Timestamp("2020-01-01", tz="UTC").tz_convert("US/Pacific")
-    ]
+    tz = zoneinfo.ZoneInfo("US/Pacific")
+    tz_aware_data = [pd.Timestamp("2020-01-01", tz="UTC").tz_convert(tz)]
     pandas_obj = getattr(pd, klass)(tz_aware_data)
     result = cudf.from_pandas(pandas_obj)
     expected = getattr(cudf, klass)(tz_aware_data)
     assert_eq(result, expected)
+
+
+@pytest.mark.parametrize(
+    "klass", ["Series", "DatetimeIndex", "Index", "CategoricalIndex"]
+)
+def test_from_pandas_obj_tz_aware_unsupported(klass):
+    fixed_offset = datetime.timezone(datetime.timedelta(hours=1))
+    stamp = pd.Timestamp("2020-01-01", tz="UTC").tz_convert(fixed_offset)
+    host_obj = getattr(pd, klass)([stamp])
+    with pytest.raises(NotImplementedError):
+        cudf.from_pandas(host_obj)
+
+
+@pytest.mark.parametrize(
+    "klass", ["Series", "DatetimeIndex", "Index", "CategoricalIndex"]
+)
+def test_pandas_compatible_non_zoneinfo_raises(klass):
+    pytz = pytest.importorskip("pytz")
+    pacific = pytz.timezone("US/Pacific")
+    stamp = pd.Timestamp("2020-01-01", tz="UTC").tz_convert(pacific)
+    host_obj = getattr(pd, klass)([stamp])
+    with cudf.option_context("mode.pandas_compatible", True):
+        with pytest.raises(NotImplementedError):
+            cudf.from_pandas(host_obj)
diff --git a/python/cudf/cudf/tests/test_apply_rows.py b/python/cudf/cudf/tests/test_apply_rows.py
index 8870eb421c7..a11022c1a17 100644
--- a/python/cudf/cudf/tests/test_apply_rows.py
+++ b/python/cudf/cudf/tests/test_apply_rows.py
@@ -1,10 +1,11 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 import pytest
 
 import cudf
 from cudf.core.column import column
-from cudf.testing._utils import assert_eq, gen_rand_series
+from cudf.testing import assert_eq
+from cudf.testing._utils import gen_rand_series
 
 
 def _kernel_multiply(a, b, out):
diff --git a/python/cudf/cudf/tests/test_applymap.py b/python/cudf/cudf/tests/test_applymap.py
index d720e6ce2ce..ce1dcce5887 100644
--- a/python/cudf/cudf/tests/test_applymap.py
+++ b/python/cudf/cudf/tests/test_applymap.py
@@ -4,7 +4,7 @@
 
 from cudf import NA, DataFrame
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-from cudf.testing import _utils as utils
+from cudf.testing import assert_eq
 
 
 @pytest.mark.skipif(
@@ -46,7 +46,7 @@ def test_applymap_dataframe(data, func, na_action, request):
     with pytest.warns(FutureWarning):
         got = gdf.applymap(func, na_action=na_action)
 
-    utils.assert_eq(expect, got, check_dtype=False)
+    assert_eq(expect, got, check_dtype=False)
 
 
 def test_applymap_raise_cases():
diff --git a/python/cudf/cudf/tests/test_array_function.py b/python/cudf/cudf/tests/test_array_function.py
index e6b89e2c5fa..773141ee71a 100644
--- a/python/cudf/cudf/tests/test_array_function.py
+++ b/python/cudf/cudf/tests/test_array_function.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 # To determine if NEP18 is available in the current version of NumPy we simply
diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py
index b036c1f13f3..41b9188f036 100644
--- a/python/cudf/cudf/tests/test_array_ufunc.py
+++ b/python/cudf/cudf/tests/test_array_ufunc.py
@@ -15,11 +15,8 @@
     PANDAS_LT_300,
     PANDAS_VERSION,
 )
-from cudf.testing._utils import (
-    assert_eq,
-    expect_warning_if,
-    set_random_null_mask_inplace,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import expect_warning_if, set_random_null_mask_inplace
 
 _UFUNCS = [
     obj
diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
index 238e8d990cc..2ec1d1d2f28 100644
--- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
+++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
@@ -23,7 +23,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 from cudf.testing.dataset_generator import rand_dataframe
 
 
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index fa371914c3e..7d8c3b53115 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -15,7 +15,7 @@
 from cudf import Index, Series
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.core.buffer.spill_manager import get_global_manager
-from cudf.testing import _utils as utils
+from cudf.testing import _utils as utils, assert_eq
 from cudf.utils.dtypes import (
     BOOL_TYPES,
     DATETIME_TYPES,
@@ -194,7 +194,7 @@ def test_series_binop(binop, obj_class):
     if obj_class == "Index":
         result = Series(result)
 
-    utils.assert_eq(result, expect)
+    assert_eq(result, expect)
 
 
 @pytest.mark.parametrize("binop", _binops)
@@ -318,7 +318,7 @@ def test_series_compare_nulls(cmpop, dtypes):
     expect[expect_mask] = cmpop(lser[expect_mask], rser[expect_mask])
 
     got = cmpop(lser, rser)
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.fixture
@@ -349,7 +349,7 @@ def test_str_series_compare_str(
         Series.from_pandas(str_series_cmp_data), "a"
     )
 
-    utils.assert_eq(expect, got.to_pandas(nullable=True))
+    assert_eq(expect, got.to_pandas(nullable=True))
 
 
 def test_str_series_compare_str_reflected(
@@ -360,7 +360,7 @@ def test_str_series_compare_str_reflected(
         "a", Series.from_pandas(str_series_cmp_data)
     )
 
-    utils.assert_eq(expect, got.to_pandas(nullable=True))
+    assert_eq(expect, got.to_pandas(nullable=True))
 
 
 def test_str_series_compare_num(
@@ -371,7 +371,7 @@ def test_str_series_compare_num(
         Series.from_pandas(str_series_cmp_data), cmp_scalar
     )
 
-    utils.assert_eq(expect, got.to_pandas(nullable=True))
+    assert_eq(expect, got.to_pandas(nullable=True))
 
 
 def test_str_series_compare_num_reflected(
@@ -382,7 +382,7 @@ def test_str_series_compare_num_reflected(
         cmp_scalar, Series.from_pandas(str_series_cmp_data)
     )
 
-    utils.assert_eq(expect, got.to_pandas(nullable=True))
+    assert_eq(expect, got.to_pandas(nullable=True))
 
 
 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
@@ -612,12 +612,12 @@ def test_different_shapes_and_columns(binop):
     # Empty frame on the right side
     pd_frame = binop(pd.DataFrame({"x": [1, 2]}), pd.DataFrame({}))
     cd_frame = binop(cudf.DataFrame({"x": [1, 2]}), cudf.DataFrame({}))
-    utils.assert_eq(cd_frame, pd_frame)
+    assert_eq(cd_frame, pd_frame)
 
     # Empty frame on the left side
     pd_frame = pd.DataFrame({}) + pd.DataFrame({"x": [1, 2]})
     cd_frame = cudf.DataFrame({}) + cudf.DataFrame({"x": [1, 2]})
-    utils.assert_eq(cd_frame, pd_frame)
+    assert_eq(cd_frame, pd_frame)
 
     # Note: the below rely on a discrepancy between cudf and pandas
     # While pandas inserts columns in alphabetical order, cudf inserts in the
@@ -627,12 +627,12 @@ def test_different_shapes_and_columns(binop):
     # More rows on the left side
     pd_frame = pd.DataFrame({"x": [1, 2, 3]}) + pd.DataFrame({"y": [1, 2]})
     cd_frame = cudf.DataFrame({"x": [1, 2, 3]}) + cudf.DataFrame({"y": [1, 2]})
-    utils.assert_eq(cd_frame, pd_frame)
+    assert_eq(cd_frame, pd_frame)
 
     # More rows on the right side
     pd_frame = pd.DataFrame({"x": [1, 2]}) + pd.DataFrame({"y": [1, 2, 3]})
     cd_frame = cudf.DataFrame({"x": [1, 2]}) + cudf.DataFrame({"y": [1, 2, 3]})
-    utils.assert_eq(cd_frame, pd_frame)
+    assert_eq(cd_frame, pd_frame)
 
 
 @pytest.mark.parametrize("binop", _binops)
@@ -650,7 +650,7 @@ def test_different_shapes_and_same_columns(binop):
     )
     # cast x as float64 so it matches pandas dtype
     cd_frame["x"] = cd_frame["x"].astype(np.float64)
-    utils.assert_eq(cd_frame, pd_frame)
+    assert_eq(cd_frame, pd_frame)
 
 
 @pytest.mark.parametrize("binop", _binops)
@@ -680,7 +680,7 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop):
     # cast x and y as float64 so it matches pandas dtype
     cd_frame["x"] = cd_frame["x"].astype(np.float64)
     cd_frame["y"] = cd_frame["y"].astype(np.float64)
-    utils.assert_eq(cd_frame, pd_frame)
+    assert_eq(cd_frame, pd_frame)
 
     pdf1 = pd.DataFrame({"x": [1, 1]}, index=["a", "a"])
     pdf2 = pd.DataFrame({"x": [2]}, index=["a"])
@@ -688,7 +688,7 @@ def test_different_shapes_and_columns_with_unaligned_indices(binop):
     gdf2 = cudf.DataFrame.from_pandas(pdf2)
     pd_frame = binop(pdf1, pdf2)
     cd_frame = binop(gdf1, gdf2)
-    utils.assert_eq(pd_frame, cd_frame)
+    assert_eq(pd_frame, cd_frame)
 
 
 @pytest.mark.parametrize(
@@ -717,12 +717,12 @@ def test_df_different_index_shape(df2, binop):
 def test_boolean_scalar_binop(op):
     psr = pd.Series(np.random.choice([True, False], 10))
     gsr = cudf.from_pandas(psr)
-    utils.assert_eq(op(psr, True), op(gsr, True))
-    utils.assert_eq(op(psr, False), op(gsr, False))
+    assert_eq(op(psr, True), op(gsr, True))
+    assert_eq(op(psr, False), op(gsr, False))
 
     # cuDF scalar
-    utils.assert_eq(op(psr, True), op(gsr, cudf.Scalar(True)))
-    utils.assert_eq(op(psr, False), op(gsr, cudf.Scalar(False)))
+    assert_eq(op(psr, True), op(gsr, cudf.Scalar(True)))
+    assert_eq(op(psr, False), op(gsr, cudf.Scalar(False)))
 
 
 @pytest.mark.parametrize("func", _operators_arithmetic)
@@ -747,7 +747,7 @@ def test_operator_func_between_series(dtype, func, has_nulls, fill_value):
         pdf_series_b, fill_value=fill_value
     )
 
-    utils.assert_eq(pdf_result, gdf_result)
+    assert_eq(pdf_result, gdf_result)
 
 
 @pytest.mark.parametrize("func", _operators_arithmetic)
@@ -773,7 +773,7 @@ def test_operator_func_series_and_scalar(
         scalar, fill_value=fill_value
     )
 
-    utils.assert_eq(pdf_series_result, gdf_series_result)
+    assert_eq(pdf_series_result, gdf_series_result)
 
 
 _permu_values = [0, 1, None, np.nan]
@@ -812,9 +812,9 @@ def test_operator_func_between_series_logical(
         and np.isnan(fill_value)
     ):
         with pytest.raises(AssertionError):
-            utils.assert_eq(expect, got)
+            assert_eq(expect, got)
         return
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize("dtype", ["float32", "float64"])
@@ -851,7 +851,7 @@ def test_operator_func_series_and_scalar_logical(
     expect = pdf_series_result
     got = gdf_series_result.to_pandas(nullable=True)
 
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize("func", _operators_arithmetic)
@@ -887,7 +887,7 @@ def gen_df():
     got = getattr(gdf1, func)(gdf2, fill_value=fill_value)
     expect = getattr(pdf1, func)(pdf2, fill_value=fill_value)[list(got._data)]
 
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize("func", _operators_comparison)
@@ -923,7 +923,7 @@ def gen_df():
     got = getattr(gdf1, func)(gdf2)
     expect = getattr(pdf1, func)(pdf2)[list(got._data)]
 
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize(
@@ -949,7 +949,7 @@ def gen_df():
 def test_binop_bool_uint(func, rhs):
     psr = pd.Series([True, False, False])
     gsr = cudf.from_pandas(psr)
-    utils.assert_eq(
+    assert_eq(
         getattr(psr, func)(rhs), getattr(gsr, func)(rhs), check_dtype=False
     )
 
@@ -977,7 +977,7 @@ def test_floordiv_zero_float64(series_dtype, divisor_dtype, scalar_divisor):
     else:
         pd_div = pd.Series([0], dtype=divisor_dtype)
         cudf_div = cudf.from_pandas(pd_div)
-    utils.assert_eq(sr // pd_div, cr // cudf_div)
+    assert_eq(sr // pd_div, cr // cudf_div)
 
 
 @pytest.mark.parametrize("scalar_divisor", [False, True])
@@ -1023,27 +1023,27 @@ def test_floordiv_zero_bool(scalar_divisor):
 def test_rmod_zero_nan(dtype):
     sr = pd.Series([1, 1, 0], dtype=dtype)
     cr = cudf.from_pandas(sr)
-    utils.assert_eq(1 % sr, 1 % cr)
+    assert_eq(1 % sr, 1 % cr)
     expected_dtype = np.float64 if cr.dtype.kind != "f" else dtype
-    utils.assert_eq(1 % cr, cudf.Series([0, 0, None], dtype=expected_dtype))
+    assert_eq(1 % cr, cudf.Series([0, 0, None], dtype=expected_dtype))
 
 
 def test_series_misc_binop():
     pds = pd.Series([1, 2, 4], name="abc xyz")
     gds = cudf.Series([1, 2, 4], name="abc xyz")
 
-    utils.assert_eq(pds + 1, gds + 1)
-    utils.assert_eq(1 + pds, 1 + gds)
+    assert_eq(pds + 1, gds + 1)
+    assert_eq(1 + pds, 1 + gds)
 
-    utils.assert_eq(pds + pds, gds + gds)
+    assert_eq(pds + pds, gds + gds)
 
     pds1 = pd.Series([1, 2, 4], name="hello world")
     gds1 = cudf.Series([1, 2, 4], name="hello world")
 
-    utils.assert_eq(pds + pds1, gds + gds1)
-    utils.assert_eq(pds1 + pds, gds1 + gds)
+    assert_eq(pds + pds1, gds + gds1)
+    assert_eq(pds1 + pds, gds1 + gds)
 
-    utils.assert_eq(pds1 + pds + 5, gds1 + gds + 5)
+    assert_eq(pds1 + pds + 5, gds1 + gds + 5)
 
 
 def test_int8_float16_binop():
@@ -1051,7 +1051,7 @@ def test_int8_float16_binop():
     b = np.float16(2)
     expect = cudf.Series([0.5])
     got = a / b
-    utils.assert_eq(expect, got, check_dtype=False)
+    assert_eq(expect, got, check_dtype=False)
 
 
 @pytest.mark.parametrize("dtype", ["int64", "float64", "str"])
@@ -1061,7 +1061,7 @@ def test_vector_to_none_binops(dtype):
     expect = Series([None] * 4).astype(dtype)
     got = data + None
 
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 def dtype_scalar(val, dtype):
@@ -1747,12 +1747,12 @@ def test_datetime_dateoffset_binaryop(
     expect = op(psr, poffset)
     got = op(gsr, goffset)
 
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
     expect = op(psr, -poffset)
     got = op(gsr, -goffset)
 
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize(
@@ -1793,7 +1793,7 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
     expect = op(psr, poffset)
     got = op(gsr, goffset)
 
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize("n_periods", [0, 1, -1, 12, -12])
@@ -1840,7 +1840,7 @@ def test_datetime_dateoffset_binaryop_reflected(
 
     # TODO: Remove check_dtype once we get some clarity on:
     # https://github.com/pandas-dev/pandas/issues/57448
-    utils.assert_eq(expect, got, check_dtype=False)
+    assert_eq(expect, got, check_dtype=False)
 
     with pytest.raises(TypeError):
         poffset - psr
@@ -1878,7 +1878,7 @@ def test_binops_with_lhs_numpy_scalar(frame, dtype):
     expected = data.to_pandas() == val
     got = data == val
 
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 @pytest.mark.parametrize(
@@ -2302,7 +2302,7 @@ def test_binops_decimal(op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype):
 
     got = op(a, b)
     assert expect.dtype == got.dtype
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize(
@@ -2355,7 +2355,7 @@ def test_binops_reflect_decimal(
 
     got = getattr(a, op)(b)
     assert expect.dtype == got.dtype
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize("powers", [0, 1, 2, 3])
@@ -2371,7 +2371,7 @@ def test_binops_decimal_pow(powers):
     )
     ps = s.to_pandas()
 
-    utils.assert_eq(s**powers, ps**powers, check_dtype=False)
+    assert_eq(s**powers, ps**powers, check_dtype=False)
 
 
 def test_binops_raise_error():
@@ -2554,7 +2554,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected):
 
     actual = op(lhs, rhs)
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize(
@@ -2804,7 +2804,7 @@ def decimal_series(input, dtype):
 
     got = op(lhs, rhs)
     assert expect.dtype == got.dtype
-    utils.assert_eq(expect, got)
+    assert_eq(expect, got)
 
 
 @pytest.mark.parametrize(
@@ -2979,7 +2979,7 @@ def test_binops_decimal_scalar_compare(args, reflected):
 
     actual = op(lhs, rhs)
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize(
@@ -3042,7 +3042,7 @@ def test_equality_ops_index_mismatch(fn):
     expected = getattr(pa, fn)(pb)
     actual = getattr(a, fn)(b).to_pandas(nullable=True)
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
 
 
 def generate_test_null_equals_columnops_data():
@@ -3132,7 +3132,7 @@ def test_empty_column(binop, data, scalar):
     got = binop(gdf, scalar)
     expected = binop(pdf, scalar)
 
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 @pytest.mark.parametrize(
@@ -3179,7 +3179,7 @@ def test_binops_dot(df, other):
     expected = pdf @ host_other
     got = df @ other
 
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_dot_preserve_index():
@@ -3187,7 +3187,7 @@ def test_binop_dot_preserve_index():
     df = cudf.DataFrame(np.eye(2), columns=["A", "B"], index=["A", "B"])
     result = ser @ df
     expected = ser.to_pandas() @ df.to_pandas()
-    utils.assert_eq(result, expected)
+    assert_eq(result, expected)
 
 
 def test_binop_series_with_repeated_index():
@@ -3198,7 +3198,7 @@ def test_binop_series_with_repeated_index():
     gsr2 = cudf.from_pandas(psr2)
     expected = psr1 - psr2
     got = gsr1 - gsr2
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_series_series():
@@ -3209,7 +3209,7 @@ def test_binop_integer_power_series_series():
     ps_exponent = gs_exponent.to_pandas()
     expected = ps_base**ps_exponent
     got = gs_base**gs_exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_series_scalar():
@@ -3219,7 +3219,7 @@ def test_binop_integer_power_series_scalar():
     ps_base = gs_base.to_pandas()
     expected = ps_base**exponent.value
     got = gs_base**exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_series_int():
@@ -3229,7 +3229,7 @@ def test_binop_integer_power_series_int():
     ps_base = gs_base.to_pandas()
     expected = ps_base**exponent
     got = gs_base**exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_scalar_series():
@@ -3239,7 +3239,7 @@ def test_binop_integer_power_scalar_series():
     ps_exponent = gs_exponent.to_pandas()
     expected = base.value**ps_exponent
     got = base**gs_exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_scalar_scalar():
@@ -3248,7 +3248,7 @@ def test_binop_integer_power_scalar_scalar():
     exponent = cudf.Scalar(1)
     expected = base.value**exponent.value
     got = base**exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_scalar_int():
@@ -3257,7 +3257,7 @@ def test_binop_integer_power_scalar_int():
     exponent = 1
     expected = base.value**exponent
     got = base**exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_int_series():
@@ -3267,7 +3267,7 @@ def test_binop_integer_power_int_series():
     ps_exponent = gs_exponent.to_pandas()
     expected = base**ps_exponent
     got = base**gs_exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_binop_integer_power_int_scalar():
@@ -3276,7 +3276,7 @@ def test_binop_integer_power_int_scalar():
     exponent = cudf.Scalar(1)
     expected = base**exponent.value
     got = base**exponent
-    utils.assert_eq(expected, got)
+    assert_eq(expected, got)
 
 
 def test_numpy_int_scalar_binop():
@@ -3291,7 +3291,7 @@ def test_binop_index_series(op):
     actual = op(gi, gs)
     expected = op(gi.to_pandas(), gs.to_pandas())
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize("name1", utils.SERIES_OR_INDEX_NAMES)
@@ -3307,7 +3307,7 @@ def test_binop_index_dt_td_series_with_names(name1, name2):
         expected = gi.to_pandas() + gs.to_pandas()
     actual = gi + gs
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize("data1", [[1, 2, 3], [10, 11, None]])
@@ -3319,9 +3319,9 @@ def test_binop_eq_ne_index_series(data1, data2):
     actual = gi == gs
     expected = gi.to_pandas() == gs.to_pandas()
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
 
     actual = gi != gs
     expected = gi.to_pandas() != gs.to_pandas()
 
-    utils.assert_eq(expected, actual)
+    assert_eq(expected, actual)
diff --git a/python/cudf/cudf/tests/test_categorical.py b/python/cudf/cudf/tests/test_categorical.py
index c36595192e4..9b6029582ce 100644
--- a/python/cudf/cudf/tests/test_categorical.py
+++ b/python/cudf/cudf/tests/test_categorical.py
@@ -11,11 +11,8 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import (
-    NUMERIC_TYPES,
-    assert_eq,
-    assert_exceptions_equal,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, assert_exceptions_equal
 
 
 @contextmanager
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index a8a297c155f..ea919c786b9 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -9,7 +9,8 @@
 import cudf
 from cudf._lib.transform import mask_to_bools
 from cudf.core.column.column import as_column
-from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
 from cudf.utils import dtypes as dtypeutils
 
 dtypes = sorted(
diff --git a/python/cudf/cudf/tests/test_column_accessor.py b/python/cudf/cudf/tests/test_column_accessor.py
index f1f6097d6a9..f3343c37d1d 100644
--- a/python/cudf/cudf/tests/test_column_accessor.py
+++ b/python/cudf/cudf/tests/test_column_accessor.py
@@ -6,7 +6,7 @@
 
 import cudf
 from cudf.core.column_accessor import ColumnAccessor
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 simple_test_data = [
     {},
diff --git a/python/cudf/cudf/tests/test_concat.py b/python/cudf/cudf/tests/test_concat.py
index 4b43a33c8c8..c1c03de48d4 100644
--- a/python/cudf/cudf/tests/test_concat.py
+++ b/python/cudf/cudf/tests/test_concat.py
@@ -10,11 +10,8 @@
 
 import cudf
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
-from cudf.testing._utils import (
-    assert_eq,
-    assert_exceptions_equal,
-    expect_warning_if,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal, expect_warning_if
 
 
 @contextmanager
diff --git a/python/cudf/cudf/tests/test_contains.py b/python/cudf/cudf/tests/test_contains.py
index a65ab1780b6..fe86df99d35 100644
--- a/python/cudf/cudf/tests/test_contains.py
+++ b/python/cudf/cudf/tests/test_contains.py
@@ -9,12 +9,8 @@
 import cudf
 from cudf import Series
 from cudf.core.index import Index, RangeIndex
-from cudf.testing._utils import (
-    DATETIME_TYPES,
-    NUMERIC_TYPES,
-    TIMEDELTA_TYPES,
-    assert_eq,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES
 
 
 def cudf_date_series(start, stop, freq):
diff --git a/python/cudf/cudf/tests/test_copying.py b/python/cudf/cudf/tests/test_copying.py
index 0bc9ffa8004..9b6f82ec705 100644
--- a/python/cudf/cudf/tests/test_copying.py
+++ b/python/cudf/cudf/tests/test_copying.py
@@ -8,7 +8,8 @@
 import cudf
 from cudf import Series
 from cudf.core.buffer.spill_manager import get_global_manager
-from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES, assert_eq
+from cudf.testing import assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, OTHER_TYPES
 
 pytestmark = pytest.mark.spilling
 
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index 5009a7f2628..09617306606 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -18,7 +18,8 @@
 import cudf
 from cudf import read_csv
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
 
 
 def make_numeric_dataframe(nrows, dtype):
diff --git a/python/cudf/cudf/tests/test_cuda_apply.py b/python/cudf/cudf/tests/test_cuda_apply.py
index 7fdf9754534..dc892caba3b 100644
--- a/python/cudf/cudf/tests/test_cuda_apply.py
+++ b/python/cudf/cudf/tests/test_cuda_apply.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 """
 Test method that apply GPU kernel to a frame.
@@ -9,7 +9,7 @@
 from numba import cuda
 
 from cudf import DataFrame
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize("nelem", [1, 2, 64, 128, 129])
diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py
index 06d63561fc1..29f2f46e3c7 100644
--- a/python/cudf/cudf/tests/test_cuda_array_interface.py
+++ b/python/cudf/cudf/tests/test_cuda_array_interface.py
@@ -11,12 +11,8 @@
 
 import cudf
 from cudf.core.buffer.spill_manager import get_global_manager
-from cudf.testing._utils import (
-    DATETIME_TYPES,
-    NUMERIC_TYPES,
-    TIMEDELTA_TYPES,
-    assert_eq,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES
 
 
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES)
diff --git a/python/cudf/cudf/tests/test_custom_accessor.py b/python/cudf/cudf/tests/test_custom_accessor.py
index 5ffe255d0f8..278e63f3e8b 100644
--- a/python/cudf/cudf/tests/test_custom_accessor.py
+++ b/python/cudf/cudf/tests/test_custom_accessor.py
@@ -4,7 +4,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @cudf.api.extensions.register_dataframe_accessor("point")
diff --git a/python/cudf/cudf/tests/test_cut.py b/python/cudf/cudf/tests/test_cut.py
index 24c1eaa8f02..3f31da035aa 100644
--- a/python/cudf/cudf/tests/test_cut.py
+++ b/python/cudf/cudf/tests/test_cut.py
@@ -9,7 +9,7 @@
 import pytest
 
 from cudf.core.cut import cut
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 3661e13bd39..05ee8346afa 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -30,14 +30,12 @@
 from cudf.core.buffer.spill_manager import get_global_manager
 from cudf.core.column import column
 from cudf.errors import MixedTypeError
-from cudf.testing import _utils as utils
+from cudf.testing import _utils as utils, assert_eq, assert_neq
 from cudf.testing._utils import (
     ALL_TYPES,
     DATETIME_TYPES,
     NUMERIC_TYPES,
-    assert_eq,
     assert_exceptions_equal,
-    assert_neq,
     does_not_raise,
     expect_warning_if,
     gen_rand,
@@ -3660,6 +3658,12 @@ def test_dataframe_mulitindex_sort_index(
         assert_eq(expected, got)
 
 
+def test_sort_index_axis_1_ignore_index_true_columnaccessor_state_names():
+    gdf = cudf.DataFrame([[1, 2, 3]], columns=["b", "a", "c"])
+    result = gdf.sort_index(axis=1, ignore_index=True)
+    assert result._data.names == tuple(result._data.keys())
+
+
 @pytest.mark.parametrize("dtype", dtypes + ["category"])
 def test_dataframe_0_row_dtype(dtype):
     if dtype == "category":
diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py
index fec52d82ab1..45bd31ef58e 100644
--- a/python/cudf/cudf/tests/test_dataframe_copy.py
+++ b/python/cudf/cudf/tests/test_dataframe_copy.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 from copy import copy, deepcopy
 
 import cupy as cp
@@ -7,7 +7,8 @@
 import pytest
 
 from cudf.core.dataframe import DataFrame
-from cudf.testing._utils import ALL_TYPES, assert_eq, assert_neq
+from cudf.testing import assert_eq, assert_neq
+from cudf.testing._utils import ALL_TYPES
 
 """
 DataFrame copy expectations
diff --git a/python/cudf/cudf/tests/test_datasets.py b/python/cudf/cudf/tests/test_datasets.py
index 8e5e5ab66c4..7f4e249a6d7 100644
--- a/python/cudf/cudf/tests/test_datasets.py
+++ b/python/cudf/cudf/tests/test_datasets.py
@@ -3,7 +3,7 @@
 import numpy as np
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_dataset_timeseries():
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index e3ecaafae5b..092e9790c63 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -15,10 +15,10 @@
 from cudf import DataFrame, Series
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.core.index import DatetimeIndex
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
 )
diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py
index 0745e5aba48..c41a938f6ea 100644
--- a/python/cudf/cudf/tests/test_decimal.py
+++ b/python/cudf/cudf/tests/test_decimal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 import decimal
 from decimal import Decimal
@@ -11,12 +11,12 @@
 import cudf
 from cudf.core.column import Decimal32Column, Decimal64Column, NumericalColumn
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     FLOAT_TYPES,
     INTEGER_TYPES,
     SIGNED_TYPES,
     _decimal_series,
-    assert_eq,
     expect_warning_if,
 )
 
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 8ce4da792a4..7f48e414180 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -20,7 +20,7 @@
     from_dataframe,
     protocol_dtype_to_cupy_dtype,
 )
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.fixture(
diff --git a/python/cudf/cudf/tests/test_dlpack.py b/python/cudf/cudf/tests/test_dlpack.py
index 7ea3979b0f1..ebcc35784ee 100644
--- a/python/cudf/cudf/tests/test_dlpack.py
+++ b/python/cudf/cudf/tests/test_dlpack.py
@@ -9,7 +9,7 @@
 from packaging import version
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 nelems = [0, 3, 10]
 dtype = [np.uint16, np.int32, np.float64]
diff --git a/python/cudf/cudf/tests/test_dropna.py b/python/cudf/cudf/tests/test_dropna.py
index c3c8ed922f0..ed0cf0053ea 100644
--- a/python/cudf/cudf/tests/test_dropna.py
+++ b/python/cudf/cudf/tests/test_dropna.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_dtypes.py b/python/cudf/cudf/tests/test_dtypes.py
index 0efd8d9781c..edb534a3618 100644
--- a/python/cudf/cudf/tests/test_dtypes.py
+++ b/python/cudf/cudf/tests/test_dtypes.py
@@ -17,7 +17,7 @@
     ListDtype,
     StructDtype,
 )
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 from cudf.utils.dtypes import np_to_pa_dtype
 
 
diff --git a/python/cudf/cudf/tests/test_duplicates.py b/python/cudf/cudf/tests/test_duplicates.py
index 161b245953b..0b4ed52ba96 100644
--- a/python/cudf/cudf/tests/test_duplicates.py
+++ b/python/cudf/cudf/tests/test_duplicates.py
@@ -9,7 +9,8 @@
 
 import cudf
 from cudf import concat
-from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
 
 # most tests are similar to pandas drop_duplicates
 
diff --git a/python/cudf/cudf/tests/test_ewm.py b/python/cudf/cudf/tests/test_ewm.py
new file mode 100644
index 00000000000..6cb3c19d5a8
--- /dev/null
+++ b/python/cudf/cudf/tests/test_ewm.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+import pytest
+
+import cudf
+from cudf.testing import assert_eq
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        [1.0, 2.0, 3.0, 4.0, 5.0],
+        [5.0, cudf.NA, 3.0, cudf.NA, 8.5],
+        [5.0, cudf.NA, 3.0, cudf.NA, cudf.NA, 4.5],
+        [5.0, cudf.NA, 3.0, 4.0, cudf.NA, 5.0],
+    ],
+)
+@pytest.mark.parametrize(
+    "params",
+    [
+        {"com": 0.1},
+        {"com": 0.5},
+        {"span": 1.5},
+        {"span": 2.5},
+        {"halflife": 0.5},
+        {"halflife": 1.5},
+        {"alpha": 0.1},
+        {"alpha": 0.5},
+    ],
+)
+@pytest.mark.parametrize("adjust", [True, False])
+def test_ewma(data, params, adjust):
+    """
+    The most basic test asserts that we obtain
+    the same numerical values as pandas for various
+    sets of keyword arguments that affect the raw
+    coefficients of the formula
+    """
+    params["adjust"] = adjust
+
+    gsr = cudf.Series(data, dtype="float64")
+    psr = gsr.to_pandas()
+
+    expect = psr.ewm(**params).mean()
+    got = gsr.ewm(**params).mean()
+
+    assert_eq(expect, got)
diff --git a/python/cudf/cudf/tests/test_factorize.py b/python/cudf/cudf/tests/test_factorize.py
index f8782681f62..47f9180dcb1 100644
--- a/python/cudf/cudf/tests/test_factorize.py
+++ b/python/cudf/cudf/tests/test_factorize.py
@@ -7,7 +7,7 @@
 
 import cudf
 from cudf import DataFrame, Index
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize("ncats,nelem", [(2, 2), (2, 10), (10, 100)])
diff --git a/python/cudf/cudf/tests/test_feather.py b/python/cudf/cudf/tests/test_feather.py
index 12a325fa4e8..7e5523bb8c7 100644
--- a/python/cudf/cudf/tests/test_feather.py
+++ b/python/cudf/cudf/tests/test_feather.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 import os
 from string import ascii_letters
@@ -9,7 +9,8 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import NUMERIC_TYPES, assert_eq
+from cudf.testing import assert_eq
+from cudf.testing._utils import NUMERIC_TYPES
 
 
 @pytest.fixture(params=[0, 1, 10, 100])
diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py
index a677ace18ec..fc22d8bc0ea 100644
--- a/python/cudf/cudf/tests/test_gcs.py
+++ b/python/cudf/cudf/tests/test_gcs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import io
 import os
@@ -8,7 +8,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 gcsfs = pytest.importorskip("gcsfs")
 
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index 674f694a224..826a0e52f57 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -28,11 +28,11 @@
 from cudf.core.udf._ops import arith_ops, comparison_ops, unary_ops
 from cudf.core.udf.groupby_typing import SUPPORTED_GROUPBY_NUMPY_TYPES
 from cudf.core.udf.utils import UDFError, precompiled
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     DATETIME_TYPES,
     SIGNED_TYPES,
     TIMEDELTA_TYPES,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
 )
diff --git a/python/cudf/cudf/tests/test_hdf.py b/python/cudf/cudf/tests/test_hdf.py
index d420c95cfb4..430ed973f19 100644
--- a/python/cudf/cudf/tests/test_hdf.py
+++ b/python/cudf/cudf/tests/test_hdf.py
@@ -8,7 +8,8 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import NUMERIC_TYPES, UNSIGNED_TYPES, assert_eq
+from cudf.testing import assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, UNSIGNED_TYPES
 
 pytest.importorskip("tables")
 
diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py
index f8de16f8609..098b5192d4a 100644
--- a/python/cudf/cudf/tests/test_hdfs.py
+++ b/python/cudf/cudf/tests/test_hdfs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import os
 from io import BytesIO
@@ -10,7 +10,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 if not os.environ.get("RUN_HDFS_TESTS"):
     pytestmark = pytest.mark.skip("Env not configured to run HDFS tests")
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index a59836df5ba..05dcd85df6a 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -18,6 +18,7 @@
 from cudf.api.extensions import no_default
 from cudf.api.types import is_bool_dtype
 from cudf.core.index import CategoricalIndex, DatetimeIndex, Index, RangeIndex
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     ALL_TYPES,
     FLOAT_TYPES,
@@ -28,7 +29,6 @@
     UNSIGNED_TYPES,
     assert_column_memory_eq,
     assert_column_memory_ne,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
 )
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 009e48a8669..7005cbc6834 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -11,10 +11,9 @@
 
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-from cudf.testing import _utils as utils
+from cudf.testing import _utils as utils, assert_eq
 from cudf.testing._utils import (
     INTEGER_TYPES,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
 )
diff --git a/python/cudf/cudf/tests/test_interpolate.py b/python/cudf/cudf/tests/test_interpolate.py
index a0e90cc89a2..4a0dc331e1a 100644
--- a/python/cudf/cudf/tests/test_interpolate.py
+++ b/python/cudf/cudf/tests/test_interpolate.py
@@ -4,11 +4,8 @@
 
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-from cudf.testing._utils import (
-    assert_eq,
-    assert_exceptions_equal,
-    expect_warning_if,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal, expect_warning_if
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py
index 013f4439ad5..1b395c09ba8 100644
--- a/python/cudf/cudf/tests/test_interval.py
+++ b/python/cudf/cudf/tests/test_interval.py
@@ -6,7 +6,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_join_order.py b/python/cudf/cudf/tests/test_join_order.py
index 8d71a6c05b8..9ea4ba007d2 100644
--- a/python/cudf/cudf/tests/test_join_order.py
+++ b/python/cudf/cudf/tests/test_join_order.py
@@ -8,7 +8,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.fixture(params=[False, True], ids=["unsorted", "sorted"])
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index f36774daab2..b1ce69e58ef 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -9,11 +9,11 @@
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype, Decimal128Dtype
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     INTEGER_TYPES,
     NUMERIC_TYPES,
     TIMEDELTA_TYPES,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
 )
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index ba6a8f94719..297040b6d95 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -14,11 +14,11 @@
 
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
     TIMEDELTA_TYPES,
-    assert_eq,
     expect_warning_if,
 )
 
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index f04cb8a91a4..f76143cb381 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -12,12 +12,8 @@
 from cudf import NA
 from cudf._lib.copying import get_element
 from cudf.api.types import is_scalar
-from cudf.testing._utils import (
-    DATETIME_TYPES,
-    NUMERIC_TYPES,
-    TIMEDELTA_TYPES,
-    assert_eq,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_monotonic.py b/python/cudf/cudf/tests/test_monotonic.py
index 3c627a5fe89..0896d91570e 100644
--- a/python/cudf/cudf/tests/test_monotonic.py
+++ b/python/cudf/cudf/tests/test_monotonic.py
@@ -12,7 +12,7 @@
 import cudf
 from cudf import Index, MultiIndex, Series
 from cudf.core.index import CategoricalIndex, DatetimeIndex, RangeIndex
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize("testrange", [(10, 20, 1), (0, -10, -1), (5, 5, 1)])
diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index 7b95e4f9a44..07c2e9c3fcf 100644
--- a/python/cudf/cudf/tests/test_multiindex.py
+++ b/python/cudf/cudf/tests/test_multiindex.py
@@ -21,12 +21,8 @@
 import cudf
 from cudf.api.extensions import no_default
 from cudf.core.column import as_column
-from cudf.testing._utils import (
-    assert_eq,
-    assert_exceptions_equal,
-    assert_neq,
-    expect_warning_if,
-)
+from cudf.testing import assert_eq, assert_neq
+from cudf.testing._utils import assert_exceptions_equal, expect_warning_if
 
 
 @contextmanager
diff --git a/python/cudf/cudf/tests/test_numerical.py b/python/cudf/cudf/tests/test_numerical.py
index 03081208739..1b0589254f5 100644
--- a/python/cudf/cudf/tests/test_numerical.py
+++ b/python/cudf/cudf/tests/test_numerical.py
@@ -5,7 +5,8 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import NUMERIC_TYPES, assert_eq, expect_warning_if
+from cudf.testing import assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if
 from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes
 
 
diff --git a/python/cudf/cudf/tests/test_numpy_interop.py b/python/cudf/cudf/tests/test_numpy_interop.py
index 46324a85bb4..fa664d52ecf 100644
--- a/python/cudf/cudf/tests/test_numpy_interop.py
+++ b/python/cudf/cudf/tests/test_numpy_interop.py
@@ -1,10 +1,10 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 import numpy as np
 import pytest
 
 from cudf import DataFrame, Series
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_to_records_noindex():
diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py
index cd0055ad78b..154e1e19072 100644
--- a/python/cudf/cudf/tests/test_onehot.py
+++ b/python/cudf/cudf/tests/test_onehot.py
@@ -7,7 +7,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 pytestmark = pytest.mark.spilling
 
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index b83b8f08a8b..e0884a5819a 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -15,9 +15,8 @@
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.io.orc import ORCWriter
-from cudf.testing import assert_frame_equal
+from cudf.testing import assert_eq, assert_frame_equal
 from cudf.testing._utils import (
-    assert_eq,
     expect_warning_if,
     gen_rand_series,
     supported_numpy_dtypes,
diff --git a/python/cudf/cudf/tests/test_pack.py b/python/cudf/cudf/tests/test_pack.py
index da506a8d5b2..ad78621c5fa 100644
--- a/python/cudf/cudf/tests/test_pack.py
+++ b/python/cudf/cudf/tests/test_pack.py
@@ -20,7 +20,7 @@
 
 from cudf import DataFrame, Index, Series
 from cudf._lib.copying import pack, unpack
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_sizeof_packed_dataframe():
diff --git a/python/cudf/cudf/tests/test_pandas_interop.py b/python/cudf/cudf/tests/test_pandas_interop.py
index 78cf5b998e8..5782437e394 100644
--- a/python/cudf/cudf/tests/test_pandas_interop.py
+++ b/python/cudf/cudf/tests/test_pandas_interop.py
@@ -1,11 +1,11 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 import numpy as np
 import pandas as pd
 
 import cudf
 from cudf import DataFrame
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_to_pandas():
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 2596fe8cd37..e1e7952605b 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -28,12 +28,8 @@
     ParquetWriter,
     merge_parquet_filemetadata,
 )
-from cudf.testing import dataset_generator as dg
-from cudf.testing._utils import (
-    TIMEDELTA_TYPES,
-    assert_eq,
-    set_random_null_mask_inplace,
-)
+from cudf.testing import assert_eq, dataset_generator as dg
+from cudf.testing._utils import TIMEDELTA_TYPES, set_random_null_mask_inplace
 
 
 @contextmanager
@@ -2947,6 +2943,61 @@ def test_per_column_options_string_col(tmpdir, encoding):
     assert encoding in fmd.row_group(0).column(0).encodings
 
 
+@pytest.mark.parametrize(
+    "num_rows",
+    [200, 10000],  # fewer / more rows than rows_per_rowgroup (5000)
+)
+def test_parquet_bss_round_trip(tmpdir, num_rows):
+    def flba(i):  # SHA-256 digest of i: a 32-byte fixed-length value
+        hasher = hashlib.sha256()
+        hasher.update(i.to_bytes(4, "little"))
+        return hasher.digest()
+
+    # Reference file: pyarrow writes it with BYTE_STREAM_SPLIT encoding.
+    rows_per_rowgroup = 5000
+    fixed_data = pa.array(
+        [flba(i) for i in range(num_rows)], type=pa.binary(32)
+    )
+    i32_data = pa.array(list(range(num_rows)), type=pa.int32())
+    i64_data = pa.array(list(range(num_rows)), type=pa.int64())
+    f32_data = pa.array([float(i) for i in range(num_rows)], type=pa.float32())
+    f64_data = pa.array([float(i) for i in range(num_rows)], type=pa.float64())
+    padf = pa.Table.from_arrays(
+        [fixed_data, i32_data, i64_data, f32_data, f64_data],
+        names=["flba", "i32", "i64", "f32", "f64"],
+    )
+    padf_fname = tmpdir.join("padf.parquet")
+    pq.write_table(
+        padf,
+        padf_fname,
+        column_encoding="BYTE_STREAM_SPLIT",
+        use_dictionary=False,
+        row_group_size=rows_per_rowgroup,
+    )
+
+    # Round trip through cudf: read the pyarrow file, then write it back out.
+    cdf = cudf.read_parquet(padf_fname)
+    cdf_fname = tmpdir.join("cdf.parquet")
+    cdf.to_parquet(
+        cdf_fname,
+        column_type_length={"flba": 32},  # matches pa.binary(32) above
+        column_encoding={
+            "flba": "BYTE_STREAM_SPLIT",
+            "i32": "BYTE_STREAM_SPLIT",
+            "i64": "BYTE_STREAM_SPLIT",
+            "f32": "BYTE_STREAM_SPLIT",
+            "f64": "BYTE_STREAM_SPLIT",
+        },
+        row_group_size_rows=rows_per_rowgroup,
+    )
+
+    # Read both files with pyarrow; the cudf-written one must match.
+    padf2 = pq.read_table(padf_fname)
+    padf3 = pq.read_table(cdf_fname)
+    assert_eq(padf2, padf3)
+    assert_eq(padf2.schema[0].type, padf3.schema[0].type)  # "flba" type
+
+
 def test_parquet_reader_rle_boolean(datadir):
     fname = datadir / "rle_boolean_encoding.parquet"
 
diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py
index 13a07ef8adc..719e8a33285 100644
--- a/python/cudf/cudf/tests/test_pickling.py
+++ b/python/cudf/cudf/tests/test_pickling.py
@@ -8,7 +8,7 @@
 
 from cudf import DataFrame, Index, RangeIndex, Series
 from cudf.core.buffer import as_buffer
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 pytestmark = pytest.mark.spilling
 
diff --git a/python/cudf/cudf/tests/test_quantiles.py b/python/cudf/cudf/tests/test_quantiles.py
index 8b126073a0f..7d8303df0c3 100644
--- a/python/cudf/cudf/tests/test_quantiles.py
+++ b/python/cudf/cudf/tests/test_quantiles.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import re
 
@@ -6,7 +6,8 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
 
 
 def test_single_q():
diff --git a/python/cudf/cudf/tests/test_query.py b/python/cudf/cudf/tests/test_query.py
index cf9e70d85c7..b12209fd3b9 100644
--- a/python/cudf/cudf/tests/test_query.py
+++ b/python/cudf/cudf/tests/test_query.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
 
 
 import datetime
@@ -11,7 +11,7 @@
 
 import cudf
 from cudf import DataFrame
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 from cudf.utils import queryutils
 
 _params_query_parser = []
diff --git a/python/cudf/cudf/tests/test_query_mask.py b/python/cudf/cudf/tests/test_query_mask.py
index ae5171f28d4..9372681187d 100644
--- a/python/cudf/cudf/tests/test_query_mask.py
+++ b/python/cudf/cudf/tests/test_query_mask.py
@@ -1,11 +1,11 @@
-# Copyright (c) 2019-2022, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 import numpy as np
 import pandas as pd
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 _data = [
     {"a": [0, 1.0, 2.0, None, np.nan, None, 3, 5]},
diff --git a/python/cudf/cudf/tests/test_rank.py b/python/cudf/cudf/tests/test_rank.py
index 1a5f25e320f..4c1d8ce92ae 100644
--- a/python/cudf/cudf/tests/test_rank.py
+++ b/python/cudf/cudf/tests/test_rank.py
@@ -7,7 +7,8 @@
 import pytest
 
 from cudf import DataFrame
-from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
 
 
 @pytest.fixture
diff --git a/python/cudf/cudf/tests/test_reductions.py b/python/cudf/cudf/tests/test_reductions.py
index c6ffa1d2bc7..1247fa362ce 100644
--- a/python/cudf/cudf/tests/test_reductions.py
+++ b/python/cudf/cudf/tests/test_reductions.py
@@ -11,13 +11,8 @@
 import cudf
 from cudf import Series
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
-from cudf.testing import _utils as utils
-from cudf.testing._utils import (
-    NUMERIC_TYPES,
-    assert_eq,
-    expect_warning_if,
-    gen_rand,
-)
+from cudf.testing import _utils as utils, assert_eq
+from cudf.testing._utils import NUMERIC_TYPES, expect_warning_if, gen_rand
 
 params_dtype = NUMERIC_TYPES
 
diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py
index 9466398964a..d4fe5ff3bb5 100644
--- a/python/cudf/cudf/tests/test_replace.py
+++ b/python/cudf/cudf/tests/test_replace.py
@@ -12,10 +12,10 @@
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     INTEGER_TYPES,
     NUMERIC_TYPES,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
 )
diff --git a/python/cudf/cudf/tests/test_resampling.py b/python/cudf/cudf/tests/test_resampling.py
index d7a3fea1273..95fa8e9a50a 100644
--- a/python/cudf/cudf/tests/test_resampling.py
+++ b/python/cudf/cudf/tests/test_resampling.py
@@ -5,7 +5,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def assert_resample_results_equal(lhs, rhs, **kwargs):
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index daa1e70808f..50db4302b75 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -10,12 +10,8 @@
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.core.buffer.spill_manager import get_global_manager
-from cudf.testing._utils import (
-    ALL_TYPES,
-    DATETIME_TYPES,
-    NUMERIC_TYPES,
-    assert_eq,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import ALL_TYPES, DATETIME_TYPES, NUMERIC_TYPES
 
 pytest_xfail = pytest.mark.xfail
 pytestmark = pytest.mark.spilling
diff --git a/python/cudf/cudf/tests/test_rolling.py b/python/cudf/cudf/tests/test_rolling.py
index 1d1d7ae8d29..135870f7359 100644
--- a/python/cudf/cudf/tests/test_rolling.py
+++ b/python/cudf/cudf/tests/test_rolling.py
@@ -7,7 +7,7 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 from cudf.testing.dataset_generator import rand_dataframe
 
 
diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py
index cdce17eeb76..a44bf791767 100644
--- a/python/cudf/cudf/tests/test_s3.py
+++ b/python/cudf/cudf/tests/test_s3.py
@@ -12,7 +12,7 @@
 from fsspec.core import get_fs_token_paths
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 moto = pytest.importorskip("moto", minversion="3.1.6")
 boto3 = pytest.importorskip("boto3")
diff --git a/python/cudf/cudf/tests/test_scan.py b/python/cudf/cudf/tests/test_scan.py
index 4cbc2197cfd..b76566b00e2 100644
--- a/python/cudf/cudf/tests/test_scan.py
+++ b/python/cudf/cudf/tests/test_scan.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 from itertools import product
 
@@ -8,12 +8,8 @@
 
 import cudf
 from cudf.core.dtypes import Decimal32Dtype, Decimal64Dtype, Decimal128Dtype
-from cudf.testing._utils import (
-    INTEGER_TYPES,
-    NUMERIC_TYPES,
-    assert_eq,
-    gen_rand,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import INTEGER_TYPES, NUMERIC_TYPES, gen_rand
 
 params_sizes = [0, 1, 2, 5]
 
diff --git a/python/cudf/cudf/tests/test_search.py b/python/cudf/cudf/tests/test_search.py
index 3ba652ff6c0..65943518113 100644
--- a/python/cudf/cudf/tests/test_search.py
+++ b/python/cudf/cudf/tests/test_search.py
@@ -5,7 +5,8 @@
 import pytest
 
 import cudf
-from cudf.testing._utils import assert_eq, gen_rand, random_bitmask
+from cudf.testing import assert_eq
+from cudf.testing._utils import gen_rand, random_bitmask
 
 
 @pytest.mark.parametrize("side", ["left", "right"])
diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py
index f26d78e7783..0b892a51895 100644
--- a/python/cudf/cudf/tests/test_serialize.py
+++ b/python/cudf/cudf/tests/test_serialize.py
@@ -9,8 +9,7 @@
 import pytest
 
 import cudf
-from cudf.testing import _utils as utils
-from cudf.testing._utils import assert_eq
+from cudf.testing import _utils as utils, assert_eq
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 52956c230ba..467d0c46ae7 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -17,11 +17,11 @@
 from cudf.api.extensions import no_default
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.errors import MixedTypeError
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     NUMERIC_TYPES,
     SERIES_OR_INDEX_NAMES,
     TIMEDELTA_TYPES,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
     gen_rand,
@@ -1054,6 +1054,18 @@ def test_fillna_with_nan(data, nan_as_null, fill_value):
     assert_eq(expected, actual)
 
 
+def test_fillna_categorical_with_non_categorical_raises():
+    ser = cudf.Series([1, None], dtype="category")
+    with pytest.raises(TypeError):  # fill values are not categorical
+        ser.fillna(cudf.Series([1, 2]))
+
+
+def test_fillna_categorical_with_different_categories_raises():
+    ser = cudf.Series([1, None], dtype="category")
+    with pytest.raises(TypeError):  # fill categories [1, 2] differ from [1]
+        ser.fillna(cudf.Series([1, 2], dtype="category"))
+
+
 def test_series_mask_mixed_dtypes_error():
     s = cudf.Series(["a", "b", "c"])
     with pytest.raises(
diff --git a/python/cudf/cudf/tests/test_seriesmap.py b/python/cudf/cudf/tests/test_seriesmap.py
index 9da08e483c9..3d8b6a79d2a 100644
--- a/python/cudf/cudf/tests/test_seriesmap.py
+++ b/python/cudf/cudf/tests/test_seriesmap.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 from itertools import product
 from math import floor
@@ -9,7 +9,8 @@
 
 import cudf
 from cudf import Series
-from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal
 
 
 def test_series_map_basic():
diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py
index ff2f7bd41f2..69122cdbafa 100644
--- a/python/cudf/cudf/tests/test_setitem.py
+++ b/python/cudf/cudf/tests/test_setitem.py
@@ -6,11 +6,8 @@
 
 import cudf
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-from cudf.testing._utils import (
-    assert_eq,
-    assert_exceptions_equal,
-    expect_warning_if,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal, expect_warning_if
 
 
 @pytest.mark.parametrize("df", [pd.DataFrame({"a": [1, 2, 3]})])
diff --git a/python/cudf/cudf/tests/test_sorting.py b/python/cudf/cudf/tests/test_sorting.py
index 449f21721f4..a8ffce6e88b 100644
--- a/python/cudf/cudf/tests/test_sorting.py
+++ b/python/cudf/cudf/tests/test_sorting.py
@@ -10,10 +10,10 @@
 from cudf import DataFrame, Series
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.core.column import NumericalColumn
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
-    assert_eq,
     assert_exceptions_equal,
     expect_warning_if,
 )
diff --git a/python/cudf/cudf/tests/test_spilling.py b/python/cudf/cudf/tests/test_spilling.py
index 59b8e6d2e70..7af83a99d60 100644
--- a/python/cudf/cudf/tests/test_spilling.py
+++ b/python/cudf/cudf/tests/test_spilling.py
@@ -39,7 +39,7 @@
     SpillableBufferOwner,
     SpillLock,
 )
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 if get_global_manager() is not None:
     pytest.skip(
diff --git a/python/cudf/cudf/tests/test_stats.py b/python/cudf/cudf/tests/test_stats.py
index 27811d0fcde..d5f63fdab77 100644
--- a/python/cudf/cudf/tests/test_stats.py
+++ b/python/cudf/cudf/tests/test_stats.py
@@ -11,11 +11,8 @@
 from cudf.api.extensions import no_default
 from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
 from cudf.datasets import randomdata
-from cudf.testing._utils import (
-    assert_eq,
-    assert_exceptions_equal,
-    expect_warning_if,
-)
+from cudf.testing import assert_eq
+from cudf.testing._utils import assert_exceptions_equal, expect_warning_if
 
 params_dtypes = [np.int32, np.uint32, np.float32, np.float64]
 methods = ["min", "max", "sum", "mean", "var", "std"]
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 801c530da43..f447759d010 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -17,10 +17,10 @@
 from cudf import concat
 from cudf.core.column.string import StringColumn
 from cudf.core.index import Index
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     DATETIME_TYPES,
     NUMERIC_TYPES,
-    assert_eq,
     assert_exceptions_equal,
 )
 from cudf.utils import dtypes as dtypeutils
diff --git a/python/cudf/cudf/tests/test_string_udfs.py b/python/cudf/cudf/tests/test_string_udfs.py
index 5dbb86fe27d..4432d2afc8e 100644
--- a/python/cudf/cudf/tests/test_string_udfs.py
+++ b/python/cudf/cudf/tests/test_string_udfs.py
@@ -21,7 +21,8 @@
     udf_string,
 )
 from cudf.core.udf.utils import _get_extensionty_size, _ptx_file
-from cudf.testing._utils import assert_eq, sv_to_udf_str
+from cudf.testing import assert_eq
+from cudf.testing._utils import sv_to_udf_str
 from cudf.utils._numba import _CUDFNumbaConfig
 
 _PTX_FILE = _ptx_file()
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index 60d9516f385..e91edc9eec6 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
 import numpy as np
 import pandas as pd
@@ -7,7 +7,8 @@
 
 import cudf
 from cudf.core.dtypes import StructDtype
-from cudf.testing._utils import DATETIME_TYPES, TIMEDELTA_TYPES, assert_eq
+from cudf.testing import assert_eq
+from cudf.testing._utils import DATETIME_TYPES, TIMEDELTA_TYPES
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py
index 1994536f395..c3620db3880 100644
--- a/python/cudf/cudf/tests/test_testing.py
+++ b/python/cudf/cudf/tests/test_testing.py
@@ -17,9 +17,8 @@
     OTHER_TYPES,
     assert_column_memory_eq,
     assert_column_memory_ne,
-    assert_eq,
 )
-from cudf.testing.testing import assert_column_equal
+from cudf.testing.testing import assert_column_equal, assert_eq
 
 
 @pytest.fixture(
diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py
index 0c591965361..c4a2349f535 100644
--- a/python/cudf/cudf/tests/test_timedelta.py
+++ b/python/cudf/cudf/tests/test_timedelta.py
@@ -9,8 +9,8 @@
 import pytest
 
 import cudf
-from cudf.testing import _utils as utils
-from cudf.testing._utils import assert_eq, assert_exceptions_equal
+from cudf.testing import _utils as utils, assert_eq
+from cudf.testing._utils import assert_exceptions_equal
 
 _TIMEDELTA_DATA = [
     [1000000, 200000, 3000000],
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 4843decedba..087d10b8295 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -17,9 +17,9 @@
 )
 from cudf.core.udf.api import Masked
 from cudf.core.udf.utils import precompiled
+from cudf.testing import assert_eq
 from cudf.testing._utils import (
     _decimal_series,
-    assert_eq,
     parametrize_numeric_dtypes_pairwise,
     sv_to_udf_str,
 )
diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py
index 15d9d03d4a7..dbbf4fba3a6 100644
--- a/python/cudf/cudf/tests/test_unaops.py
+++ b/python/cudf/cudf/tests/test_unaops.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2023, NVIDIA CORPORATION.
+# Copyright (c) 2019-2024, NVIDIA CORPORATION.
 
 import itertools
 import operator
@@ -10,7 +10,7 @@
 
 import cudf
 from cudf import Series
-from cudf.testing import _utils as utils
+from cudf.testing import _utils as utils, assert_eq
 
 _unaops = [operator.abs, operator.invert, operator.neg, np.ceil, np.floor]
 
@@ -128,4 +128,4 @@ def test_scalar_no_negative_bools():
 def test_series_bool_neg():
     sr = Series([True, False, True, None, False, None, True, True])
     psr = sr.to_pandas(nullable=True)
-    utils.assert_eq((-sr).to_pandas(nullable=True), -psr, check_dtype=True)
+    assert_eq((-sr).to_pandas(nullable=True), -psr, check_dtype=True)
diff --git a/python/cudf/cudf/tests/text/test_subword_tokenizer.py b/python/cudf/cudf/tests/text/test_subword_tokenizer.py
index b21edc0477f..78b58344374 100644
--- a/python/cudf/cudf/tests/text/test_subword_tokenizer.py
+++ b/python/cudf/cudf/tests/text/test_subword_tokenizer.py
@@ -7,7 +7,7 @@
 
 import cudf
 from cudf.core.subword_tokenizer import SubwordTokenizer
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.fixture(scope="module")
diff --git a/python/cudf/cudf/tests/text/test_text_methods.py b/python/cudf/cudf/tests/text/test_text_methods.py
index 36f7f3de828..52179f55da3 100644
--- a/python/cudf/cudf/tests/text/test_text_methods.py
+++ b/python/cudf/cudf/tests/text/test_text_methods.py
@@ -9,7 +9,7 @@
 import cudf
 from cudf.core.byte_pair_encoding import BytePairEncoder
 from cudf.core.tokenize_vocabulary import TokenizeVocabulary
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 def test_tokenize():
diff --git a/python/cudf/cudf/utils/_ptxcompiler.py b/python/cudf/cudf/utils/_ptxcompiler.py
index 54f5ea08ee1..9d7071d55a5 100644
--- a/python/cudf/cudf/utils/_ptxcompiler.py
+++ b/python/cudf/cudf/utils/_ptxcompiler.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,11 +14,14 @@
 
 import math
 import os
+import re
 import subprocess
 import sys
 import warnings
 
 NO_DRIVER = (math.inf, math.inf)
+START_TAG = "_VER_START"
+END_TAG = "_VER_END"
 
 NUMBA_CHECK_VERSION_CMD = """\
 from ctypes import c_int, byref
@@ -28,7 +31,7 @@
 drv_major = dv.value // 1000
 drv_minor = (dv.value - (drv_major * 1000)) // 10
 run_major, run_minor = cuda.runtime.get_version()
-print(f'{drv_major} {drv_minor} {run_major} {run_minor}')
+print(f'_VER_START{drv_major} {drv_minor} {run_major} {run_minor}_VER_END')
 """
 
 
@@ -61,7 +64,11 @@ def get_versions():
         warnings.warn(msg, UserWarning)
         return NO_DRIVER
 
-    versions = [int(s) for s in cp.stdout.strip().split()]
+    pattern = r"_VER_START(.*?)_VER_END"
+
+    ver_str = re.search(pattern, cp.stdout.decode()).group(1)
+
+    versions = [int(s) for s in ver_str.strip().split()]
     driver_version = tuple(versions[:2])
     runtime_version = tuple(versions[2:])
 
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 5be4d350c0b..eed5037cbea 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -1175,7 +1175,7 @@ def test_intermediates_are_proxied():
 
 def test_from_dataframe():
     cudf = pytest.importorskip("cudf")
-    from cudf.testing._utils import assert_eq
+    from cudf.testing import assert_eq
 
     data = {"foo": [1, 2, 3], "bar": [4, 5, 6]}
 
diff --git a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd
index 84a3a32646d..2de0bf39785 100644
--- a/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd
+++ b/python/cudf_kafka/cudf_kafka/_lib/kafka.pxd
@@ -7,7 +7,7 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
 
-from cudf._lib.io.datasource cimport Datasource
+from cudf._lib.pylibcudf.io.datasource cimport Datasource
 from cudf._lib.pylibcudf.libcudf.io.datasource cimport datasource
 
 
diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index c92e0714d54..871134665af 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -898,6 +898,7 @@ def __init__(self, dtype: plc.DataType, options: Any, agg: Expr) -> None:
         super().__init__(dtype)
         self.options = options
         self.children = (agg,)
+        raise NotImplementedError("Rolling window not implemented")
 
 
 class GroupedRollingWindow(Expr):
@@ -909,6 +910,7 @@ def __init__(self, dtype: plc.DataType, options: Any, agg: Expr, *by: Expr) -> N
         super().__init__(dtype)
         self.options = options
         self.children = (agg, *by)
+        raise NotImplementedError("Grouped rolling window not implemented")
 
 
 class Cast(Expr):
@@ -952,7 +954,9 @@ def __init__(
         self.options = options
         self.children = (value,)
         if name not in Agg._SUPPORTED:
-            raise NotImplementedError(f"Unsupported aggregation {name=}")
+            raise NotImplementedError(
+                f"Unsupported aggregation {name=}"
+            )  # pragma: no cover; all valid aggs are supported
         # TODO: nan handling in groupby case
         if name == "min":
             req = plc.aggregation.min()
@@ -978,7 +982,9 @@ def __init__(
         elif name == "count":
             req = plc.aggregation.count(null_handling=plc.types.NullPolicy.EXCLUDE)
         else:
-            raise NotImplementedError
+            raise NotImplementedError(
+                f"Unreachable, {name=} is incorrectly listed in _SUPPORTED"
+            )  # pragma: no cover
         self.request = req
         op = getattr(self, f"_{name}", None)
         if op is None:
@@ -988,7 +994,9 @@ def __init__(
         elif name in {"count", "first", "last"}:
             pass
         else:
-            raise AssertionError
+            raise NotImplementedError(
+                f"Unreachable, supported agg {name=} has no implementation"
+            )  # pragma: no cover
         self.op = op
 
     _SUPPORTED: ClassVar[frozenset[str]] = frozenset(
@@ -1010,11 +1018,15 @@ def __init__(
     def collect_agg(self, *, depth: int) -> AggInfo:
         """Collect information about aggregations in groupbys."""
         if depth >= 1:
-            raise NotImplementedError("Nested aggregations in groupby")
+            raise NotImplementedError(
+                "Nested aggregations in groupby"
+            )  # pragma: no cover; check_agg trips first
         (child,) = self.children
         ((expr, _, _),) = child.collect_agg(depth=depth + 1).requests
         if self.request is None:
-            raise NotImplementedError(f"Aggregation {self.name} in groupby")
+            raise NotImplementedError(
+                f"Aggregation {self.name} in groupby"
+            )  # pragma: no cover; __init__ trips first
         return AggInfo([(expr, self.request, self)])
 
     def _reduce(
@@ -1024,10 +1036,7 @@ def _reduce(
             plc.Column.from_scalar(
                 plc.reduce.reduce(column.obj, request, self.dtype),
                 1,
-            ),
-            is_sorted=plc.types.Sorted.YES,
-            order=plc.types.Order.ASCENDING,
-            null_order=plc.types.NullOrder.BEFORE,
+            )
         )
 
     def _count(self, column: Column) -> Column:
@@ -1040,10 +1049,7 @@ def _count(self, column: Column) -> Column:
                     ),
                 ),
                 1,
-            ),
-            is_sorted=plc.types.Sorted.YES,
-            order=plc.types.Order.ASCENDING,
-            null_order=plc.types.NullOrder.BEFORE,
+            )
         )
 
     def _min(self, column: Column, *, propagate_nans: bool) -> Column:
@@ -1054,10 +1060,7 @@ def _min(self, column: Column, *, propagate_nans: bool) -> Column:
                         pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
                     ),
                     1,
-                ),
-                is_sorted=plc.types.Sorted.YES,
-                order=plc.types.Order.ASCENDING,
-                null_order=plc.types.NullOrder.BEFORE,
+                )
             )
         if column.nan_count > 0:
             column = column.mask_nans()
@@ -1071,31 +1074,18 @@ def _max(self, column: Column, *, propagate_nans: bool) -> Column:
                         pa.scalar(float("nan"), type=plc.interop.to_arrow(self.dtype))
                     ),
                     1,
-                ),
-                is_sorted=plc.types.Sorted.YES,
-                order=plc.types.Order.ASCENDING,
-                null_order=plc.types.NullOrder.BEFORE,
+                )
             )
         if column.nan_count > 0:
             column = column.mask_nans()
         return self._reduce(column, request=plc.aggregation.max())
 
     def _first(self, column: Column) -> Column:
-        return Column(
-            plc.copying.slice(column.obj, [0, 1])[0],
-            is_sorted=plc.types.Sorted.YES,
-            order=plc.types.Order.ASCENDING,
-            null_order=plc.types.NullOrder.BEFORE,
-        )
+        return Column(plc.copying.slice(column.obj, [0, 1])[0])
 
     def _last(self, column: Column) -> Column:
         n = column.obj.size()
-        return Column(
-            plc.copying.slice(column.obj, [n - 1, n])[0],
-            is_sorted=plc.types.Sorted.YES,
-            order=plc.types.Order.ASCENDING,
-            null_order=plc.types.NullOrder.BEFORE,
-        )
+        return Column(plc.copying.slice(column.obj, [n - 1, n])[0])
 
     def do_evaluate(
         self,
@@ -1106,7 +1096,9 @@ def do_evaluate(
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
         if context is not ExecutionContext.FRAME:
-            raise NotImplementedError(f"Agg in context {context}")
+            raise NotImplementedError(
+                f"Agg in context {context}"
+            )  # pragma: no cover; unreachable
         (child,) = self.children
         return self.op(child.evaluate(df, context=context, mapping=mapping))
 
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 3ccefac6b0a..b3dd6ae7cc3 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -427,8 +427,6 @@ def check_agg(agg: expr.Expr) -> int:
         if isinstance(agg, (expr.BinOp, expr.Cast)):
             return max(GroupBy.check_agg(child) for child in agg.children)
         elif isinstance(agg, expr.Agg):
-            if agg.name == "implode":
-                raise NotImplementedError("implode in groupby")
             return 1 + max(GroupBy.check_agg(child) for child in agg.children)
         elif isinstance(agg, (expr.Len, expr.Col, expr.Literal)):
             return 0
@@ -440,7 +438,9 @@ def __post_init__(self) -> None:
         if self.options.rolling is None and self.maintain_order:
             raise NotImplementedError("Maintaining order in groupby")
         if self.options.rolling:
-            raise NotImplementedError("rolling window/groupby")
+            raise NotImplementedError(
+                "rolling window/groupby"
+            )  # pragma: no cover; rollingwindow constructor has already raised
         if any(GroupBy.check_agg(a.value) > 1 for a in self.agg_requests):
             raise NotImplementedError("Nested aggregations in groupby")
         self.agg_infos = [req.collect_agg(depth=0) for req in self.agg_requests]
diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py
index 41bc3032bc5..5d289885f47 100644
--- a/python/cudf_polars/cudf_polars/dsl/translate.py
+++ b/python/cudf_polars/cudf_polars/dsl/translate.py
@@ -10,6 +10,7 @@
 from typing import Any
 
 import pyarrow as pa
+from typing_extensions import assert_never
 
 from polars.polars import _expr_nodes as pl_expr, _ir_nodes as pl_ir
 
@@ -354,17 +355,20 @@ def _(node: pl_expr.Function, visitor: NodeTraverser, dtype: plc.DataType) -> ex
 @_translate_expr.register
 def _(node: pl_expr.Window, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Expr:
     # TODO: raise in groupby?
-    if node.partition_by is None:
+    if isinstance(node.options, pl_expr.RollingGroupOptions):
+        # pl.col("a").rolling(...)
         return expr.RollingWindow(
             dtype, node.options, translate_expr(visitor, n=node.function)
         )
-    else:
+    elif isinstance(node.options, pl_expr.WindowMapping):
+        # pl.col("a").over(...)
         return expr.GroupedRollingWindow(
             dtype,
             node.options,
             translate_expr(visitor, n=node.function),
             *(translate_expr(visitor, n=n) for n in node.partition_by),
         )
+    assert_never(node.options)
 
 
 @_translate_expr.register
diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py
index 7b0049daf11..3d4a643e1fc 100644
--- a/python/cudf_polars/cudf_polars/utils/dtypes.py
+++ b/python/cudf_polars/cudf_polars/utils/dtypes.py
@@ -70,7 +70,7 @@ def from_polars(dtype: pl.DataType) -> plc.DataType:
             return plc.DataType(plc.TypeId.TIMESTAMP_MICROSECONDS)
         elif dtype.time_unit == "ns":
             return plc.DataType(plc.TypeId.TIMESTAMP_NANOSECONDS)
-        assert dtype.time_unit is not None
+        assert dtype.time_unit is not None  # pragma: no cover
         assert_never(dtype.time_unit)
     elif isinstance(dtype, pl.Duration):
         if dtype.time_unit == "ms":
@@ -79,7 +79,7 @@ def from_polars(dtype: pl.DataType) -> plc.DataType:
             return plc.DataType(plc.TypeId.DURATION_MICROSECONDS)
         elif dtype.time_unit == "ns":
             return plc.DataType(plc.TypeId.DURATION_NANOSECONDS)
-        assert dtype.time_unit is not None
+        assert dtype.time_unit is not None  # pragma: no cover
         assert_never(dtype.time_unit)
     elif isinstance(dtype, pl.String):
         return plc.DataType(plc.TypeId.STRING)
diff --git a/python/cudf_polars/cudf_polars/utils/sorting.py b/python/cudf_polars/cudf_polars/utils/sorting.py
index 24fd449dd88..57f94c4ec4c 100644
--- a/python/cudf_polars/cudf_polars/utils/sorting.py
+++ b/python/cudf_polars/cudf_polars/utils/sorting.py
@@ -43,8 +43,8 @@ def sort_order(
         for d in descending
     ]
     null_precedence = []
-    # TODO: use strict=True when we drop py39
-    assert len(descending) == len(nulls_last)
+    if len(descending) != len(nulls_last) or len(descending) != num_keys:
+        raise ValueError("Mismatching length of arguments in sort_order")
     for asc, null_last in zip(column_order, nulls_last):
         if (asc == plc.types.Order.ASCENDING) ^ (not null_last):
             null_precedence.append(plc.types.NullOrder.AFTER)
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index face04b9bd8..effa4861e0c 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -52,6 +52,13 @@ version = {file = "cudf_polars/VERSION"}
 [tool.pytest.ini_options]
 xfail_strict = true
 
+[tool.coverage.report]
+exclude_also = [
+  "if TYPE_CHECKING:",
+  "class .*\\bProtocol\\):",
+  "assert_never\\("
+]
+
 [tool.ruff]
 line-length = 88
 indent-width = 4
diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py
index b044bbb2885..2ffa1c4af6d 100644
--- a/python/cudf_polars/tests/expressions/test_agg.py
+++ b/python/cudf_polars/tests/expressions/test_agg.py
@@ -56,3 +56,17 @@ def test_agg(df, agg):
         with pytest.raises(AssertionError):
             assert_gpu_result_equal(q)
     assert_gpu_result_equal(q, check_dtypes=check_dtypes, check_exact=False)
+
+
+@pytest.mark.parametrize(
+    "propagate_nans",
+    [pytest.param(False, marks=pytest.mark.xfail(reason="Need to mask nans")), True],
+    ids=["mask_nans", "propagate_nans"],
+)
+@pytest.mark.parametrize("op", ["min", "max"])
+def test_agg_float_with_nans(propagate_nans, op):
+    df = pl.LazyFrame({"a": [1, 2, float("nan")]})
+    op = getattr(pl.Expr, f"nan_{op}" if propagate_nans else op)
+    q = df.select(op(pl.col("a")))
+
+    assert_gpu_result_equal(q)
diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py
new file mode 100644
index 00000000000..6ba2a1dce1e
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        pl.Date(),
+        pl.Datetime("ms"),
+        pl.Datetime("us"),
+        pl.Datetime("ns"),
+        pl.Duration("ms"),
+        pl.Duration("us"),
+        pl.Duration("ns"),
+    ],
+    ids=repr,
+)
+def test_datetime_dataframe_scan(dtype):
+    ldf = pl.DataFrame(
+        {
+            "a": pl.Series([1, 2, 3, 4, 5, 6, 7], dtype=dtype),
+            "b": pl.Series([3, 4, 5, 6, 7, 8, 9], dtype=pl.UInt16),
+        }
+    ).lazy()
+
+    query = ldf.select(pl.col("b"), pl.col("a"))
+    assert_gpu_result_equal(query)
diff --git a/python/cudf_polars/tests/expressions/test_filter.py b/python/cudf_polars/tests/expressions/test_filter.py
index 783403d764c..1a8e994e3aa 100644
--- a/python/cudf_polars/tests/expressions/test_filter.py
+++ b/python/cudf_polars/tests/expressions/test_filter.py
@@ -2,19 +2,35 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
+import pytest
+
 import polars as pl
 
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 
 
-def test_filter():
-    ldf = pl.DataFrame(
+@pytest.mark.parametrize(
+    "expr",
+    [
+        pytest.param(
+            pl.lit(value=False),
+            marks=pytest.mark.xfail(reason="Expression filter does not handle scalars"),
+        ),
+        pl.col("c"),
+        pl.col("b") > 2,
+    ],
+)
+@pytest.mark.parametrize("predicate_pushdown", [False, True])
+def test_filter_expression(expr, predicate_pushdown):
+    ldf = pl.LazyFrame(
         {
             "a": [1, 2, 3, 4, 5, 6, 7],
-            "b": [1, 1, 1, 1, 1, 1, 1],
+            "b": [0, 3, 1, 5, 6, 1, 0],
+            "c": [None, True, False, False, True, True, False],
         }
-    ).lazy()
+    )
 
-    # group-by is just to avoid the filter being pushed into the scan.
-    query = ldf.group_by(pl.col("a")).agg(pl.col("b").sum()).filter(pl.col("b") < 1)
-    assert_gpu_result_equal(query)
+    query = ldf.select(pl.col("a").filter(expr))
+    assert_gpu_result_equal(
+        query, collect_kwargs={"predicate_pushdown": predicate_pushdown}
+    )
diff --git a/python/cudf_polars/tests/expressions/test_rolling.py b/python/cudf_polars/tests/expressions/test_rolling.py
new file mode 100644
index 00000000000..d4920d35f14
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_rolling.py
@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars import translate_ir
+
+
+def test_rolling():
+    dates = [
+        "2020-01-01 13:45:48",
+        "2020-01-01 16:42:13",
+        "2020-01-01 16:45:09",
+        "2020-01-02 18:12:48",
+        "2020-01-03 19:45:32",
+        "2020-01-08 23:16:43",
+    ]
+    df = (
+        pl.DataFrame({"dt": dates, "a": [3, 7, 5, 9, 2, 1]})
+        .with_columns(pl.col("dt").str.strptime(pl.Datetime))
+        .lazy()
+    )
+    q = df.with_columns(
+        sum_a=pl.sum("a").rolling(index_column="dt", period="2d"),
+        min_a=pl.min("a").rolling(index_column="dt", period="2d"),
+        max_a=pl.max("a").rolling(index_column="dt", period="2d"),
+    )
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
+
+
+def test_grouped_rolling():
+    df = pl.LazyFrame({"a": [1, 2, 3, 4, 5, 6], "b": [1, 2, 1, 3, 1, 2]})
+
+    q = df.select(pl.col("a").min().over("b"))
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
diff --git a/python/cudf_polars/tests/expressions/test_sort.py b/python/cudf_polars/tests/expressions/test_sort.py
new file mode 100644
index 00000000000..0195266f5c6
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_sort.py
@@ -0,0 +1,53 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import itertools
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.mark.parametrize("descending", [False, True])
+@pytest.mark.parametrize("nulls_last", [False, True])
+def test_sort_expression(descending, nulls_last):
+    ldf = pl.LazyFrame(
+        {
+            "a": [5, -1, 3, 4, None, 8, 6, 7, None],
+        }
+    )
+
+    query = ldf.select(pl.col("a").sort(descending=descending, nulls_last=nulls_last))
+    assert_gpu_result_equal(query)
+
+
+@pytest.mark.parametrize(
+    "descending", itertools.combinations_with_replacement([False, True], 3)
+)
+@pytest.mark.parametrize(
+    "nulls_last", itertools.combinations_with_replacement([False, True], 3)
+)
+@pytest.mark.parametrize("maintain_order", [False, True], ids=["unstable", "stable"])
+def test_sort_by_expression(descending, nulls_last, maintain_order):
+    ldf = pl.LazyFrame(
+        {
+            "a": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+            "b": [1, 2, 2, 3, 9, 5, -1, 2, -2, 16],
+            "c": ["a", "A", "b", "b", "c", "d", "A", "Z", "ä", "̈Ä"],
+        }
+    )
+
+    query = ldf.select(
+        pl.col("a").sort_by(
+            pl.col("b"),
+            pl.col("c"),
+            pl.col("b") + pl.col("a"),
+            descending=descending,
+            nulls_last=nulls_last,
+            maintain_order=maintain_order,
+        )
+    )
+    assert_gpu_result_equal(query, check_row_order=maintain_order)
diff --git a/python/cudf_polars/tests/test_filter.py b/python/cudf_polars/tests/test_filter.py
new file mode 100644
index 00000000000..f39b348144b
--- /dev/null
+++ b/python/cudf_polars/tests/test_filter.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.mark.parametrize("expr", [pl.col("c"), pl.col("b") < 1, pl.lit(value=True)])
+@pytest.mark.parametrize("predicate_pushdown", [False, True])
+def test_filter(expr, predicate_pushdown):
+    ldf = pl.DataFrame(
+        {
+            "a": [1, 2, 3, 4, 5, 6, 7],
+            "b": [1, 1, 1, 1, 1, 1, 1],
+            "c": [True, False, False, True, True, True, None],
+        }
+    ).lazy()
+
+    query = ldf.filter(expr)
+    assert_gpu_result_equal(
+        query, collect_kwargs={"predicate_pushdown": predicate_pushdown}
+    )
diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py
index d06a7ecf105..e70f923b097 100644
--- a/python/cudf_polars/tests/test_groupby.py
+++ b/python/cudf_polars/tests/test_groupby.py
@@ -6,6 +6,7 @@
 
 import polars as pl
 
+from cudf_polars import translate_ir
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 
 
@@ -43,6 +44,7 @@ def keys(request):
         [pl.col("float") + pl.col("int")],
         [pl.col("float").max() - pl.col("int").min()],
         [pl.col("float").mean(), pl.col("int").std()],
+        [(pl.col("float") - pl.lit(2)).max()],
     ],
     ids=lambda aggs: "-".join(map(str, aggs)),
 )
@@ -72,7 +74,28 @@ def test_groupby(df: pl.LazyFrame, maintain_order, keys, exprs):
     if not maintain_order:
         sort_keys = list(q.schema.keys())[: len(keys)]
         q = q.sort(*sort_keys)
-    # from cudf_polars.dsl.translate import translate_ir
-    # ir = translate_ir(q._ldf.visit())
-    # from IPython import embed; embed()
+
     assert_gpu_result_equal(q, check_exact=False)
+
+
+def test_groupby_len(df, keys):
+    q = df.group_by(*keys).agg(pl.len())
+
+    # TODO: polars returns UInt32, libcudf returns Int32
+    with pytest.raises(AssertionError):
+        assert_gpu_result_equal(q, check_row_order=False)
+    assert_gpu_result_equal(q, check_dtypes=False, check_row_order=False)
+
+
+@pytest.mark.parametrize(
+    "expr",
+    [
+        pl.col("float").is_not_null(),
+        (pl.col("int").max() + pl.col("float").min()).max(),
+    ],
+)
+def test_groupby_unsupported(df, expr):
+    q = df.group_by("key1").agg(expr)
+
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
diff --git a/python/cudf_polars/tests/utils/test_dtypes.py b/python/cudf_polars/tests/utils/test_dtypes.py
new file mode 100644
index 00000000000..535fdd846a0
--- /dev/null
+++ b/python/cudf_polars/tests/utils/test_dtypes.py
@@ -0,0 +1,31 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.utils.dtypes import from_polars
+
+
+@pytest.mark.parametrize(
+    "pltype",
+    [
+        pl.Time(),
+        pl.Struct({"a": pl.Int8, "b": pl.Float32}),
+        pl.Datetime("ms", time_zone="US/Pacific"),
+        pl.Array(pl.Int8, 2),
+        pl.Binary(),
+        pl.Categorical(),
+        pl.Enum(["a", "b"]),
+        pl.Field("a", pl.Int8),
+        pl.Object(),
+        pl.Unknown(),
+    ],
+    ids=repr,
+)
+def test_unhandled_dtype_conversion_raises(pltype):
+    with pytest.raises(NotImplementedError):
+        _ = from_polars(pltype)
diff --git a/python/cudf_polars/tests/utils/test_sorting.py b/python/cudf_polars/tests/utils/test_sorting.py
new file mode 100644
index 00000000000..4e98a3a7ce7
--- /dev/null
+++ b/python/cudf_polars/tests/utils/test_sorting.py
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+
+import pytest
+
+from cudf_polars.utils.sorting import sort_order
+
+
+@pytest.mark.parametrize(
+    "descending,nulls_last,num_keys",
+    [
+        ([True], [False, True], 3),
+        ([True, True], [False, True, False], 3),
+        ([False, True], [True], 3),
+    ],
+)
+def test_sort_order_raises_mismatch(descending, nulls_last, num_keys):
+    with pytest.raises(ValueError):
+        _ = sort_order(descending, nulls_last=nulls_last, num_keys=num_keys)
diff --git a/python/custreamz/custreamz/tests/test_kafka.py b/python/custreamz/custreamz/tests/test_kafka.py
index ad3b829544b..3a3c4e994d0 100644
--- a/python/custreamz/custreamz/tests/test_kafka.py
+++ b/python/custreamz/custreamz/tests/test_kafka.py
@@ -1,8 +1,8 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 import confluent_kafka as ck
 import pytest
 
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 
 @pytest.mark.parametrize("commit_offset", [1, 45, 100, 22, 1000, 10])
diff --git a/python/dask_cudf/dask_cudf/tests/test_accessor.py b/python/dask_cudf/dask_cudf/tests/test_accessor.py
index 58d28f0597e..6f04b5737da 100644
--- a/python/dask_cudf/dask_cudf/tests/test_accessor.py
+++ b/python/dask_cudf/dask_cudf/tests/test_accessor.py
@@ -9,7 +9,8 @@
 from dask import dataframe as dd
 
 from cudf import DataFrame, Series, date_range
-from cudf.testing._utils import assert_eq, does_not_raise
+from cudf.testing import assert_eq
+from cudf.testing._utils import does_not_raise
 
 import dask_cudf
 from dask_cudf.tests.utils import xfail_dask_expr
diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py
index 7f8a619ae22..174923c2c7e 100644
--- a/python/dask_cudf/dask_cudf/tests/test_core.py
+++ b/python/dask_cudf/dask_cudf/tests/test_core.py
@@ -795,7 +795,7 @@ def test_dataframe_set_index():
         pddf = dd.from_pandas(pdf, npartitions=4)
         pddf = pddf.set_index("str")
 
-        from cudf.testing._utils import assert_eq
+        from cudf.testing import assert_eq
 
         assert_eq(ddf.compute(), pddf.compute())
 
diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py
index 07fdb25dff9..be10b0d4843 100644
--- a/python/dask_cudf/dask_cudf/tests/test_distributed.py
+++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py
@@ -9,7 +9,7 @@
 from distributed.utils_test import cleanup, loop, loop_in_thread  # noqa: F401
 
 import cudf
-from cudf.testing._utils import assert_eq
+from cudf.testing import assert_eq
 
 import dask_cudf