From bcf5e2e263f2749fa134714ef9fa2ba41f916fbb Mon Sep 17 00:00:00 2001 From: eitsupi Date: Sun, 14 Apr 2024 11:44:54 +0000 Subject: [PATCH] fix: more fixes for R tests --- DESCRIPTION | 2 +- R/expr__array.R | 11 ++--- R/expr__expr.R | 28 +++++++----- R/expr__list.R | 16 ++++--- R/extendr-wrappers.R | 2 +- R/lazyframe__lazy.R | 11 ++--- R/series__series.R | 35 ++++++++------- man/DataFrame_sort.Rd | 4 +- man/ExprArr_get.Rd | 8 ++-- man/ExprArr_sort.Rd | 5 +-- man/ExprList_get.Rd | 16 ++++--- man/Expr_arg_sort.Rd | 5 +-- man/Expr_set_sorted.Rd | 4 +- man/Expr_sort.Rd | 9 ++-- man/Expr_sort_by.Rd | 21 +++++++-- man/LazyFrame_sort.Rd | 11 +++-- man/Series_set_sorted.Rd | 6 ++- man/Series_sort.Rd | 18 ++++++-- man/pl_arg_sort_by.Rd | 3 +- src/rust/src/lazy/dsl.rs | 4 +- tests/testthat/_snaps/after-wrappers.md | 6 +-- tests/testthat/test-as_polars.R | 4 +- tests/testthat/test-concat.R | 2 +- tests/testthat/test-dataframe.R | 24 +++++----- tests/testthat/test-expr_array.R | 59 +++++++++++++------------ tests/testthat/test-expr_expr.R | 14 +++--- 26 files changed, 189 insertions(+), 139 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b86f2f53e..178bda6cf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -118,5 +118,5 @@ Collate: 'zzz.R' Config/rextendr/version: 0.3.1 VignetteBuilder: knitr -Config/polars/LibVersion: 0.38.2 +Config/polars/LibVersion: 0.39.0 Config/polars/RustToolchainVersion: nightly-2024-03-28 diff --git a/R/expr__array.R b/R/expr__array.R index 92f11cb0e..6cc7f486a 100644 --- a/R/expr__array.R +++ b/R/expr__array.R @@ -136,14 +136,11 @@ ExprArr_unique = function(maintain_order = FALSE) .pr$Expr$arr_unique(self, main #' #' This allows to extract one value per array only. #' +#' @inherit ExprList_get return #' @param index An Expr or something coercible to an Expr, that must return a #' single index. 
Values are 0-indexed (so index 0 would return the first item #' of every sub-array) and negative values start from the end (index `-1` -#' returns the last item). If the index is out of bounds, it will return a -#' `null`. Strings are parsed as column names. -#' -#' @return Expr -#' @aliases arr_get +#' returns the last item). #' @examples #' df = pl$DataFrame( #' values = list(c(1, 2), c(3, 4), c(NA_real_, 6)), @@ -156,8 +153,8 @@ ExprArr_unique = function(maintain_order = FALSE) .pr$Expr$arr_unique(self, main #' val_minus_1 = pl$col("values")$arr$get(-1), #' val_oob = pl$col("values")$arr$get(10) #' ) -ExprArr_get = function(index) { - .pr$Expr$arr_get(self, index) |> +ExprArr_get = function(index, ..., null_on_oob = TRUE) { + .pr$Expr$arr_get(self, index, null_on_oob) |> unwrap("in $arr$get():") } diff --git a/R/expr__expr.R b/R/expr__expr.R index 6a8c11874..865aa78c0 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -1377,16 +1377,13 @@ Expr_mode = use_extendr_wrapper #' #' Sort this column. If used in a groupby context, the groups are sorted. #' -#' @param ... Ignored -#' @param descending Sort in descending order. When sorting by multiple columns, -#' can be specified per column by passing a vector of booleans. -#' @param nulls_last If `TRUE`, place nulls values last. +#' @inheritParams Series_sort #' @return Expr #' @examples #' pl$DataFrame(a = c(6, 1, 0, NA, Inf, NaN))$ #' with_columns(sorted = pl$col("a")$sort()) Expr_sort = function(..., descending = FALSE, nulls_last = FALSE) { - .pr$Expr$sort(self, descending, nulls_last) + .pr$Expr$sort_with(self, descending, nulls_last) } #' Top k values @@ -1478,6 +1475,7 @@ Expr_search_sorted = function(element) { .pr$Expr$search_sorted(self, wrap_e(element)) } +# TODO: rewrite `by` to `...` #' Sort Expr by order of others #' #' Sort this column by the ordering of another column, or multiple other columns. 
@@ -1485,7 +1483,9 @@ Expr_search_sorted = function(element) { #' #' @param by One expression or a list of expressions and/or strings (interpreted #' as column names). -#' @inheritParams Expr_sort +#' @param maintain_order A logical to indicate whether the order should be maintained +#' if elements are equal. +#' @inheritParams Series_sort #' @return Expr #' @examples #' df = pl$DataFrame( @@ -1511,12 +1511,19 @@ Expr_search_sorted = function(element) { #' df$with_columns( #' sorted = pl$col("group")$sort_by(pl$col("value1")$sort(descending = TRUE)) #' ) -Expr_sort_by = function(by, descending = FALSE) { +Expr_sort_by = function( + by, ..., descending = FALSE, + nulls_last = FALSE, + multithreaded = TRUE, + maintain_order = FALSE) { .pr$Expr$sort_by( self, wrap_elist_result(by, str_to_lit = FALSE), - result(descending) - ) |> unwrap("in $sort_by:") + descending, + nulls_last, + maintain_order, + multithreaded + ) |> unwrap("in $sort_by():") } #' Gather values by index @@ -3143,6 +3150,7 @@ Expr_cumulative_eval = function(expr, min_periods = 1L, parallel = FALSE) { #' This enables downstream code to use fast paths for sorted arrays. WARNING: #' this doesn't check whether the data is actually sorted, you have to ensure of #' that yourself. +#' @param ... Ignored. #' @param descending Sort the columns in descending order. 
#' @return Expr #' @examples @@ -3154,7 +3162,7 @@ Expr_cumulative_eval = function(expr, min_periods = 1L, parallel = FALSE) { #' s2 = pl$select(pl$lit(c(1, 3, 2, 4))$set_sorted()$alias("a"))$get_column("a") #' s2$sort() #' s2$flags # returns TRUE while it's not actually sorted -Expr_set_sorted = function(descending = FALSE) { +Expr_set_sorted = function(..., descending = FALSE) { self$map_batches(\(s) { .pr$Series$set_sorted_mut(s, descending) # use private to bypass mut protection s diff --git a/R/expr__list.R b/R/expr__list.R index f89345440..d6e2adb8b 100644 --- a/R/expr__list.R +++ b/R/expr__list.R @@ -112,11 +112,12 @@ ExprList_concat = function(other) { #' @param index An Expr or something coercible to an Expr, that must return a #' single index. Values are 0-indexed (so index 0 would return the first item #' of every sublist) and negative values start from the end (index `-1` -#' returns the last item). If the index is out of bounds, it will return a -#' `null`. Strings are parsed as column names. -#' -#' @return Expr -#' @aliases list_get +#' returns the last item). +#' @param ... Ignored. 
+#' @param null_on_oob A logical to determine the behavior if an index is out of bounds: +#' - `TRUE` (default): set as `null` +#' - `FALSE`: raise an error +#' @return [Expr][Expr_class] #' @examples #' df = pl$DataFrame( #' values = list(c(2, 2, NA), c(1, 2, 3), NA_real_, NULL), @@ -128,7 +129,10 @@ ExprList_concat = function(other) { #' val_minus_1 = pl$col("values")$list$get(-1), #' val_oob = pl$col("values")$list$get(10) #' ) -ExprList_get = function(index) .pr$Expr$list_get(self, wrap_e(index, str_to_lit = FALSE)) +ExprList_get = function(index, ..., null_on_oob = TRUE) { + .pr$Expr$list_get(self, index, null_on_oob) |> + unwrap("in $list$get():") +} #' Get several values by index in a list #' diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 06bf14d2b..80d99652a 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -484,7 +484,7 @@ RPolarsExpr$to_physical <- function() .Call(wrap__RPolarsExpr__to_physical, self RPolarsExpr$cast <- function(data_type, strict) .Call(wrap__RPolarsExpr__cast, self, data_type, strict) -RPolarsExpr$sort <- function(descending, nulls_last) .Call(wrap__RPolarsExpr__sort, self, descending, nulls_last) +RPolarsExpr$sort_with <- function(descending, nulls_last) .Call(wrap__RPolarsExpr__sort_with, self, descending, nulls_last) RPolarsExpr$arg_sort <- function(descending, nulls_last) .Call(wrap__RPolarsExpr__arg_sort, self, descending, nulls_last) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 91e47e8a3..3f16adacf 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -1296,15 +1296,15 @@ LazyFrame_join = function( } -#' Sort a LazyFrame -#' @description Sort by one or more Expressions. +#' Sort the LazyFrame by the given columns +#' +#' @inheritParams Series_sort #' @param by Column(s) to sort by. Can be character vector of column names, #' a list of Expr(s) or a list with a mix of Expr(s) and column names. #' @param ... More columns to sort by as above but provided one Expr per argument. 
#' @param descending Logical. Sort in descending order (default is `FALSE`). This must be #' either of length 1 or a logical vector of the same length as the number of #' Expr(s) specified in `by` and `...`. -#' @param nulls_last Logical. Place `NULL`s at the end? Default is `FALSE`. #' @param maintain_order Whether the order should be maintained if elements are #' equal. If `TRUE`, streaming is not possible and performance might be worse #' since this requires a stable search. @@ -1326,10 +1326,11 @@ LazyFrame_sort = function( ..., descending = FALSE, nulls_last = FALSE, - maintain_order = FALSE) { + maintain_order = FALSE, + multithreaded = TRUE) { .pr$LazyFrame$sort_by_exprs( self, unpack_list(by, .context = "in $sort():"), err_on_named_args(...), - descending, nulls_last, maintain_order + descending, nulls_last, maintain_order, multithreaded ) |> unwrap("in $sort():") } diff --git a/R/series__series.R b/R/series__series.R index af2faa6d2..ebb496884 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -874,7 +874,7 @@ Series_is_sorted = function(descending = FALSE) { #' Set a sorted flag on a Series #' #' @inheritParams Expr_set_sorted -#' @param in_place If `TRUE`, this will set the flag mutably and return NULL. +#' @param in_place If `TRUE`, this will set the flag mutably and return `NULL`. #' Remember to use `options(polars.strictly_immutable = FALSE)` before using #' this parameter, otherwise an error will occur. If `FALSE` (default), it will #' return a cloned Series with the flag. 
@@ -886,46 +886,51 @@ Series_is_sorted = function(descending = FALSE) { #' @examples #' s = as_polars_series(1:4)$set_sorted() #' s$flags -Series_set_sorted = function(descending = FALSE, in_place = FALSE) { - if (in_place && polars_options()$strictly_immutable) { - stop(paste( +Series_set_sorted = function(..., descending = FALSE, in_place = FALSE) { + if (isTRUE(in_place) && polars_options()$strictly_immutable) { + Err_plain( "Using `in_place = TRUE` in `set_sorted()` breaks immutability. To enable mutable features run:\n", "`options(polars.strictly_immutable = FALSE)`" - )) + ) |> + unwrap("in $set_sorted():") } - if (!in_place) { + if (!isTRUE(in_place)) { self = self$clone() } .pr$Series$set_sorted_mut(self, descending) - if (in_place) invisible(NULL) else invisible(self) + if (isTRUE(in_place)) invisible(NULL) else invisible(self) } #' Sort a Series #' -#' @param descending Sort in descending order. -#' @inheritParams Expr_sort #' @inheritParams Series_set_sorted -#' +#' @param descending A logical. If `TRUE`, sort in descending order. +#' @param nulls_last A logical. If `TRUE`, place `null` values last instead of first. +#' @param multithreaded A logical. If `TRUE`, sort using multiple threads. 
#' @return [Series][Series_class] -#' #' @examples #' as_polars_series(c(1.5, NA, 1, NaN, Inf, -Inf))$sort() #' as_polars_series(c(1.5, NA, 1, NaN, Inf, -Inf))$sort(nulls_last = TRUE) -Series_sort = function(..., descending = FALSE, nulls_last = FALSE, in_place = FALSE) { +Series_sort = function( + ..., descending = FALSE, nulls_last = FALSE, multithreaded = TRUE, + in_place = FALSE) { + uw = \(res) unwrap(res, "in $sort():") if (isTRUE(in_place) && polars_options()$strictly_immutable) { - stop(paste( + Err_plain( "in place sort breaks immutability, to enable mutable features run:\n", "`options(polars.strictly_immutable = FALSE)`" - )) + ) |> + uw() } if (!isTRUE(in_place)) { self = self$clone() } - .pr$Series$sort(self, descending, nulls_last) + .pr$Series$sort(self, descending, nulls_last, multithreaded) |> + uw() } #' Convert Series to DataFrame diff --git a/man/DataFrame_sort.Rd b/man/DataFrame_sort.Rd index a352a9640..7cf630a77 100644 --- a/man/DataFrame_sort.Rd +++ b/man/DataFrame_sort.Rd @@ -22,7 +22,7 @@ a list of Expr(s) or a list with a mix of Expr(s) and column names.} either of length 1 or a logical vector of the same length as the number of Expr(s) specified in \code{by} and \code{...}.} -\item{nulls_last}{Logical. Place \code{NULL}s at the end? Default is \code{FALSE}.} +\item{nulls_last}{A logical. If \code{TRUE}, place \code{null} values last instead of first.} \item{maintain_order}{Whether the order should be maintained if elements are equal. If \code{TRUE}, streaming is not possible and performance might be worse @@ -32,7 +32,7 @@ since this requires a stable search.} DataFrame } \description{ -Sort by one or more Expressions. 
+Sort a DataFrame } \examples{ df = mtcars diff --git a/man/ExprArr_get.Rd b/man/ExprArr_get.Rd index 91c8d787c..bc199b2d2 100644 --- a/man/ExprArr_get.Rd +++ b/man/ExprArr_get.Rd @@ -2,20 +2,18 @@ % Please edit documentation in R/expr__array.R \name{ExprArr_get} \alias{ExprArr_get} -\alias{arr_get} \title{Get the value by index in an array} \usage{ -ExprArr_get(index) +ExprArr_get(index, ..., null_on_oob = TRUE) } \arguments{ \item{index}{An Expr or something coercible to an Expr, that must return a single index. Values are 0-indexed (so index 0 would return the first item of every sub-array) and negative values start from the end (index \code{-1} -returns the last item). If the index is out of bounds, it will return a -\code{null}. Strings are parsed as column names.} +returns the last item).} } \value{ -Expr +\link[=Expr_class]{Expr} } \description{ This allows to extract one value per array only. diff --git a/man/ExprArr_sort.Rd b/man/ExprArr_sort.Rd index 2acaabfa2..ffdd3404e 100644 --- a/man/ExprArr_sort.Rd +++ b/man/ExprArr_sort.Rd @@ -8,10 +8,9 @@ ExprArr_sort(descending = FALSE, nulls_last = FALSE) } \arguments{ -\item{descending}{Sort in descending order. When sorting by multiple columns, -can be specified per column by passing a vector of booleans.} +\item{descending}{A logical. If \code{TRUE}, sort in descending order.} -\item{nulls_last}{If \code{TRUE}, place nulls values last.} +\item{nulls_last}{A logical. 
If \code{TRUE}, place \code{null} values last instead of first.} } \description{ Sort values in an array diff --git a/man/ExprList_get.Rd b/man/ExprList_get.Rd index 806ba00d5..1fcbcb831 100644 --- a/man/ExprList_get.Rd +++ b/man/ExprList_get.Rd @@ -2,20 +2,26 @@ % Please edit documentation in R/expr__list.R \name{ExprList_get} \alias{ExprList_get} -\alias{list_get} \title{Get the value by index in a list} \usage{ -ExprList_get(index) +ExprList_get(index, ..., null_on_oob = TRUE) } \arguments{ \item{index}{An Expr or something coercible to an Expr, that must return a single index. Values are 0-indexed (so index 0 would return the first item of every sublist) and negative values start from the end (index \code{-1} -returns the last item). If the index is out of bounds, it will return a -\code{null}. Strings are parsed as column names.} +returns the last item).} + +\item{...}{Ignored.} + +\item{null_on_oob}{A logical to determine the behavior if an index is out of bounds: +\itemize{ +\item \code{TRUE} (default): set as \code{null} +\item \code{FALSE}: raise an error +}} } \value{ -Expr +\link[=Expr_class]{Expr} } \description{ This allows to extract one value per list only. To extract several values by diff --git a/man/Expr_arg_sort.Rd b/man/Expr_arg_sort.Rd index f045d3dad..a4fec9d9c 100644 --- a/man/Expr_arg_sort.Rd +++ b/man/Expr_arg_sort.Rd @@ -7,10 +7,9 @@ Expr_arg_sort(descending = FALSE, nulls_last = FALSE) } \arguments{ -\item{descending}{Sort in descending order. When sorting by multiple columns, -can be specified per column by passing a vector of booleans.} +\item{descending}{A logical. If \code{TRUE}, sort in descending order.} -\item{nulls_last}{If \code{TRUE}, place nulls values last.} +\item{nulls_last}{A logical. 
If \code{TRUE}, place \code{null} values last instead of first.} } \value{ Expr diff --git a/man/Expr_set_sorted.Rd b/man/Expr_set_sorted.Rd index f89d42bc3..38e784f80 100644 --- a/man/Expr_set_sorted.Rd +++ b/man/Expr_set_sorted.Rd @@ -4,9 +4,11 @@ \alias{Expr_set_sorted} \title{Flag an Expr as "sorted"} \usage{ -Expr_set_sorted(descending = FALSE) +Expr_set_sorted(..., descending = FALSE) } \arguments{ +\item{...}{Ignored.} + \item{descending}{Sort the columns in descending order.} } \value{ diff --git a/man/Expr_sort.Rd b/man/Expr_sort.Rd index d9db04227..214501ec3 100644 --- a/man/Expr_sort.Rd +++ b/man/Expr_sort.Rd @@ -4,13 +4,14 @@ \alias{Expr_sort} \title{Sort an Expr} \usage{ -Expr_sort(descending = FALSE, nulls_last = FALSE) +Expr_sort(..., descending = FALSE, nulls_last = FALSE) } \arguments{ -\item{descending}{Sort in descending order. When sorting by multiple columns, -can be specified per column by passing a vector of booleans.} +\item{...}{Ignored.} -\item{nulls_last}{If \code{TRUE}, place nulls values last.} +\item{descending}{A logical. If \code{TRUE}, sort in descending order.} + +\item{nulls_last}{A logical. If \code{TRUE}, place \code{null} values last instead of first.} } \value{ Expr diff --git a/man/Expr_sort_by.Rd b/man/Expr_sort_by.Rd index 3cbf86644..945d5542c 100644 --- a/man/Expr_sort_by.Rd +++ b/man/Expr_sort_by.Rd @@ -4,14 +4,29 @@ \alias{Expr_sort_by} \title{Sort Expr by order of others} \usage{ -Expr_sort_by(by, descending = FALSE) +Expr_sort_by( + by, + ..., + descending = FALSE, + nulls_last = FALSE, + multithreaded = TRUE, + maintain_order = FALSE +) } \arguments{ \item{by}{One expression or a list of expressions and/or strings (interpreted as column names).} -\item{descending}{Sort in descending order. When sorting by multiple columns, -can be specified per column by passing a vector of booleans.} +\item{...}{Ignored.} + +\item{descending}{A logical. If \code{TRUE}, sort in descending order.} + +\item{nulls_last}{A logical. 
If \code{TRUE}, place \code{null} values last instead of first.} + +\item{multithreaded}{A logical. If \code{TRUE}, sort using multiple threads.} + +\item{maintain_order}{A logical to indicate whether the order should be maintained +if elements are equal.} } \value{ Expr diff --git a/man/LazyFrame_sort.Rd b/man/LazyFrame_sort.Rd index d20812782..29c4d96fc 100644 --- a/man/LazyFrame_sort.Rd +++ b/man/LazyFrame_sort.Rd @@ -2,14 +2,15 @@ % Please edit documentation in R/lazyframe__lazy.R \name{LazyFrame_sort} \alias{LazyFrame_sort} -\title{Sort a LazyFrame} +\title{Sort the LazyFrame by the given columns} \usage{ LazyFrame_sort( by, ..., descending = FALSE, nulls_last = FALSE, - maintain_order = FALSE + maintain_order = FALSE, + multithreaded = TRUE ) } \arguments{ @@ -22,17 +23,19 @@ a list of Expr(s) or a list with a mix of Expr(s) and column names.} either of length 1 or a logical vector of the same length as the number of Expr(s) specified in \code{by} and \code{...}.} -\item{nulls_last}{Logical. Place \code{NULL}s at the end? Default is \code{FALSE}.} +\item{nulls_last}{A logical. If \code{TRUE}, place \code{null} values last instead of first.} \item{maintain_order}{Whether the order should be maintained if elements are equal. If \code{TRUE}, streaming is not possible and performance might be worse since this requires a stable search.} + +\item{multithreaded}{A logical. If \code{TRUE}, sort using multiple threads.} } \value{ LazyFrame } \description{ -Sort by one or more Expressions. 
+Sort the LazyFrame by the given columns } \examples{ df = mtcars diff --git a/man/Series_set_sorted.Rd b/man/Series_set_sorted.Rd index cd8f8b86d..a88e82cfb 100644 --- a/man/Series_set_sorted.Rd +++ b/man/Series_set_sorted.Rd @@ -4,12 +4,14 @@ \alias{Series_set_sorted} \title{Set a sorted flag on a Series} \usage{ -Series_set_sorted(descending = FALSE, in_place = FALSE) +Series_set_sorted(..., descending = FALSE, in_place = FALSE) } \arguments{ +\item{...}{Ignored.} + \item{descending}{Sort the columns in descending order.} -\item{in_place}{If \code{TRUE}, this will set the flag mutably and return NULL. +\item{in_place}{If \code{TRUE}, this will set the flag mutably and return \code{NULL}. Remember to use \code{options(polars.strictly_immutable = FALSE)} before using this parameter, otherwise an error will occur. If \code{FALSE} (default), it will return a cloned Series with the flag.} diff --git a/man/Series_sort.Rd b/man/Series_sort.Rd index 829c541ee..6d531111b 100644 --- a/man/Series_sort.Rd +++ b/man/Series_sort.Rd @@ -4,14 +4,24 @@ \alias{Series_sort} \title{Sort a Series} \usage{ -Series_sort(descending = FALSE, nulls_last = FALSE, in_place = FALSE) +Series_sort( + ..., + descending = FALSE, + nulls_last = FALSE, + multithreaded = TRUE, + in_place = FALSE +) } \arguments{ -\item{descending}{Sort in descending order.} +\item{...}{Ignored.} -\item{nulls_last}{If \code{TRUE}, place nulls values last.} +\item{descending}{A logical. If \code{TRUE}, sort in descending order.} -\item{in_place}{If \code{TRUE}, this will set the flag mutably and return NULL. +\item{nulls_last}{A logical. If \code{TRUE}, place \code{null} values last instead of first.} + +\item{multithreaded}{A logical. If \code{TRUE}, sort using multiple threads.} + +\item{in_place}{If \code{TRUE}, this will set the flag mutably and return \code{NULL}. Remember to use \code{options(polars.strictly_immutable = FALSE)} before using this parameter, otherwise an error will occur. 
If \code{FALSE} (default), it will return a cloned Series with the flag.} diff --git a/man/pl_arg_sort_by.Rd b/man/pl_arg_sort_by.Rd index 46ad29459..86cd764c0 100644 --- a/man/pl_arg_sort_by.Rd +++ b/man/pl_arg_sort_by.Rd @@ -10,8 +10,7 @@ pl_arg_sort_by(..., descending = FALSE) \item{...}{Column(s) to arg sort by. Can be Expr(s) or something coercible to Expr(s). Strings are parsed as column names.} -\item{descending}{Sort in descending order. When sorting by multiple columns, -can be specified per column by passing a vector of booleans.} +\item{descending}{A logical. If \code{TRUE}, sort in descending order.} } \value{ Expr diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 772af4149..fee626b35 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -248,7 +248,7 @@ impl RPolarsExpr { .into() } - pub fn sort(&self, descending: bool, nulls_last: bool) -> Self { + pub fn sort_with(&self, descending: bool, nulls_last: bool) -> Self { self.clone() .0 .sort(SortOptions { @@ -1103,7 +1103,7 @@ impl RPolarsExpr { .0 .clone() .list() - .get(robj_to!(PLExprCol, index)?, robj_to!(bool, null_on_oob)?) + .get(robj_to!(PLExpr, index)?, robj_to!(bool, null_on_oob)?) 
.into()) } diff --git a/tests/testthat/_snaps/after-wrappers.md b/tests/testthat/_snaps/after-wrappers.md index d86dcf345..53f46bb1c 100644 --- a/tests/testthat/_snaps/after-wrappers.md +++ b/tests/testthat/_snaps/after-wrappers.md @@ -403,8 +403,8 @@ [253] "shrink_dtype" "shuffle" [255] "sign" "sin" [257] "sinh" "skew" - [259] "slice" "sort" - [261] "sort_by" "std" + [259] "slice" "sort_by" + [261] "sort_with" "std" [263] "str_base64_decode" "str_base64_encode" [265] "str_concat" "str_contains" [267] "str_contains_any" "str_count_matches" @@ -715,7 +715,7 @@ [35] "rem" "rename_mut" [37] "rep" "set_sorted_mut" [39] "shape" "sleep" - [41] "sort_mut" "std" + [41] "sort" "std" [43] "struct_fields" "sub" [45] "sum" "to_fmt_char" [47] "to_frame" "to_r" diff --git a/tests/testthat/test-as_polars.R b/tests/testthat/test-as_polars.R index 3427b2b8c..8e727d9b8 100644 --- a/tests/testthat/test-as_polars.R +++ b/tests/testthat/test-as_polars.R @@ -252,14 +252,14 @@ test_that("from arrow Table and ChunkedArray", { lapply(at$columns, \(x) x$num_chunks) ) - expect_grepl_error(expect_identical( + expect_identical( as_polars_df.ArrowTabular(at, rechunk = TRUE)$ select(pl$all()$map_batches(\(s) s$chunk_lengths()))$ to_list() |> lapply(length) |> unname(), lapply(at$columns, \(x) x$num_chunks) - )) + ) # #not supported yet diff --git a/tests/testthat/test-concat.R b/tests/testthat/test-concat.R index 339186c07..5852a6606 100644 --- a/tests/testthat/test-concat.R +++ b/tests/testthat/test-concat.R @@ -45,7 +45,7 @@ test_that("concat dataframe", { ) # type 'relaxed' vertical concatenation is not allowed by default - expect_grepl_error(pl$concat(l_ver[[1L]], pl$DataFrame(a = 2, b = 42L), how = "vertical"), "data types don't match") + expect_grepl_error(pl$concat(l_ver[[1L]], pl$DataFrame(a = 2, b = 42L), how = "vertical"), "cannot extend/append Int32 with Float64") # check lazy eager is identical l_ver_lazy = lapply(l_ver, \(df) df$lazy()) diff --git a/tests/testthat/test-dataframe.R 
b/tests/testthat/test-dataframe.R index 9f6ce63eb..b2e8d4ae7 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -798,34 +798,34 @@ test_that("as_data_frame (backward compatibility)", { test_that("sort", { df = pl$DataFrame(mtcars) - w = df$sort("mpg")$to_data_frame() - x = df$sort(pl$col("mpg"))$to_data_frame() + w = df$sort("mpg", maintain_order = TRUE)$to_data_frame() + x = df$sort(pl$col("mpg"), maintain_order = TRUE)$to_data_frame() y = mtcars[order(mtcars$mpg), ] expect_equal(x, y, ignore_attr = TRUE) - w = df$sort(pl$col("cyl"), pl$col("mpg"))$to_data_frame() - x = df$sort("cyl", "mpg")$to_data_frame() - y = df$sort(c("cyl", "mpg"))$to_data_frame() + w = df$sort(pl$col("cyl"), pl$col("mpg"), maintain_order = TRUE)$to_data_frame() + x = df$sort("cyl", "mpg", maintain_order = TRUE)$to_data_frame() + y = df$sort(c("cyl", "mpg"), maintain_order = TRUE)$to_data_frame() z = mtcars[order(mtcars$cyl, mtcars$mpg), ] expect_equal(w, x, ignore_attr = TRUE) expect_equal(w, y, ignore_attr = TRUE) expect_equal(w, z, ignore_attr = TRUE) # expr: one increasing and one decreasing - x = df$sort(-pl$col("cyl"), pl$col("hp"))$to_data_frame() + x = df$sort(-pl$col("cyl"), pl$col("hp"), maintain_order = TRUE)$to_data_frame() y = mtcars[order(-mtcars$cyl, mtcars$hp), ] expect_equal(x, y, ignore_attr = TRUE) # descending arg - w = df$sort("cyl", "mpg", descending = TRUE)$to_data_frame() - x = df$sort(c("cyl", "mpg"), descending = TRUE)$to_data_frame() + w = df$sort("cyl", "mpg", descending = TRUE, maintain_order = TRUE)$to_data_frame() + x = df$sort(c("cyl", "mpg"), descending = TRUE, maintain_order = TRUE)$to_data_frame() y = mtcars[order(-mtcars$cyl, -mtcars$mpg), ] expect_equal(w, x, ignore_attr = TRUE) expect_equal(w, y, ignore_attr = TRUE) # descending arg: vector of boolean - w = df$sort("cyl", "mpg", descending = c(TRUE, FALSE))$to_data_frame() - x = df$sort(c("cyl", "mpg"), descending = c(TRUE, FALSE))$to_data_frame() + w = df$sort("cyl", 
"mpg", descending = c(TRUE, FALSE), maintain_order = TRUE)$to_data_frame() + x = df$sort(c("cyl", "mpg"), descending = c(TRUE, FALSE), maintain_order = TRUE)$to_data_frame() y = mtcars[order(-mtcars$cyl, mtcars$mpg), ] expect_equal(w, x, ignore_attr = TRUE) expect_equal(w, y, ignore_attr = TRUE) @@ -834,8 +834,8 @@ test_that("sort", { df = mtcars df$mpg[1] = NA df = pl$DataFrame(df) - a = df$sort("mpg", nulls_last = TRUE)$to_data_frame() - b = df$sort("mpg", nulls_last = FALSE)$to_data_frame() + a = df$sort("mpg", nulls_last = TRUE, maintain_order = TRUE)$to_data_frame() + b = df$sort("mpg", nulls_last = FALSE, maintain_order = TRUE)$to_data_frame() expect_true(is.na(a$mpg[32])) expect_true(is.na(b$mpg[1])) diff --git a/tests/testthat/test-expr_array.R b/tests/testthat/test-expr_array.R index 7c4bab9e6..1600561a7 100644 --- a/tests/testthat/test-expr_array.R +++ b/tests/testthat/test-expr_array.R @@ -17,37 +17,38 @@ test_that("arr$sum", { ) }) -test_that("arr$max and arr$min", { - skip_if_not(polars_info()$features$nightly) +# TODO: reenable if the upstream issue is fixed +# test_that("arr$max and arr$min", { +# skip_if_not(polars_info()$features$nightly) - df = pl$DataFrame( - ints = list(1:2, c(1L, NA_integer_), c(NA_integer_, NA_integer_)), - floats = list(c(1, 2), c(1, NA_real_), c(NA_real_, NA_real_)), - schema = list( - ints = pl$Array(pl$Int32, 2), - floats = pl$Array(pl$Float32, 2) - ) - ) - # max --- - expect_identical( - df$select(pl$col("ints")$arr$max())$to_list(), - list(ints = c(2L, 1L, NA_integer_)) - ) - expect_identical( - df$select(pl$col("floats")$arr$max())$to_list(), - list(floats = c(2, 1, NA_real_)) - ) +# df = pl$DataFrame( +# ints = list(1:2, c(1L, NA_integer_), c(NA_integer_, NA_integer_)), +# floats = list(c(1, 2), c(1, NA_real_), c(NA_real_, NA_real_)), +# schema = list( +# ints = pl$Array(pl$Int32, 2), +# floats = pl$Array(pl$Float32, 2) +# ) +# ) +# # max --- +# expect_identical( +# df$select(pl$col("ints")$arr$max())$to_list(), +# 
list(ints = c(2L, 1L, NA_integer_)) +# ) +# expect_identical( +# df$select(pl$col("floats")$arr$max())$to_list(), +# list(floats = c(2, 1, NA_real_)) +# ) - # min --- - expect_identical( - df$select(pl$col("ints")$arr$min())$to_list(), - list(ints = c(1L, 1L, NA_integer_)) - ) - expect_identical( - df$select(pl$col("floats")$arr$min())$to_list(), - list(floats = c(1, 1, NA_real_)) - ) -}) +# # min --- +# expect_identical( +# df$select(pl$col("ints")$arr$min())$to_list(), +# list(ints = c(1L, 1L, NA_integer_)) +# ) +# expect_identical( +# df$select(pl$col("floats")$arr$min())$to_list(), +# list(floats = c(1, 1, NA_real_)) +# ) +# }) test_that("arr$max and arr$min error if the nightly feature is false", { skip_if(polars_info()$features$nightly) diff --git a/tests/testthat/test-expr_expr.R b/tests/testthat/test-expr_expr.R index b3e4b39bb..b5ef6d216 100644 --- a/tests/testthat/test-expr_expr.R +++ b/tests/testthat/test-expr_expr.R @@ -705,7 +705,7 @@ test_that("Expr_append", { expect_grepl_error( pl$DataFrame(list())$select(pl$lit("Bob")$append(FALSE, upcast = FALSE)), - "match" + "cannot extend/append String with Boolean" ) }) @@ -998,8 +998,8 @@ test_that("sort_by", { ) expect_grepl_error(pl$lit(1:4)$sort_by(1)$to_r(), "different length") - expect_grepl_error(pl$lit(1:4)$sort_by("blop")$to_r(), "column 'blop' not available in 'DataFrame'") - expect_grepl_error(pl$lit(1:4)$sort_by("blop")$to_r(), "column 'blop' not available in 'DataFrame'") + expect_grepl_error(pl$lit(1:4)$sort_by("blop")$to_r(), "field not found") + expect_grepl_error(pl$lit(1:4)$sort_by("blop")$to_r(), "field not found") expect_grepl_error(pl$lit(1:4)$sort_by(df)$to_r(), "not convertible into.* Expr") expect_grepl_error(pl$lit(1:4)$sort_by(df)$to_r(), "not convertible into.* Expr") @@ -2229,10 +2229,10 @@ test_that("entropy", { r_entropy(1:3, base = 2, normalize = FALSE) ) - # TODO: https://github.com/pola-rs/polars/issues/15350 - pl$select(pl$lit(c("a", "b", "b", "c", "c", "c"))$entropy(base = 
2)) - - pl$lit(c("a", "a", "a"))$entropy(base = 2, normalize = FALSE)$to_r() + expect_grepl_error( + pl$select(pl$lit(c("a", "b", "b", "c", "c", "c"))$entropy(base = 2)), + "expected numerical input" + ) })