Skip to content

Commit

Permalink
Rename argument drop_nulls to ignore_nulls in $all() and `$any(…
Browse files Browse the repository at this point in the history
…)` (#1050)

Co-authored-by: eitsupi <ts1s1andn@gmail.com>
  • Loading branch information
etiennebacher and eitsupi authored May 22, 2024
1 parent e0447c1 commit 3d2a333
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 62 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
- `pl$Struct()` now only accepts named inputs and objects of class `RPolarsField`.
For example, `pl$Struct(pl$Boolean)` doesn't work anymore and should be named
like `pl$Struct(a = pl$Boolean)` (#1053).
- In `$all()` and `$any()`, the argument `drop_nulls` is renamed to `ignore_nulls`,
and this argument must now be named (#1050).

## Polars R Package 0.16.4

Expand Down
67 changes: 41 additions & 26 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -565,41 +565,56 @@ Expr_alias = use_extendr_wrapper
#' Apply logical AND on a column
#'
#' Check if all values in a Boolean column are `TRUE`. This method is an
#' expression - not to be confused with `pl$all()` which is a function to select
#' all columns.
#' @param drop_nulls Logical. Default TRUE, as name says.
#' @return Boolean literal
#' expression - not to be confused with [`pl$all()`][pl_all] which is a function
#' to select all columns.
#'
#' @param ... Ignored.
#' @param ignore_nulls If `TRUE` (default), ignore null values. If `FALSE`,
#' [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to
#' deal with nulls: in `$all()`, if the column contains any null values and no
#' `FALSE` values, the output is null; in `$any()`, if the column contains any
#' null values and no `TRUE` values, the output is null.
#'
#' @return A logical value
#' @examples
#' pl$DataFrame(
#' all = c(TRUE, TRUE),
#' any = c(TRUE, FALSE),
#' none = c(FALSE, FALSE)
#' )$select(
#' # the first $all() selects all columns, the second one applies the AND
#' # logical on the values
#' pl$all()$all()
#' df = pl$DataFrame(
#' a = c(TRUE, TRUE),
#' b = c(TRUE, FALSE),
#' c = c(NA, TRUE),
#' d = c(NA, NA)
#' )
Expr_all = function(drop_nulls = TRUE) {
.pr$Expr$all(self, drop_nulls) |>
unwrap("in $all()")
#'
#' # By default, ignore null values. If there are only nulls, then all() returns
#' # TRUE.
#' df$select(pl$col("*")$all())
#'
#' # If we set ignore_nulls = FALSE, then we don't know if all values in column
#' # "c" are TRUE, so it returns null
#' df$select(pl$col("*")$all(ignore_nulls = FALSE))
Expr_all = function(..., ignore_nulls = TRUE) {
  # `...` is accepted but never read, so `ignore_nulls` can only be supplied
  # by name.
  # The low-level `all` binding is called on `self`; `unwrap()` receives its
  # result together with the context string used for error reporting.
  unwrap(.pr$Expr$all(self, ignore_nulls), "in $all():")
}

#' Apply logical OR on a column
#'
#' Check if any value in a Boolean column is `TRUE`.
#' @param drop_nulls Logical. Default TRUE, as name says.
#' @return Boolean literal
#'
#' @inherit Expr_all params return
#' @examples
#' pl$DataFrame(
#' all = c(TRUE, TRUE),
#' any = c(TRUE, FALSE),
#' none = c(FALSE, FALSE)
#' )$select(
#' pl$all()$any()
#' df = pl$DataFrame(
#' a = c(TRUE, FALSE),
#' b = c(FALSE, FALSE),
#' c = c(NA, FALSE)
#' )
Expr_any = function(drop_nulls = TRUE) {
.pr$Expr$any(self, drop_nulls) |>
unwrap("in $all()")
#'
#' df$select(pl$col("*")$any())
#'
#' # If we set ignore_nulls = FALSE, then we don't know if any value in column
#' # "c" is TRUE, so it returns null
#' df$select(pl$col("*")$any(ignore_nulls = FALSE))
Expr_any = function(..., ignore_nulls = TRUE) {
  # `...` is accepted but never read, so `ignore_nulls` can only be supplied
  # by name.
  # The low-level `any` binding is called on `self`; `unwrap()` receives its
  # result together with the context string used for error reporting.
  unwrap(.pr$Expr$any(self, ignore_nulls), "in $any():")
}

#' Count elements
Expand Down
4 changes: 2 additions & 2 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -902,9 +902,9 @@ RPolarsExpr$unique_stable <- function() .Call(wrap__RPolarsExpr__unique_stable,

RPolarsExpr$agg_groups <- function() .Call(wrap__RPolarsExpr__agg_groups, self)

RPolarsExpr$all <- function(drop_nulls) .Call(wrap__RPolarsExpr__all, self, drop_nulls)
RPolarsExpr$all <- function(ignore_nulls) .Call(wrap__RPolarsExpr__all, self, ignore_nulls)

RPolarsExpr$any <- function(drop_nulls) .Call(wrap__RPolarsExpr__any, self, drop_nulls)
RPolarsExpr$any <- function(ignore_nulls) .Call(wrap__RPolarsExpr__any, self, ignore_nulls)

RPolarsExpr$is_between <- function(lower, upper, closed) .Call(wrap__RPolarsExpr__is_between, self, lower, upper, closed)

Expand Down
36 changes: 23 additions & 13 deletions man/Expr_all.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 18 additions & 9 deletions man/Expr_any.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions src/rust/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1578,11 +1578,11 @@ impl RPolarsExpr {

// boolean

pub fn all(&self, drop_nulls: Robj) -> RResult<Self> {
Ok(self.0.clone().all(robj_to!(bool, drop_nulls)?).into())
/// Builds the "logical AND over the column" expression from this one.
///
/// `ignore_nulls` arrives as an R object and is converted to `bool` first; if
/// that conversion fails, its error is returned and no expression is built.
pub fn all(&self, ignore_nulls: Robj) -> RResult<Self> {
    let ignore_nulls = robj_to!(bool, ignore_nulls)?;
    let reduced = self.0.clone().all(ignore_nulls);
    Ok(reduced.into())
}
pub fn any(&self, drop_nulls: Robj) -> RResult<Self> {
Ok(self.0.clone().any(robj_to!(bool, drop_nulls)?).into())
/// Builds the "logical OR over the column" expression from this one.
///
/// `ignore_nulls` arrives as an R object and is converted to `bool` first; if
/// that conversion fails, its error is returned and no expression is built.
pub fn any(&self, ignore_nulls: Robj) -> RResult<Self> {
    let ignore_nulls = robj_to!(bool, ignore_nulls)?;
    let reduced = self.0.clone().any(ignore_nulls);
    Ok(reduced.into())
}

fn is_between(&self, lower: Robj, upper: Robj, closed: Robj) -> RResult<Self> {
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-datatype.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,10 @@ test_that("Enum", {

df = pl$DataFrame(x = "a", y = "b", z = "c")$
with_columns(
pl$col("x")$cast(pl$Enum(c("a", "b", "c"))),
pl$col("y")$cast(pl$Enum(c("a", "b", "c"))),
pl$col("z")$cast(pl$Enum(c("a", "c")))
)
pl$col("x")$cast(pl$Enum(c("a", "b", "c"))),
pl$col("y")$cast(pl$Enum(c("a", "b", "c"))),
pl$col("z")$cast(pl$Enum(c("a", "c")))
)

expect_identical(
df$select(x_eq_y = pl$col("x") == pl$col("y"))$to_list(),
Expand Down
46 changes: 42 additions & 4 deletions tests/testthat/test-expr_expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -2585,8 +2585,8 @@ test_that("rolling: error if period is negative", {

df = pl$DataFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$
with_columns(
pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
)
pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
)
expect_grepl_error(
df$select(pl$col("a")$rolling(index_column = "dt", period = "-2d")),
"rolling window period should be strictly positive"
Expand All @@ -2601,8 +2601,8 @@ test_that("rolling: passing a difftime as period works", {

df = pl$DataFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$
with_columns(
pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
)
pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
)
expect_identical(
df$select(
sum_a_offset1 = pl$sum("a")$rolling(index_column = "dt", period = "2d", offset = "1d")
Expand Down Expand Up @@ -2804,3 +2804,41 @@ test_that("qcut works", {
df$select(qcut = pl$col("foo")$qcut(c("a", "b")))
)
})

# `$any()` is checked in both null-handling modes against the same four
# columns: one TRUE present, all FALSE, a null next to FALSE, and all null.
test_that("any works", {
df = pl$DataFrame(
a = c(TRUE, FALSE),
b = c(FALSE, FALSE),
c = c(NA, FALSE),
d = c(NA, NA)
)

# Default (ignore_nulls = TRUE): nulls are skipped, so "c" and the all-null
# "d" both come out FALSE.
expect_identical(
df$select(pl$col("*")$any())$to_list(),
list(a = TRUE, b = FALSE, c = FALSE, d = FALSE)
)

# ignore_nulls = FALSE: a column holding a null and no TRUE yields null
# ("c", "d"); "a" and "b" contain no nulls and are unchanged.
expect_identical(
df$select(pl$col("*")$any(ignore_nulls = FALSE))$to_list(),
list(a = TRUE, b = FALSE, c = NA, d = NA)
)
})

# `$all()` is checked in both null-handling modes against the same four
# columns: all TRUE, one FALSE present, a null next to TRUE, and all null.
test_that("all works", {
df = pl$DataFrame(
a = c(TRUE, TRUE),
b = c(TRUE, FALSE),
c = c(NA, TRUE),
d = c(NA, NA)
)

# Default (ignore_nulls = TRUE): nulls are skipped, so "c" and the all-null
# "d" both come out TRUE.
expect_identical(
df$select(pl$col("*")$all())$to_list(),
list(a = TRUE, b = FALSE, c = TRUE, d = TRUE)
)

# ignore_nulls = FALSE: a column holding a null and no FALSE yields null
# ("c", "d"); "a" and "b" contain no nulls and are unchanged.
expect_identical(
df$select(pl$col("*")$all(ignore_nulls = FALSE))$to_list(),
list(a = TRUE, b = FALSE, c = NA, d = NA)
)
})

0 comments on commit 3d2a333

Please sign in to comment.