Rename argument drop_nulls to ignore_nulls in `$all()` and `$any()` (#1050)

Co-authored-by: eitsupi <ts1s1andn@gmail.com>
pola-rs · May 22, 2024 · 3d2a333 · 3d2a333
1 parent e0447c1
commit 3d2a333
Show file tree

Hide file tree

Showing 8 changed files with 136 additions and 62 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -13,6 +13,8 @@
 - `pl$Struct()` now only accepts named inputs and objects of class `RPolarsField`.
   For example, `pl$Struct(pl$Boolean)` doesn't work anymore and should be named
   like `pl$Struct(a = pl$Boolean)` (#1053).
+- In `$all()` and `$any()`, the argument `drop_nulls` is renamed `ignore_nulls`,
+  and this argument must be named (#1050).
 
 ## Polars R Package 0.16.4
 

diff --git a/R/expr__expr.R b/R/expr__expr.R
@@ -565,41 +565,56 @@ Expr_alias = use_extendr_wrapper
 #' Apply logical AND on a column
 #'
 #' Check if all values in a Boolean column are `TRUE`. This method is an
-#' expression - not to be confused with `pl$all()` which is a function to select
-#' all columns.
-#' @param drop_nulls Logical. Default TRUE, as name says.
-#' @return Boolean literal
+#' expression - not to be confused with [`pl$all()`][pl_all] which is a function
+#' to select all columns.
+#'
+#' @param ... Ignored.
+#' @param ignore_nulls If `TRUE` (default), ignore null values. If `FALSE`,
+#' [Kleene logic](https://en.wikipedia.org/wiki/Three-valued_logic) is used to
+#' deal with nulls: if the column contains any null values and no `FALSE` values
+#' (for `$all()`) or no `TRUE` values (for `$any()`), the output is null.
+#'
+#' @return A logical value
 #' @examples
-#' pl$DataFrame(
-#'   all = c(TRUE, TRUE),
-#'   any = c(TRUE, FALSE),
-#'   none = c(FALSE, FALSE)
-#' )$select(
-#'   # the first $all() selects all columns, the second one applies the AND
-#'   # logical on the values
-#'   pl$all()$all()
+#' df = pl$DataFrame(
+#'   a = c(TRUE, TRUE),
+#'   b = c(TRUE, FALSE),
+#'   c = c(NA, TRUE),
+#'   d = c(NA, NA)
 #' )
-Expr_all = function(drop_nulls = TRUE) {
-  .pr$Expr$all(self, drop_nulls) |>
-    unwrap("in $all()")
+#'
+#' # By default, ignore null values. If there are only nulls, then all() returns
+#' # TRUE.
+#' df$select(pl$col("*")$all())
+#'
+#' # If we set ignore_nulls = FALSE, then we don't know if all values in column
+#' # "c" are TRUE, so it returns null
+#' df$select(pl$col("*")$all(ignore_nulls = FALSE))
+Expr_all = function(..., ignore_nulls = TRUE) {
+  .pr$Expr$all(self, ignore_nulls) |>
+    unwrap("in $all():")
 }
 
 #' Apply logical OR on a column
 #'
 #' Check if any boolean value in a Boolean column is `TRUE`.
-#' @param drop_nulls Logical. Default TRUE, as name says.
-#' @return Boolean literal
+#'
+#' @inherit Expr_all params return
 #' @examples
-#' pl$DataFrame(
-#'   all = c(TRUE, TRUE),
-#'   any = c(TRUE, FALSE),
-#'   none = c(FALSE, FALSE)
-#' )$select(
-#'   pl$all()$any()
+#' df = pl$DataFrame(
+#'   a = c(TRUE, FALSE),
+#'   b = c(FALSE, FALSE),
+#'   c = c(NA, FALSE)
 #' )
-Expr_any = function(drop_nulls = TRUE) {
-  .pr$Expr$any(self, drop_nulls) |>
-    unwrap("in $all()")
+#'
+#' df$select(pl$col("*")$any())
+#'
+#' # If we set ignore_nulls = FALSE, then we don't know if any value in column
+#' # "c" is TRUE, so it returns null
+#' df$select(pl$col("*")$any(ignore_nulls = FALSE))
+Expr_any = function(..., ignore_nulls = TRUE) {
+  .pr$Expr$any(self, ignore_nulls) |>
+    unwrap("in $any():")
 }
 
 #' Count elements

diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R
@@ -902,9 +902,9 @@ RPolarsExpr$unique_stable <- function() .Call(wrap__RPolarsExpr__unique_stable,
 
 RPolarsExpr$agg_groups <- function() .Call(wrap__RPolarsExpr__agg_groups, self)
 
-RPolarsExpr$all <- function(drop_nulls) .Call(wrap__RPolarsExpr__all, self, drop_nulls)
+RPolarsExpr$all <- function(ignore_nulls) .Call(wrap__RPolarsExpr__all, self, ignore_nulls)
 
-RPolarsExpr$any <- function(drop_nulls) .Call(wrap__RPolarsExpr__any, self, drop_nulls)
+RPolarsExpr$any <- function(ignore_nulls) .Call(wrap__RPolarsExpr__any, self, ignore_nulls)
 
 RPolarsExpr$is_between <- function(lower, upper, closed) .Call(wrap__RPolarsExpr__is_between, self, lower, upper, closed)
 

diff --git a/man/Expr_all.Rd b/man/Expr_all.Rd
diff --git a/man/Expr_any.Rd b/man/Expr_any.Rd
diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs
@@ -1578,11 +1578,11 @@ impl RPolarsExpr {
 
     // boolean
 
-    pub fn all(&self, drop_nulls: Robj) -> RResult<Self> {
-        Ok(self.0.clone().all(robj_to!(bool, drop_nulls)?).into())
+    pub fn all(&self, ignore_nulls: Robj) -> RResult<Self> {
+        Ok(self.0.clone().all(robj_to!(bool, ignore_nulls)?).into())
     }
-    pub fn any(&self, drop_nulls: Robj) -> RResult<Self> {
-        Ok(self.0.clone().any(robj_to!(bool, drop_nulls)?).into())
+    pub fn any(&self, ignore_nulls: Robj) -> RResult<Self> {
+        Ok(self.0.clone().any(robj_to!(bool, ignore_nulls)?).into())
     }
 
     fn is_between(&self, lower: Robj, upper: Robj, closed: Robj) -> RResult<Self> {

diff --git a/tests/testthat/test-datatype.R b/tests/testthat/test-datatype.R
@@ -222,10 +222,10 @@ test_that("Enum", {
 
   df = pl$DataFrame(x = "a", y = "b", z = "c")$
     with_columns(
-      pl$col("x")$cast(pl$Enum(c("a", "b", "c"))),
-      pl$col("y")$cast(pl$Enum(c("a", "b", "c"))),
-      pl$col("z")$cast(pl$Enum(c("a", "c")))
-    )
+    pl$col("x")$cast(pl$Enum(c("a", "b", "c"))),
+    pl$col("y")$cast(pl$Enum(c("a", "b", "c"))),
+    pl$col("z")$cast(pl$Enum(c("a", "c")))
+  )
 
   expect_identical(
     df$select(x_eq_y = pl$col("x") == pl$col("y"))$to_list(),

diff --git a/tests/testthat/test-expr_expr.R b/tests/testthat/test-expr_expr.R
@@ -2585,8 +2585,8 @@ test_that("rolling: error if period is negative", {
 
   df = pl$DataFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$
     with_columns(
-      pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
-    )
+    pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
+  )
   expect_grepl_error(
     df$select(pl$col("a")$rolling(index_column = "dt", period = "-2d")),
     "rolling window period should be strictly positive"
@@ -2601,8 +2601,8 @@ test_that("rolling: passing a difftime as period works", {
 
   df = pl$DataFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$
     with_columns(
-      pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
-    )
+    pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S")$set_sorted()
+  )
   expect_identical(
     df$select(
       sum_a_offset1 = pl$sum("a")$rolling(index_column = "dt", period = "2d", offset = "1d")
@@ -2804,3 +2804,41 @@ test_that("qcut works", {
     df$select(qcut = pl$col("foo")$qcut(c("a", "b")))
   )
 })
+
+test_that("any works", {
+  df = pl$DataFrame(
+    a = c(TRUE, FALSE),
+    b = c(FALSE, FALSE),
+    c = c(NA, FALSE),
+    d = c(NA, NA)
+  )
+
+  expect_identical(
+    df$select(pl$col("*")$any())$to_list(),
+    list(a = TRUE, b = FALSE, c = FALSE, d = FALSE)
+  )
+
+  expect_identical(
+    df$select(pl$col("*")$any(ignore_nulls = FALSE))$to_list(),
+    list(a = TRUE, b = FALSE, c = NA, d = NA)
+  )
+})
+
+test_that("all works", {
+  df = pl$DataFrame(
+    a = c(TRUE, TRUE),
+    b = c(TRUE, FALSE),
+    c = c(NA, TRUE),
+    d = c(NA, NA)
+  )
+
+  expect_identical(
+    df$select(pl$col("*")$all())$to_list(),
+    list(a = TRUE, b = FALSE, c = TRUE, d = TRUE)
+  )
+
+  expect_identical(
+    df$select(pl$col("*")$all(ignore_nulls = FALSE))$to_list(),
+    list(a = TRUE, b = FALSE, c = NA, d = NA)
+  )
+})