diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 477fa67e7c6..e1c391c4917 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -38,27 +38,33 @@ supported_dplyr_methods <- list(
   select = NULL,
   filter = NULL,
   collect = NULL,
-  summarise = NULL,
+  summarise = c(
+    "window functions not currently supported;",
+    'arguments `.drop = FALSE` and `.groups = "rowwise"` not supported'
+  ),
   group_by = NULL,
   groups = NULL,
   group_vars = NULL,
   group_by_drop_default = NULL,
   ungroup = NULL,
-  mutate = NULL,
+  mutate = c(
+    "window functions (e.g. things that require aggregation within groups)",
+    "not currently supported"
+  ),
   transmute = NULL,
   arrange = NULL,
   rename = NULL,
-  pull = NULL,
+  pull = "returns an Arrow [ChunkedArray], not an R vector",
   relocate = NULL,
   compute = NULL,
   collapse = NULL,
-  distinct = NULL,
-  left_join = NULL,
-  right_join = NULL,
-  inner_join = NULL,
-  full_join = NULL,
-  semi_join = NULL,
-  anti_join = NULL,
+  distinct = "`.keep_all = TRUE` not supported",
+  left_join = "the `copy` and `na_matches` arguments are ignored",
+  right_join = "the `copy` and `na_matches` arguments are ignored",
+  inner_join = "the `copy` and `na_matches` arguments are ignored",
+  full_join = "the `copy` and `na_matches` arguments are ignored",
+  semi_join = "the `copy` and `na_matches` arguments are ignored",
+  anti_join = "the `copy` and `na_matches` arguments are ignored",
   count = NULL,
   tally = NULL,
   rename_with = NULL,
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index 6106adbc5e4..3ecc32d3fe5 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -29,52 +29,59 @@ register_bindings_datetime <- function() {
 }
 
 register_bindings_datetime_utility <- function() {
-  register_binding("base::strptime", function(x,
-                                              format = "%Y-%m-%d %H:%M:%S",
-                                              tz = "",
-                                              unit = "ms") {
-    # Arrow uses unit for time parsing, strptime() does not.
-    # Arrow has no default option for strptime (format, unit),
-    # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms",
-    # (ARROW-12809)
-
-    unit <- make_valid_time_unit(
-      unit,
-      c(valid_time64_units, valid_time32_units)
-    )
-
-    output <- build_expr(
-      "strptime",
-      x,
-      options =
-        list(
-          format = format,
-          unit = unit,
-          error_is_null = TRUE
-        )
-    )
-
-    if (tz == "") {
-      tz <- Sys.timezone()
-    }
+  register_binding(
+    "base::strptime",
+    function(x,
+             format = "%Y-%m-%d %H:%M:%S",
+             tz = "",
+             unit = "ms") {
+      # Arrow uses unit for time parsing, strptime() does not.
+      # Arrow has no default option for strptime (format, unit),
+      # we suggest following format = "%Y-%m-%d %H:%M:%S", unit = MILLI/1L/"ms",
+      # (ARROW-12809)
+
+      unit <- make_valid_time_unit(
+        unit,
+        c(valid_time64_units, valid_time32_units)
+      )
 
-    # if a timestamp does not contain timezone information (i.e. it is
-    # "timezone-naive") we can attach timezone information (i.e. convert it into
-    # a "timezone-aware" timestamp) with `assume_timezone`
-    # if we want to cast to a different timezone, we can only do it for
-    # timezone-aware timestamps, not for timezone-naive ones
-    if (!is.null(tz)) {
       output <- build_expr(
-        "assume_timezone",
-        output,
+        "strptime",
+        x,
         options =
           list(
-            timezone = tz
+            format = format,
+            unit = unit,
+            error_is_null = TRUE
           )
       )
-    }
-    output
-  })
+
+      if (!is.null(tz) && tz == "") { # NULL must reach the is.null() check below
+        tz <- Sys.timezone()
+      }
+
+      # if a timestamp does not contain timezone information (i.e. it is
+      # "timezone-naive") we can attach timezone information (i.e. convert it into
+      # a "timezone-aware" timestamp) with `assume_timezone`
+      # if we want to cast to a different timezone, we can only do it for
+      # timezone-aware timestamps, not for timezone-naive ones
+      if (!is.null(tz)) {
+        output <- build_expr(
+          "assume_timezone",
+          output,
+          options =
+            list(
+              timezone = tz
+            )
+        )
+      }
+      output
+    },
+    notes = c(
+      "accepts a `unit` argument not present in the `base` function.",
+      'Valid values are "s", "ms" (default), "us", "ns".'
+    )
+  )
 
   register_binding("base::strftime", function(x,
                                               format = "",
@@ -251,23 +258,27 @@ register_bindings_datetime_components <- function() {
 }
 
 register_bindings_datetime_conversion <- function() {
-  register_binding("lubridate::make_datetime", function(year = 1970L,
-                                                        month = 1L,
-                                                        day = 1L,
-                                                        hour = 0L,
-                                                        min = 0L,
-                                                        sec = 0,
-                                                        tz = "UTC") {
-
-    # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820).
-    # Stop if tz other than 'UTC' is provided.
-    if (tz != "UTC") {
-      arrow_not_supported("Time zone other than 'UTC'")
-    }
+  register_binding(
+    "lubridate::make_datetime",
+    function(year = 1970L,
+             month = 1L,
+             day = 1L,
+             hour = 0L,
+             min = 0L,
+             sec = 0,
+             tz = "UTC") {
+
+      # ParseTimestampStrptime currently ignores the timezone information (ARROW-12820).
+      # Stop if tz other than 'UTC' is provided.
+      if (tz != "UTC") {
+        arrow_not_supported("Time zone other than 'UTC'")
+      }
 
-    x <- call_binding("str_c", year, month, day, hour, min, sec, sep = "-")
-    build_expr("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L))
-  })
+      x <- call_binding("str_c", year, month, day, hour, min, sec, sep = "-")
+      build_expr("strptime", x, options = list(format = "%Y-%m-%d-%H-%M-%S", unit = 0L))
+    },
+    notes = "only supports UTC (default) timezone"
+  )
 
   register_binding("lubridate::make_date", function(year = 1970L,
                                                     month = 1L,
@@ -305,37 +316,44 @@ register_bindings_datetime_conversion <- function() {
     call_binding("make_datetime", year, month, day, hour, min, sec, tz)
   })
 
-  register_binding("base::as.Date", function(x,
-                                             format = NULL,
-                                             tryFormats = "%Y-%m-%d",
-                                             origin = "1970-01-01",
-                                             tz = "UTC") {
-    if (is.null(format) && length(tryFormats) > 1) {
-      abort(
-        paste(
-          "`as.Date()` with multiple `tryFormats` is not supported in Arrow,",
-          "consider using the lubridate specialised parsing functions such as, `ymd()`, `ymd()`, etc."
+  register_binding(
+    "base::as.Date",
+    function(x,
+             format = NULL,
+             tryFormats = "%Y-%m-%d",
+             origin = "1970-01-01",
+             tz = "UTC") {
+      if (is.null(format) && length(tryFormats) > 1) {
+        abort(
+          paste(
+            "`as.Date()` with multiple `tryFormats` is not supported in Arrow.",
+            "Consider using the lubridate specialised parsing functions `ymd()`, `ydm()`, etc."
+          )
         )
-      )
-    }
+      }
 
-    # base::as.Date() and lubridate::as_date() differ in the way they use the
-    # `tz` argument. Both cast to the desired timezone, if present. The
-    # difference appears when the `tz` argument is not set: `as.Date()` uses the
-    # default value ("UTC"), while `as_date()` keeps the original attribute
-    # => we only cast when we want the behaviour of the base version or when
-    # `tz` is set (i.e. not NULL)
-    if (call_binding("is.POSIXct", x)) {
-      x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))
-    }
+      # base::as.Date() and lubridate::as_date() differ in the way they use the
+      # `tz` argument. Both cast to the desired timezone, if present. The
+      # difference appears when the `tz` argument is not set: `as.Date()` uses the
+      # default value ("UTC"), while `as_date()` keeps the original attribute
+      # => we only cast when we want the behaviour of the base version or when
+      # `tz` is set (i.e. not NULL)
+      if (call_binding("is.POSIXct", x)) {
+        x <- build_expr("cast", x, options = cast_options(to_type = timestamp(timezone = tz)))
+      }
 
-    binding_as_date(
-      x = x,
-      format = format,
-      tryFormats = tryFormats,
-      origin = origin
+      binding_as_date(
+        x = x,
+        format = format,
+        tryFormats = tryFormats,
+        origin = origin
+      )
+    },
+    notes = c(
+      "Multiple `tryFormats` not supported in Arrow.",
+      "Consider using the lubridate specialised parsing functions `ymd()`, `ydm()`, etc."
     )
-  })
+  )
 
   register_binding("lubridate::as_date", function(x,
                                                   format = NULL,
@@ -412,108 +430,127 @@ register_bindings_datetime_conversion <- function() {
 }
 
 register_bindings_duration <- function() {
-  register_binding("base::difftime", function(time1,
-                                              time2,
-                                              tz,
-                                              units = "secs") {
-    if (units != "secs") {
-      abort("`difftime()` with units other than `secs` not supported in Arrow")
-    }
+  register_binding(
+    "base::difftime",
+    function(time1,
+             time2,
+             tz,
+             units = "secs") {
+      if (units != "secs") {
+        abort("`difftime()` with units other than `secs` not supported in Arrow")
+      }
 
-    if (!missing(tz)) {
-      warn("`tz` argument is not supported in Arrow, so it will be ignored")
-    }
+      if (!missing(tz)) {
+        warn("`tz` argument is not supported in Arrow, so it will be ignored")
+      }
 
-    # cast to timestamp if time1 and time2 are not dates or timestamp expressions
-    # (the subtraction of which would output a `duration`)
-    if (!call_binding("is.instant", time1)) {
-      time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp()))
-    }
+      # cast to timestamp if time1 and time2 are not dates or timestamp expressions
+      # (the subtraction of which would output a `duration`)
+      if (!call_binding("is.instant", time1)) {
+        time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp()))
+      }
 
-    if (!call_binding("is.instant", time2)) {
-      time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp()))
-    }
+      if (!call_binding("is.instant", time2)) {
+        time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp()))
+      }
 
-    # if time1 or time2 are timestamps they cannot be expressed in "s" /seconds
-    # otherwise they cannot be added subtracted with durations
-    # TODO delete the casting to "us" once
-    # https://issues.apache.org/jira/browse/ARROW-16060 is solved
-    if (inherits(time1, "Expression") &&
-      time1$type_id() %in% Type[c("TIMESTAMP")] && time1$type()$unit() != 2L) {
-      time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp("us")))
-    }
+      # if time1 or time2 are timestamps they cannot be expressed in "s" /seconds
+      # otherwise they cannot be added subtracted with durations
+      # TODO delete the casting to "us" once
+      # https://issues.apache.org/jira/browse/ARROW-16060 is solved
+      if (inherits(time1, "Expression") &&
+        time1$type_id() %in% Type[c("TIMESTAMP")] && time1$type()$unit() != 2L) {
+        time1 <- build_expr("cast", time1, options = cast_options(to_type = timestamp("us")))
+      }
 
-    if (inherits(time2, "Expression") &&
-      time2$type_id() %in% Type[c("TIMESTAMP")] && time2$type()$unit() != 2L) {
-      time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp("us")))
-    }
+      if (inherits(time2, "Expression") &&
+        time2$type_id() %in% Type[c("TIMESTAMP")] && time2$type()$unit() != 2L) {
+        time2 <- build_expr("cast", time2, options = cast_options(to_type = timestamp("us")))
+      }
 
-    # we need to go build the subtract expression instead of `time1 - time2` to
-    # prevent complaints when we try to subtract an R object from an Expression
-    subtract_output <- build_expr("-", time1, time2)
-    build_expr("cast", subtract_output, options = cast_options(to_type = duration("s")))
-  })
-  register_binding("base::as.difftime", function(x,
-                                                 format = "%X",
-                                                 units = "secs") {
-    # windows doesn't seem to like "%X"
-    if (format == "%X" & tolower(Sys.info()[["sysname"]]) == "windows") {
-      format <- "%H:%M:%S"
-    }
+      # we need to go build the subtract expression instead of `time1 - time2` to
+      # prevent complaints when we try to subtract an R object from an Expression
+      subtract_output <- build_expr("-", time1, time2)
+      build_expr("cast", subtract_output, options = cast_options(to_type = duration("s")))
+    },
+    notes = c(
+      'only supports `units = "secs"` (the default);',
+      "`tz` argument not supported"
+    )
+  )
 
-    if (units != "secs") {
-      abort("`as.difftime()` with units other than 'secs' not supported in Arrow")
-    }
+  register_binding(
+    "base::as.difftime",
+    function(x,
+             format = "%X",
+             units = "secs") {
+      # "%X" doesn't seem to work on Windows, so use an explicit format there
+      if (format == "%X" && tolower(Sys.info()[["sysname"]]) == "windows") {
+        format <- "%H:%M:%S"
+      }
 
-    if (call_binding("is.character", x)) {
-      x <- build_expr("strptime", x, options = list(format = format, unit = 0L))
-      # we do a final cast to duration ("s") at the end
-      x <- make_duration(x$cast(time64("us")), unit = "us")
-    }
+      if (units != "secs") {
+        abort("`as.difftime()` with units other than 'secs' not supported in Arrow")
+      }
 
-    # numeric -> duration not supported in Arrow yet so we use int64() as an
-    # intermediate step
-    # TODO: revisit after ARROW-15862
+      if (call_binding("is.character", x)) {
+        x <- build_expr("strptime", x, options = list(format = format, unit = 0L))
+        # we do a final cast to duration ("s") at the end
+        x <- make_duration(x$cast(time64("us")), unit = "us")
+      }
 
-    if (call_binding("is.numeric", x)) {
-      # coerce x to be int64(). it should work for integer-like doubles and fail
-      # for pure doubles
-      # if we abort for all doubles, we risk erroring in cases in which
-      # coercion to int64() would work
-      x <- build_expr("cast", x, options = cast_options(to_type = int64()))
-    }
+      # numeric -> duration not supported in Arrow yet so we use int64() as an
+      # intermediate step
+      # TODO: revisit after ARROW-15862
 
-    build_expr("cast", x, options = cast_options(to_type = duration(unit = "s")))
-  })
+      if (call_binding("is.numeric", x)) {
+        # coerce x to be int64(). it should work for integer-like doubles and fail
+        # for pure doubles
+        # if we abort for all doubles, we risk erroring in cases in which
+        # coercion to int64() would work
+        x <- build_expr("cast", x, options = cast_options(to_type = int64()))
+      }
+
+      build_expr("cast", x, options = cast_options(to_type = duration(unit = "s")))
+    },
+    notes = 'only supports `units = "secs"` (the default)'
+  )
 }
 
 register_bindings_duration_constructor <- function() {
-  register_binding("lubridate::make_difftime", function(num = NULL,
-                                                        units = "secs",
-                                                        ...) {
-    if (units != "secs") {
-      abort("`make_difftime()` with units other than 'secs' not supported in Arrow")
-    }
+  register_binding(
+    "lubridate::make_difftime",
+    function(num = NULL,
+             units = "secs",
+             ...) {
+      if (units != "secs") {
+        abort("`make_difftime()` with units other than 'secs' not supported in Arrow")
+      }
 
-    chunks <- list(...)
+      chunks <- list(...)
 
-    # lubridate concatenates durations passed via the `num` argument with those
-    # passed via `...` resulting in a vector of length 2 - which is virtually
-    # unusable in a dplyr pipeline. Arrow errors in this situation
-    if (!is.null(num) && length(chunks) > 0) {
-      abort("`make_difftime()` with both `num` and `...` not supported in Arrow")
-    }
+      # lubridate concatenates durations passed via the `num` argument with those
+      # passed via `...` resulting in a vector of length 2 - which is virtually
+      # unusable in a dplyr pipeline. Arrow errors in this situation
+      if (!is.null(num) && length(chunks) > 0) {
+        abort("`make_difftime()` with both `num` and `...` not supported in Arrow")
+      }
 
-    if (!is.null(num)) {
-      # build duration from num if present
-      duration <- num
-    } else {
-      # build duration from chunks when nothing is passed via ...
-      duration <- duration_from_chunks(chunks)
-    }
+      if (!is.null(num)) {
+        # build duration from num if present
+        duration <- num
+      } else {
+        # build duration from chunks when nothing is passed via ...
+        duration <- duration_from_chunks(chunks)
+      }
 
-    make_duration(duration, "s")
-  })
+      make_duration(duration, "s")
+    },
+    notes = c(
+      'only supports `units = "secs"` (the default);',
+      "providing both `num` and `...` is not supported"
+    )
+  )
 }
 
 register_bindings_duration_helpers <- function() {
@@ -533,52 +570,62 @@ register_bindings_duration_helpers <- function() {
     )
   }
 
-  register_binding("lubridate::dpicoseconds", function(x = 1) {
-    abort("Duration in picoseconds not supported in Arrow.")
-  })
+  register_binding(
+    "lubridate::dpicoseconds",
+    function(x = 1) {
+      abort("Duration in picoseconds not supported in Arrow.")
+    },
+    notes = "not supported"
+  )
 }
 
 register_bindings_datetime_parsers <- function() {
-  register_binding("lubridate::parse_date_time", function(x,
-                                                          orders,
-                                                          tz = "UTC",
-                                                          truncated = 0,
-                                                          quiet = TRUE,
-                                                          exact = FALSE) {
-    if (!quiet) {
-      arrow_not_supported("`quiet = FALSE`")
-    }
+  register_binding(
+    "lubridate::parse_date_time",
+    function(x,
+             orders,
+             tz = "UTC",
+             truncated = 0,
+             quiet = TRUE,
+             exact = FALSE) {
+      if (!quiet) {
+        arrow_not_supported("`quiet = FALSE`")
+      }
 
-    if (truncated > 0) {
-      if (truncated > (nchar(orders) - 3)) {
-        arrow_not_supported(paste0("a value for `truncated` > ", nchar(orders) - 3))
+      if (truncated > 0) {
+        if (truncated > (nchar(orders) - 3)) {
+          arrow_not_supported(paste0("a value for `truncated` > ", nchar(orders) - 3))
+        }
+        # build several orders for truncated formats
+        orders <- map_chr(0:truncated, ~ substr(orders, start = 1, stop = nchar(orders) - .x))
       }
-      # build several orders for truncated formats
-      orders <- map_chr(0:truncated, ~ substr(orders, start = 1, stop = nchar(orders) - .x))
-    }
 
-    if (!inherits(x, "Expression")) {
-      x <- Expression$scalar(x)
-    }
+      if (!inherits(x, "Expression")) {
+        x <- Expression$scalar(x)
+      }
 
-    if (exact == TRUE) {
-      # no data processing takes place & we don't derive formats
-      parse_attempts <- build_strptime_exprs(x, orders)
-    } else {
-      parse_attempts <- attempt_parsing(x, orders = orders)
-    }
+      if (exact == TRUE) {
+        # no data processing takes place & we don't derive formats
+        parse_attempts <- build_strptime_exprs(x, orders)
+      } else {
+        parse_attempts <- attempt_parsing(x, orders = orders)
+      }
 
-    coalesce_output <- build_expr("coalesce", args = parse_attempts)
+      coalesce_output <- build_expr("coalesce", args = parse_attempts)
 
-    # we need this binding to be able to handle a NULL `tz`, which, in turn,
-    # will be used by bindings such as `ymd()` to return a date or timestamp,
-    # based on whether tz is NULL or not
-    if (!is.null(tz)) {
-      build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
-    } else {
-      coalesce_output
-    }
-  })
+      # we need this binding to be able to handle a NULL `tz`, which, in turn,
+      # will be used by bindings such as `ymd()` to return a date or timestamp,
+      # based on whether tz is NULL or not
+      if (!is.null(tz)) {
+        build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
+      } else {
+        coalesce_output
+      }
+    },
+    notes = c(
+      "`quiet = FALSE` is not supported"
+    )
+  )
 
   parser_vec <- c(
     "ymd", "ydm", "mdy", "myd", "dmy", "dym", "ym", "my", "yq",
@@ -610,45 +657,50 @@ register_bindings_datetime_parsers <- function() {
   for (order in parser_vec) {
     register_binding(
       paste0("lubridate::", tolower(order)),
-      parser_map_factory(order)
+      parser_map_factory(order),
+      notes = "`locale` argument not supported"
     )
   }
 
-  register_binding("lubridate::fast_strptime", function(x,
-                                                        format,
-                                                        tz = "UTC",
-                                                        lt = FALSE,
-                                                        cutoff_2000 = 68L) {
-    # `lt` controls the output `lt = TRUE` returns a POSIXlt (which doesn't play
-    # well with mutate, for example)
-    if (lt) {
-      arrow_not_supported("`lt = TRUE` argument")
-    }
-
-    # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16596 is done
-    if (cutoff_2000 != 68L) {
-      arrow_not_supported("`cutoff_2000` != 68L argument")
-    }
+  register_binding(
+    "lubridate::fast_strptime",
+    function(x,
+             format,
+             tz = "UTC",
+             lt = FALSE,
+             cutoff_2000 = 68L) {
+      # `lt` controls the output `lt = TRUE` returns a POSIXlt (which doesn't play
+      # well with mutate, for example)
+      if (lt) {
+        arrow_not_supported("`lt = TRUE` argument")
+      }
 
-    parse_attempt_expressions <- list()
+      # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16596 is done
+      if (cutoff_2000 != 68L) {
+        arrow_not_supported("`cutoff_2000` != 68L argument")
+      }
 
-    parse_attempt_expressions <- map(
-      format,
-      ~ build_expr(
-        "strptime",
-        x,
-        options = list(
-          format = .x,
-          unit = 0L,
-          error_is_null = TRUE
+      parse_attempt_expressions <- list()
+
+      parse_attempt_expressions <- map(
+        format,
+        ~ build_expr(
+          "strptime",
+          x,
+          options = list(
+            format = .x,
+            unit = 0L,
+            error_is_null = TRUE
+          )
         )
       )
-    )
 
-    coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions)
+      coalesce_output <- build_expr("coalesce", args = parse_attempt_expressions)
 
-    build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
-  })
+      build_expr("assume_timezone", coalesce_output, options = list(timezone = tz))
+    },
+    notes = "non-default values of `lt` and `cutoff_2000` not supported"
+  )
 }
 
 register_bindings_datetime_rounding <- function() {
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index d399e37e101..8132537af87 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -36,38 +36,38 @@
 #' which returns an `arrow` [Table], or `collect()`, which pulls the resulting
 #' Table into an R `data.frame`.
 #'
-#' * [`anti_join()`][dplyr::anti_join()]
+#' * [`anti_join()`][dplyr::anti_join()]: the `copy` and `na_matches` arguments are ignored
 #' * [`arrange()`][dplyr::arrange()]
 #' * [`collapse()`][dplyr::collapse()]
 #' * [`collect()`][dplyr::collect()]
 #' * [`compute()`][dplyr::compute()]
 #' * [`count()`][dplyr::count()]
-#' * [`distinct()`][dplyr::distinct()]
+#' * [`distinct()`][dplyr::distinct()]: `.keep_all = TRUE` not supported
 #' * [`explain()`][dplyr::explain()]
 #' * [`filter()`][dplyr::filter()]
-#' * [`full_join()`][dplyr::full_join()]
+#' * [`full_join()`][dplyr::full_join()]: the `copy` and `na_matches` arguments are ignored
 #' * [`glimpse()`][dplyr::glimpse()]
 #' * [`group_by()`][dplyr::group_by()]
 #' * [`group_by_drop_default()`][dplyr::group_by_drop_default()]
 #' * [`group_vars()`][dplyr::group_vars()]
 #' * [`groups()`][dplyr::groups()]
-#' * [`inner_join()`][dplyr::inner_join()]
-#' * [`left_join()`][dplyr::left_join()]
-#' * [`mutate()`][dplyr::mutate()]
-#' * [`pull()`][dplyr::pull()]
+#' * [`inner_join()`][dplyr::inner_join()]: the `copy` and `na_matches` arguments are ignored
+#' * [`left_join()`][dplyr::left_join()]: the `copy` and `na_matches` arguments are ignored
+#' * [`mutate()`][dplyr::mutate()]: window functions (e.g. things that require aggregation within groups) not currently supported
+#' * [`pull()`][dplyr::pull()]: returns an Arrow [ChunkedArray], not an R vector
 #' * [`relocate()`][dplyr::relocate()]
 #' * [`rename()`][dplyr::rename()]
 #' * [`rename_with()`][dplyr::rename_with()]
-#' * [`right_join()`][dplyr::right_join()]
+#' * [`right_join()`][dplyr::right_join()]: the `copy` and `na_matches` arguments are ignored
 #' * [`select()`][dplyr::select()]
-#' * [`semi_join()`][dplyr::semi_join()]
+#' * [`semi_join()`][dplyr::semi_join()]: the `copy` and `na_matches` arguments are ignored
 #' * [`show_query()`][dplyr::show_query()]
 #' * [`slice_head()`][dplyr::slice_head()]: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; `prop` only supported on queries where `nrow()` is knowable without evaluating
 #' * [`slice_max()`][dplyr::slice_max()]: slicing within groups not supported; `with_ties = TRUE` (dplyr default) is not supported; `prop` only supported on queries where `nrow()` is knowable without evaluating
 #' * [`slice_min()`][dplyr::slice_min()]: slicing within groups not supported; `with_ties = TRUE` (dplyr default) is not supported; `prop` only supported on queries where `nrow()` is knowable without evaluating
 #' * [`slice_sample()`][dplyr::slice_sample()]: slicing within groups not supported; `replace = TRUE` and the `weight_by` argument not supported; `n` only supported on queries where `nrow()` is knowable without evaluating
 #' * [`slice_tail()`][dplyr::slice_tail()]: slicing within groups not supported; Arrow datasets do not have row order, so tail is non-deterministic; `prop` only supported on queries where `nrow()` is knowable without evaluating
-#' * [`summarise()`][dplyr::summarise()]
+#' * [`summarise()`][dplyr::summarise()]: window functions not currently supported; arguments `.drop = FALSE` and `.groups = "rowwise"` not supported
 #' * [`tally()`][dplyr::tally()]
 #' * [`transmute()`][dplyr::transmute()]
 #' * [`ungroup()`][dplyr::ungroup()]
@@ -121,8 +121,9 @@
 #' * [`all()`][base::all()]
 #' * [`any()`][base::any()]
 #' * [`as.character()`][base::as.character()]
-#' * [`as.Date()`][base::as.Date()]
-#' * [`as.difftime()`][base::as.difftime()]
+#' * [`as.Date()`][base::as.Date()]: Multiple `tryFormats` not supported in Arrow.
+#' Consider using the lubridate specialised parsing functions `ymd()`, `ydm()`, etc.
+#' * [`as.difftime()`][base::as.difftime()]: only supports `units = "secs"` (the default)
 #' * [`as.double()`][base::as.double()]
 #' * [`as.integer()`][base::as.integer()]
 #' * [`as.logical()`][base::as.logical()]
@@ -130,8 +131,10 @@
 #' * [`asin()`][base::asin()]
 #' * [`ceiling()`][base::ceiling()]
 #' * [`cos()`][base::cos()]
-#' * [`data.frame()`][base::data.frame()]
-#' * [`difftime()`][base::difftime()]
+#' * [`data.frame()`][base::data.frame()]: `row.names` and `check.rows` arguments not supported;
+#' `stringsAsFactors` must be `FALSE`
+#' * [`difftime()`][base::difftime()]: only supports `units = "secs"` (the default);
+#' `tz` argument not supported
 #' * [`endsWith()`][base::endsWith()]
 #' * [`exp()`][base::exp()]
 #' * [`floor()`][base::floor()]
@@ -160,7 +163,7 @@
 #' * [`max()`][base::max()]
 #' * [`mean()`][base::mean()]
 #' * [`min()`][base::min()]
-#' * [`nchar()`][base::nchar()]
+#' * [`nchar()`][base::nchar()]: `allowNA = TRUE` and `keepNA = TRUE` not supported
 #' * [`paste()`][base::paste()]: the `collapse` argument is not yet supported
 #' * [`paste0()`][base::paste0()]: the `collapse` argument is not yet supported
 #' * [`pmax()`][base::pmax()]
@@ -171,11 +174,12 @@
 #' * [`sqrt()`][base::sqrt()]
 #' * [`startsWith()`][base::startsWith()]
 #' * [`strftime()`][base::strftime()]
-#' * [`strptime()`][base::strptime()]
+#' * [`strptime()`][base::strptime()]: accepts a `unit` argument not present in the `base` function.
+#' Valid values are "s", "ms" (default), "us", "ns".
 #' * [`strrep()`][base::strrep()]
 #' * [`strsplit()`][base::strsplit()]
 #' * [`sub()`][base::sub()]
-#' * [`substr()`][base::substr()]
+#' * [`substr()`][base::substr()]: `start` and `stop` must be length 1
 #' * [`substring()`][base::substring()]
 #' * [`sum()`][base::sum()]
 #' * [`tan()`][base::tan()]
@@ -217,20 +221,20 @@
 #' * [`dmilliseconds()`][lubridate::dmilliseconds()]
 #' * [`dminutes()`][lubridate::dminutes()]
 #' * [`dmonths()`][lubridate::dmonths()]
-#' * [`dmy()`][lubridate::dmy()]
-#' * [`dmy_h()`][lubridate::dmy_h()]
-#' * [`dmy_hm()`][lubridate::dmy_hm()]
-#' * [`dmy_hms()`][lubridate::dmy_hms()]
+#' * [`dmy()`][lubridate::dmy()]: `locale` argument not supported
+#' * [`dmy_h()`][lubridate::dmy_h()]: `locale` argument not supported
+#' * [`dmy_hm()`][lubridate::dmy_hm()]: `locale` argument not supported
+#' * [`dmy_hms()`][lubridate::dmy_hms()]: `locale` argument not supported
 #' * [`dnanoseconds()`][lubridate::dnanoseconds()]
-#' * [`dpicoseconds()`][lubridate::dpicoseconds()]
+#' * [`dpicoseconds()`][lubridate::dpicoseconds()]: not supported
 #' * [`dseconds()`][lubridate::dseconds()]
 #' * [`dst()`][lubridate::dst()]
 #' * [`dweeks()`][lubridate::dweeks()]
 #' * [`dyears()`][lubridate::dyears()]
-#' * [`dym()`][lubridate::dym()]
+#' * [`dym()`][lubridate::dym()]: `locale` argument not supported
 #' * [`epiweek()`][lubridate::epiweek()]
 #' * [`epiyear()`][lubridate::epiyear()]
-#' * [`fast_strptime()`][lubridate::fast_strptime()]
+#' * [`fast_strptime()`][lubridate::fast_strptime()]: non-default values of `lt` and `cutoff_2000` not supported
 #' * [`floor_date()`][lubridate::floor_date()]
 #' * [`format_ISO8601()`][lubridate::format_ISO8601()]
 #' * [`hour()`][lubridate::hour()]
@@ -242,18 +246,19 @@
 #' * [`isoyear()`][lubridate::isoyear()]
 #' * [`leap_year()`][lubridate::leap_year()]
 #' * [`make_date()`][lubridate::make_date()]
-#' * [`make_datetime()`][lubridate::make_datetime()]
-#' * [`make_difftime()`][lubridate::make_difftime()]
+#' * [`make_datetime()`][lubridate::make_datetime()]: only supports UTC (default) timezone
+#' * [`make_difftime()`][lubridate::make_difftime()]: only supports `units = "secs"` (the default);
+#' providing both `num` and `...` is not supported
 #' * [`mday()`][lubridate::mday()]
-#' * [`mdy()`][lubridate::mdy()]
-#' * [`mdy_h()`][lubridate::mdy_h()]
-#' * [`mdy_hm()`][lubridate::mdy_hm()]
-#' * [`mdy_hms()`][lubridate::mdy_hms()]
+#' * [`mdy()`][lubridate::mdy()]: `locale` argument not supported
+#' * [`mdy_h()`][lubridate::mdy_h()]: `locale` argument not supported
+#' * [`mdy_hm()`][lubridate::mdy_hm()]: `locale` argument not supported
+#' * [`mdy_hms()`][lubridate::mdy_hms()]: `locale` argument not supported
 #' * [`minute()`][lubridate::minute()]
 #' * [`month()`][lubridate::month()]
-#' * [`my()`][lubridate::my()]
-#' * [`myd()`][lubridate::myd()]
-#' * [`parse_date_time()`][lubridate::parse_date_time()]
+#' * [`my()`][lubridate::my()]: `locale` argument not supported
+#' * [`myd()`][lubridate::myd()]: `locale` argument not supported
+#' * [`parse_date_time()`][lubridate::parse_date_time()]: `quiet = FALSE` is not supported
 #' * [`pm()`][lubridate::pm()]
 #' * [`qday()`][lubridate::qday()]
 #' * [`quarter()`][lubridate::quarter()]
@@ -264,17 +269,17 @@
 #' * [`wday()`][lubridate::wday()]
 #' * [`week()`][lubridate::week()]
 #' * [`yday()`][lubridate::yday()]
-#' * [`ydm()`][lubridate::ydm()]
-#' * [`ydm_h()`][lubridate::ydm_h()]
-#' * [`ydm_hm()`][lubridate::ydm_hm()]
-#' * [`ydm_hms()`][lubridate::ydm_hms()]
+#' * [`ydm()`][lubridate::ydm()]: `locale` argument not supported
+#' * [`ydm_h()`][lubridate::ydm_h()]: `locale` argument not supported
+#' * [`ydm_hm()`][lubridate::ydm_hm()]: `locale` argument not supported
+#' * [`ydm_hms()`][lubridate::ydm_hms()]: `locale` argument not supported
 #' * [`year()`][lubridate::year()]
-#' * [`ym()`][lubridate::ym()]
-#' * [`ymd()`][lubridate::ymd()]
-#' * [`ymd_h()`][lubridate::ymd_h()]
-#' * [`ymd_hm()`][lubridate::ymd_hm()]
-#' * [`ymd_hms()`][lubridate::ymd_hms()]
-#' * [`yq()`][lubridate::yq()]
+#' * [`ym()`][lubridate::ym()]: `locale` argument not supported
+#' * [`ymd()`][lubridate::ymd()]: `locale` argument not supported
+#' * [`ymd_h()`][lubridate::ymd_h()]: `locale` argument not supported
+#' * [`ymd_hm()`][lubridate::ymd_hm()]: `locale` argument not supported
+#' * [`ymd_hms()`][lubridate::ymd_hms()]: `locale` argument not supported
+#' * [`yq()`][lubridate::yq()]: `locale` argument not supported
 #'
 #' ## methods
 #'
@@ -290,8 +295,9 @@
 #'
 #' ## stats
 #'
-#' * [`median()`][stats::median()]
-#' * [`quantile()`][stats::quantile()]
+#' * [`median()`][stats::median()]: approximate median (t-digest) is computed
+#' * [`quantile()`][stats::quantile()]: `probs` must be length 1;
+#' approximate quantile (t-digest) is computed
 #' * [`sd()`][stats::sd()]
 #' * [`var()`][stats::var()]
 #'
@@ -301,8 +307,10 @@
 #'
 #' ## stringr
 #'
+#' Pattern modifiers `coll()` and `boundary()` are not supported in any functions.
+#'
 #' * [`str_c()`][stringr::str_c()]: the `collapse` argument is not yet supported
-#' * [`str_count()`][stringr::str_count()]
+#' * [`str_count()`][stringr::str_count()]: `pattern` must be a length 1 character vector
 #' * [`str_detect()`][stringr::str_detect()]
 #' * [`str_dup()`][stringr::str_dup()]
 #' * [`str_ends()`][stringr::str_ends()]
@@ -311,9 +319,9 @@
 #' * [`str_pad()`][stringr::str_pad()]
 #' * [`str_replace()`][stringr::str_replace()]
 #' * [`str_replace_all()`][stringr::str_replace_all()]
-#' * [`str_split()`][stringr::str_split()]
+#' * [`str_split()`][stringr::str_split()]: Case-insensitive string splitting and splitting into 0 parts not supported
 #' * [`str_starts()`][stringr::str_starts()]
-#' * [`str_sub()`][stringr::str_sub()]
+#' * [`str_sub()`][stringr::str_sub()]: `start` and `end` must be length 1
 #' * [`str_to_lower()`][stringr::str_to_lower()]
 #' * [`str_to_title()`][stringr::str_to_title()]
 #' * [`str_to_upper()`][stringr::str_to_upper()]
diff --git a/r/R/dplyr-funcs-string.R b/r/R/dplyr-funcs-string.R
index 69102f2f710..4b87ed1e761 100644
--- a/r/R/dplyr-funcs-string.R
+++ b/r/R/dplyr-funcs-string.R
@@ -254,18 +254,22 @@ register_bindings_string_regex <- function() {
     notes = "not yet in a released version of `stringr`, but it is supported in `arrow`"
   )
 
-  register_binding("stringr::str_count", function(string, pattern) {
-    opts <- get_stringr_pattern_options(enexpr(pattern))
-    if (!is.string(pattern)) {
-      arrow_not_supported("`pattern` must be a length 1 character vector; other values")
-    }
-    arrow_fun <- ifelse(opts$fixed, "count_substring", "count_substring_regex")
-    Expression$create(
-      arrow_fun,
-      string,
-      options = list(pattern = opts$pattern, ignore_case = opts$ignore_case)
-    )
-  })
+  # Bind stringr::str_count() to Arrow's "count_substring" (fixed pattern) or
+  # "count_substring_regex" kernel, chosen from the parsed pattern modifier.
+  register_binding(
+    "stringr::str_count",
+    function(string, pattern) {
+      opts <- get_stringr_pattern_options(enexpr(pattern))
+      if (!is.string(pattern)) {
+        arrow_not_supported("`pattern` must be a length 1 character vector; other values")
+      }
+      # `opts$fixed` is expected to be a single TRUE/FALSE: use if/else, not ifelse()
+      arrow_fun <- if (opts$fixed) "count_substring" else "count_substring_regex"
+      count_opts <- list(pattern = opts$pattern, ignore_case = opts$ignore_case)
+      Expression$create(arrow_fun, string, options = count_opts)
+    },
+    notes = "`pattern` must be a length 1 character vector"
+  )
 
   register_binding("base::startsWith", function(x, prefix) {
     Expression$create(
@@ -372,58 +376,66 @@ register_bindings_string_regex <- function() {
     )
   })
 
-  register_binding("stringr::str_split", function(string,
-                                                  pattern,
-                                                  n = Inf,
-                                                  simplify = FALSE) {
-    opts <- get_stringr_pattern_options(enexpr(pattern))
-    arrow_fun <- ifelse(opts$fixed, "split_pattern", "split_pattern_regex")
-    if (opts$ignore_case) {
-      arrow_not_supported("Case-insensitive string splitting")
-    }
-    if (n == 0) {
-      arrow_not_supported("Splitting strings into zero parts")
-    }
-    if (identical(n, Inf)) {
-      n <- 0L
-    }
-    if (simplify) {
-      warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
-    }
-    # The max_splits option in the Arrow C++ library controls the maximum number
-    # of places at which the string is split, whereas the argument n to
-    # str_split() controls the maximum number of pieces to return. So we must
-    # subtract 1 from n to get max_splits.
-    Expression$create(
-      arrow_fun,
-      string,
-      options = list(
-        pattern = opts$pattern,
-        reverse = FALSE,
-        max_splits = n - 1L
+  # Bind stringr::str_split() to Arrow's "split_pattern" (fixed pattern) or
+  # "split_pattern_regex" kernel. Case-insensitive patterns and `n = 0` are
+  # reported as unsupported; `simplify = TRUE` only triggers a warning.
+  register_binding(
+    "stringr::str_split",
+    function(string, pattern, n = Inf, simplify = FALSE) {
+      opts <- get_stringr_pattern_options(enexpr(pattern))
+      arrow_fun <- if (opts$fixed) "split_pattern" else "split_pattern_regex"
+      if (opts$ignore_case) {
+        arrow_not_supported("Case-insensitive string splitting")
+      }
+      if (n == 0) {
+        arrow_not_supported("Splitting strings into zero parts")
+      }
+      if (identical(n, Inf)) {
+        n <- 0L
+      }
+      if (simplify) {
+        warning("Argument 'simplify = TRUE' will be ignored", call. = FALSE)
+      }
+      # The max_splits option in the Arrow C++ library controls the maximum number
+      # of places at which the string is split, whereas the argument n to
+      # str_split() controls the maximum number of pieces to return. So we must
+      # subtract 1 from n to get max_splits.
+      Expression$create(
+        arrow_fun,
+        string,
+        options = list(
+          pattern = opts$pattern,
+          reverse = FALSE,
+          max_splits = n - 1L
+        )
       )
-    )
-  })
+    },
+    notes = "Case-insensitive string splitting and splitting into 0 parts not supported"
+  )
 }
 
 register_bindings_string_other <- function() {
-  register_binding("base::nchar", function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
-    if (allowNA) {
-      arrow_not_supported("allowNA = TRUE")
-    }
-    if (is.na(keepNA)) {
-      keepNA <- !identical(type, "width")
-    }
-    if (!keepNA) {
-      # TODO: I think there is a fill_null kernel we could use, set null to 2
-      arrow_not_supported("keepNA = TRUE")
-    }
-    if (identical(type, "bytes")) {
-      Expression$create("binary_length", x)
-    } else {
-      Expression$create("utf8_length", x)
-    }
-  })
+  # Bind base::nchar() to Arrow's "binary_length" / "utf8_length" kernels.
+  # `keepNA = NA` (the default) resolves to FALSE only for `type = "width"`; an
+  # effective `keepNA = FALSE` is reported as unsupported, as is `allowNA = TRUE`.
+  register_binding(
+    "base::nchar",
+    function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
+      if (allowNA) {
+        arrow_not_supported("allowNA = TRUE")
+      }
+      if (is.na(keepNA)) {
+        keepNA <- !identical(type, "width")
+      }
+      if (!keepNA) {
+        # TODO: I think there is a fill_null kernel we could use, set null to 2
+        arrow_not_supported("keepNA = FALSE")
+      }
+      length_fun <- if (identical(type, "bytes")) "binary_length" else "utf8_length"
+      Expression$create(length_fun, x)
+    },
+    notes = "`allowNA = TRUE` and `keepNA = FALSE` not supported"
+  )
 
   register_binding("stringr::str_to_lower", function(string, locale = "en") {
     stop_if_locale_provided(locale)
@@ -450,37 +462,41 @@ register_bindings_string_other <- function() {
     Expression$create(trim_fun, string)
   })
 
-  register_binding("base::substr", function(x, start, stop) {
-    assert_that(
-      length(start) == 1,
-      msg = "`start` must be length 1 - other lengths are not supported in Arrow"
-    )
-    assert_that(
-      length(stop) == 1,
-      msg = "`stop` must be length 1 - other lengths are not supported in Arrow"
-    )
+  register_binding(
+    "base::substr",
+    function(x, start, stop) {
+      assert_that(
+        length(start) == 1,
+        msg = "`start` must be length 1 - other lengths are not supported in Arrow"
+      )
+      assert_that(
+        length(stop) == 1,
+        msg = "`stop` must be length 1 - other lengths are not supported in Arrow"
+      )
 
-    # substr treats values as if they're on a continous number line, so values
-    # 0 are effectively blank characters - set `start` to 1 here so Arrow mimics
-    # this behavior
-    if (start <= 0) {
-      start <- 1
-    }
+      # substr treats positions as if they're on a continuous number line, so
+      # positions <= 0 are effectively blank characters - set `start` to 1 here
+      # so Arrow mimics this behavior
+      if (start <= 0) {
+        start <- 1
+      }
 
-    # if `stop` is lower than `start`, this is invalid, so set `stop` to
-    # 0 so that an empty string will be returned (consistent with base::substr())
-    if (stop < start) {
-      stop <- 0
-    }
+      # if `stop` is lower than `start`, this is invalid, so set `stop` to
+      # 0 so that an empty string will be returned (consistent with base::substr())
+      if (stop < start) {
+        stop <- 0
+      }
 
-    Expression$create(
-      "utf8_slice_codeunits",
-      x,
-      # we don't need to subtract 1 from `stop` as C++ counts exclusively
-      # which effectively cancels out the difference in indexing between R & C++
-      options = list(start = start - 1L, stop = stop)
-    )
-  })
+      Expression$create(
+        "utf8_slice_codeunits",
+        x,
+        # we don't need to subtract 1 from `stop` as C++ counts exclusively
+        # which effectively cancels out the difference in indexing between R & C++
+        options = list(start = start - 1L, stop = stop)
+      )
+    },
+    notes = "`start` and `stop` must be length 1"
+  )
 
   register_binding("base::substring", function(text, first, last) {
     call_binding("substr", x = text, start = first, stop = last)
@@ -520,7 +536,9 @@ register_bindings_string_other <- function() {
       string,
       options = list(start = start, stop = end)
     )
-  })
+  },
+  notes = "`start` and `end` must be length 1"
+  )
 
 
   register_binding("stringr::str_pad", function(string,
diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R
index 429ea51e029..296133daeed 100644
--- a/r/R/dplyr-funcs-type.R
+++ b/r/R/dplyr-funcs-type.R
@@ -135,42 +135,49 @@ register_bindings_type_cast <- function() {
     )
   })
 
-  register_binding("base::data.frame", function(...,
-                                                row.names = NULL,
-                                                check.rows = NULL,
-                                                check.names = TRUE,
-                                                fix.empty.names = TRUE,
-                                                stringsAsFactors = FALSE) {
-    # we need a specific value of stringsAsFactors because the default was
-    # TRUE in R <= 3.6
-    if (!identical(stringsAsFactors, FALSE)) {
-      arrow_not_supported("stringsAsFactors = TRUE")
-    }
+  register_binding(
+    "base::data.frame",
+    function(...,
+             row.names = NULL,
+             check.rows = NULL,
+             check.names = TRUE,
+             fix.empty.names = TRUE,
+             stringsAsFactors = FALSE) {
+      # we need a specific value of stringsAsFactors because the default was
+      # TRUE in R <= 3.6
+      if (!identical(stringsAsFactors, FALSE)) {
+        arrow_not_supported("stringsAsFactors = TRUE")
+      }
 
-    # ignore row.names and check.rows with a warning
-    if (!is.null(row.names)) arrow_not_supported("row.names")
-    if (!is.null(check.rows)) arrow_not_supported("check.rows")
+      # non-NULL row.names / check.rows are reported as not supported
+      if (!is.null(row.names)) arrow_not_supported("row.names")
+      if (!is.null(check.rows)) arrow_not_supported("check.rows")
 
-    args <- dots_list(..., .named = fix.empty.names)
-    if (is.null(names(args))) {
-      names(args) <- rep("", length(args))
-    }
+      args <- dots_list(..., .named = fix.empty.names)
+      if (is.null(names(args))) {
+        names(args) <- rep("", length(args))
+      }
 
-    if (identical(check.names, TRUE)) {
-      if (identical(fix.empty.names, TRUE)) {
-        names(args) <- make.names(names(args), unique = TRUE)
-      } else {
-        name_emtpy <- names(args) == ""
-        names(args)[!name_emtpy] <- make.names(names(args)[!name_emtpy], unique = TRUE)
+      if (identical(check.names, TRUE)) {
+        if (identical(fix.empty.names, TRUE)) {
+          names(args) <- make.names(names(args), unique = TRUE)
+        } else {
+          name_emtpy <- names(args) == ""
+          names(args)[!name_emtpy] <- make.names(names(args)[!name_emtpy], unique = TRUE)
+        }
       }
-    }
 
-    build_expr(
-      "make_struct",
-      args = unname(args),
-      options = list(field_names = names(args))
+      build_expr(
+        "make_struct",
+        args = unname(args),
+        options = list(field_names = names(args))
+      )
+    },
+    notes = c(
+      "`row.names` and `check.rows` arguments not supported;",
+      "`stringsAsFactors` must be `FALSE`"
     )
-  })
+  )
 }
 
 register_bindings_type_inspect <- function() {
diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R
index 33054b6f406..3fb85f5490c 100644
--- a/r/R/dplyr-funcs.R
+++ b/r/R/dplyr-funcs.R
@@ -123,8 +123,11 @@ unregister_binding <- function(fun_name, registry = nse_funcs,
   invisible(previous_fun)
 }
 
-register_binding_agg <- function(fun_name, agg_fun, registry = agg_funcs) {
-  register_binding(fun_name, agg_fun, registry = registry)
+# Register an aggregation binding: thin wrapper around register_binding() that
+# defaults `registry` to `agg_funcs` and passes `notes` through unchanged.
+register_binding_agg <- function(fun_name, agg_fun, registry = agg_funcs,
+                                 notes = character(0)) {
+  register_binding(fun_name, agg_fun, registry = registry, notes = notes)
+}
 
 # Supports functions and tests that call previously-defined bindings
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 3181cee1378..20251c6a105 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -98,39 +98,50 @@ register_bindings_aggregate <- function() {
       options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof)
     )
   })
-  register_binding_agg("stats::quantile", function(x, probs, na.rm = FALSE) {
-    if (length(probs) != 1) {
-      arrow_not_supported("quantile() with length(probs) != 1")
-    }
-    # TODO: Bind to the Arrow function that returns an exact quantile and remove
-    # this warning (ARROW-14021)
-    warn(
-      "quantile() currently returns an approximate quantile in Arrow",
-      .frequency = "once",
-      .frequency_id = "arrow.quantile.approximate",
-      class = "arrow.quantile.approximate"
-    )
-    list(
-      fun = "tdigest",
-      data = x,
-      options = list(skip_nulls = na.rm, q = probs)
-    )
-  })
-  register_binding_agg("stats::median", function(x, na.rm = FALSE) {
-    # TODO: Bind to the Arrow function that returns an exact median and remove
-    # this warning (ARROW-14021)
-    warn(
-      "median() currently returns an approximate median in Arrow",
-      .frequency = "once",
-      .frequency_id = "arrow.median.approximate",
-      class = "arrow.median.approximate"
-    )
-    list(
-      fun = "approximate_median",
-      data = x,
-      options = list(skip_nulls = na.rm)
+  register_binding_agg(
+    "stats::quantile",
+    function(x, probs, na.rm = FALSE) {
+      if (length(probs) != 1) {
+        arrow_not_supported("quantile() with length(probs) != 1")
+      }
+      # TODO (ARROW-14021): bind to the Arrow function that returns an exact
+      # quantile and remove this warning; "tdigest" below is approximate
+      warn(
+        "quantile() currently returns an approximate quantile in Arrow",
+        .frequency = "once",
+        .frequency_id = "arrow.quantile.approximate",
+        class = "arrow.quantile.approximate"
+      )
+      list(
+        fun = "tdigest",
+        data = x,
+        options = list(skip_nulls = na.rm, q = probs)
+      )
+    },
+    notes = c(
+      "`probs` must be length 1;",
+      "approximate quantile (t-digest) is computed"
     )
-  })
+  )
+  # Bind stats::median() to Arrow's "approximate_median" aggregation. The
+  # binding warns (`.frequency = "once"`) that the result is approximate and
+  # returns the aggregation spec as a list of `fun`, `data` and `options`.
+  register_binding_agg(
+    "stats::median",
+    function(x, na.rm = FALSE) {
+      # TODO: Bind to the Arrow function that returns an exact median and remove
+      # this warning (ARROW-14021)
+      warn(
+        "median() currently returns an approximate median in Arrow",
+        .frequency = "once",
+        .frequency_id = "arrow.median.approximate",
+        class = "arrow.median.approximate"
+      )
+      agg_options <- list(skip_nulls = na.rm)
+      list(fun = "approximate_median", data = x, options = agg_options)
+    },
+    notes = "approximate median (t-digest) is computed"
+  )
   register_binding_agg("dplyr::n_distinct", function(..., na.rm = FALSE) {
     list(
       fun = "count_distinct",
diff --git a/r/data-raw/docgen.R b/r/data-raw/docgen.R
index 8db3bb7e804..6d8b06611c0 100644
--- a/r/data-raw/docgen.R
+++ b/r/data-raw/docgen.R
@@ -89,6 +89,10 @@ do_not_link <- c(
   "stringr::str_like" # Still only in the unreleased version
 )
 
+package_notes <- list( # package-wide notes, keyed by package name, rendered under the package header
+  stringr = "Pattern modifiers `coll()` and `boundary()` are not supported in any functions."
+)
+
 # Vectorized function to make entries for each function
 render_fun <- function(fun, pkg_fun, notes) {
   # Add () to fun if it's not an operator
@@ -114,12 +118,14 @@ render_pkg <- function(df, pkg) {
   bullets <- df %>%
     transmute(render_fun(fun, pkg_fun, notes)) %>%
     pull()
-  # Add header
-  bullets <- c(
-    paste0("## ", pkg, "\n#'"),
-    bullets
-  )
-  paste("#'", bullets, collapse = "\n")
+  header <- paste0("## ", pkg, "\n#'")
+  # Some packages have global notes to include
+  pkg_notes <- package_notes[[pkg]]
+  if (!is.null(pkg_notes)) {
+    pkg_notes <- paste(pkg_notes, collapse = "\n#' ")
+    header <- c(header, paste0(pkg_notes, "\n#'"))
+  }
+  paste("#'", c(header, bullets), collapse = "\n")
 }
 
 docs <- arrow:::.cache$docs
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index 5cbe211d00d..053438e0604 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -20,38 +20,38 @@ the query on the data. To run the query, call either \code{compute()},
 which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting
 Table into an R \code{data.frame}.
 \itemize{
-\item \code{\link[dplyr:filter-joins]{anti_join()}}
+\item \code{\link[dplyr:filter-joins]{anti_join()}}: the \code{copy} and \code{na_matches} arguments are ignored
 \item \code{\link[dplyr:arrange]{arrange()}}
 \item \code{\link[dplyr:compute]{collapse()}}
 \item \code{\link[dplyr:compute]{collect()}}
 \item \code{\link[dplyr:compute]{compute()}}
 \item \code{\link[dplyr:count]{count()}}
-\item \code{\link[dplyr:distinct]{distinct()}}
+\item \code{\link[dplyr:distinct]{distinct()}}: \code{.keep_all = TRUE} not supported
 \item \code{\link[dplyr:explain]{explain()}}
 \item \code{\link[dplyr:filter]{filter()}}
-\item \code{\link[dplyr:mutate-joins]{full_join()}}
+\item \code{\link[dplyr:mutate-joins]{full_join()}}: the \code{copy} and \code{na_matches} arguments are ignored
 \item \code{\link[dplyr:glimpse]{glimpse()}}
 \item \code{\link[dplyr:group_by]{group_by()}}
 \item \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}}
 \item \code{\link[dplyr:group_data]{group_vars()}}
 \item \code{\link[dplyr:group_data]{groups()}}
-\item \code{\link[dplyr:mutate-joins]{inner_join()}}
-\item \code{\link[dplyr:mutate-joins]{left_join()}}
-\item \code{\link[dplyr:mutate]{mutate()}}
-\item \code{\link[dplyr:pull]{pull()}}
+\item \code{\link[dplyr:mutate-joins]{inner_join()}}: the \code{copy} and \code{na_matches} arguments are ignored
+\item \code{\link[dplyr:mutate-joins]{left_join()}}: the \code{copy} and \code{na_matches} arguments are ignored
+\item \code{\link[dplyr:mutate]{mutate()}}: window functions (e.g. things that require aggregation within groups) not currently supported
+\item \code{\link[dplyr:pull]{pull()}}: returns an Arrow \link{ChunkedArray}, not an R vector
 \item \code{\link[dplyr:relocate]{relocate()}}
 \item \code{\link[dplyr:rename]{rename()}}
 \item \code{\link[dplyr:rename]{rename_with()}}
-\item \code{\link[dplyr:mutate-joins]{right_join()}}
+\item \code{\link[dplyr:mutate-joins]{right_join()}}: the \code{copy} and \code{na_matches} arguments are ignored
 \item \code{\link[dplyr:select]{select()}}
-\item \code{\link[dplyr:filter-joins]{semi_join()}}
+\item \code{\link[dplyr:filter-joins]{semi_join()}}: the \code{copy} and \code{na_matches} arguments are ignored
 \item \code{\link[dplyr:explain]{show_query()}}
 \item \code{\link[dplyr:slice]{slice_head()}}: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
 \item \code{\link[dplyr:slice]{slice_max()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
 \item \code{\link[dplyr:slice]{slice_min()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
 \item \code{\link[dplyr:slice]{slice_sample()}}: slicing within groups not supported; \code{replace = TRUE} and the \code{weight_by} argument not supported; \code{n} only supported on queries where \code{nrow()} is knowable without evaluating
 \item \code{\link[dplyr:slice]{slice_tail()}}: slicing within groups not supported; Arrow datasets do not have row order, so tail is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
-\item \code{\link[dplyr:summarise]{summarise()}}
+\item \code{\link[dplyr:summarise]{summarise()}}: window functions not currently supported; arguments \code{.drop = FALSE} and \code{.groups = "rowwise"} not supported
 \item \code{\link[dplyr:count]{tally()}}
 \item \code{\link[dplyr:mutate]{transmute()}}
 \item \code{\link[dplyr:group_by]{ungroup()}}
@@ -107,8 +107,9 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[base:all]{all()}}
 \item \code{\link[base:any]{any()}}
 \item \code{\link[base:character]{as.character()}}
-\item \code{\link[base:as.Date]{as.Date()}}
-\item \code{\link[base:difftime]{as.difftime()}}
+\item \code{\link[base:as.Date]{as.Date()}}: Multiple \code{tryFormats} not supported in Arrow.
+Consider using the lubridate specialised parsing functions \code{ymd()}, \code{ydm()}, etc.
+\item \code{\link[base:difftime]{as.difftime()}}: only supports \code{units = "secs"} (the default)
 \item \code{\link[base:double]{as.double()}}
 \item \code{\link[base:integer]{as.integer()}}
 \item \code{\link[base:logical]{as.logical()}}
@@ -116,8 +117,10 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[base:Trig]{asin()}}
 \item \code{\link[base:Round]{ceiling()}}
 \item \code{\link[base:Trig]{cos()}}
-\item \code{\link[base:data.frame]{data.frame()}}
-\item \code{\link[base:difftime]{difftime()}}
+\item \code{\link[base:data.frame]{data.frame()}}: \code{row.names} and \code{check.rows} arguments not supported;
+\code{stringsAsFactors} must be \code{FALSE}
+\item \code{\link[base:difftime]{difftime()}}: only supports \code{units = "secs"} (the default);
+\code{tz} argument not supported
 \item \code{\link[base:startsWith]{endsWith()}}
 \item \code{\link[base:Log]{exp()}}
 \item \code{\link[base:Round]{floor()}}
@@ -146,7 +149,7 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[base:Extremes]{max()}}
 \item \code{\link[base:mean]{mean()}}
 \item \code{\link[base:Extremes]{min()}}
-\item \code{\link[base:nchar]{nchar()}}
+\item \code{\link[base:nchar]{nchar()}}: \code{allowNA = TRUE} and \code{keepNA = FALSE} not supported
 \item \code{\link[base:paste]{paste()}}: the \code{collapse} argument is not yet supported
 \item \code{\link[base:paste]{paste0()}}: the \code{collapse} argument is not yet supported
 \item \code{\link[base:Extremes]{pmax()}}
@@ -157,11 +160,12 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[base:MathFun]{sqrt()}}
 \item \code{\link[base:startsWith]{startsWith()}}
 \item \code{\link[base:strptime]{strftime()}}
-\item \code{\link[base:strptime]{strptime()}}
+\item \code{\link[base:strptime]{strptime()}}: accepts a \code{unit} argument not present in the \code{base} function.
+Valid values are "s", "ms" (default), "us", "ns".
 \item \code{\link[base:strrep]{strrep()}}
 \item \code{\link[base:strsplit]{strsplit()}}
 \item \code{\link[base:grep]{sub()}}
-\item \code{\link[base:substr]{substr()}}
+\item \code{\link[base:substr]{substr()}}: \code{start} and \code{stop} must be length 1
 \item \code{\link[base:substr]{substring()}}
 \item \code{\link[base:sum]{sum()}}
 \item \code{\link[base:Trig]{tan()}}
@@ -209,20 +213,20 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[lubridate:duration]{dmilliseconds()}}
 \item \code{\link[lubridate:duration]{dminutes()}}
 \item \code{\link[lubridate:duration]{dmonths()}}
-\item \code{\link[lubridate:ymd]{dmy()}}
-\item \code{\link[lubridate:ymd_hms]{dmy_h()}}
-\item \code{\link[lubridate:ymd_hms]{dmy_hm()}}
-\item \code{\link[lubridate:ymd_hms]{dmy_hms()}}
+\item \code{\link[lubridate:ymd]{dmy()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{dmy_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{dmy_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{dmy_hms()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:duration]{dnanoseconds()}}
-\item \code{\link[lubridate:duration]{dpicoseconds()}}
+\item \code{\link[lubridate:duration]{dpicoseconds()}}: not supported
 \item \code{\link[lubridate:duration]{dseconds()}}
 \item \code{\link[lubridate:dst]{dst()}}
 \item \code{\link[lubridate:duration]{dweeks()}}
 \item \code{\link[lubridate:duration]{dyears()}}
-\item \code{\link[lubridate:ymd]{dym()}}
+\item \code{\link[lubridate:ymd]{dym()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:week]{epiweek()}}
 \item \code{\link[lubridate:year]{epiyear()}}
-\item \code{\link[lubridate:parse_date_time]{fast_strptime()}}
+\item \code{\link[lubridate:parse_date_time]{fast_strptime()}}: non-default values of \code{lt} and \code{cutoff_2000} not supported
 \item \code{\link[lubridate:round_date]{floor_date()}}
 \item \code{\link[lubridate:format_ISO8601]{format_ISO8601()}}
 \item \code{\link[lubridate:hour]{hour()}}
@@ -234,18 +238,19 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[lubridate:year]{isoyear()}}
 \item \code{\link[lubridate:leap_year]{leap_year()}}
 \item \code{\link[lubridate:make_datetime]{make_date()}}
-\item \code{\link[lubridate:make_datetime]{make_datetime()}}
-\item \code{\link[lubridate:make_difftime]{make_difftime()}}
+\item \code{\link[lubridate:make_datetime]{make_datetime()}}: only supports UTC (default) timezone
+\item \code{\link[lubridate:make_difftime]{make_difftime()}}: only supports \code{units = "secs"} (the default);
+providing both \code{num} and \code{...} is not supported
 \item \code{\link[lubridate:day]{mday()}}
-\item \code{\link[lubridate:ymd]{mdy()}}
-\item \code{\link[lubridate:ymd_hms]{mdy_h()}}
-\item \code{\link[lubridate:ymd_hms]{mdy_hm()}}
-\item \code{\link[lubridate:ymd_hms]{mdy_hms()}}
+\item \code{\link[lubridate:ymd]{mdy()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{mdy_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{mdy_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{mdy_hms()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:minute]{minute()}}
 \item \code{\link[lubridate:month]{month()}}
-\item \code{\link[lubridate:ymd]{my()}}
-\item \code{\link[lubridate:ymd]{myd()}}
-\item \code{\link[lubridate:parse_date_time]{parse_date_time()}}
+\item \code{\link[lubridate:ymd]{my()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd]{myd()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:parse_date_time]{parse_date_time()}}: \code{quiet = FALSE} is not supported
 \item \code{\link[lubridate:am]{pm()}}
 \item \code{\link[lubridate:day]{qday()}}
 \item \code{\link[lubridate:quarter]{quarter()}}
@@ -256,17 +261,17 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[lubridate:day]{wday()}}
 \item \code{\link[lubridate:week]{week()}}
 \item \code{\link[lubridate:day]{yday()}}
-\item \code{\link[lubridate:ymd]{ydm()}}
-\item \code{\link[lubridate:ymd_hms]{ydm_h()}}
-\item \code{\link[lubridate:ymd_hms]{ydm_hm()}}
-\item \code{\link[lubridate:ymd_hms]{ydm_hms()}}
+\item \code{\link[lubridate:ymd]{ydm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{ydm_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{ydm_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{ydm_hms()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:year]{year()}}
-\item \code{\link[lubridate:ymd]{ym()}}
-\item \code{\link[lubridate:ymd]{ymd()}}
-\item \code{\link[lubridate:ymd_hms]{ymd_h()}}
-\item \code{\link[lubridate:ymd_hms]{ymd_hm()}}
-\item \code{\link[lubridate:ymd_hms]{ymd_hms()}}
-\item \code{\link[lubridate:ymd]{yq()}}
+\item \code{\link[lubridate:ymd]{ym()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd]{ymd()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{ymd_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{ymd_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hms]{ymd_hms()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd]{yq()}}: \code{locale} argument not supported
 }
 }
 
@@ -288,8 +293,9 @@ as \code{arrow_ascii_is_decimal}.
 
 \subsection{stats}{
 \itemize{
-\item \code{\link[stats:median]{median()}}
-\item \code{\link[stats:quantile]{quantile()}}
+\item \code{\link[stats:median]{median()}}: approximate median (t-digest) is computed
+\item \code{\link[stats:quantile]{quantile()}}: \code{probs} must be length 1;
+approximate quantile (t-digest) is computed
 \item \code{\link[stats:sd]{sd()}}
 \item \code{\link[stats:cor]{var()}}
 }
@@ -302,9 +308,11 @@ as \code{arrow_ascii_is_decimal}.
 }
 
 \subsection{stringr}{
+
+Pattern modifiers \code{coll()} and \code{boundary()} are not supported in any of these functions.
 \itemize{
 \item \code{\link[stringr:str_c]{str_c()}}: the \code{collapse} argument is not yet supported
-\item \code{\link[stringr:str_count]{str_count()}}
+\item \code{\link[stringr:str_count]{str_count()}}: \code{pattern} must be a length 1 character vector
 \item \code{\link[stringr:str_detect]{str_detect()}}
 \item \code{\link[stringr:str_dup]{str_dup()}}
 \item \code{\link[stringr:str_starts]{str_ends()}}
@@ -313,9 +321,9 @@ as \code{arrow_ascii_is_decimal}.
 \item \code{\link[stringr:str_pad]{str_pad()}}
 \item \code{\link[stringr:str_replace]{str_replace()}}
 \item \code{\link[stringr:str_replace]{str_replace_all()}}
-\item \code{\link[stringr:str_split]{str_split()}}
+\item \code{\link[stringr:str_split]{str_split()}}: case-insensitive string splitting and splitting into 0 parts are not supported
 \item \code{\link[stringr:str_starts]{str_starts()}}
-\item \code{\link[stringr:str_sub]{str_sub()}}
+\item \code{\link[stringr:str_sub]{str_sub()}}: \code{start} and \code{end} must be length 1
 \item \code{\link[stringr:case]{str_to_lower()}}
 \item \code{\link[stringr:case]{str_to_title()}}
 \item \code{\link[stringr:case]{str_to_upper()}}
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index 81a9ba3f6e5..21a78ee06e4 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -289,7 +289,7 @@ test_that("filter environment scope", {
     tbl
   )
   isShortString <- function(x) nchar(x) < 10
-  skip("TODO: 14071")
+  skip("TODO: ARROW-14071")
   compare_dplyr_binding(
     .input %>%
       select(-fct) %>%
@@ -419,7 +419,6 @@ test_that("filter() with namespaced functions", {
 })
 
 test_that("filter() with across()", {
-
   compare_dplyr_binding(
     .input %>%
       filter(if_any(ends_with("l"), ~ is.na(.))) %>%
@@ -437,5 +436,4 @@ test_that("filter() with across()", {
       collect(),
     tbl
   )
-
 })
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 2608f9d6545..3ddc9ec3bed 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -1897,7 +1897,7 @@ test_that("`as.Date()` and `as_date()`", {
         )
       ) %>%
       collect(),
-    regexp = "consider using the lubridate specialised parsing functions"
+    regexp = "Consider using the lubridate specialised parsing functions"
   )
 
   # record batch test
@@ -1911,7 +1911,7 @@ test_that("`as.Date()` and `as_date()`", {
         )
       ) %>%
       collect(),
-    regexp = "consider using the lubridate specialised parsing functions"
+    regexp = "Consider using the lubridate specialised parsing functions"
   )
 
   # strptime does not support a partial format - Arrow returns NA, while