From 0b2e058e0f05e4d7fb51223b8e912266a2d95260 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 30 Aug 2019 02:55:43 +0800 Subject: [PATCH] clean up use of vapply --- R/data.table.R | 21 +++++++++++---------- R/fread.R | 4 ++-- R/groupingsets.R | 2 +- R/print.data.table.R | 2 +- R/setops.R | 5 ++--- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 43541e9af..9e8868028 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -76,7 +76,7 @@ data.table = function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, str } } if (isTRUE(stringsAsFactors)) { - for (j in which(vapply(ans, is.character, TRUE))) set(ans, NULL, j, as_factor(.subset2(ans, j))) + for (j in which(vapply_1b(ans, is.character))) set(ans, NULL, j, as_factor(.subset2(ans, j))) # as_factor is internal function in fread.R currently } alloc.col(ans) # returns a NAMED==0 object, unlike data.frame() @@ -881,7 +881,7 @@ replace_order = function(isub, verbose, env) { jvnames = gsub("^[.](N|I|GRP|BY)$","\\1",as.character(jsub)) # jsub is list()ed after it's eval'd inside dogroups. } - } else if (is.call(jsub) && as.character(jsub[[1L]])[[1L]] %chin% c("list",".")) { + } else if (is.call(jsub) && as.character(jsub[[1L]])[1L] %chin% c("list", ".")) { jsub[[1L]] = quote(list) jsubl = as.list.default(jsub) # TO DO: names(jsub) and names(jsub)="" seem to work so make use of that if (length(jsubl)>1L) { @@ -1177,6 +1177,7 @@ replace_order = function(isub, verbose, env) { assign(sym, get(getName, parent.frame()), SDenv) } # hash=TRUE (the default) does seem better as expected using e.g. test 645. TO DO experiment with 'size' argument + #browser() if (missingby || bynull || (!byjoin && !length(byval))) { # No grouping: 'by' = missing | NULL | character() | "" | list() # Considered passing a one-group to dogroups but it doesn't do the recycling of i within group, that's done here @@ -1310,8 +1311,8 @@ replace_order = function(isub, verbose, env) { jval = data.table(jval) # TO DO: should this be setDT(list(jval)) instead? } else { if (is.null(jvnames)) jvnames=names(jval) - lenjval = vapply(jval, length, 0L) - nulljval = vapply(jval, is.null, FALSE) + lenjval = vapply_1i(jval, length) + nulljval = vapply_1b(jval, is.null) if (lenjval[1L]==0L || any(lenjval != lenjval[1L])) { jval = as.data.table.list(jval) # does the vector expansion to create equal length vectors, and drops any NULL items jvnames = jvnames[!nulljval] # fix for #1477 @@ -1462,7 +1463,7 @@ replace_order = function(isub, verbose, env) { lockBinding(".iSD",SDenv) GForce = FALSE - if ( getOption("datatable.optimize")>=1 && (is.call(jsub) || (is.name(jsub) && as.character(jsub)[[1L]] %chin% c(".SD",".N"))) ) { # Ability to turn off if problems or to benchmark the benefit + if ( getOption("datatable.optimize")>=1 && (is.call(jsub) || (is.name(jsub) && as.character(jsub)[1L] %chin% c(".SD", ".N"))) ) { # Ability to turn off if problems or to benchmark the benefit # Optimization to reduce overhead of calling lapply over and over for each group oldjsub = jsub funi = 1L # Fix for #985 @@ -1621,7 +1622,7 @@ replace_order = function(isub, verbose, env) { if (getOption("datatable.optimize")>=2 && !is.data.table(i) && !byjoin && length(f__) && !length(lhs)) { if (!length(ansvars) && !use.I) { GForce = FALSE - if ( (is.name(jsub) && jsub == ".N") || (is.call(jsub) && length(jsub)==2L && length(as.character(jsub[[1L]])) && as.character(jsub[[1L]])[1L] == "list" && length(as.character(jsub[[2L]])) && as.character(jsub[[2L]])[1L] == ".N") ) { + if ( (is.name(jsub) && jsub == ".N") || (is.call(jsub) && length(jsub)==2L && jsub[[1L]]== "list" && jsub[[2L]] == ".N") ) { GForce = TRUE if (verbose) cat("GForce optimized j to '",deparse(jsub,width.cutoff=200L),"'\n",sep="") } @@ -2315,7 +2316,7 @@ copy = function(x) { if (!is.data.table(x)) { # fix for #1476. TODO: find if a cleaner fix is possible.. if (is.list(x)) { - anydt = vapply(x, is.data.table, TRUE, USE.NAMES=FALSE) + anydt = vapply_1b(x, is.data.table, use.names=FALSE) if (sum(anydt)) { newx[anydt] = lapply(newx[anydt], function(x) { .Call(C_unlock, x) @@ -2618,7 +2619,7 @@ setDF = function(x, rownames=NULL) { } x } else { - n = vapply(x, length, 0L) + n = vapply_1i(x, length) mn = max(n) if (any(n 1L && prod(vapply(i, length, integer(1L))) > 1e4){ + if(length(i) > 1L && prod(vapply_1i(i, length)) > 1e4){ ## CJ would result in more than 1e4 rows. This would be inefficient, especially memory-wise #2635 if (verbose) {cat("Subsetting optimization disabled because the cross-product of RHS values exceeds 1e4, causing memory problems.\n");flush.console()} return(NULL) diff --git a/R/fread.R b/R/fread.R index 46fa812e7..707697bfb 100644 --- a/R/fread.R +++ b/R/fread.R @@ -312,9 +312,9 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir()) if (stringsAsFactors) { if (is.double(stringsAsFactors)) { #2025 should_be_factor = function(v) is.character(v) && uniqueN(v) < nr * stringsAsFactors - cols_to_factor = which(vapply(ans, should_be_factor, logical(1L))) + cols_to_factor = which(vapply_1b(ans, should_be_factor)) } else { - cols_to_factor = which(vapply(ans, is.character, logical(1L))) + cols_to_factor = which(vapply_1b(ans, is.character)) } if (verbose) cat("stringsAsFactors=", stringsAsFactors, " converted ", length(cols_to_factor), " column(s): ", brackify(names(ans)[cols_to_factor]), "\n", sep="") for (j in cols_to_factor) set(ans, j=j, value=as_factor(.subset2(ans, j))) diff --git a/R/groupingsets.R b/R/groupingsets.R index dadcbb0fe..c39c6b209 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -83,7 +83,7 @@ groupingsets.data.table = function(x, j, by, sets, .SDcols, id = FALSE, jj, ...) setcolorder(empty, c("grouping", by, setdiff(names(empty), c("grouping", by)))) } # workaround for rbindlist fill=TRUE on integer64 #1459 - int64.cols = vapply(empty, inherits, logical(1L), "integer64") + int64.cols = vapply_1b(empty, inherits, "integer64") int64.cols = names(int64.cols)[int64.cols] if (length(int64.cols) && !requireNamespace("bit64", quietly=TRUE)) stop("Using integer64 class columns require to have 'bit64' package installed.") # nocov diff --git a/R/print.data.table.R b/R/print.data.table.R index ac82dcfaf..37c43f5e9 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -81,7 +81,7 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), factor = "", POSIXct = "", logical = "", IDate = "", integer64 = "", raw = "", expression = "", ordered = "") - classes = vapply(x, function(col) class(col)[1L], "", USE.NAMES=FALSE) + classes = vapply_1c(x, function(col) class(col)[1L], use.names=FALSE) abbs = unname(class_abb[classes]) if ( length(idx <- which(is.na(abbs))) ) abbs[idx] = paste0("<", classes[idx], ">") toprint = rbind(abbs, toprint) diff --git a/R/setops.R b/R/setops.R index 1dce93702..0c1103258 100644 --- a/R/setops.R +++ b/R/setops.R @@ -41,8 +41,7 @@ funique = function(x) { if (!identical(sort(names(x)), sort(names(y)))) stop("x and y must have the same column names") if (!identical(names(x), names(y))) stop("x and y must have the same column order") bad_types = c("raw", "complex", if (block_list) "list") - found = bad_types %chin% c(vapply(x, typeof, FUN.VALUE = ""), - vapply(y, typeof, FUN.VALUE = "")) + found = bad_types %chin% c(vapply_1c(x, typeof), vapply_1c(y, typeof)) if (any(found)) stop("unsupported column type", if (sum(found) > 1L) "s" else "", " found in x or y: ", brackify(bad_types[found])) if (!identical(lapply(x, class), lapply(y, class))) stop("x and y must have the same column classes") @@ -168,7 +167,7 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu if (ignore.row.order) { if (".seqn" %chin% names(target)) stop("None of the datasets to compare should contain a column named '.seqn'") - bad.type = setNames(c("raw","complex","list") %chin% c(vapply(current, typeof, FUN.VALUE = ""), vapply(target, typeof, FUN.VALUE = "")), c("raw","complex","list")) + bad.type = setNames(c("raw","complex","list") %chin% c(vapply_1c(current, typeof), vapply_1c(target, typeof)), c("raw","complex","list")) if (any(bad.type)) stop("Datasets to compare with 'ignore.row.order' must not have unsupported column types: ", brackify(names(bad.type)[bad.type])) if (between(tolerance, 0, sqrt(.Machine$double.eps), incbounds=FALSE)) {