diff --git a/.ci/.lintr.R b/.ci/.lintr.R new file mode 100644 index 000000000..09a0db3dc --- /dev/null +++ b/.ci/.lintr.R @@ -0,0 +1,105 @@ +for (f in list.files('ci/linters', full.names=TRUE)) source(f) +rm(f) + +linters = all_linters( + packages = "lintr", # TODO(lintr->3.2.0): Remove this. + # eq_assignment_linter(), + brace_linter(allow_single_line = TRUE), + # TODO(michaelchirico): Activate these incrementally. These are the + # parameterizations that match our style guide. + # implicit_assignment_linter(allow_lazy = TRUE, allow_scoped = TRUE), + # implicit_integer_linter(allow_colon = TRUE), + # system_time_linter = undesirable_function_linter(c( + # system.time = "Only run timings in benchmark.Rraw" + # )), + # undesirable_function_linter(modify_defaults( + # default_undesirable_functions, + # ifelse = "Use fifelse instead.", + # Sys.setenv = NULL, + # library = NULL, + # options = NULL, + # par = NULL, + # setwd = NULL + # )), + undesirable_operator_linter(modify_defaults( + default_undesirable_operators, + `<<-` = NULL + )), + # TODO(lintr#2441): Use upstream implementation. + assignment_linter = NULL, + # TODO(lintr#2442): Use this once x[ , j, by] is supported. + commas_linter = NULL, + commented_code_linter = NULL, + # TODO(linter->3.2.0): Activate this. + consecutive_assertion_linter = NULL, + cyclocomp_linter = NULL, + function_argument_linter = NULL, + indentation_linter = NULL, + infix_spaces_linter = NULL, + # TODO(R>3.2.0): Activate this, extending to recognize vapply_1i(x, length). + lengths_linter = NULL, + line_length_linter = NULL, + missing_package_linter = NULL, + namespace_linter = NULL, + nonportable_path_linter = NULL, + object_name_linter = NULL, + object_usage_linter = NULL, + quotes_linter = NULL, + semicolon_linter = NULL, + spaces_inside_linter = NULL, + spaces_left_parentheses_linter = NULL, + # TODO(michaelchirico): Only exclude from vignettes, not sure what's wrong. + strings_as_factors_linter = NULL, + # TODO(lintr->3.2.0): Fix on a valid TODO style, enforce it, and re-activate. + todo_comment_linter = NULL, + # TODO(michaelchirico): Enforce these and re-activate them one-by-one. Also stop using '<<-'. + brace_linter = NULL, + condition_call_linter = NULL, + conjunct_test_linter = NULL, + fixed_regex_linter = NULL, + function_left_parentheses_linter = NULL, + if_not_else_linter = NULL, + implicit_assignment_linter = NULL, + implicit_integer_linter = NULL, + keyword_quote_linter = NULL, + length_levels_linter = NULL, + matrix_apply_linter = NULL, + missing_argument_linter = NULL, + nzchar_linter = NULL, + object_overwrite_linter = NULL, + paren_body_linter = NULL, + redundant_equals_linter = NULL, + rep_len_linter = NULL, + repeat_linter = NULL, + return_linter = NULL, + sample_int_linter = NULL, + scalar_in_linter = NULL, + seq_linter = NULL, + undesirable_function_linter = NULL, + unnecessary_concatenation_linter = NULL, + unnecessary_lambda_linter = NULL, + unnecessary_nesting_linter = NULL, + unreachable_code_linter = NULL, + unused_import_linter = NULL +) +# TODO(lintr#2172): Glob with lintr itself. +exclusions = local({ + exclusion_for_dir <- function(dir, exclusions) { + files = list.files(dir, pattern = "\\.(R|Rmd)$") + stats::setNames(rep(list(exclusions), length(files)), files) + } + c( + exclusion_for_dir("tests", list( + quotes_linter = Inf, + # TODO(michaelchirico): Enforce these and re-activate them one-by-one. + implicit_integer_linter = Inf, + infix_spaces_linter = Inf, + undesirable_function_linter = Inf + )), + exclusion_for_dir("vignettes", list( + quotes_linter = Inf + # strings_as_factors_linter = Inf + # system_time_linter = Inf + )) + ) +}) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 000000000..7170016b1 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,35 @@ +on: + push: + branches: + - master + pull_request: + branches: + - master + +name: lint + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: | + r-lib/lintr + local::. + needs: lint + + - name: Lint + run: lintr::lint_package() + shell: Rscript {0} + env: + LINTR_ERROR_ON_LINT: true + R_LINTR_LINTER_FILE: .ci/.lintr diff --git a/R/bmerge.R b/R/bmerge.R index ddaedc1b3..ff40fddb4 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -187,4 +187,3 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos ans$xo = xo # for further use by [.data.table return(ans) } - diff --git a/R/data.table.R b/R/data.table.R index e0cddb38f..5513fb276 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2300,7 +2300,7 @@ transform.data.table = function (`_data`, ...) { if (!cedta()) return(NextMethod()) # nocov `_data` = copy(`_data`) - e = eval(substitute(list(...)), `_data`, parent.frame()) + e = eval(substitute(list(...)), `_data`, parent.frame()) set(`_data`, ,names(e), e) `_data` } diff --git a/R/duplicated.R b/R/duplicated.R index 27b903812..0aad2ebdd 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -118,4 +118,3 @@ uniqueN = function(x, by = if (is.list(x)) seq_along(x) else NULL, na.rm=FALSE) length(starts) } } - diff --git a/R/foverlaps.R b/R/foverlaps.R index 9a0cd5580..54dc61f93 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -247,4 +247,3 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k # Tests are added to ensure we cover these aspects (to my knowledge) to ensure that any undesirable changes in the future breaks those tests. # Conclusion: floating point manipulations are hell! - diff --git a/R/fwrite.R b/R/fwrite.R index b13b0afb7..37968d4ea 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -64,7 +64,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", length(nThread)==1L && !is.na(nThread) && nThread>=1L ) - is_gzip = compress == "gzip" || (compress == "auto" && grepl("\\.gz$", file)) + is_gzip = compress == "gzip" || (compress == "auto" && endsWithAny(file, ".gz")) file = path.expand(file) # "~/foo/bar" if (append && (file=="" || file.exists(file))) { @@ -122,4 +122,3 @@ fwrite = function(x, file="", append=FALSE, quote="auto", } haszlib = function() .Call(Cdt_has_zlib) - diff --git a/R/onAttach.R b/R/onAttach.R index 6ff17972b..7a4a9be3e 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -21,7 +21,7 @@ nth = getDTthreads(verbose=FALSE) if (dev) packageStartupMessagef("data.table %s IN DEVELOPMENT built %s%s using %d threads (see ?getDTthreads). ", v, d, g, nth, appendLF=FALSE) - else + else packageStartupMessagef("data.table %s using %d threads (see ?getDTthreads). ", v, nth, appendLF=FALSE) packageStartupMessagef("Latest news: r-datatable.com") if (gettext("TRANSLATION CHECK") != "TRANSLATION CHECK") diff --git a/R/openmp-utils.R b/R/openmp-utils.R index f19120724..85f6b3257 100644 --- a/R/openmp-utils.R +++ b/R/openmp-utils.R @@ -13,4 +13,3 @@ setDTthreads = function(threads=NULL, restore_after_fork=NULL, percent=NULL, thr getDTthreads = function(verbose=getOption("datatable.verbose")) { .Call(CgetDTthreads, verbose) } - diff --git a/R/print.data.table.R b/R/print.data.table.R index 9e33e0c4d..f80a5833c 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -230,7 +230,7 @@ format_list_item.default = function(x, ...) { char.trunc = function(x, trunc.char = getOption("datatable.prettyprint.char")) { trunc.char = max(0L, suppressWarnings(as.integer(trunc.char[1L])), na.rm=TRUE) if (!is.character(x) || trunc.char <= 0L) return(x) - nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096 + nchar_width = nchar(x, 'width') # Check whether string is full-width or half-width, #5096 nchar_chars = nchar(x, 'char') is_full_width = nchar_width > nchar_chars idx = pmin(nchar_width, nchar_chars) > trunc.char @@ -272,4 +272,3 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){ n, brackify(paste0(not_printed, classes)) ) } - diff --git a/R/setkey.R b/R/setkey.R index 84488a803..62da9ebe8 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -353,4 +353,3 @@ CJ = function(..., sorted = TRUE, unique = FALSE) } l } - diff --git a/R/setops.R b/R/setops.R index 1034b0f0f..23dd6ec8f 100644 --- a/R/setops.R +++ b/R/setops.R @@ -290,4 +290,3 @@ all.equal.data.table = function(target, current, trim.levels=TRUE, check.attribu } TRUE } - diff --git a/R/tables.R b/R/tables.R index e47a1a42e..6a0209c86 100644 --- a/R/tables.R +++ b/R/tables.R @@ -60,4 +60,3 @@ tables = function(mb=type_size, order.col="NAME", width=80, } invisible(info) } - diff --git a/R/test.data.table.R b/R/test.data.table.R index 748e09512..43486e278 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -7,7 +7,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F if (length(memtest.id)) { if (length(memtest.id)==1L) memtest.id = rep(memtest.id, 2L) # for convenience of supplying one id rather than always a range stopifnot(length(memtest.id)<=2L, # conditions quoted to user when false so "<=2L" even though following conditions rely on ==2L - !anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L]) + !anyNA(memtest.id), memtest.id[1L]<=memtest.id[2L]) if (memtest==0L) memtest=1L # using memtest.id implies memtest } if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { @@ -134,7 +134,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F owd = setwd(tempdir()) # ensure writeable directory; e.g. tests that plot may write .pdf here depending on device option and/or batch mode; #5190 on.exit(setwd(owd)) - + if (memtest) { catf("\n***\n*** memtest=%d. This should be the first call in a fresh R_GC_MEM_GROW=0 R session for best results. Ctrl-C now if not.\n***\n\n", memtest) if (is.na(rss())) stopf("memtest intended for Linux. Step through data.table:::rss() to see what went wrong.") diff --git a/R/timetaken.R b/R/timetaken.R index daa52c9f1..ae4b384fc 100644 --- a/R/timetaken.R +++ b/R/timetaken.R @@ -12,4 +12,3 @@ timetaken = function(started.at) tt = proc.time()-started.at # diff all 3 times paste0(format(tt[3L])," elapsed (", format(tt[1L]), " cpu)") } - diff --git a/R/transpose.R b/R/transpose.R index 684b135d4..6d6da6779 100644 --- a/R/transpose.R +++ b/R/transpose.R @@ -56,7 +56,7 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) { if (!(sum(!is_named) == 1L && !is_named[n] && is.function(type.convert[[n]]))) stopf("When the argument 'type.convert' contains an unnamed element, it is expected to be the last element and should be a function. More than one unnamed element is not allowed unless all elements are functions with length equal to %d (the length of the transpose list or 'keep' argument if it is specified).", length(keep)) else { - fothers = type.convert[[n]] + fothers = type.convert[[n]] type.convert = type.convert[-n] } } @@ -90,4 +90,3 @@ tstrsplit = function(x, ..., fill=NA, type.convert=FALSE, keep, names=FALSE) { setattr(ans, 'names', names) ans } - diff --git a/R/uniqlist.R b/R/uniqlist.R index 2a610ab1a..4f3600f83 100644 --- a/R/uniqlist.R +++ b/R/uniqlist.R @@ -21,4 +21,3 @@ uniqlengths = function(x, len) { ans = .Call(Cuniqlengths, as.integer(x), as.integer(len)) ans } - diff --git a/R/utils.R b/R/utils.R index a78e5450f..feacd2b00 100644 --- a/R/utils.R +++ b/R/utils.R @@ -165,4 +165,3 @@ rss = function() { #5515 #5517 round(ans / 1024, 1L) # return MB # nocov end } - diff --git a/inst/atime/tests.R b/inst/atime/tests.R index a0635d063..19c1b27a8 100644 --- a/inst/atime/tests.R +++ b/inst/atime/tests.R @@ -1,7 +1,7 @@ # A function to customize R package metadata and source files to facilitate version-specific installation and testing. # -# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) -# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. +# This is specifically tailored for handling data.table which requires specific changes in non-standard files (such as the object file name in Makevars and version checking code in onLoad.R) +# to support testing across different versions (base and HEAD for PRs, commits associated with historical regressions, etc.) of the package. # It appends a SHA1 hash to the package name (PKG.SHA), ensuring each version can be installed and tested separately. # # @param old.Package Current name of the package. @@ -29,7 +29,7 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { Package_ <- gsub(".", "_", old.Package, fixed = TRUE) new.Package_ <- paste0(Package_, "_", sha) pkg_find_replace( - "DESCRIPTION", + "DESCRIPTION", paste0("Package:\\s+", old.Package), paste("Package:", new.Package)) pkg_find_replace( @@ -55,13 +55,13 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { } # A list of performance tests. -# +# # Each entry in this list corresponds to a performance test and contains a sublist with three mandatory arguments: # - N: A numeric sequence of data sizes to vary. # - setup: An expression evaluated for every data size before measuring time/memory. -# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. +# - expr: An expression that will be evaluated for benchmarking performance across different git commit versions. # This must call a function from data.table using a syntax with double or triple colon prefix. -# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. +# The package name before the colons will be replaced by a new package name that uses the commit SHA hash. # (For instance, data.table:::[.data.table will become data.table.some_40_digit_SHA1_hash:::[.data.table) # # Optional parameters that may be useful to configure tests: @@ -70,8 +70,9 @@ pkg.edit.fun = function(old.Package, new.Package, sha, new.pkg.path) { # - sha.vec: Named character vector or a list of vectors that specify data.table-specific commit SHAs for testing across those different git commit versions. # For historical regressions, use 'Before', 'Regression', and 'Fixed' (otherwise something like 'Slow' or 'Fast' ideally). # @note Please check https://github.com/tdhock/atime/blob/main/vignettes/data.table.Rmd for more information. +# nolint start: undesirable_operator_linter. ':::' needed+appropriate here. test.list <- list( - # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 + # Performance regression discussed in: https://github.com/Rdatatable/data.table/issues/4311 # Fixed in: https://github.com/Rdatatable/data.table/pull/4440 "Test regression fixed in #4440" = list( pkg.edit.fun = pkg.edit.fun, @@ -88,7 +89,7 @@ test.list <- list( # Test based on: https://github.com/Rdatatable/data.table/issues/5424 # Performance regression introduced from a commit in: https://github.com/Rdatatable/data.table/pull/4491 - # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 + # Fixed in: https://github.com/Rdatatable/data.table/pull/5463 "Test regression fixed in #5463" = list( pkg.edit.fun = pkg.edit.fun, N = 10^seq(3, 8), @@ -101,8 +102,9 @@ test.list <- list( key = "g") dt_mod <- copy(dt) }), - expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), + expr = quote(data.table:::`[.data.table`(dt_mod, , N := .N, by = g)), Before = "be2f72e6f5c90622fe72e1c315ca05769a9dc854", # Parent of the regression causing commit (https://github.com/Rdatatable/data.table/commit/e793f53466d99f86e70fc2611b708ae8c601a451) in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) Regression = "e793f53466d99f86e70fc2611b708ae8c601a451", # Commit responsible for regression in the PR that introduced the issue (https://github.com/Rdatatable/data.table/pull/4491/commits) - Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) + Fixed = "58409197426ced4714af842650b0cc3b9e2cb842") # Last commit in the PR that fixed the regression (https://github.com/Rdatatable/data.table/pull/5463/commits) ) +# nolint end: undesirable_operator_linter. diff --git a/vignettes/datatable-faq.Rmd b/vignettes/datatable-faq.Rmd index 97c11aeba..1501497bc 100644 --- a/vignettes/datatable-faq.Rmd +++ b/vignettes/datatable-faq.Rmd @@ -373,7 +373,7 @@ Yes: The general form is: -```{r, eval = FALSE} +```r DT[where, select|update, group by][order by][...] ... [...] ``` @@ -619,4 +619,4 @@ Please see [this answer](https://stackoverflow.com/a/10529888/403310). ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd index c783c3fa7..e63caee5d 100644 --- a/vignettes/datatable-intro.Rmd +++ b/vignettes/datatable-intro.Rmd @@ -652,4 +652,4 @@ We will see how to *add/update/delete* columns *by reference* and how to combine ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-keys-fast-subset.Rmd b/vignettes/datatable-keys-fast-subset.Rmd index 3ec50640c..d85f69ad8 100644 --- a/vignettes/datatable-keys-fast-subset.Rmd +++ b/vignettes/datatable-keys-fast-subset.Rmd @@ -157,7 +157,7 @@ Once you *key* a *data.table* by certain columns, you can subset by querying tho flights[.("JFK")] ## alternatively -# flights[J("JFK")] (or) +# flights[J("JFK")] (or) # flights[list("JFK")] ``` @@ -464,7 +464,7 @@ Now let us look at binary search approach (method 2). Recall from [Properties of Here's a very simple illustration. Let's consider the (sorted) numbers shown below: -```{r eval = FALSE} +``` 1, 5, 10, 19, 22, 23, 30 ``` @@ -499,4 +499,4 @@ Key based subsets are **incredibly fast** and are particularly useful when the t ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index 0536f11d6..3ec1f57d5 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -420,4 +420,4 @@ DT[, cl, env = list(cl = cl)] ```{r cleanup, echo=FALSE} options(.opts) registerS3method("print", "data.frame", base::print.data.frame) -``` \ No newline at end of file +``` diff --git a/vignettes/datatable-sd-usage.Rmd b/vignettes/datatable-sd-usage.Rmd index 09243c820..bd2618d53 100644 --- a/vignettes/datatable-sd-usage.Rmd +++ b/vignettes/datatable-sd-usage.Rmd @@ -169,7 +169,7 @@ lm_coef = sapply(models, function(rhs) { }) barplot(lm_coef, names.arg = sapply(models, paste, collapse = '/'), main = 'Wins Coefficient\nWith Various Covariates', - col = col16, las = 2L, cex.names = .8) + col = col16, las = 2L, cex.names = 0.8) ``` The coefficient always has the expected sign (better pitchers tend to have more wins and fewer runs allowed), but the magnitude can vary substantially depending on what else we control for. @@ -254,4 +254,4 @@ The above is just a short introduction of the power of `.SD` in facilitating bea ```{r, echo=FALSE} setDTthreads(.old.th) -``` \ No newline at end of file +```