From f03b9d99e6cf19d2041aa61d44728672867fa6bf Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 28 Jun 2024 07:38:15 -0500 Subject: [PATCH 1/5] Add a new line to every line of source code I don't think this should affect downstream code (since some inputs would have previously added it and some did not) and it makes the code simpler and the output more consistent. --- NEWS.md | 1 + R/parse.R | 8 +------- man/parse_all.Rd | 4 +++- tests/testthat/test-conditions.R | 2 +- tests/testthat/test-eval.R | 4 ++-- tests/testthat/test-parse.R | 35 ++++++++++++++------------------ 6 files changed, 23 insertions(+), 31 deletions(-) diff --git a/NEWS.md b/NEWS.md index ef47b84..d1c2a22 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ # evaluate (development version) * `is.value()` has been removed since it tests for an object that evaluate never creates. +* `parse_all()` adds a `\n` to the end of every line, even the last one if it didn't have one in the input. * `parse_all()` no longer has a default method, which will generate better errors if you pass in something unexpectected. * The package now depends on R 4.0.0 in order to decrease our maintenance burden. * `evaluate()` automatically strips calls from conditions emitted by top-level code (these incorrectly get calls because they're wrapped inside `eval()`) (#150). diff --git a/R/parse.R b/R/parse.R index c536e54..ffcc088 100644 --- a/R/parse.R +++ b/R/parse.R @@ -38,16 +38,11 @@ parse_all <- function(x, filename = NULL, allow_error = FALSE) UseMethod("parse_ #' @export parse_all.character <- function(x, filename = NULL, allow_error = FALSE) { if (any(grepl("\n", x))) { - # Track whether or not last element has a newline - trailing_nl <- grepl("\n$", x[length(x)]) # Ensure that empty lines are not dropped by strsplit() x[x == ""] <- "\n" # Standardise to a character vector with one line per element; # this is the input that parse() is documented to accept x <- unlist(strsplit(x, "\n"), recursive = FALSE, use.names = FALSE) - } else { - lines <- x - trailing_nl <- FALSE } n <- length(x) @@ -101,8 +96,7 @@ parse_all.character <- function(x, filename = NULL, allow_error = FALSE) { res <- res[order(res$line), c("src", "expr")] # Restore newlines stripped while converting to vector of lines - nl <- c(rep("\n", nrow(res) - 1), if (trailing_nl) "\n" else "") - res$src <- paste0(res$src, nl) + res$src <- paste0(res$src, "\n") rownames(res) <- NULL res diff --git a/man/parse_all.Rd b/man/parse_all.Rd index 26eee69..44c3dc4 100644 --- a/man/parse_all.Rd +++ b/man/parse_all.Rd @@ -17,7 +17,9 @@ If a connection, will be opened and closed only if it was closed initially.} \value{ A data frame with columns \code{src}, a character vector of source code, and \code{expr}, a list-column of parsed expressions. There will be one row for each -top-level expression in \code{x}. A top-level expression is a complete expression +top-level expression in \code{x}. + +A top-level expression is a complete expression which would trigger execution if typed at the console. The \code{expression} object in \code{expr} can be of any length: it will be 0 if the top-level expression contains only whitespace and/or comments; 1 if the top-level diff --git a/tests/testthat/test-conditions.R b/tests/testthat/test-conditions.R index 195e67d..08a08e7 100644 --- a/tests/testthat/test-conditions.R +++ b/tests/testthat/test-conditions.R @@ -98,7 +98,7 @@ test_that("log_warning causes warnings to be emitted", { # And still recorded in eval result expect_output_types(ev, c("source", "warning")) - expect_equal(ev[[1]]$src, "f()") + expect_equal(ev[[1]]$src, "f()\n") expect_equal(ev[[2]], simpleWarning("Hi!", quote(f()))) }) diff --git a/tests/testthat/test-eval.R b/tests/testthat/test-eval.R index da55444..a6db288 100644 --- a/tests/testthat/test-eval.R +++ b/tests/testthat/test-eval.R @@ -24,11 +24,11 @@ test_that("log_echo causes output to be immediately written to stderr()", { res <- evaluate("f()", log_echo = TRUE), type = "message" ) - expect_equal(out, "f()") + expect_equal(out, c("f()", "")) # But still recorded in eval result expect_output_types(res, c("source", "text")) - expect_equal(res[[1]]$src, "f()") + expect_equal(res[[1]]$src, "f()\n") }) test_that("data sets loaded", { diff --git a/tests/testthat/test-parse.R b/tests/testthat/test-parse.R index 088f58e..5ba9613 100644 --- a/tests/testthat/test-parse.R +++ b/tests/testthat/test-parse.R @@ -1,21 +1,16 @@ test_that("can parse even if no expressions", { - expect_equal(parse_all("")$src, "") - expect_equal(parse_all("#")$src, "#") + expect_equal(parse_all("")$src, "\n") + expect_equal(parse_all("#")$src, "#\n") expect_equal(parse_all("#\n\n")$src, c("#\n", "\n")) }) -test_that("preserves trailing nl", { - expect_equal(parse_all("x")$src, "x") - expect_equal(parse_all("x\n")$src, "x\n") - - expect_equal(parse_all("")$src, "") +test_that("every line gets nl", { + expect_equal(parse_all("x")$src, "x\n") + expect_equal(parse_all("")$src, "\n") expect_equal(parse_all("\n")$src, "\n") - expect_equal(parse_all("{\n1\n}")$src, "{\n1\n}") - expect_equal(parse_all("{\n1\n}\n")$src, "{\n1\n}\n") - # even empty lines - expect_equal(parse_all("a\n\nb")$src, c("a\n", "\n", "b")) + expect_equal(parse_all("a\n\nb")$src, c("a\n", "\n", "b\n")) expect_equal(parse_all("a\n\nb\n")$src, c("a\n", "\n", "b\n")) expect_equal(parse_all("\n\n")$src, c("\n", "\n")) @@ -31,7 +26,7 @@ test_that("empty lines are never silently dropped", { # # 1 # ``` - expect_equal(parse_all(c("\n", "", "1"))$src, c("\n", "\n", "1")) + expect_equal(parse_all(c("\n", "", "1"))$src, c("\n", "\n", "1\n")) }) test_that("a character vector is equivalent to a multi-line string", { @@ -57,8 +52,8 @@ test_that("recombines multi-expression TLEs", { }) test_that("re-integrates lines without expressions", { - expect_equal(parse_all("1\n\n2")$src, c("1\n", "\n", "2")) - expect_equal(parse_all("1\n#\n2")$src, c("1\n", "#\n", "2")) + expect_equal(parse_all("1\n\n2")$src, c("1\n", "\n", "2\n")) + expect_equal(parse_all("1\n#\n2")$src, c("1\n", "#\n", "2\n")) }) test_that("expr is always an expression", { @@ -83,7 +78,7 @@ test_that("double quotes in Chinese characters not destroyed", { skip_if_not(l10n_info()[['UTF-8']]) out <- parse_all(c('1+1', '"你好"')) - expect_equal(out$src[[2]], '"你好"') + expect_equal(out$src[[2]], '"你好"\n') expect_equal(out$expr[[2]], expression("你好"), ignore_attr = "srcref") }) @@ -92,14 +87,14 @@ test_that("multibyte characters are parsed correctly", { code <- c("ϱ <- 1# g / ml", "äöüßÄÖÜπ <- 7 + 3# nonsense") out <- parse_all(code) - expect_equal(out$src, paste0(code, c("\n", ""))) + expect_equal(out$src, paste0(code, "\n")) }) # input types ------------------------------------------------------------------ test_that("can parse a call", { out <- parse_all(quote(f(a, b, c))) - expect_equal(out$src, "f(a, b, c)") + expect_equal(out$src, "f(a, b, c)\n") expect_equal( out$expr, I(list(expression(f(a, b, c)))), @@ -114,7 +109,7 @@ test_that("can parse a connection", { con <- file(path) out <- parse_all(con) - expect_equal(out$src, c("# 1\n", "1 + 1")) + expect_equal(out$src, c("# 1\n", "1 + 1\n")) expect_equal( out$expr, I(list(expression(), expression(1 + 1))), @@ -130,7 +125,7 @@ test_that("can parse a function", { # Hi 1 + 1 }) - expect_equal(out$src, c("# Hi\n", "1 + 1")) + expect_equal(out$src, c("# Hi\n", "1 + 1\n")) expect_equal( out$expr, I(list(expression(), expression(1 + 1))), @@ -145,7 +140,7 @@ test_that("parsing a function parses its body", { # Hi 1 + 1 }) - expect_equal(out$src, c("# Hi\n", "1 + 1")) + expect_equal(out$src, c("# Hi\n", "1 + 1\n")) }) test_that("dedents function body", { From 2b94a10ea4c369352bdec2e1e3e4afcafe62e678 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 28 Jun 2024 07:39:04 -0500 Subject: [PATCH 2/5] Correct minor screw ups --- NEWS.md | 2 +- R/parse.R | 4 +--- man/parse_all.Rd | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index d1c2a22..09ef8f7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,7 @@ # evaluate (development version) -* `is.value()` has been removed since it tests for an object that evaluate never creates. * `parse_all()` adds a `\n` to the end of every line, even the last one if it didn't have one in the input. +* `is.value()` has been removed since it tests for an object that evaluate never creates. * `parse_all()` no longer has a default method, which will generate better errors if you pass in something unexpectected. * The package now depends on R 4.0.0 in order to decrease our maintenance burden. * `evaluate()` automatically strips calls from conditions emitted by top-level code (these incorrectly get calls because they're wrapped inside `eval()`) (#150). diff --git a/R/parse.R b/R/parse.R index ffcc088..c2596f9 100644 --- a/R/parse.R +++ b/R/parse.R @@ -10,9 +10,7 @@ #' @return #' A data frame with columns `src`, a character vector of source code, and #' `expr`, a list-column of parsed expressions. There will be one row for each -#' top-level expression in `x`. -#' -#' A top-level expression is a complete expression +#' top-level expression in `x`. A top-level expression is a complete expression #' which would trigger execution if typed at the console. The `expression` #' object in `expr` can be of any length: it will be 0 if the top-level #' expression contains only whitespace and/or comments; 1 if the top-level diff --git a/man/parse_all.Rd b/man/parse_all.Rd index 44c3dc4..26eee69 100644 --- a/man/parse_all.Rd +++ b/man/parse_all.Rd @@ -17,9 +17,7 @@ If a connection, will be opened and closed only if it was closed initially.} \value{ A data frame with columns \code{src}, a character vector of source code, and \code{expr}, a list-column of parsed expressions. There will be one row for each -top-level expression in \code{x}. - -A top-level expression is a complete expression +top-level expression in \code{x}. A top-level expression is a complete expression which would trigger execution if typed at the console. The \code{expression} object in \code{expr} can be of any length: it will be 0 if the top-level expression contains only whitespace and/or comments; 1 if the top-level From eccecd57f74d96a5343d21bfcd6cac688d3f8ed5 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 2 Jul 2024 08:22:52 -0500 Subject: [PATCH 3/5] Polish docs --- R/parse.R | 27 ++++++++++++++++++--------- man/parse_all.Rd | 28 ++++++++++++++++++---------- 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/R/parse.R b/R/parse.R index ce3e92c..b9d85a9 100644 --- a/R/parse.R +++ b/R/parse.R @@ -8,15 +8,24 @@ #' @param filename string overriding the file name #' @param allow_error whether to allow syntax errors in `x` #' @return -#' A data frame with columns `src`, a character vector of source code, and -#' `expr`, a list-column of parsed expressions. There will be one row for each -#' top-level expression in `x`. A top-level expression is a complete expression -#' which would trigger execution if typed at the console. The `expression` -#' object in `expr` can be of any length: it will be 0 if the top-level -#' expression contains only whitespace and/or comments; 1 if the top-level -#' expression is a single scalar (like `TRUE`, `1`, or `"x"`), name, or call; -#' or 2 if the top-level expression uses `;` to put multiple expressions on -#' one line. The expressions have their srcrefs removed. +#' A data frame two columns, `src` and `expr`, and one row for each top-level +#' expression in `x`. +#' +#' `src` is a character vector of source code. Each element represents a +#' complete line (or multi-line) expression, i.e. it always has a terminal `\n`. +#' +#' `expr`, a list-column of top-level expressions. A top-level expression +#' is a complete expression which would trigger execution if typed at the +#' console. Each element is an [expression()] object, which can be of any +#' length. It will be length: +#' +#' * 0 if the top-level expression contains only whitespace and/or comments. +#' * 1 if the top-level expression is a single scalar ( +#' like `TRUE`, `1`, or `"x"`), name, or call +#' * 2 or more if the top-level expression uses `;` to put multiple expressions +#' on one line. +#' +#' The expressions have their srcrefs removed. #' #' If there are syntax errors in `x` and `allow_error = TRUE`, the data #' frame will have an attribute `PARSE_ERROR` that stores the error object. diff --git a/man/parse_all.Rd b/man/parse_all.Rd index 768ea8b..5e0966f 100644 --- a/man/parse_all.Rd +++ b/man/parse_all.Rd @@ -15,17 +15,25 @@ If a connection, will be opened and closed only if it was closed initially.} \item{allow_error}{whether to allow syntax errors in \code{x}} } \value{ -A data frame with columns \code{src}, a character vector of source code, and -\code{expr}, a list-column of parsed expressions. There will be one row for each -top-level expression in \code{x}. +A data frame two columns, \code{src} and \code{expr}, and one row for each top-level +expression in \code{x}. -A top-level expression is a complete expression -which would trigger execution if typed at the console. The \code{expression} -object in \code{expr} can be of any length: it will be 0 if the top-level -expression contains only whitespace and/or comments; 1 if the top-level -expression is a single scalar (like \code{TRUE}, \code{1}, or \code{"x"}), name, or call; -or 2 if the top-level expression uses \verb{;} to put multiple expressions on -one line. The expressions have their srcrefs removed. +\code{src} is a character vector of source code. Each element represents a +complete line (or multi-line) expression, i.e. it always has a terminal \verb{\\n}. + +\code{expr}, a list-column of top-level expressions. A top-level expression +is a complete expression which would trigger execution if typed at the +console. Each element is an \code{\link[=expression]{expression()}} object, which can be of any +length. It will be length: +\itemize{ +\item 0 if the top-level expression contains only whitespace and/or comments. +\item 1 if the top-level expression is a single scalar ( +like \code{TRUE}, \code{1}, or \code{"x"}), name, or call +\item 2 or more if the top-level expression uses \verb{;} to put multiple expressions +on one line. +} + +The expressions have their srcrefs removed. If there are syntax errors in \code{x} and \code{allow_error = TRUE}, the data frame will have an attribute \code{PARSE_ERROR} that stores the error object. From c8cb2a1c74e12f0331ffb0274e0123b671d764e8 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 2 Jul 2024 08:29:11 -0500 Subject: [PATCH 4/5] Update test --- tests/testthat/test-eval.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-eval.R b/tests/testthat/test-eval.R index a868551..1672db9 100644 --- a/tests/testthat/test-eval.R +++ b/tests/testthat/test-eval.R @@ -40,7 +40,7 @@ test_that("ACTIONS_STEP_DEBUG forces log_warning and log_echo to TRUE", { withr::local_envvar(ACTIONS_STEP_DEBUG = "true") capture.output(expect_warning(evaluate("f()"), "abc"), type = "message") }) - expect_equal(out, "f()") + expect_equal(out, "f()\n") }) test_that("data sets loaded", { From 57258fb90cc7e2d75da1997079ca85fcd22e6b9b Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 2 Jul 2024 08:34:43 -0500 Subject: [PATCH 5/5] Use correct fix --- tests/testthat/test-eval.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-eval.R b/tests/testthat/test-eval.R index 1672db9..8a559e9 100644 --- a/tests/testthat/test-eval.R +++ b/tests/testthat/test-eval.R @@ -40,7 +40,7 @@ test_that("ACTIONS_STEP_DEBUG forces log_warning and log_echo to TRUE", { withr::local_envvar(ACTIONS_STEP_DEBUG = "true") capture.output(expect_warning(evaluate("f()"), "abc"), type = "message") }) - expect_equal(out, "f()\n") + expect_equal(out, c("f()", "")) }) test_that("data sets loaded", {