diff --git a/NEWS.md b/NEWS.md index eb636ef5..674b32c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # evaluate (development version) +* `parse_all()` consistently strips the trailing `\n` from the end of each `src` vector. * The package now depends on R 4.0.0 in order to decrease our maintenance burden. * `evaluate()` automatically strips calls from conditions emitted by top-level code (these incorrectly get calls because they're wrapped inside `eval()`) (#150). * `evalute(include_timing)` has been deprecated. I can't find any use of it on GitHub, and it adds substantial code complexity for little gain. diff --git a/R/parse.R b/R/parse.R index f57cb868..3e91d641 100644 --- a/R/parse.R +++ b/R/parse.R @@ -11,12 +11,15 @@ #' A data frame with columns `src`, a character vector of source code, and #' `expr`, a list-column of parsed expressions. There will be one row for each #' top-level expression in `x`. A top-level expression is a complete expression -#' which would trigger execution if typed at the console. The `expression` -#' object in `expr` can be of any length: it will be 0 if the top-level -#' expression contains only whitespace and/or comments; 1 if the top-level -#' expression is a single scalar (like `TRUE`, `1`, or `"x"`), name, or call; -#' or 2 if the top-level expression uses `;` to put multiple expressions on -#' one line. +#' which would trigger execution if typed at the console. +#' +#' The trailing `\n` at the end of each `src` is implicit. +#' +#' The `expression` object in `expr` can be of any length: it will be 0 if +#' the top-level expression contains only whitespace and/or comments; 1 if +#' the top-level expression is a single scalar (like `TRUE`, `1`, or `"x"`), +#' name, or call; or 2 if the top-level expression uses `;` to put multiple +#' expressions on one line. #' #' If there are syntax errors in `x` and `allow_error = TRUE`, the data #' frame will have an attribute `PARSE_ERROR` that stores the error object. @@ -35,99 +38,64 @@ parse_all <- function(x, filename = NULL, allow_error = FALSE) UseMethod("parse_ #' @export parse_all.character <- function(x, filename = NULL, allow_error = FALSE) { - if (length(grep("\n", x))) { - # strsplit('a\n', '\n') needs to return c('a', '') instead of c('a') - x <- gsub("\n$", "\n\n", x) - x[x == ""] <- "\n" + if (any(grepl("\n", x))) { + # Standardise to character vector with one line per element: + # this is the input that parse() is documented to accept x <- unlist(strsplit(x, "\n"), recursive = FALSE, use.names = FALSE) } n <- length(x) - if (is.null(filename)) - filename <- "" + filename <- filename %||% "" src <- srcfilecopy(filename, x) if (allow_error) { exprs <- tryCatch(parse(text = x, srcfile = src), error = identity) - if (inherits(exprs, 'error')) return(structure( - data.frame(src = paste(x, collapse = '\n'), expr = I(list(expression()))), - PARSE_ERROR = exprs - )) + if (inherits(exprs, 'error')) { + return(structure( + data.frame(src = paste(x, collapse = '\n'), expr = empty_expr()), + PARSE_ERROR = exprs + )) + } } else { exprs <- parse(text = x, srcfile = src) } - # No code, only comments and/or empty lines - ne <- length(exprs) - if (ne == 0) { - return(data.frame(src = append_break(x), expr = I(rep(list(expression()), n)))) - } - srcref <- attr(exprs, "srcref", exact = TRUE) - - # Stard/End line numbers of expressions - pos <- do.call(rbind, lapply(srcref, unclass))[, c(7, 8), drop = FALSE] - l1 <- pos[, 1] - l2 <- pos[, 2] - # Add a third column i to store the indices of expressions - pos <- cbind(pos, i = seq_len(nrow(pos))) - pos <- as.data.frame(pos) # split() does not work on matrices - - # Split line number pairs into groups: if the next start line is the same as - # the last end line, the two expressions must belong to the same group - spl <- cumsum(c(TRUE, l1[-1] != l2[-ne])) - # Extract src lines and expressions for each group; also record the start line - # number of this group so we can re-order src/expr later - res <- lapply(split(pos, spl), function(p) { + pos <- data.frame( + start = vapply(srcref, `[[`, 7, FUN.VALUE = integer(1)), + end = vapply(srcref, `[[`, 8, FUN.VALUE = integer(1)) + ) + pos$exprs <- exprs + + # parse() splits TLEs that use ; into multiple expressions so we + # join together expressions that overlaps on the same line(s) + line_group <- cumsum(is_new_line(pos$start, pos$end)) + tles <- lapply(split(pos, line_group), function(p) { n <- nrow(p) data.frame( - src = paste(x[p[1, 1]:p[n, 2]], collapse = "\n"), - expr = I(list(exprs[p[, 3]])), - line = p[1, 1] + src = paste(x[p$start[1]:p$end[n]], collapse = "\n"), + expr = I(list(p$exprs)), + line = p$start[1] ) }) - - # Now process empty expressions (comments/blank lines); see if there is a - # "gap" between the last end number + 1 and the next start number - 1 - pos <- cbind(c(1, l2 + 1), c(l1 - 1, n)) - pos <- pos[pos[, 1] <= pos[, 2], , drop = FALSE] - - # Extract src lines from the gaps, and assign empty expressions to them - res <- c(res, lapply(seq_len(nrow(pos)), function(i) { - p <- pos[i, ] - r <- p[1]:p[2] - data.frame( - src = x[r], - expr = I(rep(list(expression()), p[2] - p[1] + 1)), - line = r - 1 - ) - })) - - # Bind everything into a data frame, order it by line numbers, append \n to - # all src lines except the last one, and remove the line numbers - res <- do.call(rbind, res) - res <- res[order(res$line), ] - res$src <- append_break(res$src) - res$line <- NULL - - # For compatibility with evaluate (<= 0.5.7): remove the last empty line (YX: - # I think this is a bug) - n <- nrow(res) - if (res$src[n] == "") res <- res[-n, ] - + tles <- do.call(rbind, tles) + + # parse() drops comments and whitespace so we add them back in + gaps <- data.frame(start = c(1, pos$end + 1), end = c(pos$start - 1, n)) + gaps <- gaps[gaps$start <= gaps$end, ,] + # in sequence(), nvec is equivalent to length.out + lines <- sequence(from = gaps$start, nvec = gaps$end - gaps$start + 1) + comments <- data.frame( + src = x[lines], + expr = empty_expr(length(lines)), + line = lines + ) + + res <- rbind(tles, comments) + res <- res[order(res$line), c("src", "expr")] rownames(res) <- NULL res } -# YX: It seems evaluate (<= 0.5.7) had difficulties with preserving line breaks, -# so it ended up with adding \n to the first n-1 lines, which does not seem to -# be necessary to me, and is actually buggy. I'm not sure if it is worth shaking -# the earth and work with authors of reverse dependencies to sort this out. Also -# see #42. -append_break <- function(x) { - n <- length(x) - if (n <= 1) x else paste(x, rep(c("\n", ""), c(n - 1, 1)), sep = "") -} - #' @export parse_all.connection <- function(x, filename = NULL, ...) { if (!isOpen(x, "r")) { @@ -185,3 +153,19 @@ parse_all.call <- function(x, filename = NULL, ...) { out$expr <- list(as.expression(x)) out } + +# Helpers --------------------------------------------------------------------- + +empty_expr <- function(n = 1) { + I(rep(list(expression()), n)) +} + +is_new_line <- function(start, end) { + if (length(start) == 0) { + logical() + } else if (length(start) == 1) { + TRUE + } else { + c(TRUE, start[-1] != end[-length(end)]) + } +} diff --git a/man/parse_all.Rd b/man/parse_all.Rd index 26eee692..13be6999 100644 --- a/man/parse_all.Rd +++ b/man/parse_all.Rd @@ -18,12 +18,15 @@ If a connection, will be opened and closed only if it was closed initially.} A data frame with columns \code{src}, a character vector of source code, and \code{expr}, a list-column of parsed expressions. There will be one row for each top-level expression in \code{x}. A top-level expression is a complete expression -which would trigger execution if typed at the console. The \code{expression} -object in \code{expr} can be of any length: it will be 0 if the top-level -expression contains only whitespace and/or comments; 1 if the top-level -expression is a single scalar (like \code{TRUE}, \code{1}, or \code{"x"}), name, or call; -or 2 if the top-level expression uses \verb{;} to put multiple expressions on -one line. +which would trigger execution if typed at the console. + +The trailing \verb{\\n} at the end of each \code{src} is implicit. + +The \code{expression} object in \code{expr} can be of any length: it will be 0 if +the top-level expression contains only whitespace and/or comments; 1 if +the top-level expression is a single scalar (like \code{TRUE}, \code{1}, or \code{"x"}), +name, or call; or 2 if the top-level expression uses \verb{;} to put multiple +expressions on one line. If there are syntax errors in \code{x} and \code{allow_error = TRUE}, the data frame will have an attribute \code{PARSE_ERROR} that stores the error object. diff --git a/tests/testthat/test-eval.R b/tests/testthat/test-eval.R index f6553e01..3424625b 100644 --- a/tests/testthat/test-eval.R +++ b/tests/testthat/test-eval.R @@ -79,12 +79,6 @@ test_that("multiple expressions on one line can get printed as expected", { expect_output_types(ev, c("source", "text", "text")) }) -test_that("multiple lines of comments do not lose the terminating \\n", { - ev <- evaluate("# foo\n#bar") - expect_output_types(ev, c("source", "source")) - expect_equal(ev[[1]]$src, "# foo\n") -}) - test_that("check_stop_on_error converts integer to enum", { expect_equal(check_stop_on_error(0), "continue") expect_equal(check_stop_on_error(1), "stop") diff --git a/tests/testthat/test-parse.R b/tests/testthat/test-parse.R index 2c592125..c7e1c093 100644 --- a/tests/testthat/test-parse.R +++ b/tests/testthat/test-parse.R @@ -1,3 +1,50 @@ +test_that("can parse even if no expressions", { + expect_equal(parse_all("")$src, "") + expect_equal(parse_all("#")$src, "#") + expect_equal(parse_all("#\n\n")$src, c("#", "")) +}) + +test_that("every TLE has an implicit nl", { + expect_equal(parse_all("x")$src, "x") + expect_equal(parse_all("x\n")$src, "x") + expect_equal(parse_all("")$src, "") + expect_equal(parse_all("\n")$src, "") + + expect_equal(parse_all("{\n1\n}")$src, "{\n1\n}") + expect_equal(parse_all("{\n1\n}\n")$src, "{\n1\n}") + + # even empty lines + expect_equal(parse_all("a\n\nb")$src, c("a", "", "b")) + expect_equal(parse_all("\n\n")$src, c("", "")) +}) + +test_that("a character vector is equivalent to a multi-line string", { + expect_equal(parse_all(c("a", "b")), parse_all(c("a\nb"))) +}) + +test_that("recombines multi-expression TLEs", { + expect_equal( + parse_all("1;2;3")$expr[[1]], + expression(1, 2, 3), + ignore_attr = "srcref" + ) + expect_equal( + parse_all("1+\n2;3")$expr[[1]], + expression(1 + 2, 3), + ignore_attr = "srcref" + ) + expect_equal( + parse_all("1+\n2;3+\n4; 5")$expr[[1]], + expression(1 + 2, 3 + 4, 5), + ignore_attr = "srcref" + ) +}) + +test_that("re-integrates lines without expressions", { + expect_equal(parse_all("1\n\n2")$src, c("1", "", "2")) + expect_equal(parse_all("1\n#\n2")$src, c("1", "#", "2")) +}) + test_that("expr is always an expression", { expect_equal(parse_all("#")$expr[[1]], expression()) expect_equal(parse_all("1")$expr[[1]], expression(1), ignore_attr = "srcref") @@ -34,7 +81,7 @@ if (isTRUE(l10n_info()[['UTF-8']])) { test_that("multibyte characters are parsed correct", { code <- c("ϱ <- 1# g / ml", "äöüßÄÖÜπ <- 7 + 3# nonsense") - expect_identical(parse_all(code)$src, append_break(code)) + expect_identical(parse_all(code)$src, code) }) }