Skip to content

Consistently strip trailing \n when parsing #179

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# evaluate (development version)

* `parse_all()` consistently strips the trailing `\n` from the end of each `src` vector.
* The package now depends on R 4.0.0 in order to decrease our maintenance burden.
* `evaluate()` automatically strips calls from conditions emitted by top-level code (these incorrectly get calls because they're wrapped inside `eval()`) (#150).
* `evalute(include_timing)` has been deprecated. I can't find any use of it on GitHub, and it adds substantial code complexity for little gain.
Expand Down
142 changes: 63 additions & 79 deletions R/parse.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,15 @@
#' A data frame with columns `src`, a character vector of source code, and
#' `expr`, a list-column of parsed expressions. There will be one row for each
#' top-level expression in `x`. A top-level expression is a complete expression
#' which would trigger execution if typed at the console. The `expression`
#' object in `expr` can be of any length: it will be 0 if the top-level
#' expression contains only whitespace and/or comments; 1 if the top-level
#' expression is a single scalar (like `TRUE`, `1`, or `"x"`), name, or call;
#' or 2 if the top-level expression uses `;` to put multiple expressions on
#' one line.
#' which would trigger execution if typed at the console.
#'
#' The trailing `\n` at the end of each `src` is implicit.
#'
#' The `expression` object in `expr` can be of any length: it will be 0 if
#' the top-level expression contains only whitespace and/or comments; 1 if
#' the top-level expression is a single scalar (like `TRUE`, `1`, or `"x"`),
#' name, or call; or 2 if the top-level expression uses `;` to put multiple
#' expressions on one line.
#'
#' If there are syntax errors in `x` and `allow_error = TRUE`, the data
#' frame will have an attribute `PARSE_ERROR` that stores the error object.
Expand All @@ -35,99 +38,64 @@ parse_all <- function(x, filename = NULL, allow_error = FALSE) UseMethod("parse_

#' @export
parse_all.character <- function(x, filename = NULL, allow_error = FALSE) {
if (length(grep("\n", x))) {
# strsplit('a\n', '\n') needs to return c('a', '') instead of c('a')
x <- gsub("\n$", "\n\n", x)
x[x == ""] <- "\n"
if (any(grepl("\n", x))) {
# Standardise to character vector with one line per element:
# this is the input that parse() is documented to accept
x <- unlist(strsplit(x, "\n"), recursive = FALSE, use.names = FALSE)
}
n <- length(x)

if (is.null(filename))
filename <- "<text>"
filename <- filename %||% "<text>"
src <- srcfilecopy(filename, x)
if (allow_error) {
exprs <- tryCatch(parse(text = x, srcfile = src), error = identity)
if (inherits(exprs, 'error')) return(structure(
data.frame(src = paste(x, collapse = '\n'), expr = I(list(expression()))),
PARSE_ERROR = exprs
))
if (inherits(exprs, 'error')) {
return(structure(
data.frame(src = paste(x, collapse = '\n'), expr = empty_expr()),
PARSE_ERROR = exprs
))
}
} else {
exprs <- parse(text = x, srcfile = src)
}

# No code, only comments and/or empty lines
ne <- length(exprs)
if (ne == 0) {
return(data.frame(src = append_break(x), expr = I(rep(list(expression()), n))))
}

srcref <- attr(exprs, "srcref", exact = TRUE)

# Stard/End line numbers of expressions
pos <- do.call(rbind, lapply(srcref, unclass))[, c(7, 8), drop = FALSE]
l1 <- pos[, 1]
l2 <- pos[, 2]
# Add a third column i to store the indices of expressions
pos <- cbind(pos, i = seq_len(nrow(pos)))
pos <- as.data.frame(pos) # split() does not work on matrices

# Split line number pairs into groups: if the next start line is the same as
# the last end line, the two expressions must belong to the same group
spl <- cumsum(c(TRUE, l1[-1] != l2[-ne]))
# Extract src lines and expressions for each group; also record the start line
# number of this group so we can re-order src/expr later
res <- lapply(split(pos, spl), function(p) {
pos <- data.frame(
start = vapply(srcref, `[[`, 7, FUN.VALUE = integer(1)),
end = vapply(srcref, `[[`, 8, FUN.VALUE = integer(1))
)
pos$exprs <- exprs

# parse() splits TLEs that use ; into multiple expressions so we
# join together expressions that overlaps on the same line(s)
line_group <- cumsum(is_new_line(pos$start, pos$end))
tles <- lapply(split(pos, line_group), function(p) {
n <- nrow(p)
data.frame(
src = paste(x[p[1, 1]:p[n, 2]], collapse = "\n"),
expr = I(list(exprs[p[, 3]])),
line = p[1, 1]
src = paste(x[p$start[1]:p$end[n]], collapse = "\n"),
expr = I(list(p$exprs)),
line = p$start[1]
)
})

# Now process empty expressions (comments/blank lines); see if there is a
# "gap" between the last end number + 1 and the next start number - 1
pos <- cbind(c(1, l2 + 1), c(l1 - 1, n))
pos <- pos[pos[, 1] <= pos[, 2], , drop = FALSE]

# Extract src lines from the gaps, and assign empty expressions to them
res <- c(res, lapply(seq_len(nrow(pos)), function(i) {
p <- pos[i, ]
r <- p[1]:p[2]
data.frame(
src = x[r],
expr = I(rep(list(expression()), p[2] - p[1] + 1)),
line = r - 1
)
}))

# Bind everything into a data frame, order it by line numbers, append \n to
# all src lines except the last one, and remove the line numbers
res <- do.call(rbind, res)
res <- res[order(res$line), ]
res$src <- append_break(res$src)
res$line <- NULL

# For compatibility with evaluate (<= 0.5.7): remove the last empty line (YX:
# I think this is a bug)
n <- nrow(res)
if (res$src[n] == "") res <- res[-n, ]

tles <- do.call(rbind, tles)

# parse() drops comments and whitespace so we add them back in
gaps <- data.frame(start = c(1, pos$end + 1), end = c(pos$start - 1, n))
gaps <- gaps[gaps$start <= gaps$end, ,]
# in sequence(), nvec is equivalent to length.out
lines <- sequence(from = gaps$start, nvec = gaps$end - gaps$start + 1)
comments <- data.frame(
src = x[lines],
expr = empty_expr(length(lines)),
line = lines
)

res <- rbind(tles, comments)
res <- res[order(res$line), c("src", "expr")]
rownames(res) <- NULL
res
}

# YX: It seems evaluate (<= 0.5.7) had difficulties with preserving line breaks,
# so it ended up with adding \n to the first n-1 lines, which does not seem to
# be necessary to me, and is actually buggy. I'm not sure if it is worth shaking
# the earth and work with authors of reverse dependencies to sort this out. Also
# see #42.
append_break <- function(x) {
n <- length(x)
if (n <= 1) x else paste(x, rep(c("\n", ""), c(n - 1, 1)), sep = "")
}

#' @export
parse_all.connection <- function(x, filename = NULL, ...) {
if (!isOpen(x, "r")) {
Expand Down Expand Up @@ -185,3 +153,19 @@ parse_all.call <- function(x, filename = NULL, ...) {
out$expr <- list(as.expression(x))
out
}

# Helpers ---------------------------------------------------------------------

empty_expr <- function(n = 1) {
I(rep(list(expression()), n))
}

is_new_line <- function(start, end) {
if (length(start) == 0) {
logical()
} else if (length(start) == 1) {
TRUE
} else {
c(TRUE, start[-1] != end[-length(end)])
}
}
15 changes: 9 additions & 6 deletions man/parse_all.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions tests/testthat/test-eval.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,6 @@ test_that("multiple expressions on one line can get printed as expected", {
expect_output_types(ev, c("source", "text", "text"))
})

test_that("multiple lines of comments do not lose the terminating \\n", {
ev <- evaluate("# foo\n#bar")
expect_output_types(ev, c("source", "source"))
expect_equal(ev[[1]]$src, "# foo\n")
})

test_that("check_stop_on_error converts integer to enum", {
expect_equal(check_stop_on_error(0), "continue")
expect_equal(check_stop_on_error(1), "stop")
Expand Down
49 changes: 48 additions & 1 deletion tests/testthat/test-parse.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,50 @@
test_that("can parse even if no expressions", {
expect_equal(parse_all("")$src, "")
expect_equal(parse_all("#")$src, "#")
expect_equal(parse_all("#\n\n")$src, c("#", ""))
})

test_that("every TLE has an implicit nl", {
expect_equal(parse_all("x")$src, "x")
expect_equal(parse_all("x\n")$src, "x")
expect_equal(parse_all("")$src, "")
expect_equal(parse_all("\n")$src, "")

expect_equal(parse_all("{\n1\n}")$src, "{\n1\n}")
expect_equal(parse_all("{\n1\n}\n")$src, "{\n1\n}")

# even empty lines
expect_equal(parse_all("a\n\nb")$src, c("a", "", "b"))
expect_equal(parse_all("\n\n")$src, c("", ""))
})

test_that("a character vector is equivalent to a multi-line string", {
expect_equal(parse_all(c("a", "b")), parse_all(c("a\nb")))
})

test_that("recombines multi-expression TLEs", {
expect_equal(
parse_all("1;2;3")$expr[[1]],
expression(1, 2, 3),
ignore_attr = "srcref"
)
expect_equal(
parse_all("1+\n2;3")$expr[[1]],
expression(1 + 2, 3),
ignore_attr = "srcref"
)
expect_equal(
parse_all("1+\n2;3+\n4; 5")$expr[[1]],
expression(1 + 2, 3 + 4, 5),
ignore_attr = "srcref"
)
})

test_that("re-integrates lines without expressions", {
expect_equal(parse_all("1\n\n2")$src, c("1", "", "2"))
expect_equal(parse_all("1\n#\n2")$src, c("1", "#", "2"))
})

test_that("expr is always an expression", {
expect_equal(parse_all("#")$expr[[1]], expression())
expect_equal(parse_all("1")$expr[[1]], expression(1), ignore_attr = "srcref")
Expand Down Expand Up @@ -34,7 +81,7 @@ if (isTRUE(l10n_info()[['UTF-8']])) {

test_that("multibyte characters are parsed correct", {
code <- c("ϱ <- 1# g / ml", "äöüßÄÖÜπ <- 7 + 3# nonsense")
expect_identical(parse_all(code)$src, append_break(code))
expect_identical(parse_all(code)$src, code)
})
}

Expand Down
Loading