From ca46e3980542c964ebc36971eb22e4b942214f4b Mon Sep 17 00:00:00 2001 From: Duncan Murdoch Date: Thu, 14 Dec 2023 15:35:39 -0500 Subject: [PATCH] Make latex_formatC0 based on Jeroen's patch to commonmark. --- DESCRIPTION | 2 +- NAMESPACE | 1 + NEWS.md | 3 ++- R/commonmark.R | 39 +++++++++++++++++++++++++++++++++ R/processConcordance.R | 32 ++++++++++++++++++++++++--- R/test_packages.R | 12 +++++++--- man/html_commonmark_document.Rd | 29 +++++++++++------------- man/processLatexConcordance.Rd | 7 +++++- 8 files changed, 100 insertions(+), 25 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4654fb8..08aac6c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,7 @@ URL: https://github.com/dmurdoch/RmdConcord, BugReports: https://github.com/dmurdoch/RmdConcord/issues Imports: rmarkdown, tools, knitr (>= 1.42) -Suggests: markdown +Suggests: markdown, commonmark (>= 1.9.0) VignetteBuilder: knitr, rmarkdown SystemRequirements: pandoc (>=2.11.3 with 'commonmark_x' processing and 'sourcepos' extension, needed for diff --git a/NAMESPACE b/NAMESPACE index b671152..8f8f99b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(processConcordance, html_with_concordance, pdf_documentC0, pdf_with_concordance, + latex_formatC0, test_packages, tidy_validate) diff --git a/NEWS.md b/NEWS.md index 03f4405..5c10700 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,8 @@ * Added support for `markdown::html_format()` output via new `html_formatC()` function. -* Support for `markdown::latex_format()` is not planned. +* Support for `markdown::latex_format()` is planned, +but requires an update to the `commonmark` package. See `?html_formatC()` for some discussion of the issues. # RmdConcord 0.2.0 diff --git a/R/commonmark.R b/R/commonmark.R index 9cee2f8..69c0a05 100644 --- a/R/commonmark.R +++ b/R/commonmark.R @@ -24,3 +24,42 @@ html_formatC <-function(options = list(sourcepos = TRUE), ...) { res } +latex_formatC0 <- function(latex_engine = "pdflatex", + options = list(sourcepos = TRUE), + defineSconcordance = TRUE, + ...) { + + # Have we got the suggested dependencies? + test_packages(pandoc = FALSE) + + sourcepos <- options$sourcepos + + if (is.null(sourcepos)) + options$sourcepos <- sourcepos <- TRUE + + res <- markdown::latex_format(options = options, + latex_engine = latex_engine, ...) + res$knitr$opts_knit$concordance <- sourcepos + if (sourcepos) { + + # Should produce .tex, not go directly to .pdf + res$pandoc$RmdConcord_ext <- res$pandoc$ext + res$pandoc$ext = ".tex" + + # Replace the old post_processor with ours + res$RmdConcord_post_processor <- res$post_processor + res$post_processor <- function(yaml, infile, outfile, ...) { + workdir <- dirname(outfile) + # We should have a concordance file + concordanceFile <- paste0(sans_ext(normalizePath(infile)), "-concordance.tex") + origdir <- setwd(workdir) + on.exit(setwd(origdir)) + # Modify the .tex file + processLatexConcordance(outfile, followConcordance = concordanceFile, defineSconcordance = defineSconcordance, + infile = infile) + + outfile + } + } + res +} diff --git a/R/processConcordance.R b/R/processConcordance.R index 225e762..4deabe1 100644 --- a/R/processConcordance.R +++ b/R/processConcordance.R @@ -90,7 +90,8 @@ processConcordance <- function(filename, newfilename = filename, processLatexConcordance <- function(filename, newfilename = filename, rename = NULL, followConcordance = NULL, - defineSconcordance = TRUE) { + defineSconcordance = TRUE, + infile = "input") { # read the file lines <- readLines(filename) prevConcordance <- NULL @@ -106,7 +107,7 @@ processLatexConcordance <- function(filename, newfilename = filename, if (defineSconcordance) { # Insert \Sconcordance definition beginDoc <- match(TRUE, lines == "\\begin{document}") - lines <- append(lines, r"(\newcommand{\Sconcordance}[1]{% + lines <- append(lines, strsplit(r"(\newcommand{\Sconcordance}[1]{% \ifx\pdfoutput\undefined% \csname newcount\endcsname\pdfoutput\fi% \ifcase\pdfoutput\special{#1}% @@ -118,8 +119,32 @@ processLatexConcordance <- function(filename, newfilename = filename, \endgroup% \fi} -)", beginDoc - 1) +)", "\n")[[1]], beginDoc - 1) } + source_regexp <- " %sourcepos\\(([0-9]+):[^)]*\\)" + sourcecomments <- any(grepl(source_regexp, lines)) + if (sourcecomments) { + # First, remove the source comments from the + # header lines + for (i in c('title', 'author', 'date')) { + regexp <- paste0("(\\\\", i, "\\{.*)", source_regexp, "}$") + lines <- sub(regexp, "\\1}", lines) + } + srcline <- rep(NA_integer_, length(lines)) + srcfile <- rep(infile, length(lines)) + regexp <- paste0(".*", source_regexp, "$") + datapos <- grep(regexp, lines) + if (length(datapos) == 0) + stop("No sourcepos attributes found.") + srcline[datapos] <- as.integer(sub(regexp, "\\1", lines[datapos])) + oldname <- names(rename) + for (i in seq_along(rename)) + srcfile[datapos] <- sub(oldname[i], rename[i], srcfile[datapos], fixed = TRUE) + # Remove the sourcepos records now. + regexp <- paste0(source_regexp, "$") + lines[datapos] <- sub(regexp, "", lines[datapos]) + } else { + # insert line breaks lines <- gsub("\\datapos{", "%\n\\datapos{", lines, fixed = TRUE) # don't lose blank lines; they separate paragraphs. @@ -139,6 +164,7 @@ processLatexConcordance <- function(filename, newfilename = filename, # Remove the data-pos records now. There might be several on a line # but we want to ignore them all lines[datapos] <- gsub(regexp, "\\3", lines[datapos]) + } offset <- 0 repeat { if (all(is.na(srcline))) diff --git a/R/test_packages.R b/R/test_packages.R index 1d9345c..62e5bf4 100644 --- a/R/test_packages.R +++ b/R/test_packages.R @@ -6,9 +6,15 @@ test_packages <- function(error = TRUE, pandoc = TRUE) { if (pandoc && !pandoc_available("2.11.3")) message <- "Pandoc 2.11.3 or higher is needed. " - if (!pandoc && (!requireNamespace("markdown") || - packageVersion("markdown") < "1.12.1")) - message <- c(message, "markdown v 1.12.1 or higher is needed.") + if (!pandoc) { + if (!requireNamespace("markdown") || + packageVersion("markdown") < "1.12.1") + message <- c(message, "markdown v 1.12.1 or higher is needed.") + if (!requireNamespace("commonmark") || + packageVersion("commonmark") < "1.9.0" || + !("sourcepos" %in% names(formals(commonmark::markdown_latex)))) + message <- c(message, "commonmark v 1.9.0 or higher with sourcepos argument in commonmark::markdown_latex() is needed.") + } if (!length(message)) TRUE else if (error) diff --git a/man/html_commonmark_document.Rd b/man/html_commonmark_document.Rd index 12151d0..8d87861 100644 --- a/man/html_commonmark_document.Rd +++ b/man/html_commonmark_document.Rd @@ -3,7 +3,7 @@ \alias{html_vignetteC} \alias{pdf_documentC0} \alias{html_formatC} -\alias{latex_formatC} +\alias{latex_formatC0} \title{ R Markdown drivers to add concordance } @@ -26,6 +26,10 @@ pdf_documentC0(latex_engine = "pdflatex", defineSconcordance = TRUE, ...) html_formatC(options = list(sourcepos = TRUE), ...) +latex_formatC0(latex_engine = "pdflatex", + options = list(sourcepos = TRUE), + defineSconcordance = TRUE, + ...) } \arguments{ \item{latex_engine}{ @@ -53,9 +57,11 @@ Each driver modifies the standard driver from \pkg{rmarkdown} or and adds concordances. } \note{ -The \code{pdf_documentC0} function adds the concordances, +The \code{pdf_documentC0} and \code{latex_formatC0} +functions add the concordances, but they won't be interpreted by LaTeX or PDF previewers. -To get that to happen, use \code{patchDVI::pdf_documentC}. +To get that to happen, use \code{patchDVI::pdf_documentC} +or \code{patchDVI::latex_formatC} respectively. The \code{html_formatC} function requires \pkg{markdown} version 1.12.1 or higher. If a lower version of that @@ -68,19 +74,10 @@ be off by a few lines as the underlying Commonmark processor only issues source position records once per paragraph. -A \code{latex_formatC} driver appears as if it would be -quite messy and is not currently planned. The issues are: -\itemize{ -\item{\pkg{commonmark} doesn't support -source position attributes in LaTeX output} -\item{It doesn't allow edits between the parsing and rendering steps. This is what \code{pdf_documentC} does.} -\item A possible strategy would be to render first to XML -(which does keep source position attributes), then convert -the XML to LaTeX with macros inserted to record source -positions. However, \code{markdown::latex_format} produces -the final LaTeX document in several steps, and would have to -support this two-stage rendering on some but not all of -the steps.} +The \code{latex_formatC0} driver requires an update +to the \pkg{commonmark} package, to a version later +than 1.9.0. Currently a development version can be installed from Github using +\code{remotes::install_github("r-lib/commonmark@latex-sourcepos")}. } \value{ An R Markdown output format object which will add concordance diff --git a/man/processLatexConcordance.Rd b/man/processLatexConcordance.Rd index 70b9c64..4b9e56f 100644 --- a/man/processLatexConcordance.Rd +++ b/man/processLatexConcordance.Rd @@ -14,7 +14,8 @@ processLatexConcordance(filename, newfilename = filename, rename = NULL, followConcordance = NULL, - defineSconcordance = TRUE) + defineSconcordance = TRUE, + infile = "input") } \arguments{ \item{filename}{ @@ -37,6 +38,10 @@ concordances. \item{defineSconcordance}{ Whether to insert the definition of the \verb{\\Sconcordance} macro. } + \item{infile}{ +The source file to assume for the concordance if it is +not given in the file. + } } \value{ Called for the side effect of rewriting the concordance, it returns