diff --git a/DESCRIPTION b/DESCRIPTION index b44131b1..dcd3bb43 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: xml2 Title: Parse XML -Version: 1.3.2.9000 +Version: 1.3.2.9001 Authors@R: c(person(given = "Hadley", family = "Wickham", @@ -40,7 +40,7 @@ VignetteBuilder: knitr Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 SystemRequirements: libxml2: libxml2-dev (deb), libxml2-devel (rpm) Collate: diff --git a/NEWS.md b/NEWS.md index 13cf97f4..37ac7995 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # xml2 (development version) +* `xml_find_all.xml_nodeset()` gains a `flatten` argument to control whether to return a single nodeset or a list of nodesets (#311, @jakejh) + * `write_xml()` and `write_html()` now return NULL invisibly, as they did prior to version 1.3.0 (#307) # xml2 1.3.2 diff --git a/R/xml_find.R b/R/xml_find.R index efbf17cb..14113220 100644 --- a/R/xml_find.R +++ b/R/xml_find.R @@ -12,9 +12,11 @@ #' @param xpath A string containing a xpath (1.0) expression. #' @inheritParams xml_name -#' @return `xml_find_all` always returns a nodeset: if there are no matches -#' the nodeset will be empty. The result will always be unique; repeated -#' nodes are automatically de-duplicated. +#' @param ... Further arguments passed to or from other methods. +#' @return `xml_find_all` returns a nodeset if applied to a node, and a nodeset +#' or a list of nodesets if applied to a nodeset. If there are no matches, +#' the nodeset(s) will be empty. Within each nodeset, the result will always +#' be unique; repeated nodes are automatically de-duplicated. #' #' `xml_find_first` returns a node if applied to a node, and a nodeset #' if applied to a nodeset. The output is *always* the same size as @@ -46,11 +48,16 @@ #' ") #' para <- xml_find_all(x, ".//p") #' -#' # If you apply xml_find_all to a nodeset, it finds all matches, -#' # de-duplicates them, and returns as a single list. This means you +#' # By default, if you apply xml_find_all to a nodeset, it finds all matches, +#' # de-duplicates them, and returns as a single nodeset. This means you #' # never know how many results you'll get #' xml_find_all(para, ".//b") #' +#' # If you set flatten to FALSE, though, xml_find_all will return a list of +#' # nodesets, where each nodeset contains the matches for the corresponding +#' # node in the original nodeset. +#' xml_find_all(para, ".//b", flatten = FALSE) +#' #' # xml_find_first only returns the first match per input node. If there are 0 #' # matches it will return a missing node #' xml_find_first(para, ".//b") @@ -67,31 +74,37 @@ #' ') #' xml_find_all(x, ".//f:doc") #' xml_find_all(x, ".//f:doc", xml_ns(x)) -xml_find_all <- function(x, xpath, ns = xml_ns(x)) { +xml_find_all <- function(x, xpath, ns = xml_ns(x), ...) { UseMethod("xml_find_all") } #' @export -xml_find_all.xml_missing <- function(x, xpath, ns = xml_ns(x)) { +xml_find_all.xml_missing <- function(x, xpath, ns = xml_ns(x), ...) { xml_nodeset() } #' @export -xml_find_all.xml_node <- function(x, xpath, ns = xml_ns(x)) { +xml_find_all.xml_node <- function(x, xpath, ns = xml_ns(x), ...) { nodes <- .Call(xpath_search, x$node, x$doc, xpath, ns, Inf) xml_nodeset(nodes) } +#' @param flatten A logical indicating whether to return a single, flattened +#' nodeset or a list of nodesets. #' @export -xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x)) { +#' @rdname xml_find_all +xml_find_all.xml_nodeset <- function(x, xpath, ns = xml_ns(x), flatten = TRUE, ...) { if (length(x) == 0) return(xml_nodeset()) - nodes <- unlist(recursive = FALSE, - lapply(x, function(x) - .Call(xpath_search, x$node, x$doc, xpath, ns, Inf))) + res <- lapply(x, function(x) .Call(xpath_search, x$node, x$doc, xpath, ns, Inf)) - xml_nodeset(nodes) + if (isTRUE(flatten)) { + return(xml_nodeset(unlist(recursive = FALSE, res))) + } + + res[] <- lapply(res, xml_nodeset) + res } #' @export diff --git a/man/xml_find_all.Rd b/man/xml_find_all.Rd index 8ede3306..0fb232c5 100644 --- a/man/xml_find_all.Rd +++ b/man/xml_find_all.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/xml_find.R \name{xml_find_all} \alias{xml_find_all} +\alias{xml_find_all.xml_nodeset} \alias{xml_find_first} \alias{xml_find_num} \alias{xml_find_chr} @@ -9,7 +10,9 @@ \alias{xml_find_one} \title{Find nodes that match an xpath expression.} \usage{ -xml_find_all(x, xpath, ns = xml_ns(x)) +xml_find_all(x, xpath, ns = xml_ns(x), ...) + +\method{xml_find_all}{xml_nodeset}(x, xpath, ns = xml_ns(x), flatten = TRUE, ...) xml_find_first(x, xpath, ns = xml_ns(x)) @@ -31,11 +34,17 @@ in namespace \code{foo}, it will be called \code{foo:bar}. (And similarly for attributes). Default namespaces must be given an explicit name. The ns is ignored when using \code{\link[=xml_name<-]{xml_name<-()}} and \code{\link[=xml_set_name]{xml_set_name()}}.} + +\item{...}{Further arguments passed to or from other methods.} + +\item{flatten}{A logical indicating whether to return a single, flattened +nodeset or a list of nodesets.} } \value{ -\code{xml_find_all} always returns a nodeset: if there are no matches -the nodeset will be empty. The result will always be unique; repeated -nodes are automatically de-duplicated. +\code{xml_find_all} returns a nodeset if applied to a node, and a nodeset +or a list of nodesets if applied to a nodeset. If there are no matches, +the nodeset(s) will be empty. Within each nodeset, the result will always +be unique; repeated nodes are automatically de-duplicated. \code{xml_find_first} returns a node if applied to a node, and a nodeset if applied to a nodeset. The output is \emph{always} the same size as @@ -79,11 +88,16 @@ x <- read_xml(" ") para <- xml_find_all(x, ".//p") -# If you apply xml_find_all to a nodeset, it finds all matches, -# de-duplicates them, and returns as a single list. This means you +# By default, if you apply xml_find_all to a nodeset, it finds all matches, +# de-duplicates them, and returns as a single nodeset. This means you # never know how many results you'll get xml_find_all(para, ".//b") +# If you set flatten to FALSE, though, xml_find_all will return a list of +# nodesets, where each nodeset contains the matches for the corresponding +# node in the original nodeset. +xml_find_all(para, ".//b", flatten = FALSE) + # xml_find_first only returns the first match per input node. If there are 0 # matches it will return a missing node xml_find_first(para, ".//b") diff --git a/tests/testthat/test-xml_find.R b/tests/testthat/test-xml_find.R index c525072f..93181f0d 100644 --- a/tests/testthat/test-xml_find.R +++ b/tests/testthat/test-xml_find.R @@ -49,6 +49,17 @@ test_that("no matches returns empty nodeset", { expect_equal(length(xml_find_all(x, "//baz")), 0) }) +test_that("xml_find_all returns nodeset or list of nodesets based on flatten", { + x <- read_xml("

Some text.

+

Some other text.

+

No bold here!

") + y <- xml_find_all(x, './/p') + z <- xml_find_all(y, './/b', flatten = FALSE) + expect_s3_class(xml_find_all(y, './/b'), 'xml_nodeset') + expect_type(z, 'list') + expect_s3_class(z[[1L]], 'xml_nodeset') +}) + # Find num --------------------------------------------------------------------- test_that("xml_find_num errors with non numeric results", { x <- read_xml("")