Skip to content

Commit

Permalink
Improve url manipulation tooling (#611)
Browse files Browse the repository at this point in the history
* Check inputs and export `url_modify()`. Fixes #464.
* Check inputs to `url_build()`. Fixes #482.
  • Loading branch information
hadley authored Dec 24, 2024
1 parent e58e86c commit e6f425e
Show file tree
Hide file tree
Showing 9 changed files with 325 additions and 53 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ export(secret_write_rds)
export(signal_total_pages)
export(throttle_status)
export(url_build)
export(url_modify)
export(url_parse)
export(with_mock)
export(with_mocked_responses)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# httr2 (development version)

* New `url_modify()` makes it easier to modify an existing url (#464).
* New `req_url_relative()` for constructing relative urls (#449).
* `url_parse()` gains `base_url` argument so you can also use it to parse relative URLs (#449).
* `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577).
Expand Down
126 changes: 103 additions & 23 deletions R/url.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
#' Parse and build URLs
#' Parse a URL into its component pieces
#'
#' `url_parse()` parses a URL into its component pieces; `url_build()` does
#' the reverse, converting a list of pieces into a string URL. See `r rfc(3986)`
#' for the details of the parsing algorithm.
#' `url_parse()` parses a URL into its component parts, powered by
#' [curl::curl_parse_url()]. The parsing algorithm follows the specifications
#' detailed in `r rfc(3986)`.
#'
#' @param url For `url_parse()` a string to parse into a URL;
#' for `url_build()` a URL to turn back into a string.
#' @param url A string containing the URL to parse.
#' @param base_url Use this as a parent, if `url` is a relative URL.
#' @returns
#' * `url_build()` returns a string.
#' * `url_parse()` returns a URL: a S3 list with class `httr2_url`
#' and elements `scheme`, `hostname`, `port`, `path`, `fragment`, `query`,
#' `username`, `password`.
#' @returns An S3 object of class `httr2_url` with the following components:
#' `scheme`, `hostname`, `username`, `password`, `port`, `path`, `query`, and
#' `fragment`.
#' @export
#' @family URL manipulation
#' @examples
#' url_parse("http://google.com/")
#' url_parse("http://google.com:80/")
Expand All @@ -22,12 +20,6 @@
#' # You can parse a relative URL if you also provide a base url
#' url_parse("foo", "http://google.com/bar/")
#' url_parse("..", "http://google.com/bar/")
#'
#' url <- url_parse("http://google.com/")
#' url$port <- 80
#' url$hostname <- "example.com"
#' url$query <- list(a = 1, b = 2, c = 3)
#' url_build(url)
url_parse <- function(url, base_url = NULL) {
check_string(url)
check_string(base_url, allow_null = TRUE)
Expand All @@ -48,10 +40,88 @@ url_parse <- function(url, base_url = NULL) {
parsed
}

url_modify <- function(url, ..., error_call = caller_env()) {
url <- url_parse(url)
url <- modify_list(url, ..., error_call = error_call)
url_build(url)
#' Modify a URL
#'
#' Modify components of a URL. The default value of each argument, `NULL`,
#' means leave the component as is. If you want to remove a component,
#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will
#' create a relative URL.
#'
#' @param url A string or [parsed URL][url_parse].
#' @param scheme The scheme, typically either `http` or `https`.
#' @param hostname The hostname, e.g., `www.google.com` or `posit.co`.
#' @param username,password Username and password to embed in the URL.
#'   Not generally recommended but needed for some legacy applications.
#' @param port An integer port number.
#' @param path The path, e.g., `/search`. Paths must start with `/`, so this
#'   will be automatically added if omitted.
#' @param query Either a query string or a named list of query components.
#'   Use `""` to remove the existing query.
#' @param fragment The fragment, e.g., `#section-1`.
#' @return An object of the same type as `url`.
#' @export
#' @family URL manipulation
#' @examples
#' url_modify("http://hadley.nz", path = "about")
#' url_modify("http://hadley.nz", scheme = "https")
#' url_modify("http://hadley.nz/abc", path = "/cde")
#' url_modify("http://hadley.nz/abc", path = "")
#' url_modify("http://hadley.nz?a=1", query = "b=2")
#' url_modify("http://hadley.nz?a=1", query = list(c = 3))
#' url_modify("http://hadley.nz?a=1", query = "")
url_modify <- function(url,
                       scheme = NULL,
                       hostname = NULL,
                       username = NULL,
                       password = NULL,
                       port = NULL,
                       path = NULL,
                       query = NULL,
                       fragment = NULL) {

  if (!is_string(url) && !is_url(url)) {
    stop_input_type(url, "a string or parsed URL")
  }
  # Remember the input form so the result is returned in the same form.
  string_url <- is_string(url)
  if (string_url) {
    url <- url_parse(url)
  }

  check_string(scheme, allow_null = TRUE)
  check_string(hostname, allow_null = TRUE)
  check_string(username, allow_null = TRUE)
  check_string(password, allow_null = TRUE)
  check_number_whole(port, min = 1, allow_null = TRUE)
  check_string(path, allow_null = TRUE)
  check_string(fragment, allow_null = TRUE)

  if (is_string(query)) {
    # Keep "" as-is so it takes the same "remove this component" path as every
    # other argument below, rather than depending on what query_parse() returns
    # for an empty string (NOTE(review): query_parse() is defined elsewhere).
    if (!identical(query, "")) {
      query <- query_parse(query)
    }
  } else if (is.list(query) && (is_named(query) || length(query) == 0)) {
    for (nm in names(query)) {
      check_query_param(query[[nm]], paste0("query$", nm))
    }
  } else if (!is.null(query)) {
    # Only a single string is accepted above, so say "string" here; otherwise
    # a length-2 character vector yields a self-contradictory message.
    stop_input_type(query, "a string, named list, or NULL")
  }

  # Collect only the components the caller supplied; compact() is presumably
  # what discards the untouched (NULL) ones -- it is defined elsewhere.
  new <- compact(list(
    scheme = scheme,
    hostname = hostname,
    username = username,
    password = password,
    port = port,
    path = path,
    query = query,
    fragment = fragment
  ))
  # "" means "remove": assigning list(NULL) through `[` keeps the entry but
  # sets its value to NULL, so the matching field of `url` is cleared below.
  is_empty <- map_lgl(new, identical, "")
  new[is_empty] <- list(NULL)
  url[names(new)] <- new

  if (string_url) {
    url_build(url)
  } else {
    url
  }
}

# Predicate: TRUE when `x` carries the S3 class "httr2_url", FALSE otherwise.
is_url <- function(x) {
  inherits(x, "httr2_url")
}
Expand Down Expand Up @@ -91,9 +161,19 @@ print.httr2_url <- function(x, ...) {
invisible(x)
}

#' Build a string from a URL object
#'
#' This is the inverse of [url_parse()], taking a parsed URL object and
#' turning it back into a string.
#'
#' @param url A URL object created by [url_parse()].
#' @family URL manipulation
#' @export
#' @rdname url_parse
url_build <- function(url) {
if (!is_url(url)) {
stop_input_type(url, "a parsed URL")
}

if (!is.null(url$query)) {
query <- query_build(url$query)
} else {
Expand All @@ -119,7 +199,7 @@ url_build <- function(url) {
authority <- NULL
}

if (!is.null(url$path) && !startsWith(url$path, "/")) {
if (is.null(url$path) || !startsWith(url$path, "/")) {
url$path <- paste0("/", url$path)
}

Expand Down
5 changes: 4 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,15 @@ reference:
contents:
- starts_with("resp_")

- title: URL manipulation
contents:
- starts_with("url_")

- title: Miscellaneous helpers
contents:
- curl_translate
- secrets
- obfuscate
- url_parse

- title: OAuth
desc: >
Expand Down
21 changes: 21 additions & 0 deletions man/url_build.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions man/url_modify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 14 additions & 21 deletions man/url_parse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit e6f425e

Please sign in to comment.