Skip to content

Commit

Permalink
Improve url manipulation tooling
Browse files Browse the repository at this point in the history
* Check inputs and export `url_modify()`. Fixes #464.
* Check inputs to `url_build()`. Fixes #449.
  • Loading branch information
hadley committed Dec 23, 2024
1 parent 9db8f7e commit 1cbbf96
Show file tree
Hide file tree
Showing 9 changed files with 304 additions and 53 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ export(secret_write_rds)
export(signal_total_pages)
export(throttle_status)
export(url_build)
export(url_modify)
export(url_parse)
export(with_mock)
export(with_mocked_responses)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# httr2 (development version)

* New `url_modify()` makes it easier to modify an existing url (#464).
* `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577).
* `req_retry()` now defaults to `max_tries = 2` with a message.
Set to `max_tries = 1` to disable retries.
Expand Down
126 changes: 103 additions & 23 deletions R/url.R
Original file line number Diff line number Diff line change
@@ -1,28 +1,20 @@
#' Parse and build URLs
#' Parse a URL
#'
#' `url_parse()` parses a URL into its component pieces; `url_build()` does
#' the reverse, converting a list of pieces into a string URL. See `r rfc(3986)`
#' for the details of the parsing algorithm.
#' `url_parse()` parses a URL into its component pieces, powered by
#' [curl::curl_parse_url()]. See `r rfc(3986)` for the details of the
#' parsing algorithm.
#'
#' @param url For `url_parse()` a string to parse into a URL;
#' for `url_build()` a URL to turn back into a string.
#' @returns
#' * `url_build()` returns a string.
#' * `url_parse()` returns a URL: a S3 list with class `httr2_url`
#' and elements `scheme`, `hostname`, `port`, `path`, `fragment`, `query`,
#' `username`, `password`.
#' @param url A string to parse.
#' @returns A URL, i.e. a S3 object with class `httr2_url` and elements
#' `scheme`, `hostname`, `username`, `password`, `port`, `path`, `query`, and
#' `fragment`.
#' @export
#' @family URL manipulation
#' @examples
#' url_parse("http://google.com/")
#' url_parse("http://google.com:80/")
#' url_parse("http://google.com:80/?a=1&b=2")
#' url_parse("http://username@google.com:80/path;test?a=1&b=2#40")
#'
#' url <- url_parse("http://google.com/")
#' url$port <- 80
#' url$hostname <- "example.com"
#' url$query <- list(a = 1, b = 2, c = 3)
#' url_build(url)
url_parse <- function(url) {
check_string(url)

Expand All @@ -42,10 +34,88 @@ url_parse <- function(url) {
parsed
}

url_modify <- function(url, ..., error_call = caller_env()) {
url <- url_parse(url)
url <- modify_list(url, ..., error_call = error_call)
url_build(url)
#' Modify a url
#'
#' Modify components of a URL. The default value of each argument, `NULL`,
#' means leave the component as is. If you want to remove a component,
#' set it to `""`. Note that setting `scheme` or `hostname` to `""` will
#' create a relative url.
#'
#' @param url A string or [parsed URL][url_parse].
#' @param scheme The scheme, typically either `http` or `https`.
#' @param hostname The hostname, e.g. `www.google.com` or `posit.co`.
#' @param username,password Username and password to embed in the URL.
#' Not generally recommended but needed for some legacy applications.
#' @param port An integer port number.
#' @param path The path, e.g. `/search`. Paths must start with `/`, so this
#' will be automatically added if omitted.
#' @param query Either a query string or a named list of query components.
#' @param fragment The fragment, e.g. `#section-1`.
#' @return An object of the same type as `url`.
#' @export
#' @family URL manipulation
#' @examples
#' url_modify("http://hadley.nz", path = "about")
#' url_modify("http://hadley.nz", scheme = "https")
#' url_modify("http://hadley.nz/abc", path = "/cde")
#' url_modify("http://hadley.nz/abc", path = "")
#' url_modify("http://hadley.nz?a=1", query = "b=2")
#' url_modify("http://hadley.nz?a=1", query = list(c = 3))
url_modify <- function(url,
                       scheme = NULL,
                       hostname = NULL,
                       username = NULL,
                       password = NULL,
                       port = NULL,
                       path = NULL,
                       query = NULL,
                       fragment = NULL) {
  # Accept either a URL string or an already-parsed `httr2_url`; remember
  # which one was supplied so the result is handed back in the same form.
  was_string <- is_string(url)
  if (!was_string && !is_url(url)) {
    stop_input_type(url, "a string or parsed URL")
  }
  parsed <- if (was_string) url_parse(url) else url

  # Scalar components: each must be a single string (a whole number >= 1 for
  # `port`) or NULL.
  check_string(scheme, allow_null = TRUE)
  check_string(hostname, allow_null = TRUE)
  check_string(username, allow_null = TRUE)
  check_string(password, allow_null = TRUE)
  check_number_whole(port, min = 1, allow_null = TRUE)
  check_string(path, allow_null = TRUE)
  check_string(fragment, allow_null = TRUE)

  # `query` may be a single string (run through `query_parse()`), a named or
  # empty list (each value validated individually), or NULL; anything else
  # is rejected.
  if (!is.null(query)) {
    if (is_string(query)) {
      query <- query_parse(query)
    } else if (is.list(query) && (is_named(query) || length(query) == 0)) {
      for (key in names(query)) {
        check_query_param(query[[key]], paste0("query$", key))
      }
    } else {
      stop_input_type(query, "a character vector, named list, or NULL")
    }
  }

  # Gather the requested changes. NOTE(review): `compact()` is expected to
  # drop the entries left as NULL, matching the documented "NULL means leave
  # the component as is" contract -- confirm against its definition.
  changes <- compact(list(
    scheme = scheme,
    hostname = hostname,
    username = username,
    password = password,
    port = port,
    path = path,
    query = query,
    fragment = fragment
  ))

  # An empty string means "remove this component": store NULL in its place.
  blank <- map_lgl(changes, identical, "")
  changes[blank] <- list(NULL)
  parsed[names(changes)] <- changes

  # Mirror the input form: string in, string out; parsed URL in, parsed out.
  if (was_string) {
    url_build(parsed)
  } else {
    parsed
  }
}

is_url <- function(x) {
  # TRUE when `x` carries the "httr2_url" S3 class, FALSE otherwise.
  inherits(x, what = "httr2_url")
}
Expand Down Expand Up @@ -85,9 +155,19 @@ print.httr2_url <- function(x, ...) {
invisible(x)
}

#' Build a string from a URL object
#'
#' This is the converse of [url_parse], taking a parsed URL object and
#' turning it back into a string.
#'
#' @param url A URL object created by [url_parse].
#' @family URL manipulation
#' @export
#' @rdname url_parse
url_build <- function(url) {
if (!is_url(url)) {
stop_input_type(url, "a parsed URL")

Check warning on line 168 in R/url.R

View check run for this annotation

Codecov / codecov/patch

R/url.R#L168

Added line #L168 was not covered by tests
}

if (!is.null(url$query)) {
query <- query_build(url$query)
} else {
Expand All @@ -113,7 +193,7 @@ url_build <- function(url) {
authority <- NULL
}

if (!is.null(url$path) && !startsWith(url$path, "/")) {
if (is.null(url$path) || !startsWith(url$path, "/")) {
url$path <- paste0("/", url$path)
}

Expand Down
5 changes: 4 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,15 @@ reference:
contents:
- starts_with("resp_")

- title: URL manipulation
contents:
- starts_with("url_")

- title: Miscellaneous helpers
contents:
- curl_translate
- secrets
- obfuscate
- url_parse

- title: OAuth
desc: >
Expand Down
21 changes: 21 additions & 0 deletions man/url_build.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

60 changes: 60 additions & 0 deletions man/url_modify.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 14 additions & 21 deletions man/url_parse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 61 additions & 0 deletions tests/testthat/_snaps/url.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,67 @@
Error in `url_build()`:
! Cannot set url `password` without `username`.

# url_modify checks its inputs

Code
url_modify(1)
Condition
Error in `url_modify()`:
! `url` must be a string or parsed URL, not the number 1.
Code
url_modify(url, scheme = 1)
Condition
Error in `url_modify()`:
! `scheme` must be a single string or `NULL`, not the number 1.
Code
url_modify(url, hostname = 1)
Condition
Error in `url_modify()`:
! `hostname` must be a single string or `NULL`, not the number 1.
Code
url_modify(url, port = "x")
Condition
Error in `url_modify()`:
! `port` must be a whole number or `NULL`, not the string "x".
Code
url_modify(url, username = 1)
Condition
Error in `url_modify()`:
! `username` must be a single string or `NULL`, not the number 1.
Code
url_modify(url, password = 1)
Condition
Error in `url_modify()`:
! `password` must be a single string or `NULL`, not the number 1.
Code
url_modify(url, path = 1)
Condition
Error in `url_modify()`:
! `path` must be a single string or `NULL`, not the number 1.
Code
url_modify(url, fragment = 1)
Condition
Error in `url_modify()`:
! `fragment` must be a single string or `NULL`, not the number 1.

# checks various query formats

Code
url_modify(url, query = 1)
Condition
Error in `url_modify()`:
! `query` must be a character vector, named list, or NULL, not the number 1.
Code
url_modify(url, query = list(1))
Condition
Error in `url_modify()`:
! `query` must be a character vector, named list, or NULL, not a list.
Code
url_modify(url, query = list(x = 1:2))
Condition
Error in `url_modify()`:
! Query value `query$x` must be a length-1 atomic vector, not an integer vector.

# validates inputs

Code
Expand Down
Loading

0 comments on commit 1cbbf96

Please sign in to comment.