Skip to content

Commit

Permalink
Switch curl for url parsing (#582)
Browse files Browse the repository at this point in the history
Fixes #577
  • Loading branch information
hadley authored Dec 20, 2024
1 parent 962d50a commit 9db8f7e
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 63 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# httr2 (development version)

* `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577).
* `req_retry()` now defaults to `max_tries = 2` with a message.
Set to `max_tries = 1` to disable retries.

Expand Down
50 changes: 13 additions & 37 deletions R/url.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,44 +26,20 @@
url_parse <- function(url) {
check_string(url)

# https://datatracker.ietf.org/doc/html/rfc3986#appendix-B
pieces <- parse_match(url, "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?")

scheme <- pieces[[2]]
authority <- pieces[[4]]
path <- pieces[[5]]
query <- pieces[[7]]
if (!is.null(query)) {
query <- query_parse(query)
}
fragment <- pieces[[9]]

# https://datatracker.ietf.org/doc/html/rfc3986#section-3.2
pieces <- parse_match(authority %||% "", "^(([^@]+)@)?([^:]+)?(:([^#]+))?")

userinfo <- pieces[[2]]
if (!is.null(userinfo)) {
userinfo <- parse_in_half(userinfo, ":")
if (userinfo$right == "") {
userinfo$right <- NULL
}
}
hostname <- pieces[[3]]
port <- pieces[[5]]

structure(
list(
scheme = scheme,
hostname = hostname,
username = userinfo$left,
password = userinfo$right,
port = port,
path = path,
query = query,
fragment = fragment
),
class = "httr2_url"
curl <- curl::curl_parse_url(url)

parsed <- list(
scheme = curl$scheme,
hostname = curl$host,
username = curl$user,
password = curl$password,
port = curl$port,
path = curl$path,
query = if (length(curl$params)) as.list(curl$params),
fragment = curl$fragment
)
class(parsed) <- "httr2_url"
parsed
}

url_modify <- function(url, ..., error_call = caller_env()) {
Expand Down
31 changes: 17 additions & 14 deletions tests/testthat/_snaps/curl.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,48 +40,48 @@
Code
curl_translate("curl http://x.com")
Output
request("http://x.com") |>
request("http://x.com/") |>
req_perform()
Code
curl_translate("curl http://x.com -X DELETE")
Output
request("http://x.com") |>
request("http://x.com/") |>
req_method("DELETE") |>
req_perform()
Code
curl_translate("curl http://x.com -H A:1")
Output
request("http://x.com") |>
request("http://x.com/") |>
req_headers(
A = "1",
) |>
req_perform()
Code
curl_translate("curl http://x.com -H 'A B:1'")
Output
request("http://x.com") |>
request("http://x.com/") |>
req_headers(
`A B` = "1",
) |>
req_perform()
Code
curl_translate("curl http://x.com -u u:p")
Output
request("http://x.com") |>
request("http://x.com/") |>
req_auth_basic("u", "p") |>
req_perform()
Code
curl_translate("curl http://x.com --verbose")
Output
request("http://x.com") |>
request("http://x.com/") |>
req_perform(verbosity = 1)

# can translate query

Code
curl_translate("curl http://x.com?string=abcde&b=2")
Output
request("http://x.com") |>
request("http://x.com/") |>
req_url_query(
string = "abcde",
b = "2",
Expand All @@ -93,14 +93,14 @@
Code
curl_translate("curl http://example.com --data abcdef")
Output
request("http://example.com") |>
request("http://example.com/") |>
req_body_raw("abcdef", "application/x-www-form-urlencoded") |>
req_perform()
Code
curl_translate(
"curl http://example.com --data abcdef -H Content-Type:text/plain")
Output
request("http://example.com") |>
request("http://example.com/") |>
req_body_raw("abcdef", "text/plain") |>
req_perform()

Expand All @@ -111,7 +111,7 @@
Message
v Copying to clipboard:
Output
request("http://example.com") |>
request("http://example.com/") |>
req_headers(
A = "1",
B = "2",
Expand All @@ -120,16 +120,19 @@
Code
clipr::read_clip()
Output
[1] "request(\"http://example.com\") |> " " req_headers("
[3] " A = \"1\"," " B = \"2\","
[5] " ) |> " " req_perform()"
[1] "request(\"http://example.com/\") |> "
[2] " req_headers("
[3] " A = \"1\","
[4] " B = \"2\","
[5] " ) |> "
[6] " req_perform()"

# encode_string2() produces simple strings

Code
curl_translate(cmd)
Output
request("http://example.com") |>
request("http://example.com/") |>
req_method("PATCH") |>
req_body_raw('{"data":{"x":1,"y":"a","nested":{"z":[1,2,3]}}}', "application/json") |>
req_perform()
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-oauth-flow-auth-code.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,12 @@ test_that("old args are deprecated", {
expect_snapshot(
redirect <- normalize_redirect_uri("http://localhost", port = 1234)
)
expect_equal(redirect$uri, "http://localhost:1234")
expect_equal(redirect$uri, "http://localhost:1234/")

expect_snapshot(
redirect <- normalize_redirect_uri("http://x.com", host_name = "y.com")
)
expect_equal(redirect$uri, "http://y.com")
expect_equal(redirect$uri, "http://y.com/")

expect_snapshot(
redirect <- normalize_redirect_uri("http://x.com", host_ip = "y.com")
Expand Down
10 changes: 0 additions & 10 deletions tests/testthat/test-url.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
test_that("can parse special cases", {
url <- url_parse("//google.com")
expect_equal(url$scheme, NULL)
expect_equal(url$hostname, "google.com")

url <- url_parse("file:///tmp")
expect_equal(url$scheme, "file")
expect_equal(url$path, "/tmp")

url <- url_parse("/")
expect_equal(url$scheme, NULL)
expect_equal(url$path, "/")
})

test_that("can round trip urls", {
urls <- list(
"/",
"//google.com",
"file:///",
"http://google.com/",
"http://google.com/path",
Expand Down

0 comments on commit 9db8f7e

Please sign in to comment.