Skip to content

Commit

Permalink
Gracefully handle invalid URIs (#1414)
Browse files Browse the repository at this point in the history
With the upgrade to `reqwest` 0.12, we can finally handle a long-standing
issue, when Urls could not be parsed to Uris. Previously, we would panic, but
we can now handle that situation gracefully and return an error instead.

I've also renamed `Status::is_failure` to `Status::is_error`, because the
notion of failures no longer exists in the codebase and we use the term "error"
consistently throughout the codebase instead. This is technically a breaking
change in the API, but it's fine since we have not released a stable version
yet.

More information about the URI parsing issue:
- #539
- seanmonstar/reqwest#668
  • Loading branch information
mre authored Apr 25, 2024
1 parent 8451f88 commit fc85695
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 21 deletions.
43 changes: 35 additions & 8 deletions lychee-bin/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use reqwest::Url;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;

use lychee_lib::{Client, Request, Response};
use lychee_lib::{Client, ErrorKind, Request, Response};
use lychee_lib::{InputSource, Result};
use lychee_lib::{ResponseBody, Status};

Expand Down Expand Up @@ -225,6 +225,25 @@ async fn request_channel_task(
.await;
}

/// Check a URL and return a response.
///
/// # Errors
///
/// This can fail when the URL could not be parsed to a URI.
async fn check_url(client: &Client, request: Request) -> Response {
// Request was not cached; run a normal check
let uri = request.uri.clone();
let source = request.source.clone();
client.check(request).await.unwrap_or_else(|e| {
log::error!("Error checking URL {}: Cannot parse URL to URI: {}", uri, e);
Response::new(
uri.clone(),
Status::Error(ErrorKind::InvalidURI(uri.clone())),
source,
)
})
}

/// Handle a single request
async fn handle(
client: &Client,
Expand All @@ -250,12 +269,7 @@ async fn handle(
}

// Request was not cached; run a normal check
//
// This can panic when the Url could not be parsed to a Uri.
// See https://github.com/servo/rust-url/issues/554
// See https://github.com/seanmonstar/reqwest/issues/668
// TODO: Handle error as soon as https://github.com/seanmonstar/reqwest/pull/1399 got merged
let response = client.check(request).await.expect("cannot check URI");
let response = check_url(client, request).await;

// - Never cache filesystem access as it is fast already so caching has no
// benefit.
Expand Down Expand Up @@ -318,7 +332,7 @@ fn get_failed_urls(stats: &mut ResponseStats) -> Vec<(InputSource, Url)> {
mod tests {
use log::info;

use lychee_lib::{CacheStatus, InputSource, ResponseBody, Uri};
use lychee_lib::{CacheStatus, ClientBuilder, InputSource, ResponseBody, Uri};

use crate::formatters;

Expand Down Expand Up @@ -367,4 +381,17 @@ mod tests {
let buf = String::from_utf8_lossy(&buf);
assert_eq!(buf, "↻ [200] http://127.0.0.1/ | Cached: OK (cached)\n");
}

#[tokio::test]
async fn test_invalid_url() {
// Run a normal request with an invalid Url
let client = ClientBuilder::builder().build().client().unwrap();
let request = Request::try_from("http://\"").unwrap();
let response = check_url(&client, request).await;
assert!(response.status().is_error());
assert!(matches!(
response.status(),
Status::Error(ErrorKind::InvalidURI(_))
));
}
}
2 changes: 1 addition & 1 deletion lychee-bin/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl ResponseStats {
self.increment_status_counters(status);

match status {
_ if status.is_failure() => {
_ if status.is_error() => {
let fail = self.fail_map.entry(source).or_default();
fail.insert(response.1);
}
Expand Down
22 changes: 11 additions & 11 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -761,13 +761,13 @@ mod tests {
let mock_server = mock_server!(StatusCode::NOT_FOUND);
let res = get_mock_client_response(mock_server.uri()).await;

assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
async fn test_nonexistent_with_path() {
let res = get_mock_client_response("http://127.0.0.1/invalid").await;
assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
Expand All @@ -779,7 +779,7 @@ mod tests {
#[tokio::test]
async fn test_github_nonexistent_repo() {
let res = get_mock_client_response("https://github.com/lycheeverse/not-lychee").await;
assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
Expand All @@ -788,7 +788,7 @@ mod tests {
"https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
)
.await;
assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
Expand All @@ -798,7 +798,7 @@ mod tests {
assert!(res.status().is_success());

let res = get_mock_client_response("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
Expand Down Expand Up @@ -831,7 +831,7 @@ mod tests {
async fn test_invalid_ssl() {
let res = get_mock_client_response("https://expired.badssl.com/").await;

assert!(res.status().is_failure());
assert!(res.status().is_error());

// Same, but ignore certificate error
let res = ClientBuilder::builder()
Expand Down Expand Up @@ -920,7 +920,7 @@ mod tests {
.client()
.unwrap();
let res = client.check("http://example.com").await.unwrap();
assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
Expand Down Expand Up @@ -984,7 +984,7 @@ mod tests {
let res = client.check(mock_server.uri()).await.unwrap();
let end = start.elapsed();

assert!(res.status().is_failure());
assert!(res.status().is_error());

// on slow connections, this might take a bit longer than nominal
// backed-off timeout (7 secs)
Expand All @@ -996,7 +996,7 @@ mod tests {
let client = ClientBuilder::builder().build().client().unwrap();
// This request will fail, but it won't panic
let res = client.check("http://\"").await.unwrap();
assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
Expand Down Expand Up @@ -1029,7 +1029,7 @@ mod tests {
.unwrap();

let res = client.check(redirect_uri.clone()).await.unwrap();
assert!(res.status().is_failure());
assert!(res.status().is_error());

let client = ClientBuilder::builder()
.max_redirects(1_usize)
Expand Down Expand Up @@ -1060,7 +1060,7 @@ mod tests {
.unwrap();

let res = client.check(mock_server.uri()).await.unwrap();
assert!(res.status().is_failure());
assert!(res.status().is_error());
}

#[tokio::test]
Expand Down
2 changes: 1 addition & 1 deletion lychee-lib/src/types/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ impl Status {
#[inline]
#[must_use]
/// Returns `true` if the check was not successful
pub const fn is_failure(&self) -> bool {
pub const fn is_error(&self) -> bool {
matches!(
self,
Status::Error(_) | Status::Cached(CacheStatus::Error(_)) | Status::Timeout(_)
Expand Down

0 comments on commit fc85695

Please sign in to comment.