Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more information to HTTP errors to help with debugging. #11878

Merged
merged 4 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 25 additions & 7 deletions src/cargo/core/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use crate::core::{Dependency, Manifest, PackageId, SourceId, Target};
use crate::core::{SourceMap, Summary, Workspace};
use crate::ops;
use crate::util::config::PackageCacheLock;
use crate::util::errors::{CargoResult, HttpNotSuccessful};
use crate::util::errors::{CargoResult, HttpNotSuccessful, DEBUG_HEADERS};
use crate::util::interning::InternedString;
use crate::util::network::retry::{Retry, RetryResult};
use crate::util::network::sleep::SleepTracker;
Expand Down Expand Up @@ -379,6 +379,9 @@ struct Download<'cfg> {
/// Actual downloaded data, updated throughout the lifetime of this download.
data: RefCell<Vec<u8>>,

/// HTTP headers for debugging.
headers: RefCell<Vec<String>>,

/// The URL that we're downloading from, cached here for error messages and
/// reenqueuing.
url: String,
Expand Down Expand Up @@ -762,6 +765,19 @@ impl<'a, 'cfg> Downloads<'a, 'cfg> {
});
Ok(buf.len())
})?;
handle.header_function(move |data| {
tls::with(|downloads| {
if let Some(downloads) = downloads {
// Headers contain trailing \r\n, trim them to make it easier
// to work with.
let h = String::from_utf8_lossy(data).trim().to_string();
if DEBUG_HEADERS.iter().any(|p| h.starts_with(p)) {
downloads.pending[&token].0.headers.borrow_mut().push(h);
}
}
});
true
})?;

handle.progress(true)?;
handle.progress_function(move |dl_total, dl_cur, _, _| {
Expand All @@ -787,6 +803,7 @@ impl<'a, 'cfg> Downloads<'a, 'cfg> {
let dl = Download {
token,
data: RefCell::new(Vec::new()),
headers: RefCell::new(Vec::new()),
id,
url,
descriptor,
Expand Down Expand Up @@ -826,6 +843,7 @@ impl<'a, 'cfg> Downloads<'a, 'cfg> {
.remove(&token)
.expect("got a token for a non-in-progress transfer");
let data = mem::take(&mut *dl.data.borrow_mut());
let headers = mem::take(&mut *dl.headers.borrow_mut());
let mut handle = self.set.multi.remove(handle)?;
self.pending_ids.remove(&dl.id);

Expand Down Expand Up @@ -862,12 +880,12 @@ impl<'a, 'cfg> Downloads<'a, 'cfg> {

let code = handle.response_code()?;
if code != 200 && code != 0 {
let url = handle.effective_url()?.unwrap_or(url);
return Err(HttpNotSuccessful {
code,
url: url.to_string(),
body: data,
}
return Err(HttpNotSuccessful::new_from_handle(
&mut handle,
&url,
data,
headers,
)
.into());
}
Ok(data)
Expand Down
25 changes: 16 additions & 9 deletions src/cargo/sources/registry/http_remote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::ops::{self};
use crate::sources::registry::download;
use crate::sources::registry::MaybeLock;
use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData};
use crate::util::errors::{CargoResult, HttpNotSuccessful};
use crate::util::errors::{CargoResult, HttpNotSuccessful, DEBUG_HEADERS};
use crate::util::network::retry::{Retry, RetryResult};
use crate::util::network::sleep::SleepTracker;
use crate::util::{auth, Config, Filesystem, IntoUrl, Progress, ProgressStyle};
Expand Down Expand Up @@ -142,6 +142,7 @@ struct Headers {
last_modified: Option<String>,
etag: Option<String>,
www_authenticate: Vec<String>,
others: Vec<String>,
}

enum StatusCode {
Expand Down Expand Up @@ -287,13 +288,13 @@ impl<'cfg> HttpRegistry<'cfg> {
304 => StatusCode::NotModified,
401 => StatusCode::Unauthorized,
404 | 410 | 451 => StatusCode::NotFound,
code => {
let url = handle.effective_url()?.unwrap_or(&url);
return Err(HttpNotSuccessful {
code,
url: url.to_owned(),
body: data,
}
_ => {
return Err(HttpNotSuccessful::new_from_handle(
&mut handle,
&url,
data,
download.header_map.take().others,
)
.into());
}
};
Expand Down Expand Up @@ -546,6 +547,8 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
code: 401,
body: result.data,
url: self.full_url(path),
ip: None,
headers: result.header_map.others,
}
.into());
if self.auth_required {
Expand Down Expand Up @@ -665,7 +668,11 @@ impl<'cfg> RegistryData for HttpRegistry<'cfg> {
LAST_MODIFIED => header_map.last_modified = Some(value.to_string()),
ETAG => header_map.etag = Some(value.to_string()),
WWW_AUTHENTICATE => header_map.www_authenticate.push(value.to_string()),
_ => {}
_ => {
if DEBUG_HEADERS.iter().any(|prefix| tag.starts_with(prefix)) {
header_map.others.push(format!("{tag}: {value}"));
}
}
}
}
});
Expand Down
81 changes: 75 additions & 6 deletions src/cargo/util/errors.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,100 @@
#![allow(unknown_lints)]

use anyhow::Error;
use std::fmt;
use curl::easy::Easy;
use std::fmt::{self, Write};
use std::path::PathBuf;

use super::truncate_with_ellipsis;

pub type CargoResult<T> = anyhow::Result<T>;

/// These are headers that are included in error messages to help with
/// diagnosing issues.
pub const DEBUG_HEADERS: &[&str] = &[
// This is the unique ID that identifies the request in CloudFront which
// can be used for looking at the AWS logs.
"x-amz-cf-id",
// This is the CloudFront POP (Point of Presence) that identifies the
// region where the request was routed. This can help identify if an issue
// is region-specific.
"x-amz-cf-pop",
// The unique token used for troubleshooting S3 requests via AWS logs or support.
"x-amz-request-id",
// Another token used in conjunction with x-amz-request-id.
"x-amz-id-2",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel a little nervous about special casing aws in perpetuity for all registries. We need these headers because at the moment aws is used for serving crates.io. But we may switch providers for crates.io and other registries already use their own stack. I wish there was a more flexible/future proof solution, but only if it doesn't add too much complexity. And I don't know what that looks like.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kinda view this as a "best effort" approach where it provides diagnostics for the things we know could be useful now. I don't know what other headers would be useful, and I'm reluctant to show all of them because most headers just add a bunch of noise.

If someone wants to debug a reproducible issue, then they can use HTTP debugging to view all of the headers.

I realize this doesn't cover all cases. My balance here tips towards keeping the error messages a little cleaner as opposed to being exhaustive.

That being said, I'd be happy to go with the opposite approach of removing certain headers and showing the rest. That will be a bit more verbose than this approach, but still not as bad as showing all of them.

BTW, this also includes some fastly headers as well as aws.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we're going to add this, we may want to also include via (for more proxies) and www-authenticate (for authentication issues). Note that sparse registries currently do read www-authenticate, but it's not displayed in errors.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How much of this information is also in the request body? The issue mentioned in Zulip appeared to have a good amount.

  <?xml version="1.0" encoding="UTF-8"?><Error><Code>SlowDown</Code><Message>Please reduce your request rate.</Message><RequestId>HJT1QW870MWXTRJQ</RequestId><HostId>Fa1X04OlNaOF9iIoX5FB3NRlJhqQKjq2AbfZIDjGUw51zbRndr6f8mAMGtkExpsLbJZy2aazpDs=</HostId></Error>

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The request ID is in the body of that particular message. However, that's the first time I've ever seen an error message with it, so I'm not sure in which circumstances it actually shows up.

One issue is that we don't have direct access to the logs (which are currently not enabled), so having just a request ID would be a slow process of working with infra and AWS.

Knowing the POP and IP can allow us to try to diagnose and debug an issue. For example, in #8647, I was able to update my HOSTS file to the Mumbai pop and reproduce the issue.

Knowing other information, like cache hit status, can also help identify patterns of issues to further diagnose potential problems.

Also, I'm not sure what information other services like fastly will display.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@weihanglo Did you have any opinion on this or any other feedback on this PR? Although it is not critical, I think it might be nice to have something like this in 1.70.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we document why some headers are included here? It is not a blocker though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great idea! I also added a header I missed.

And FWIW, I think the idea of logging all headers (minus the uninteresting ones) is also a good suggestion. I'm just concerned the error message could get quite large. I'm happy to go that route if others feel like it would be preferable.

// Whether or not there was a cache hit or miss (both CloudFront and Fastly).
"x-cache",
// The cache server that processed the request (Fastly).
"x-served-by",
];

#[derive(Debug)]
pub struct HttpNotSuccessful {
pub code: u32,
pub url: String,
pub ip: Option<String>,
pub body: Vec<u8>,
pub headers: Vec<String>,
}

impl fmt::Display for HttpNotSuccessful {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl HttpNotSuccessful {
pub fn new_from_handle(
handle: &mut Easy,
initial_url: &str,
body: Vec<u8>,
headers: Vec<String>,
) -> HttpNotSuccessful {
let ip = handle.primary_ip().ok().flatten().map(|s| s.to_string());
let url = handle
.effective_url()
.ok()
.flatten()
.unwrap_or(initial_url)
.to_string();
HttpNotSuccessful {
code: handle.response_code().unwrap_or(0),
url,
ip,
body,
headers,
}
}

/// Renders the error in a compact form.
pub fn display_short(&self) -> String {
self.render(false)
}

fn render(&self, show_headers: bool) -> String {
let mut result = String::new();
let body = std::str::from_utf8(&self.body)
.map(|s| truncate_with_ellipsis(s, 512))
.unwrap_or_else(|_| format!("[{} non-utf8 bytes]", self.body.len()));

write!(
f,
"failed to get successful HTTP response from `{}`, got {}\nbody:\n{body}",
self.url, self.code
result,
"failed to get successful HTTP response from `{}`",
self.url
)
.unwrap();
if let Some(ip) = &self.ip {
write!(result, " ({ip})").unwrap();
}
write!(result, ", got {}\n", self.code).unwrap();
if show_headers {
if !self.headers.is_empty() {
write!(result, "debug headers:\n{}\n", self.headers.join("\n")).unwrap();
}
}
write!(result, "body:\n{body}").unwrap();
result
}
}

impl fmt::Display for HttpNotSuccessful {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.render(true))
}
}

Expand Down
17 changes: 15 additions & 2 deletions src/cargo/util/network/retry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,13 @@ impl<'a> Retry<'a> {
pub fn r#try<T>(&mut self, f: impl FnOnce() -> CargoResult<T>) -> RetryResult<T> {
match f() {
Err(ref e) if maybe_spurious(e) && self.retries < self.max_retries => {
let err_msg = e
.downcast_ref::<HttpNotSuccessful>()
.map(|http_err| http_err.display_short())
.unwrap_or_else(|| e.root_cause().to_string());
let msg = format!(
"spurious network error ({} tries remaining): {}",
"spurious network error ({} tries remaining): {err_msg}",
self.max_retries - self.retries,
e.root_cause(),
);
if let Err(e) = self.config.shell().warn(msg) {
return RetryResult::Err(e);
Expand Down Expand Up @@ -150,13 +153,17 @@ fn with_retry_repeats_the_call_then_works() {
let error1 = HttpNotSuccessful {
code: 501,
url: "Uri".to_string(),
ip: None,
body: Vec::new(),
headers: Vec::new(),
}
.into();
let error2 = HttpNotSuccessful {
code: 502,
url: "Uri".to_string(),
ip: None,
body: Vec::new(),
headers: Vec::new(),
}
.into();
let mut results: Vec<CargoResult<()>> = vec![Ok(()), Err(error1), Err(error2)];
Expand All @@ -175,13 +182,17 @@ fn with_retry_finds_nested_spurious_errors() {
let error1 = anyhow::Error::from(HttpNotSuccessful {
code: 501,
url: "Uri".to_string(),
ip: None,
body: Vec::new(),
headers: Vec::new(),
});
let error1 = anyhow::Error::from(error1.context("A non-spurious wrapping err"));
let error2 = anyhow::Error::from(HttpNotSuccessful {
code: 502,
url: "Uri".to_string(),
ip: None,
body: Vec::new(),
headers: Vec::new(),
});
let error2 = anyhow::Error::from(error2.context("A second chained error"));
let mut results: Vec<CargoResult<()>> = vec![Ok(()), Err(error1), Err(error2)];
Expand All @@ -199,7 +210,9 @@ fn default_retry_schedule() {
Err(anyhow::Error::from(HttpNotSuccessful {
code: 500,
url: "Uri".to_string(),
ip: None,
body: Vec::new(),
headers: Vec::new(),
}))
};
let config = Config::default().unwrap();
Expand Down
Loading