Skip to content

Commit

Permalink
server: return better HTTP errors when not ensured (#649)
Browse files Browse the repository at this point in the history
Currently, if the `propolis-server` APIs that require a running instance
are called when the server has not received an `InstanceEnsure` request,
we return a 404. Sled-agent and Nexus handle this status code by
assuming *they* have done something wrong, such as attempting to call an
API route that the server doesn't support. This results in issues where
sled-agent assumes that it's the one that made a mistake when it tries
to call APIs on a `propolis-server` that has lost its VM instance due to
being restarted after a panic.

To make it easier for callers distinguish between errors that indicate
"this server doesn't know about a running instance" and errors that
indicate "you attempted to call an API route that I don't know about",
I've changed the status code returned by `propolis-server`s that haven't
created an instance to HTTP [424 Failed Dependency]. The best thing
about returning this status code is that it's "not 404", but it does
arguably have the most correct semantics for this specific error:

> The HTTP 424 Failed Dependency client error response code indicates
> that the method could not be performed on the resource because the
> requested action depended on another action, and that action failed.

On the other hand, this is kind of a weird status code that's mostly
used by WebDAV servers, so I'm open to using something else. We could
alternatively return 410 Gone in these cases, but that status is already
returned by servers whose VM instance has been deleted. I thought it
might be nice to be able to distinguish between "not ensured" and
"ensured, and then deleted"...but we could switch to 410 Gone here, too.

In addition, I've added an `enum` in `propolis-api-types` that's used to
generate Dropshot "error code" strings in HTTP error responses. This can
be used by clients to programmatically inspect the error's cause.

[424 Failed Dependency]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/424
  • Loading branch information
hawkw authored Apr 10, 2024
1 parent 8ff3ab6 commit 14b0892
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 36 deletions.
62 changes: 30 additions & 32 deletions bin/propolis-server/src/lib/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,12 +296,7 @@ impl DropshotEndpointContext {
self.services.vm.lock().await,
VmControllerState::as_controller,
)
.map_err(|_| {
HttpError::for_not_found(
None,
"Server not initialized (no instance)".to_string(),
)
})
.map_err(|_| not_created_error())
}
}

Expand Down Expand Up @@ -576,19 +571,21 @@ async fn instance_ensure_common(
{
let existing_properties = existing.properties();
if existing_properties.id != properties.id {
return Err(HttpError::for_status(
Some(format!(
return Err(HttpError::for_client_error(
Some(api::ErrorCode::AlreadyInitialized.to_string()),
http::status::StatusCode::CONFLICT,
format!(
"Server already initialized with ID {}",
existing_properties.id
)),
http::status::StatusCode::CONFLICT,
),
));
}

if *existing_properties != properties {
return Err(HttpError::for_status(
Some("Cannot update running server".to_string()),
return Err(HttpError::for_client_error(
Some(api::ErrorCode::AlreadyRunning.to_string()),
http::status::StatusCode::CONFLICT,
"Cannot update running server".to_string(),
));
}

Expand Down Expand Up @@ -658,10 +655,11 @@ async fn instance_ensure_common(
vm_hdl.await.unwrap()
}
.map_err(|e| {
HttpError::for_internal_error(format!(
"failed to create instance: {}",
e
))
HttpError::for_client_error(
Some(api::ErrorCode::CreateFailed.to_string()),
http::status::StatusCode::INTERNAL_SERVER_ERROR,
format!("failed to create instance: {e}"),
)
})?;

if let Some(ramfb) = vm.framebuffer() {
Expand Down Expand Up @@ -785,10 +783,7 @@ async fn instance_get_common(
) -> Result<(api::Instance, VersionedInstanceSpec), HttpError> {
let ctx = rqctx.context();
match &*ctx.services.vm.lock().await {
VmControllerState::NotCreated => Err(HttpError::for_not_found(
None,
"Server not initialized (no instance)".to_string(),
)),
VmControllerState::NotCreated => Err(not_created_error()),
VmControllerState::Created(vm) => {
Ok((
api::Instance {
Expand Down Expand Up @@ -862,10 +857,7 @@ async fn instance_state_monitor(
let vm_state = ctx.services.vm.lock().await;
match &*vm_state {
VmControllerState::NotCreated => {
return Err(HttpError::for_not_found(
None,
"Server not initialized (no instance)".to_string(),
));
return Err(not_created_error());
}
VmControllerState::Created(vm) => vm.state_watcher().clone(),
VmControllerState::Destroyed { state_watcher, .. } => {
Expand All @@ -885,12 +877,13 @@ async fn instance_state_monitor(
// means it will never reach the number the client wants it to reach.
// Inform the client of this condition so it doesn't wait forever.
state_watcher.changed().await.map_err(|_| {
HttpError::for_status(
Some(format!(
HttpError::for_client_error(
Some(api::ErrorCode::NoInstance.to_string()),
http::status::StatusCode::GONE,
format!(
"No instance present; will never reach generation {}",
gen
)),
http::status::StatusCode::GONE,
),
)
})?;
}
Expand Down Expand Up @@ -1046,10 +1039,7 @@ async fn instance_migrate_status(
let migration_id = path_params.into_inner().migration_id;
let ctx = rqctx.context();
match &*ctx.services.vm.lock().await {
VmControllerState::NotCreated => Err(HttpError::for_not_found(
None,
"Server not initialized (no instance)".to_string(),
)),
VmControllerState::NotCreated => Err(not_created_error()),
VmControllerState::Created(vm) => {
vm.migrate_status(migration_id).map_err(Into::into).map(|state| {
HttpResponseOk(api::InstanceMigrateStatusResponse {
Expand Down Expand Up @@ -1206,6 +1196,14 @@ pub fn api() -> ApiDescription<Arc<DropshotEndpointContext>> {
api
}

fn not_created_error() -> HttpError {
HttpError::for_client_error(
Some(api::ErrorCode::NoInstance.to_string()),
http::StatusCode::FAILED_DEPENDENCY,
"Server not initialized (no instance)".to_string(),
)
}

#[cfg(test)]
mod tests {
#[test]
Expand Down
46 changes: 45 additions & 1 deletion crates/propolis-api-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

//! Definitions for types exposed by the propolis-server API

use std::net::SocketAddr;
use std::{fmt, net::SocketAddr};

use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -395,3 +395,47 @@ pub struct SnapshotRequestPathParams {
pub struct VCRRequestPathParams {
pub id: Uuid,
}

/// Error codes used to populate the `error_code` field of Dropshot API responses.
#[derive(
Clone, Copy, Debug, Deserialize, PartialEq, Eq, Serialize, JsonSchema,
)]
pub enum ErrorCode {
/// This `propolis-server` process has not received an `InstanceEnsure`
/// request yet.
NoInstance,
/// This `propolis-server` process has already received an `InstanceEnsure`
/// request with a different ID.
AlreadyInitialized,
/// Cannot update a running server.
AlreadyRunning,
/// Instance creation failed
CreateFailed,
}

impl fmt::Display for ErrorCode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(self, f)
}
}

impl std::str::FromStr for ErrorCode {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.trim() {
s if s.eq_ignore_ascii_case("NoInstance") => Ok(Self::NoInstance),
s if s.eq_ignore_ascii_case("AlreadyInitialized") => {
Ok(ErrorCode::AlreadyInitialized)
}
s if s.eq_ignore_ascii_case("AlreadyRunning") => {
Ok(ErrorCode::AlreadyRunning)
}
s if s.eq_ignore_ascii_case("CreateFailed") => {
Ok(ErrorCode::CreateFailed)
}
_ => Err("unknown error code, expected one of: \
'NoInstance', 'AlreadyInitialized', 'AlreadyRunning', \
'CreateFailed'"),
}
}
}
6 changes: 3 additions & 3 deletions phd-tests/tests/src/server_state_machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,15 @@ async fn instance_stop_causes_destroy_test(ctx: &Framework) {

assert!(matches!(
vm.run().await.unwrap_err().status().unwrap(),
http::status::StatusCode::NOT_FOUND
http::status::StatusCode::FAILED_DEPENDENCY
));
assert!(matches!(
vm.stop().await.unwrap_err().status().unwrap(),
http::status::StatusCode::NOT_FOUND
http::status::StatusCode::FAILED_DEPENDENCY
));
assert!(matches!(
vm.reset().await.unwrap_err().status().unwrap(),
http::status::StatusCode::NOT_FOUND
http::status::StatusCode::FAILED_DEPENDENCY
));
}

Expand Down

0 comments on commit 14b0892

Please sign in to comment.