Skip to content

Commit

Permalink
Initial support for migrating VM to a new propolis instance. (#447)
Browse files Browse the repository at this point in the history
Building on the live migration work in propolis (oxidecomputer/propolis#69), this adds the nexus endpoint to trigger a migration of an instance.
  • Loading branch information
luqmana authored Jan 25, 2022
1 parent a75149c commit 4d12a4d
Show file tree
Hide file tree
Showing 30 changed files with 930 additions and 136 deletions.
63 changes: 52 additions & 11 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,17 @@ panic = "abort"
panic = "abort"

#
# It's common during development to use a local copy of dropshot or steno in the
# parent directory. If you want to use those, uncomment one of these blocks.
# It's common during development to use a local copy of dropshot, propolis
# or steno in the parent directory. If you want to use those, uncomment
# one of these blocks.
#
#[patch."https://github.com/oxidecomputer/dropshot"]
#dropshot = { path = "../dropshot/dropshot" }
#[patch."https://github.com/oxidecomputer/steno"]
#steno = { path = "../steno" }
#[patch."https://github.com/oxidecomputer/propolis"]
#propolis-client = { path = "../propolis/client" }
#propolis-server = { path = "../propolis/server" }

#
# Local client generation during development.
Expand Down
10 changes: 10 additions & 0 deletions common/src/api/external/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,16 @@ impl Error {
Error::InternalError { internal_message: internal_message.to_owned() }
}

/**
* Generates an [`Error::InvalidRequest`] error with the specific message
*
* This should be used for failures due possibly to invalid client input
* or malformed requests.
*/
pub fn invalid_request(message: &str) -> Error {
Error::InvalidRequest { message: message.to_owned() }
}

/**
* Generates an [`Error::ServiceUnavailable`] error with the specific
* message
Expand Down
7 changes: 7 additions & 0 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,10 @@ pub enum InstanceState {
/// The instance is in the process of rebooting - it will remain
/// in the "rebooting" state until the VM is starting once more.
Rebooting,
/// The instance is in the process of migrating - it will remain
/// in the "migrating" state until the migration process is complete
/// and the destination propolis is ready to continue execution.
Migrating,
Repairing,
Failed,
Destroyed,
Expand Down Expand Up @@ -702,6 +706,7 @@ impl TryFrom<&str> for InstanceState {
"stopping" => InstanceState::Stopping,
"stopped" => InstanceState::Stopped,
"rebooting" => InstanceState::Rebooting,
"migrating" => InstanceState::Migrating,
"repairing" => InstanceState::Repairing,
"failed" => InstanceState::Failed,
"destroyed" => InstanceState::Destroyed,
Expand All @@ -720,6 +725,7 @@ impl InstanceState {
InstanceState::Stopping => "stopping",
InstanceState::Stopped => "stopped",
InstanceState::Rebooting => "rebooting",
InstanceState::Migrating => "migrating",
InstanceState::Repairing => "repairing",
InstanceState::Failed => "failed",
InstanceState::Destroyed => "destroyed",
Expand All @@ -737,6 +743,7 @@ impl InstanceState {
InstanceState::Running => false,
InstanceState::Stopping => false,
InstanceState::Rebooting => false,
InstanceState::Migrating => false,

InstanceState::Creating => true,
InstanceState::Stopped => true,
Expand Down
4 changes: 4 additions & 0 deletions common/src/api/internal/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ pub struct InstanceRuntimeState {
pub sled_uuid: Uuid,
/// which propolis-server is running this Instance
pub propolis_uuid: Uuid,
/// address of propolis-server running this Instance
pub propolis_addr: Option<SocketAddr>,
/// migration id (if one is in progress)
pub migration_uuid: Option<Uuid>,
/// number of CPUs allocated for this Instance
pub ncpus: InstanceCpuCount,
/// memory allocated for this Instance
Expand Down
19 changes: 17 additions & 2 deletions common/src/api/internal/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::api::{external, internal};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::fmt::{Debug, Display, Formatter, Result as FormatResult};
use std::net::SocketAddr;
use uuid::Uuid;

/// Describes the instance hardware.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)]
Expand All @@ -24,6 +26,14 @@ pub struct InstanceEnsureBody {
pub initial: InstanceHardware,
/// requested runtime state of the Instance
pub target: InstanceRuntimeStateRequested,
/// If we're migrating this instance, the details needed to drive the migration
pub migrate: Option<InstanceMigrateParams>,
}

/// Parameters describing the source side of an instance migration.
///
/// Carried as the optional `migrate` field of `InstanceEnsureBody`, which
/// describes it as "the details needed to drive the migration".
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct InstanceMigrateParams {
/// UUID of the source propolis
/// NOTE(review): presumably the propolis-server the instance is being
/// migrated away from -- confirm against the caller.
pub src_propolis_uuid: Uuid,
/// Socket address of the source propolis
/// NOTE(review): presumably where the migration source can be reached --
/// confirm against the caller.
pub src_propolis_addr: SocketAddr,
}

/// Requestable running state of an Instance.
Expand All @@ -48,6 +58,7 @@ pub enum InstanceStateRequested {
// Issues a reset command to the instance, such that it should
// stop and then immediately become running.
Reboot,
Migrating,
Destroyed,
}

Expand All @@ -63,6 +74,7 @@ impl InstanceStateRequested {
InstanceStateRequested::Running => "running",
InstanceStateRequested::Stopped => "stopped",
InstanceStateRequested::Reboot => "reboot",
InstanceStateRequested::Migrating => "migrating",
InstanceStateRequested::Destroyed => "destroyed",
}
}
Expand All @@ -73,16 +85,19 @@ impl InstanceStateRequested {
InstanceStateRequested::Running => false,
InstanceStateRequested::Stopped => true,
InstanceStateRequested::Reboot => false,
InstanceStateRequested::Migrating => false,
InstanceStateRequested::Destroyed => true,
}
}
}

/// Used to request an Instance state change from a sled agent
///
/// Right now, it's only the run state and migration id that can
/// be changed, though we might want to support changing properties
/// like "ncpus" here.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct InstanceRuntimeStateRequested {
/// the run state being requested for the Instance
pub run_state: InstanceStateRequested,
/// the migration id being requested, if any
/// NOTE(review): the meaning of `None` (clear vs. leave unchanged) is not
/// visible here -- confirm against the sled agent implementation.
pub migration_id: Option<Uuid>,
}
6 changes: 6 additions & 0 deletions common/src/sql/dbinit.sql
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,12 @@ CREATE TABLE omicron.public.instance (
active_server_id UUID,
/* Identifies the underlying propolis-server backing the instance. */
active_propolis_id UUID,
active_propolis_ip INET,

/*
* Identifies an ongoing migration for this instance.
*/
migration_id UUID,

/* Instance configuration */
ncpus INT NOT NULL,
Expand Down
9 changes: 9 additions & 0 deletions nexus-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ impl From<types::InstanceState>
types::InstanceState::Stopping => Self::Stopping,
types::InstanceState::Stopped => Self::Stopped,
types::InstanceState::Rebooting => Self::Rebooting,
types::InstanceState::Migrating => Self::Migrating,
types::InstanceState::Repairing => Self::Repairing,
types::InstanceState::Failed => Self::Failed,
types::InstanceState::Destroyed => Self::Destroyed,
Expand All @@ -67,6 +68,8 @@ impl From<omicron_common::api::internal::nexus::InstanceRuntimeState>
run_state: s.run_state.into(),
sled_uuid: s.sled_uuid,
propolis_uuid: s.propolis_uuid,
propolis_addr: s.propolis_addr.map(|addr| addr.to_string()),
migration_uuid: s.migration_uuid,
ncpus: s.ncpus.into(),
memory: s.memory.into(),
hostname: s.hostname,
Expand All @@ -86,6 +89,8 @@ impl From<&omicron_common::api::internal::nexus::InstanceRuntimeState>
run_state: s.run_state.into(),
sled_uuid: s.sled_uuid,
propolis_uuid: s.propolis_uuid,
propolis_addr: s.propolis_addr.map(|addr| addr.to_string()),
migration_uuid: s.migration_uuid,
ncpus: s.ncpus.into(),
memory: s.memory.into(),
hostname: s.hostname.clone(),
Expand Down Expand Up @@ -118,6 +123,9 @@ impl From<omicron_common::api::external::InstanceState>
omicron_common::api::external::InstanceState::Rebooting => {
Self::Rebooting
}
omicron_common::api::external::InstanceState::Migrating => {
Self::Migrating
}
omicron_common::api::external::InstanceState::Repairing => {
Self::Repairing
}
Expand Down Expand Up @@ -194,6 +202,7 @@ impl From<&types::InstanceState>
types::InstanceState::Stopping => Self::Stopping,
types::InstanceState::Stopped => Self::Stopped,
types::InstanceState::Rebooting => Self::Rebooting,
types::InstanceState::Migrating => Self::Migrating,
types::InstanceState::Repairing => Self::Repairing,
types::InstanceState::Failed => Self::Failed,
types::InstanceState::Destroyed => Self::Destroyed,
Expand Down
Loading

0 comments on commit 4d12a4d

Please sign in to comment.