diff --git a/Cargo.lock b/Cargo.lock index 54ca3cd93e..d5a5655294 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -15,6 +15,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b613b8e1e3cf911a086f53f03bf286f52fd7a7258e4fa606f0ef220d39d8877" dependencies = [ "generic-array 0.14.5", + "heapless", ] [[package]] @@ -59,7 +60,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" dependencies = [ - "getrandom", + "getrandom 0.2.6", "once_cell", "version_check", ] @@ -224,7 +225,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b62ddb9cb1ec0a098ad4bbf9344d0713fa193ae1a80af55febcff2627b6a00c1" dependencies = [ "futures-core", - "getrandom", + "getrandom 0.2.6", "instant", "pin-project-lite", "rand 0.8.5", @@ -424,6 +425,31 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chacha20" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b72a433d0cf2aef113ba70f62634c56fddb0f244e6377185c56a7cadbd8f91" +dependencies = [ + "cfg-if 1.0.0", + "cipher", + "cpufeatures", + "zeroize", +] + +[[package]] +name = "chacha20poly1305" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b84ed6d1d5f7aa9bdde921a5090e0ca4d934d250ea3b402a5fab3a994e28a2a" +dependencies = [ + "aead", + "chacha20", + "cipher", + "poly1305", + "zeroize", +] + [[package]] name = "chrono" version = "0.4.19" @@ -549,7 +575,7 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" dependencies = [ - "getrandom", + "getrandom 0.2.6", "lazy_static", "proc-macro-hack", "tiny-keccak", @@ -757,7 +783,7 @@ dependencies = [ "futures", "futures-core", "rand 0.8.5", - "rand_chacha", + "rand_chacha 0.3.1", "reqwest", "ringbuffer", "schemars", @@ -931,6 +957,19 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b365fabc795046672053e29c954733ec3b05e4be654ab130fe8f1f94d7051f35" +[[package]] +name = "curve25519-dalek" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f9d052967f590a76e62eb387bd0bbb1b000182c3cefe5364db6b7211651bc0" +dependencies = [ + "byteorder", + "digest 0.9.0", + "rand_core 0.5.1", + "subtle", + "zeroize", +] + [[package]] name = "darling" version = "0.13.4" @@ -1234,6 +1273,20 @@ dependencies = [ "signature", ] +[[package]] +name = "ed25519-dalek" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c762bae6dcaf24c4c84667b8579785430908723d5c889f469d76a41d59cc7a9d" +dependencies = [ + "curve25519-dalek", + "ed25519", + "rand 0.7.3", + "serde", + "sha2 0.9.9", + "zeroize", +] + [[package]] name = "either" version = "1.6.1" @@ -1705,6 +1758,17 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + [[package]] name = "getrandom" version = "0.2.6" @@ -1880,6 +1944,15 @@ dependencies = [ "serde", ] +[[package]] +name = "hkdf" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "791a029f6b9fc27657f6f188ec6e5e43f6911f6f878e0dc5501396e09809d437" +dependencies = [ + "hmac 0.12.1", +] + [[package]] name = "hmac" version = "0.11.0" @@ -2235,6 +2308,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "keccak" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67c21572b4949434e4fc1e1978b99c5f77064153c59d998bf13ecd96fb5ecba7" + [[package]] name = "lalrpop" version = "0.19.7" @@ -2732,8 +2811,10 @@ dependencies = [ "omicron-sled-agent", "serde", "serde_derive", + "sp-sim", "structopt", "thiserror", + "toml", ] [[package]] @@ -2919,7 +3000,10 @@ dependencies = [ "slog-term", "smf", "socket2", + "sp-sim", "spdm", + "sprockets-host", + "sprockets-proxy", "structopt", "subprocess", "tar", @@ -3442,6 +3526,26 @@ dependencies = [ "siphasher", ] +[[package]] +name = "pin-project" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58ad3879ad3baf4e44784bc6a718a8698867bb991f8ce24d1bcbe2cfb4c3a75e" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "744b6f092ba29c3650faf274db506afd39944f48420f6c86b17cfe0ee1cb36bb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.8" @@ -3514,6 +3618,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "poly1305" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "048aeb476be11a4b6ca432ca569e375810de9294ae78f4774e78ea98a9246ede" +dependencies = [ + "cpufeatures", + "opaque-debug 0.3.0", + "universal-hash", +] + [[package]] name = "polyval" version = "0.5.3" @@ -3833,6 +3948,19 @@ dependencies = [ "winapi", ] +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + [[package]] name = "rand" version = "0.8.5" @@ -3840,10 +3968,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", + "rand_chacha 0.3.1", "rand_core 0.6.3", ] +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -3869,13 +4007,31 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + [[package]] name = "rand_core" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ - "getrandom", + "getrandom 0.2.6", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", ] [[package]] @@ -3926,7 +4082,7 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b033d837a7cf162d7993aded9304e30a83213c648b6e389db233191f891e5c2b" dependencies = [ - "getrandom", + "getrandom 0.2.6", "redox_syscall", "thiserror", ] @@ -4549,6 +4705,16 @@ dependencies = [ "digest 0.10.3", ] +[[package]] +name = "sha3" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "881bf8156c87b6301fc5ca6b27f11eeb2761224c7081e69b409d5a1951a70c86" +dependencies = [ + "digest 0.10.3", + "keccak", +] + [[package]] name = "sharded-slab" version = "0.1.4" @@ -4874,11 +5040,43 @@ source = "git+http://github.com/oxidecomputer/sprockets?rev=0361fd13ff19cda66962 dependencies = [ "derive_more", "hubpack", + "rand 0.8.5", "salty", "serde", "serde-big-array 0.4.1", ] +[[package]] +name = "sprockets-host" +version = "0.1.0" +source = "git+http://github.com/oxidecomputer/sprockets?rev=0361fd13ff19cda6696242fe40f1325fca30d3d1#0361fd13ff19cda6696242fe40f1325fca30d3d1" +dependencies = [ + "anyhow", + "clap 3.1.18", + "derive_more", + "futures", + "pin-project", + "ring", + "serde", + "slog", + "sprockets-common", + "sprockets-session", + "thiserror", + "tokio", +] + +[[package]] +name = "sprockets-proxy" +version = "0.1.0" +source = "git+http://github.com/oxidecomputer/sprockets?rev=0361fd13ff19cda6696242fe40f1325fca30d3d1#0361fd13ff19cda6696242fe40f1325fca30d3d1" +dependencies = [ + "serde", + "slog", + "sprockets-host", + "thiserror", + "tokio", +] + [[package]] name = "sprockets-rot" version = "0.1.0" @@ -4894,6 +5092,26 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "sprockets-session" +version = "0.1.0" +source = "git+http://github.com/oxidecomputer/sprockets?rev=0361fd13ff19cda6696242fe40f1325fca30d3d1#0361fd13ff19cda6696242fe40f1325fca30d3d1" +dependencies = [ + "chacha20poly1305", + "derive_more", + "ed25519", + "ed25519-dalek", + "hkdf", + "hmac 0.12.1", + "hubpack", + "rand_core 0.6.3", + "serde", + "sha3", + "sprockets-common", + "x25519-dalek", + "zeroize", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -5860,7 +6078,7 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93bbc61e655a4833cf400d0d15bf3649313422fa7572886ad6dab16d79886365" dependencies = [ - "getrandom", + "getrandom 0.2.6", "serde", ] @@ -5917,7 +6135,7 @@ checksum = "6a56f1fe9df13cff48de34c468a28d38ebede6af79ef5f2bef0b8f6b4a4a28ea" dependencies = [ "ff", "group", - "rand_chacha", + "rand_chacha 0.3.1", "rand_core 0.6.3", "serde", "serde-big-array 0.3.3", @@ -5947,6 +6165,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.10.0+wasi-snapshot-preview1" @@ -6204,6 +6428,17 @@ dependencies = [ "tap", ] +[[package]] +name = "x25519-dalek" +version = "2.0.0-pre.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5da623d8af10a62342bcbbb230e33e58a63255a58012f8653c578e54bab48df" +dependencies = [ + "curve25519-dalek", + "rand_core 0.6.3", + "zeroize", +] + [[package]] name = "xattr" version = "0.2.2" diff --git a/deploy/Cargo.toml b/deploy/Cargo.toml index 370375e129..d0502819c1 100644 --- a/deploy/Cargo.toml +++ b/deploy/Cargo.toml @@ -12,8 +12,10 @@ omicron-sled-agent = { path = "../sled-agent" } omicron-package = { path = "../package" } serde = { version = "1.0", features = [ "derive" ] } serde_derive = "1.0" +sp-sim = { path = "../sp-sim" } structopt = "0.3" thiserror = "1.0" +toml = "0.5.9" # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. diff --git a/deploy/src/bin/sled-agent-overlay-files.rs b/deploy/src/bin/sled-agent-overlay-files.rs index ea665c000e..f9812b3327 100644 --- a/deploy/src/bin/sled-agent-overlay-files.rs +++ b/deploy/src/bin/sled-agent-overlay-files.rs @@ -11,7 +11,9 @@ use omicron_sled_agent::bootstrap::trust_quorum::{ RackSecret, ShareDistribution, }; -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; +use sp_sim::config::GimletConfig; +use sp_sim::config::SpCommonConfig; use std::path::PathBuf; use structopt::StructOpt; @@ -60,8 +62,40 @@ fn overlay_secret_shares( Ok(()) } +// Generate a config file for a simulated SP in each deployment server folder. +fn overlay_sp_configs(server_dirs: &[PathBuf]) -> Result<()> { + // We will eventually need to flesh out more of this config; for now, + // it's sufficient to only generate an SP that emulates a RoT. + let mut config = GimletConfig { + common: SpCommonConfig { + multicast_addr: None, + bind_addrs: None, + serial_number: [0; 16], + manufacturing_root_cert_seed: [0; 32], + device_id_cert_seed: [0; 32], + }, + components: Vec::new(), + }; + + // Our lazy device ID generation fails if we overflow a u8. + assert!(server_dirs.len() <= 255, "expand simulated SP ID generation"); + + for server_dir in server_dirs { + config.common.serial_number[0] += 1; + config.common.device_id_cert_seed[0] += 1; + + let bytes = toml::ser::to_vec(&config).unwrap(); + let path = server_dir.join("config-sp.toml"); + std::fs::write(&path, bytes) + .with_context(|| format!("failed to write {}", path.display()))?; + } + + Ok(()) +} + fn main() -> Result<()> { let args = Args::from_args_safe().map_err(|err| anyhow!(err))?; overlay_secret_shares(args.threshold, &args.directories)?; + overlay_sp_configs(&args.directories)?; Ok(()) } diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 2aa2a12896..a2b322bc1c 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -35,6 +35,9 @@ slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_deb slog-dtrace = "0.2" smf = "0.2" spdm = { git = "https://github.com/oxidecomputer/spdm", rev = "9742f6e" } +sp-sim = { path = "../sp-sim" } +sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } +sprockets-proxy = { git = "http://github.com/oxidecomputer/sprockets", rev = "0361fd13ff19cda6696242fe40f1325fca30d3d1" } socket2 = { version = "0.4", features = [ "all" ] } structopt = "0.3" tar = "0.4" diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index 175f18b2bd..c8d361db6e 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -14,6 +14,7 @@ use omicron_sled_agent::bootstrap::{ }; use omicron_sled_agent::rack_setup::config::SetupServiceConfig as RssConfig; use omicron_sled_agent::{config::Config as SledConfig, server as sled_server}; +use sp_sim::config::GimletConfig; use std::net::SocketAddr; use std::path::PathBuf; use structopt::StructOpt; @@ -108,6 +109,20 @@ async fn do_run() -> Result<(), CmdError> { } else { None }; + let sp_config_path = { + let mut sp_config_path = config_path.clone(); + sp_config_path.pop(); + sp_config_path.push("config-sp.toml"); + sp_config_path + }; + let sp_config = if sp_config_path.exists() { + Some( + GimletConfig::from_file(sp_config_path) + .map_err(|e| CmdError::Failure(e.to_string()))?, + ) + } else { + None + }; // Derive the bootstrap address from the data link's MAC address. let link = config @@ -116,16 +131,47 @@ async fn do_run() -> Result<(), CmdError> { let bootstrap_address = bootstrap_address(link) .map_err(|e| CmdError::Failure(e.to_string()))?; + // Are we going to simulate a local SP? If so: + // + // 1. The bootstrap dropshot server listens on localhost + // 2. A sprockets proxy listens on `bootstrap_address` (and relays + // incoming connections to the localhost dropshot server) + // + // If we're not simulating a local SP, we can't establish sprockets + // sessions, so we'll have the bootstrap dropshot server listen on + // `bootstrap_address` (and no sprockets proxy). + // + // TODO-security: With this configuration, dropshot itself is + // running plain HTTP and blindly trusting all connections from + // localhost. We have a similar sprockets proxy on the client side, + // where the proxy blindly trusts all connections from localhost + // (although the client-side proxy only runs while is being made, + // while our dropshot server is always listening). Can we secure + // these connections sufficiently? Other options include expanding + // dropshot/progenitor to allow a custom connection layer (supported + // by hyper, but not reqwest), keeping the sprockets proxy but using + // something other than TCP that we can lock down, or abandoning + // dropshot and using a bespoke protocol over a raw + // sprockets-encrypted TCP connection. + let (bootstrap_dropshot_addr, sprockets_proxy_bind_addr) = + if sp_config.is_some() { + ("[::1]:0".parse().unwrap(), Some(bootstrap_address)) + } else { + (SocketAddr::V6(bootstrap_address), None) + }; + // Configure and run the Bootstrap server. let bootstrap_config = BootstrapConfig { id: config.id, dropshot: ConfigDropshot { - bind_address: SocketAddr::V6(bootstrap_address), + bind_address: bootstrap_dropshot_addr, request_body_max_bytes: 1024 * 1024, ..Default::default() }, log: config.log.clone(), rss_config, + sprockets_proxy_bind_addr, + sp_config, }; // TODO: It's a little silly to pass the config this way - namely, diff --git a/sled-agent/src/bootstrap/agent.rs b/sled-agent/src/bootstrap/agent.rs index f32c6834a4..224f4c1e25 100644 --- a/sled-agent/src/bootstrap/agent.rs +++ b/sled-agent/src/bootstrap/agent.rs @@ -16,6 +16,7 @@ use crate::config::Config as SledConfig; use crate::illumos::dladm::{self, Dladm, PhysicalLink}; use crate::illumos::zone::Zones; use crate::server::Server as SledServer; +use crate::sp::SpHandle; use omicron_common::address::get_sled_address; use omicron_common::api::external::{Error as ExternalError, MacAddr}; use omicron_common::backoff::{ @@ -93,6 +94,7 @@ pub(crate) struct Agent { rss: Mutex>, sled_agent: Mutex>, sled_config: SledConfig, + sp: Option, } fn get_sled_agent_request_path() -> PathBuf { @@ -132,6 +134,7 @@ impl Agent { log: Logger, sled_config: SledConfig, address: Ipv6Addr, + sp: Option, ) -> Result { let ba_log = log.new(o!( "component" => "BootstrapAgent", @@ -190,6 +193,7 @@ impl Agent { rss: Mutex::new(None), sled_agent: Mutex::new(None), sled_config, + sp, }; let request_path = get_sled_agent_request_path(); @@ -405,6 +409,7 @@ impl Agent { &self.parent_log, rss_config.clone(), self.peer_monitor.observer().await, + self.sp.clone(), ); self.rss.lock().await.replace(rss); } diff --git a/sled-agent/src/bootstrap/config.rs b/sled-agent/src/bootstrap/config.rs index fc8951954e..945b03a10e 100644 --- a/sled-agent/src/bootstrap/config.rs +++ b/sled-agent/src/bootstrap/config.rs @@ -8,6 +8,8 @@ use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use serde::Deserialize; use serde::Serialize; +use sp_sim::config::GimletConfig; +use std::net::SocketAddrV6; use uuid::Uuid; pub const BOOTSTRAP_AGENT_PORT: u16 = 12346; @@ -20,4 +22,10 @@ pub struct Config { pub log: ConfigLogging, pub rss_config: Option, + + // If present, `dropshot` should bind to a localhost address, and we'll + // configure a sprockets-proxy pointed to it that listens on this + // (non-localhost) address. + pub sprockets_proxy_bind_addr: Option, + pub sp_config: Option, } diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs index a9f4cfbef3..1367a8addc 100644 --- a/sled-agent/src/bootstrap/rss_handle.rs +++ b/sled-agent/src/bootstrap/rss_handle.rs @@ -9,13 +9,16 @@ use super::discovery::PeerMonitorObserver; use super::params::SledAgentRequest; use crate::rack_setup::config::SetupServiceConfig; use crate::rack_setup::service::Service; +use crate::sp::SpHandle; use futures::stream::FuturesUnordered; use futures::StreamExt; use omicron_common::backoff::internal_service_policy; use omicron_common::backoff::retry_notify; use omicron_common::backoff::BackoffError; use slog::Logger; +use std::net::SocketAddr; use std::net::SocketAddrV6; +use std::time::Duration; use thiserror::Error; use tokio::sync::mpsc; use tokio::sync::oneshot; @@ -43,6 +46,7 @@ impl RssHandle { log: &Logger, config: SetupServiceConfig, peer_monitor: PeerMonitorObserver, + sp: Option, ) -> Self { let (tx, rx) = rss_channel(); @@ -54,7 +58,7 @@ impl RssHandle { ); let log = log.new(o!("component" => "BootstrapAgentRssHandler")); let task = tokio::spawn(async move { - rx.initialize_sleds(&log).await; + rx.initialize_sleds(&log, &sp).await; }); Self { _rss: rss, task } } @@ -65,6 +69,9 @@ enum InitializeSledAgentError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(#[from] reqwest::Error), + #[error("Failed to start sprockets proxy: {0}")] + SprocketsProxy(#[from] sprockets_proxy::Error), + #[error("Error making HTTP request to Bootstrap Agent: {0}")] BootstrapApi( #[from] @@ -76,6 +83,7 @@ async fn initialize_sled_agent( log: &Logger, bootstrap_addr: SocketAddrV6, request: &SledAgentRequest, + sp: &Option, ) -> Result<(), InitializeSledAgentError> { let dur = std::time::Duration::from_secs(60); @@ -84,8 +92,57 @@ async fn initialize_sled_agent( .timeout(dur) .build()?; - let url = format!("http://{}", bootstrap_addr); - info!(log, "Sending request to peer agent: {}", url); + let (url, _proxy_task) = if let Some(sp) = sp.as_ref() { + // We have an SP; spawn a sprockets proxy for this connection. + let proxy_config = sprockets_proxy::Config { + bind_address: "[::1]:0".parse().unwrap(), + target_address: SocketAddr::V6(bootstrap_addr), + role: sprockets_proxy::Role::Client, + }; + // TODO-cleanup The `Duration` passed to `Proxy::new()` is the timeout + // for communicating with the RoT. Currently it can be set to anything + // at all (our simulated RoT always responds immediately). Should the + // value move to our config? + let proxy = sprockets_proxy::Proxy::new( + &proxy_config, + sp.manufacturing_public_key(), + sp.rot_handle(), + sp.rot_certs(), + Duration::from_secs(5), + log.new(o!("BootstrapAgentClientSprocketsProxy" + => proxy_config.target_address)), + ) + .await?; + + let proxy_addr = proxy.local_addr(); + + let proxy_task = tokio::spawn(async move { + // TODO-robustness `proxy.run()` only fails if `accept()`ing on our + // already-bound listening socket fails, which means something has + // gone very wrong. Do we have any recourse other than panicking? + // What does dropshot do if `accept()` fails? + proxy.run().await.expect("sprockets client proxy failed"); + }); + + // Wrap `proxy_task` in `AbortOnDrop`, which will abort it (shutting + // down the proxy) when we return. + let proxy_task = AbortOnDrop(proxy_task); + + info!( + log, "Sending request to peer agent via sprockets proxy"; + "peer" => %bootstrap_addr, + "sprockets_proxy" => %proxy_addr, + ); + (format!("http://{}", proxy_addr), Some(proxy_task)) + } else { + // We have no SP; connect directly. + info!( + log, "Sending request to peer agent"; + "peer" => %bootstrap_addr, + ); + (format!("http://{}", bootstrap_addr), None) + }; + let client = bootstrap_agent_client::Client::new_with_client( &url, client, @@ -119,7 +176,7 @@ async fn initialize_sled_agent( }; retry_notify(internal_service_policy(), sled_agent_initialize, log_failure) .await?; - info!(log, "Peer agent at {} initialized", url); + info!(log, "Peer agent initialized"; "peer" => %bootstrap_addr); Ok(()) } @@ -178,7 +235,7 @@ struct BootstrapAgentHandleReceiver { } impl BootstrapAgentHandleReceiver { - async fn initialize_sleds(mut self, log: &Logger) { + async fn initialize_sleds(mut self, log: &Logger, sp: &Option) { let (requests, tx_response) = match self.inner.recv().await { Some(requests) => requests, None => { @@ -201,7 +258,7 @@ impl BootstrapAgentHandleReceiver { "target_sled" => %bootstrap_addr, ); - initialize_sled_agent(log, bootstrap_addr, &request) + initialize_sled_agent(log, bootstrap_addr, &request, sp) .await .map_err(|err| { format!( @@ -241,3 +298,11 @@ impl BootstrapAgentHandleReceiver { tx_response.send(Ok(())).unwrap(); } } + +struct AbortOnDrop(JoinHandle); + +impl Drop for AbortOnDrop { + fn drop(&mut self) { + self.0.abort(); + } +} diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index ad95d363ff..f3a0cdc9a7 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -8,9 +8,15 @@ use super::agent::Agent; use super::config::Config; use super::http_entrypoints::ba_api as http_api; use crate::config::Config as SledConfig; +use crate::sp::SpHandle; +use dropshot::HttpServer; use slog::Drain; +use slog::Logger; use std::net::Ipv6Addr; +use std::net::SocketAddr; +use std::net::SocketAddrV6; use std::sync::Arc; +use std::time::Duration; /// Wraps a [Agent] object, and provides helper methods for exposing it /// via an HTTP interface. @@ -38,9 +44,15 @@ impl Server { } else { debug!(log, "registered DTrace probes"); } + + info!(log, "detecting (real or simulated) SP"); + let sp = SpHandle::detect(&config.sp_config, &sled_config, &log) + .await + .map_err(|err| format!("Failed to detect local SP: {err}"))?; + info!(log, "setting up bootstrap agent server"); let bootstrap_agent = Arc::new( - Agent::new(log.clone(), sled_config, address) + Agent::new(log.clone(), sled_config, address, sp.clone()) .await .map_err(|e| e.to_string())?, ); @@ -56,6 +68,20 @@ impl Server { .map_err(|error| format!("initializing server: {}", error))? .start(); + // Are connections to our bootstrap dropshot server being tunneled + // through a sprockets proxy? If so, start up our half. + if let Some(sprockets_proxy_bind_addr) = + config.sprockets_proxy_bind_addr + { + spawn_sprockets_proxy( + &sp, + &http_server, + sprockets_proxy_bind_addr, + &log, + ) + .await?; + } + let server = Server { bootstrap_agent, http_server }; // Initialize the bootstrap agent *after* the server has started. @@ -88,3 +114,58 @@ pub fn run_openapi() -> Result<(), String> { .write(&mut std::io::stdout()) .map_err(|e| e.to_string()) } + +async fn spawn_sprockets_proxy( + sp: &Option, + http_server: &HttpServer>, + sprockets_proxy_bind_addr: SocketAddrV6, + log: &Logger, +) -> Result<(), String> { + // We can only start a sprockets proxy if we have an SP. + let sp = sp.as_ref().ok_or( + "Misconfiguration: cannot start a sprockets proxy without an SP", + )?; + + // If we're running a sprockets proxy, our dropshot server should be + // listening on localhost. + let dropshot_addr = http_server.local_addr(); + if !dropshot_addr.ip().is_loopback() { + return Err(concat!( + "Misconfiguration: bootstrap dropshot IP address should ", + "be loopback when using a sprockets proxy" + ) + .into()); + } + + let proxy_config = sprockets_proxy::Config { + bind_address: SocketAddr::V6(sprockets_proxy_bind_addr), + target_address: dropshot_addr, + role: sprockets_proxy::Role::Server, + }; + let proxy_log = log.new(o!("component" => "sprockets-proxy (Bootstrap)")); + + // TODO-cleanup The `Duration` passed to `Proxy::new()` is the timeout + // for communicating with the RoT. Currently it can be set to anything + // at all (our simulated RoT always responds immediately). Should the + // value move to our config? + let proxy = sprockets_proxy::Proxy::new( + &proxy_config, + sp.manufacturing_public_key(), + sp.rot_handle(), + sp.rot_certs(), + Duration::from_secs(5), + proxy_log, + ) + .await + .map_err(|err| format!("Failed to start sprockets proxy: {err}"))?; + + tokio::spawn(async move { + // TODO-robustness `proxy.run()` only fails if `accept()`ing on our + // already-bound listening socket fails, which means something has + // gone very wrong. Do we have any recourse other than panicking? + // What does dropshot do if `accept()` fails? + proxy.run().await.expect("sprockets server proxy failed"); + }); + + Ok(()) +} diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index d63698402b..f07c643421 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -32,6 +32,7 @@ pub mod rack_setup; pub mod server; mod services; mod sled_agent; +mod sp; mod storage_manager; mod updates; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 602c7b7d8e..0fef7054d2 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -46,6 +46,9 @@ pub enum SetupServiceError { #[error("Failed to construct an HTTP client: {0}")] HttpClient(reqwest::Error), + + #[error("Failed to construct a sprockets proxy: {0}")] + SprocketsProxy(#[from] sprockets_proxy::Error), } // The workload / information allocated to a single sled. diff --git a/sled-agent/src/sp.rs b/sled-agent/src/sp.rs new file mode 100644 index 0000000000..6148e2d5e7 --- /dev/null +++ b/sled-agent/src/sp.rs @@ -0,0 +1,238 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Interface to a (currently simulated) SP / RoT. + +use crate::config::Config as SledConfig; +use crate::illumos; +use crate::illumos::dladm::CreateVnicError; +use crate::illumos::dladm::Dladm; +use crate::zone::EnsureGzAddressError; +use crate::zone::Zones; +use slog::Logger; +use sp_sim::config::GimletConfig; +use sp_sim::RotRequestV1; +use sp_sim::RotResponseV1; +use sp_sim::SimulatedSp as SpSimSimulatedSp; +use sprockets_host::Ed25519Certificates; +use sprockets_host::Ed25519PublicKey; +use sprockets_host::RotManager; +use sprockets_host::RotManagerHandle; +use sprockets_host::RotOpV1; +use sprockets_host::RotResultV1; +use sprockets_host::RotTransport; +use std::collections::VecDeque; +use std::net::Ipv6Addr; +use std::sync::Arc; +use std::thread; +use std::time::Instant; +use thiserror::Error; + +// These error cases are mostly simulation-specific; the list will grow once we +// have real hardware (and may shrink if/when we remove or collapse simulated +// cases). We mark the enum `non_exhaustive` to save some pain in the future. +#[derive(Debug, Error)] +#[non_exhaustive] +pub enum SpError { + #[error("Simulated SP config specifies distinct IP addresses ({0}, {1})")] + SimulatedSpMultipleIpAddresses(Ipv6Addr, Ipv6Addr), + #[error("Could not access etherstub for simulated SP: {0}")] + CreateEtherstub(illumos::ExecutionError), + #[error("Could not access etherstub VNIC device for simulated SP: {0}")] + CreateEtherstubVnic(CreateVnicError), + #[error("Could not ensure IP address {addr} in global zone for simulated SP: {err}")] + EnsureGlobalZoneAddressError { addr: Ipv6Addr, err: EnsureGzAddressError }, + #[error("Could not start simualted SP: {0}")] + StartSimSpError(String), + #[error("Communication with RoT failed: {0}")] + RotCommunicationError(String), +} + +#[derive(Clone)] +pub struct SpHandle { + inner: Inner, +} + +impl SpHandle { + /// Attempt to detect the presence of an SP. + /// + /// Currently the only "detection" performed is whether `sp_config` is + /// `Some(_)`, in which case a simulated SP is started, and a handle to it + /// is returned. + /// + /// A return value of `Ok(None)` means no SP is available. + pub async fn detect( + sp_config: &Option, + sled_config: &SledConfig, + log: &Logger, + ) -> Result, SpError> { + let inner = if let Some(config) = sp_config.as_ref() { + let sim_sp = start_simulated_sp(config, sled_config, log).await?; + Some(Inner::SimulatedSp(sim_sp)) + } else { + None + }; + Ok(inner.map(|inner| Self { inner })) + } + + pub fn manufacturing_public_key(&self) -> Ed25519PublicKey { + match &self.inner { + Inner::SimulatedSp(sim) => sim.sp.manufacturing_public_key(), + } + } + + // TODO The error type here leaks that we only currently support simulated + // SPs and will need work once we support a real SP. + pub fn rot_handle(&self) -> RotManagerHandle { + match &self.inner { + Inner::SimulatedSp(sim) => sim.rot_handle.clone(), + } + } + + pub fn rot_certs(&self) -> Ed25519Certificates { + match &self.inner { + Inner::SimulatedSp(sim) => sim.rot_certs, + } + } +} + +#[derive(Clone)] +enum Inner { + SimulatedSp(SimulatedSp), +} + +#[derive(Clone)] +struct SimulatedSp { + sp: Arc, + rot_certs: Ed25519Certificates, + rot_handle: RotManagerHandle, +} + +async fn start_simulated_sp( + sp_config: &GimletConfig, + sled_config: &SledConfig, + log: &Logger, +) -> Result { + // Is our simulated SP going to bind to addresses (acting like management + // network IPs)? + if let Some(bind_addrs) = sp_config.common.bind_addrs { + // Sanity check that the sim SP config only specifies one IP address; we + // can simulate multiple management network ports by using different TCP + // ports. + let sp_addr = bind_addrs[0].ip(); + for addr in bind_addrs[1..].iter().copied().chain( + sp_config.components.iter().filter_map(|comp| comp.serial_console), + ) { + if sp_addr != addr.ip() { + return Err(SpError::SimulatedSpMultipleIpAddresses( + *sp_addr, + *addr.ip(), + )); + } + } + + // Ensure we have the global zone IP address we need for the SP. + let etherstub = + Dladm::create_etherstub().map_err(SpError::CreateEtherstub)?; + let etherstub_vnic = Dladm::create_etherstub_vnic(ðerstub) + .map_err(SpError::CreateEtherstubVnic)?; + Zones::ensure_has_global_zone_v6_address( + etherstub_vnic, + *sp_addr, + "simsp", + ) + .map_err(|err| SpError::EnsureGlobalZoneAddressError { + addr: *sp_addr, + err, + })?; + } + + // Start up the simulated SP. + info!(log, "starting simulated gimlet SP"); + let sp_log = log.new(o!( + "component" => "sp-sim", + "server" => sled_config.id.clone().to_string(), + )); + let sp = Arc::new( + sp_sim::Gimlet::spawn(&sp_config, sp_log) + .await + .map_err(|e| SpError::StartSimSpError(e.to_string()))?, + ); + + // Start up the simulated RoT. + info!(log, "starting simulated gimlet RoT"); + let rot_log = log.new(o!( + "component" => "rot-sim", + "server" => sled_config.id.clone().to_string(), + )); + let transport = + SimRotTransport { sp: Arc::clone(&sp), responses: VecDeque::new() }; + let (rot_manager, rot_handle) = RotManager::new(32, transport, rot_log); + + // Spawn a thread to communicate with the RoT. In real hardware this + // ultimately uses the UART. + thread::Builder::new() + .name("sim-rot".to_string()) + .spawn(move || { + rot_manager.run(); + }) + .unwrap(); + + // Ask the simulated RoT for its certs. The deadline is ignored by our + // simulated rot transport; just pass "now". + let rot_certs_result = rot_handle + .call(RotOpV1::GetCertificates, Instant::now()) + .await + .map_err(|err| SpError::RotCommunicationError(err.to_string()))?; + let rot_certs = match rot_certs_result { + RotResultV1::Certificates(certs) => certs, + RotResultV1::Err(err) => { + return Err(SpError::RotCommunicationError(format!("{err:?}"))); + } + other => { + return Err(SpError::RotCommunicationError(format!( + "unexpected response to GetCertificates request: {other:?}" + ))); + } + }; + + Ok(SimulatedSp { sp, rot_certs, rot_handle }) +} + +struct SimRotTransport { + sp: Arc, + responses: VecDeque, +} + +#[derive(Debug, Error)] +pub enum SimRotTransportError { + #[error("RoT sprockets error: {0}")] + RotSprocketError(sp_sim::RotSprocketError), + #[error("Empty recv queue (recv called more than send?)")] + EmptyRecvQueue, +} + +impl RotTransport for SimRotTransport { + type Error = SimRotTransportError; + + fn send( + &mut self, + req: RotRequestV1, + _deadline: std::time::Instant, + ) -> Result<(), Self::Error> { + let response = self + .sp + .rot_request(req) + .map_err(SimRotTransportError::RotSprocketError)?; + self.responses.push_back(response); + Ok(()) + } + + fn recv( + &mut self, + _deadline: std::time::Instant, + ) -> Result { + self.responses.pop_front().ok_or(SimRotTransportError::EmptyRecvQueue) + } +}