From bf313f0bb878bb446c9f8f4c00125ace6e7d834a Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Sat, 18 Oct 2025 22:39:08 +0000
Subject: [PATCH 01/14] TQ: Async Nodes and P2P connections

Builds on https://github.com/oxidecomputer/omicron/pull/9232

This is the first step in wrapping the `trust_quorum::Node` so that it can be
used in an async context and integrated with sled-agent. Only the sprockets
networking has been fully integrated so far, such that each `NodeTask` has a
`ConnMgr` that sets up a full mesh of sprockets connections. A test for this
connectivity behavior has been written, but the code is not wired into the
production code yet.

Messages can be sent between `NodeTask`s over sprockets connections. Each
connection runs in its own task managed by an `EstablishedConn`. The main
`NodeTask` task sends messages to and receives messages from this task to
interact with the outside world via sprockets. Currently only `Ping` messages
are sent over the wire, as a means to keep connections alive and detect
disconnects.

A `NodeTaskHandle` allows one to interact with the `NodeTask`. Currently only
three operations are implemented, with messages defined in `NodeApiRequest`.
The user can tell the node who its peers are on the bootstrap network so it
can establish connectivity, can poll for connectivity status, and can shut
down the node. All of this functionality is exercised in the accompanying
test.

It's important to reiterate that this code only implements connectivity
between trust quorum nodes; no actual trust quorum messages are sent. They
can't be, as a handle cannot yet initiate a reconfiguration or LRTQ upgrade.
That behavior will come in a follow up. This PR is large enough.

A lot of this code is similar to the LRTQ connection management code, except
that it operates over sprockets rather than plain TCP connections. This
introduces some complexity, but it is mostly abstracted away into the
`SprocketsConfig`.
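For reference, a rough sketch of the intended call pattern follows. It is
illustrative only and mirrors the in-crate `full_mesh_connectivity` test
rather than any production wiring; the `Config` value, the set of peer
addresses, and the logger are assumed to be supplied by the caller (e.g.
sled-agent, eventually), and the code is written as if it lives inside the
crate since not all of these types are re-exported yet.

    // Sketch only: mirrors the call pattern of the in-crate
    // `full_mesh_connectivity` test. `config`, `peers`, and `log` are
    // assumed to come from the caller.
    use std::collections::BTreeSet;
    use std::net::SocketAddrV6;

    async fn run_one_node(
        config: Config,                 // trust-quorum task config
        peers: BTreeSet<SocketAddrV6>,  // bootstrap addrs, normally learned via DDMD
        log: &slog::Logger,
    ) -> Result<(), NodeApiError> {
        // Build the async wrapper around the sans-io `Node` and get a handle.
        let (mut task, handle) = NodeTask::new(config, log).await;
        // The node runs its main loop in its own tokio task.
        tokio::spawn(async move { task.run().await });

        // Tell the node who its peers are; the `ConnMgr` then drives a full
        // mesh of sprockets connections and keeps it alive with `Ping`s.
        handle.load_peer_addresses(peers).await?;

        // Poll connectivity status (useful for debugging and tests), then
        // shut down the node's tokio tasks.
        let status = handle.conn_mgr_status().await?;
        slog::info!(log, "trust quorum connectivity"; "status" => ?status);
        handle.shutdown().await
    }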
--- Cargo.lock | 273 +++++++++- Cargo.toml | 4 +- sled-agent/src/bootstrap/config.rs | 1 + trust-quorum/Cargo.toml | 6 + trust-quorum/src/connection_manager.rs | 717 +++++++++++++++++++++++++ trust-quorum/src/established_conn.rs | 343 ++++++++++++ trust-quorum/src/lib.rs | 8 + trust-quorum/src/task.rs | 469 ++++++++++++++++ 8 files changed, 1796 insertions(+), 25 deletions(-) create mode 100644 trust-quorum/src/connection_manager.rs create mode 100644 trust-quorum/src/established_conn.rs create mode 100644 trust-quorum/src/task.rs diff --git a/Cargo.lock b/Cargo.lock index 91e68b4b509..534b403f3a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -536,6 +536,41 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "attest-data" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/dice-util?rev=10952e8d9599b735b85d480af3560a11700e5b64#10952e8d9599b735b85d480af3560a11700e5b64" +dependencies = [ + "const-oid", + "der", + "getrandom 0.3.4", + "hex", + "hubpack", + "rats-corim", + "salty", + "serde", + "serde_with", + "sha3", + "static_assertions", + "thiserror 2.0.17", +] + +[[package]] +name = "attest-mock" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/dice-util?rev=10952e8d9599b735b85d480af3560a11700e5b64#10952e8d9599b735b85d480af3560a11700e5b64" +dependencies = [ + "anyhow", + "attest-data 0.5.0", + "clap", + "hex", + "hubpack", + "knuffel", + "miette", + "rats-corim", + "serde_json", +] + [[package]] name = "atty" version = "0.2.14" @@ -624,6 +659,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "backtrace-ext" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" +dependencies = [ + "backtrace", +] + [[package]] name = "base16ct" version = "0.2.0" @@ -1343,6 +1387,15 @@ dependencies = [ "serde", ] +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -1412,7 +1465,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", - "terminal_size", + "terminal_size 0.4.0", ] [[package]] @@ -2642,7 +2695,7 @@ name = "dice-verifier" version = "0.3.0-pre0" source = "git+https://github.com/oxidecomputer/dice-util?rev=4b408edc1d00f108ddf635415d783e6f12fe9641#4b408edc1d00f108ddf635415d783e6f12fe9641" dependencies = [ - "attest-data", + "attest-data 0.4.0", "const-oid", "ed25519-dalek", "env_logger", @@ -3036,14 +3089,14 @@ dependencies = [ "indent_write", "newtype_derive", "openapiv3", - "owo-colors", + "owo-colors 4.2.2", "paste", "semver 1.0.27", "serde_json", "sha2", "similar", - "supports-color", - "textwrap", + "supports-color 3.0.2", + "textwrap 0.16.2", "thiserror 2.0.17", ] @@ -4209,6 +4262,16 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.15.4" @@ -4290,6 +4353,9 @@ name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "heck" @@ -5618,6 
+5684,33 @@ dependencies = [ "zeroize", ] +[[package]] +name = "knuffel" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04bee6ddc6071011314b1ce4f7705fef6c009401dba4fd22cb0009db6a177413" +dependencies = [ + "base64 0.21.7", + "chumsky", + "knuffel-derive", + "miette", + "thiserror 1.0.69", + "unicode-width 0.1.14", +] + +[[package]] +name = "knuffel-derive" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91977f56c49cfb961e3d840e2e7c6e4a56bde7283898cf606861f1421348283d" +dependencies = [ + "heck 0.4.1", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "kstat-macro" version = "0.1.0" @@ -6234,6 +6327,38 @@ dependencies = [ "tokio", ] +[[package]] +name = "miette" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e" +dependencies = [ + "backtrace", + "backtrace-ext", + "is-terminal", + "miette-derive", + "once_cell", + "owo-colors 3.5.0", + "supports-color 2.1.0", + "supports-hyperlinks", + "supports-unicode", + "terminal_size 0.1.17", + "textwrap 0.15.2", + "thiserror 1.0.69", + "unicode-width 0.1.14", +] + +[[package]] +name = "miette-derive" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "mime" version = "0.3.17" @@ -7366,7 +7491,7 @@ dependencies = [ "swrite", "tabled 0.15.0", "test-strategy", - "textwrap", + "textwrap 0.16.2", "thiserror 2.0.17", "tokio", "tough", @@ -8073,7 +8198,7 @@ dependencies = [ "serde", "slog", "slog-error-chain", - "textwrap", + "textwrap 0.16.2", "tokio", "uuid", ] @@ -8363,7 +8488,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "oxide-tokio-rt", "oximeter-client", "oximeter-db", @@ -8382,9 +8507,9 @@ dependencies = [ "strum 0.27.2", "subprocess", "support-bundle-viewer", - "supports-color", + "supports-color 3.0.2", "tabled 0.15.0", - "textwrap", + "textwrap 0.16.2", "tokio", "tufaceous-artifact", "unicode-width 0.1.14", @@ -8489,7 +8614,7 @@ dependencies = [ "repo-depot-client", "serde_json", "slog", - "supports-color", + "supports-color 3.0.2", "tokio", "update-engine", "uuid", @@ -9121,6 +9246,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + [[package]] name = "owo-colors" version = "4.2.2" @@ -10134,6 +10265,36 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "pki-playground" +version = "0.2.0" +source = "git+https://github.com/oxidecomputer/pki-playground?rev=7600756029ce046a02c6234aa84ce230cc5eaa04#7600756029ce046a02c6234aa84ce230cc5eaa04" +dependencies = [ + "camino", + "clap", + "const-oid", + "der", + "digest", + "ed25519-dalek", + "flagset", + "hex", + "ipnet", + "knuffel", + "miette", + "p384", + "pem-rfc7468", + "pkcs8", + "rand 0.8.5", + "rsa", + "sha1", + "sha2", + 
"sha3", + "signature", + "spki", + "x509-cert", + "zeroize", +] + [[package]] name = "plain" version = "0.2.3" @@ -11484,9 +11645,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.6" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" dependencies = [ "const-oid", "digest", @@ -13115,7 +13276,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.106", @@ -13195,10 +13356,10 @@ dependencies = [ [[package]] name = "sprockets-tls" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/sprockets.git?rev=7da1f0b5dcd3d631da18b43ba78a84b1a2b425ee#7da1f0b5dcd3d631da18b43ba78a84b1a2b425ee" +source = "git+https://github.com/oxidecomputer/sprockets.git?rev=dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210#dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" dependencies = [ "anyhow", - "attest-data", + "attest-data 0.4.0", "camino", "cfg-if", "clap", @@ -13225,6 +13386,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "sprockets-tls-test-utils" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/sprockets.git?rev=dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210#dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" +dependencies = [ + "camino", + "pki-playground", +] + [[package]] name = "sqlformat" version = "0.3.5" @@ -13497,6 +13667,16 @@ dependencies = [ "zip 4.2.0", ] +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + [[package]] name = "supports-color" version = "3.0.2" @@ -13506,6 +13686,24 @@ dependencies = [ "is_ci", ] +[[package]] +name = "supports-hyperlinks" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84231692eb0d4d41e4cdd0cabfdd2e6cd9e255e65f80c9aa7c98dd502b4233d" +dependencies = [ + "is-terminal", +] + +[[package]] +name = "supports-unicode" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f850c19edd184a205e883199a261ed44471c81e39bd95b1357f5febbef00e77a" +dependencies = [ + "is-terminal", +] + [[package]] name = "swrite" version = "0.1.0" @@ -13755,6 +13953,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "terminal_size" version = "0.4.0" @@ -13802,6 +14010,17 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "textwrap" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width 0.1.14", +] + [[package]] name = "textwrap" version = "0.16.2" @@ -13809,7 +14028,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" dependencies = [ "smawk", - "terminal_size", + "terminal_size 
0.4.0", "unicode-linebreak", "unicode-width 0.2.0", ] @@ -14582,13 +14801,17 @@ version = "0.1.0" dependencies = [ "anyhow", "assert_matches", + "attest-mock", "bcs", "bootstore", + "bytes", "camino", "chacha20poly1305", + "ciborium", "daft", "derive_more 0.99.20", "dropshot", + "futures", "gfss", "hex", "hkdf", @@ -14606,6 +14829,8 @@ dependencies = [ "sled-agent-types", "slog", "slog-error-chain", + "sprockets-tls", + "sprockets-tls-test-utils", "static_assertions", "subtle", "test-strategy", @@ -15115,14 +15340,14 @@ dependencies = [ "linear-map", "omicron-test-utils", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "petgraph 0.8.2", "schemars 0.8.22", "serde", "serde_json", "serde_with", "slog", - "supports-color", + "supports-color 3.0.2", "swrite", "tokio", "tokio-stream", @@ -15652,7 +15877,7 @@ dependencies = [ "omicron-common", "omicron-passwords", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "proptest", "ratatui", "reqwest", @@ -15666,9 +15891,9 @@ dependencies = [ "slog-async", "slog-envlogger", "slog-term", - "supports-color", + "supports-color 3.0.2", "tempfile", - "textwrap", + "textwrap 0.16.2", "tokio", "tokio-util", "toml 0.8.23", @@ -15695,7 +15920,7 @@ dependencies = [ "maplit", "omicron-common", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "oxnet", "schemars 0.8.22", "serde", @@ -16365,7 +16590,7 @@ dependencies = [ "serde", "swrite", "tabled 0.15.0", - "textwrap", + "textwrap 0.16.2", "toml 0.8.23", "usdt 0.5.0", ] diff --git a/Cargo.toml b/Cargo.toml index 630ac028cc4..acc91bb2037 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -370,6 +370,7 @@ assert_matches = "1.5.0" assert_cmd = "2.0.17" async-bb8-diesel = "0.2" async-trait = "0.1.89" +attest-mock = { git = "https://github.com/oxidecomputer/dice-util", rev = "10952e8d9599b735b85d480af3560a11700e5b64" } atomicwrites = "0.4.4" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } @@ -724,7 +725,8 @@ slog-term = "2.9.1" smf = "0.2" socket2 = { version = "0.5", features = ["all"] } sp-sim = { path = "sp-sim" } -sprockets-tls = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "7da1f0b5dcd3d631da18b43ba78a84b1a2b425ee" } +sprockets-tls = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" } +sprockets-tls-test-utils = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" } sqlformat = "0.3.5" sqlparser = { version = "0.45.0", features = [ "visitor" ] } static_assertions = "1.1.0" diff --git a/sled-agent/src/bootstrap/config.rs b/sled-agent/src/bootstrap/config.rs index 6833cb76071..3b6b5e3e443 100644 --- a/sled-agent/src/bootstrap/config.rs +++ b/sled-agent/src/bootstrap/config.rs @@ -7,3 +7,4 @@ pub const BOOTSTRAP_AGENT_HTTP_PORT: u16 = 80; pub const BOOTSTRAP_AGENT_RACK_INIT_PORT: u16 = 12346; pub const BOOTSTORE_PORT: u16 = 12347; +pub const TRUST_QUORUM_PORT: u16 = 12349; diff --git a/trust-quorum/Cargo.toml b/trust-quorum/Cargo.toml index 0b0dfefb0fe..7a034f78f8d 100644 --- a/trust-quorum/Cargo.toml +++ b/trust-quorum/Cargo.toml @@ -11,10 +11,13 @@ workspace = true anyhow.workspace = true bcs.workspace = true bootstore.workspace = true +bytes.workspace = true camino.workspace = true chacha20poly1305.workspace = true +ciborium.workspace = true daft.workspace = true derive_more.workspace = true +futures.workspace = true gfss.workspace = true hex.workspace = true hkdf.workspace = true @@ -28,6 +31,7 
@@ sha3.workspace = true sled-agent-types.workspace = true slog.workspace = true slog-error-chain.workspace = true +sprockets-tls.workspace = true static_assertions.workspace = true subtle.workspace = true thiserror.workspace = true @@ -38,12 +42,14 @@ omicron-workspace-hack.workspace = true [dev-dependencies] assert_matches.workspace = true +attest-mock.workspace = true dropshot.workspace = true omicron-test-utils.workspace = true proptest.workspace = true serde_json.workspace = true test-strategy.workspace = true trust-quorum-test-utils.workspace = true +sprockets-tls-test-utils.workspace = true [features] # Impl `PartialEq` and `Eq` for types implementing `subtle::ConstantTimeEq` when diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs new file mode 100644 index 00000000000..4d24e365148 --- /dev/null +++ b/trust-quorum/src/connection_manager.rs @@ -0,0 +1,717 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A mechanism for maintaining a full mesh of trust quorum node connections + +use crate::established_conn::EstablishedConn; +use crate::{BaseboardId, PeerMsg}; +// TODO: Move or copy this to this crate? +use bootstore::schemes::v0::NetworkConfig; +use camino::Utf8PathBuf; +use futures::StreamExt; +use futures::stream::FuturesUnordered; +use serde::{Deserialize, Serialize}; +use slog::{Logger, debug, error, info, o, warn}; +use slog_error_chain::SlogInlineError; +use sprockets_tls::keys::SprocketsConfig; +use sprockets_tls::server::SprocketsAcceptor; +use std::collections::{BTreeMap, BTreeSet}; +use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; +use std::time::Duration; +use tokio::sync::mpsc; +use tokio::task::JoinHandle; +use tokio::time::{MissedTickBehavior, interval}; + +/// We only expect a handful of concurrent requests at most. +const CHANNEL_BOUND: usize = 10; + +// Time between checks to see if we need to reconnect to to any peers +const RECONNECT_TIME: Duration = Duration::from_secs(5); + +/// An error returned from `ConnMgr::accept` +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum AcceptError { + #[error("Accepted connection from IPv4 address {addr}. Only IPv6 allowed.")] + Ipv4Accept { addr: SocketAddrV4 }, + + #[error("sprockets error")] + Sprockets( + #[from] + #[source] + sprockets_tls::Error, + ), +} + +/// A mechanism for uniquely identifying a task managing a connection +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct TaskId(u64); + +impl TaskId { + pub fn new(id: u64) -> TaskId { + TaskId(id) + } + + /// Increment the ID and then return the value before the increment + pub fn inc(&mut self) -> TaskId { + let id = *self; + self.0 += 1; + id + } +} + +/// Messages sent from the main task to the connection managing tasks +#[derive(Debug, PartialEq)] +pub enum MainToConnMsg { + Close, + #[allow(unused)] + Msg(WireMsg), +} + +/// All possible messages sent over established connections +/// +/// This include trust quorum related `PeerMsg`s, but also ancillary network +/// messages used for other purposes. +/// +/// All `WireMsg`s sent between nodes is prefixed with a 4 byte size header used +/// for framing. 
+#[derive(Debug, PartialEq, Serialize, Deserialize)] +pub enum WireMsg { + /// Used for connection keep alive + Ping, + /// Trust quorum peer messages + Tq(PeerMsg), + /// Early network configuration to enable NTP timesync + /// + /// Technically this is not part of the trust quorum protocol. However it is + /// necessary to gossip this information to all nodes on the system so that + /// each can establish NTP sync required for the rest of the control plane + /// to boot. In short, we can't have rack unlock without this information, + /// even if we can decrypt the drives. For simplicity, we just piggyback + /// this information on the trust quorum connections. This is why the + /// implementation of LRTQ lived inside the `bootstore` directory in the + /// `omicron` repo. This is technically an eventually consistent database + /// of tiny information layered on top of trust quorum. You can still think + /// of it as a bootstore, although, we no longer use that name. + NetworkConfig(NetworkConfig), +} + +/// Messages sent from connection managing tasks to the main peer task +/// +/// We include `task_id` to differentiate which task they come from so we can +/// exclude requests from tasks that have been cancelled or have been told to +/// shutdown. +#[derive(Debug, PartialEq)] +pub struct ConnToMainMsg { + pub task_id: TaskId, + pub msg: ConnToMainMsgInner, +} + +#[derive(Debug, PartialEq)] +pub enum ConnToMainMsgInner { + Accepted { addr: SocketAddrV6, peer_id: BaseboardId }, + Connected { addr: SocketAddrV6, peer_id: BaseboardId }, + Received { from: BaseboardId, msg: PeerMsg }, + ReceivedNetworkConfig { from: BaseboardId, config: NetworkConfig }, + Disconnected { peer_id: BaseboardId }, +} + +pub struct TaskHandle { + pub task_id: TaskId, + pub tx: mpsc::Sender, + pub conn_type: ConnectionType, +} + +impl TaskHandle { + pub fn addr(&self) -> SocketAddrV6 { + self.conn_type.addr() + } +} + +pub enum ConnectionType { + Connected(SocketAddrV6), + Accepted(SocketAddrV6), +} + +impl ConnectionType { + pub fn addr(&self) -> SocketAddrV6 { + match self { + Self::Connected(addr) => *addr, + Self::Accepted(addr) => *addr, + } + } +} + +#[derive(Debug, Clone)] +pub enum ConnState { + Connecting, + Accepting, + Established(BaseboardId), +} + +/// Information about a single connection task +#[derive(Debug, Clone)] +pub struct ConnInfo { + pub state: ConnState, + pub addr: SocketAddrV6, + pub task_id: TaskId, +} + +/// Status information useful for debugging +#[derive(Debug, Clone)] +pub struct ConnMgrStatus { + pub bootstrap_addrs: BTreeSet, + pub connections: Vec, + pub num_task_join_handles: u64, + pub next_task_id: TaskId, +} + +/// A structure to manage all sprockets connections to peer nodes +/// +/// Each sprockets connection runs in its own task which communicates with the +/// main `NodeTask`. All methods on the `ConnMgr` run inside the main `NodeTask` +/// as `ConnMgr` is a member field of `NodeTask`. This allows isolating the +/// connection management logic from the main node message handling logic +/// without adding yet another task. 
+pub struct ConnMgr { + log: Logger, + + /// A channel for sending messages from a connection task to the main task + main_tx: mpsc::Sender, + + /// The sprockets config + config: SprocketsConfig, + + /// The sprockets server + server: sprockets_tls::Server, + + /// The address the sprockets server listens on + listen_addr: SocketAddrV6, + + // A unique, monotonically incrementing id for each task to help map tasks + // to their handles in case the task aborts, or there is a new connection + // accepted and established for an existing `BaseboardId`. + next_task_id: TaskId, + + /// `JoinHandle`s to all tasks that can be polled for crashes + join_handles: FuturesUnordered>, + + /// All known addresses on the bootstrap network, learned via DDMD + bootstrap_addrs: BTreeSet, + + /// All tasks currently connecting to remote nodes and attempting a + /// sprockets handshake. + connecting: BTreeMap, + + /// All tasks with an accepted TCP connnection performing a sprockets handshake + accepting: BTreeMap, + + /// All tasks containing established connections that can be used to communicate + /// with other nodes. + established: BTreeMap, +} + +impl ConnMgr { + pub async fn new( + log: &Logger, + mut listen_addr: SocketAddrV6, + sprockets_config: SprocketsConfig, + main_tx: mpsc::Sender, + ) -> ConnMgr { + let log = log.new(o!("component" => "trust-quorum-conn-mgr")); + + let config = sprockets_config.clone(); + let server = sprockets_tls::Server::new( + sprockets_config, + listen_addr, + log.clone(), + ) + .await + .expect("sprockets server can listen"); + + // If the listen port was 0, we want to update our addr to use + // the actual port This is really only useful for testing, but the + // connection manager won't work properly without doing this because it + // will never trigger connections since its own address will always sort + // lower than other addresses if only the ports differ. 
+ let listen_port = server.listen_addr().unwrap().port(); + + if listen_port != listen_addr.port() { + listen_addr.set_port(listen_port); + } + + info!( + log, + "Started listening"; + "local_addr" => %listen_addr + ); + + ConnMgr { + log, + main_tx, + config, + server, + listen_addr, + next_task_id: TaskId::new(0), + join_handles: Default::default(), + bootstrap_addrs: BTreeSet::new(), + connecting: BTreeMap::new(), + accepting: BTreeMap::new(), + established: BTreeMap::new(), + } + } + + pub async fn shutdown(&mut self) { + // Shutdown all connection processing tasks + for (_, handle) in &self.accepting { + let _ = handle.tx.send(MainToConnMsg::Close).await; + } + for (_, handle) in &self.connecting { + let _ = handle.tx.send(MainToConnMsg::Close).await; + } + for (_, handle) in &self.established { + let _ = handle.tx.send(MainToConnMsg::Close).await; + } + } + + pub fn status(&self) -> ConnMgrStatus { + let connections = self + .connecting + .iter() + .map(|(addr, task_handle)| ConnInfo { + state: ConnState::Connecting, + addr: *addr, + task_id: task_handle.task_id, + }) + .chain(self.accepting.iter().map(|(addr, task_handle)| ConnInfo { + state: ConnState::Accepting, + addr: *addr, + task_id: task_handle.task_id, + })) + .chain(self.established.iter().map( + |(baseboard_id, task_handle)| ConnInfo { + state: ConnState::Established(baseboard_id.clone()), + addr: task_handle.addr(), + task_id: task_handle.task_id, + }, + )) + .collect(); + + ConnMgrStatus { + bootstrap_addrs: self.bootstrap_addrs.clone(), + connections, + num_task_join_handles: self.join_handles.len() as u64, + next_task_id: self.next_task_id, + } + } + + pub fn listen_addr(&self) -> SocketAddrV6 { + self.listen_addr + } + + /// Perform any polling related operations that the connection + /// manager must perform concurrently. + pub async fn step( + &mut self, + corpus: Vec, + ) -> Result<(), AcceptError> { + let mut interval = interval(RECONNECT_TIME); + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + + loop { + tokio::select! { + acceptor = self.server.accept(corpus.clone()) => { + self.accept(acceptor?).await?; + } + Some(res) = self.join_handles.next() => { + match res { + Ok(task_id) => { + self.on_task_exit(task_id).await; + } + Err(err) => { + error!(self.log, "Connection task panic: {}", err); + } + + } + } + _ = interval.tick() => { + self.reconnect(corpus.clone()).await; + } + } + } + } + + pub async fn accept( + &mut self, + acceptor: SprocketsAcceptor, + ) -> Result<(), AcceptError> { + let addr = match acceptor.addr() { + SocketAddr::V4(addr) => { + return Err(AcceptError::Ipv4Accept { addr }); + } + SocketAddr::V6(addr) => addr, + }; + let log = self.log.clone(); + let task_id = self.next_task_id.inc(); + let (tx, rx) = mpsc::channel(CHANNEL_BOUND); + let task_handle = TaskHandle { + task_id, + tx, + conn_type: ConnectionType::Accepted(addr), + }; + let main_tx = self.main_tx.clone(); + let join_handle = tokio::spawn(async move { + match acceptor.handshake().await { + Ok((stream, _)) => { + let platform_id = + stream.peer_platform_id().as_str().unwrap(); + let baseboard_id = platform_id_to_baseboard_id(platform_id); + + // TODO: Conversion between `PlatformId` and `BaseboardId` should + // happen in `sled-agent-types`. This is waiting on an update + // to the `dice-mfg-msgs` crate. 
+ let log = + log.new(o!("peer_id" => baseboard_id.to_string())); + info!(log, "Accepted sprockets connection"; "addr" => %addr); + + let mut conn = EstablishedConn::new( + baseboard_id.clone(), + task_id, + stream, + main_tx.clone(), + rx, + &log, + ); + + // Inform the main task that accepted connection is established + if let Err(e) = main_tx + .send(ConnToMainMsg { + task_id: task_id, + msg: ConnToMainMsgInner::Accepted { + addr, + peer_id: baseboard_id, + }, + }) + .await + { + // The system is shutting down + // Just bail from this task + warn!( + log, + "Failed to send 'accepted' msg to main task: {e:?}" + ); + } else { + conn.run().await; + } + } + Err(err) => { + error!(log, "Failed to accept a connection"; &err); + } + } + task_id + }); + self.join_handles.push(join_handle); + self.accepting.insert(addr, task_handle); + Ok(()) + } + + pub async fn server_handshake_completed( + &mut self, + task_id: TaskId, + addr: SocketAddrV6, + peer_id: BaseboardId, + ) { + if let Some(task_handle) = self.accepting.remove(&addr) { + info!( + self.log, + "Established server connection"; + "task_id" => ?task_id, + "remote_addr" => %addr, + "remote_peer_id" => peer_id.to_string() + ); + let already_established = + self.established.insert(peer_id, task_handle); + assert!(already_established.is_none()); + } + } + + pub async fn client_handshake_completed( + &mut self, + task_id: TaskId, + addr: SocketAddrV6, + peer_id: BaseboardId, + ) { + if let Some(task_handle) = self.connecting.remove(&addr) { + info!( + self.log, + "Established client connection"; + "task_id" => ?task_id, + "remote_addr" => %addr, + "remote_peer_id" => peer_id.to_string() + ); + let already_established = + self.established.insert(peer_id, task_handle); + + assert!(already_established.is_none()); + } + } + + /// The established connection task has asynchronously exited. + pub async fn on_disconnected( + &mut self, + task_id: TaskId, + peer_id: BaseboardId, + ) { + if let Some(task_handle) = self.established.get(&peer_id) { + if task_handle.task_id != task_id { + // This was a stale disconnect + return; + } + } + warn!(self.log, "peer disconnected"; "peer_id" => %peer_id); + let _ = self.established.remove(&peer_id); + } + + /// Initiate connections if a corresponding task doesn't already exist. This + /// must be called periodically to handle transient disconnections which + /// cause tasks to exit. + pub async fn reconnect(&mut self, corpus: Vec) { + debug!(self.log, "Reconnect called"); + let mut to_connect = vec![]; + for addr in + self.bootstrap_addrs.iter().filter(|&&addr| self.listen_addr > addr) + { + if self.connecting.contains_key(addr) { + continue; + } + + if self + .established + .values() + .any(|task_handle| task_handle.addr() == *addr) + { + continue; + } + + to_connect.push(addr.clone()); + } + + for addr in to_connect { + // We don't have an existing connection + self.connect_client(corpus.clone(), addr).await + } + } + + /// The set of known addresses on the bootstrap network has changed + /// + /// We need to connect to peers with addresses less than our own + /// and tear down any connections that no longer exist in `addrs`. + pub async fn update_bootstrap_connections( + &mut self, + addrs: BTreeSet, + corpus: Vec, + ) { + if self.bootstrap_addrs == addrs { + return; + } + + // We don't try to compare addresses from accepted nodes. If DDMD + // loses an accepting address we assume that the connection will go + // away soon, if it hasn't already. 
We can't compare without an extra + // handshake message to identify the listen address of the remote + // connection because clients use ephemeral ports. We always compare + // on the full `SocketAddrV6` which includes the port, which helps when + // testing on localhost. + let to_connect: BTreeSet<_> = addrs + .difference(&self.bootstrap_addrs) + .filter(|&&addr| self.listen_addr > addr) + .cloned() + .collect(); + let to_disconnect: BTreeSet<_> = self + .bootstrap_addrs + .difference(&addrs) + .filter(|&&addr| self.listen_addr > addr) + .cloned() + .collect(); + + self.bootstrap_addrs = addrs; + + for addr in to_connect { + self.connect_client(corpus.clone(), addr).await; + } + + for addr in to_disconnect { + self.disconnect_client(addr).await; + } + } + + /// Spawn a task to estalbish a sprockets connection for the given address + async fn connect_client( + &mut self, + corpus: Vec, + addr: SocketAddrV6, + ) { + let task_id = self.next_task_id.inc(); + let (tx, rx) = mpsc::channel(CHANNEL_BOUND); + let task_handle = TaskHandle { + task_id, + tx, + conn_type: ConnectionType::Connected(addr), + }; + info!(self.log, "Initiating connection to new peer: {addr}"); + let main_tx = self.main_tx.clone(); + let log = self.log.clone(); + let config = self.config.clone(); + let join_handle = tokio::spawn(async move { + match sprockets_tls::Client::connect( + config, + addr, + corpus.clone(), + log.clone(), + ) + .await + { + Ok(stream) => { + let platform_id = + stream.peer_platform_id().as_str().unwrap(); + let baseboard_id = platform_id_to_baseboard_id(platform_id); + + // TODO: Conversion between `PlatformId` and `BaseboardId` should + // happen in `sled-agent-types`. This is waiting on an update + // to the `dice-mfg-msgs` crate. + let log = + log.new(o!("peer_id" => baseboard_id.to_string())); + info!(log, "Sprockets connection established"; "addr" => %addr); + + let mut conn = EstablishedConn::new( + baseboard_id.clone(), + task_id, + stream, + main_tx.clone(), + rx, + &log, + ); + // Inform the main task that the client connection is + // established. + if let Err(e) = main_tx + .send(ConnToMainMsg { + task_id: task_id, + msg: ConnToMainMsgInner::Connected { + addr, + peer_id: baseboard_id, + }, + }) + .await + { + // The system is shutting down + // Just bail from this task + error!( + log, + "Failed to send 'connected' msg to main task: {e:?}" + ); + } else { + conn.run().await; + } + } + Err(err) => { + warn!(log, "Failed to connect"; &err); + } + } + task_id + }); + self.join_handles.push(join_handle); + self.connecting.insert(addr, task_handle); + } + + /// Remove any information about a sprockets client connection and inform + /// the corresponding task to stop. + /// + /// We don't tear down server connections this way as we don't know their + /// listen port, just the ephemeral port. 
+ async fn disconnect_client(&mut self, addr: SocketAddrV6) { + if let Some(handle) = self.connecting.remove(&addr) { + // The connection has not yet completed its handshake + info!( + self.log, + "Deleting initiating connection"; + "remote_addr" => addr.to_string() + ); + let _ = handle.tx.send(MainToConnMsg::Close).await; + } else { + if let Some((id, handle)) = self + .established + .iter() + .find(|(_, handle)| handle.addr() == addr) + { + info!( + self.log, + "Deleting established connection"; + "remote_addr" => addr.to_string(), + "remote_peer_id" => id.to_string(), + ); + let _ = handle.tx.send(MainToConnMsg::Close).await; + // probably a better way to avoid borrowck issues + let id = id.clone(); + self.established.remove(&id); + } + } + } + + /// Remove any references to the given task + async fn on_task_exit(&mut self, task_id: TaskId) { + // We're most likely to find the task as established so we start with that + if let Some((id, handle)) = self + .established + .iter() + .find(|(_, handle)| handle.task_id == task_id) + { + info!( + self.log, + "Established connection task exited"; + "task_id" => ?task_id, + "remote_addr" => handle.addr().to_string(), + "remote_peer_id" => id.to_string(), + ); + // probably a better way to avoid borrowck issues + let id = id.clone(); + self.established.remove(&id); + } else if let Some((addr, handle)) = + self.accepting.iter().find(|(_, handle)| handle.task_id == task_id) + { + info!( + self.log, + "Accepting task exited"; + "task_id" => ?task_id, + "remote_addr" => handle.addr().to_string(), + ); + let addr = *addr; + self.accepting.remove(&addr); + } else if let Some((addr, handle)) = + self.connecting.iter().find(|(_, handle)| handle.task_id == task_id) + { + info!( + self.log, + "Connecting task exited"; + "task_id" => ?task_id, + "remote_addr" => handle.addr().to_string(), + ); + let addr = *addr; + self.connecting.remove(&addr); + } else { + info!( + self.log, + "Task exited. No cleanup required."; + "task_id" => ?task_id + ); + } + } +} + +// TODO: Eventually this will go away, once we pull in and use the latest +// `dice-util` code. +pub fn platform_id_to_baseboard_id(platform_id: &str) -> BaseboardId { + let mut platform_id_iter = platform_id.split(":"); + let part_number = platform_id_iter.nth(1).unwrap().to_string(); + let serial_number = platform_id_iter.skip(1).next().unwrap().to_string(); + BaseboardId { part_number, serial_number } +} diff --git a/trust-quorum/src/established_conn.rs b/trust-quorum/src/established_conn.rs new file mode 100644 index 00000000000..01a04bd9e76 --- /dev/null +++ b/trust-quorum/src/established_conn.rs @@ -0,0 +1,343 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
An individual sprockets connection running in its own task + +use crate::{ + BaseboardId, ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, TaskId, + WireMsg, +}; +use bytes::Buf; +use serde::Serialize; +use slog::{Logger, debug, error, o, warn}; +use slog_error_chain::SlogInlineError; +use std::collections::VecDeque; +use std::io::Cursor; +use std::time::Duration; +use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf, WriteHalf, split}; +use tokio::net::TcpStream; +use tokio::sync::mpsc; +use tokio::time::{Instant, MissedTickBehavior, interval}; + +/// Max buffer size of a connection +const CONN_BUF_SIZE: usize = 1024 * 1024; + +/// Each message starts with a 4 bytes size header +const FRAME_HEADER_SIZE: usize = 4; + +/// The number of serialized messages to queue for writing before closing the socket. +/// This means the remote side is very slow. +/// +/// TODO: Alternatively we could drop the oldest message. +const MSG_WRITE_QUEUE_CAPACITY: usize = 5; + +// Timing parameters for keeping the connection healthy +const PING_INTERVAL: Duration = Duration::from_secs(1); + +/// The time limit for not receiving a complete message from a peer. +/// The connection is shutdown after this time. +const INACTIVITY_TIMEOUT: Duration = Duration::from_secs(10); + +/// An error from within an `EstablishedConn` that triggers connection close +/// +/// Also a great movie +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum ConnErr { + #[error("Main task insructed this connection to close")] + Close, + #[error("Failed to write")] + FailedWrite(#[source] std::io::Error), + #[error("Failed to read")] + FailedRead(#[source] std::io::Error), + #[error("Failed to deserialize wire message")] + DeserializeWireMsg(#[from] ciborium::de::Error), + #[error("Failed to serialize wire message")] + SerializeWireMsg(#[from] ciborium::ser::Error), + #[error("Write queue filled with serialized messages")] + WriteQueueFull, + #[error("Inactivity timeout")] + InactivityTimeout, +} + +/// Container for code running in its own task per sprockets connection +pub struct EstablishedConn { + peer_id: BaseboardId, + task_id: TaskId, + reader: ReadHalf>, + writer: WriteHalf>, + main_tx: mpsc::Sender, + rx: mpsc::Receiver, + log: Logger, + + // Buffer we read raw data into from a sprockets connection + read_buf: Box<[u8]>, + + // The amount of data currently in `read_buf` + total_read: usize, + + // Used for managing inactivity timeouts for the connection + last_received_msg: Instant, + + // Keep a queue to write serialized messages into. We limit the queue + // size, and if it gets exceeded it means the peer at the other + // end isn't pulling data out fast enough. This should be basically + // impossible to hit given the size and rate of message exchange + // between peers. We go ahead and close the connection if the queue + // fills. 
+ write_queue: VecDeque>, + + // The current serialized message being written if there is one + current_write: Cursor>, +} + +impl EstablishedConn { + pub fn new( + peer_id: BaseboardId, + task_id: TaskId, + stream: sprockets_tls::Stream, + main_tx: mpsc::Sender, + rx: mpsc::Receiver, + log: &Logger, + ) -> EstablishedConn { + let log = log.new(o!("component" => "trust-quorum-established-conn")); + let (reader, writer) = split(stream); + EstablishedConn { + peer_id, + task_id, + reader, + writer, + main_tx, + rx, + log, + read_buf: vec![0u8; CONN_BUF_SIZE].into_boxed_slice(), + total_read: 0, + last_received_msg: Instant::now(), + write_queue: VecDeque::with_capacity(MSG_WRITE_QUEUE_CAPACITY), + current_write: Cursor::new(Vec::new()), + } + } + + pub async fn run(&mut self) { + let mut interval = interval(PING_INTERVAL); + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + + // This is the main loop of the connection + // + // Continuously process messages until the connection closes + loop { + if !self.current_write.has_remaining() + && !self.write_queue.is_empty() + { + self.current_write = + Cursor::new(self.write_queue.pop_front().unwrap()); + } + + let res = tokio::select! { + _ = interval.tick() => { + self.ping().await + } + Some(msg) = self.rx.recv() => { + self.on_msg_from_main(msg).await + } + res = self.reader.read(&mut self.read_buf[self.total_read..]) => { + self.on_read(res).await + } + res = self.writer.write_buf(&mut self.current_write), + if self.current_write.has_remaining() => + { + self.check_write_result(res).await + } + }; + + if let Err(err) = res { + warn!(self.log, "Closing connection"; &err); + self.close().await; + return; + } + } + } + + async fn close(&mut self) { + if let Err(e) = self + .main_tx + .send(ConnToMainMsg { + task_id: self.task_id, + msg: ConnToMainMsgInner::Disconnected { + peer_id: self.peer_id.clone(), + }, + }) + .await + { + warn!(self.log, "Failed to send to main task: {e:?}"); + } + // TODO: This causes a deadlock and breaks the test. + // + // I'm unclear why, although I plan to dig a bit further in the future. + // It should be noted that the writer and reader share a std::mutex + // under the hood and that could be causing issues. Regardless, it + // is not actually critical to issue a shutdown as detection will + // be discovered via missing ping messages at the other end of the + // connection. + // + // let _ = self.writer.shutdown().await; + } + + async fn on_read( + &mut self, + res: Result, + ) -> Result<(), ConnErr> { + match res { + Ok(n) => { + self.total_read += n; + } + Err(e) => { + return Err(ConnErr::FailedRead(e)); + } + } + + // We may have more than one message that has been read + loop { + if self.total_read < FRAME_HEADER_SIZE { + return Ok(()); + } + // Read frame size + let size = read_frame_size( + self.read_buf[..FRAME_HEADER_SIZE].try_into().unwrap(), + ); + let end = size + FRAME_HEADER_SIZE; + + // If we haven't read the whole message yet, then return + if end > self.total_read { + return Ok(()); + } + let msg: WireMsg = + ciborium::from_reader(&self.read_buf[FRAME_HEADER_SIZE..end])?; + // Move any remaining bytes to the beginning of the buffer. 
+ self.read_buf.copy_within(end..self.total_read, 0); + self.total_read = self.total_read - end; + + self.last_received_msg = Instant::now(); + debug!(self.log, "Received {msg:?}"); + match msg { + WireMsg::Tq(msg) => { + if let Err(e) = self + .main_tx + .send(ConnToMainMsg { + task_id: self.task_id, + msg: ConnToMainMsgInner::Received { + from: self.peer_id.clone(), + msg, + }, + }) + .await + { + warn!( + self.log, + "Failed to send received fsm msg to main task: {e:?}" + ); + } + } + WireMsg::Ping => { + // Nothing to do here, since Ping is just to keep us alive and + // we updated self.last_received_msg above. + } + WireMsg::NetworkConfig(config) => { + let generation = config.generation; + if let Err(e) = self + .main_tx + .send(ConnToMainMsg { + task_id: self.task_id, + msg: ConnToMainMsgInner::ReceivedNetworkConfig { + from: self.peer_id.clone(), + config, + }, + }) + .await + { + warn!( + self.log, + "Failed to send received NetworkConfig with + generation {generation} to main task: {e:?}" + ); + } + } + } + } + } + + async fn check_write_result( + &mut self, + res: Result, + ) -> Result<(), ConnErr> { + match res { + Ok(_) => { + if !self.current_write.has_remaining() { + self.current_write = Cursor::new(Vec::new()); + } + Ok(()) + } + Err(e) => { + let _ = self.writer.shutdown().await; + Err(ConnErr::FailedWrite(e)) + } + } + } + + async fn on_msg_from_main( + &mut self, + msg: MainToConnMsg, + ) -> Result<(), ConnErr> { + match msg { + MainToConnMsg::Close => { + return Err(ConnErr::Close); + } + MainToConnMsg::Msg(msg) => self.write_framed_to_queue(msg).await, + } + } + + async fn write_framed_to_queue( + &mut self, + msg: WireMsg, + ) -> Result<(), ConnErr> { + if self.write_queue.len() == MSG_WRITE_QUEUE_CAPACITY { + return Err(ConnErr::WriteQueueFull); + } else { + let msg = write_framed(&msg)?; + self.write_queue.push_back(msg); + Ok(()) + } + } + + async fn ping(&mut self) -> Result<(), ConnErr> { + if Instant::now() - self.last_received_msg > INACTIVITY_TIMEOUT { + return Err(ConnErr::InactivityTimeout); + } + self.write_framed_to_queue(WireMsg::Ping).await + } +} + +// Decode the 4-byte big-endian frame size header +fn read_frame_size(buf: [u8; FRAME_HEADER_SIZE]) -> usize { + u32::from_be_bytes(buf) as usize +} + +/// Serialize and write `msg` into `buf`, prefixed by a 4-byte big-endian size +/// header +/// +/// Return the total amount of data written into `buf` including the 4-byte +/// header. 
+fn write_framed( + msg: &T, +) -> Result, ciborium::ser::Error> { + let mut cursor = Cursor::new(vec![]); + // Write a size placeholder + std::io::Write::write(&mut cursor, &[0u8; FRAME_HEADER_SIZE])?; + cursor.set_position(FRAME_HEADER_SIZE as u64); + ciborium::into_writer(msg, &mut cursor)?; + let size: u32 = + (cursor.position() - FRAME_HEADER_SIZE as u64).try_into().unwrap(); + let mut buf = cursor.into_inner(); + buf[0..FRAME_HEADER_SIZE].copy_from_slice(&size.to_be_bytes()); + Ok(buf) +} diff --git a/trust-quorum/src/lib.rs b/trust-quorum/src/lib.rs index a389022af0f..b4b18736163 100644 --- a/trust-quorum/src/lib.rs +++ b/trust-quorum/src/lib.rs @@ -21,6 +21,7 @@ mod compute_key_share; mod configuration; mod coordinator_state; pub(crate) mod crypto; +pub(crate) mod established_conn; mod messages; mod node; mod node_ctx; @@ -38,6 +39,13 @@ pub use validators::{ ValidatedLrtqUpgradeMsgDiff, ValidatedReconfigureMsgDiff, }; mod alarm; +mod connection_manager; +mod task; + +pub(crate) use connection_manager::{ + ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, TaskId, WireMsg, +}; +pub use task::NodeTask; pub use alarm::Alarm; pub use crypto::RackSecret; diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs new file mode 100644 index 00000000000..26d92c3783e --- /dev/null +++ b/trust-quorum/src/task.rs @@ -0,0 +1,469 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A runnable async trust quorum node that wraps the sans-io [`crate::Node`] + +use crate::connection_manager::{ + ConnMgr, ConnMgrStatus, ConnToMainMsg, ConnToMainMsgInner, +}; +use crate::{BaseboardId, Node, NodeCtx}; +use slog::{Logger, debug, error, info, o}; +use sprockets_tls::keys::SprocketsConfig; +use std::collections::BTreeSet; +use std::net::SocketAddrV6; +use thiserror::Error; +use tokio::sync::mpsc::error::SendError; +use tokio::sync::oneshot::error::RecvError; +use tokio::sync::{mpsc, oneshot}; + +#[derive(Debug, Clone)] +pub struct Config { + pub baseboard_id: BaseboardId, + pub listen_addr: SocketAddrV6, + // pub tq_state_ledger_paths: Vec, + // pub network_config_ledger_paths: Vec, + pub sprockets: SprocketsConfig, +} + +/// A request sent to the `NodeTask` from the `NodeTaskHandle` +pub enum NodeApiRequest { + /// Inform the `Node` of currently known IP addresses on the bootstrap network + /// + /// These are generated from DDM prefixes learned by the bootstrap agent. + BootstrapAddresses(BTreeSet), + + /// Retrieve connectivity status via the `ConnMgr` + ConnMgrStatus { responder: oneshot::Sender }, + + /// Shutdown the node's tokio tasks + Shutdown, +} + +/// An error response from a `NodeApiRequest` +#[derive(Error, Debug, PartialEq)] +pub enum NodeApiError { + #[error("Failed to send request to node task")] + Send, + #[error("Failed to receive response from node task")] + Recv, +} + +impl From> for NodeApiError { + fn from(_: SendError) -> Self { + NodeApiError::Send + } +} + +impl From for NodeApiError { + fn from(_: RecvError) -> Self { + NodeApiError::Recv + } +} + +#[derive(Debug, Clone)] +pub struct NodeTaskHandle { + baseboard_id: BaseboardId, + tx: mpsc::Sender, + listen_addr: SocketAddrV6, +} + +impl NodeTaskHandle { + /// Return the actual port being listened on + /// + /// This is useful when the port passed in was `0`. 
+ pub fn listen_addr(&self) -> SocketAddrV6 { + self.listen_addr + } + + pub fn baseboard_id(&self) -> &BaseboardId { + &self.baseboard_id + } + + /// Inform the node of currently known IP addresses on the bootstrap network + /// + /// These are generated from DDM prefixes learned by the bootstrap agent. + pub async fn load_peer_addresses( + &self, + addrs: BTreeSet, + ) -> Result<(), NodeApiError> { + self.tx.send(NodeApiRequest::BootstrapAddresses(addrs)).await?; + Ok(()) + } + + pub async fn conn_mgr_status(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx.send(NodeApiRequest::ConnMgrStatus { responder: tx }).await?; + let res = rx.await?; + Ok(res) + } + + pub async fn shutdown(&self) -> Result<(), NodeApiError> { + self.tx.send(NodeApiRequest::Shutdown).await?; + Ok(()) + } +} + +pub struct NodeTask { + shutdown: bool, + log: Logger, + #[allow(unused)] + config: Config, + #[allow(unused)] + node: Node, + #[allow(unused)] + ctx: NodeCtx, + conn_mgr: ConnMgr, + conn_mgr_rx: mpsc::Receiver, + + // Handle requests received from `PeerHandle` + rx: mpsc::Receiver, +} + +impl NodeTask { + pub async fn new( + config: Config, + log: &Logger, + ) -> (NodeTask, NodeTaskHandle) { + let log = log.new(o!( + "component" => "trust-quorum", + "baseboard_id" => config.baseboard_id.to_string() + )); + // We only expect one outstanding request at a time for `Init_` or + // `LoadRackSecret` requests, We can have one of those requests in + // flight while allowing `PeerAddresses` updates. We also allow status + // requests in parallel. Just leave some room. + let (tx, rx) = mpsc::channel(10); + + let (conn_mgr_tx, conn_mgr_rx) = mpsc::channel(100); + + let baseboard_id = config.baseboard_id.clone(); + + // TODO: Load persistent state from ledger + let mut ctx = NodeCtx::new(config.baseboard_id.clone()); + let node = Node::new(&log, &mut ctx); + let conn_mgr = ConnMgr::new( + &log, + config.listen_addr, + config.sprockets.clone(), + conn_mgr_tx, + ) + .await; + let listen_addr = conn_mgr.listen_addr(); + ( + NodeTask { + shutdown: false, + log, + config, + node, + ctx, + conn_mgr, + conn_mgr_rx, + rx, + }, + NodeTaskHandle { baseboard_id, tx, listen_addr }, + ) + } + + /// Run the main loop of the node + /// + /// This should be spawned into its own tokio task + pub async fn run(&mut self) { + while !self.shutdown { + // TODO: Real corpus + let corpus = vec![]; + tokio::select! 
{ + Some(request) = self.rx.recv() => { + self.on_api_request(request).await; + } + res = self.conn_mgr.step(corpus.clone()) => { + if let Err(err) = res { + error!(self.log, "Failed to accept connection"; &err); + continue; + } + } + Some(msg) = self.conn_mgr_rx.recv() => { + self.on_conn_msg(msg).await + } + + } + } + } + + // Handle messages from connection management tasks + async fn on_conn_msg(&mut self, msg: ConnToMainMsg) { + let task_id = msg.task_id; + match msg.msg { + ConnToMainMsgInner::Accepted { addr, peer_id } => { + self.conn_mgr + .server_handshake_completed(task_id, addr, peer_id) + .await; + } + ConnToMainMsgInner::Connected { addr, peer_id } => { + self.conn_mgr + .client_handshake_completed(task_id, addr, peer_id) + .await; + } + ConnToMainMsgInner::Disconnected { peer_id } => { + self.conn_mgr.on_disconnected(task_id, peer_id).await; + } + ConnToMainMsgInner::Received { from: _, msg: _ } => { + todo!(); + } + ConnToMainMsgInner::ReceivedNetworkConfig { + from: _, + config: _, + } => { + todo!(); + } + } + } + + async fn on_api_request(&mut self, request: NodeApiRequest) { + match request { + NodeApiRequest::BootstrapAddresses(addrs) => { + info!(self.log, "Updated Peer Addresses: {addrs:?}"); + // TODO: real corpus + let corpus = vec![]; + self.conn_mgr.update_bootstrap_connections(addrs, corpus).await; + } + NodeApiRequest::ConnMgrStatus { responder } => { + debug!(self.log, "Received Request for ConnMgrStatus"); + let _ = responder.send(self.conn_mgr.status()); + } + NodeApiRequest::Shutdown => { + info!(self.log, "Shutting down Node tokio tasks"); + self.shutdown = true; + self.conn_mgr.shutdown().await; + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::TaskId; + use crate::connection_manager::{ConnState, platform_id_to_baseboard_id}; + use camino::Utf8PathBuf; + use dropshot::test_util::log_prefix_for_test; + use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; + use omicron_test_utils::dev::test_setup_log; + use sprockets_tls::keys::ResolveSetting; + use sprockets_tls_test_utils::{ + alias_prefix, cert_path, certlist_path, private_key_path, root_prefix, + sprockets_auth_prefix, + }; + use std::time::Duration; + + fn pki_doc_to_node_configs(dir: Utf8PathBuf, n: usize) -> Vec { + (1..=n) + .map(|i| { + let baseboard_id = platform_id_to_baseboard_id( + &sprockets_tls_test_utils::platform_id(i), + ); + let listen_addr = + SocketAddrV6::new(std::net::Ipv6Addr::LOCALHOST, 0, 0, 0); + let sprockets_auth_key_name = sprockets_auth_prefix(i); + let alias_key_name = alias_prefix(i); + let sprockets = SprocketsConfig { + resolve: ResolveSetting::Local { + priv_key: private_key_path( + dir.clone(), + &sprockets_auth_key_name, + ), + cert_chain: certlist_path( + dir.clone(), + &sprockets_auth_key_name, + ), + }, + attest: sprockets_tls::keys::AttestConfig::Local { + priv_key: private_key_path( + dir.clone(), + &alias_key_name, + ), + cert_chain: certlist_path(dir.clone(), &alias_key_name), + // TODO: We need attest-mock to generate a real log + log: dir.join("log.bin"), + }, + roots: vec![cert_path(dir.clone(), &root_prefix())], + }; + Config { baseboard_id, listen_addr, sprockets } + }) + .collect() + } + + /// Test that all nodes can connect to each other when given each the full + /// set of "bootstrap addresses". 
+ #[tokio::test] + async fn full_mesh_connectivity() { + let logctx = test_setup_log("full_mesh_connectivity"); + let (dir, _) = log_prefix_for_test("full_mesh_connectivity"); + println!("Writing keys and certs to {dir}"); + let num_nodes = 4; + + let file_behavior = + sprockets_tls_test_utils::OutputFileExistsBehavior::Overwrite; + + // Create `num_nodes` nodes worth of keys and certs + let doc = sprockets_tls_test_utils::generate_config(num_nodes); + doc.write_key_pairs(dir.clone(), file_behavior).unwrap(); + doc.write_certificates(dir.clone(), file_behavior).unwrap(); + doc.write_certificate_lists(dir.clone(), file_behavior).unwrap(); + + // This is just a made up digest. We aren't currently using a corpus, so it + // doesn't matter what the measurements are, just that there is at least + // one in a file named "log.bin". + let digest = + "be4df4e085175f3de0c8ac4837e1c2c9a34e8983209dac6b549e94154f7cdd9c" + .into(); + let attest_log_doc = attest_mock::log::Document { + measurements: vec![attest_mock::log::Measurement { + algorithm: "sha3-256".into(), + digest, + }], + }; + // Write out the log document to the filesystem + let out = attest_mock::log::mock(attest_log_doc).unwrap(); + std::fs::write(dir.join("log.bin"), &out).unwrap(); + + let configs = pki_doc_to_node_configs(dir, num_nodes); + + let mut node_handles = vec![]; + let mut join_handles = vec![]; + for config in configs.clone() { + let (mut task, handle) = NodeTask::new(config, &logctx.log).await; + node_handles.push(handle); + join_handles.push(tokio::spawn(async move { task.run().await })); + } + + let listen_addrs: BTreeSet<_> = + node_handles.iter().map(|h| h.listen_addr()).collect(); + + for h in &node_handles { + h.load_peer_addresses(listen_addrs.clone()).await.unwrap(); + } + + let poll_interval = Duration::from_millis(1); + let poll_max = Duration::from_secs(10); + + // Wait for all nodes have `num_nodes - 1` established connections + wait_for_condition( + async || { + let mut count = 0; + for h in &node_handles { + let status = h.conn_mgr_status().await.unwrap(); + if status + .connections + .iter() + .all(|c| matches!(c.state, ConnState::Established(_))) + && status.connections.len() == num_nodes - 1 + && status.next_task_id == TaskId::new(3) + { + count += 1; + } + } + if count == num_nodes { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &poll_interval, + &poll_max, + ) + .await + .unwrap(); + + // Killing a single node should cause all other nodes to start + // reconnecting. This should cause the task id counter to start + // incrementing at all nodes and for their to be one fewer established + // connection. + let h = node_handles.pop().unwrap(); + h.shutdown().await.unwrap(); + let _ = join_handles.pop().unwrap(); + let stopped_addr = h.listen_addr; + + let poll_interval = Duration::from_millis(50); + wait_for_condition( + async || { + let mut valid = 0; + for h in &node_handles { + let status = h.conn_mgr_status().await.unwrap(); + let established_count = status + .connections + .iter() + .filter(|c| { + matches!(c.state, ConnState::Established(_)) + }) + .count(); + + // Nodes only connect to other nodes if their listening + // address sorts greater. The only node where a reconnect will be attempted + // is the stopped node. 
+ let should_be_connecting = h.listen_addr > stopped_addr; + let valid_task_id = if should_be_connecting { + status.next_task_id > TaskId::new(3) + } else { + true + }; + if established_count == num_nodes - 2 && valid_task_id { + valid += 1; + } + } + if valid == num_nodes - 1 { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &poll_interval, + &poll_max, + ) + .await + .unwrap(); + + // Now let's bring back up the old node and ensure full connectivity again + let (mut task, handle) = + NodeTask::new(configs.last().unwrap().clone(), &logctx.log).await; + node_handles.push(handle.clone()); + join_handles.push(tokio::spawn(async move { task.run().await })); + + // The port likely changed, so we must refresh everyone's set of addresses + let listen_addrs: BTreeSet<_> = + node_handles.iter().map(|h| h.listen_addr()).collect(); + + for h in &node_handles { + h.load_peer_addresses(listen_addrs.clone()).await.unwrap(); + } + + // Wait for all nodes to have `num_nodes - 1` established connections + wait_for_condition( + async || { + let mut count = 0; + for h in &node_handles { + let status = h.conn_mgr_status().await.unwrap(); + if status + .connections + .iter() + .all(|c| matches!(c.state, ConnState::Established(_))) + && status.connections.len() == num_nodes - 1 + { + count += 1; + } + } + if count == num_nodes { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &poll_interval, + &poll_max, + ) + .await + .unwrap(); + + logctx.cleanup_successful(); + } +} From 7d20b2d332c51cfba0149866c0986f04729d2169 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Sat, 18 Oct 2025 23:23:53 +0000 Subject: [PATCH 02/14] hakari --- Cargo.lock | 1 + workspace-hack/Cargo.toml | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 534b403f3a4..c63c83ba8b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8954,6 +8954,7 @@ dependencies = [ "log", "managed", "memchr", + "miniz_oxide", "mio", "newtype-uuid", "nix 0.29.0", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 6f64f8c40bb..096d5d26dcd 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -64,7 +64,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom-6f8ce4dd05d13bba = { package = "getrandom", version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.16.0", default-features = false, features = ["allocator-api2", "inline-more"] } -heck = { version = "0.4.1" } +heck = { version = "0.4.1", features = ["unicode"] } hickory-proto = { version = "0.25.2", features = ["serde", "text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.7.0", features = ["full"] } @@ -109,7 +109,7 @@ regex = { version = "1.11.3" } regex-automata = { version = "0.4.11", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "std", "unicode"] } regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.22", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } -rsa = { version = "0.9.6", features = ["serde", "sha2"] } +rsa = { version = "0.9.8", features = ["serde", "sha2"] } rustc-hash = { version = "2.1.1" } rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } @@
-118,7 +118,7 @@ scopeguard = { version = "1.2.0" } semver = { version = "1.0.27", features = ["serde"] } serde = { version = "1.0.226", features = ["alloc", "derive", "rc"] } serde_core = { version = "1.0.226", features = ["alloc", "rc"] } -serde_json = { version = "1.0.145", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.145", features = ["alloc", "raw_value", "unbounded_depth"] } serde_with = { version = "3.14.0" } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.9", features = ["oid"] } @@ -203,7 +203,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom-6f8ce4dd05d13bba = { package = "getrandom", version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.16.0", default-features = false, features = ["allocator-api2", "inline-more"] } -heck = { version = "0.4.1" } +heck = { version = "0.4.1", features = ["unicode"] } hickory-proto = { version = "0.25.2", features = ["serde", "text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.7.0", features = ["full"] } @@ -248,7 +248,7 @@ regex = { version = "1.11.3" } regex-automata = { version = "0.4.11", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "std", "unicode"] } regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.22", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } -rsa = { version = "0.9.6", features = ["serde", "sha2"] } +rsa = { version = "0.9.8", features = ["serde", "sha2"] } rustc-hash = { version = "2.1.1" } rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } @@ -257,7 +257,7 @@ scopeguard = { version = "1.2.0" } semver = { version = "1.0.27", features = ["serde"] } serde = { version = "1.0.226", features = ["alloc", "derive", "rc"] } serde_core = { version = "1.0.226", features = ["alloc", "rc"] } -serde_json = { version = "1.0.145", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.145", features = ["alloc", "raw_value", "unbounded_depth"] } serde_with = { version = "3.14.0" } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.9", features = ["oid"] } @@ -307,6 +307,7 @@ getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } linux-raw-sys = { version = "0.4.14", default-features = false, features = ["elf", "errno", "general", "if_ether", "ioctl", "net", "netlink", "no_std", "prctl", "std", "system", "xdp"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -320,6 +321,7 @@ getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } 
linux-raw-sys = { version = "0.4.14", default-features = false, features = ["elf", "errno", "general", "if_ether", "ioctl", "net", "netlink", "no_std", "prctl", "std", "system", "xdp"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -330,6 +332,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -340,6 +343,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -350,6 +354,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -360,6 +365,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", 
"pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -374,6 +380,7 @@ hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio" hyper-util = { version = "0.1.17", features = ["full"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } nom = { version = "7.1.3" } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } @@ -392,6 +399,7 @@ hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio" hyper-util = { version = "0.1.17", features = ["full"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } nom = { version = "7.1.3" } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } From 78143bd5b678a510250c0ef77ac5e5675095e687 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 22 Oct 2025 16:28:47 +0000 Subject: [PATCH 03/14] Fix up step method --- trust-quorum/src/connection_manager.rs | 50 ++++++++++++++------------ trust-quorum/src/task.rs | 10 +++++- 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index 4d24e365148..e98a88b2b97 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -21,18 +21,18 @@ use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; use std::time::Duration; use tokio::sync::mpsc; use tokio::task::JoinHandle; -use tokio::time::{MissedTickBehavior, interval}; +use tokio::time::{Interval, MissedTickBehavior, interval}; /// We only expect a handful of concurrent requests at most. const CHANNEL_BOUND: usize = 10; // Time between checks to see if we need to reconnect to to any peers -const RECONNECT_TIME: Duration = Duration::from_secs(5); +pub const RECONNECT_TIME: Duration = Duration::from_secs(5); /// An error returned from `ConnMgr::accept` #[derive(Debug, thiserror::Error, SlogInlineError)] pub enum AcceptError { - #[error("Accepted connection from IPv4 address {addr}. Only IPv6 allowed.")] + #[error("accepted connection from IPv4 address {addr}. Only IPv6 allowed.")] Ipv4Accept { addr: SocketAddrV4 }, #[error("sprockets error")] @@ -209,6 +209,9 @@ pub struct ConnMgr { /// All tasks containing established connections that can be used to communicate /// with other nodes. 
established: BTreeMap, + + /// An interval for reconnect operations + reconnect_interval: Interval, } impl ConnMgr { @@ -246,6 +249,9 @@ impl ConnMgr { "local_addr" => %listen_addr ); + let mut reconnect_interval = interval(RECONNECT_TIME); + reconnect_interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + ConnMgr { log, main_tx, @@ -258,6 +264,7 @@ impl ConnMgr { connecting: BTreeMap::new(), accepting: BTreeMap::new(), established: BTreeMap::new(), + reconnect_interval, } } @@ -315,30 +322,27 @@ impl ConnMgr { &mut self, corpus: Vec, ) -> Result<(), AcceptError> { - let mut interval = interval(RECONNECT_TIME); - interval.set_missed_tick_behavior(MissedTickBehavior::Delay); - - loop { - tokio::select! { - acceptor = self.server.accept(corpus.clone()) => { - self.accept(acceptor?).await?; - } - Some(res) = self.join_handles.next() => { - match res { - Ok(task_id) => { - self.on_task_exit(task_id).await; - } - Err(err) => { - error!(self.log, "Connection task panic: {}", err); - } - + tokio::select! { + acceptor = self.server.accept(corpus.clone()) => { + self.accept(acceptor?).await?; + } + Some(res) = self.join_handles.next() => { + match res { + Ok(task_id) => { + self.on_task_exit(task_id).await; } - } - _ = interval.tick() => { - self.reconnect(corpus.clone()).await; + Err(err) => { + error!(self.log, "Connection task panic: {}", err); + } + } } + _ = self.reconnect_interval.tick() => { + self.reconnect(corpus.clone()).await; + } } + + Ok(()) } pub async fn accept( diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index 26d92c3783e..85438fe43ed 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -244,7 +244,9 @@ impl NodeTask { mod tests { use super::*; use crate::TaskId; - use crate::connection_manager::{ConnState, platform_id_to_baseboard_id}; + use crate::connection_manager::{ + ConnState, RECONNECT_TIME, platform_id_to_baseboard_id, + }; use camino::Utf8PathBuf; use dropshot::test_util::log_prefix_for_test; use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; @@ -375,6 +377,9 @@ mod tests { .await .unwrap(); + // Pause time so we can jump it for reconnects + tokio::time::pause(); + // Killing a single node should cause all other nodes to start // reconnecting. This should cause the task id counter to start // incrementing at all nodes and for their to be one fewer established @@ -384,6 +389,9 @@ mod tests { let _ = join_handles.pop().unwrap(); let stopped_addr = h.listen_addr; + // Speed up reconnection in the test + tokio::time::advance(RECONNECT_TIME).await; + let poll_interval = Duration::from_millis(50); wait_for_condition( async || { From 5a89a984677ef4e362d42d0d15fd8d3e78a39182 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 22 Oct 2025 17:35:29 +0000 Subject: [PATCH 04/14] Use JoinSet and tokio::task::Id instead of FuturesUnordered and crate::TaskId --- trust-quorum/src/connection_manager.rs | 104 ++++++++++--------------- trust-quorum/src/established_conn.rs | 8 +- trust-quorum/src/lib.rs | 2 +- trust-quorum/src/task.rs | 5 +- 4 files changed, 47 insertions(+), 72 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index e98a88b2b97..f5a2f4cbf45 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -9,8 +9,6 @@ use crate::{BaseboardId, PeerMsg}; // TODO: Move or copy this to this crate? 
use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; -use futures::StreamExt; -use futures::stream::FuturesUnordered; use serde::{Deserialize, Serialize}; use slog::{Logger, debug, error, info, o, warn}; use slog_error_chain::SlogInlineError; @@ -20,7 +18,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; use std::time::Duration; use tokio::sync::mpsc; -use tokio::task::JoinHandle; +use tokio::task::{self, JoinSet}; use tokio::time::{Interval, MissedTickBehavior, interval}; /// We only expect a handful of concurrent requests at most. @@ -43,23 +41,6 @@ pub enum AcceptError { ), } -/// A mechanism for uniquely identifying a task managing a connection -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct TaskId(u64); - -impl TaskId { - pub fn new(id: u64) -> TaskId { - TaskId(id) - } - - /// Increment the ID and then return the value before the increment - pub fn inc(&mut self) -> TaskId { - let id = *self; - self.0 += 1; - id - } -} - /// Messages sent from the main task to the connection managing tasks #[derive(Debug, PartialEq)] pub enum MainToConnMsg { @@ -103,7 +84,7 @@ pub enum WireMsg { /// shutdown. #[derive(Debug, PartialEq)] pub struct ConnToMainMsg { - pub task_id: TaskId, + pub task_id: task::Id, pub msg: ConnToMainMsgInner, } @@ -117,7 +98,7 @@ pub enum ConnToMainMsgInner { } pub struct TaskHandle { - pub task_id: TaskId, + pub task_id: task::Id, pub tx: mpsc::Sender, pub conn_type: ConnectionType, } @@ -154,7 +135,7 @@ pub enum ConnState { pub struct ConnInfo { pub state: ConnState, pub addr: SocketAddrV6, - pub task_id: TaskId, + pub task_id: task::Id, } /// Status information useful for debugging @@ -162,8 +143,8 @@ pub struct ConnInfo { pub struct ConnMgrStatus { pub bootstrap_addrs: BTreeSet, pub connections: Vec, - pub num_task_join_handles: u64, - pub next_task_id: TaskId, + pub num_conn_tasks: u64, + pub total_tasks_spawned: u64, } /// A structure to manage all sprockets connections to peer nodes @@ -188,13 +169,8 @@ pub struct ConnMgr { /// The address the sprockets server listens on listen_addr: SocketAddrV6, - // A unique, monotonically incrementing id for each task to help map tasks - // to their handles in case the task aborts, or there is a new connection - // accepted and established for an existing `BaseboardId`. 
- next_task_id: TaskId, - - /// `JoinHandle`s to all tasks that can be polled for crashes - join_handles: FuturesUnordered>, + /// A mechanism for spawning connection tasks + join_set: JoinSet<()>, /// All known addresses on the bootstrap network, learned via DDMD bootstrap_addrs: BTreeSet, @@ -212,6 +188,9 @@ pub struct ConnMgr { /// An interval for reconnect operations reconnect_interval: Interval, + + /// The number of total connection tasks spawned + total_tasks_spawned: u64, } impl ConnMgr { @@ -258,13 +237,13 @@ impl ConnMgr { config, server, listen_addr, - next_task_id: TaskId::new(0), - join_handles: Default::default(), + join_set: JoinSet::new(), bootstrap_addrs: BTreeSet::new(), connecting: BTreeMap::new(), accepting: BTreeMap::new(), established: BTreeMap::new(), reconnect_interval, + total_tasks_spawned: 0, } } @@ -307,8 +286,8 @@ impl ConnMgr { ConnMgrStatus { bootstrap_addrs: self.bootstrap_addrs.clone(), connections, - num_task_join_handles: self.join_handles.len() as u64, - next_task_id: self.next_task_id, + num_conn_tasks: self.join_set.len() as u64, + total_tasks_spawned: self.total_tasks_spawned, } } @@ -326,13 +305,14 @@ impl ConnMgr { acceptor = self.server.accept(corpus.clone()) => { self.accept(acceptor?).await?; } - Some(res) = self.join_handles.next() => { + Some(res) = self.join_set.join_next_with_id() => { match res { - Ok(task_id) => { + Ok((task_id, _)) => { self.on_task_exit(task_id).await; } Err(err) => { error!(self.log, "Connection task panic: {}", err); + self.on_task_exit(err.id()).await; } } @@ -356,15 +336,9 @@ impl ConnMgr { SocketAddr::V6(addr) => addr, }; let log = self.log.clone(); - let task_id = self.next_task_id.inc(); let (tx, rx) = mpsc::channel(CHANNEL_BOUND); - let task_handle = TaskHandle { - task_id, - tx, - conn_type: ConnectionType::Accepted(addr), - }; let main_tx = self.main_tx.clone(); - let join_handle = tokio::spawn(async move { + let abort_handle = self.join_set.spawn(async move { match acceptor.handshake().await { Ok((stream, _)) => { let platform_id = @@ -380,7 +354,7 @@ impl ConnMgr { let mut conn = EstablishedConn::new( baseboard_id.clone(), - task_id, + task::id(), stream, main_tx.clone(), rx, @@ -390,7 +364,7 @@ impl ConnMgr { // Inform the main task that accepted connection is established if let Err(e) = main_tx .send(ConnToMainMsg { - task_id: task_id, + task_id: task::id(), msg: ConnToMainMsgInner::Accepted { addr, peer_id: baseboard_id, @@ -412,16 +386,20 @@ impl ConnMgr { error!(log, "Failed to accept a connection"; &err); } } - task_id }); - self.join_handles.push(join_handle); + self.total_tasks_spawned += 1; + let task_handle = TaskHandle { + task_id: abort_handle.id(), + tx, + conn_type: ConnectionType::Accepted(addr), + }; self.accepting.insert(addr, task_handle); Ok(()) } pub async fn server_handshake_completed( &mut self, - task_id: TaskId, + task_id: task::Id, addr: SocketAddrV6, peer_id: BaseboardId, ) { @@ -441,7 +419,7 @@ impl ConnMgr { pub async fn client_handshake_completed( &mut self, - task_id: TaskId, + task_id: task::Id, addr: SocketAddrV6, peer_id: BaseboardId, ) { @@ -463,7 +441,7 @@ impl ConnMgr { /// The established connection task has asynchronously exited. 
pub async fn on_disconnected( &mut self, - task_id: TaskId, + task_id: task::Id, peer_id: BaseboardId, ) { if let Some(task_handle) = self.established.get(&peer_id) { @@ -555,18 +533,12 @@ impl ConnMgr { corpus: Vec, addr: SocketAddrV6, ) { - let task_id = self.next_task_id.inc(); let (tx, rx) = mpsc::channel(CHANNEL_BOUND); - let task_handle = TaskHandle { - task_id, - tx, - conn_type: ConnectionType::Connected(addr), - }; info!(self.log, "Initiating connection to new peer: {addr}"); let main_tx = self.main_tx.clone(); let log = self.log.clone(); let config = self.config.clone(); - let join_handle = tokio::spawn(async move { + let abort_handle = self.join_set.spawn(async move { match sprockets_tls::Client::connect( config, addr, @@ -589,7 +561,7 @@ impl ConnMgr { let mut conn = EstablishedConn::new( baseboard_id.clone(), - task_id, + task::id(), stream, main_tx.clone(), rx, @@ -599,7 +571,7 @@ impl ConnMgr { // established. if let Err(e) = main_tx .send(ConnToMainMsg { - task_id: task_id, + task_id: task::id(), msg: ConnToMainMsgInner::Connected { addr, peer_id: baseboard_id, @@ -621,9 +593,13 @@ impl ConnMgr { warn!(log, "Failed to connect"; &err); } } - task_id }); - self.join_handles.push(join_handle); + self.total_tasks_spawned += 1; + let task_handle = TaskHandle { + task_id: abort_handle.id(), + tx, + conn_type: ConnectionType::Connected(addr), + }; self.connecting.insert(addr, task_handle); } @@ -662,7 +638,7 @@ impl ConnMgr { } /// Remove any references to the given task - async fn on_task_exit(&mut self, task_id: TaskId) { + async fn on_task_exit(&mut self, task_id: task::Id) { // We're most likely to find the task as established so we start with that if let Some((id, handle)) = self .established diff --git a/trust-quorum/src/established_conn.rs b/trust-quorum/src/established_conn.rs index 01a04bd9e76..c5a4c9dd11a 100644 --- a/trust-quorum/src/established_conn.rs +++ b/trust-quorum/src/established_conn.rs @@ -5,8 +5,7 @@ //! 
An individual sprockets connection running in its own task use crate::{ - BaseboardId, ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, TaskId, - WireMsg, + BaseboardId, ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg, }; use bytes::Buf; use serde::Serialize; @@ -18,6 +17,7 @@ use std::time::Duration; use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf, WriteHalf, split}; use tokio::net::TcpStream; use tokio::sync::mpsc; +use tokio::task; use tokio::time::{Instant, MissedTickBehavior, interval}; /// Max buffer size of a connection @@ -63,7 +63,7 @@ pub enum ConnErr { /// Container for code running in its own task per sprockets connection pub struct EstablishedConn { peer_id: BaseboardId, - task_id: TaskId, + task_id: task::Id, reader: ReadHalf>, writer: WriteHalf>, main_tx: mpsc::Sender, @@ -94,7 +94,7 @@ pub struct EstablishedConn { impl EstablishedConn { pub fn new( peer_id: BaseboardId, - task_id: TaskId, + task_id: task::Id, stream: sprockets_tls::Stream, main_tx: mpsc::Sender, rx: mpsc::Receiver, diff --git a/trust-quorum/src/lib.rs b/trust-quorum/src/lib.rs index b4b18736163..457403999e4 100644 --- a/trust-quorum/src/lib.rs +++ b/trust-quorum/src/lib.rs @@ -43,7 +43,7 @@ mod connection_manager; mod task; pub(crate) use connection_manager::{ - ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, TaskId, WireMsg, + ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg, }; pub use task::NodeTask; diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index 85438fe43ed..ef82aa93063 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -243,7 +243,6 @@ impl NodeTask { #[cfg(test)] mod tests { use super::*; - use crate::TaskId; use crate::connection_manager::{ ConnState, RECONNECT_TIME, platform_id_to_baseboard_id, }; @@ -360,7 +359,7 @@ mod tests { .iter() .all(|c| matches!(c.state, ConnState::Established(_))) && status.connections.len() == num_nodes - 1 - && status.next_task_id == TaskId::new(3) + && status.total_tasks_spawned == 3 { count += 1; } @@ -411,7 +410,7 @@ mod tests { // is the stopped node. let should_be_connecting = h.listen_addr > stopped_addr; let valid_task_id = if should_be_connecting { - status.next_task_id > TaskId::new(3) + status.total_tasks_spawned > 3 } else { true }; From 2af7f29bd2adef525bbd13c2d9c985f46020d30c Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 22 Oct 2025 19:22:18 +0000 Subject: [PATCH 05/14] logging cleanup --- trust-quorum/src/connection_manager.rs | 202 +++++++++++++------------ 1 file changed, 104 insertions(+), 98 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index f5a2f4cbf45..dc142b353db 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -339,52 +339,51 @@ impl ConnMgr { let (tx, rx) = mpsc::channel(CHANNEL_BOUND); let main_tx = self.main_tx.clone(); let abort_handle = self.join_set.spawn(async move { - match acceptor.handshake().await { - Ok((stream, _)) => { - let platform_id = - stream.peer_platform_id().as_str().unwrap(); - let baseboard_id = platform_id_to_baseboard_id(platform_id); - - // TODO: Conversion between `PlatformId` and `BaseboardId` should - // happen in `sled-agent-types`. This is waiting on an update - // to the `dice-mfg-msgs` crate. 
- let log = - log.new(o!("peer_id" => baseboard_id.to_string())); - info!(log, "Accepted sprockets connection"; "addr" => %addr); - - let mut conn = EstablishedConn::new( - baseboard_id.clone(), - task::id(), - stream, - main_tx.clone(), - rx, - &log, - ); - - // Inform the main task that accepted connection is established - if let Err(e) = main_tx - .send(ConnToMainMsg { - task_id: task::id(), - msg: ConnToMainMsgInner::Accepted { - addr, - peer_id: baseboard_id, - }, - }) - .await - { - // The system is shutting down - // Just bail from this task - warn!( - log, - "Failed to send 'accepted' msg to main task: {e:?}" - ); - } else { - conn.run().await; - } - } + let stream = match acceptor.handshake().await { + Ok((stream, _)) => stream, + Err(err) => { error!(log, "Failed to accept a connection"; &err); + return (); } + }; + let platform_id = stream.peer_platform_id().as_str().unwrap(); + let baseboard_id = platform_id_to_baseboard_id(platform_id); + + // TODO: Conversion between `PlatformId` and `BaseboardId` should + // happen in `sled-agent-types`. This is waiting on an update + // to the `dice-mfg-msgs` crate. + let log = log.new(o!( + "peer_id" => baseboard_id.to_string(), + "peer_addr" => addr.to_string() + )); + info!(log, "Accepted sprockets connection"); + + let mut conn = EstablishedConn::new( + baseboard_id.clone(), + task::id(), + stream, + main_tx.clone(), + rx, + &log, + ); + + // Inform the main task that accepted connection is established + if let Err(e) = main_tx + .send(ConnToMainMsg { + task_id: task::id(), + msg: ConnToMainMsgInner::Accepted { + addr, + peer_id: baseboard_id, + }, + }) + .await + { + // The system is shutting down + // Just bail from this task + warn!(log, "Failed to send 'accepted' msg to main task: {e:?}"); + } else { + conn.run().await; } }); self.total_tasks_spawned += 1; @@ -408,8 +407,8 @@ impl ConnMgr { self.log, "Established server connection"; "task_id" => ?task_id, - "remote_addr" => %addr, - "remote_peer_id" => peer_id.to_string() + "peer_addr" => %addr, + "peer_id" => %peer_id ); let already_established = self.established.insert(peer_id, task_handle); @@ -428,13 +427,19 @@ impl ConnMgr { self.log, "Established client connection"; "task_id" => ?task_id, - "remote_addr" => %addr, - "remote_peer_id" => peer_id.to_string() + "peer_addr" => %addr, + "peer_id" => %peer_id ); let already_established = self.established.insert(peer_id, task_handle); assert!(already_established.is_none()); + } else { + error!(self.log, "Client handshake completed, but no client addr in map"; + "task_id" => ?task_id, + "peer_addr" => %addr, + "peer_id" => %peer_id + ); } } @@ -539,7 +544,7 @@ impl ConnMgr { let log = self.log.clone(); let config = self.config.clone(); let abort_handle = self.join_set.spawn(async move { - match sprockets_tls::Client::connect( + let stream = match sprockets_tls::Client::connect( config, addr, corpus.clone(), @@ -547,51 +552,52 @@ impl ConnMgr { ) .await { - Ok(stream) => { - let platform_id = - stream.peer_platform_id().as_str().unwrap(); - let baseboard_id = platform_id_to_baseboard_id(platform_id); - - // TODO: Conversion between `PlatformId` and `BaseboardId` should - // happen in `sled-agent-types`. This is waiting on an update - // to the `dice-mfg-msgs` crate. 
- let log = - log.new(o!("peer_id" => baseboard_id.to_string())); - info!(log, "Sprockets connection established"; "addr" => %addr); - - let mut conn = EstablishedConn::new( - baseboard_id.clone(), - task::id(), - stream, - main_tx.clone(), - rx, - &log, - ); - // Inform the main task that the client connection is - // established. - if let Err(e) = main_tx - .send(ConnToMainMsg { - task_id: task::id(), - msg: ConnToMainMsgInner::Connected { - addr, - peer_id: baseboard_id, - }, - }) - .await - { - // The system is shutting down - // Just bail from this task - error!( - log, - "Failed to send 'connected' msg to main task: {e:?}" - ); - } else { - conn.run().await; - } - } + Ok(stream) => stream, Err(err) => { - warn!(log, "Failed to connect"; &err); + warn!(log, "Failed to connect"; "peer_addr"=> %addr, &err); + return (); } + }; + let platform_id = stream.peer_platform_id().as_str().unwrap(); + let baseboard_id = platform_id_to_baseboard_id(platform_id); + + // TODO: Conversion between `PlatformId` and `BaseboardId` should + // happen in `sled-agent-types`. This is waiting on an update + // to the `dice-mfg-msgs` crate. + let log = log.new(o!( + "peer_id" => baseboard_id.to_string(), + "peer_addr" => addr.to_string() + )); + info!(log, "Sprockets connection established"); + + let mut conn = EstablishedConn::new( + baseboard_id.clone(), + task::id(), + stream, + main_tx.clone(), + rx, + &log, + ); + // Inform the main task that the client connection is + // established. + if let Err(e) = main_tx + .send(ConnToMainMsg { + task_id: task::id(), + msg: ConnToMainMsgInner::Connected { + addr, + peer_id: baseboard_id, + }, + }) + .await + { + // The system is shutting down + // Just bail from this task + error!( + log, + "Failed to send 'connected' msg to main task: {e:?}" + ); + } else { + conn.run().await; } }); self.total_tasks_spawned += 1; @@ -614,7 +620,7 @@ impl ConnMgr { info!( self.log, "Deleting initiating connection"; - "remote_addr" => addr.to_string() + "remote_addr" => %addr ); let _ = handle.tx.send(MainToConnMsg::Close).await; } else { @@ -626,8 +632,8 @@ impl ConnMgr { info!( self.log, "Deleting established connection"; - "remote_addr" => addr.to_string(), - "remote_peer_id" => id.to_string(), + "peer_addr" => %addr, + "peer_id" => %id ); let _ = handle.tx.send(MainToConnMsg::Close).await; // probably a better way to avoid borrowck issues @@ -649,8 +655,8 @@ impl ConnMgr { self.log, "Established connection task exited"; "task_id" => ?task_id, - "remote_addr" => handle.addr().to_string(), - "remote_peer_id" => id.to_string(), + "peer_addr" => %handle.addr(), + "peer_id" => %id ); // probably a better way to avoid borrowck issues let id = id.clone(); @@ -662,7 +668,7 @@ impl ConnMgr { self.log, "Accepting task exited"; "task_id" => ?task_id, - "remote_addr" => handle.addr().to_string(), + "peer_addr" => %handle.addr() ); let addr = *addr; self.accepting.remove(&addr); @@ -673,7 +679,7 @@ impl ConnMgr { self.log, "Connecting task exited"; "task_id" => ?task_id, - "remote_addr" => handle.addr().to_string(), + "peer_addr" => %handle.addr() ); let addr = *addr; self.connecting.remove(&addr); From 56e201873f81dbdd4873d76160fe29fe7aa321f6 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 22 Oct 2025 19:29:05 +0000 Subject: [PATCH 06/14] more review cleanup --- trust-quorum/src/connection_manager.rs | 8 +++++--- trust-quorum/src/task.rs | 6 +++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index dc142b353db..2da28d659e6 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -45,7 +45,7 @@ pub enum AcceptError { #[derive(Debug, PartialEq)] pub enum MainToConnMsg { Close, - #[allow(unused)] + #[expect(unused)] Msg(WireMsg), } @@ -491,8 +491,10 @@ impl ConnMgr { /// The set of known addresses on the bootstrap network has changed /// - /// We need to connect to peers with addresses less than our own - /// and tear down any connections that no longer exist in `addrs`. + /// We only want a single connection between known peers at a time. The + /// easiest way to achieve this is to only connect to peers with addresses + /// that sort less than our own and tear down any connections that no longer + /// exist in `addrs`. pub async fn update_bootstrap_connections( &mut self, addrs: BTreeSet, diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index ef82aa93063..4585697f291 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -107,11 +107,11 @@ impl NodeTaskHandle { pub struct NodeTask { shutdown: bool, log: Logger, - #[allow(unused)] + #[expect(unused)] config: Config, - #[allow(unused)] + #[expect(unused)] node: Node, - #[allow(unused)] + #[expect(unused)] ctx: NodeCtx, conn_mgr: ConnMgr, conn_mgr_rx: mpsc::Receiver, From 19453d38da665a0b3226ab43c1655e239b889999 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 22 Oct 2025 19:41:50 +0000 Subject: [PATCH 07/14] sock writer shutdown works again --- trust-quorum/src/established_conn.rs | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/trust-quorum/src/established_conn.rs b/trust-quorum/src/established_conn.rs index c5a4c9dd11a..209cb0af3f7 100644 --- a/trust-quorum/src/established_conn.rs +++ b/trust-quorum/src/established_conn.rs @@ -171,16 +171,7 @@ impl EstablishedConn { { warn!(self.log, "Failed to send to main task: {e:?}"); } - // TODO: This causes a deadlock and breaks the test. - // - // I'm unclear why, although I plan to dig a bit further in the future. - // It should be noted that the writer and reader share a std::mutex - // under the hood and that could be causing issues. Regardless, it - // is not actually critical to issue a shutdown as detection will - // be discovered via missing ping messages at the other end of the - // connection. - // - // let _ = self.writer.shutdown().await; + let _ = self.writer.shutdown().await; } async fn on_read( From 5ac98d05b9c0f2f2b874d891b88f4fe0b7388f96 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Wed, 22 Oct 2025 19:51:25 +0000 Subject: [PATCH 08/14] clippy --- trust-quorum/src/connection_manager.rs | 4 ++-- trust-quorum/src/task.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index 2da28d659e6..bffb0190de9 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -480,7 +480,7 @@ impl ConnMgr { continue; } - to_connect.push(addr.clone()); + to_connect.push(*addr); } for addr in to_connect { @@ -700,6 +700,6 @@ impl ConnMgr { pub fn platform_id_to_baseboard_id(platform_id: &str) -> BaseboardId { let mut platform_id_iter = platform_id.split(":"); let part_number = platform_id_iter.nth(1).unwrap().to_string(); - let serial_number = platform_id_iter.skip(1).next().unwrap().to_string(); + let serial_number = platform_id_iter.nth(2).unwrap().to_string(); BaseboardId { part_number, serial_number } } diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index 4585697f291..4110259e867 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -385,7 +385,7 @@ mod tests { // connection. let h = node_handles.pop().unwrap(); h.shutdown().await.unwrap(); - let _ = join_handles.pop().unwrap(); + join_handles.pop().unwrap(); let stopped_addr = h.listen_addr; // Speed up reconnection in the test From 7f9060b7a65c970cc29497cbb68fe6bb91f4ebff Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 22 Oct 2025 22:22:44 +0000 Subject: [PATCH 09/14] Review comments --- trust-quorum/src/connection_manager.rs | 19 +++---------------- trust-quorum/src/established_conn.rs | 19 ++++++------------- trust-quorum/src/task.rs | 1 - 3 files changed, 9 insertions(+), 30 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index bffb0190de9..9b9f1a7e1ec 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -247,19 +247,6 @@ impl ConnMgr { } } - pub async fn shutdown(&mut self) { - // Shutdown all connection processing tasks - for (_, handle) in &self.accepting { - let _ = handle.tx.send(MainToConnMsg::Close).await; - } - for (_, handle) in &self.connecting { - let _ = handle.tx.send(MainToConnMsg::Close).await; - } - for (_, handle) in &self.established { - let _ = handle.tx.send(MainToConnMsg::Close).await; - } - } - pub fn status(&self) -> ConnMgrStatus { let connections = self .connecting @@ -311,7 +298,7 @@ impl ConnMgr { self.on_task_exit(task_id).await; } Err(err) => { - error!(self.log, "Connection task panic: {}", err); + error!(self.log, "Connection task panic: {err}"); self.on_task_exit(err.id()).await; } @@ -344,7 +331,7 @@ impl ConnMgr { Err(err) => { error!(log, "Failed to accept a connection"; &err); - return (); + return; } }; let platform_id = stream.peer_platform_id().as_str().unwrap(); @@ -700,6 +687,6 @@ impl ConnMgr { pub fn platform_id_to_baseboard_id(platform_id: &str) -> BaseboardId { let mut platform_id_iter = platform_id.split(":"); let part_number = platform_id_iter.nth(1).unwrap().to_string(); - let serial_number = platform_id_iter.nth(2).unwrap().to_string(); + let serial_number = platform_id_iter.nth(1).unwrap().to_string(); BaseboardId { part_number, serial_number } } diff --git a/trust-quorum/src/established_conn.rs b/trust-quorum/src/established_conn.rs index 209cb0af3f7..c1359644aae 100644 --- a/trust-quorum/src/established_conn.rs +++ b/trust-quorum/src/established_conn.rs @@ -126,11 +126,10 @@ impl 
EstablishedConn { // // Continuously process messages until the connection closes loop { - if !self.current_write.has_remaining() - && !self.write_queue.is_empty() - { - self.current_write = - Cursor::new(self.write_queue.pop_front().unwrap()); + if !self.current_write.has_remaining() { + if let Some(buf) = self.write_queue.pop_front() { + self.current_write = Cursor::new(buf); + } } let res = tokio::select! { @@ -178,14 +177,8 @@ impl EstablishedConn { &mut self, res: Result, ) -> Result<(), ConnErr> { - match res { - Ok(n) => { - self.total_read += n; - } - Err(e) => { - return Err(ConnErr::FailedRead(e)); - } - } + let n = res.map_err(ConnErr::FailedRead)?; + self.total_read += n; // We may have more than one message that has been read loop { diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index 4110259e867..bc312ecad92 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -234,7 +234,6 @@ impl NodeTask { NodeApiRequest::Shutdown => { info!(self.log, "Shutting down Node tokio tasks"); self.shutdown = true; - self.conn_mgr.shutdown().await; } } } From 9c17716b91721c38ab5476d8c659452251731495 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 22 Oct 2025 23:36:44 +0000 Subject: [PATCH 10/14] Use BiHashMap and TriHashMap for connections --- trust-quorum/src/connection_manager.rs | 180 +++++++++++++++---------- 1 file changed, 112 insertions(+), 68 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index 9b9f1a7e1ec..612bc9ac5c0 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -9,12 +9,15 @@ use crate::{BaseboardId, PeerMsg}; // TODO: Move or copy this to this crate? use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; +use iddqd::{ + BiHashItem, BiHashMap, TriHashItem, TriHashMap, bi_upcast, tri_upcast, +}; use serde::{Deserialize, Serialize}; use slog::{Logger, debug, error, info, o, warn}; use slog_error_chain::SlogInlineError; use sprockets_tls::keys::SprocketsConfig; use sprockets_tls::server::SprocketsAcceptor; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeSet; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; use std::time::Duration; use tokio::sync::mpsc; @@ -109,6 +112,63 @@ impl TaskHandle { } } +impl BiHashItem for TaskHandle { + type K1<'a> = task::Id; + type K2<'a> = SocketAddrV6; + + fn key1(&self) -> Self::K1<'_> { + self.task_id + } + + fn key2(&self) -> Self::K2<'_> { + self.conn_type.addr() + } + + bi_upcast!(); +} + +pub struct EstablishedTaskHandle { + baseboard_id: BaseboardId, + task_handle: TaskHandle, +} + +impl EstablishedTaskHandle { + pub fn new( + baseboard_id: BaseboardId, + task_handle: TaskHandle, + ) -> EstablishedTaskHandle { + EstablishedTaskHandle { baseboard_id, task_handle } + } + + pub fn task_id(&self) -> task::Id { + self.task_handle.task_id + } + + pub fn addr(&self) -> SocketAddrV6 { + self.task_handle.addr() + } +} + +impl TriHashItem for EstablishedTaskHandle { + type K1<'a> = &'a BaseboardId; + type K2<'a> = task::Id; + type K3<'a> = SocketAddrV6; + + fn key1(&self) -> Self::K1<'_> { + &self.baseboard_id + } + + fn key2(&self) -> Self::K2<'_> { + self.task_handle.task_id + } + + fn key3(&self) -> Self::K3<'_> { + self.task_handle.addr() + } + + tri_upcast!(); +} + pub enum ConnectionType { Connected(SocketAddrV6), Accepted(SocketAddrV6), @@ -177,14 +237,14 @@ pub struct ConnMgr { /// All tasks currently connecting to remote nodes and attempting a /// 
sprockets handshake. - connecting: BTreeMap, + connecting: BiHashMap, /// All tasks with an accepted TCP connnection performing a sprockets handshake - accepting: BTreeMap, + accepting: BiHashMap, /// All tasks containing established connections that can be used to communicate /// with other nodes. - established: BTreeMap, + established: TriHashMap, /// An interval for reconnect operations reconnect_interval: Interval, @@ -239,9 +299,9 @@ impl ConnMgr { listen_addr, join_set: JoinSet::new(), bootstrap_addrs: BTreeSet::new(), - connecting: BTreeMap::new(), - accepting: BTreeMap::new(), - established: BTreeMap::new(), + connecting: BiHashMap::new(), + accepting: BiHashMap::new(), + established: TriHashMap::new(), reconnect_interval, total_tasks_spawned: 0, } @@ -251,23 +311,25 @@ impl ConnMgr { let connections = self .connecting .iter() - .map(|(addr, task_handle)| ConnInfo { + .map(|task_handle| ConnInfo { state: ConnState::Connecting, - addr: *addr, + addr: task_handle.addr(), task_id: task_handle.task_id, }) - .chain(self.accepting.iter().map(|(addr, task_handle)| ConnInfo { + .chain(self.accepting.iter().map(|task_handle| ConnInfo { state: ConnState::Accepting, - addr: *addr, + addr: task_handle.addr(), task_id: task_handle.task_id, })) - .chain(self.established.iter().map( - |(baseboard_id, task_handle)| ConnInfo { - state: ConnState::Established(baseboard_id.clone()), - addr: task_handle.addr(), - task_id: task_handle.task_id, - }, - )) + .chain(self.established.iter().map(|established_task_handle| { + ConnInfo { + state: ConnState::Established( + established_task_handle.baseboard_id.clone(), + ), + addr: established_task_handle.addr(), + task_id: established_task_handle.task_id(), + } + })) .collect(); ConnMgrStatus { @@ -379,7 +441,7 @@ impl ConnMgr { tx, conn_type: ConnectionType::Accepted(addr), }; - self.accepting.insert(addr, task_handle); + assert!(self.accepting.insert_unique(task_handle).is_ok()); Ok(()) } @@ -389,7 +451,7 @@ impl ConnMgr { addr: SocketAddrV6, peer_id: BaseboardId, ) { - if let Some(task_handle) = self.accepting.remove(&addr) { + if let Some(task_handle) = self.accepting.remove2(&addr) { info!( self.log, "Established server connection"; @@ -397,9 +459,17 @@ impl ConnMgr { "peer_addr" => %addr, "peer_id" => %peer_id ); - let already_established = - self.established.insert(peer_id, task_handle); - assert!(already_established.is_none()); + + let already_established = self.established.insert_unique( + EstablishedTaskHandle::new(peer_id, task_handle), + ); + assert!(already_established.is_ok()); + } else { + error!(self.log, "Server handshake completed, but no server addr in map"; + "task_id" => ?task_id, + "peer_addr" => %addr, + "peer_id" => %peer_id + ); } } @@ -409,7 +479,7 @@ impl ConnMgr { addr: SocketAddrV6, peer_id: BaseboardId, ) { - if let Some(task_handle) = self.connecting.remove(&addr) { + if let Some(task_handle) = self.connecting.remove2(&addr) { info!( self.log, "Established client connection"; @@ -417,10 +487,10 @@ impl ConnMgr { "peer_addr" => %addr, "peer_id" => %peer_id ); - let already_established = - self.established.insert(peer_id, task_handle); - - assert!(already_established.is_none()); + let already_established = self.established.insert_unique( + EstablishedTaskHandle::new(peer_id, task_handle), + ); + assert!(already_established.is_ok()); } else { error!(self.log, "Client handshake completed, but no client addr in map"; "task_id" => ?task_id, @@ -436,14 +506,14 @@ impl ConnMgr { task_id: task::Id, peer_id: BaseboardId, ) { - if let 
Some(task_handle) = self.established.get(&peer_id) { - if task_handle.task_id != task_id { + if let Some(established_task_handle) = self.established.get1(&peer_id) { + if established_task_handle.task_id() != task_id { // This was a stale disconnect return; } } warn!(self.log, "peer disconnected"; "peer_id" => %peer_id); - let _ = self.established.remove(&peer_id); + let _ = self.established.remove1(&peer_id); } /// Initiate connections if a corresponding task doesn't already exist. This @@ -455,15 +525,11 @@ impl ConnMgr { for addr in self.bootstrap_addrs.iter().filter(|&&addr| self.listen_addr > addr) { - if self.connecting.contains_key(addr) { + if self.connecting.contains_key2(addr) { continue; } - if self - .established - .values() - .any(|task_handle| task_handle.addr() == *addr) - { + if self.established.contains_key3(addr) { continue; } @@ -595,7 +661,7 @@ impl ConnMgr { tx, conn_type: ConnectionType::Connected(addr), }; - self.connecting.insert(addr, task_handle); + assert!(self.connecting.insert_unique(task_handle).is_ok()); } /// Remove any information about a sprockets client connection and inform @@ -604,7 +670,7 @@ impl ConnMgr { /// We don't tear down server connections this way as we don't know their /// listen port, just the ephemeral port. async fn disconnect_client(&mut self, addr: SocketAddrV6) { - if let Some(handle) = self.connecting.remove(&addr) { + if let Some(handle) = self.connecting.remove2(&addr) { // The connection has not yet completed its handshake info!( self.log, @@ -613,21 +679,14 @@ impl ConnMgr { ); let _ = handle.tx.send(MainToConnMsg::Close).await; } else { - if let Some((id, handle)) = self - .established - .iter() - .find(|(_, handle)| handle.addr() == addr) - { + if let Some(handle) = self.established.remove3(&addr) { info!( self.log, "Deleting established connection"; "peer_addr" => %addr, - "peer_id" => %id + "peer_id" => %handle.baseboard_id ); - let _ = handle.tx.send(MainToConnMsg::Close).await; - // probably a better way to avoid borrowck issues - let id = id.clone(); - self.established.remove(&id); + let _ = handle.task_handle.tx.send(MainToConnMsg::Close).await; } } } @@ -635,43 +694,28 @@ impl ConnMgr { /// Remove any references to the given task async fn on_task_exit(&mut self, task_id: task::Id) { // We're most likely to find the task as established so we start with that - if let Some((id, handle)) = self - .established - .iter() - .find(|(_, handle)| handle.task_id == task_id) - { + if let Some(handle) = self.established.remove2(&task_id) { info!( self.log, "Established connection task exited"; "task_id" => ?task_id, "peer_addr" => %handle.addr(), - "peer_id" => %id + "peer_id" => %handle.baseboard_id ); - // probably a better way to avoid borrowck issues - let id = id.clone(); - self.established.remove(&id); - } else if let Some((addr, handle)) = - self.accepting.iter().find(|(_, handle)| handle.task_id == task_id) - { + } else if let Some(handle) = self.accepting.remove1(&task_id) { info!( self.log, "Accepting task exited"; "task_id" => ?task_id, "peer_addr" => %handle.addr() ); - let addr = *addr; - self.accepting.remove(&addr); - } else if let Some((addr, handle)) = - self.connecting.iter().find(|(_, handle)| handle.task_id == task_id) - { + } else if let Some(handle) = self.connecting.remove1(&task_id) { info!( self.log, "Connecting task exited"; "task_id" => ?task_id, "peer_addr" => %handle.addr() ); - let addr = *addr; - self.connecting.remove(&addr); } else { info!( self.log, From dc1e778d21f0322e404e712ae78596d571e43caf Mon Sep 
17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 22 Oct 2025 23:49:07 +0000 Subject: [PATCH 11/14] No more graceful close from ConnMgr --- trust-quorum/src/connection_manager.rs | 36 +++++++++++++++++--------- trust-quorum/src/established_conn.rs | 5 ---- 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index 612bc9ac5c0..584e4b1a6bb 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -21,7 +21,7 @@ use std::collections::BTreeSet; use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; use std::time::Duration; use tokio::sync::mpsc; -use tokio::task::{self, JoinSet}; +use tokio::task::{self, AbortHandle, JoinSet}; use tokio::time::{Interval, MissedTickBehavior, interval}; /// We only expect a handful of concurrent requests at most. @@ -47,7 +47,6 @@ pub enum AcceptError { /// Messages sent from the main task to the connection managing tasks #[derive(Debug, PartialEq)] pub enum MainToConnMsg { - Close, #[expect(unused)] Msg(WireMsg), } @@ -101,15 +100,24 @@ pub enum ConnToMainMsgInner { } pub struct TaskHandle { - pub task_id: task::Id, + pub abort_handle: AbortHandle, + #[expect(unused)] pub tx: mpsc::Sender, pub conn_type: ConnectionType, } impl TaskHandle { + pub fn task_id(&self) -> task::Id { + self.abort_handle.id() + } + pub fn addr(&self) -> SocketAddrV6 { self.conn_type.addr() } + + pub fn abort(&self) { + self.abort_handle.abort() + } } impl BiHashItem for TaskHandle { @@ -117,7 +125,7 @@ impl BiHashItem for TaskHandle { type K2<'a> = SocketAddrV6; fn key1(&self) -> Self::K1<'_> { - self.task_id + self.task_id() } fn key2(&self) -> Self::K2<'_> { @@ -141,12 +149,16 @@ impl EstablishedTaskHandle { } pub fn task_id(&self) -> task::Id { - self.task_handle.task_id + self.task_handle.task_id() } pub fn addr(&self) -> SocketAddrV6 { self.task_handle.addr() } + + pub fn abort(&self) { + self.task_handle.abort(); + } } impl TriHashItem for EstablishedTaskHandle { @@ -159,7 +171,7 @@ impl TriHashItem for EstablishedTaskHandle { } fn key2(&self) -> Self::K2<'_> { - self.task_handle.task_id + self.task_handle.task_id() } fn key3(&self) -> Self::K3<'_> { @@ -314,12 +326,12 @@ impl ConnMgr { .map(|task_handle| ConnInfo { state: ConnState::Connecting, addr: task_handle.addr(), - task_id: task_handle.task_id, + task_id: task_handle.task_id(), }) .chain(self.accepting.iter().map(|task_handle| ConnInfo { state: ConnState::Accepting, addr: task_handle.addr(), - task_id: task_handle.task_id, + task_id: task_handle.task_id(), })) .chain(self.established.iter().map(|established_task_handle| { ConnInfo { @@ -437,7 +449,7 @@ impl ConnMgr { }); self.total_tasks_spawned += 1; let task_handle = TaskHandle { - task_id: abort_handle.id(), + abort_handle, tx, conn_type: ConnectionType::Accepted(addr), }; @@ -657,7 +669,7 @@ impl ConnMgr { }); self.total_tasks_spawned += 1; let task_handle = TaskHandle { - task_id: abort_handle.id(), + abort_handle, tx, conn_type: ConnectionType::Connected(addr), }; @@ -677,7 +689,7 @@ impl ConnMgr { "Deleting initiating connection"; "remote_addr" => %addr ); - let _ = handle.tx.send(MainToConnMsg::Close).await; + handle.abort(); } else { if let Some(handle) = self.established.remove3(&addr) { info!( @@ -686,7 +698,7 @@ impl ConnMgr { "peer_addr" => %addr, "peer_id" => %handle.baseboard_id ); - let _ = handle.task_handle.tx.send(MainToConnMsg::Close).await; + handle.abort(); } } } diff --git a/trust-quorum/src/established_conn.rs 
b/trust-quorum/src/established_conn.rs index c1359644aae..5e4e9e12641 100644 --- a/trust-quorum/src/established_conn.rs +++ b/trust-quorum/src/established_conn.rs @@ -44,8 +44,6 @@ const INACTIVITY_TIMEOUT: Duration = Duration::from_secs(10); /// Also a great movie #[derive(Debug, thiserror::Error, SlogInlineError)] pub enum ConnErr { - #[error("Main task insructed this connection to close")] - Close, #[error("Failed to write")] FailedWrite(#[source] std::io::Error), #[error("Failed to read")] @@ -273,9 +271,6 @@ impl EstablishedConn { msg: MainToConnMsg, ) -> Result<(), ConnErr> { match msg { - MainToConnMsg::Close => { - return Err(ConnErr::Close); - } MainToConnMsg::Msg(msg) => self.write_framed_to_queue(msg).await, } } From 40906c61e8a0bae46f0caec1b80586ec6014aa58 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 23 Oct 2025 00:13:27 +0000 Subject: [PATCH 12/14] no more test detritus --- trust-quorum/src/task.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index bc312ecad92..44fd2338eb0 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -298,7 +298,9 @@ mod tests { #[tokio::test] async fn full_mesh_connectivity() { let logctx = test_setup_log("full_mesh_connectivity"); - let (dir, _) = log_prefix_for_test("full_mesh_connectivity"); + let (mut dir, s) = log_prefix_for_test("full_mesh_connectivity"); + dir.push(&s); + std::fs::create_dir(&dir).unwrap(); println!("Writing keys and certs to {dir}"); let num_nodes = 4; @@ -327,7 +329,7 @@ mod tests { let out = attest_mock::log::mock(attest_log_doc).unwrap(); std::fs::write(dir.join("log.bin"), &out).unwrap(); - let configs = pki_doc_to_node_configs(dir, num_nodes); + let configs = pki_doc_to_node_configs(dir.clone(), num_nodes); let mut node_handles = vec![]; let mut join_handles = vec![]; @@ -471,5 +473,6 @@ mod tests { .unwrap(); logctx.cleanup_successful(); + std::fs::remove_dir_all(dir).unwrap(); } } From 79a6730e5aed9d6268bf882a18920441e3899978 Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Thu, 23 Oct 2025 03:56:26 +0000 Subject: [PATCH 13/14] Move sans-io code into trust-quorum-protocol crate --- Cargo.lock | 46 ++++- Cargo.toml | 3 + trust-quorum/Cargo.toml | 13 +- trust-quorum/protocol/Cargo.toml | 57 ++++++ trust-quorum/{ => protocol}/src/alarm.rs | 0 .../{ => protocol}/src/compute_key_share.rs | 0 .../{ => protocol}/src/configuration.rs | 0 .../{ => protocol}/src/coordinator_state.rs | 0 trust-quorum/{ => protocol}/src/crypto.rs | 0 trust-quorum/protocol/src/lib.rs | 162 ++++++++++++++++++ trust-quorum/{ => protocol}/src/messages.rs | 0 trust-quorum/{ => protocol}/src/node.rs | 0 trust-quorum/{ => protocol}/src/node_ctx.rs | 0 .../{ => protocol}/src/persistent_state.rs | 0 .../{ => protocol}/src/rack_secret_loader.rs | 0 trust-quorum/{ => protocol}/src/validators.rs | 0 .../tests/cluster.proptest-regressions | 0 trust-quorum/{ => protocol}/tests/cluster.rs | 2 +- trust-quorum/src/connection_manager.rs | 36 ++-- trust-quorum/src/established_conn.rs | 5 +- trust-quorum/src/lib.rs | 159 +---------------- trust-quorum/src/task.rs | 5 +- trust-quorum/test-utils/Cargo.toml | 2 +- trust-quorum/test-utils/src/event.rs | 2 +- trust-quorum/test-utils/src/lib.rs | 2 +- trust-quorum/test-utils/src/nexus.rs | 2 +- trust-quorum/test-utils/src/state.rs | 2 +- trust-quorum/tqdb/Cargo.toml | 2 +- trust-quorum/tqdb/src/bin/tqdb/main.rs | 2 +- 29 files changed, 308 insertions(+), 194 deletions(-) create mode 100644 trust-quorum/protocol/Cargo.toml rename trust-quorum/{ => protocol}/src/alarm.rs (100%) rename trust-quorum/{ => protocol}/src/compute_key_share.rs (100%) rename trust-quorum/{ => protocol}/src/configuration.rs (100%) rename trust-quorum/{ => protocol}/src/coordinator_state.rs (100%) rename trust-quorum/{ => protocol}/src/crypto.rs (100%) create mode 100644 trust-quorum/protocol/src/lib.rs rename trust-quorum/{ => protocol}/src/messages.rs (100%) rename trust-quorum/{ => protocol}/src/node.rs (100%) rename trust-quorum/{ => protocol}/src/node_ctx.rs (100%) rename trust-quorum/{ => protocol}/src/persistent_state.rs (100%) rename trust-quorum/{ => protocol}/src/rack_secret_loader.rs (100%) rename trust-quorum/{ => protocol}/src/validators.rs (100%) rename trust-quorum/{ => protocol}/tests/cluster.proptest-regressions (100%) rename trust-quorum/{ => protocol}/tests/cluster.rs (99%) diff --git a/Cargo.lock b/Cargo.lock index c63c83ba8b2..dd5980e1ec9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14632,7 +14632,7 @@ dependencies = [ "serde_json", "slog", "tabled 0.15.0", - "trust-quorum", + "trust-quorum-protocol", "trust-quorum-test-utils", ] @@ -14803,7 +14803,6 @@ dependencies = [ "anyhow", "assert_matches", "attest-mock", - "bcs", "bootstore", "bytes", "camino", @@ -14837,6 +14836,47 @@ dependencies = [ "test-strategy", "thiserror 2.0.17", "tokio", + "trust-quorum-protocol", + "trust-quorum-test-utils", + "uuid", + "zeroize", +] + +[[package]] +name = "trust-quorum-protocol" +version = "0.1.0" +dependencies = [ + "assert_matches", + "attest-mock", + "bootstore", + "bytes", + "camino", + "chacha20poly1305", + "ciborium", + "daft", + "derive_more 0.99.20", + "dropshot", + "gfss", + "hex", + "hkdf", + "iddqd", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "proptest", + "rand 0.9.2", + "secrecy 0.10.3", + "serde", + "serde_json", + "serde_with", + "sha3", + "sled-agent-types", + "slog", + "slog-error-chain", + "static_assertions", + "subtle", + "test-strategy", + "thiserror 2.0.17", "trust-quorum-test-utils", "uuid", "zeroize", @@ -14859,7 
+14899,7 @@ dependencies = [ "serde_json", "sled-hardware-types", "slog", - "trust-quorum", + "trust-quorum-protocol", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index acc91bb2037..b376e3100c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,6 +143,7 @@ members = [ "test-utils", "trust-quorum", "trust-quorum/gfss", + "trust-quorum/protocol", "trust-quorum/test-utils", "trust-quorum/tqdb", "typed-rng", @@ -304,6 +305,7 @@ default-members = [ "sp-sim", "trust-quorum", "trust-quorum/gfss", + "trust-quorum/protocol", "trust-quorum/test-utils", "trust-quorum/tqdb", "test-utils", @@ -472,6 +474,7 @@ gateway-types = { path = "gateway-types" } gethostname = "0.5.0" gfss = { path = "trust-quorum/gfss" } trust-quorum = { path = "trust-quorum" } +trust-quorum-protocol = { path = "trust-quorum/protocol" } trust-quorum-test-utils = { path = "trust-quorum/test-utils" } glob = "0.3.2" guppy = "0.17.20" diff --git a/trust-quorum/Cargo.toml b/trust-quorum/Cargo.toml index 7a034f78f8d..5f5ad0e88a8 100644 --- a/trust-quorum/Cargo.toml +++ b/trust-quorum/Cargo.toml @@ -3,13 +3,13 @@ name = "trust-quorum" version = "0.1.0" edition = "2021" license = "MPL-2.0" +description = "trust quorum library for use by bootstrap agent" [lints] workspace = true [dependencies] anyhow.workspace = true -bcs.workspace = true bootstore.workspace = true bytes.workspace = true camino.workspace = true @@ -36,6 +36,7 @@ static_assertions.workspace = true subtle.workspace = true thiserror.workspace = true tokio.workspace = true +trust-quorum-protocol.workspace = true uuid.workspace = true zeroize.workspace = true omicron-workspace-hack.workspace = true @@ -50,13 +51,3 @@ serde_json.workspace = true test-strategy.workspace = true trust-quorum-test-utils.workspace = true sprockets-tls-test-utils.workspace = true - -[features] -# Impl `PartialEq` and `Eq` for types implementing `subtle::ConstantTimeEq` when -# this feature is enabled. -# -# This is of unknown risk. The rust compiler may obviate the security of using -# subtle when we do this. On the other hand its very useful for testing and -# debugging outside of production. 
-danger_partial_eq_ct_wrapper = ["gfss/danger_partial_eq_ct_wrapper"] -testing = [] diff --git a/trust-quorum/protocol/Cargo.toml b/trust-quorum/protocol/Cargo.toml new file mode 100644 index 00000000000..9a5d42f7d95 --- /dev/null +++ b/trust-quorum/protocol/Cargo.toml @@ -0,0 +1,57 @@ +[package] +name = "trust-quorum-protocol" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" +description = "sans-io trust quorum protocol implementation" + +[lints] +workspace = true + +[dependencies] +bootstore.workspace = true +bytes.workspace = true +camino.workspace = true +chacha20poly1305.workspace = true +ciborium.workspace = true +daft.workspace = true +derive_more.workspace = true +gfss.workspace = true +hex.workspace = true +hkdf.workspace = true +iddqd.workspace = true +omicron-uuid-kinds.workspace = true +rand = { workspace = true, features = ["os_rng"] } +secrecy.workspace = true +serde.workspace = true +serde_with.workspace = true +sha3.workspace = true +sled-agent-types.workspace = true +slog.workspace = true +slog-error-chain.workspace = true +static_assertions.workspace = true +subtle.workspace = true +thiserror.workspace = true +uuid.workspace = true +zeroize.workspace = true +omicron-workspace-hack.workspace = true + +[dev-dependencies] +assert_matches.workspace = true +attest-mock.workspace = true +dropshot.workspace = true +omicron-test-utils.workspace = true +proptest.workspace = true +serde_json.workspace = true +test-strategy.workspace = true +trust-quorum-test-utils.workspace = true + +[features] +# Impl `PartialEq` and `Eq` for types implementing `subtle::ConstantTimeEq` when +# this feature is enabled. +# +# This is of unknown risk. The rust compiler may obviate the security of using +# subtle when we do this. On the other hand its very useful for testing and +# debugging outside of production. +danger_partial_eq_ct_wrapper = ["gfss/danger_partial_eq_ct_wrapper"] +testing = [] diff --git a/trust-quorum/src/alarm.rs b/trust-quorum/protocol/src/alarm.rs similarity index 100% rename from trust-quorum/src/alarm.rs rename to trust-quorum/protocol/src/alarm.rs diff --git a/trust-quorum/src/compute_key_share.rs b/trust-quorum/protocol/src/compute_key_share.rs similarity index 100% rename from trust-quorum/src/compute_key_share.rs rename to trust-quorum/protocol/src/compute_key_share.rs diff --git a/trust-quorum/src/configuration.rs b/trust-quorum/protocol/src/configuration.rs similarity index 100% rename from trust-quorum/src/configuration.rs rename to trust-quorum/protocol/src/configuration.rs diff --git a/trust-quorum/src/coordinator_state.rs b/trust-quorum/protocol/src/coordinator_state.rs similarity index 100% rename from trust-quorum/src/coordinator_state.rs rename to trust-quorum/protocol/src/coordinator_state.rs diff --git a/trust-quorum/src/crypto.rs b/trust-quorum/protocol/src/crypto.rs similarity index 100% rename from trust-quorum/src/crypto.rs rename to trust-quorum/protocol/src/crypto.rs diff --git a/trust-quorum/protocol/src/lib.rs b/trust-quorum/protocol/src/lib.rs new file mode 100644 index 00000000000..0d5c522b2d4 --- /dev/null +++ b/trust-quorum/protocol/src/lib.rs @@ -0,0 +1,162 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementation of the oxide rack trust quorum protocol +//! +//! This protocol is written as a +//! 
[no-IO](https://sans-io.readthedocs.io/how-to-sans-io.html) implementation. +//! All persistent state and all networking is managed outside of this +//! implementation. + +use crypto::Sha3_256Digest; +use daft::Diffable; +use derive_more::Display; +use gfss::shamir::Share; +use serde::{Deserialize, Serialize}; +pub use sled_agent_types::sled::BaseboardId; +use slog::{Logger, error, warn}; + +mod alarm; +mod compute_key_share; +mod configuration; +mod coordinator_state; +pub(crate) mod crypto; +mod messages; +mod node; +mod node_ctx; +mod persistent_state; +#[allow(unused)] +mod rack_secret_loader; +mod validators; + +pub use configuration::Configuration; +pub use coordinator_state::{ + CoordinatingMsg, CoordinatorOperation, CoordinatorState, + CoordinatorStateDiff, +}; +pub use rack_secret_loader::{LoadRackSecretError, RackSecretLoaderDiff}; +pub use validators::{ + ValidatedLrtqUpgradeMsgDiff, ValidatedReconfigureMsgDiff, +}; + +pub use alarm::Alarm; +pub use crypto::RackSecret; +pub use messages::*; +pub use node::{Node, NodeDiff}; +// public only for docs. +pub use node_ctx::NodeHandlerCtx; +pub use node_ctx::{NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff}; +pub use persistent_state::{ + ExpungedMetadata, PersistentState, PersistentStateSummary, +}; + +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Serialize, + Deserialize, + Display, + Diffable, +)] +#[daft(leaf)] +pub struct Epoch(pub u64); + +impl Epoch { + pub fn next(&self) -> Epoch { + Epoch(self.0.checked_add(1).expect("fewer than 2^64 epochs")) + } +} + +/// The number of shares required to reconstruct the rack secret +/// +/// Typically referred to as `k` in the docs +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Serialize, + Deserialize, + Display, + Diffable, +)] +#[daft(leaf)] +pub struct Threshold(pub u8); + +/// A container to make messages between trust quorum nodes routable +#[derive(Debug, Clone, Serialize, Deserialize, Diffable)] +#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))] +#[daft(leaf)] +pub struct Envelope { + pub to: BaseboardId, + pub from: BaseboardId, + pub msg: PeerMsg, +} + +#[cfg(feature = "testing")] +impl Envelope { + pub fn equal_except_for_crypto_data(&self, other: &Self) -> bool { + self.to == other.to + && self.from == other.from + && self.msg.equal_except_for_crypto_data(&other.msg) + } +} + +/// Check if a received share is valid for a given configuration +/// +/// Return true if valid, false otherwise. +pub fn validate_share( + log: &Logger, + config: &Configuration, + from: &BaseboardId, + epoch: Epoch, + share: &Share, +) -> bool { + // Are we trying to retrieve shares for `epoch`? + if epoch != config.epoch { + warn!( + log, + "Received Share from node with wrong epoch"; + "received_epoch" => %epoch, + "from" => %from + ); + return false; + } + + // Is the sender a member of the configuration `epoch`? + // Was the sender a member of the configuration at `old_epoch`? + let Some(expected_digest) = config.members.get(&from) else { + warn!( + log, + "Received Share from unexpected node"; + "epoch" => %epoch, + "from" => %from + ); + return false; + }; + + // Does the share hash match what we expect? 
+ let mut digest = Sha3_256Digest::default(); + share.digest::<sha3::Sha3_256>(&mut digest.0); + if digest != *expected_digest { + error!( + log, + "Received share with invalid digest"; + "epoch" => %epoch, + "from" => %from + ); + return false; + } + + true +} diff --git a/trust-quorum/src/messages.rs b/trust-quorum/protocol/src/messages.rs similarity index 100% rename from trust-quorum/src/messages.rs rename to trust-quorum/protocol/src/messages.rs diff --git a/trust-quorum/src/node.rs b/trust-quorum/protocol/src/node.rs similarity index 100% rename from trust-quorum/src/node.rs rename to trust-quorum/protocol/src/node.rs diff --git a/trust-quorum/src/node_ctx.rs b/trust-quorum/protocol/src/node_ctx.rs similarity index 100% rename from trust-quorum/src/node_ctx.rs rename to trust-quorum/protocol/src/node_ctx.rs diff --git a/trust-quorum/src/persistent_state.rs b/trust-quorum/protocol/src/persistent_state.rs similarity index 100% rename from trust-quorum/src/persistent_state.rs rename to trust-quorum/protocol/src/persistent_state.rs diff --git a/trust-quorum/src/rack_secret_loader.rs b/trust-quorum/protocol/src/rack_secret_loader.rs similarity index 100% rename from trust-quorum/src/rack_secret_loader.rs rename to trust-quorum/protocol/src/rack_secret_loader.rs diff --git a/trust-quorum/src/validators.rs b/trust-quorum/protocol/src/validators.rs similarity index 100% rename from trust-quorum/src/validators.rs rename to trust-quorum/protocol/src/validators.rs diff --git a/trust-quorum/tests/cluster.proptest-regressions b/trust-quorum/protocol/tests/cluster.proptest-regressions similarity index 100% rename from trust-quorum/tests/cluster.proptest-regressions rename to trust-quorum/protocol/tests/cluster.proptest-regressions diff --git a/trust-quorum/tests/cluster.rs b/trust-quorum/protocol/tests/cluster.rs similarity index 99% rename from trust-quorum/tests/cluster.rs rename to trust-quorum/protocol/tests/cluster.rs index 39c1367661b..e1b14994dd4 100644 --- a/trust-quorum/tests/cluster.rs +++ b/trust-quorum/protocol/tests/cluster.rs @@ -15,7 +15,7 @@ use secrecy::ExposeSecret; use slog::{Logger, info, o}; use std::collections::BTreeSet; use test_strategy::{Arbitrary, proptest}; -use trust_quorum::{ +use trust_quorum_protocol::{ BaseboardId, CoordinatorOperation, Epoch, NodeCallerCtx, NodeCommonCtx, Threshold, }; diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs index 584e4b1a6bb..2feae008779 100644 --- a/trust-quorum/src/connection_manager.rs +++ b/trust-quorum/src/connection_manager.rs @@ -5,7 +5,7 @@ //! A mechanism for maintaining a full mesh of trust quorum node connections use crate::established_conn::EstablishedConn; -use crate::{BaseboardId, PeerMsg}; +use trust_quorum_protocol::{BaseboardId, PeerMsg}; // TODO: Move or copy this to this crate? use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; @@ -45,7 +45,7 @@ pub enum AcceptError { } /// Messages sent from the main task to the connection managing tasks -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub enum MainToConnMsg { #[expect(unused)] Msg(WireMsg), } @@ -58,7 +58,7 @@ pub enum MainToConnMsg { /// /// All `WireMsg`s sent between nodes is prefixed with a 4 byte size header used /// for framing.
-#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub enum WireMsg { /// Used for connection keep alive Ping, @@ -84,19 +84,35 @@ pub enum WireMsg { /// We include `task_id` to differentiate which task they come from so we can /// exclude requests from tasks that have been cancelled or have been told to /// shutdown. -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub struct ConnToMainMsg { pub task_id: task::Id, pub msg: ConnToMainMsgInner, } -#[derive(Debug, PartialEq)] +#[derive(Debug)] pub enum ConnToMainMsgInner { - Accepted { addr: SocketAddrV6, peer_id: BaseboardId }, - Connected { addr: SocketAddrV6, peer_id: BaseboardId }, - Received { from: BaseboardId, msg: PeerMsg }, - ReceivedNetworkConfig { from: BaseboardId, config: NetworkConfig }, - Disconnected { peer_id: BaseboardId }, + Accepted { + addr: SocketAddrV6, + peer_id: BaseboardId, + }, + Connected { + addr: SocketAddrV6, + peer_id: BaseboardId, + }, + #[expect(unused)] + Received { + from: BaseboardId, + msg: PeerMsg, + }, + #[expect(unused)] + ReceivedNetworkConfig { + from: BaseboardId, + config: NetworkConfig, + }, + Disconnected { + peer_id: BaseboardId, + }, } pub struct TaskHandle { diff --git a/trust-quorum/src/established_conn.rs b/trust-quorum/src/established_conn.rs index 5e4e9e12641..77cb2d73cbd 100644 --- a/trust-quorum/src/established_conn.rs +++ b/trust-quorum/src/established_conn.rs @@ -4,9 +4,7 @@ //! An individual sprockets connection running in its own task -use crate::{ - BaseboardId, ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg, -}; +use crate::{ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg}; use bytes::Buf; use serde::Serialize; use slog::{Logger, debug, error, o, warn}; @@ -19,6 +17,7 @@ use tokio::net::TcpStream; use tokio::sync::mpsc; use tokio::task; use tokio::time::{Instant, MissedTickBehavior, interval}; +use trust_quorum_protocol::BaseboardId; /// Max buffer size of a connection const CONN_BUF_SIZE: usize = 1024 * 1024; diff --git a/trust-quorum/src/lib.rs b/trust-quorum/src/lib.rs index 457403999e4..f508647c889 100644 --- a/trust-quorum/src/lib.rs +++ b/trust-quorum/src/lib.rs @@ -2,168 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Implementation of the oxide rack trust quorum protocol -//! -//! This protocol is written as a -//! [no-IO](https://sans-io.readthedocs.io/how-to-sans-io.html) implementation. -//! All persistent state and all networking is managed outside of this -//! implementation. +//! 
Async trust-quorum library code for integrating with sled-agent -use crypto::Sha3_256Digest; -use daft::Diffable; -use derive_more::Display; -use gfss::shamir::Share; -use serde::{Deserialize, Serialize}; -pub use sled_agent_types::sled::BaseboardId; -use slog::{Logger, error, warn}; - -mod compute_key_share; -mod configuration; -mod coordinator_state; -pub(crate) mod crypto; -pub(crate) mod established_conn; -mod messages; -mod node; -mod node_ctx; -mod persistent_state; -#[allow(unused)] -mod rack_secret_loader; -mod validators; -pub use configuration::Configuration; -pub use coordinator_state::{ - CoordinatingMsg, CoordinatorOperation, CoordinatorState, - CoordinatorStateDiff, -}; -pub use rack_secret_loader::{LoadRackSecretError, RackSecretLoaderDiff}; -pub use validators::{ - ValidatedLrtqUpgradeMsgDiff, ValidatedReconfigureMsgDiff, -}; -mod alarm; mod connection_manager; +pub(crate) mod established_conn; mod task; pub(crate) use connection_manager::{ ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg, }; pub use task::NodeTask; - -pub use alarm::Alarm; -pub use crypto::RackSecret; -pub use messages::*; -pub use node::{Node, NodeDiff}; -// public only for docs. -pub use node_ctx::NodeHandlerCtx; -pub use node_ctx::{NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff}; -pub use persistent_state::{ - ExpungedMetadata, PersistentState, PersistentStateSummary, -}; - -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Hash, - Serialize, - Deserialize, - Display, - Diffable, -)] -#[daft(leaf)] -pub struct Epoch(pub u64); - -impl Epoch { - pub fn next(&self) -> Epoch { - Epoch(self.0.checked_add(1).expect("fewer than 2^64 epochs")) - } -} - -/// The number of shares required to reconstruct the rack secret -/// -/// Typically referred to as `k` in the docs -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Serialize, - Deserialize, - Display, - Diffable, -)] -#[daft(leaf)] -pub struct Threshold(pub u8); - -/// A container to make messages between trust quorum nodes routable -#[derive(Debug, Clone, Serialize, Deserialize, Diffable)] -#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))] -#[daft(leaf)] -pub struct Envelope { - pub to: BaseboardId, - pub from: BaseboardId, - pub msg: PeerMsg, -} - -#[cfg(feature = "testing")] -impl Envelope { - pub fn equal_except_for_crypto_data(&self, other: &Self) -> bool { - self.to == other.to - && self.from == other.from - && self.msg.equal_except_for_crypto_data(&other.msg) - } -} - -/// Check if a received share is valid for a given configuration -/// -/// Return true if valid, false otherwise. -pub fn validate_share( - log: &Logger, - config: &Configuration, - from: &BaseboardId, - epoch: Epoch, - share: &Share, -) -> bool { - // Are we trying to retrieve shares for `epoch`? - if epoch != config.epoch { - warn!( - log, - "Received Share from node with wrong epoch"; - "received_epoch" => %epoch, - "from" => %from - ); - return false; - } - - // Is the sender a member of the configuration `epoch`? - // Was the sender a member of the configuration at `old_epoch`? - let Some(expected_digest) = config.members.get(&from) else { - warn!( - log, - "Received Share from unexpected node"; - "epoch" => %epoch, - "from" => %from - ); - return false; - }; - - // Does the share hash match what we expect?
- let mut digest = Sha3_256Digest::default(); - share.digest::<sha3::Sha3_256>(&mut digest.0); - if digest != *expected_digest { - error!( - log, - "Received share with invalid digest"; - "epoch" => %epoch, - "from" => %from - ); - return false; - } - - true -} diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs index 44fd2338eb0..9ac34c80470 100644 --- a/trust-quorum/src/task.rs +++ b/trust-quorum/src/task.rs @@ -2,12 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! A runnable async trust quorum node that wraps the sans-io [`crate::Node`] +//! A runnable async trust quorum node that wraps the sans-io +//! [`trust_quorum_protocol::Node`] use crate::connection_manager::{ ConnMgr, ConnMgrStatus, ConnToMainMsg, ConnToMainMsgInner, }; -use crate::{BaseboardId, Node, NodeCtx}; use slog::{Logger, debug, error, info, o}; use sprockets_tls::keys::SprocketsConfig; use std::collections::BTreeSet; @@ -16,6 +16,7 @@ use thiserror::Error; use tokio::sync::mpsc::error::SendError; use tokio::sync::oneshot::error::RecvError; use tokio::sync::{mpsc, oneshot}; +use trust_quorum_protocol::{BaseboardId, Node, NodeCtx}; #[derive(Debug, Clone)] pub struct Config { diff --git a/trust-quorum/test-utils/Cargo.toml b/trust-quorum/test-utils/Cargo.toml index 33181dc1ddf..853bdd464f7 100644 --- a/trust-quorum/test-utils/Cargo.toml +++ b/trust-quorum/test-utils/Cargo.toml @@ -19,6 +19,6 @@ serde.workspace = true serde_json.workspace = true sled-hardware-types.workspace = true slog.workspace = true -trust-quorum = { workspace = true, features = ["danger_partial_eq_ct_wrapper", "testing"] } +trust-quorum-protocol = { workspace = true, features = ["danger_partial_eq_ct_wrapper", "testing"] } omicron-workspace-hack.workspace = true diff --git a/trust-quorum/test-utils/src/event.rs b/trust-quorum/test-utils/src/event.rs index 3544456b3c3..8bead3c53e2 100644 --- a/trust-quorum/test-utils/src/event.rs +++ b/trust-quorum/test-utils/src/event.rs @@ -7,7 +7,7 @@ use crate::nexus::{NexusConfig, NexusReply}; use serde::{Deserialize, Serialize}; use std::collections::BTreeSet; -use trust_quorum::{BaseboardId, Envelope, Epoch}; +use trust_quorum_protocol::{BaseboardId, Envelope, Epoch}; /// An event that can be fed into our system under test (SUT) /// diff --git a/trust-quorum/test-utils/src/lib.rs b/trust-quorum/test-utils/src/lib.rs index 6cc7d617f97..9bfffdde256 100644 --- a/trust-quorum/test-utils/src/lib.rs +++ b/trust-quorum/test-utils/src/lib.rs @@ -13,7 +13,7 @@ pub use event::Event; pub use event_log::EventLog; pub use state::TqState; -use trust_quorum::BaseboardId; +use trust_quorum_protocol::BaseboardId; /// All possible members used in a test pub fn member_universe(size: usize) -> Vec<BaseboardId> { diff --git a/trust-quorum/test-utils/src/nexus.rs b/trust-quorum/test-utils/src/nexus.rs index d59ec53cc9c..c2665f37870 100644 --- a/trust-quorum/test-utils/src/nexus.rs +++ b/trust-quorum/test-utils/src/nexus.rs @@ -10,7 +10,7 @@ use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; use omicron_uuid_kinds::RackUuid; use serde::{Deserialize, Serialize}; use std::collections::BTreeSet; -use trust_quorum::{ +use trust_quorum_protocol::{ BaseboardId, Epoch, LrtqUpgradeMsg, ReconfigureMsg, Threshold, }; diff --git a/trust-quorum/test-utils/src/state.rs b/trust-quorum/test-utils/src/state.rs index 59b8524d8b7..3c1b31e5a32 100644 --- a/trust-quorum/test-utils/src/state.rs +++ b/trust-quorum/test-utils/src/state.rs @@ -17,7 +17,7 @@ use
sled_hardware_types::Baseboard; use slog::{Logger, info}; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Display; -use trust_quorum::{ +use trust_quorum_protocol::{ BaseboardId, Configuration, CoordinatingMsg, CoordinatorOperation, CoordinatorStateDiff, Envelope, Epoch, LoadRackSecretError, Node, NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff, NodeDiff, PeerMsgKind, diff --git a/trust-quorum/tqdb/Cargo.toml b/trust-quorum/tqdb/Cargo.toml index 4436cc99fbc..18242508aa1 100644 --- a/trust-quorum/tqdb/Cargo.toml +++ b/trust-quorum/tqdb/Cargo.toml @@ -20,7 +20,7 @@ reconfigurator-cli.workspace = true serde_json.workspace = true slog.workspace = true tabled.workspace = true -trust-quorum = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] } +trust-quorum-protocol = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] } trust-quorum-test-utils.workspace = true omicron-workspace-hack.workspace = true diff --git a/trust-quorum/tqdb/src/bin/tqdb/main.rs b/trust-quorum/tqdb/src/bin/tqdb/main.rs index a593e697e3b..12e163f801b 100644 --- a/trust-quorum/tqdb/src/bin/tqdb/main.rs +++ b/trust-quorum/tqdb/src/bin/tqdb/main.rs @@ -24,7 +24,7 @@ use std::fmt::Write; use std::fs; use std::io::IsTerminal; use tabled::Tabled; -use trust_quorum::BaseboardId; +use trust_quorum_protocol::BaseboardId; use trust_quorum_test_utils::{Event, TqState}; fn main() -> Result<(), anyhow::Error> { From aadc7abab0006d20d27809ca503f0b5b1776cc69 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 23 Oct 2025 03:58:51 +0000 Subject: [PATCH 14/14] hakari --- Cargo.lock | 2 ++ workspace-hack/Cargo.toml | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index dd5980e1ec9..8fba4430fcd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8899,6 +8899,7 @@ dependencies = [ "clang-sys", "clap", "clap_builder", + "const-oid", "cookie", "crossbeam-epoch", "crossbeam-utils", @@ -8907,6 +8908,7 @@ dependencies = [ "curve25519-dalek", "daft", "data-encoding", + "der", "digest", "dof 0.3.0", "dof 0.4.0", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 096d5d26dcd..6ef9f57bbac 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -35,6 +35,7 @@ chrono = { version = "0.4.42", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.48", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.48", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } crossbeam-utils = { version = "0.8.21" } crossterm = { version = "0.28.1", features = ["event-stream", "serde"] } @@ -42,6 +43,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr curve25519-dalek = { version = "4.1.3", features = ["digest", "legacy_compatibility", "rand_core"] } daft = { version = "0.1.4", features = ["derive", "newtype-uuid1", "oxnet01", "uuid1"] } data-encoding = { version = "2.9.0" } +der = { version = "0.7.10", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } ecdsa = { version = "0.16.9", features = ["pem", "signing", "std", "verifying"] } ed25519-dalek = { version = "2.1.1", features = ["digest", "pem", "rand_core"] } @@ -174,6 +176,7 @@ 
chrono = { version = "0.4.42", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.48", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.48", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } crossbeam-utils = { version = "0.8.21" } crossterm = { version = "0.28.1", features = ["event-stream", "serde"] } @@ -181,6 +184,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr curve25519-dalek = { version = "4.1.3", features = ["digest", "legacy_compatibility", "rand_core"] } daft = { version = "0.1.4", features = ["derive", "newtype-uuid1", "oxnet01", "uuid1"] } data-encoding = { version = "2.9.0" } +der = { version = "0.7.10", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } ecdsa = { version = "0.16.9", features = ["pem", "signing", "std", "verifying"] } ed25519-dalek = { version = "2.1.1", features = ["digest", "pem", "rand_core"] }
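
Editor's note (illustrative sketch, not part of the patch series): [PATCH 11/14] above replaces the graceful `MainToConnMsg::Close` handshake with direct task abortion, storing a `tokio::task::AbortHandle` in each `TaskHandle` and calling `abort()` on teardown. The standalone sketch below shows that same AbortHandle/JoinSet pattern in isolation; everything here other than the tokio `JoinSet`, `AbortHandle`, and `JoinError` APIs is hypothetical stand-in code, not the patch's actual `ConnMgr` implementation.

use std::future::pending;
use tokio::task::JoinSet;

#[tokio::main]
async fn main() {
    let mut join_set: JoinSet<()> = JoinSet::new();

    // Stand-in for spawning a per-connection task; the returned AbortHandle is
    // the value the patch now keeps in `TaskHandle::abort_handle`.
    let abort_handle = join_set.spawn(async {
        // Pretend to service a connection forever.
        pending::<()>().await;
    });

    // The task id is still available through the handle, which is how the
    // patched `TaskHandle::task_id()` accessor works.
    let _task_id = abort_handle.id();

    // Teardown: abort the task directly instead of sending a Close message
    // and waiting for the task to exit on its own.
    abort_handle.abort();

    // The JoinSet then reports the aborted task as cancelled.
    let result = join_set.join_next().await.expect("one task was spawned");
    assert!(result.unwrap_err().is_cancelled());
}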