diff --git a/Cargo.lock b/Cargo.lock index 91e68b4b50..8fba4430fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -536,6 +536,41 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "attest-data" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/dice-util?rev=10952e8d9599b735b85d480af3560a11700e5b64#10952e8d9599b735b85d480af3560a11700e5b64" +dependencies = [ + "const-oid", + "der", + "getrandom 0.3.4", + "hex", + "hubpack", + "rats-corim", + "salty", + "serde", + "serde_with", + "sha3", + "static_assertions", + "thiserror 2.0.17", +] + +[[package]] +name = "attest-mock" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/dice-util?rev=10952e8d9599b735b85d480af3560a11700e5b64#10952e8d9599b735b85d480af3560a11700e5b64" +dependencies = [ + "anyhow", + "attest-data 0.5.0", + "clap", + "hex", + "hubpack", + "knuffel", + "miette", + "rats-corim", + "serde_json", +] + [[package]] name = "atty" version = "0.2.14" @@ -624,6 +659,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "backtrace-ext" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "537beee3be4a18fb023b570f80e3ae28003db9167a751266b259926e25539d50" +dependencies = [ + "backtrace", +] + [[package]] name = "base16ct" version = "0.2.0" @@ -1343,6 +1387,15 @@ dependencies = [ "serde", ] +[[package]] +name = "chumsky" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +dependencies = [ + "hashbrown 0.14.5", +] + [[package]] name = "ciborium" version = "0.2.2" @@ -1412,7 +1465,7 @@ dependencies = [ "anstyle", "clap_lex", "strsim", - "terminal_size", + "terminal_size 0.4.0", ] [[package]] @@ -2642,7 +2695,7 @@ name = "dice-verifier" version = "0.3.0-pre0" source = "git+https://github.com/oxidecomputer/dice-util?rev=4b408edc1d00f108ddf635415d783e6f12fe9641#4b408edc1d00f108ddf635415d783e6f12fe9641" dependencies = [ - "attest-data", + "attest-data 0.4.0", "const-oid", "ed25519-dalek", "env_logger", @@ -3036,14 +3089,14 @@ dependencies = [ "indent_write", "newtype_derive", "openapiv3", - "owo-colors", + "owo-colors 4.2.2", "paste", "semver 1.0.27", "serde_json", "sha2", "similar", - "supports-color", - "textwrap", + "supports-color 3.0.2", + "textwrap 0.16.2", "thiserror 2.0.17", ] @@ -4209,6 +4262,16 @@ dependencies = [ "ahash", ] +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.15.4" @@ -4290,6 +4353,9 @@ name = "heck" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +dependencies = [ + "unicode-segmentation", +] [[package]] name = "heck" @@ -5618,6 +5684,33 @@ dependencies = [ "zeroize", ] +[[package]] +name = "knuffel" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04bee6ddc6071011314b1ce4f7705fef6c009401dba4fd22cb0009db6a177413" +dependencies = [ + "base64 0.21.7", + "chumsky", + "knuffel-derive", + "miette", + "thiserror 1.0.69", + "unicode-width 0.1.14", +] + +[[package]] +name = "knuffel-derive" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91977f56c49cfb961e3d840e2e7c6e4a56bde7283898cf606861f1421348283d" +dependencies = [ + "heck 0.4.1", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "kstat-macro" version = "0.1.0" @@ -6234,6 +6327,38 @@ dependencies = [ "tokio", ] +[[package]] +name = "miette" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59bb584eaeeab6bd0226ccf3509a69d7936d148cf3d036ad350abe35e8c6856e" +dependencies = [ + "backtrace", + "backtrace-ext", + "is-terminal", + "miette-derive", + "once_cell", + "owo-colors 3.5.0", + "supports-color 2.1.0", + "supports-hyperlinks", + "supports-unicode", + "terminal_size 0.1.17", + "textwrap 0.15.2", + "thiserror 1.0.69", + "unicode-width 0.1.14", +] + +[[package]] +name = "miette-derive" +version = "5.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "mime" version = "0.3.17" @@ -7366,7 +7491,7 @@ dependencies = [ "swrite", "tabled 0.15.0", "test-strategy", - "textwrap", + "textwrap 0.16.2", "thiserror 2.0.17", "tokio", "tough", @@ -8073,7 +8198,7 @@ dependencies = [ "serde", "slog", "slog-error-chain", - "textwrap", + "textwrap 0.16.2", "tokio", "uuid", ] @@ -8363,7 +8488,7 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "oxide-tokio-rt", "oximeter-client", "oximeter-db", @@ -8382,9 +8507,9 @@ dependencies = [ "strum 0.27.2", "subprocess", "support-bundle-viewer", - "supports-color", + "supports-color 3.0.2", "tabled 0.15.0", - "textwrap", + "textwrap 0.16.2", "tokio", "tufaceous-artifact", "unicode-width 0.1.14", @@ -8489,7 +8614,7 @@ dependencies = [ "repo-depot-client", "serde_json", "slog", - "supports-color", + "supports-color 3.0.2", "tokio", "update-engine", "uuid", @@ -8774,6 +8899,7 @@ dependencies = [ "clang-sys", "clap", "clap_builder", + "const-oid", "cookie", "crossbeam-epoch", "crossbeam-utils", @@ -8782,6 +8908,7 @@ dependencies = [ "curve25519-dalek", "daft", "data-encoding", + "der", "digest", "dof 0.3.0", "dof 0.4.0", @@ -8829,6 +8956,7 @@ dependencies = [ "log", "managed", "memchr", + "miniz_oxide", "mio", "newtype-uuid", "nix 0.29.0", @@ -9121,6 +9249,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + [[package]] name = "owo-colors" version = "4.2.2" @@ -10134,6 +10268,36 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "pki-playground" +version = "0.2.0" +source = "git+https://github.com/oxidecomputer/pki-playground?rev=7600756029ce046a02c6234aa84ce230cc5eaa04#7600756029ce046a02c6234aa84ce230cc5eaa04" +dependencies = [ + "camino", + "clap", + "const-oid", + "der", + "digest", + "ed25519-dalek", + "flagset", + "hex", + "ipnet", + "knuffel", + "miette", + "p384", + "pem-rfc7468", + "pkcs8", + "rand 0.8.5", + "rsa", + "sha1", + "sha2", + "sha3", + "signature", + "spki", + "x509-cert", + "zeroize", +] + [[package]] name = "plain" version = "0.2.3" @@ -11484,9 +11648,9 @@ dependencies = [ [[package]] name = "rsa" -version = "0.9.6" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0e5124fcb30e76a7e79bfee683a2746db83784b86289f6251b54b7950a0dfc" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" dependencies = [ "const-oid", "digest", @@ -13115,7 +13279,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.106", @@ -13195,10 +13359,10 @@ dependencies = [ [[package]] name = "sprockets-tls" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/sprockets.git?rev=7da1f0b5dcd3d631da18b43ba78a84b1a2b425ee#7da1f0b5dcd3d631da18b43ba78a84b1a2b425ee" +source = "git+https://github.com/oxidecomputer/sprockets.git?rev=dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210#dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" dependencies = [ "anyhow", - "attest-data", + "attest-data 0.4.0", "camino", "cfg-if", "clap", @@ -13225,6 +13389,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "sprockets-tls-test-utils" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/sprockets.git?rev=dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210#dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" +dependencies = [ + "camino", + "pki-playground", +] + [[package]] name = "sqlformat" version = "0.3.5" @@ -13497,6 +13670,16 @@ dependencies = [ "zip 4.2.0", ] +[[package]] +name = "supports-color" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6398cde53adc3c4557306a96ce67b302968513830a77a95b2b17305d9719a89" +dependencies = [ + "is-terminal", + "is_ci", +] + [[package]] name = "supports-color" version = "3.0.2" @@ -13506,6 +13689,24 @@ dependencies = [ "is_ci", ] +[[package]] +name = "supports-hyperlinks" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84231692eb0d4d41e4cdd0cabfdd2e6cd9e255e65f80c9aa7c98dd502b4233d" +dependencies = [ + "is-terminal", +] + +[[package]] +name = "supports-unicode" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f850c19edd184a205e883199a261ed44471c81e39bd95b1357f5febbef00e77a" +dependencies = [ + "is-terminal", +] + [[package]] name = "swrite" version = "0.1.0" @@ -13755,6 +13956,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "terminal_size" version = "0.4.0" @@ -13802,6 +14013,17 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "textwrap" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width 0.1.14", +] + [[package]] name = "textwrap" version = "0.16.2" @@ -13809,7 +14031,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c13547615a44dc9c452a8a534638acdf07120d4b6847c8178705da06306a3057" dependencies = [ "smawk", - "terminal_size", + "terminal_size 0.4.0", "unicode-linebreak", "unicode-width 0.2.0", ] @@ -14412,7 +14634,7 @@ dependencies = [ "serde_json", "slog", "tabled 0.15.0", - "trust-quorum", + "trust-quorum-protocol", "trust-quorum-test-utils", ] @@ -14582,13 +14804,16 @@ version = "0.1.0" dependencies = [ "anyhow", "assert_matches", - "bcs", + "attest-mock", "bootstore", + "bytes", "camino", "chacha20poly1305", + "ciborium", "daft", "derive_more 0.99.20", "dropshot", + "futures", "gfss", "hex", "hkdf", @@ -14606,11 +14831,54 @@ dependencies = [ "sled-agent-types", "slog", "slog-error-chain", + "sprockets-tls", + "sprockets-tls-test-utils", "static_assertions", "subtle", "test-strategy", "thiserror 2.0.17", "tokio", + "trust-quorum-protocol", + "trust-quorum-test-utils", + "uuid", + "zeroize", +] + +[[package]] +name = "trust-quorum-protocol" +version = "0.1.0" +dependencies = [ + "assert_matches", + "attest-mock", + "bootstore", + "bytes", + "camino", + "chacha20poly1305", + "ciborium", + "daft", + "derive_more 0.99.20", + "dropshot", + "gfss", + "hex", + "hkdf", + "iddqd", + "omicron-test-utils", + "omicron-uuid-kinds", + "omicron-workspace-hack", + "proptest", + "rand 0.9.2", + "secrecy 0.10.3", + "serde", + "serde_json", + "serde_with", + "sha3", + "sled-agent-types", + "slog", + "slog-error-chain", + "static_assertions", + "subtle", + "test-strategy", + "thiserror 2.0.17", "trust-quorum-test-utils", "uuid", "zeroize", @@ -14633,7 +14901,7 @@ dependencies = [ "serde_json", "sled-hardware-types", "slog", - "trust-quorum", + "trust-quorum-protocol", ] [[package]] @@ -15115,14 +15383,14 @@ dependencies = [ "linear-map", "omicron-test-utils", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "petgraph 0.8.2", "schemars 0.8.22", "serde", "serde_json", "serde_with", "slog", - "supports-color", + "supports-color 3.0.2", "swrite", "tokio", "tokio-stream", @@ -15652,7 +15920,7 @@ dependencies = [ "omicron-common", "omicron-passwords", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "proptest", "ratatui", "reqwest", @@ -15666,9 +15934,9 @@ dependencies = [ "slog-async", "slog-envlogger", "slog-term", - "supports-color", + "supports-color 3.0.2", "tempfile", - "textwrap", + "textwrap 0.16.2", "tokio", "tokio-util", "toml 0.8.23", @@ -15695,7 +15963,7 @@ dependencies = [ "maplit", "omicron-common", "omicron-workspace-hack", - "owo-colors", + "owo-colors 4.2.2", "oxnet", "schemars 0.8.22", "serde", @@ -16365,7 +16633,7 @@ dependencies = [ "serde", "swrite", "tabled 0.15.0", - "textwrap", + "textwrap 0.16.2", "toml 0.8.23", "usdt 0.5.0", ] diff --git a/Cargo.toml b/Cargo.toml index 630ac028cc..b376e3100c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,6 +143,7 @@ members = [ "test-utils", "trust-quorum", "trust-quorum/gfss", + "trust-quorum/protocol", "trust-quorum/test-utils", "trust-quorum/tqdb", "typed-rng", @@ -304,6 +305,7 @@ default-members = [ "sp-sim", "trust-quorum", "trust-quorum/gfss", + "trust-quorum/protocol", "trust-quorum/test-utils", "trust-quorum/tqdb", "test-utils", @@ -370,6 +372,7 @@ assert_matches = "1.5.0" assert_cmd = "2.0.17" async-bb8-diesel = "0.2" async-trait = "0.1.89" +attest-mock = { git = "https://github.com/oxidecomputer/dice-util", rev = "10952e8d9599b735b85d480af3560a11700e5b64" } atomicwrites = "0.4.4" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } @@ -471,6 +474,7 @@ gateway-types = { path = "gateway-types" } gethostname = "0.5.0" gfss = { path = "trust-quorum/gfss" } trust-quorum = { path = "trust-quorum" } +trust-quorum-protocol = { path = "trust-quorum/protocol" } trust-quorum-test-utils = { path = "trust-quorum/test-utils" } glob = "0.3.2" guppy = "0.17.20" @@ -724,7 +728,8 @@ slog-term = "2.9.1" smf = "0.2" socket2 = { version = "0.5", features = ["all"] } sp-sim = { path = "sp-sim" } -sprockets-tls = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "7da1f0b5dcd3d631da18b43ba78a84b1a2b425ee" } +sprockets-tls = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" } +sprockets-tls-test-utils = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" } sqlformat = "0.3.5" sqlparser = { version = "0.45.0", features = [ "visitor" ] } static_assertions = "1.1.0" diff --git a/sled-agent/src/bootstrap/config.rs b/sled-agent/src/bootstrap/config.rs index 6833cb7607..3b6b5e3e44 100644 --- a/sled-agent/src/bootstrap/config.rs +++ b/sled-agent/src/bootstrap/config.rs @@ -7,3 +7,4 @@ pub const BOOTSTRAP_AGENT_HTTP_PORT: u16 = 80; pub const BOOTSTRAP_AGENT_RACK_INIT_PORT: u16 = 12346; pub const BOOTSTORE_PORT: u16 = 12347; +pub const TRUST_QUORUM_PORT: u16 = 12349; diff --git a/trust-quorum/Cargo.toml b/trust-quorum/Cargo.toml index 0b0dfefb0f..5f5ad0e88a 100644 --- a/trust-quorum/Cargo.toml +++ b/trust-quorum/Cargo.toml @@ -3,18 +3,21 @@ name = "trust-quorum" version = "0.1.0" edition = "2021" license = "MPL-2.0" +description = "trust quorum library for use by bootstrap agent" [lints] workspace = true [dependencies] anyhow.workspace = true -bcs.workspace = true bootstore.workspace = true +bytes.workspace = true camino.workspace = true chacha20poly1305.workspace = true +ciborium.workspace = true daft.workspace = true derive_more.workspace = true +futures.workspace = true gfss.workspace = true hex.workspace = true hkdf.workspace = true @@ -28,29 +31,23 @@ sha3.workspace = true sled-agent-types.workspace = true slog.workspace = true slog-error-chain.workspace = true +sprockets-tls.workspace = true static_assertions.workspace = true subtle.workspace = true thiserror.workspace = true tokio.workspace = true +trust-quorum-protocol.workspace = true uuid.workspace = true zeroize.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] assert_matches.workspace = true +attest-mock.workspace = true dropshot.workspace = true omicron-test-utils.workspace = true proptest.workspace = true serde_json.workspace = true test-strategy.workspace = true trust-quorum-test-utils.workspace = true - -[features] -# Impl `PartialEq` and `Eq` for types implementing `subtle::ConstantTimeEq` when -# this feature is enabled. -# -# This is of unknown risk. The rust compiler may obviate the security of using -# subtle when we do this. On the other hand its very useful for testing and -# debugging outside of production. -danger_partial_eq_ct_wrapper = ["gfss/danger_partial_eq_ct_wrapper"] -testing = [] +sprockets-tls-test-utils.workspace = true diff --git a/trust-quorum/protocol/Cargo.toml b/trust-quorum/protocol/Cargo.toml new file mode 100644 index 0000000000..9a5d42f7d9 --- /dev/null +++ b/trust-quorum/protocol/Cargo.toml @@ -0,0 +1,57 @@ +[package] +name = "trust-quorum-protocol" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" +description = "sans-io trust quorum protocol implementation" + +[lints] +workspace = true + +[dependencies] +bootstore.workspace = true +bytes.workspace = true +camino.workspace = true +chacha20poly1305.workspace = true +ciborium.workspace = true +daft.workspace = true +derive_more.workspace = true +gfss.workspace = true +hex.workspace = true +hkdf.workspace = true +iddqd.workspace = true +omicron-uuid-kinds.workspace = true +rand = { workspace = true, features = ["os_rng"] } +secrecy.workspace = true +serde.workspace = true +serde_with.workspace = true +sha3.workspace = true +sled-agent-types.workspace = true +slog.workspace = true +slog-error-chain.workspace = true +static_assertions.workspace = true +subtle.workspace = true +thiserror.workspace = true +uuid.workspace = true +zeroize.workspace = true +omicron-workspace-hack.workspace = true + +[dev-dependencies] +assert_matches.workspace = true +attest-mock.workspace = true +dropshot.workspace = true +omicron-test-utils.workspace = true +proptest.workspace = true +serde_json.workspace = true +test-strategy.workspace = true +trust-quorum-test-utils.workspace = true + +[features] +# Impl `PartialEq` and `Eq` for types implementing `subtle::ConstantTimeEq` when +# this feature is enabled. +# +# This is of unknown risk. The rust compiler may obviate the security of using +# subtle when we do this. On the other hand its very useful for testing and +# debugging outside of production. +danger_partial_eq_ct_wrapper = ["gfss/danger_partial_eq_ct_wrapper"] +testing = [] diff --git a/trust-quorum/src/alarm.rs b/trust-quorum/protocol/src/alarm.rs similarity index 100% rename from trust-quorum/src/alarm.rs rename to trust-quorum/protocol/src/alarm.rs diff --git a/trust-quorum/src/compute_key_share.rs b/trust-quorum/protocol/src/compute_key_share.rs similarity index 100% rename from trust-quorum/src/compute_key_share.rs rename to trust-quorum/protocol/src/compute_key_share.rs diff --git a/trust-quorum/src/configuration.rs b/trust-quorum/protocol/src/configuration.rs similarity index 100% rename from trust-quorum/src/configuration.rs rename to trust-quorum/protocol/src/configuration.rs diff --git a/trust-quorum/src/coordinator_state.rs b/trust-quorum/protocol/src/coordinator_state.rs similarity index 100% rename from trust-quorum/src/coordinator_state.rs rename to trust-quorum/protocol/src/coordinator_state.rs diff --git a/trust-quorum/src/crypto.rs b/trust-quorum/protocol/src/crypto.rs similarity index 100% rename from trust-quorum/src/crypto.rs rename to trust-quorum/protocol/src/crypto.rs diff --git a/trust-quorum/protocol/src/lib.rs b/trust-quorum/protocol/src/lib.rs new file mode 100644 index 0000000000..0d5c522b2d --- /dev/null +++ b/trust-quorum/protocol/src/lib.rs @@ -0,0 +1,162 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementation of the oxide rack trust quorum protocol +//! +//! This protocol is written as a +//! [no-IO](https://sans-io.readthedocs.io/how-to-sans-io.html) implementation. +//! All persistent state and all networking is managed outside of this +//! implementation. + +use crypto::Sha3_256Digest; +use daft::Diffable; +use derive_more::Display; +use gfss::shamir::Share; +use serde::{Deserialize, Serialize}; +pub use sled_agent_types::sled::BaseboardId; +use slog::{Logger, error, warn}; + +mod alarm; +mod compute_key_share; +mod configuration; +mod coordinator_state; +pub(crate) mod crypto; +mod messages; +mod node; +mod node_ctx; +mod persistent_state; +#[allow(unused)] +mod rack_secret_loader; +mod validators; + +pub use configuration::Configuration; +pub use coordinator_state::{ + CoordinatingMsg, CoordinatorOperation, CoordinatorState, + CoordinatorStateDiff, +}; +pub use rack_secret_loader::{LoadRackSecretError, RackSecretLoaderDiff}; +pub use validators::{ + ValidatedLrtqUpgradeMsgDiff, ValidatedReconfigureMsgDiff, +}; + +pub use alarm::Alarm; +pub use crypto::RackSecret; +pub use messages::*; +pub use node::{Node, NodeDiff}; +// public only for docs. +pub use node_ctx::NodeHandlerCtx; +pub use node_ctx::{NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff}; +pub use persistent_state::{ + ExpungedMetadata, PersistentState, PersistentStateSummary, +}; + +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Serialize, + Deserialize, + Display, + Diffable, +)] +#[daft(leaf)] +pub struct Epoch(pub u64); + +impl Epoch { + pub fn next(&self) -> Epoch { + Epoch(self.0.checked_add(1).expect("fewer than 2^64 epochs")) + } +} + +/// The number of shares required to reconstruct the rack secret +/// +/// Typically referred to as `k` in the docs +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Serialize, + Deserialize, + Display, + Diffable, +)] +#[daft(leaf)] +pub struct Threshold(pub u8); + +/// A container to make messages between trust quorum nodes routable +#[derive(Debug, Clone, Serialize, Deserialize, Diffable)] +#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))] +#[daft(leaf)] +pub struct Envelope { + pub to: BaseboardId, + pub from: BaseboardId, + pub msg: PeerMsg, +} + +#[cfg(feature = "testing")] +impl Envelope { + pub fn equal_except_for_crypto_data(&self, other: &Self) -> bool { + self.to == other.to + && self.from == other.from + && self.msg.equal_except_for_crypto_data(&other.msg) + } +} + +/// Check if a received share is valid for a given configuration +/// +/// Return true if valid, false otherwise. +pub fn validate_share( + log: &Logger, + config: &Configuration, + from: &BaseboardId, + epoch: Epoch, + share: &Share, +) -> bool { + // Are we trying to retrieve shares for `epoch`? + if epoch != config.epoch { + warn!( + log, + "Received Share from node with wrong epoch"; + "received_epoch" => %epoch, + "from" => %from + ); + return false; + } + + // Is the sender a member of the configuration `epoch`? + // Was the sender a member of the configuration at `old_epoch`? + let Some(expected_digest) = config.members.get(&from) else { + warn!( + log, + "Received Share from unexpected node"; + "epoch" => %epoch, + "from" => %from + ); + return false; + }; + + // Does the share hash match what we expect? + let mut digest = Sha3_256Digest::default(); + share.digest::(&mut digest.0); + if digest != *expected_digest { + error!( + log, + "Received share with invalid digest"; + "epoch" => %epoch, + "from" => %from + ); + return false; + } + + true +} diff --git a/trust-quorum/src/messages.rs b/trust-quorum/protocol/src/messages.rs similarity index 100% rename from trust-quorum/src/messages.rs rename to trust-quorum/protocol/src/messages.rs diff --git a/trust-quorum/src/node.rs b/trust-quorum/protocol/src/node.rs similarity index 100% rename from trust-quorum/src/node.rs rename to trust-quorum/protocol/src/node.rs diff --git a/trust-quorum/src/node_ctx.rs b/trust-quorum/protocol/src/node_ctx.rs similarity index 100% rename from trust-quorum/src/node_ctx.rs rename to trust-quorum/protocol/src/node_ctx.rs diff --git a/trust-quorum/src/persistent_state.rs b/trust-quorum/protocol/src/persistent_state.rs similarity index 100% rename from trust-quorum/src/persistent_state.rs rename to trust-quorum/protocol/src/persistent_state.rs diff --git a/trust-quorum/src/rack_secret_loader.rs b/trust-quorum/protocol/src/rack_secret_loader.rs similarity index 100% rename from trust-quorum/src/rack_secret_loader.rs rename to trust-quorum/protocol/src/rack_secret_loader.rs diff --git a/trust-quorum/src/validators.rs b/trust-quorum/protocol/src/validators.rs similarity index 100% rename from trust-quorum/src/validators.rs rename to trust-quorum/protocol/src/validators.rs diff --git a/trust-quorum/tests/cluster.proptest-regressions b/trust-quorum/protocol/tests/cluster.proptest-regressions similarity index 100% rename from trust-quorum/tests/cluster.proptest-regressions rename to trust-quorum/protocol/tests/cluster.proptest-regressions diff --git a/trust-quorum/tests/cluster.rs b/trust-quorum/protocol/tests/cluster.rs similarity index 99% rename from trust-quorum/tests/cluster.rs rename to trust-quorum/protocol/tests/cluster.rs index 39c1367661..e1b14994dd 100644 --- a/trust-quorum/tests/cluster.rs +++ b/trust-quorum/protocol/tests/cluster.rs @@ -15,7 +15,7 @@ use secrecy::ExposeSecret; use slog::{Logger, info, o}; use std::collections::BTreeSet; use test_strategy::{Arbitrary, proptest}; -use trust_quorum::{ +use trust_quorum_protocol::{ BaseboardId, CoordinatorOperation, Epoch, NodeCallerCtx, NodeCommonCtx, Threshold, }; diff --git a/trust-quorum/src/connection_manager.rs b/trust-quorum/src/connection_manager.rs new file mode 100644 index 0000000000..1b764ffbd6 --- /dev/null +++ b/trust-quorum/src/connection_manager.rs @@ -0,0 +1,755 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A mechanism for maintaining a full mesh of trust quorum node connections + +use crate::established_conn::EstablishedConn; +use trust_quorum_protocol::{BaseboardId, PeerMsg}; +// TODO: Move or copy this to this crate? +use bootstore::schemes::v0::NetworkConfig; +use camino::Utf8PathBuf; +use iddqd::{ + BiHashItem, BiHashMap, TriHashItem, TriHashMap, bi_upcast, tri_upcast, +}; +use serde::{Deserialize, Serialize}; +use slog::{Logger, debug, error, info, o, warn}; +use slog_error_chain::SlogInlineError; +use sprockets_tls::keys::SprocketsConfig; +use sprockets_tls::server::SprocketsAcceptor; +use std::collections::BTreeSet; +use std::net::{SocketAddr, SocketAddrV4, SocketAddrV6}; +use std::time::Duration; +use tokio::sync::mpsc; +use tokio::task::{self, AbortHandle, JoinSet}; +use tokio::time::{Interval, MissedTickBehavior, interval}; + +/// We only expect a handful of concurrent requests at most. +const CHANNEL_BOUND: usize = 10; + +// Time between checks to see if we need to reconnect to to any peers +pub const RECONNECT_TIME: Duration = Duration::from_secs(5); + +/// An error returned from `ConnMgr::accept` +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum AcceptError { + #[error("accepted connection from IPv4 address {addr}. Only IPv6 allowed.")] + Ipv4Accept { addr: SocketAddrV4 }, + + #[error("sprockets error")] + Sprockets( + #[from] + #[source] + sprockets_tls::Error, + ), +} + +/// Messages sent from the main task to the connection managing tasks +#[derive(Debug)] +pub enum MainToConnMsg { + #[expect(unused)] + Msg(WireMsg), +} + +/// All possible messages sent over established connections +/// +/// This include trust quorum related `PeerMsg`s, but also ancillary network +/// messages used for other purposes. +/// +/// All `WireMsg`s sent between nodes is prefixed with a 4 byte size header used +/// for framing. +#[derive(Debug, Serialize, Deserialize)] +pub enum WireMsg { + /// Used for connection keep alive + Ping, + /// Trust quorum peer messages + Tq(PeerMsg), + /// Early network configuration to enable NTP timesync + /// + /// Technically this is not part of the trust quorum protocol. However it is + /// necessary to gossip this information to all nodes on the system so that + /// each can establish NTP sync required for the rest of the control plane + /// to boot. In short, we can't have rack unlock without this information, + /// even if we can decrypt the drives. For simplicity, we just piggyback + /// this information on the trust quorum connections. This is why the + /// implementation of LRTQ lived inside the `bootstore` directory in the + /// `omicron` repo. This is technically an eventually consistent database + /// of tiny information layered on top of trust quorum. You can still think + /// of it as a bootstore, although, we no longer use that name. + NetworkConfig(NetworkConfig), +} + +/// Messages sent from connection managing tasks to the main peer task +/// +/// We include `task_id` to differentiate which task they come from so we can +/// exclude requests from tasks that have been cancelled or have been told to +/// shutdown. +#[derive(Debug)] +pub struct ConnToMainMsg { + pub task_id: task::Id, + pub msg: ConnToMainMsgInner, +} + +#[derive(Debug)] +pub enum ConnToMainMsgInner { + Accepted { + addr: SocketAddrV6, + peer_id: BaseboardId, + }, + Connected { + addr: SocketAddrV6, + peer_id: BaseboardId, + }, + #[expect(unused)] + Received { + from: BaseboardId, + msg: PeerMsg, + }, + #[expect(unused)] + ReceivedNetworkConfig { + from: BaseboardId, + config: NetworkConfig, + }, + Disconnected { + peer_id: BaseboardId, + }, +} + +pub struct TaskHandle { + pub abort_handle: AbortHandle, + #[expect(unused)] + pub tx: mpsc::Sender, + pub conn_type: ConnectionType, +} + +impl TaskHandle { + pub fn task_id(&self) -> task::Id { + self.abort_handle.id() + } + + pub fn addr(&self) -> SocketAddrV6 { + self.conn_type.addr() + } + + pub fn abort(&self) { + self.abort_handle.abort() + } +} + +impl BiHashItem for TaskHandle { + type K1<'a> = task::Id; + type K2<'a> = SocketAddrV6; + + fn key1(&self) -> Self::K1<'_> { + self.task_id() + } + + fn key2(&self) -> Self::K2<'_> { + self.conn_type.addr() + } + + bi_upcast!(); +} + +pub struct EstablishedTaskHandle { + baseboard_id: BaseboardId, + task_handle: TaskHandle, +} + +impl EstablishedTaskHandle { + pub fn new( + baseboard_id: BaseboardId, + task_handle: TaskHandle, + ) -> EstablishedTaskHandle { + EstablishedTaskHandle { baseboard_id, task_handle } + } + + pub fn task_id(&self) -> task::Id { + self.task_handle.task_id() + } + + pub fn addr(&self) -> SocketAddrV6 { + self.task_handle.addr() + } + + pub fn abort(&self) { + self.task_handle.abort(); + } +} + +impl TriHashItem for EstablishedTaskHandle { + type K1<'a> = &'a BaseboardId; + type K2<'a> = task::Id; + type K3<'a> = SocketAddrV6; + + fn key1(&self) -> Self::K1<'_> { + &self.baseboard_id + } + + fn key2(&self) -> Self::K2<'_> { + self.task_handle.task_id() + } + + fn key3(&self) -> Self::K3<'_> { + self.task_handle.addr() + } + + tri_upcast!(); +} + +pub enum ConnectionType { + Connected(SocketAddrV6), + Accepted(SocketAddrV6), +} + +impl ConnectionType { + pub fn addr(&self) -> SocketAddrV6 { + match self { + Self::Connected(addr) => *addr, + Self::Accepted(addr) => *addr, + } + } +} + +#[derive(Debug, Clone)] +pub enum ConnState { + Connecting, + Accepting, + Established(BaseboardId), +} + +/// Information about a single connection task +#[derive(Debug, Clone)] +pub struct ConnInfo { + pub state: ConnState, + pub addr: SocketAddrV6, + pub task_id: task::Id, +} + +/// Status information useful for debugging +#[derive(Debug, Clone)] +pub struct ConnMgrStatus { + pub bootstrap_addrs: BTreeSet, + pub connections: Vec, + pub num_conn_tasks: u64, + pub total_tasks_spawned: u64, +} + +/// A structure to manage all sprockets connections to peer nodes +/// +/// Each sprockets connection runs in its own task which communicates with the +/// main `NodeTask`. All methods on the `ConnMgr` run inside the main `NodeTask` +/// as `ConnMgr` is a member field of `NodeTask`. This allows isolating the +/// connection management logic from the main node message handling logic +/// without adding yet another task. +pub struct ConnMgr { + log: Logger, + + /// A channel for sending messages from a connection task to the main task + main_tx: mpsc::Sender, + + /// The sprockets config + config: SprocketsConfig, + + /// The sprockets server + server: sprockets_tls::Server, + + /// The address the sprockets server listens on + listen_addr: SocketAddrV6, + + /// A mechanism for spawning connection tasks + join_set: JoinSet<()>, + + /// All known addresses on the bootstrap network, learned via DDMD + bootstrap_addrs: BTreeSet, + + /// All tasks currently connecting to remote nodes and attempting a + /// sprockets handshake. + connecting: BiHashMap, + + /// All tasks with an accepted TCP connnection performing a sprockets handshake + accepting: BiHashMap, + + /// All tasks containing established connections that can be used to communicate + /// with other nodes. + established: TriHashMap, + + /// An interval for reconnect operations + reconnect_interval: Interval, + + /// The number of total connection tasks spawned + total_tasks_spawned: u64, +} + +impl ConnMgr { + pub async fn new( + log: &Logger, + mut listen_addr: SocketAddrV6, + sprockets_config: SprocketsConfig, + main_tx: mpsc::Sender, + ) -> ConnMgr { + let log = log.new(o!("component" => "trust-quorum-conn-mgr")); + + let config = sprockets_config.clone(); + let server = sprockets_tls::Server::new( + sprockets_config, + listen_addr, + log.clone(), + ) + .await + .expect("sprockets server can listen"); + + // If the listen port was 0, we want to update our addr to use + // the actual port This is really only useful for testing, but the + // connection manager won't work properly without doing this because it + // will never trigger connections since its own address will always sort + // lower than other addresses if only the ports differ. + let listen_port = server.listen_addr().unwrap().port(); + + if listen_port != listen_addr.port() { + listen_addr.set_port(listen_port); + } + + info!( + log, + "Started listening"; + "local_addr" => %listen_addr + ); + + let mut reconnect_interval = interval(RECONNECT_TIME); + reconnect_interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + + ConnMgr { + log, + main_tx, + config, + server, + listen_addr, + join_set: JoinSet::new(), + bootstrap_addrs: BTreeSet::new(), + connecting: BiHashMap::new(), + accepting: BiHashMap::new(), + established: TriHashMap::new(), + reconnect_interval, + total_tasks_spawned: 0, + } + } + + pub fn status(&self) -> ConnMgrStatus { + let connections = self + .connecting + .iter() + .map(|task_handle| ConnInfo { + state: ConnState::Connecting, + addr: task_handle.addr(), + task_id: task_handle.task_id(), + }) + .chain(self.accepting.iter().map(|task_handle| ConnInfo { + state: ConnState::Accepting, + addr: task_handle.addr(), + task_id: task_handle.task_id(), + })) + .chain(self.established.iter().map(|established_task_handle| { + ConnInfo { + state: ConnState::Established( + established_task_handle.baseboard_id.clone(), + ), + addr: established_task_handle.addr(), + task_id: established_task_handle.task_id(), + } + })) + .collect(); + + ConnMgrStatus { + bootstrap_addrs: self.bootstrap_addrs.clone(), + connections, + num_conn_tasks: self.join_set.len() as u64, + total_tasks_spawned: self.total_tasks_spawned, + } + } + + pub fn listen_addr(&self) -> SocketAddrV6 { + self.listen_addr + } + + /// Perform any polling related operations that the connection + /// manager must perform concurrently. + pub async fn step( + &mut self, + corpus: Vec, + ) -> Result<(), AcceptError> { + tokio::select! { + acceptor = self.server.accept(corpus.clone()) => { + self.accept(acceptor?).await?; + } + Some(res) = self.join_set.join_next_with_id() => { + match res { + Ok((task_id, _)) => { + self.on_task_exit(task_id).await; + } + Err(err) => { + error!(self.log, "Connection task panic: {err}"); + self.on_task_exit(err.id()).await; + } + + } + } + _ = self.reconnect_interval.tick() => { + self.reconnect(corpus.clone()).await; + } + } + + Ok(()) + } + + pub async fn accept( + &mut self, + acceptor: SprocketsAcceptor, + ) -> Result<(), AcceptError> { + let addr = match acceptor.addr() { + SocketAddr::V4(addr) => { + return Err(AcceptError::Ipv4Accept { addr }); + } + SocketAddr::V6(addr) => addr, + }; + let log = self.log.clone(); + let (tx, rx) = mpsc::channel(CHANNEL_BOUND); + let main_tx = self.main_tx.clone(); + let abort_handle = self.join_set.spawn(async move { + let stream = match acceptor.handshake().await { + Ok((stream, _)) => stream, + + Err(err) => { + error!(log, "Failed to accept a connection"; &err); + return; + } + }; + let platform_id = stream.peer_platform_id().as_str().unwrap(); + let baseboard_id = platform_id_to_baseboard_id(platform_id); + + // TODO: Conversion between `PlatformId` and `BaseboardId` should + // happen in `sled-agent-types`. This is waiting on an update + // to the `dice-mfg-msgs` crate. + let log = log.new(o!( + "peer_id" => baseboard_id.to_string(), + "peer_addr" => addr.to_string() + )); + info!(log, "Accepted sprockets connection"); + + let mut conn = EstablishedConn::new( + baseboard_id.clone(), + task::id(), + stream, + main_tx.clone(), + rx, + &log, + ); + + // Inform the main task that accepted connection is established + if let Err(_) = main_tx.try_send(ConnToMainMsg { + task_id: task::id(), + msg: ConnToMainMsgInner::Accepted { + addr, + peer_id: baseboard_id, + }, + }) { + // The system is shutting down or we've overloaded the main channel + // Just bail from this task + warn!(log, "Failed to send 'accepted' msg to main task"); + } else { + conn.run().await; + } + }); + self.total_tasks_spawned += 1; + let task_handle = TaskHandle { + abort_handle, + tx, + conn_type: ConnectionType::Accepted(addr), + }; + assert!(self.accepting.insert_unique(task_handle).is_ok()); + Ok(()) + } + + pub async fn server_handshake_completed( + &mut self, + task_id: task::Id, + addr: SocketAddrV6, + peer_id: BaseboardId, + ) { + if let Some(task_handle) = self.accepting.remove2(&addr) { + info!( + self.log, + "Established server connection"; + "task_id" => ?task_id, + "peer_addr" => %addr, + "peer_id" => %peer_id + ); + + let already_established = self.established.insert_unique( + EstablishedTaskHandle::new(peer_id, task_handle), + ); + assert!(already_established.is_ok()); + } else { + error!(self.log, "Server handshake completed, but no server addr in map"; + "task_id" => ?task_id, + "peer_addr" => %addr, + "peer_id" => %peer_id + ); + } + } + + pub async fn client_handshake_completed( + &mut self, + task_id: task::Id, + addr: SocketAddrV6, + peer_id: BaseboardId, + ) { + if let Some(task_handle) = self.connecting.remove2(&addr) { + info!( + self.log, + "Established client connection"; + "task_id" => ?task_id, + "peer_addr" => %addr, + "peer_id" => %peer_id + ); + let already_established = self.established.insert_unique( + EstablishedTaskHandle::new(peer_id, task_handle), + ); + assert!(already_established.is_ok()); + } else { + error!(self.log, "Client handshake completed, but no client addr in map"; + "task_id" => ?task_id, + "peer_addr" => %addr, + "peer_id" => %peer_id + ); + } + } + + /// The established connection task has asynchronously exited. + pub async fn on_disconnected( + &mut self, + task_id: task::Id, + peer_id: BaseboardId, + ) { + if let Some(established_task_handle) = self.established.get1(&peer_id) { + if established_task_handle.task_id() != task_id { + // This was a stale disconnect + return; + } + } + warn!(self.log, "peer disconnected"; "peer_id" => %peer_id); + let _ = self.established.remove1(&peer_id); + } + + /// Initiate connections if a corresponding task doesn't already exist. This + /// must be called periodically to handle transient disconnections which + /// cause tasks to exit. + pub async fn reconnect(&mut self, corpus: Vec) { + debug!(self.log, "Reconnect called"); + let mut to_connect = vec![]; + for addr in + self.bootstrap_addrs.iter().filter(|&&addr| self.listen_addr > addr) + { + if self.connecting.contains_key2(addr) { + continue; + } + + if self.established.contains_key3(addr) { + continue; + } + + to_connect.push(*addr); + } + + for addr in to_connect { + // We don't have an existing connection + self.connect_client(corpus.clone(), addr).await + } + } + + /// The set of known addresses on the bootstrap network has changed + /// + /// We only want a single connection between known peers at a time. The + /// easiest way to achieve this is to only connect to peers with addresses + /// that sort less than our own and tear down any connections that no longer + /// exist in `addrs`. + pub async fn update_bootstrap_connections( + &mut self, + addrs: BTreeSet, + corpus: Vec, + ) { + if self.bootstrap_addrs == addrs { + return; + } + + // We don't try to compare addresses from accepted nodes. If DDMD + // loses an accepting address we assume that the connection will go + // away soon, if it hasn't already. We can't compare without an extra + // handshake message to identify the listen address of the remote + // connection because clients use ephemeral ports. We always compare + // on the full `SocketAddrV6` which includes the port, which helps when + // testing on localhost. + let to_connect: BTreeSet<_> = addrs + .difference(&self.bootstrap_addrs) + .filter(|&&addr| self.listen_addr > addr) + .cloned() + .collect(); + let to_disconnect: BTreeSet<_> = self + .bootstrap_addrs + .difference(&addrs) + .filter(|&&addr| self.listen_addr > addr) + .cloned() + .collect(); + + self.bootstrap_addrs = addrs; + + for addr in to_connect { + self.connect_client(corpus.clone(), addr).await; + } + + for addr in to_disconnect { + self.disconnect_client(addr).await; + } + } + + /// Spawn a task to estalbish a sprockets connection for the given address + async fn connect_client( + &mut self, + corpus: Vec, + addr: SocketAddrV6, + ) { + let (tx, rx) = mpsc::channel(CHANNEL_BOUND); + info!(self.log, "Initiating connection to new peer: {addr}"); + let main_tx = self.main_tx.clone(); + let log = self.log.clone(); + let config = self.config.clone(); + let abort_handle = self.join_set.spawn(async move { + let stream = match sprockets_tls::Client::connect( + config, + addr, + corpus.clone(), + log.clone(), + ) + .await + { + Ok(stream) => stream, + Err(err) => { + warn!(log, "Failed to connect"; "peer_addr"=> %addr, &err); + return (); + } + }; + let platform_id = stream.peer_platform_id().as_str().unwrap(); + let baseboard_id = platform_id_to_baseboard_id(platform_id); + + // TODO: Conversion between `PlatformId` and `BaseboardId` should + // happen in `sled-agent-types`. This is waiting on an update + // to the `dice-mfg-msgs` crate. + let log = log.new(o!( + "peer_id" => baseboard_id.to_string(), + "peer_addr" => addr.to_string() + )); + info!(log, "Sprockets connection established"); + + let mut conn = EstablishedConn::new( + baseboard_id.clone(), + task::id(), + stream, + main_tx.clone(), + rx, + &log, + ); + // Inform the main task that the client connection is + // established. + if let Err(_) = main_tx.try_send(ConnToMainMsg { + task_id: task::id(), + msg: ConnToMainMsgInner::Connected { + addr, + peer_id: baseboard_id, + }, + }) { + // The system is shutting down or we've overloaded the main channel + // Just bail from this task + error!(log, "Failed to send 'connected' msg to main task"); + } else { + conn.run().await; + } + }); + self.total_tasks_spawned += 1; + let task_handle = TaskHandle { + abort_handle, + tx, + conn_type: ConnectionType::Connected(addr), + }; + assert!(self.connecting.insert_unique(task_handle).is_ok()); + } + + /// Remove any information about a sprockets client connection and inform + /// the corresponding task to stop. + /// + /// We don't tear down server connections this way as we don't know their + /// listen port, just the ephemeral port. + async fn disconnect_client(&mut self, addr: SocketAddrV6) { + if let Some(handle) = self.connecting.remove2(&addr) { + // The connection has not yet completed its handshake + info!( + self.log, + "Deleting initiating connection"; + "remote_addr" => %addr + ); + handle.abort(); + } else { + if let Some(handle) = self.established.remove3(&addr) { + info!( + self.log, + "Deleting established connection"; + "peer_addr" => %addr, + "peer_id" => %handle.baseboard_id + ); + handle.abort(); + } + } + } + + /// Remove any references to the given task + async fn on_task_exit(&mut self, task_id: task::Id) { + // We're most likely to find the task as established so we start with that + if let Some(handle) = self.established.remove2(&task_id) { + info!( + self.log, + "Established connection task exited"; + "task_id" => ?task_id, + "peer_addr" => %handle.addr(), + "peer_id" => %handle.baseboard_id + ); + } else if let Some(handle) = self.accepting.remove1(&task_id) { + info!( + self.log, + "Accepting task exited"; + "task_id" => ?task_id, + "peer_addr" => %handle.addr() + ); + } else if let Some(handle) = self.connecting.remove1(&task_id) { + info!( + self.log, + "Connecting task exited"; + "task_id" => ?task_id, + "peer_addr" => %handle.addr() + ); + } else { + info!( + self.log, + "Task exited. No cleanup required."; + "task_id" => ?task_id + ); + } + } +} + +// TODO: Eventually this will go away, once we pull in and use the latest +// `dice-util` code. +pub fn platform_id_to_baseboard_id(platform_id: &str) -> BaseboardId { + let mut platform_id_iter = platform_id.split(":"); + let part_number = platform_id_iter.nth(1).unwrap().to_string(); + let serial_number = platform_id_iter.nth(1).unwrap().to_string(); + BaseboardId { part_number, serial_number } +} diff --git a/trust-quorum/src/established_conn.rs b/trust-quorum/src/established_conn.rs new file mode 100644 index 0000000000..b75b057694 --- /dev/null +++ b/trust-quorum/src/established_conn.rs @@ -0,0 +1,311 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An individual sprockets connection running in its own task + +use crate::{ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg}; +use bytes::Buf; +use serde::Serialize; +use slog::{Logger, debug, error, o, warn}; +use slog_error_chain::SlogInlineError; +use std::collections::VecDeque; +use std::io::Cursor; +use std::time::Duration; +use tokio::io::{AsyncReadExt, AsyncWriteExt, ReadHalf, WriteHalf, split}; +use tokio::net::TcpStream; +use tokio::sync::mpsc; +use tokio::task; +use tokio::time::{Instant, MissedTickBehavior, interval}; +use trust_quorum_protocol::BaseboardId; + +/// Max buffer size of a connection +const CONN_BUF_SIZE: usize = 1024 * 1024; + +/// Each message starts with a 4 bytes size header +const FRAME_HEADER_SIZE: usize = 4; + +/// The number of serialized messages to queue for writing before closing the socket. +/// This means the remote side is very slow. +/// +/// TODO: Alternatively we could drop the oldest message. +const MSG_WRITE_QUEUE_CAPACITY: usize = 5; + +// Timing parameters for keeping the connection healthy +const PING_INTERVAL: Duration = Duration::from_secs(1); + +/// The time limit for not receiving a complete message from a peer. +/// The connection is shutdown after this time. +const INACTIVITY_TIMEOUT: Duration = Duration::from_secs(10); + +/// An error from within an `EstablishedConn` that triggers connection close +/// +/// Also a great movie +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum ConnErr { + #[error("Failed to write")] + FailedWrite(#[source] std::io::Error), + #[error("Failed to read")] + FailedRead(#[source] std::io::Error), + #[error("Failed to deserialize wire message")] + DeserializeWireMsg(#[from] ciborium::de::Error), + #[error("Failed to serialize wire message")] + SerializeWireMsg(#[from] ciborium::ser::Error), + #[error("Write queue filled with serialized messages")] + WriteQueueFull, + #[error("Inactivity timeout")] + InactivityTimeout, +} + +/// Container for code running in its own task per sprockets connection +pub struct EstablishedConn { + peer_id: BaseboardId, + task_id: task::Id, + reader: ReadHalf>, + writer: WriteHalf>, + main_tx: mpsc::Sender, + rx: mpsc::Receiver, + log: Logger, + + // Buffer we read raw data into from a sprockets connection + read_buf: Box<[u8]>, + + // The amount of data currently in `read_buf` + total_read: usize, + + // Used for managing inactivity timeouts for the connection + last_received_msg: Instant, + + // Keep a queue to write serialized messages into. We limit the queue + // size, and if it gets exceeded it means the peer at the other + // end isn't pulling data out fast enough. This should be basically + // impossible to hit given the size and rate of message exchange + // between peers. We go ahead and close the connection if the queue + // fills. + write_queue: VecDeque>, + + // The current serialized message being written if there is one + current_write: Cursor>, +} + +impl EstablishedConn { + pub fn new( + peer_id: BaseboardId, + task_id: task::Id, + stream: sprockets_tls::Stream, + main_tx: mpsc::Sender, + rx: mpsc::Receiver, + log: &Logger, + ) -> EstablishedConn { + let log = log.new(o!("component" => "trust-quorum-established-conn")); + let (reader, writer) = split(stream); + EstablishedConn { + peer_id, + task_id, + reader, + writer, + main_tx, + rx, + log, + read_buf: vec![0u8; CONN_BUF_SIZE].into_boxed_slice(), + total_read: 0, + last_received_msg: Instant::now(), + write_queue: VecDeque::with_capacity(MSG_WRITE_QUEUE_CAPACITY), + current_write: Cursor::new(Vec::new()), + } + } + + pub async fn run(&mut self) { + let mut interval = interval(PING_INTERVAL); + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + + // This is the main loop of the connection + // + // Continuously process messages until the connection closes + loop { + if !self.current_write.has_remaining() { + if let Some(buf) = self.write_queue.pop_front() { + self.current_write = Cursor::new(buf); + } + } + + let res = tokio::select! { + _ = interval.tick() => { + self.ping().await + } + Some(msg) = self.rx.recv() => { + self.on_msg_from_main(msg).await + } + res = self.reader.read(&mut self.read_buf[self.total_read..]) => { + self.on_read(res).await + } + res = self.writer.write_buf(&mut self.current_write), + if self.current_write.has_remaining() => + { + self.check_write_result(res).await + } + }; + + if let Err(err) = res { + warn!(self.log, "Closing connection"; &err); + self.close().await; + return; + } + } + } + + async fn close(&mut self) { + if let Err(_) = self.main_tx.try_send(ConnToMainMsg { + task_id: self.task_id, + msg: ConnToMainMsgInner::Disconnected { + peer_id: self.peer_id.clone(), + }, + }) { + warn!(self.log, "Failed to send to main task"); + } + let _ = self.writer.shutdown().await; + } + + async fn on_read( + &mut self, + res: Result, + ) -> Result<(), ConnErr> { + let n = res.map_err(ConnErr::FailedRead)?; + self.total_read += n; + + // We may have more than one message that has been read + loop { + if self.total_read < FRAME_HEADER_SIZE { + return Ok(()); + } + // Read frame size + let size = read_frame_size( + self.read_buf[..FRAME_HEADER_SIZE].try_into().unwrap(), + ); + let end = size + FRAME_HEADER_SIZE; + + // If we haven't read the whole message yet, then return + if end > self.total_read { + return Ok(()); + } + let msg: WireMsg = + ciborium::from_reader(&self.read_buf[FRAME_HEADER_SIZE..end])?; + // Move any remaining bytes to the beginning of the buffer. + self.read_buf.copy_within(end..self.total_read, 0); + self.total_read = self.total_read - end; + + self.last_received_msg = Instant::now(); + debug!(self.log, "Received {msg:?}"); + match msg { + WireMsg::Tq(msg) => { + if let Err(_) = self.main_tx.try_send(ConnToMainMsg { + task_id: self.task_id, + msg: ConnToMainMsgInner::Received { + from: self.peer_id.clone(), + msg, + }, + }) { + error!( + self.log, + "Failed to send received fsm msg to main task" + ); + panic!("Connection to main task channel full"); + } + } + WireMsg::Ping => { + // Nothing to do here, since Ping is just to keep us alive and + // we updated self.last_received_msg above. + } + WireMsg::NetworkConfig(config) => { + let generation = config.generation; + if let Err(_) = self.main_tx.try_send(ConnToMainMsg { + task_id: self.task_id, + msg: ConnToMainMsgInner::ReceivedNetworkConfig { + from: self.peer_id.clone(), + config, + }, + }) { + error!( + self.log, + "Failed to send received NetworkConfig with + generation {generation} to main task" + ); + panic!("Connection to main task channnel full"); + } + } + } + } + } + + async fn check_write_result( + &mut self, + res: Result, + ) -> Result<(), ConnErr> { + match res { + Ok(_) => { + if !self.current_write.has_remaining() { + self.current_write = Cursor::new(Vec::new()); + } + Ok(()) + } + Err(e) => { + let _ = self.writer.shutdown().await; + Err(ConnErr::FailedWrite(e)) + } + } + } + + async fn on_msg_from_main( + &mut self, + msg: MainToConnMsg, + ) -> Result<(), ConnErr> { + match msg { + MainToConnMsg::Msg(msg) => self.write_framed_to_queue(msg).await, + } + } + + async fn write_framed_to_queue( + &mut self, + msg: WireMsg, + ) -> Result<(), ConnErr> { + if self.write_queue.len() == MSG_WRITE_QUEUE_CAPACITY { + return Err(ConnErr::WriteQueueFull); + } else { + let msg = write_framed(&msg)?; + self.write_queue.push_back(msg); + Ok(()) + } + } + + async fn ping(&mut self) -> Result<(), ConnErr> { + if Instant::now() - self.last_received_msg > INACTIVITY_TIMEOUT { + return Err(ConnErr::InactivityTimeout); + } + self.write_framed_to_queue(WireMsg::Ping).await + } +} + +// Decode the 4-byte big-endian frame size header +fn read_frame_size(buf: [u8; FRAME_HEADER_SIZE]) -> usize { + u32::from_be_bytes(buf) as usize +} + +/// Serialize and write `msg` into `buf`, prefixed by a 4-byte big-endian size +/// header +/// +/// Return the total amount of data written into `buf` including the 4-byte +/// header. +fn write_framed( + msg: &T, +) -> Result, ciborium::ser::Error> { + let mut cursor = Cursor::new(vec![]); + // Write a size placeholder + std::io::Write::write(&mut cursor, &[0u8; FRAME_HEADER_SIZE])?; + cursor.set_position(FRAME_HEADER_SIZE as u64); + ciborium::into_writer(msg, &mut cursor)?; + let size: u32 = + (cursor.position() - FRAME_HEADER_SIZE as u64).try_into().unwrap(); + let mut buf = cursor.into_inner(); + buf[0..FRAME_HEADER_SIZE].copy_from_slice(&size.to_be_bytes()); + Ok(buf) +} diff --git a/trust-quorum/src/lib.rs b/trust-quorum/src/lib.rs index a389022af0..f508647c88 100644 --- a/trust-quorum/src/lib.rs +++ b/trust-quorum/src/lib.rs @@ -2,160 +2,13 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Implementation of the oxide rack trust quorum protocol -//! -//! This protocol is written as a -//! [no-IO](https://sans-io.readthedocs.io/how-to-sans-io.html) implementation. -//! All persistent state and all networking is managed outside of this -//! implementation. +//! Async trust-quorum library code for intergrating with sled-agent -use crypto::Sha3_256Digest; -use daft::Diffable; -use derive_more::Display; -use gfss::shamir::Share; -use serde::{Deserialize, Serialize}; -pub use sled_agent_types::sled::BaseboardId; -use slog::{Logger, error, warn}; +mod connection_manager; +pub(crate) mod established_conn; +mod task; -mod compute_key_share; -mod configuration; -mod coordinator_state; -pub(crate) mod crypto; -mod messages; -mod node; -mod node_ctx; -mod persistent_state; -#[allow(unused)] -mod rack_secret_loader; -mod validators; -pub use configuration::Configuration; -pub use coordinator_state::{ - CoordinatingMsg, CoordinatorOperation, CoordinatorState, - CoordinatorStateDiff, +pub(crate) use connection_manager::{ + ConnToMainMsg, ConnToMainMsgInner, MainToConnMsg, WireMsg, }; -pub use rack_secret_loader::{LoadRackSecretError, RackSecretLoaderDiff}; -pub use validators::{ - ValidatedLrtqUpgradeMsgDiff, ValidatedReconfigureMsgDiff, -}; -mod alarm; - -pub use alarm::Alarm; -pub use crypto::RackSecret; -pub use messages::*; -pub use node::{Node, NodeDiff}; -// public only for docs. -pub use node_ctx::NodeHandlerCtx; -pub use node_ctx::{NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff}; -pub use persistent_state::{ - ExpungedMetadata, PersistentState, PersistentStateSummary, -}; - -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Hash, - Serialize, - Deserialize, - Display, - Diffable, -)] -#[daft(leaf)] -pub struct Epoch(pub u64); - -impl Epoch { - pub fn next(&self) -> Epoch { - Epoch(self.0.checked_add(1).expect("fewer than 2^64 epochs")) - } -} - -/// The number of shares required to reconstruct the rack secret -/// -/// Typically referred to as `k` in the docs -#[derive( - Debug, - Clone, - Copy, - PartialEq, - Eq, - PartialOrd, - Ord, - Serialize, - Deserialize, - Display, - Diffable, -)] -#[daft(leaf)] -pub struct Threshold(pub u8); - -/// A container to make messages between trust quorum nodes routable -#[derive(Debug, Clone, Serialize, Deserialize, Diffable)] -#[cfg_attr(feature = "danger_partial_eq_ct_wrapper", derive(PartialEq, Eq))] -#[daft(leaf)] -pub struct Envelope { - pub to: BaseboardId, - pub from: BaseboardId, - pub msg: PeerMsg, -} - -#[cfg(feature = "testing")] -impl Envelope { - pub fn equal_except_for_crypto_data(&self, other: &Self) -> bool { - self.to == other.to - && self.from == other.from - && self.msg.equal_except_for_crypto_data(&other.msg) - } -} - -/// Check if a received share is valid for a given configuration -/// -/// Return true if valid, false otherwise. -pub fn validate_share( - log: &Logger, - config: &Configuration, - from: &BaseboardId, - epoch: Epoch, - share: &Share, -) -> bool { - // Are we trying to retrieve shares for `epoch`? - if epoch != config.epoch { - warn!( - log, - "Received Share from node with wrong epoch"; - "received_epoch" => %epoch, - "from" => %from - ); - return false; - } - - // Is the sender a member of the configuration `epoch`? - // Was the sender a member of the configuration at `old_epoch`? - let Some(expected_digest) = config.members.get(&from) else { - warn!( - log, - "Received Share from unexpected node"; - "epoch" => %epoch, - "from" => %from - ); - return false; - }; - - // Does the share hash match what we expect? - let mut digest = Sha3_256Digest::default(); - share.digest::(&mut digest.0); - if digest != *expected_digest { - error!( - log, - "Received share with invalid digest"; - "epoch" => %epoch, - "from" => %from - ); - return false; - } - - true -} +pub use task::NodeTask; diff --git a/trust-quorum/src/task.rs b/trust-quorum/src/task.rs new file mode 100644 index 0000000000..6e821f197d --- /dev/null +++ b/trust-quorum/src/task.rs @@ -0,0 +1,489 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A runnable async trust quorum node that wraps the sans-io +//! [`trust_quorum_protocol::Node`] + +use crate::connection_manager::{ + ConnMgr, ConnMgrStatus, ConnToMainMsg, ConnToMainMsgInner, +}; +use slog::{Logger, debug, error, info, o}; +use sprockets_tls::keys::SprocketsConfig; +use std::collections::BTreeSet; +use std::net::SocketAddrV6; +use thiserror::Error; +use tokio::sync::mpsc::error::SendError; +use tokio::sync::oneshot::error::RecvError; +use tokio::sync::{mpsc, oneshot}; +use trust_quorum_protocol::{BaseboardId, Node, NodeCtx}; + +/// We only expect a handful of messages at a time. +const API_CHANNEL_BOUND: usize = 32; + +/// We size this bound large enough that it should never be hit. Up to 31 +/// `EstablishedConn` tasks can send messages to the main task simultaneously when +/// messages are received. +/// +/// We use `try_send.unwrap()` when sending to the main task to prevent deadlock +/// and inform us via panic that something has gone seriously wrong. This is +/// similar to using an unbounded channel but will not use all possible memory. +const CONN_TO_MAIN_CHANNEL_BOUND: usize = 1024; + +#[derive(Debug, Clone)] +pub struct Config { + pub baseboard_id: BaseboardId, + pub listen_addr: SocketAddrV6, + // pub tq_state_ledger_paths: Vec, + // pub network_config_ledger_paths: Vec, + pub sprockets: SprocketsConfig, +} + +/// A request sent to the `NodeTask` from the `NodeTaskHandle` +pub enum NodeApiRequest { + /// Inform the `Node` of currently known IP addresses on the bootstrap network + /// + /// These are generated from DDM prefixes learned by the bootstrap agent. + BootstrapAddresses(BTreeSet), + + /// Retrieve connectivity status via the `ConnMgr` + ConnMgrStatus { responder: oneshot::Sender }, + + /// Shutdown the node's tokio tasks + Shutdown, +} + +/// An error response from a `NodeApiRequest` +#[derive(Error, Debug, PartialEq)] +pub enum NodeApiError { + #[error("Failed to send request to node task")] + Send, + #[error("Failed to receive response from node task")] + Recv, +} + +impl From> for NodeApiError { + fn from(_: SendError) -> Self { + NodeApiError::Send + } +} + +impl From for NodeApiError { + fn from(_: RecvError) -> Self { + NodeApiError::Recv + } +} + +#[derive(Debug, Clone)] +pub struct NodeTaskHandle { + baseboard_id: BaseboardId, + tx: mpsc::Sender, + listen_addr: SocketAddrV6, +} + +impl NodeTaskHandle { + /// Return the actual port being listened on + /// + /// This is useful when the port passed in was `0`. + pub fn listen_addr(&self) -> SocketAddrV6 { + self.listen_addr + } + + pub fn baseboard_id(&self) -> &BaseboardId { + &self.baseboard_id + } + + /// Inform the node of currently known IP addresses on the bootstrap network + /// + /// These are generated from DDM prefixes learned by the bootstrap agent. + pub async fn load_peer_addresses( + &self, + addrs: BTreeSet, + ) -> Result<(), NodeApiError> { + self.tx.send(NodeApiRequest::BootstrapAddresses(addrs)).await?; + Ok(()) + } + + pub async fn conn_mgr_status(&self) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx.send(NodeApiRequest::ConnMgrStatus { responder: tx }).await?; + let res = rx.await?; + Ok(res) + } + + pub async fn shutdown(&self) -> Result<(), NodeApiError> { + self.tx.send(NodeApiRequest::Shutdown).await?; + Ok(()) + } +} + +pub struct NodeTask { + shutdown: bool, + log: Logger, + #[expect(unused)] + config: Config, + #[expect(unused)] + node: Node, + #[expect(unused)] + ctx: NodeCtx, + conn_mgr: ConnMgr, + conn_mgr_rx: mpsc::Receiver, + + // Handle requests received from `PeerHandle` + rx: mpsc::Receiver, +} + +impl NodeTask { + pub async fn new( + config: Config, + log: &Logger, + ) -> (NodeTask, NodeTaskHandle) { + let log = log.new(o!( + "component" => "trust-quorum", + "baseboard_id" => config.baseboard_id.to_string() + )); + + let (tx, rx) = mpsc::channel(API_CHANNEL_BOUND); + + let (conn_mgr_tx, conn_mgr_rx) = + mpsc::channel(CONN_TO_MAIN_CHANNEL_BOUND); + + let baseboard_id = config.baseboard_id.clone(); + + // TODO: Load persistent state from ledger + let mut ctx = NodeCtx::new(config.baseboard_id.clone()); + let node = Node::new(&log, &mut ctx); + let conn_mgr = ConnMgr::new( + &log, + config.listen_addr, + config.sprockets.clone(), + conn_mgr_tx, + ) + .await; + let listen_addr = conn_mgr.listen_addr(); + ( + NodeTask { + shutdown: false, + log, + config, + node, + ctx, + conn_mgr, + conn_mgr_rx, + rx, + }, + NodeTaskHandle { baseboard_id, tx, listen_addr }, + ) + } + + /// Run the main loop of the node + /// + /// This should be spawned into its own tokio task + pub async fn run(&mut self) { + while !self.shutdown { + // TODO: Real corpus + let corpus = vec![]; + tokio::select! { + Some(request) = self.rx.recv() => { + self.on_api_request(request).await; + } + res = self.conn_mgr.step(corpus.clone()) => { + if let Err(err) = res { + error!(self.log, "Failed to accept connection"; &err); + continue; + } + } + Some(msg) = self.conn_mgr_rx.recv() => { + self.on_conn_msg(msg).await + } + + } + } + } + + // Handle messages from connection management tasks + async fn on_conn_msg(&mut self, msg: ConnToMainMsg) { + let task_id = msg.task_id; + match msg.msg { + ConnToMainMsgInner::Accepted { addr, peer_id } => { + self.conn_mgr + .server_handshake_completed(task_id, addr, peer_id) + .await; + } + ConnToMainMsgInner::Connected { addr, peer_id } => { + self.conn_mgr + .client_handshake_completed(task_id, addr, peer_id) + .await; + } + ConnToMainMsgInner::Disconnected { peer_id } => { + self.conn_mgr.on_disconnected(task_id, peer_id).await; + } + ConnToMainMsgInner::Received { from: _, msg: _ } => { + todo!(); + } + ConnToMainMsgInner::ReceivedNetworkConfig { + from: _, + config: _, + } => { + todo!(); + } + } + } + + async fn on_api_request(&mut self, request: NodeApiRequest) { + match request { + NodeApiRequest::BootstrapAddresses(addrs) => { + info!(self.log, "Updated Peer Addresses: {addrs:?}"); + // TODO: real corpus + let corpus = vec![]; + self.conn_mgr.update_bootstrap_connections(addrs, corpus).await; + } + NodeApiRequest::ConnMgrStatus { responder } => { + debug!(self.log, "Received Request for ConnMgrStatus"); + let _ = responder.send(self.conn_mgr.status()); + } + NodeApiRequest::Shutdown => { + info!(self.log, "Shutting down Node tokio tasks"); + self.shutdown = true; + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::connection_manager::{ + ConnState, RECONNECT_TIME, platform_id_to_baseboard_id, + }; + use camino::Utf8PathBuf; + use dropshot::test_util::log_prefix_for_test; + use omicron_test_utils::dev::poll::{CondCheckError, wait_for_condition}; + use omicron_test_utils::dev::test_setup_log; + use sprockets_tls::keys::ResolveSetting; + use sprockets_tls_test_utils::{ + alias_prefix, cert_path, certlist_path, private_key_path, root_prefix, + sprockets_auth_prefix, + }; + use std::time::Duration; + + fn pki_doc_to_node_configs(dir: Utf8PathBuf, n: usize) -> Vec { + (1..=n) + .map(|i| { + let baseboard_id = platform_id_to_baseboard_id( + &sprockets_tls_test_utils::platform_id(i), + ); + let listen_addr = + SocketAddrV6::new(std::net::Ipv6Addr::LOCALHOST, 0, 0, 0); + let sprockets_auth_key_name = sprockets_auth_prefix(i); + let alias_key_name = alias_prefix(i); + let sprockets = SprocketsConfig { + resolve: ResolveSetting::Local { + priv_key: private_key_path( + dir.clone(), + &sprockets_auth_key_name, + ), + cert_chain: certlist_path( + dir.clone(), + &sprockets_auth_key_name, + ), + }, + attest: sprockets_tls::keys::AttestConfig::Local { + priv_key: private_key_path( + dir.clone(), + &alias_key_name, + ), + cert_chain: certlist_path(dir.clone(), &alias_key_name), + // TODO: We need attest-mock to generate a real log + log: dir.join("log.bin"), + }, + roots: vec![cert_path(dir.clone(), &root_prefix())], + }; + Config { baseboard_id, listen_addr, sprockets } + }) + .collect() + } + + /// Test that all nodes can connect to each other when given each the full + /// set of "bootstrap addresses". + #[tokio::test] + async fn full_mesh_connectivity() { + let logctx = test_setup_log("full_mesh_connectivity"); + let (mut dir, s) = log_prefix_for_test("full_mesh_connectivity"); + dir.push(&s); + std::fs::create_dir(&dir).unwrap(); + println!("Writing keys and certs to {dir}"); + let num_nodes = 4; + + let file_behavior = + sprockets_tls_test_utils::OutputFileExistsBehavior::Overwrite; + + // Create `num_nodes` nodes worth of keys and certs + let doc = sprockets_tls_test_utils::generate_config(num_nodes); + doc.write_key_pairs(dir.clone(), file_behavior).unwrap(); + doc.write_certificates(dir.clone(), file_behavior).unwrap(); + doc.write_certificate_lists(dir.clone(), file_behavior).unwrap(); + + // This is just a made up digest. We aren't currently using a corpus, so it + // doesn't matter what the measurements are, just that there is at least + // one in a file named "log.bin". + let digest = + "be4df4e085175f3de0c8ac4837e1c2c9a34e8983209dac6b549e94154f7cdd9c" + .into(); + let attest_log_doc = attest_mock::log::Document { + measurements: vec![attest_mock::log::Measurement { + algorithm: "sha3-256".into(), + digest, + }], + }; + // Write out the log document to the filesystem + let out = attest_mock::log::mock(attest_log_doc).unwrap(); + std::fs::write(dir.join("log.bin"), &out).unwrap(); + + let configs = pki_doc_to_node_configs(dir.clone(), num_nodes); + + let mut node_handles = vec![]; + let mut join_handles = vec![]; + for config in configs.clone() { + let (mut task, handle) = NodeTask::new(config, &logctx.log).await; + node_handles.push(handle); + join_handles.push(tokio::spawn(async move { task.run().await })); + } + + let listen_addrs: BTreeSet<_> = + node_handles.iter().map(|h| h.listen_addr()).collect(); + + for h in &node_handles { + h.load_peer_addresses(listen_addrs.clone()).await.unwrap(); + } + + let poll_interval = Duration::from_millis(1); + let poll_max = Duration::from_secs(10); + + // Wait for all nodes have `num_nodes - 1` established connections + wait_for_condition( + async || { + let mut count = 0; + for h in &node_handles { + let status = h.conn_mgr_status().await.unwrap(); + if status + .connections + .iter() + .all(|c| matches!(c.state, ConnState::Established(_))) + && status.connections.len() == num_nodes - 1 + && status.total_tasks_spawned == 3 + { + count += 1; + } + } + if count == num_nodes { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &poll_interval, + &poll_max, + ) + .await + .unwrap(); + + // Pause time so we can jump it for reconnects + tokio::time::pause(); + + // Killing a single node should cause all other nodes to start + // reconnecting. This should cause the task id counter to start + // incrementing at all nodes and for their to be one fewer established + // connection. + let h = node_handles.pop().unwrap(); + h.shutdown().await.unwrap(); + join_handles.pop().unwrap(); + let stopped_addr = h.listen_addr; + + // Speed up reconnection in the test + tokio::time::advance(RECONNECT_TIME).await; + + let poll_interval = Duration::from_millis(50); + wait_for_condition( + async || { + let mut valid = 0; + for h in &node_handles { + let status = h.conn_mgr_status().await.unwrap(); + let established_count = status + .connections + .iter() + .filter(|c| { + matches!(c.state, ConnState::Established(_)) + }) + .count(); + + // Nodes only connect to other nodes if their listening + // address sorts greater. The only node where a reconnect will be attempted + // is the stopped node. + let should_be_connecting = h.listen_addr > stopped_addr; + let valid_task_id = if should_be_connecting { + status.total_tasks_spawned > 3 + } else { + true + }; + if established_count == num_nodes - 2 && valid_task_id { + valid += 1; + } + } + if valid == num_nodes - 1 { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &poll_interval, + &poll_max, + ) + .await + .unwrap(); + + // Now let's bring back up the old node and ensure full connectivity again + let (mut task, handle) = + NodeTask::new(configs.last().unwrap().clone(), &logctx.log).await; + node_handles.push(handle.clone()); + join_handles.push(tokio::spawn(async move { task.run().await })); + + // The port likely changed, so we must refresh everyone's set of addresses + let listen_addrs: BTreeSet<_> = + node_handles.iter().map(|h| h.listen_addr()).collect(); + + for h in &node_handles { + h.load_peer_addresses(listen_addrs.clone()).await.unwrap(); + } + + // Wait for all nodes have `num_nodes - 1` established connections + wait_for_condition( + async || { + let mut count = 0; + for h in &node_handles { + let status = h.conn_mgr_status().await.unwrap(); + if status + .connections + .iter() + .all(|c| matches!(c.state, ConnState::Established(_))) + && status.connections.len() == num_nodes - 1 + { + count += 1; + } + } + if count == num_nodes { + Ok(()) + } else { + Err(CondCheckError::<()>::NotYet) + } + }, + &poll_interval, + &poll_max, + ) + .await + .unwrap(); + + logctx.cleanup_successful(); + std::fs::remove_dir_all(dir).unwrap(); + } +} diff --git a/trust-quorum/test-utils/Cargo.toml b/trust-quorum/test-utils/Cargo.toml index 33181dc1dd..853bdd464f 100644 --- a/trust-quorum/test-utils/Cargo.toml +++ b/trust-quorum/test-utils/Cargo.toml @@ -19,6 +19,6 @@ serde.workspace = true serde_json.workspace = true sled-hardware-types.workspace = true slog.workspace = true -trust-quorum = { workspace = true, features = ["danger_partial_eq_ct_wrapper", "testing"] } +trust-quorum-protocol = { workspace = true, features = ["danger_partial_eq_ct_wrapper", "testing"] } omicron-workspace-hack.workspace = true diff --git a/trust-quorum/test-utils/src/event.rs b/trust-quorum/test-utils/src/event.rs index 3544456b3c..8bead3c53e 100644 --- a/trust-quorum/test-utils/src/event.rs +++ b/trust-quorum/test-utils/src/event.rs @@ -7,7 +7,7 @@ use crate::nexus::{NexusConfig, NexusReply}; use serde::{Deserialize, Serialize}; use std::collections::BTreeSet; -use trust_quorum::{BaseboardId, Envelope, Epoch}; +use trust_quorum_protocol::{BaseboardId, Envelope, Epoch}; /// An event that can be fed into our system under test (SUT) /// diff --git a/trust-quorum/test-utils/src/lib.rs b/trust-quorum/test-utils/src/lib.rs index 6cc7d617f9..9bfffdde25 100644 --- a/trust-quorum/test-utils/src/lib.rs +++ b/trust-quorum/test-utils/src/lib.rs @@ -13,7 +13,7 @@ pub use event::Event; pub use event_log::EventLog; pub use state::TqState; -use trust_quorum::BaseboardId; +use trust_quorum_protocol::BaseboardId; /// All possible members used in a test pub fn member_universe(size: usize) -> Vec { diff --git a/trust-quorum/test-utils/src/nexus.rs b/trust-quorum/test-utils/src/nexus.rs index d59ec53cc9..c2665f3787 100644 --- a/trust-quorum/test-utils/src/nexus.rs +++ b/trust-quorum/test-utils/src/nexus.rs @@ -10,7 +10,7 @@ use iddqd::{IdOrdItem, IdOrdMap, id_upcast}; use omicron_uuid_kinds::RackUuid; use serde::{Deserialize, Serialize}; use std::collections::BTreeSet; -use trust_quorum::{ +use trust_quorum_protocol::{ BaseboardId, Epoch, LrtqUpgradeMsg, ReconfigureMsg, Threshold, }; diff --git a/trust-quorum/test-utils/src/state.rs b/trust-quorum/test-utils/src/state.rs index 59b8524d8b..3c1b31e5a3 100644 --- a/trust-quorum/test-utils/src/state.rs +++ b/trust-quorum/test-utils/src/state.rs @@ -17,7 +17,7 @@ use sled_hardware_types::Baseboard; use slog::{Logger, info}; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Display; -use trust_quorum::{ +use trust_quorum_protocol::{ BaseboardId, Configuration, CoordinatingMsg, CoordinatorOperation, CoordinatorStateDiff, Envelope, Epoch, LoadRackSecretError, Node, NodeCallerCtx, NodeCommonCtx, NodeCtx, NodeCtxDiff, NodeDiff, PeerMsgKind, diff --git a/trust-quorum/tqdb/Cargo.toml b/trust-quorum/tqdb/Cargo.toml index 4436cc99fb..18242508aa 100644 --- a/trust-quorum/tqdb/Cargo.toml +++ b/trust-quorum/tqdb/Cargo.toml @@ -20,7 +20,7 @@ reconfigurator-cli.workspace = true serde_json.workspace = true slog.workspace = true tabled.workspace = true -trust-quorum = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] } +trust-quorum-protocol = { workspace = true, features = ["danger_partial_eq_ct_wrapper"] } trust-quorum-test-utils.workspace = true omicron-workspace-hack.workspace = true diff --git a/trust-quorum/tqdb/src/bin/tqdb/main.rs b/trust-quorum/tqdb/src/bin/tqdb/main.rs index a593e697e3..12e163f801 100644 --- a/trust-quorum/tqdb/src/bin/tqdb/main.rs +++ b/trust-quorum/tqdb/src/bin/tqdb/main.rs @@ -24,7 +24,7 @@ use std::fmt::Write; use std::fs; use std::io::IsTerminal; use tabled::Tabled; -use trust_quorum::BaseboardId; +use trust_quorum_protocol::BaseboardId; use trust_quorum_test_utils::{Event, TqState}; fn main() -> Result<(), anyhow::Error> { diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 6f64f8c40b..6ef9f57bba 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -35,6 +35,7 @@ chrono = { version = "0.4.42", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.48", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.48", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } crossbeam-utils = { version = "0.8.21" } crossterm = { version = "0.28.1", features = ["event-stream", "serde"] } @@ -42,6 +43,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr curve25519-dalek = { version = "4.1.3", features = ["digest", "legacy_compatibility", "rand_core"] } daft = { version = "0.1.4", features = ["derive", "newtype-uuid1", "oxnet01", "uuid1"] } data-encoding = { version = "2.9.0" } +der = { version = "0.7.10", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } ecdsa = { version = "0.16.9", features = ["pem", "signing", "std", "verifying"] } ed25519-dalek = { version = "2.1.1", features = ["digest", "pem", "rand_core"] } @@ -64,7 +66,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom-6f8ce4dd05d13bba = { package = "getrandom", version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.16.0", default-features = false, features = ["allocator-api2", "inline-more"] } -heck = { version = "0.4.1" } +heck = { version = "0.4.1", features = ["unicode"] } hickory-proto = { version = "0.25.2", features = ["serde", "text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.7.0", features = ["full"] } @@ -109,7 +111,7 @@ regex = { version = "1.11.3" } regex-automata = { version = "0.4.11", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "std", "unicode"] } regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.22", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } -rsa = { version = "0.9.6", features = ["serde", "sha2"] } +rsa = { version = "0.9.8", features = ["serde", "sha2"] } rustc-hash = { version = "2.1.1" } rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } @@ -118,7 +120,7 @@ scopeguard = { version = "1.2.0" } semver = { version = "1.0.27", features = ["serde"] } serde = { version = "1.0.226", features = ["alloc", "derive", "rc"] } serde_core = { version = "1.0.226", features = ["alloc", "rc"] } -serde_json = { version = "1.0.145", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.145", features = ["alloc", "raw_value", "unbounded_depth"] } serde_with = { version = "3.14.0" } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.9", features = ["oid"] } @@ -174,6 +176,7 @@ chrono = { version = "0.4.42", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.48", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.48", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } crossbeam-utils = { version = "0.8.21" } crossterm = { version = "0.28.1", features = ["event-stream", "serde"] } @@ -181,6 +184,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr curve25519-dalek = { version = "4.1.3", features = ["digest", "legacy_compatibility", "rand_core"] } daft = { version = "0.1.4", features = ["derive", "newtype-uuid1", "oxnet01", "uuid1"] } data-encoding = { version = "2.9.0" } +der = { version = "0.7.10", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } ecdsa = { version = "0.16.9", features = ["pem", "signing", "std", "verifying"] } ed25519-dalek = { version = "2.1.1", features = ["digest", "pem", "rand_core"] } @@ -203,7 +207,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom-6f8ce4dd05d13bba = { package = "getrandom", version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.16.0", default-features = false, features = ["allocator-api2", "inline-more"] } -heck = { version = "0.4.1" } +heck = { version = "0.4.1", features = ["unicode"] } hickory-proto = { version = "0.25.2", features = ["serde", "text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.7.0", features = ["full"] } @@ -248,7 +252,7 @@ regex = { version = "1.11.3" } regex-automata = { version = "0.4.11", default-features = false, features = ["dfa", "hybrid", "meta", "nfa", "perf", "std", "unicode"] } regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.22", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } -rsa = { version = "0.9.6", features = ["serde", "sha2"] } +rsa = { version = "0.9.8", features = ["serde", "sha2"] } rustc-hash = { version = "2.1.1" } rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } @@ -257,7 +261,7 @@ scopeguard = { version = "1.2.0" } semver = { version = "1.0.27", features = ["serde"] } serde = { version = "1.0.226", features = ["alloc", "derive", "rc"] } serde_core = { version = "1.0.226", features = ["alloc", "rc"] } -serde_json = { version = "1.0.145", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.145", features = ["alloc", "raw_value", "unbounded_depth"] } serde_with = { version = "3.14.0" } sha1 = { version = "0.10.6", features = ["oid"] } sha2 = { version = "0.10.9", features = ["oid"] } @@ -307,6 +311,7 @@ getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } linux-raw-sys = { version = "0.4.14", default-features = false, features = ["elf", "errno", "general", "if_ether", "ioctl", "net", "netlink", "no_std", "prctl", "std", "system", "xdp"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -320,6 +325,7 @@ getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } linux-raw-sys = { version = "0.4.14", default-features = false, features = ["elf", "errno", "general", "if_ether", "ioctl", "net", "netlink", "no_std", "prctl", "std", "system", "xdp"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -330,6 +336,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -340,6 +347,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -350,6 +358,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -360,6 +369,7 @@ cookie = { version = "0.18.1", default-features = false, features = ["percent-en getrandom-468e82937335b1c9 = { package = "getrandom", version = "0.3.4", default-features = false, features = ["std"] } hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio"] } hyper-util = { version = "0.1.17", features = ["full"] } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } rustix-dff4ba8e3ae991db = { package = "rustix", version = "1.0.7", features = ["fs", "stdio", "termios"] } @@ -374,6 +384,7 @@ hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio" hyper-util = { version = "0.1.17", features = ["full"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } nom = { version = "7.1.3" } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] } @@ -392,6 +403,7 @@ hyper-rustls = { version = "0.27.7", features = ["http2", "ring", "webpki-tokio" hyper-util = { version = "0.1.17", features = ["full"] } itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +miniz_oxide = { version = "0.8.5", default-features = false, features = ["with-alloc"] } mio = { version = "1.0.2", features = ["net", "os-ext"] } nom = { version = "7.1.3" } rustix-d585fab2519d2d1 = { package = "rustix", version = "0.38.37", features = ["event", "fs", "net", "pipe", "process", "stdio", "system", "termios", "time"] }