Skip to content

Commit 2d974e3

Browse files
committed
TQ: Async Nodes and P2P connections
Builds on #9232. This is the first step in wrapping the `trust_quorum::Node` so that it can be used in an async context and integrated with sled-agent. Only the sprockets networking has been fully integrated so far, such that each `NodeTask` has a `ConnMgr` that sets up a full mesh of sprockets connections. A test for this connectivity behavior has been written, but the code is not wired into the production code yet. Messages can be sent between `NodeTask`s over sprockets connections. Each connection exists in its own task managed by an `EstablishedConn`. The main `NodeTask` task sends messages to and receives messages from this task to interact with the outside world via sprockets. Currently only `Ping` messages are sent over the wire as a means to keep the connections alive and detect disconnects. A `NodeHandle` allows one to interact with the `NodeTask`. Currently only three operations are implemented, with messages defined in `NodeApiRequest`. The user can instruct the node who its peers are on the bootstrap network to establish connectivity, can poll for connectivity status, and can shut down the node. All of this functionality is used in the accompanying test. It's important to reiterate that this code only implements connectivity between trust quorum nodes and no actual trust quorum messages are sent. They can't be, as a handle cannot yet initiate a reconfiguration or LRTQ upgrade. That behavior will come in a follow-up. This PR is large enough. A lot of this code is similar to the LRTQ connection management code, except that it operates over sprockets rather than TCP channels. This introduces some complexity, but it is mostly abstracted away into the `SprocketsConfig`.
1 parent 41bb83c commit 2d974e3

File tree

24 files changed

+2310
-330
lines changed

24 files changed

+2310
-330
lines changed

Cargo.lock

Lines changed: 476 additions & 172 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ assert_matches = "1.5.0"
368368
assert_cmd = "2.0.17"
369369
async-bb8-diesel = "0.2"
370370
async-trait = "0.1.89"
371+
attest-mock = { path = "../dice-util/attest-mock" }
371372
atomicwrites = "0.4.4"
372373
authz-macros = { path = "nexus/authz-macros" }
373374
backoff = { version = "0.4.0", features = [ "tokio" ] }
@@ -722,7 +723,8 @@ slog-term = "2.9.1"
722723
smf = "0.2"
723724
socket2 = { version = "0.5", features = ["all"] }
724725
sp-sim = { path = "sp-sim" }
725-
sprockets-tls = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "6d31fa63217c6a51061dc4afa1ebe175a0021981" }
726+
sprockets-tls = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" }
727+
sprockets-tls-test-utils = { git = "https://github.com/oxidecomputer/sprockets.git", rev = "dea3bbfac7d9d3c45f088898fcd05ee5d2ec2210" }
726728
sqlformat = "0.3.5"
727729
sqlparser = { version = "0.45.0", features = [ "visitor" ] }
728730
static_assertions = "1.1.0"

sled-agent/src/bootstrap/client.rs

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::bootstrap::views::Response;
1212
use crate::bootstrap::views::ResponseEnvelope;
1313
use sled_agent_types::sled::StartSledAgentRequest;
1414
use slog::Logger;
15+
use slog_error_chain::SlogInlineError;
1516
use sprockets_tls::client::Client as SprocketsClient;
1617
use sprockets_tls::keys::SprocketsConfig;
1718
use std::borrow::Cow;
@@ -21,34 +22,38 @@ use thiserror::Error;
2122
use tokio::io::AsyncReadExt;
2223
use tokio::io::AsyncWriteExt;
2324

24-
#[derive(Debug, Error)]
25+
#[derive(Debug, Error, SlogInlineError)]
2526
pub enum Error {
26-
#[error("Could not connect to {addr}: {err}")]
27-
Connect { addr: SocketAddrV6, err: sprockets_tls::Error },
27+
#[error("Could not connect to {addr}")]
28+
Connect {
29+
addr: SocketAddrV6,
30+
#[source]
31+
err: sprockets_tls::Error,
32+
},
2833

29-
#[error("Failed serializing request: {0}")]
30-
Serialize(serde_json::Error),
34+
#[error("Failed serializing request")]
35+
Serialize(#[source] serde_json::Error),
3136

32-
#[error("Failed writing request length prefix: {0}")]
33-
WriteLengthPrefix(io::Error),
37+
#[error("Failed writing request length prefix")]
38+
WriteLengthPrefix(#[source] io::Error),
3439

35-
#[error("Failed writing request: {0}")]
36-
WriteRequest(io::Error),
40+
#[error("Failed writing request")]
41+
WriteRequest(#[source] io::Error),
3742

38-
#[error("Failed flushing request: {0}")]
39-
FlushRequest(io::Error),
43+
#[error("Failed flushing request")]
44+
FlushRequest(#[source] io::Error),
4045

41-
#[error("Failed reading response length prefix: {0}")]
42-
ReadLengthPrefix(io::Error),
46+
#[error("Failed reading response length prefix")]
47+
ReadLengthPrefix(#[source] io::Error),
4348

4449
#[error("Received bogus response length: {0}")]
4550
BadResponseLength(u32),
4651

47-
#[error("Failed reading response: {0}")]
48-
ReadResponse(io::Error),
52+
#[error("Failed reading response")]
53+
ReadResponse(#[source] io::Error),
4954

50-
#[error("Failed deserializing response: {0}")]
51-
Deserialize(serde_json::Error),
55+
#[error("Failed deserializing response")]
56+
Deserialize(#[source] serde_json::Error),
5257

5358
#[error("Unsupported version: {0}")]
5459
UnsupportedVersion(u32),
@@ -111,9 +116,14 @@ impl Client {
111116
let log = self.log.new(o!("component" => "SledAgentSprocketsClient"));
112117
// Establish connection and sprockets connection (if possible).
113118
// The sprockets client loads the associated root certificates at this point.
119+
//
120+
// TODO: Use a real corpus
121+
let corpus = vec![];
114122
let stream = SprocketsClient::connect(
115123
self.sprockets_conf.clone(),
116124
self.addr,
125+
// We don't have corpus files yet
126+
vec![],
117127
log.clone(),
118128
)
119129
.await

sled-agent/src/bootstrap/config.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@
77
pub const BOOTSTRAP_AGENT_HTTP_PORT: u16 = 80;
88
pub const BOOTSTRAP_AGENT_RACK_INIT_PORT: u16 = 12346;
99
pub const BOOTSTORE_PORT: u16 = 12347;
10+
pub const TRUST_QUORUM_PORT: u16 = 12349;

sled-agent/src/bootstrap/sprockets_server.rs

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -59,22 +59,32 @@ impl SprocketsServer {
5959
/// which is cancel-safe. Note that cancelling this
6060
/// server does not necessarily cancel any outstanding requests that it has
6161
/// already received (and which may still be executing).
62-
pub(super) async fn run(mut self) {
62+
pub(super) async fn run(self) {
6363
loop {
6464
// Sprockets actually _uses_ the key here!
65-
let (stream, remote_addr) = match self.listener.accept().await {
66-
Ok(conn) => conn,
67-
Err(err) => {
68-
error!(self.log, "accept() failed"; "err" => #%err);
69-
continue;
70-
}
71-
};
72-
73-
let log = self.log.new(o!("remote_addr" => remote_addr));
74-
info!(log, "Accepted connection");
65+
// We don't have corpus files yet, so pass in an empty Vec
66+
let (stream, remote_addr) =
67+
match self.listener.accept(vec![]).await.await {
68+
Ok(conn) => conn,
69+
Err(err) => {
70+
error!(self.log, "accept() failed"; "err" => #%err);
71+
continue;
72+
}
73+
};
7574

75+
let log = self.log.new(o!("remote_addr" => acceptor.addr()));
76+
info!(log, "TCP connection accepted");
7677
let tx_requests = self.tx_requests.clone();
7778
tokio::spawn(async move {
79+
let stream = match acceptor.handshake().await {
80+
Ok((stream, _)) => stream,
81+
Err(err) => {
82+
error!(log, "Sprockets handshake failed"; &err);
83+
return;
84+
}
85+
};
86+
info!(log, "Sprockets handshake completed");
87+
7888
match handle_start_sled_agent_request(stream, tx_requests, &log)
7989
.await
8090
{

sled-agent/src/sled_agent.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ use sled_hardware::{HardwareManager, MemoryReservations, underlay};
7878
use sled_hardware_types::Baseboard;
7979
use sled_hardware_types::underlay::BootstrapInterface;
8080
use slog::Logger;
81-
use slog_error_chain::InlineErrorChain;
81+
use slog_error_chain::{InlineErrorChain, SlogInlineError};
8282
use sprockets_tls::keys::SprocketsConfig;
8383
use std::collections::BTreeMap;
8484
use std::net::{Ipv6Addr, SocketAddrV6};
@@ -1191,7 +1191,7 @@ impl SledAgent {
11911191
}
11921192
}
11931193

1194-
#[derive(From, thiserror::Error, Debug)]
1194+
#[derive(From, thiserror::Error, Debug, SlogInlineError)]
11951195
pub enum AddSledError {
11961196
#[error("Failed to learn bootstrap ip for {sled_id}")]
11971197
BootstrapAgentClient {
@@ -1206,6 +1206,7 @@ pub enum AddSledError {
12061206
#[error("Failed to initialize {sled_id}: {err}")]
12071207
BootstrapTcpClient {
12081208
sled_id: Baseboard,
1209+
#[source]
12091210
err: crate::bootstrap::client::Error,
12101211
},
12111212
}

smf/sled-agent/gimlet-standalone/config.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,5 @@ if_exists = "append"
7676

7777
[sprockets]
7878
resolve = { which = "ipcc" }
79+
attest = { which = "ipcc" }
7980
roots = ["/usr/share/oxide/idcerts/staging.pem", "/usr/share/oxide/idcerts/production.pem"]

smf/sled-agent/gimlet/config.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,5 @@ if_exists = "append"
7272

7373
[sprockets]
7474
resolve = { which = "ipcc" }
75+
attest = { which = "ipcc" }
7576
roots = ["/usr/share/oxide/idcerts/staging.pem", "/usr/share/oxide/idcerts/production.pem"]

smf/sled-agent/non-gimlet/config.kdl

Lines changed: 107 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -77,46 +77,6 @@ certificate "test-signer-a1" {
7777
}
7878
}
7979

80-
key-pair "test-signer-a2" {
81-
p384
82-
}
83-
84-
entity "test-signer-a2" {
85-
country-name "US"
86-
organization-name "Oxide Computer Company"
87-
common-name "test-platformid-1 Signer Staging A2"
88-
}
89-
90-
certificate "test-signer-a2" {
91-
issuer-certificate "test-root-a"
92-
issuer-key "test-root-a"
93-
94-
subject-entity "test-signer-a2"
95-
subject-key "test-signer-a2"
96-
97-
digest-algorithm "sha-384"
98-
not-after "9999-12-31T23:59:59Z"
99-
serial-number "01"
100-
101-
extensions {
102-
subject-key-identifier critical=false
103-
authority-key-identifier critical=false {
104-
key-id
105-
}
106-
107-
basic-constraints critical=true ca=true
108-
key-usage critical=true {
109-
key-cert-sign
110-
crl-sign
111-
}
112-
certificate-policies critical=true {
113-
oana-platform-identity
114-
tcg-dice-kp-identity-init
115-
tcg-dice-kp-attest-init
116-
tcg-dice-kp-eca
117-
}
118-
}
119-
}
12080
/// Device 1
12181
key-pair "test-platformid-1" {
12282
ed25519
@@ -166,7 +126,7 @@ key-pair "test-deviceid-1" {
166126
entity "test-deviceid-1" {
167127
country-name "US"
168128
organization-name "Oxide Computer Company"
169-
common-name "/C=US/O=Oxide Computer Company/CN=test-deviceid-1"
129+
common-name "test-deviceid-1"
170130
}
171131

172132
certificate "test-deviceid-1" {
@@ -207,7 +167,7 @@ key-pair "test-sprockets-auth-1" {
207167
entity "test-sprockets-auth-1" {
208168
country-name "US"
209169
organization-name "Oxide Computer Company"
210-
common-name "/C=US/O=Oxide Computer Company/CN=test-sprockets-auth-1"
170+
common-name "test-sprockets-auth-1"
211171
}
212172

213173
certificate "test-sprockets-auth-1" {
@@ -241,6 +201,58 @@ certificate "test-sprockets-auth-1" {
241201
}
242202
}
243203

204+
// TODO: sprockets reverses this cert chain before passing it to rustls
205+
certificate-list "test-sprockets-auth-1" \
206+
"test-signer-a1" \
207+
"test-platformid-1" \
208+
"test-deviceid-1" \
209+
"test-sprockets-auth-1"
210+
211+
key-pair "test-alias-1" {
212+
ed25519
213+
}
214+
215+
entity "test-alias-1" {
216+
country-name "US"
217+
organization-name "Oxide Computer Company"
218+
common-name "alias"
219+
}
220+
221+
certificate "test-alias-1" {
222+
issuer-certificate "test-deviceid-1"
223+
issuer-key "test-deviceid-1"
224+
225+
subject-entity "test-alias-1"
226+
subject-key "test-alias-1"
227+
228+
not-after "9999-12-31T23:59:59Z"
229+
serial-number "00"
230+
231+
extensions {
232+
basic-constraints critical=true ca=false
233+
key-usage critical=true {
234+
digital-signature
235+
}
236+
certificate-policies critical=true {
237+
tcg-dice-kp-attest-init
238+
}
239+
dice-tcb-info critical=true {
240+
fwid-list {
241+
fwid {
242+
digest-algorithm "sha3-256"
243+
digest "72fa8f8ea84a42251031366002cbb36281d0131f78cd680436116a720cdd9de5"
244+
}
245+
}
246+
}
247+
}
248+
}
249+
250+
certificate-list "test-alias-1" \
251+
"test-alias-1" \
252+
"test-deviceid-1" \
253+
"test-platformid-1" \
254+
"test-signer-a1"
255+
244256
/// Device 2
245257

246258
key-pair "test-platformid-2" {
@@ -291,7 +303,7 @@ key-pair "test-deviceid-2" {
291303
entity "test-deviceid-2" {
292304
country-name "US"
293305
organization-name "Oxide Computer Company"
294-
common-name "/C=US/O=Oxide Computer Company/CN=test-deviceid-2"
306+
common-name "test-deviceid-2"
295307
}
296308

297309
certificate "test-deviceid-2" {
@@ -332,7 +344,7 @@ key-pair "test-sprockets-auth-2" {
332344
entity "test-sprockets-auth-2" {
333345
country-name "US"
334346
organization-name "Oxide Computer Company"
335-
common-name "/C=US/O=Oxide Computer Company/CN=test-sprockets-auth-2"
347+
common-name "test-sprockets-auth-2"
336348
}
337349

338350
certificate "test-sprockets-auth-2" {
@@ -366,3 +378,54 @@ certificate "test-sprockets-auth-2" {
366378
}
367379
}
368380

381+
// TODO: sprockets reverses this cert chain before passing it to rustls
382+
certificate-list "test-sprockets-auth-2" \
383+
"test-signer-a1" \
384+
"test-platformid-2" \
385+
"test-deviceid-2" \
386+
"test-sprockets-auth-2"
387+
388+
key-pair "test-alias-2" {
389+
ed25519
390+
}
391+
392+
entity "test-alias-2" {
393+
country-name "US"
394+
organization-name "Oxide Computer Company"
395+
common-name "alias"
396+
}
397+
398+
certificate "test-alias-2" {
399+
issuer-certificate "test-deviceid-2"
400+
issuer-key "test-deviceid-2"
401+
402+
subject-entity "test-alias-2"
403+
subject-key "test-alias-2"
404+
405+
not-after "9999-12-31T23:59:59Z"
406+
serial-number "00"
407+
408+
extensions {
409+
basic-constraints critical=true ca=false
410+
key-usage critical=true {
411+
digital-signature
412+
}
413+
certificate-policies critical=true {
414+
tcg-dice-kp-attest-init
415+
}
416+
dice-tcb-info critical=true {
417+
fwid-list {
418+
fwid {
419+
digest-algorithm "sha3-256"
420+
digest "72fa8f8ea84a42251031366002cbb36281d0131f78cd680436116a720cdd9de5"
421+
}
422+
}
423+
}
424+
}
425+
}
426+
427+
certificate-list "test-alias-2" \
428+
"test-alias-2" \
429+
"test-deviceid-2" \
430+
"test-platformid-2" \
431+
"test-signer-a1"

smf/sled-agent/non-gimlet/config.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,4 +121,5 @@ if_exists = "append"
121121
# See the .kdl file for use with pki-playground for generating
122122
[sprockets]
123123
resolve = { which = "local", priv_key = "/opt/oxide/sled-agent/pkg/sprockets-auth.key.pem", cert_chain = "/opt/oxide/sled-agent/pkg/sprockets-chain.pem" }
124+
attest = { which = "local", priv_key = "/opt/oxide/sled-agent/pkg/sprockets-attest.key.pem", cert_chain = "/opt/oxide/sled-agent/pkg/sprockets-attest-chain.pem", log = "/opt/oxide/sled-agent/pkg/sprockets-log.bin" }
124125
roots = ["/opt/oxide/sled-agent/pkg/root.cert.pem"]

0 commit comments

Comments
 (0)