Skip to content

Commit 02d8cd5

Browse files
authored
[inventory] Add full OmicronSledConfig and fields for upcoming config reconciler (#8188)
The primary change here is replacing these inventory fields (a subset of `OmicronSledConfig`):

```rust
pub omicron_zones: OmicronZonesConfig,
pub omicron_physical_disks_generation: Generation,
```

with these:

```rust
pub ledgered_sled_config: Option<OmicronSledConfig>,
pub reconciler_status: ConfigReconcilerInventoryStatus,
pub last_reconciliation: Option<ConfigReconcilerInventory>,
```

Once #8064 lands, all three of these will be filled in meaningfully; as of this PR, only `ledgered_sled_config` is populated. (`reconciler_status` is always `NotYetRun` and `last_reconciliation` is always `None`, since there is no reconciler yet.)

The rest of the changes are all fallout from changing inventory:

* Update `omdb` printing
* Update sled-agent to report the new inventory fields
* Update consumers of inventory (tests, reconfigurator planner, one Nexus RPW) - these all just look at `ledgered_sled_config` for now, but will need to be updated in #8064 once other fields are populated
* Update database schema, model, and queries (the bulk of the diff). This requires dropping all preexisting collections, since there's no way to migrate from just `omicron_zones` to a full `OmicronSledConfig`. The first few schema migrations take care of this.

Before merging I'll go through an upgrade on a racklette and confirm things come back up okay after the schema migration blows away all the pre-update inventory collections. (We think this is fine, but it'd be good to confirm.) But I think this is close enough that it's reviewable.

A couple of other minor changes that came along for the ride:

* Closes #6770 (`inv_sled_omicron_zones` is gone now)
* Fixes #8084 (added `image_source` columns to the inventory zone config table, so we don't lose `ImageSource::Artifact { hash }` values reported by sled-agent)
1 parent a016336 commit 02d8cd5

File tree

59 files changed

+2939
-553
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+2939
-553
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dev-tools/omdb/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ nexus-db-schema.workspace = true
4444
nexus-inventory.workspace = true
4545
nexus-reconfigurator-preparation.workspace = true
4646
nexus-saga-recovery.workspace = true
47+
nexus-sled-agent-shared.workspace = true
4748
nexus-types.workspace = true
4849
omicron-common.workspace = true
4950
omicron-uuid-kinds.workspace = true

dev-tools/omdb/src/bin/omdb/db.rs

Lines changed: 196 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,10 @@ use nexus_db_queries::db::pagination::Paginator;
123123
use nexus_db_queries::db::pagination::paginated;
124124
use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL;
125125
use nexus_db_queries::db::queries::region_allocation;
126+
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;
127+
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus;
128+
use nexus_sled_agent_shared::inventory::OmicronSledConfig;
129+
use nexus_sled_agent_shared::inventory::OmicronZoneImageSource;
126130
use nexus_types::deployment::Blueprint;
127131
use nexus_types::deployment::BlueprintZoneDisposition;
128132
use nexus_types::deployment::BlueprintZoneType;
@@ -149,6 +153,7 @@ use omicron_uuid_kinds::DatasetUuid;
149153
use omicron_uuid_kinds::DownstairsRegionUuid;
150154
use omicron_uuid_kinds::GenericUuid;
151155
use omicron_uuid_kinds::InstanceUuid;
156+
use omicron_uuid_kinds::OmicronZoneUuid;
152157
use omicron_uuid_kinds::ParseError;
153158
use omicron_uuid_kinds::PhysicalDiskUuid;
154159
use omicron_uuid_kinds::PropolisUuid;
@@ -7329,27 +7334,204 @@ fn inv_collection_print_sleds(collection: &Collection) {
73297334
println!(" reservation: {reservation:?}, quota: {quota:?}");
73307335
}
73317336

7332-
println!(
7333-
" zones generation: {} (count: {})",
7334-
sled.omicron_zones.generation,
7335-
sled.omicron_zones.zones.len(),
7336-
);
7337+
if let Some(config) = &sled.ledgered_sled_config {
7338+
inv_collection_print_sled_config("LEDGERED", config);
7339+
} else {
7340+
println!(" no ledgered sled config");
7341+
}
73377342

7338-
if sled.omicron_zones.zones.is_empty() {
7339-
continue;
7343+
if let Some(last_reconciliation) = &sled.last_reconciliation {
7344+
if Some(&last_reconciliation.last_reconciled_config)
7345+
== sled.ledgered_sled_config.as_ref()
7346+
{
7347+
println!(" last reconciled config: matches ledgered config");
7348+
} else {
7349+
inv_collection_print_sled_config(
7350+
"LAST RECONCILED CONFIG",
7351+
&last_reconciliation.last_reconciled_config,
7352+
);
7353+
let disk_errs = collect_config_reconciler_errors(
7354+
&last_reconciliation.external_disks,
7355+
);
7356+
let dataset_errs = collect_config_reconciler_errors(
7357+
&last_reconciliation.datasets,
7358+
);
7359+
let zone_errs = collect_config_reconciler_errors(
7360+
&last_reconciliation.zones,
7361+
);
7362+
for (label, errs) in [
7363+
("disk", disk_errs),
7364+
("dataset", dataset_errs),
7365+
("zone", zone_errs),
7366+
] {
7367+
if errs.is_empty() {
7368+
println!(" all {label}s reconciled successfully");
7369+
} else {
7370+
println!(
7371+
" {} {label} reconciliation errors:",
7372+
errs.len()
7373+
);
7374+
for err in errs {
7375+
println!(" {err}");
7376+
}
7377+
}
7378+
}
7379+
}
73407380
}
73417381

7342-
println!(" ZONES FOUND");
7343-
for z in &sled.omicron_zones.zones {
7344-
println!(
7345-
" zone {} (type {})",
7346-
z.id,
7347-
z.zone_type.kind().report_str()
7348-
);
7382+
print!(" reconciler task status: ");
7383+
match &sled.reconciler_status {
7384+
ConfigReconcilerInventoryStatus::NotYetRun => {
7385+
println!("not yet run");
7386+
}
7387+
ConfigReconcilerInventoryStatus::Running {
7388+
config,
7389+
started_at,
7390+
running_for,
7391+
} => {
7392+
println!("running for {running_for:?} (since {started_at})");
7393+
if Some(config) == sled.ledgered_sled_config.as_ref() {
7394+
println!(" reconciling currently-ledgered config");
7395+
} else {
7396+
inv_collection_print_sled_config(
7397+
"RECONCILING CONFIG",
7398+
config,
7399+
);
7400+
}
7401+
}
7402+
ConfigReconcilerInventoryStatus::Idle { completed_at, ran_for } => {
7403+
println!(
7404+
"idle (finished at {completed_at} \
7405+
after running for {ran_for:?})"
7406+
);
7407+
}
73497408
}
73507409
}
73517410
}
73527411

7412+
/// Collects the failed entries of a reconciler result map as display strings.
///
/// Walks `results` in the map's iteration order (ascending key order for a
/// `BTreeMap`) and returns one `"{id}: {message}"` string for every entry
/// whose value is `ConfigReconcilerInventoryResult::Err`. Entries that are
/// `ConfigReconcilerInventoryResult::Ok` are skipped, so an empty return
/// value means no entry reported an error.
fn collect_config_reconciler_errors<T: Ord + Display>(
7413+
results: &BTreeMap<T, ConfigReconcilerInventoryResult>,
7414+
) -> Vec<String> {
7415+
results
7416+
.iter()
7417+
.filter_map(|(id, result)| match result {
7418+
ConfigReconcilerInventoryResult::Ok => None,
7419+
ConfigReconcilerInventoryResult::Err { message } => {
7420+
Some(format!("{id}: {message}"))
7421+
}
7422+
})
7423+
.collect()
7424+
}
7425+
7426+
/// Prints one `OmicronSledConfig` to stdout under a `{label} SLED CONFIG`
/// heading.
///
/// Output consists of the config's generation and `remove_mupdate_override`
/// value, followed by three sections (disks, datasets, zones). Each section
/// is either a single "... config empty" line when the corresponding
/// collection is empty, or a count line followed by a borderless table with
/// one row per entry.
///
/// `label` is used verbatim as the heading prefix; `config` is only read.
fn inv_collection_print_sled_config(label: &str, config: &OmicronSledConfig) {
7427+
// This pattern names every field and has no `..`, so adding a field to
// `OmicronSledConfig` is a compile error here until it is handled.
let OmicronSledConfig {
7428+
generation,
7429+
disks,
7430+
datasets,
7431+
zones,
7432+
remove_mupdate_override,
7433+
} = config;
7434+
7435+
println!("\n{label} SLED CONFIG");
7436+
println!(" generation: {}", generation);
7437+
println!(" remove_mupdate_override: {remove_mupdate_override:?}");
7438+
7439+
// Disks section: id, zpool id, and the vendor/model/serial identity.
if disks.is_empty() {
7440+
println!(" disk config empty");
7441+
} else {
7442+
#[derive(Tabled)]
7443+
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
7444+
struct DiskRow {
7445+
id: PhysicalDiskUuid,
7446+
zpool_id: ZpoolUuid,
7447+
vendor: String,
7448+
model: String,
7449+
serial: String,
7450+
}
7451+
7452+
let rows = disks.iter().map(|d| DiskRow {
7453+
id: d.id,
7454+
zpool_id: d.pool_id,
7455+
vendor: d.identity.vendor.clone(),
7456+
model: d.identity.model.clone(),
7457+
serial: d.identity.serial.clone(),
7458+
});
7459+
let table = tabled::Table::new(rows)
7460+
.with(tabled::settings::Style::empty())
7461+
.with(tabled::settings::Padding::new(8, 1, 0, 0))
7462+
.to_string();
7463+
println!(" DISKS: {}", disks.len());
7464+
println!("{table}");
7465+
}
7466+
7467+
// Datasets section: an unset quota or reservation is rendered as "none".
if datasets.is_empty() {
7468+
println!(" dataset config empty");
7469+
} else {
7470+
#[derive(Tabled)]
7471+
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
7472+
struct DatasetRow {
7473+
id: DatasetUuid,
7474+
name: String,
7475+
compression: String,
7476+
quota: String,
7477+
reservation: String,
7478+
}
7479+
7480+
let rows = datasets.iter().map(|d| DatasetRow {
7481+
id: d.id,
7482+
name: d.name.full_name(),
7483+
compression: d.inner.compression.to_string(),
7484+
quota: d
7485+
.inner
7486+
.quota
7487+
.map(|q| q.to_string())
7488+
.unwrap_or_else(|| "none".to_string()),
7489+
reservation: d
7490+
.inner
7491+
.reservation
7492+
.map(|r| r.to_string())
7493+
.unwrap_or_else(|| "none".to_string()),
7494+
});
7495+
let table = tabled::Table::new(rows)
7496+
.with(tabled::settings::Style::empty())
7497+
.with(tabled::settings::Padding::new(8, 1, 0, 0))
7498+
.to_string();
7499+
println!(" DATASETS: {}", datasets.len());
7500+
println!("{table}");
7501+
}
7502+
7503+
// Zones section: id, zone kind, and where the zone image comes from
// ("install-dataset" or "artifact: {hash}").
if zones.is_empty() {
7504+
println!(" zone config empty");
7505+
} else {
7506+
#[derive(Tabled)]
7507+
#[tabled(rename_all = "SCREAMING_SNAKE_CASE")]
7508+
struct ZoneRow {
7509+
id: OmicronZoneUuid,
7510+
kind: &'static str,
7511+
image_source: String,
7512+
}
7513+
7514+
let rows = zones.iter().map(|z| ZoneRow {
7515+
id: z.id,
7516+
kind: z.zone_type.kind().report_str(),
7517+
image_source: match &z.image_source {
7518+
OmicronZoneImageSource::InstallDataset => {
7519+
"install-dataset".to_string()
7520+
}
7521+
OmicronZoneImageSource::Artifact { hash } => {
7522+
format!("artifact: {hash}")
7523+
}
7524+
},
7525+
});
7526+
let table = tabled::Table::new(rows)
7527+
.with(tabled::settings::Style::empty())
7528+
.with(tabled::settings::Padding::new(8, 1, 0, 0))
7529+
.to_string();
7530+
println!(" ZONES: {}", zones.len());
7531+
println!("{table}");
7532+
}
7533+
}
7534+
73537535
fn inv_collection_print_keeper_membership(collection: &Collection) {
73547536
println!("\nKEEPER MEMBERSHIP");
73557537
for k in &collection.clickhouse_keeper_cluster_membership {

id-map/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ impl<T: IdMappable + Debug + Eq> Diffable for IdMap<T> {
289289

290290
/// Wrapper around a `&'a mut T` that panics when dropped if the borrowed
291291
/// value's `id()` has changed since the wrapper was created.
292+
#[derive(Debug)]
292293
pub struct RefMut<'a, T: IdMappable> {
293294
original_id: T::Id,
294295
// Always `Some(_)` until the `RefMut` is consumed by `into_ref()`.

live-tests/tests/test_nexus_add_remove.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,21 @@ async fn test_nexus_add_remove(lc: &LiveTestContext) {
186186
let agent = latest_collection.sled_agents.get(&sled_id).expect(
187187
"collection information for the sled we added a Nexus to",
188188
);
189-
if agent.omicron_zones.zones.iter().any(|z| z.id == new_zone.id) {
190-
debug!(log, "zone still present in inventory");
191-
return Err(CondCheckError::<()>::NotYet);
189+
if let Some(config) = &agent.ledgered_sled_config {
190+
if config.zones.iter().any(|z| z.id == new_zone.id) {
191+
debug!(log, "zone still present in ledger");
192+
return Err(CondCheckError::<()>::NotYet);
193+
}
194+
}
195+
if let Some(config) = agent
196+
.last_reconciliation
197+
.as_ref()
198+
.map(|lr| &lr.last_reconciled_config)
199+
{
200+
if config.zones.iter().any(|z| z.id == new_zone.id) {
201+
debug!(log, "zone still present in inventory");
202+
return Err(CondCheckError::<()>::NotYet);
203+
}
192204
}
193205
return Ok(latest_collection);
194206
},

nexus-sled-agent-shared/src/inventory.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,12 @@ pub struct Inventory {
111111
pub usable_hardware_threads: u32,
112112
pub usable_physical_ram: ByteCount,
113113
pub reservoir_size: ByteCount,
114-
pub omicron_zones: OmicronZonesConfig,
115114
pub disks: Vec<InventoryDisk>,
116115
pub zpools: Vec<InventoryZpool>,
117116
pub datasets: Vec<InventoryDataset>,
118-
pub omicron_physical_disks_generation: Generation,
117+
pub ledgered_sled_config: Option<OmicronSledConfig>,
118+
pub reconciler_status: ConfigReconcilerInventoryStatus,
119+
pub last_reconciliation: Option<ConfigReconcilerInventory>,
119120
}
120121

121122
/// Describes the last attempt made by the sled-agent-config-reconciler to
@@ -196,6 +197,18 @@ pub struct OmicronSledConfig {
196197
pub remove_mupdate_override: Option<MupdateOverrideUuid>,
197198
}
198199

200+
/// Default `OmicronSledConfig`: the generation returned by
/// `Generation::new()`, default `IdMap`s for disks, datasets, and zones
/// (presumably empty; confirm against `IdMap`'s `Default` impl), and no
/// `remove_mupdate_override`.
impl Default for OmicronSledConfig {
201+
fn default() -> Self {
202+
Self {
203+
generation: Generation::new(),
204+
disks: IdMap::default(),
205+
datasets: IdMap::default(),
206+
zones: IdMap::default(),
207+
remove_mupdate_override: None,
208+
}
209+
}
210+
}
211+
199212
impl Ledgerable for OmicronSledConfig {
200213
fn is_newer_than(&self, other: &Self) -> bool {
201214
self.generation > other.generation

0 commit comments

Comments
 (0)