diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index aa0f1ae99a..44027f4175 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -72,7 +72,6 @@ use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_common::disk::M2Slot; -use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_uuid_kinds::BlueprintUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::MupdateOverrideUuid; @@ -142,8 +141,6 @@ pub enum Error { AllocateInternalDnsSubnet(#[from] NoAvailableDnsSubnets), #[error("error allocating external networking resources")] AllocateExternalNetworking(#[from] ExternalNetworkingError), - #[error("can only have {INTERNAL_DNS_REDUNDANCY} internal DNS servers")] - PolicySpecifiesTooManyInternalDnsServers, #[error("zone is already up-to-date and should not be updated")] ZoneAlreadyUpToDate, #[error( @@ -696,6 +693,10 @@ impl<'a> BlueprintBuilder<'a> { self.new_blueprint_id } + pub fn planning_input(&self) -> &PlanningInput { + &self.input + } + fn resource_allocator( &mut self, ) -> Result<&mut BlueprintResourceAllocator, Error> { diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index da6ec98692..c5915dbe81 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -58,7 +58,6 @@ use nexus_types::inventory::Collection; use nexus_types::inventory::SpType; use omicron_common::api::external::Generation; use omicron_common::disk::M2Slot; -use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; @@ -115,9 +114,6 @@ mod zone_safety; /// services, etc.). const NUM_CONCURRENT_MGS_UPDATES: usize = 1; -/// A receipt that `check_input_validity` has been run prior to planning. -struct InputChecked; - // Details of how a zone's status differs between the blueprint and the sled // inventory #[derive(Debug)] @@ -192,23 +188,11 @@ impl<'a> Planner<'a> { } pub fn plan(mut self) -> Result { - let checked = self.check_input_validity()?; - let report = self.do_plan(checked)?; + let report = self.do_plan()?; Ok(self.blueprint.build(BlueprintSource::Planner(Arc::new(report)))) } - fn check_input_validity(&self) -> Result { - if self.input.target_internal_dns_zone_count() > INTERNAL_DNS_REDUNDANCY - { - return Err(Error::PolicySpecifiesTooManyInternalDnsServers); - } - Ok(InputChecked) - } - - fn do_plan( - &mut self, - _checked: InputChecked, - ) -> Result { + fn do_plan(&mut self) -> Result { // Run the planning steps, recording their step reports as we go. let expunge = self.do_plan_expunge()?; let decommission = self.do_plan_decommission()?; @@ -2493,6 +2477,7 @@ pub(crate) mod test { use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; use omicron_common::policy::COCKROACHDB_REDUNDANCY; use omicron_common::policy::CRUCIBLE_PANTRY_REDUNDANCY; + use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_common::policy::NEXUS_REDUNDANCY; use omicron_common::update::ArtifactId; use omicron_test_utils::dev::test_setup_log; @@ -2995,7 +2980,7 @@ pub(crate) mod test { } // Try to run the planner with a high number of internal DNS zones; - // it will fail because the target is > MAX_DNS_REDUNDANCY. + // it will fail because the target is > INTERNAL_DNS_REDUNDANCY. { let mut builder = example .system @@ -3018,8 +3003,7 @@ pub(crate) mod test { Err(err) => { let err = InlineErrorChain::new(&err).to_string(); assert!( - err.contains("can only have ") - && err.contains(" internal DNS servers"), + err.contains("error allocating internal DNS"), "unexpected error: {err}" ); } @@ -6606,10 +6590,18 @@ pub(crate) mod test { // // Ask for COCKROACHDB_REDUNDANCY cockroach nodes + // If this assertion breaks - which would be okay - we should delete all + // these planning steps explicitly including a base set of CRDB zones. + assert_eq!( + example.system.get_target_cockroachdb_zone_count(), + 0, + "We expect the system is initialized without cockroach zones" + ); let mut input_builder = example.input.clone().into_builder(); input_builder.policy_mut().target_cockroachdb_zone_count = COCKROACHDB_REDUNDANCY; example.input = input_builder.build(); + example.system.target_cockroachdb_zone_count(COCKROACHDB_REDUNDANCY); let blueprint_name = "blueprint_with_cockroach"; let new_blueprint = Planner::new_based_on( diff --git a/nexus/reconfigurator/planning/src/planner/zone_safety.rs b/nexus/reconfigurator/planning/src/planner/zone_safety.rs index 215c5da0f8..4020d25e7f 100644 --- a/nexus/reconfigurator/planning/src/planner/zone_safety.rs +++ b/nexus/reconfigurator/planning/src/planner/zone_safety.rs @@ -14,9 +14,6 @@ use nexus_types::deployment::CockroachdbUnsafeToShutdown; use nexus_types::deployment::ZoneUnsafeToShutdown; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; -use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY; -use omicron_common::policy::COCKROACHDB_REDUNDANCY; -use omicron_common::policy::INTERNAL_DNS_REDUNDANCY; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; @@ -216,11 +213,9 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { } } - // TODO-correctness This should be looking at the input policy target - // redundancy, not the `BOUNDARY_NTP_REDUNDANCY` constant. (Same for the - // other redundancy checks in this file.) - // - if synchronized_boundary_ntp_count < BOUNDARY_NTP_REDUNDANCY { + let target_boundary_ntp_zone_count = + self.blueprint.planning_input().target_boundary_ntp_zone_count(); + if synchronized_boundary_ntp_count < target_boundary_ntp_zone_count { return Some(ZoneUnsafeToShutdown::BoundaryNtp { total_boundary_ntp_zones: self.boundary_ntp_zones.len(), synchronized_count: synchronized_boundary_ntp_count, @@ -236,10 +231,13 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { use CockroachdbUnsafeToShutdown::*; use ZoneUnsafeToShutdown::Cockroachdb; + let target_cockroachdb_zone_count = + self.blueprint.planning_input().target_cockroachdb_zone_count(); + // We must hear from all nodes let all_statuses = &self.inventory.cockroach_status; - if all_statuses.len() < COCKROACHDB_REDUNDANCY { + if all_statuses.len() < target_cockroachdb_zone_count { return Some(Cockroachdb { reason: NotEnoughNodes }); } @@ -260,7 +258,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { let Some(live_nodes) = status.liveness_live_nodes else { return Some(Cockroachdb { reason: MissingLiveNodesStat }); }; - if live_nodes < COCKROACHDB_REDUNDANCY as u64 { + if live_nodes < target_cockroachdb_zone_count as u64 { return Some(Cockroachdb { reason: NotEnoughLiveNodes { live_nodes }, }); @@ -300,10 +298,12 @@ impl<'a> ZoneSafetyChecksBuilder<'a> { // and restarts, at least one exists and can get the control plane back // up and running. // - // Our INTERNAL_DNS_REDUNDANCY factor is set so that we can tolerate "at - // least one upgrade, and at least one failure during that upgrade - // window". - if synchronized_internal_dns_count >= INTERNAL_DNS_REDUNDANCY { + // The target internal DNS zone count should be set so that we can + // tolerate "at least one upgrade, and at least one failure during that + // upgrade window" (e.g., it should be "at least 3" in production). + let target_internal_dns_zone_count = + self.blueprint.planning_input().target_internal_dns_zone_count(); + if synchronized_internal_dns_count >= target_internal_dns_zone_count { return None; } else { return Some(ZoneUnsafeToShutdown::InternalDns { diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 265c968862..a036bb13ba 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -254,6 +254,15 @@ impl SystemDescription { self.target_nexus_zone_count } + pub fn target_cockroachdb_zone_count(&mut self, count: usize) -> &mut Self { + self.target_cockroachdb_zone_count = count; + self + } + + pub fn get_target_cockroachdb_zone_count(&self) -> usize { + self.target_cockroachdb_zone_count + } + pub fn target_boundary_ntp_zone_count( &mut self, count: usize,