Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ use omicron_common::api::external::Vni;
use omicron_common::api::internal::shared::NetworkInterface;
use omicron_common::api::internal::shared::NetworkInterfaceKind;
use omicron_common::disk::M2Slot;
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
use omicron_uuid_kinds::BlueprintUuid;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::MupdateOverrideUuid;
Expand Down Expand Up @@ -142,8 +141,6 @@ pub enum Error {
AllocateInternalDnsSubnet(#[from] NoAvailableDnsSubnets),
#[error("error allocating external networking resources")]
AllocateExternalNetworking(#[from] ExternalNetworkingError),
#[error("can only have {INTERNAL_DNS_REDUNDANCY} internal DNS servers")]
PolicySpecifiesTooManyInternalDnsServers,
#[error("zone is already up-to-date and should not be updated")]
ZoneAlreadyUpToDate,
#[error(
Expand Down Expand Up @@ -696,6 +693,10 @@ impl<'a> BlueprintBuilder<'a> {
self.new_blueprint_id
}

/// Returns a shared reference to the [`PlanningInput`] held by this builder
/// (the `input` field), so callers can read its values without taking
/// ownership.
pub fn planning_input(&self) -> &PlanningInput {
&self.input
}

fn resource_allocator(
&mut self,
) -> Result<&mut BlueprintResourceAllocator, Error> {
Expand Down
34 changes: 13 additions & 21 deletions nexus/reconfigurator/planning/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ use nexus_types::inventory::Collection;
use nexus_types::inventory::SpType;
use omicron_common::api::external::Generation;
use omicron_common::disk::M2Slot;
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
use omicron_uuid_kinds::OmicronZoneUuid;
use omicron_uuid_kinds::PhysicalDiskUuid;
use omicron_uuid_kinds::SledUuid;
Expand Down Expand Up @@ -115,9 +114,6 @@ mod zone_safety;
/// services, etc.).
const NUM_CONCURRENT_MGS_UPDATES: usize = 1;

/// A receipt that `check_input_validity` has been run prior to planning.
struct InputChecked;

// Details of how a zone's status differs between the blueprint and the sled
// inventory
#[derive(Debug)]
Expand Down Expand Up @@ -192,23 +188,11 @@ impl<'a> Planner<'a> {
}

pub fn plan(mut self) -> Result<Blueprint, Error> {
let checked = self.check_input_validity()?;
let report = self.do_plan(checked)?;
let report = self.do_plan()?;
Ok(self.blueprint.build(BlueprintSource::Planner(Arc::new(report))))
}

/// Validates the planning input before any planning step runs.
///
/// # Errors
///
/// Returns [`Error::PolicySpecifiesTooManyInternalDnsServers`] when the
/// input's target internal DNS zone count is greater than
/// `INTERNAL_DNS_REDUNDANCY`.
///
/// On success, returns an [`InputChecked`] value.
fn check_input_validity(&self) -> Result<InputChecked, Error> {
// A target equal to the constant is accepted; only strictly larger
// targets are rejected.
if self.input.target_internal_dns_zone_count() > INTERNAL_DNS_REDUNDANCY
{
return Err(Error::PolicySpecifiesTooManyInternalDnsServers);
}
Ok(InputChecked)
}

fn do_plan(
&mut self,
_checked: InputChecked,
) -> Result<PlanningReport, Error> {
fn do_plan(&mut self) -> Result<PlanningReport, Error> {
// Run the planning steps, recording their step reports as we go.
let expunge = self.do_plan_expunge()?;
let decommission = self.do_plan_decommission()?;
Expand Down Expand Up @@ -2493,6 +2477,7 @@ pub(crate) mod test {
use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY;
use omicron_common::policy::COCKROACHDB_REDUNDANCY;
use omicron_common::policy::CRUCIBLE_PANTRY_REDUNDANCY;
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
use omicron_common::policy::NEXUS_REDUNDANCY;
use omicron_common::update::ArtifactId;
use omicron_test_utils::dev::test_setup_log;
Expand Down Expand Up @@ -2995,7 +2980,7 @@ pub(crate) mod test {
}

// Try to run the planner with a high number of internal DNS zones;
// it will fail because the target is > MAX_DNS_REDUNDANCY.
// it will fail because the target is > INTERNAL_DNS_REDUNDANCY.
{
let mut builder = example
.system
Expand All @@ -3018,8 +3003,7 @@ pub(crate) mod test {
Err(err) => {
let err = InlineErrorChain::new(&err).to_string();
assert!(
err.contains("can only have ")
&& err.contains(" internal DNS servers"),
err.contains("error allocating internal DNS"),
"unexpected error: {err}"
);
}
Expand Down Expand Up @@ -6606,10 +6590,18 @@ pub(crate) mod test {
//
// Ask for COCKROACHDB_REDUNDANCY cockroach nodes

// If this assertion breaks - which would be okay - we should delete all
// these planning steps explicitly including a base set of CRDB zones.
assert_eq!(
example.system.get_target_cockroachdb_zone_count(),
0,
"We expect the system is initialized without cockroach zones"
);
let mut input_builder = example.input.clone().into_builder();
input_builder.policy_mut().target_cockroachdb_zone_count =
COCKROACHDB_REDUNDANCY;
example.input = input_builder.build();
example.system.target_cockroachdb_zone_count(COCKROACHDB_REDUNDANCY);

let blueprint_name = "blueprint_with_cockroach";
let new_blueprint = Planner::new_based_on(
Expand Down
28 changes: 14 additions & 14 deletions nexus/reconfigurator/planning/src/planner/zone_safety.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ use nexus_types::deployment::CockroachdbUnsafeToShutdown;
use nexus_types::deployment::ZoneUnsafeToShutdown;
use nexus_types::inventory::Collection;
use omicron_common::api::external::Generation;
use omicron_common::policy::BOUNDARY_NTP_REDUNDANCY;
use omicron_common::policy::COCKROACHDB_REDUNDANCY;
use omicron_common::policy::INTERNAL_DNS_REDUNDANCY;
use omicron_uuid_kinds::OmicronZoneUuid;
use omicron_uuid_kinds::SledUuid;
use std::collections::BTreeMap;
Expand Down Expand Up @@ -216,11 +213,9 @@ impl<'a> ZoneSafetyChecksBuilder<'a> {
}
}

// TODO-correctness This should be looking at the input policy target
// redundancy, not the `BOUNDARY_NTP_REDUNDANCY` constant. (Same for the
// other redundancy checks in this file.)
// <https://github.com/oxidecomputer/omicron/issues/9220>
if synchronized_boundary_ntp_count < BOUNDARY_NTP_REDUNDANCY {
let target_boundary_ntp_zone_count =
self.blueprint.planning_input().target_boundary_ntp_zone_count();
if synchronized_boundary_ntp_count < target_boundary_ntp_zone_count {
return Some(ZoneUnsafeToShutdown::BoundaryNtp {
total_boundary_ntp_zones: self.boundary_ntp_zones.len(),
synchronized_count: synchronized_boundary_ntp_count,
Expand All @@ -236,10 +231,13 @@ impl<'a> ZoneSafetyChecksBuilder<'a> {
use CockroachdbUnsafeToShutdown::*;
use ZoneUnsafeToShutdown::Cockroachdb;

let target_cockroachdb_zone_count =
self.blueprint.planning_input().target_cockroachdb_zone_count();

// We must hear from all nodes
let all_statuses = &self.inventory.cockroach_status;

if all_statuses.len() < COCKROACHDB_REDUNDANCY {
if all_statuses.len() < target_cockroachdb_zone_count {
return Some(Cockroachdb { reason: NotEnoughNodes });
}

Expand All @@ -260,7 +258,7 @@ impl<'a> ZoneSafetyChecksBuilder<'a> {
let Some(live_nodes) = status.liveness_live_nodes else {
return Some(Cockroachdb { reason: MissingLiveNodesStat });
};
if live_nodes < COCKROACHDB_REDUNDANCY as u64 {
if live_nodes < target_cockroachdb_zone_count as u64 {
return Some(Cockroachdb {
reason: NotEnoughLiveNodes { live_nodes },
});
Expand Down Expand Up @@ -300,10 +298,12 @@ impl<'a> ZoneSafetyChecksBuilder<'a> {
// and restarts, at least one exists and can get the control plane back
// up and running.
//
// Our INTERNAL_DNS_REDUNDANCY factor is set so that we can tolerate "at
// least one upgrade, and at least one failure during that upgrade
// window".
if synchronized_internal_dns_count >= INTERNAL_DNS_REDUNDANCY {
// The target internal DNS zone count should be set so that we can
// tolerate "at least one upgrade, and at least one failure during that
// upgrade window" (e.g., it should be "at least 3" in production).
let target_internal_dns_zone_count =
self.blueprint.planning_input().target_internal_dns_zone_count();
if synchronized_internal_dns_count >= target_internal_dns_zone_count {
return None;
} else {
return Some(ZoneUnsafeToShutdown::InternalDns {
Expand Down
9 changes: 9 additions & 0 deletions nexus/reconfigurator/planning/src/system.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,15 @@ impl SystemDescription {
self.target_nexus_zone_count
}

/// Sets the target CockroachDB zone count stored on this description to
/// `count`.
///
/// Returns `&mut Self` so further calls can be chained on the same value.
pub fn target_cockroachdb_zone_count(&mut self, count: usize) -> &mut Self {
self.target_cockroachdb_zone_count = count;
self
}

/// Returns the target CockroachDB zone count currently stored on this
/// description (the value last written by
/// `target_cockroachdb_zone_count`, or whatever the field was initialized
/// to).
pub fn get_target_cockroachdb_zone_count(&self) -> usize {
self.target_cockroachdb_zone_count
}

pub fn target_boundary_ntp_zone_count(
&mut self,
count: usize,
Expand Down
Loading