Skip to content

Commit da15ab2

Browse files
committed
Add Blippy::check_against_planning_input()
1 parent a78f456 commit da15ab2

File tree

3 files changed

+251
-3
lines changed

3 files changed

+251
-3
lines changed

nexus/reconfigurator/blippy/src/blippy.rs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ use nexus_types::deployment::Blueprint;
1010
use nexus_types::deployment::BlueprintArtifactVersion;
1111
use nexus_types::deployment::BlueprintDatasetConfig;
1212
use nexus_types::deployment::BlueprintZoneConfig;
13+
use nexus_types::deployment::OmicronZoneExternalIp;
14+
use nexus_types::deployment::OmicronZoneNicEntry;
15+
use nexus_types::deployment::PlanningInput;
1316
use nexus_types::inventory::ZpoolName;
1417
use omicron_common::address::DnsSubnet;
1518
use omicron_common::address::Ipv6Subnet;
@@ -55,48 +58,55 @@ impl fmt::Display for Severity {
5558
pub enum Kind {
5659
Blueprint(BlueprintKind),
5760
Sled { sled_id: SledUuid, kind: Box<SledKind> },
61+
PlanningInput(PlanningInputKind),
5862
}
5963

6064
impl Kind {
6165
pub fn display_component(&self) -> impl fmt::Display + '_ {
6266
enum Component<'a> {
6367
Blueprint,
6468
Sled(&'a SledUuid),
69+
PlanningInput,
6570
}
6671

6772
impl fmt::Display for Component<'_> {
6873
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
6974
match self {
7075
Component::Blueprint => write!(f, "blueprint"),
7176
Component::Sled(id) => write!(f, "sled {id}"),
77+
Component::PlanningInput => write!(f, "planning input"),
7278
}
7379
}
7480
}
7581

7682
match self {
77-
Kind::Blueprint { .. } => Component::Blueprint,
83+
Kind::Blueprint(_) => Component::Blueprint,
7884
Kind::Sled { sled_id, .. } => Component::Sled(sled_id),
85+
Kind::PlanningInput(_) => Component::PlanningInput,
7986
}
8087
}
8188

8289
pub fn display_subkind(&self) -> impl fmt::Display + '_ {
8390
enum Subkind<'a> {
8491
Blueprint(&'a BlueprintKind),
8592
Sled(&'a SledKind),
93+
PlanningInput(&'a PlanningInputKind),
8694
}
8795

8896
impl fmt::Display for Subkind<'_> {
8997
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
9098
match self {
9199
Subkind::Blueprint(kind) => write!(f, "{kind}"),
92100
Subkind::Sled(kind) => write!(f, "{kind}"),
101+
Subkind::PlanningInput(kind) => write!(f, "{kind}"),
93102
}
94103
}
95104
}
96105

97106
match self {
98107
Kind::Blueprint(kind) => Subkind::Blueprint(kind),
99108
Kind::Sled { kind, .. } => Subkind::Sled(kind),
109+
Kind::PlanningInput(kind) => Subkind::PlanningInput(kind),
100110
}
101111
}
102112
}
@@ -480,6 +490,54 @@ impl fmt::Display for SledKind {
480490
}
481491
}
482492

493+
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
494+
pub enum PlanningInputKind {
495+
IpNotInBlueprint(OmicronZoneExternalIp),
496+
NicMacNotInBluperint(OmicronZoneNicEntry),
497+
NicIpNotInBlueprint(OmicronZoneNicEntry),
498+
NicWithUnknownOpteSubnet(OmicronZoneNicEntry),
499+
}
500+
501+
impl fmt::Display for PlanningInputKind {
502+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
503+
match self {
504+
PlanningInputKind::IpNotInBlueprint(ip) => {
505+
write!(
506+
f,
507+
"planning input contains an external IP \
508+
not described by the blueprint: {} ({})",
509+
ip.ip(),
510+
ip.id()
511+
)
512+
}
513+
PlanningInputKind::NicMacNotInBluperint(nic) => {
514+
write!(
515+
f,
516+
"planning input contains a NIC with a MAC address \
517+
not described by the blueprint: {} (NIC {} in zone {})",
518+
nic.nic.mac, nic.nic.id, nic.zone_id,
519+
)
520+
}
521+
PlanningInputKind::NicIpNotInBlueprint(nic) => {
522+
write!(
523+
f,
524+
"planning input contains a NIC with an IP address \
525+
not described by the blueprint: {} (NIC {} in zone {})",
526+
nic.nic.ip, nic.nic.id, nic.zone_id,
527+
)
528+
}
529+
PlanningInputKind::NicWithUnknownOpteSubnet(nic) => {
530+
write!(
531+
f,
532+
"planning input contains a NIC with an IP not in a known
533+
OPTE subnet: {} (NIC {} in zone {})",
534+
nic.nic.ip, nic.nic.id, nic.zone_id,
535+
)
536+
}
537+
}
538+
}
539+
}
540+
483541
impl Note {
484542
pub fn display(&self, sort_key: BlippyReportSortKey) -> NoteDisplay<'_> {
485543
NoteDisplay { note: self, sort_key }
@@ -530,6 +588,14 @@ impl<'a> Blippy<'a> {
530588
slf
531589
}
532590

591+
pub fn check_against_planning_input(
592+
mut self,
593+
input: &PlanningInput,
594+
) -> Self {
595+
checks::perform_planning_input_checks(&mut self, input);
596+
self
597+
}
598+
533599
pub fn blueprint(&self) -> &'a Blueprint {
534600
self.blueprint
535601
}
@@ -554,6 +620,14 @@ impl<'a> Blippy<'a> {
554620
});
555621
}
556622

623+
pub(crate) fn push_planning_input_note(
624+
&mut self,
625+
severity: Severity,
626+
kind: PlanningInputKind,
627+
) {
628+
self.notes.push(Note { severity, kind: Kind::PlanningInput(kind) });
629+
}
630+
557631
pub fn into_report(
558632
self,
559633
sort_key: BlippyReportSortKey,

nexus/reconfigurator/blippy/src/checks.rs

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use crate::blippy::Blippy;
66
use crate::blippy::BlueprintKind;
7+
use crate::blippy::PlanningInputKind;
78
use crate::blippy::Severity;
89
use crate::blippy::SledKind;
910
use nexus_sled_agent_shared::inventory::ZoneKind;
@@ -17,6 +18,7 @@ use nexus_types::deployment::BlueprintZoneDisposition;
1718
use nexus_types::deployment::BlueprintZoneImageSource;
1819
use nexus_types::deployment::BlueprintZoneType;
1920
use nexus_types::deployment::OmicronZoneExternalIp;
21+
use nexus_types::deployment::PlanningInput;
2022
use nexus_types::deployment::SledFilter;
2123
use nexus_types::deployment::blueprint_zone_type;
2224
use omicron_common::address::DnsSubnet;
@@ -31,8 +33,16 @@ use omicron_uuid_kinds::ZpoolUuid;
3133
use std::collections::BTreeMap;
3234
use std::collections::BTreeSet;
3335
use std::collections::btree_map::Entry;
36+
use std::net::IpAddr;
3437
use std::net::Ipv6Addr;
3538

39+
pub(crate) fn perform_planning_input_checks(
40+
blippy: &mut Blippy<'_>,
41+
input: &PlanningInput,
42+
) {
43+
check_planning_input_network_records_appear_in_blueprint(blippy, input);
44+
}
45+
3646
pub(crate) fn perform_all_blueprint_only_checks(blippy: &mut Blippy<'_>) {
3747
check_underlay_ips(blippy);
3848
check_external_networking(blippy);
@@ -2037,3 +2047,163 @@ mod tests {
20372047
logctx.cleanup_successful();
20382048
}
20392049
}
2050+
2051+
// For a given `PlanningInput` / `Blueprint` pair that could be passed to the
2052+
// planner, there should never be any external networking resources in the
2053+
// planning input (which is derived from the contents of CRDB) that we don't
2054+
// know about from the parent blueprint. It's possible a given planning
2055+
// iteration could see such a state if there have been intermediate changes made
2056+
// by other Nexus instances; e.g.,
2057+
//
2058+
// 1. Nexus A generates a `PlanningInput` by reading from CRDB
2059+
// 2. Nexus B executes on a target blueprint that removes IPs/NICs from
2060+
// CRDB
2061+
// 3. Nexus B regenerates a new blueprint and prunes the zone(s) associated
2062+
// with the IPs/NICs from step 2
2063+
// 4. Nexus B makes this new blueprint the target
2064+
// 5. Nexus A attempts to run planning with its `PlanningInput` from step 1 but
2065+
// the target blueprint from step 4; this will fail the following checks
2066+
// because the input contains records that were removed in step 3
2067+
//
2068+
// We do not need to handle this class of error; it's a transient failure that
2069+
// will clear itself up when Nexus A repeats its planning loop from the top and
2070+
// generates a new `PlanningInput`.
2071+
//
2072+
// There may still be database records corresponding to _expunged_ zones, but
2073+
// that's okay: it just means we haven't yet realized a blueprint where those
2074+
// zones are expunged. And those zones should still be in the blueprint (not
2075+
// pruned) until their database records are cleaned up.
2076+
//
2077+
// It's also possible that there may be networking records in the database
2078+
// assigned to zones that have been expunged, and the blueprint uses those same
2079+
// records for new zones. This is also fine and expected, and is a similar case
2080+
// to the previous paragraph: a zone with networking resources was expunged, the
2081+
// database doesn't realize it yet, but can still move forward and make planning
2082+
// decisions that reuse those resources for new zones.
2083+
fn check_planning_input_network_records_appear_in_blueprint(
2084+
blippy: &mut Blippy<'_>,
2085+
input: &PlanningInput,
2086+
) {
2087+
use nexus_types::deployment::OmicronZoneExternalIp;
2088+
use omicron_common::address::DNS_OPTE_IPV4_SUBNET;
2089+
use omicron_common::address::DNS_OPTE_IPV6_SUBNET;
2090+
use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET;
2091+
use omicron_common::address::NEXUS_OPTE_IPV6_SUBNET;
2092+
use omicron_common::address::NTP_OPTE_IPV4_SUBNET;
2093+
use omicron_common::address::NTP_OPTE_IPV6_SUBNET;
2094+
use omicron_common::api::external::MacAddr;
2095+
2096+
let mut all_macs: BTreeSet<MacAddr> = BTreeSet::new();
2097+
let mut all_nexus_nic_ips: BTreeSet<IpAddr> = BTreeSet::new();
2098+
let mut all_boundary_ntp_nic_ips: BTreeSet<IpAddr> = BTreeSet::new();
2099+
let mut all_external_dns_nic_ips: BTreeSet<IpAddr> = BTreeSet::new();
2100+
let mut all_external_ips: BTreeSet<OmicronZoneExternalIp> = BTreeSet::new();
2101+
2102+
// Unlike the construction of the external IP allocator and existing IPs
2103+
// constructed above in `BuilderExternalNetworking::new()`, we do not
2104+
// check for duplicates here: we could very well see reuse of IPs
2105+
// between expunged zones or between expunged -> running zones.
2106+
for (_, z) in
2107+
blippy.blueprint().all_omicron_zones(BlueprintZoneDisposition::any)
2108+
{
2109+
let zone_type = &z.zone_type;
2110+
match zone_type {
2111+
BlueprintZoneType::BoundaryNtp(ntp) => {
2112+
all_boundary_ntp_nic_ips.insert(ntp.nic.ip);
2113+
}
2114+
BlueprintZoneType::Nexus(nexus) => {
2115+
all_nexus_nic_ips.insert(nexus.nic.ip);
2116+
}
2117+
BlueprintZoneType::ExternalDns(dns) => {
2118+
all_external_dns_nic_ips.insert(dns.nic.ip);
2119+
}
2120+
_ => (),
2121+
}
2122+
2123+
if let Some((external_ip, nic)) = zone_type.external_networking() {
2124+
// Ignore localhost (used by the test suite).
2125+
if !external_ip.ip().is_loopback() {
2126+
all_external_ips.insert(external_ip);
2127+
}
2128+
all_macs.insert(nic.mac);
2129+
}
2130+
}
2131+
for external_ip_entry in
2132+
input.network_resources().omicron_zone_external_ips()
2133+
{
2134+
// As above, ignore localhost (used by the test suite).
2135+
if external_ip_entry.ip.ip().is_loopback() {
2136+
continue;
2137+
}
2138+
if !all_external_ips.contains(&external_ip_entry.ip) {
2139+
blippy.push_planning_input_note(
2140+
Severity::Fatal,
2141+
PlanningInputKind::IpNotInBlueprint(external_ip_entry.ip),
2142+
);
2143+
}
2144+
}
2145+
for nic_entry in input.network_resources().omicron_zone_nics() {
2146+
if !all_macs.contains(&nic_entry.nic.mac) {
2147+
blippy.push_planning_input_note(
2148+
Severity::Fatal,
2149+
PlanningInputKind::NicMacNotInBluperint(nic_entry),
2150+
);
2151+
}
2152+
match nic_entry.nic.ip {
2153+
IpAddr::V4(ip) if NEXUS_OPTE_IPV4_SUBNET.contains(ip) => {
2154+
if !all_nexus_nic_ips.contains(&ip.into()) {
2155+
blippy.push_planning_input_note(
2156+
Severity::Fatal,
2157+
PlanningInputKind::NicIpNotInBlueprint(nic_entry),
2158+
);
2159+
}
2160+
}
2161+
IpAddr::V4(ip) if NTP_OPTE_IPV4_SUBNET.contains(ip) => {
2162+
if !all_boundary_ntp_nic_ips.contains(&ip.into()) {
2163+
blippy.push_planning_input_note(
2164+
Severity::Fatal,
2165+
PlanningInputKind::NicIpNotInBlueprint(nic_entry),
2166+
);
2167+
}
2168+
}
2169+
IpAddr::V4(ip) if DNS_OPTE_IPV4_SUBNET.contains(ip) => {
2170+
if !all_external_dns_nic_ips.contains(&ip.into()) {
2171+
blippy.push_planning_input_note(
2172+
Severity::Fatal,
2173+
PlanningInputKind::NicIpNotInBlueprint(nic_entry),
2174+
);
2175+
}
2176+
}
2177+
IpAddr::V6(ip) if NEXUS_OPTE_IPV6_SUBNET.contains(ip) => {
2178+
if !all_nexus_nic_ips.contains(&ip.into()) {
2179+
blippy.push_planning_input_note(
2180+
Severity::Fatal,
2181+
PlanningInputKind::NicIpNotInBlueprint(nic_entry),
2182+
);
2183+
}
2184+
}
2185+
IpAddr::V6(ip) if NTP_OPTE_IPV6_SUBNET.contains(ip) => {
2186+
if !all_boundary_ntp_nic_ips.contains(&ip.into()) {
2187+
blippy.push_planning_input_note(
2188+
Severity::Fatal,
2189+
PlanningInputKind::NicIpNotInBlueprint(nic_entry),
2190+
);
2191+
}
2192+
}
2193+
IpAddr::V6(ip) if DNS_OPTE_IPV6_SUBNET.contains(ip) => {
2194+
if !all_external_dns_nic_ips.contains(&ip.into()) {
2195+
blippy.push_planning_input_note(
2196+
Severity::Fatal,
2197+
PlanningInputKind::NicIpNotInBlueprint(nic_entry),
2198+
);
2199+
}
2200+
}
2201+
_ => {
2202+
blippy.push_planning_input_note(
2203+
Severity::Fatal,
2204+
PlanningInputKind::NicWithUnknownOpteSubnet(nic_entry),
2205+
);
2206+
}
2207+
}
2208+
}
2209+
}

nexus/types/src/deployment/network_resources.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,9 @@ pub struct OmicronZoneExternalSnatIp {
292292
///
293293
/// This is a slimmer `nexus_db_model::ServiceNetworkInterface` that only stores
294294
/// the fields necessary for blueprint planning.
295-
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
295+
#[derive(
296+
Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize,
297+
)]
296298
pub struct OmicronZoneNic {
297299
pub id: VnicUuid,
298300
pub mac: MacAddr,
@@ -337,7 +339,9 @@ impl TriHashItem for OmicronZoneExternalIpEntry {
337339
/// A pair of an Omicron zone ID and a network interface.
338340
///
339341
/// Part of [`OmicronZoneNetworkResources`].
340-
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize)]
342+
#[derive(
343+
Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Deserialize, Serialize,
344+
)]
341345
pub struct OmicronZoneNicEntry {
342346
pub zone_id: OmicronZoneUuid,
343347
pub nic: OmicronZoneNic,

0 commit comments

Comments
 (0)