44
55use crate :: blippy:: Blippy ;
66use crate :: blippy:: BlueprintKind ;
7+ use crate :: blippy:: PlanningInputKind ;
78use crate :: blippy:: Severity ;
89use crate :: blippy:: SledKind ;
910use nexus_sled_agent_shared:: inventory:: ZoneKind ;
@@ -17,6 +18,7 @@ use nexus_types::deployment::BlueprintZoneDisposition;
1718use nexus_types:: deployment:: BlueprintZoneImageSource ;
1819use nexus_types:: deployment:: BlueprintZoneType ;
1920use nexus_types:: deployment:: OmicronZoneExternalIp ;
21+ use nexus_types:: deployment:: PlanningInput ;
2022use nexus_types:: deployment:: SledFilter ;
2123use nexus_types:: deployment:: blueprint_zone_type;
2224use omicron_common:: address:: DnsSubnet ;
@@ -31,8 +33,16 @@ use omicron_uuid_kinds::ZpoolUuid;
3133use std:: collections:: BTreeMap ;
3234use std:: collections:: BTreeSet ;
3335use std:: collections:: btree_map:: Entry ;
36+ use std:: net:: IpAddr ;
3437use std:: net:: Ipv6Addr ;
3538
39+ pub ( crate ) fn perform_planning_input_checks (
40+ blippy : & mut Blippy < ' _ > ,
41+ input : & PlanningInput ,
42+ ) {
43+ check_planning_input_network_records_appear_in_blueprint ( blippy, input) ;
44+ }
45+
3646pub ( crate ) fn perform_all_blueprint_only_checks ( blippy : & mut Blippy < ' _ > ) {
3747 check_underlay_ips ( blippy) ;
3848 check_external_networking ( blippy) ;
@@ -2037,3 +2047,163 @@ mod tests {
20372047 logctx. cleanup_successful ( ) ;
20382048 }
20392049}
2050+
2051+ // For a given `PlanningInput` / `Blueprint` pair that could be passed to the
2052+ // planner, there should never be any external networking resources in the
2053+ // planning input (which is derived from the contents of CRDB) that we don't
2054+ // know about from the parent blueprint. It's possible a given planning
2055+ // iteration could see such a state if there have been intermediate changes made
2056+ // by other Nexus instances; e.g.,
2057+ //
2058+ // 1. Nexus A generates a `PlanningInput` by reading from CRDB
2059+ // 2. Nexus B executes on a target blueprint that removes IPs/NICs from
2060+ // CRDB
2061+ // 3. Nexus B regenerates a new blueprint and prunes the zone(s) associated
2062+ // with the IPs/NICs from step 2
2063+ // 4. Nexus B makes this new blueprint the target
2064+ // 5. Nexus A attempts to run planning with its `PlanningInput` from step 1 but
2065+ // the target blueprint from step 4; this will fail the following checks
2066+ // because the input contains records that were removed in step 3
2067+ //
2068+ // We do not need to handle this class of error; it's a transient failure that
2069+ // will clear itself up when Nexus A repeats its planning loop from the top and
2070+ // generates a new `PlanningInput`.
2071+ //
2072+ // There may still be database records corresponding to _expunged_ zones, but
2073+ // that's okay: it just means we haven't yet realized a blueprint where those
2074+ // zones are expunged. And those should still be in the blueprint (not
2075+ // pruned) until their database records are cleaned up.
2076+ //
2077+ // It's also possible that there may be networking records in the database
2078+ // assigned to zones that have been expunged, and the blueprint uses those same
2079+ // records for new zones. This is also fine and expected, and is a similar case
2080+ // to the previous paragraph: a zone with networking resources was expunged, the
2081+ // database doesn't realize it yet, but can still move forward and make planning
2082+ // decisions that reuse those resources for new zones.
2083+ fn check_planning_input_network_records_appear_in_blueprint (
2084+ blippy : & mut Blippy < ' _ > ,
2085+ input : & PlanningInput ,
2086+ ) {
2087+ use nexus_types:: deployment:: OmicronZoneExternalIp ;
2088+ use omicron_common:: address:: DNS_OPTE_IPV4_SUBNET ;
2089+ use omicron_common:: address:: DNS_OPTE_IPV6_SUBNET ;
2090+ use omicron_common:: address:: NEXUS_OPTE_IPV4_SUBNET ;
2091+ use omicron_common:: address:: NEXUS_OPTE_IPV6_SUBNET ;
2092+ use omicron_common:: address:: NTP_OPTE_IPV4_SUBNET ;
2093+ use omicron_common:: address:: NTP_OPTE_IPV6_SUBNET ;
2094+ use omicron_common:: api:: external:: MacAddr ;
2095+
2096+ let mut all_macs: BTreeSet < MacAddr > = BTreeSet :: new ( ) ;
2097+ let mut all_nexus_nic_ips: BTreeSet < IpAddr > = BTreeSet :: new ( ) ;
2098+ let mut all_boundary_ntp_nic_ips: BTreeSet < IpAddr > = BTreeSet :: new ( ) ;
2099+ let mut all_external_dns_nic_ips: BTreeSet < IpAddr > = BTreeSet :: new ( ) ;
2100+ let mut all_external_ips: BTreeSet < OmicronZoneExternalIp > = BTreeSet :: new ( ) ;
2101+
2102+ // Unlike the construction of the external IP allocator and existing IPs
2103+ // constructed above in `BuilderExternalNetworking::new()`, we do not
2104+ // check for duplicates here: we could very well see reuse of IPs
2105+ // between expunged zones or between expunged -> running zones.
2106+ for ( _, z) in
2107+ blippy. blueprint ( ) . all_omicron_zones ( BlueprintZoneDisposition :: any)
2108+ {
2109+ let zone_type = & z. zone_type ;
2110+ match zone_type {
2111+ BlueprintZoneType :: BoundaryNtp ( ntp) => {
2112+ all_boundary_ntp_nic_ips. insert ( ntp. nic . ip ) ;
2113+ }
2114+ BlueprintZoneType :: Nexus ( nexus) => {
2115+ all_nexus_nic_ips. insert ( nexus. nic . ip ) ;
2116+ }
2117+ BlueprintZoneType :: ExternalDns ( dns) => {
2118+ all_external_dns_nic_ips. insert ( dns. nic . ip ) ;
2119+ }
2120+ _ => ( ) ,
2121+ }
2122+
2123+ if let Some ( ( external_ip, nic) ) = zone_type. external_networking ( ) {
2124+ // Ignore localhost (used by the test suite).
2125+ if !external_ip. ip ( ) . is_loopback ( ) {
2126+ all_external_ips. insert ( external_ip) ;
2127+ }
2128+ all_macs. insert ( nic. mac ) ;
2129+ }
2130+ }
2131+ for external_ip_entry in
2132+ input. network_resources ( ) . omicron_zone_external_ips ( )
2133+ {
2134+ // As above, ignore localhost (used by the test suite).
2135+ if external_ip_entry. ip . ip ( ) . is_loopback ( ) {
2136+ continue ;
2137+ }
2138+ if !all_external_ips. contains ( & external_ip_entry. ip ) {
2139+ blippy. push_planning_input_note (
2140+ Severity :: Fatal ,
2141+ PlanningInputKind :: IpNotInBlueprint ( external_ip_entry. ip ) ,
2142+ ) ;
2143+ }
2144+ }
2145+ for nic_entry in input. network_resources ( ) . omicron_zone_nics ( ) {
2146+ if !all_macs. contains ( & nic_entry. nic . mac ) {
2147+ blippy. push_planning_input_note (
2148+ Severity :: Fatal ,
2149+ PlanningInputKind :: NicMacNotInBluperint ( nic_entry) ,
2150+ ) ;
2151+ }
2152+ match nic_entry. nic . ip {
2153+ IpAddr :: V4 ( ip) if NEXUS_OPTE_IPV4_SUBNET . contains ( ip) => {
2154+ if !all_nexus_nic_ips. contains ( & ip. into ( ) ) {
2155+ blippy. push_planning_input_note (
2156+ Severity :: Fatal ,
2157+ PlanningInputKind :: NicIpNotInBlueprint ( nic_entry) ,
2158+ ) ;
2159+ }
2160+ }
2161+ IpAddr :: V4 ( ip) if NTP_OPTE_IPV4_SUBNET . contains ( ip) => {
2162+ if !all_boundary_ntp_nic_ips. contains ( & ip. into ( ) ) {
2163+ blippy. push_planning_input_note (
2164+ Severity :: Fatal ,
2165+ PlanningInputKind :: NicIpNotInBlueprint ( nic_entry) ,
2166+ ) ;
2167+ }
2168+ }
2169+ IpAddr :: V4 ( ip) if DNS_OPTE_IPV4_SUBNET . contains ( ip) => {
2170+ if !all_external_dns_nic_ips. contains ( & ip. into ( ) ) {
2171+ blippy. push_planning_input_note (
2172+ Severity :: Fatal ,
2173+ PlanningInputKind :: NicIpNotInBlueprint ( nic_entry) ,
2174+ ) ;
2175+ }
2176+ }
2177+ IpAddr :: V6 ( ip) if NEXUS_OPTE_IPV6_SUBNET . contains ( ip) => {
2178+ if !all_nexus_nic_ips. contains ( & ip. into ( ) ) {
2179+ blippy. push_planning_input_note (
2180+ Severity :: Fatal ,
2181+ PlanningInputKind :: NicIpNotInBlueprint ( nic_entry) ,
2182+ ) ;
2183+ }
2184+ }
2185+ IpAddr :: V6 ( ip) if NTP_OPTE_IPV6_SUBNET . contains ( ip) => {
2186+ if !all_boundary_ntp_nic_ips. contains ( & ip. into ( ) ) {
2187+ blippy. push_planning_input_note (
2188+ Severity :: Fatal ,
2189+ PlanningInputKind :: NicIpNotInBlueprint ( nic_entry) ,
2190+ ) ;
2191+ }
2192+ }
2193+ IpAddr :: V6 ( ip) if DNS_OPTE_IPV6_SUBNET . contains ( ip) => {
2194+ if !all_external_dns_nic_ips. contains ( & ip. into ( ) ) {
2195+ blippy. push_planning_input_note (
2196+ Severity :: Fatal ,
2197+ PlanningInputKind :: NicIpNotInBlueprint ( nic_entry) ,
2198+ ) ;
2199+ }
2200+ }
2201+ _ => {
2202+ blippy. push_planning_input_note (
2203+ Severity :: Fatal ,
2204+ PlanningInputKind :: NicWithUnknownOpteSubnet ( nic_entry) ,
2205+ ) ;
2206+ }
2207+ }
2208+ }
2209+ }
0 commit comments