diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 3a89d0273c1..0caf3680baf 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -13,7 +13,7 @@ use crate::db; use crate::db::identity::Resource; use crate::db::lookup::LookupPath; use crate::db::model::Name; -use crate::db::subnet_allocation::NetworkInterfaceError; +use crate::db::queries::network_interface::NetworkInterfaceError; use crate::external_api::params; use omicron_common::api::external; use omicron_common::api::external::CreateResult; diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index ca235812bc6..d3389881a17 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -9,6 +9,7 @@ use crate::app::{MAX_DISKS_PER_INSTANCE, MAX_NICS_PER_INSTANCE}; use crate::context::OpContext; use crate::db::identity::Resource; use crate::db::lookup::LookupPath; +use crate::db::queries::network_interface::NetworkInterfaceError; use crate::external_api::params; use crate::saga_interface::SagaContext; use crate::{authn, authz, db}; @@ -331,7 +332,6 @@ async fn sic_create_custom_network_interfaces( ) .await; - use crate::db::subnet_allocation::NetworkInterfaceError; match result { Ok(_) => Ok(()), @@ -432,7 +432,7 @@ async fn sic_create_default_network_interface( interface, ) .await - .map_err(db::subnet_allocation::NetworkInterfaceError::into_external) + .map_err(NetworkInterfaceError::into_external) .map_err(ActionError::action_failed)?; Ok(()) } diff --git a/nexus/src/app/vpc.rs b/nexus/src/app/vpc.rs index 10adadb3e52..446cacd1067 100644 --- a/nexus/src/app/vpc.rs +++ b/nexus/src/app/vpc.rs @@ -10,7 +10,7 @@ use crate::db; use crate::db::lookup::LookupPath; use crate::db::model::Name; use crate::db::model::VpcRouterKind; -use crate::db::subnet_allocation::SubnetError; +use crate::db::queries::vpc_subnet::SubnetError; use crate::defaults; use crate::external_api::params; use omicron_common::api::external; diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs index c996ae485f7..84d7f458ee8 100644 --- a/nexus/src/app/vpc_subnet.rs +++ b/nexus/src/app/vpc_subnet.rs @@ -11,7 +11,7 @@ use crate::db::identity::Resource; use crate::db::lookup::LookupPath; use crate::db::model::Name; use crate::db::model::VpcSubnet; -use crate::db::subnet_allocation::SubnetError; +use crate::db::queries::vpc_subnet::SubnetError; use crate::defaults; use crate::external_api::params; use omicron_common::api::external; diff --git a/nexus/src/db/datastore.rs b/nexus/src/db/datastore.rs index 81ba8a86f52..617fe1863d2 100644 --- a/nexus/src/db/datastore.rs +++ b/nexus/src/db/datastore.rs @@ -33,6 +33,10 @@ use crate::db::fixed_data::role_builtin::BUILTIN_ROLES; use crate::db::fixed_data::silo::{DEFAULT_SILO, SILO_ID}; use crate::db::lookup::LookupPath; use crate::db::model::DatabaseString; +use crate::db::queries::network_interface::InsertNetworkInterfaceQuery; +use crate::db::queries::network_interface::NetworkInterfaceError; +use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; +use crate::db::queries::vpc_subnet::SubnetError; use crate::db::{ self, error::{public_error_from_diesel_pool, ErrorHandler, TransactionError}, @@ -49,10 +53,6 @@ use crate::db::{ }, pagination::paginated, pagination::paginated_multicolumn, - subnet_allocation::FilterConflictingVpcSubnetRangesQuery, - subnet_allocation::InsertNetworkInterfaceQuery, - subnet_allocation::NetworkInterfaceError, - subnet_allocation::SubnetError, update_and_check::{UpdateAndCheck, UpdateStatus}, }; use crate::external_api::{params, shared}; diff --git a/nexus/src/db/mod.rs b/nexus/src/db/mod.rs index 1ea4d28bb05..50f69fab08f 100644 --- a/nexus/src/db/mod.rs +++ b/nexus/src/db/mod.rs @@ -18,10 +18,12 @@ pub mod ipv6; pub mod lookup; mod pagination; mod pool; +// This is marked public because the error types are used elsewhere, e.g., in +// sagas. +pub(crate) mod queries; mod saga_recovery; mod saga_types; mod sec_store; -pub(crate) mod subnet_allocation; mod update_and_check; #[cfg(test)] diff --git a/nexus/src/db/queries/mod.rs b/nexus/src/db/queries/mod.rs new file mode 100644 index 00000000000..3fef3d8028e --- /dev/null +++ b/nexus/src/db/queries/mod.rs @@ -0,0 +1,12 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Specialized queries for inserting database records, usually to maintain +//! complex invariants that are most accurately expressed in a single query. + +#[macro_use] +mod next_item; +pub mod network_interface; +pub mod vni; +pub mod vpc_subnet; diff --git a/nexus/src/db/subnet_allocation.rs b/nexus/src/db/queries/network_interface.rs similarity index 68% rename from nexus/src/db/subnet_allocation.rs rename to nexus/src/db/queries/network_interface.rs index 615f928ee93..cb312c516de 100644 --- a/nexus/src/db/subnet_allocation.rs +++ b/nexus/src/db/queries/network_interface.rs @@ -2,415 +2,29 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Diesel queries used for subnet and IP allocation +//! Query for inserting a guest network interface. use crate::app::MAX_NICS_PER_INSTANCE; use crate::db; -use crate::db::identity::Resource; -use crate::db::model::{IncompleteNetworkInterface, VpcSubnet}; -use chrono::{DateTime, Utc}; +use crate::db::model::IncompleteNetworkInterface; +use crate::db::queries::next_item::NextItem; +use crate::defaults::NUM_INITIAL_RESERVED_IP_ADDRESSES; +use chrono::DateTime; +use chrono::Utc; use diesel::pg::Pg; -use diesel::prelude::*; -use diesel::query_builder::*; +use diesel::prelude::Column; +use diesel::query_builder::AstPass; +use diesel::query_builder::QueryFragment; +use diesel::query_builder::QueryId; use diesel::sql_types; +use diesel::Insertable; +use diesel::QueryResult; use ipnetwork::IpNetwork; +use ipnetwork::Ipv4Network; use omicron_common::api::external; -use std::convert::TryFrom; +use std::net::IpAddr; use uuid::Uuid; -// Helper to return the offset of the last valid/allocatable IP in a subnet. -fn generate_last_address_offset(subnet: &ipnetwork::IpNetwork) -> i64 { - // Generate last address in the range. - // - // NOTE: First subtraction is to convert from the subnet size to an - // offset, since `generate_series` is inclusive of the last value. - // Example: 256 -> 255. - let last_address_offset = match subnet { - ipnetwork::IpNetwork::V4(network) => network.size() as i64 - 1, - ipnetwork::IpNetwork::V6(network) => { - // If we're allocating from a v6 subnet with more than 2^63 - 1 - // addresses, just cap the size we'll explore. This will never - // fail in practice since we're never going to be storing 2^64 - // rows in the network_interface table. - i64::try_from(network.size() - 1).unwrap_or(i64::MAX) - } - }; - - // This subtraction is because the last address in a subnet is - // explicitly reserved for Oxide use. - last_address_offset - 1 -} - -/// Errors related to allocating VPC Subnets. -#[derive(Debug, PartialEq)] -pub enum SubnetError { - /// An IPv4 or IPv6 subnet overlaps with an existing VPC Subnet - OverlappingIpRange(ipnetwork::IpNetwork), - /// An other error - External(external::Error), -} - -impl SubnetError { - /// Construct a `SubnetError` from a Diesel error, catching the desired - /// cases and building useful errors. - pub fn from_pool( - e: async_bb8_diesel::PoolError, - subnet: &VpcSubnet, - ) -> Self { - use crate::db::error; - use async_bb8_diesel::ConnectionError; - use async_bb8_diesel::PoolError; - use diesel::result::DatabaseErrorKind; - use diesel::result::Error; - const IPV4_OVERLAP_ERROR_MESSAGE: &str = - r#"null value in column "ipv4_block" violates not-null constraint"#; - const IPV6_OVERLAP_ERROR_MESSAGE: &str = - r#"null value in column "ipv6_block" violates not-null constraint"#; - const NAME_CONFLICT_CONSTRAINT: &str = "vpc_subnet_vpc_id_name_key"; - match e { - // Attempt to insert overlapping IPv4 subnet - PoolError::Connection(ConnectionError::Query( - Error::DatabaseError( - DatabaseErrorKind::NotNullViolation, - ref info, - ), - )) if info.message() == IPV4_OVERLAP_ERROR_MESSAGE => { - SubnetError::OverlappingIpRange(subnet.ipv4_block.0 .0.into()) - } - - // Attempt to insert overlapping IPv6 subnet - PoolError::Connection(ConnectionError::Query( - Error::DatabaseError( - DatabaseErrorKind::NotNullViolation, - ref info, - ), - )) if info.message() == IPV6_OVERLAP_ERROR_MESSAGE => { - SubnetError::OverlappingIpRange(subnet.ipv6_block.0 .0.into()) - } - - // Conflicting name for the subnet within a VPC - PoolError::Connection(ConnectionError::Query( - Error::DatabaseError( - DatabaseErrorKind::UniqueViolation, - ref info, - ), - )) if info.constraint_name() == Some(NAME_CONFLICT_CONSTRAINT) => { - SubnetError::External(error::public_error_from_diesel_pool( - e, - error::ErrorHandler::Conflict( - external::ResourceType::VpcSubnet, - subnet.identity().name.as_str(), - ), - )) - } - - // Any other error at all is a bug - _ => SubnetError::External(error::public_error_from_diesel_pool( - e, - error::ErrorHandler::Server, - )), - } - } - - /// Convert into a public error - pub fn into_external(self) -> external::Error { - match self { - SubnetError::OverlappingIpRange(ip) => { - external::Error::invalid_request( - format!("IP address range '{}' conflicts with an existing subnet", ip).as_str() - ) - }, - SubnetError::External(e) => e, - } - } -} - -/// Generate a subquery that selects any overlapping address ranges of the same -/// type as the input IP subnet. -/// -/// This generates a query that, in full, looks like: -/// -/// ```sql -/// SELECT -/// -/// FROM -/// vpc_subnet -/// WHERE -/// vpc_id = AND -/// time_deleted IS NULL AND -/// inet_contains_or_equals(ipv*_block, ) -/// LIMIT 1 -/// ``` -/// -/// The input may be either an IPv4 or IPv6 subnet, and the corresponding column -/// is compared against. Note that the exact input IP range is returned on -/// purpose. -fn push_select_overlapping_ip_range<'a>( - mut out: AstPass<'_, 'a, Pg>, - vpc_id: &'a Uuid, - ip: &'a ipnetwork::IpNetwork, -) -> diesel::QueryResult<()> { - use crate::db::schema::vpc_subnet::dsl; - out.push_sql("SELECT "); - out.push_bind_param::(ip)?; - out.push_sql(" FROM "); - VPC_SUBNET_FROM_CLAUSE.walk_ast(out.reborrow())?; - out.push_sql(" WHERE "); - out.push_identifier(dsl::vpc_id::NAME)?; - out.push_sql(" = "); - out.push_bind_param::(vpc_id)?; - out.push_sql(" AND "); - out.push_identifier(dsl::time_deleted::NAME)?; - out.push_sql(" IS NULL AND inet_contains_or_equals("); - if ip.is_ipv4() { - out.push_identifier(dsl::ipv4_block::NAME)?; - } else { - out.push_identifier(dsl::ipv6_block::NAME)?; - } - out.push_sql(", "); - out.push_bind_param::(ip)?; - out.push_sql(")"); - Ok(()) -} - -/// Generate a subquery that returns NULL if there is an overlapping IP address -/// range of any type. -/// -/// This specifically generates a query that looks like: -/// -/// ```sql -/// SELECT NULLIF( -/// , -/// push_select_overlapping_ip_range(, ) -/// ) -/// ``` -/// -/// The `NULLIF` function returns NULL if those two expressions are equal, and -/// the first expression otherwise. That is, this returns NULL if there exists -/// an overlapping IP range already in the VPC Subnet table, and the requested -/// IP range if not. -fn push_null_if_overlapping_ip_range<'a>( - mut out: AstPass<'_, 'a, Pg>, - vpc_id: &'a Uuid, - ip: &'a ipnetwork::IpNetwork, -) -> diesel::QueryResult<()> { - out.push_sql("SELECT NULLIF("); - out.push_bind_param::(ip)?; - out.push_sql(", ("); - push_select_overlapping_ip_range(out.reborrow(), vpc_id, ip)?; - out.push_sql("))"); - Ok(()) -} - -/// Generate a CTE that can be used to insert a VPC Subnet, only if the IP -/// address ranges of that subnet don't overlap with existing Subnets in the -/// same VPC. -/// -/// In particular, this generates a CTE like so: -/// -/// ```sql -/// WITH candidate( -/// id, -/// name, -/// description, -/// time_created, -/// time_modified, -/// time_deleted, -/// vpc_id -/// ) AS (VALUES ( -/// , -/// , -/// , -/// , -/// , -/// NULL::TIMESTAMPTZ, -/// , -/// )), -/// candidate_ipv4(ipv4_block) AS ( -/// SELECT( -/// NULLIF( -/// , -/// ( -/// SELECT -/// ipv4_block -/// FROM -/// vpc_subnet -/// WHERE -/// vpc_id = AND -/// time_deleted IS NULL AND -/// inet_contains_or_equals(, ipv4_block) -/// LIMIT 1 -/// ) -/// ) -/// ) -/// ), -/// candidate_ipv6(ipv6_block) AS ( -/// -/// ) -/// SELECT * -/// FROM candidate, candidate_ipv4, candidate_ipv6 -/// ``` -pub struct FilterConflictingVpcSubnetRangesQuery { - subnet: VpcSubnet, - - // The following fields are derived from the previous field. This begs the - // question: "Why bother storing them at all?" - // - // Diesel's [`diesel::query_builder::ast_pass::AstPass:push_bind_param`] method - // requires that the provided value now live as long as the entire AstPass - // type. By storing these values in the struct, they'll live at least as - // long as the entire call to [`QueryFragment::walk_ast`]. - ipv4_block: ipnetwork::IpNetwork, - ipv6_block: ipnetwork::IpNetwork, -} - -impl FilterConflictingVpcSubnetRangesQuery { - pub fn new(subnet: VpcSubnet) -> Self { - let ipv4_block = ipnetwork::IpNetwork::from(subnet.ipv4_block.0 .0); - let ipv6_block = ipnetwork::IpNetwork::from(subnet.ipv6_block.0 .0); - Self { subnet, ipv4_block, ipv6_block } - } -} - -impl QueryId for FilterConflictingVpcSubnetRangesQuery { - type QueryId = (); - const HAS_STATIC_QUERY_ID: bool = false; -} - -impl QueryFragment for FilterConflictingVpcSubnetRangesQuery { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - use db::schema::vpc_subnet::dsl; - - // Create the base `candidate` from values provided that need no - // verificiation. - out.push_sql("SELECT * FROM (WITH candidate("); - out.push_identifier(dsl::id::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::name::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::description::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_created::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_modified::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_deleted::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::vpc_id::NAME)?; - out.push_sql(") AS (VALUES ("); - out.push_bind_param::(&self.subnet.identity.id)?; - out.push_sql(", "); - out.push_bind_param::( - &self.subnet.name(), - )?; - out.push_sql(", "); - out.push_bind_param::( - &self.subnet.identity.description, - )?; - out.push_sql(", "); - out.push_bind_param::>( - &self.subnet.identity.time_created, - )?; - out.push_sql(", "); - out.push_bind_param::>( - &self.subnet.identity.time_modified, - )?; - out.push_sql(", "); - out.push_sql("NULL::TIMESTAMPTZ, "); - out.push_bind_param::(&self.subnet.vpc_id)?; - out.push_sql(")), "); - - // Push the candidate IPv4 and IPv6 selection subqueries, which return - // NULL if the corresponding address range overlaps. - out.push_sql("candidate_ipv4("); - out.push_identifier(dsl::ipv4_block::NAME)?; - out.push_sql(") AS ("); - push_null_if_overlapping_ip_range( - out.reborrow(), - &self.subnet.vpc_id, - &self.ipv4_block, - )?; - - out.push_sql("), candidate_ipv6("); - out.push_identifier(dsl::ipv6_block::NAME)?; - out.push_sql(") AS ("); - push_null_if_overlapping_ip_range( - out.reborrow(), - &self.subnet.vpc_id, - &self.ipv6_block, - )?; - out.push_sql(") "); - - // Select the entire set of candidate columns. - out.push_sql( - "SELECT * FROM candidate, candidate_ipv4, candidate_ipv6)", - ); - Ok(()) - } -} - -impl Insertable - for FilterConflictingVpcSubnetRangesQuery -{ - type Values = FilterConflictingVpcSubnetRangesQueryValues; - - fn values(self) -> Self::Values { - FilterConflictingVpcSubnetRangesQueryValues(self) - } -} - -/// Used to allow inserting the result of the -/// `FilterConflictingVpcSubnetRangesQuery`, as in -/// `diesel::insert_into(foo).values(_). Should not be used directly. -pub struct FilterConflictingVpcSubnetRangesQueryValues( - pub FilterConflictingVpcSubnetRangesQuery, -); - -impl QueryId for FilterConflictingVpcSubnetRangesQueryValues { - type QueryId = (); - const HAS_STATIC_QUERY_ID: bool = false; -} - -impl diesel::insertable::CanInsertInSingleQuery - for FilterConflictingVpcSubnetRangesQueryValues -{ - fn rows_to_insert(&self) -> Option { - Some(1) - } -} - -impl QueryFragment for FilterConflictingVpcSubnetRangesQueryValues { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - use db::schema::vpc_subnet::dsl; - out.push_sql("("); - out.push_identifier(dsl::id::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::name::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::description::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_created::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_modified::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_deleted::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::vpc_id::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::ipv4_block::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::ipv6_block::NAME)?; - out.push_sql(") "); - self.0.walk_ast(out) - } -} - /// Errors related to inserting or attaching a NetworkInterface #[derive(Debug)] pub enum NetworkInterfaceError { @@ -642,6 +256,152 @@ fn decode_database_error( } } +// Helper to return the offset of the last valid/allocatable IP in a subnet. +// Note that this is the offset from the _first available address_, not the +// network address. +fn last_address_offset(subnet: &IpNetwork) -> u32 { + // Generate last address in the range. + // + // NOTE: First subtraction is to convert from the subnet size to an + // offset, since `generate_series` is inclusive of the last value. + // Example: 256 -> 255. + let last_address_offset = match subnet { + IpNetwork::V4(network) => network.size() - 1, + IpNetwork::V6(network) => { + // TODO-robustness: IPv6 subnets are always /64s, so in theory we + // could require searching all ~2^64 items for the next address. + // That won't happen in practice, because there will be other limits + // on the number of IPs (such as MAC addresses, or just project + // accounting limits). However, we should update this to be the + // actual maximum size we expect or want to support, once we get a + // better sense of what that is. + u32::try_from(network.size() - 1).unwrap_or(u32::MAX - 1) + } + }; + + // This subtraction is because the last address in a subnet is + // explicitly reserved for Oxide use. + last_address_offset + .checked_sub(1 + NUM_INITIAL_RESERVED_IP_ADDRESSES as u32) + .unwrap_or_else(|| panic!("Unexpectedly small IP subnet: '{}'", subnet)) +} + +// Return the first available address in a subnet. This is not the network +// address, since Oxide reserves the first few addresses. +fn first_available_address(subnet: &IpNetwork) -> IpAddr { + match subnet { + IpNetwork::V4(network) => network + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as _) + .unwrap_or_else(|| { + panic!("Unexpectedly small IPv4 subnetwork: '{}'", network) + }) + .into(), + IpNetwork::V6(network) => { + // TODO-performance: This is unfortunate. `ipnetwork` implements a + // direct addition-based approach for IPv4 but not IPv6. This will + // loop, which, while it may not matter much, can be nearly + // trivially avoided by converting to u128, adding, and converting + // back. Given that these spaces can be _really_ big, that is + // probably worth doing. + network + .iter() + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as _) + .unwrap_or_else(|| { + panic!("Unexpectedly small IPv6 subnetwork: '{}'", network) + }) + .into() + } + } +} + +/// The `NextGuestIpv4Address` query is a `NextItem` query for choosing the next +/// available IPv4 address for a guest interface. +#[derive(Debug, Clone, Copy)] +pub struct NextGuestIpv4Address { + inner: NextItem< + db::schema::network_interface::table, + IpNetwork, + db::schema::network_interface::dsl::ip, + Uuid, + db::schema::network_interface::dsl::subnet_id, + >, +} + +impl NextGuestIpv4Address { + pub fn new(subnet: Ipv4Network, subnet_id: Uuid) -> Self { + let subnet = IpNetwork::from(subnet); + let net = IpNetwork::from(first_available_address(&subnet)); + let max_offset = last_address_offset(&subnet); + Self { inner: NextItem::new_scoped(net, subnet_id, max_offset) } + } +} + +delegate_query_fragment_impl!(NextGuestIpv4Address); + +/// A `[NextItem`] subquery that selects the next empty slot for an interface. +/// +/// Instances are limited to 8 interfaces (RFD 135). This pushes a subquery that +/// looks like: +/// +/// ```sql +/// SELECT COALESCE(( +/// SELECT +/// next_slot +/// FROM +/// generate_series(0, ) +/// AS +/// next_slot +/// LEFT OUTER JOIN +/// network_interface +/// ON +/// (instance_id, time_deleted IS NULL, slot) = +/// (, TRUE, next_slot) +/// WHERE +/// slot IS NULL +/// LIMIT 1) +/// ), 0) +/// ``` +/// +/// That is, we select the lowest slot that has not yet been claimed by an +/// interface on this instance, or zero if there is no such instance at all. +/// +/// Errors +/// ------ +/// +/// Note that the `generate_series` function is inclusive of its upper bound. +/// We intentionally use the upper bound of the maximum number of NICs per +/// instance. In the case where there are no available slots (the current max +/// slot number is 7), this query will return 8. However, this violates the +/// check on the slot column being between `[0, 8)`. This check violation is +/// used to detect the case when there are no slots available. +#[derive(Debug, Clone, Copy)] +pub struct NextNicSlot { + inner: NextItem< + db::schema::network_interface::table, + i16, + db::schema::network_interface::dsl::slot, + Uuid, + db::schema::network_interface::dsl::instance_id, + >, +} + +impl NextNicSlot { + pub fn new(instance_id: Uuid) -> Self { + Self { + inner: NextItem::new_scoped(0, instance_id, MAX_NICS_PER_INSTANCE), + } + } +} + +impl QueryFragment for NextNicSlot { + fn walk_ast<'a>(&'a self, mut out: AstPass<'_, 'a, Pg>) -> QueryResult<()> { + out.push_sql("SELECT COALESCE(("); + self.inner.walk_ast(out.reborrow())?; + out.push_sql("), 0)"); + Ok(()) + } +} + /// Add a subquery intended to verify that an Instance's networking does not /// span multiple VPCs. /// @@ -744,81 +504,6 @@ fn push_ensure_unique_vpc_expression<'a>( Ok(()) } -/// Push a subquery that selects the next available IP address from a subnet. -/// -/// This adds a subquery like: -/// -/// ```sql -/// SELECT -/// + address_offset AS ip -/// FROM -/// generate_series(5, ) AS address_offset -/// LEFT OUTER JOIN -/// network_interface -/// ON -/// (subnet_id, ip, time_deleted IS NULL) = -/// ( + address_offset, TRUE) -/// WHERE ip IS NULL LIMIT 1 -/// ``` -/// -/// This is a linear, sequential scan for an IP from the subnet that's not yet -/// been allocated. We'd ultimately like a better-performing allocation -/// strategy; for example, we might be able to keep the lowest unallocated -/// address for each subnet, and atomically return and increment that. -/// -/// This would work fine, but explicit reservations of IP addresses complicate -/// the picture. We'd need a more complex data structure to manage the ranges of -/// available address for each subnet, especially to manage coalescing those -/// ranges as addresses are released back to the pool. -fn push_select_next_available_ip_subquery<'a>( - mut out: AstPass<'_, 'a, Pg>, - query: &'a InsertNetworkInterfaceQuery, -) -> diesel::QueryResult<()> { - use db::schema::network_interface::dsl; - out.push_sql("SELECT "); - out.push_bind_param::( - &query.network_address_v4_sql, - )?; - out.push_sql(" + "); - out.push_identifier("address_offset")?; - out.push_sql(" AS "); - out.push_identifier(dsl::ip::NAME)?; - - out.push_sql( - format!( - " FROM generate_series({}, ", - crate::defaults::NUM_INITIAL_RESERVED_IP_ADDRESSES - ) - .as_str(), - ); - out.push_bind_param::(&query.last_address_offset_v4)?; - out.push_sql(") AS "); - out.push_identifier("address_offset")?; - out.push_sql(" LEFT OUTER JOIN "); - NETWORK_INTERFACE_FROM_CLAUSE.walk_ast(out.reborrow())?; - out.push_sql(" ON ("); - out.push_identifier(dsl::subnet_id::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::ip::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_deleted::NAME)?; - out.push_sql(" IS NULL) = ("); - out.push_bind_param::( - &query.interface.subnet.identity.id, - )?; - out.push_sql(", "); - out.push_bind_param::( - &query.network_address_v4_sql, - )?; - out.push_sql(" + "); - out.push_identifier("address_offset")?; - out.push_sql(", TRUE) "); - out.push_sql("WHERE "); - out.push_identifier(dsl::ip::NAME)?; - out.push_sql(" IS NULL LIMIT 1"); - Ok(()) -} - /// Subquery used to insert a _new_ `NetworkInterface` from parameters. /// /// This function is used to construct a query that allows inserting a @@ -854,9 +539,9 @@ fn push_select_next_available_ip_subquery<'a>( /// /// If the user wants an address allocated, then this generates a subquery that /// tries to find the next available IP address (if any). See -/// [`push_select_next_available_ip_subquery`] for details on that allocation -/// subquery. If that fails, due to address exhaustion, this is detected and -/// forwarded to the caller. +/// [`NextGuestIpv4Address`] for details on that allocation subquery. If that +/// fails, due to address exhaustion, this is detected and forwarded to the +/// caller. /// /// Errors /// ------ @@ -979,7 +664,7 @@ fn push_interface_allocation_subquery<'a>( out.push_bind_param::(ip)?; } else { out.push_sql("("); - push_select_next_available_ip_subquery(out.reborrow(), &query)?; + query.next_ipv4_address_subquery.walk_ast(out.reborrow())?; out.push_sql(")"); } out.push_sql(" AS "); @@ -989,77 +674,13 @@ fn push_interface_allocation_subquery<'a>( // interface, including validating that there are available slots on the // instance. out.push_sql(", ("); - push_select_next_available_nic_slot_query( - out.reborrow(), - &query.interface.instance_id, - )?; + query.next_slot_subquery.walk_ast(out.reborrow())?; out.push_sql(") AS "); out.push_identifier(dsl::slot::NAME)?; Ok(()) } -/// Push a subquery that selects the next empty slot for an interface. -/// -/// Instances are limited to 8 interfaces (RFD 135). This pushes a subquery that -/// looks like: -/// -/// ```sql -/// SELECT COALESCE(( -/// SELECT -/// next_slot -/// FROM -/// generate_series(0, ) -/// AS -/// next_slot -/// LEFT OUTER JOIN -/// network_interface -/// ON -/// (instance_id, time_deleted IS NULL, slot) = -/// (, TRUE, next_slot) -/// WHERE -/// slot IS NULL -/// LIMIT 1) -/// ), 0) -/// ``` -/// -/// That is, we select the lowest slot that has not yet been claimed by an -/// interface on this instance, or zero if there is no such instance at all. -/// -/// Errors -/// ------ -/// -/// Note that the `generate_series` function is inclusive of its upper bound. -/// We intentionally use the upper bound of the maximum number of NICs per -/// instance. In the case where there are no available slots (the current max -/// slot number is 7), this query will return 8. However, this violates the -/// check on the slot column being between `[0, 8)`. This check violation is -/// used to detect the case when there are no slots available. -fn push_select_next_available_nic_slot_query<'a>( - mut out: AstPass<'_, 'a, Pg>, - instance_id: &'a Uuid, -) -> QueryResult<()> { - use db::schema::network_interface::dsl; - out.push_sql(&format!( - "SELECT COALESCE((SELECT next_slot FROM generate_series(0, {}) ", - MAX_NICS_PER_INSTANCE, - )); - out.push_sql("AS next_slot LEFT OUTER JOIN "); - NETWORK_INTERFACE_FROM_CLAUSE.walk_ast(out.reborrow())?; - out.push_sql(" ON ("); - out.push_identifier(dsl::instance_id::NAME)?; - out.push_sql(", "); - out.push_identifier(dsl::time_deleted::NAME)?; - out.push_sql(" IS NULL, "); - out.push_identifier(dsl::slot::NAME)?; - out.push_sql(") = ("); - out.push_bind_param::(instance_id)?; - out.push_sql(", TRUE, next_slot) WHERE "); - out.push_identifier(dsl::slot::NAME)?; - out.push_sql(" IS NULL LIMIT 1), 0)"); - Ok(()) -} - /// Type used to insert conditionally insert a network interface. /// /// This type implements a query that does one of two things @@ -1141,29 +762,31 @@ pub struct InsertNetworkInterfaceQuery { // long as the entire call to [`QueryFragment::walk_ast`]. vpc_id_str: String, ip_sql: Option, - network_address_v4_sql: IpNetwork, mac_sql: String, - last_address_offset_v4: i64, + next_ipv4_address_subquery: NextGuestIpv4Address, + next_slot_subquery: NextNicSlot, } impl InsertNetworkInterfaceQuery { pub fn new(interface: IncompleteNetworkInterface) -> Self { let vpc_id_str = interface.vpc_id.to_string(); let ip_sql = interface.ip.map(|ip| ip.into()); - let subnet_v4_sql = IpNetwork::from(interface.subnet.ipv4_block.0 .0); - let network_address_v4_sql = IpNetwork::from(subnet_v4_sql.network()); let mac_sql = interface.mac.to_string(); - let last_address_offset_v4 = - generate_last_address_offset(&subnet_v4_sql); + + let next_ipv4_address_subquery = NextGuestIpv4Address::new( + interface.subnet.ipv4_block.0 .0, + interface.subnet.identity.id, + ); + let next_slot_subquery = NextNicSlot::new(interface.instance_id); Self { interface, now: Utc::now(), vpc_id_str, ip_sql, - network_address_v4_sql, mac_sql, - last_address_offset_v4, + next_ipv4_address_subquery, + next_slot_subquery, } } } @@ -1172,11 +795,8 @@ type FromClause = diesel::internal::table_macro::StaticQueryFragmentInstance; type NetworkInterfaceFromClause = FromClause; -type VpcSubnetFromClause = FromClause; - const NETWORK_INTERFACE_FROM_CLAUSE: NetworkInterfaceFromClause = NetworkInterfaceFromClause::new(); -const VPC_SUBNET_FROM_CLAUSE: VpcSubnetFromClause = VpcSubnetFromClause::new(); impl QueryId for InsertNetworkInterfaceQuery { type QueryId = (); @@ -1307,165 +927,27 @@ impl QueryFragment for InsertNetworkInterfaceQueryValues { } #[cfg(test)] -mod test { +mod tests { + use super::first_available_address; + use super::last_address_offset; use super::NetworkInterfaceError; - use super::SubnetError; use super::MAX_NICS_PER_INSTANCE; use crate::context::OpContext; - use crate::db::model::{ - self, IncompleteNetworkInterface, NetworkInterface, VpcSubnet, - }; - use ipnetwork::IpNetwork; + use crate::db::model; + use crate::db::model::IncompleteNetworkInterface; + use crate::db::model::NetworkInterface; + use crate::db::model::VpcSubnet; use nexus_test_utils::db::test_setup_database; - use omicron_common::api::external::{ - Error, IdentityMetadataCreateParams, Ipv4Net, Ipv6Net, Name, - }; + use omicron_common::api::external::Error; + use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::api::external::Ipv4Net; + use omicron_common::api::external::Ipv6Net; use omicron_test_utils::dev; use std::convert::TryInto; + use std::net::IpAddr; use std::sync::Arc; use uuid::Uuid; - #[tokio::test] - async fn test_filter_conflicting_vpc_subnet_ranges_query() { - let make_id = - |name: &Name, description: &str| IdentityMetadataCreateParams { - name: name.clone(), - description: description.to_string(), - }; - let ipv4_block = Ipv4Net("172.30.0.0/22".parse().unwrap()); - let other_ipv4_block = Ipv4Net("172.31.0.0/22".parse().unwrap()); - let ipv6_block = Ipv6Net("fd12:3456:7890::/64".parse().unwrap()); - let other_ipv6_block = Ipv6Net("fd00::/64".parse().unwrap()); - let name = "a-name".to_string().try_into().unwrap(); - let other_name = "b-name".to_string().try_into().unwrap(); - let description = "some description".to_string(); - let identity = make_id(&name, &description); - let vpc_id = "d402369d-c9ec-c5ad-9138-9fbee732d53e".parse().unwrap(); - let other_vpc_id = - "093ad2db-769b-e3c2-bc1c-b46e84ce5532".parse().unwrap(); - let subnet_id = "093ad2db-769b-e3c2-bc1c-b46e84ce5532".parse().unwrap(); - let other_subnet_id = - "695debcc-e197-447d-ffb2-976150a7b7cf".parse().unwrap(); - let row = - VpcSubnet::new(subnet_id, vpc_id, identity, ipv4_block, ipv6_block); - - // Setup the test database - let logctx = - dev::test_setup_log("test_filter_conflicting_vpc_subnet_ranges"); - let log = logctx.log.new(o!()); - let mut db = test_setup_database(&log).await; - let cfg = crate::db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(crate::db::Pool::new(&cfg)); - let db_datastore = - Arc::new(crate::db::DataStore::new(Arc::clone(&pool))); - - // We should be able to insert anything into an empty table. - assert!( - matches!(db_datastore.vpc_create_subnet_raw(row).await, Ok(_)), - "Should be able to insert VPC subnet into empty table" - ); - - // We shouldn't be able to insert a row with the same IP ranges, even if - // the other data does not conflict. - let new_row = VpcSubnet::new( - other_subnet_id, - vpc_id, - make_id(&other_name, &description), - ipv4_block, - ipv6_block, - ); - assert!( - matches!( - db_datastore.vpc_create_subnet_raw(new_row).await, - Err(SubnetError::OverlappingIpRange(IpNetwork::V4(_))) - ), - "Should not be able to insert new VPC subnet with the same IPv4 and IPv6 ranges" - ); - - // We should be able to insert data with the same ranges, if we change - // the VPC ID. - let new_row = VpcSubnet::new( - other_subnet_id, - other_vpc_id, - make_id(&name, &description), - ipv4_block, - ipv6_block, - ); - assert!( - matches!(db_datastore.vpc_create_subnet_raw(new_row).await, Ok(_)), - "Should be able to insert a VPC Subnet with the same ranges in a different VPC", - ); - - // We shouldn't be able to insert a subnet if we change only the - // IPv4 or IPv6 block. They must _both_ be non-overlapping. - let new_row = VpcSubnet::new( - other_subnet_id, - vpc_id, - make_id(&other_name, &description), - other_ipv4_block, - ipv6_block, - ); - let err = db_datastore - .vpc_create_subnet_raw(new_row) - .await - .expect_err("Should not be able to insert VPC Subnet with overlapping IPv6 range"); - assert_eq!( - err, - SubnetError::OverlappingIpRange(IpNetwork::from(ipv6_block.0)), - "SubnetError variant should include the exact IP range that overlaps" - ); - let new_row = VpcSubnet::new( - other_subnet_id, - vpc_id, - make_id(&other_name, &description), - ipv4_block, - other_ipv6_block, - ); - let err = db_datastore - .vpc_create_subnet_raw(new_row) - .await - .expect_err("Should not be able to insert VPC Subnet with overlapping IPv4 range"); - assert_eq!( - err, - SubnetError::OverlappingIpRange(IpNetwork::from(ipv4_block.0)), - "SubnetError variant should include the exact IP range that overlaps" - ); - - // We should get an _external error_ if the IP address ranges are OK, - // but the name conflicts. - let new_row = VpcSubnet::new( - other_subnet_id, - vpc_id, - make_id(&name, &description), - other_ipv4_block, - other_ipv6_block, - ); - assert!( - matches!( - db_datastore.vpc_create_subnet_raw(new_row).await, - Err(SubnetError::External(_)) - ), - "Should get an error inserting a VPC Subnet with unique IP ranges, but the same name" - ); - - // We should be able to insert the row if _both ranges_ are different, - // and the name is unique as well. - let new_row = VpcSubnet::new( - Uuid::new_v4(), - vpc_id, - make_id(&other_name, &description), - other_ipv4_block, - other_ipv6_block, - ); - assert!( - matches!(db_datastore.vpc_create_subnet_raw(new_row).await, Ok(_)), - "Should be able to insert new VPC Subnet with non-overlapping IP ranges" - ); - - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } - #[tokio::test] async fn test_insert_network_interface_query() { // Setup the test database @@ -1905,4 +1387,36 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + #[test] + fn test_last_address_offset() { + let subnet = "172.30.0.0/28".parse().unwrap(); + assert_eq!( + last_address_offset(&subnet), + // /28 = 2 ** 4 = 16 total addresses + // ... - 1 for converting from size to index = 15 + // ... - 1 for reserved broadcast address = 14 + // ... - 5 for reserved initial addresses = 9 + 9, + ); + let subnet = "fd00::/64".parse().unwrap(); + assert_eq!( + last_address_offset(&subnet), + u32::MAX - 1 - 1 - super::NUM_INITIAL_RESERVED_IP_ADDRESSES as u32, + ); + } + + #[test] + fn test_first_available_address() { + let subnet = "172.30.0.0/28".parse().unwrap(); + assert_eq!( + first_available_address(&subnet), + "172.30.0.5".parse::().unwrap(), + ); + let subnet = "fd00::/64".parse().unwrap(); + assert_eq!( + first_available_address(&subnet), + "fd00::5".parse::().unwrap(), + ); + } } diff --git a/nexus/src/db/queries/next_item.rs b/nexus/src/db/queries/next_item.rs new file mode 100644 index 00000000000..e7753428b05 --- /dev/null +++ b/nexus/src/db/queries/next_item.rs @@ -0,0 +1,374 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A generic query for selecting a unique next item from a table. + +use diesel::associations::HasTable; +use diesel::pg::Pg; +use diesel::prelude::Column; +use diesel::prelude::Expression; +use diesel::query_builder::AstPass; +use diesel::query_builder::QueryFragment; +use diesel::serialize::ToSql; +use diesel::sql_types; +use diesel::sql_types::HasSqlType; +use std::marker::PhantomData; +use uuid::Uuid; + +/// The `NextItem` query is used to generate common subqueries that select the +/// next available item from a database table. +/// +/// There are a number of contexts in which we'd like to choose a value for a +/// particular model field, avoiding conflicts with existing records in a table. +/// A canonical example is selecting the next available IP address for a guest +/// network interface, from the VPC Subnet's IP subnet. See the other types in +/// this module for more examples. +/// +/// Query structure +/// --------------- +/// +/// In its most general form, this generates queries that look like: +/// +/// ```sql +/// SELECT +/// + offset AS +/// FROM +/// generate_series(0, ) AS offset +/// LEFT OUTER JOIN +/// +/// ON +/// (, , time_deleted IS NULL) = +/// (, + offset, TRUE) +/// WHERE +/// IS NULL +/// LIMIT 1 +/// ``` +/// +/// In the example of a guest IP address, this would be instantiated as: +/// +/// ```sql +/// SELECT +/// + offset AS ip +/// FROM +/// generate_series(0, ) AS offset +/// LEFT OUTER JOIN +/// network_interfacd +/// ON +/// (subnet_id, ip, time_deleted IS NULL) = +/// (, + offset, TRUE) +/// WHERE +/// ip IS NULL +/// LIMIT 1 +/// ``` +/// +/// This query selects the lowest address in the IP subnet that's not already +/// allocated to a guest interface. Note that the query is linear in the number +/// of _allocated_ guest addresses. and are chosen +/// based on the subnet and its size, and take into account reserved IP +/// addresses (such as the broadcast address). +/// +/// Scanning for an item +/// -------------------- +/// +/// Much of the value of this query comes from the ability to specify the +/// starting point for a scan for the next item. In the case above, of an IP +/// address for a guest NIC, we always try to allocate the lowest available +/// address. This implies the search is linear in the number of allocated IP +/// addresses, but that runtime cost is acceptable for a few reasons. First, the +/// predictability of the addresses is nice. Second, the subnets can generally +/// be quite small, meaning both the actual runtime can be quite low, and it's +/// not clear that a more complex strategy may perform much better. +/// +/// However, consumers can _choose_ the base from which to start the scan. This +/// allows consumers to follow a strategy similar to linear probing in +/// hashtables: pick a random item in the search space, and then take the first +/// available starting from there. This has the benefit of avoiding a retry-loop +/// in the application in the case that a randomly-chosen item conflicts with an +/// existing item. That "retry loop" is converted into a sequential scan in the +/// database for an item that does not conflict. +/// +/// Scopes +/// ------ +/// +/// When we choose a next available item, we usually only want to avoid +/// conflicts within some scope. For example, consider MAC addresses for guest +/// network interfaces. These must be unique within a single VPC, but are +/// allowed to be duplicated in different VPCs. In this case, the scope is the +/// VPC. +/// +/// The query can be constructed with two methods: [`NextItem::new_scoped`] or +/// [`NextItem::new_unscoped`]. In the former case, the method accepts a scope +/// key, and the column in the table where we can find that key. In the latter, +/// there is no scope, which means the items must be globally unique in the +/// entire table (among non-deleted items). The query is structured slightly +/// differently in these two cases. +/// +/// Scan sizes +/// ---------- +/// +/// The scan size is limited by the `max_offset` field. The maximum value +/// depends heavily on the context. For a guest IP address, the subnets can be +/// small (though they don't need to be). More importantly, we definitely don't +/// want to fail a request if there _is_ an available address, even if that's +/// the very last address in a subnet. +/// +/// For that reason, the +/// [`NextGuestIpv4Address`](`crate::db::queries::network_interface::NextGuestIpv4Address`) +/// query uses the full subnet as its maximum scan size. For other contexts, the +/// size may be much smaller. +#[derive(Debug, Clone, Copy)] +pub(super) struct NextItem< + Table, + Item, + ItemColumn, + ScopeKey = NoScopeKey, + ScopeColumn = NoScopeColumn, +> { + table: Table, + _columns: PhantomData<(ItemColumn, ScopeColumn)>, + scope_key: ScopeKey, + base: Item, + max_offset: i64, +} + +impl + NextItem +where + // Table is a database table whose name can be used in a query fragment + Table: diesel::Table + HasTable
+ QueryFragment, + + // Item can be converted to the SQL type of the ItemColumn + Item: ToSql<::SqlType, Pg>, + + // ItemColum is a column in the target table + ItemColumn: Column
, + + // ScopeKey can be converted to the SQL type of the ScopeColumn + ScopeKey: ScopeKeyType + ToSql<::SqlType, Pg>, + + // ScopeColumn is a column on the target table + ScopeColumn: ScopeColumnType + Column
, + + // The Postgres backend supports the SQL types of both columns + Pg: HasSqlType<::SqlType> + + HasSqlType<::SqlType>, +{ + /// Create a new `NextItem` query, scoped to a particular key. + pub(super) fn new_scoped( + item: Item, + scope_key: ScopeKey, + max_offset: u32, + ) -> Self { + Self { + table: Table::table(), + _columns: PhantomData, + scope_key, + base: item, + max_offset: i64::from(max_offset), + } + } +} + +impl + NextItem +where + Table: diesel::Table + HasTable
+ QueryFragment, + Item: ToSql<::SqlType, Pg>, + ItemColumn: Column
, + Pg: HasSqlType<::SqlType>, +{ + /// Create a new `NextItem` query, with a global scope. + // This will be used in future to implement queries like creating VNIs or + // VPC Subnet IPv6 prefixes. + #[allow(dead_code)] + pub(super) fn new_unscoped(item: Item, max_offset: u32) -> Self { + Self { + table: Table::table(), + _columns: PhantomData, + scope_key: NoScopeKey, + base: item, + max_offset: i64::from(max_offset), + } + } +} + +impl QueryFragment + for NextItem +where + Table: diesel::Table + HasTable
+ QueryFragment, + Item: ToSql<::SqlType, Pg>, + ItemColumn: Column
, + ScopeKey: ToSql<::SqlType, Pg>, + ScopeColumn: Column
, + Pg: HasSqlType<::SqlType> + + HasSqlType<::SqlType>, +{ + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + push_next_item_select_clause::( + &self.base, + &self.table, + &self.max_offset, + out.reborrow(), + )?; + + // This generates the JOIN conditions for the query, which look like: + // ON + // (, , time_deleted IS NULL) = + // (, + offset, TRUE) + out.push_sql(" ON ("); + out.push_identifier(ScopeColumn::NAME)?; + out.push_sql(", "); + out.push_identifier(ItemColumn::NAME)?; + out.push_sql(", "); + out.push_identifier("time_deleted")?; + out.push_sql(" IS NULL) = ("); + out.push_bind_param::<::SqlType, ScopeKey>( + &self.scope_key, + )?; + out.push_sql(", "); + out.push_bind_param::<::SqlType, Item>( + &self.base, + )?; + out.push_sql(" + "); + out.push_identifier("offset")?; + out.push_sql(", TRUE) "); + + push_next_item_where_clause::(out.reborrow()) + } +} + +impl QueryFragment + for NextItem +where + Table: diesel::Table + HasTable
+ QueryFragment, + Item: ToSql<::SqlType, Pg>, + ItemColumn: Column
, + Pg: HasSqlType<::SqlType>, +{ + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + push_next_item_select_clause::( + &self.base, + &self.table, + &self.max_offset, + out.reborrow(), + )?; + + // This generates the JOIN conditions for the query, which look like: + // ON + // (, time_deleted IS NULL) = + // ( + offset, TRUE) + // + // Note that there is no scope here. + out.push_sql(" ON ("); + out.push_identifier(ItemColumn::NAME)?; + out.push_sql(", "); + out.push_identifier("time_deleted")?; + out.push_sql(" IS NULL) = ("); + out.push_bind_param::<::SqlType, Item>( + &self.base, + )?; + out.push_sql(" + "); + out.push_identifier("offset")?; + out.push_sql(", TRUE) "); + + push_next_item_where_clause::(out.reborrow()) + } +} + +// Push the initial `SELECT` clause shared by the scoped and unscoped next item +// queries. +// +// ```sql +// SELECT +// + offset AS +// FROM +// generate_series(0, ) AS offset +// LEFT OUTER JOIN +//
+// ``` +fn push_next_item_select_clause<'a, Table, Item, ItemColumn>( + base: &'a Item, + table: &'a Table, + max_offset: &'a i64, + mut out: AstPass<'_, 'a, Pg>, +) -> diesel::QueryResult<()> +where + Table: diesel::Table + HasTable
+ QueryFragment, + Item: ToSql<::SqlType, Pg>, + ItemColumn: Column
, + Pg: HasSqlType<::SqlType>, +{ + out.push_sql("SELECT "); + out.push_bind_param::<::SqlType, Item>(base)?; + out.push_sql(" + "); + out.push_identifier("offset")?; + out.push_sql(" AS "); + out.push_identifier(ItemColumn::NAME)?; + out.push_sql(" FROM generate_series(0, "); + out.push_bind_param::(max_offset)?; + out.push_sql(") AS "); + out.push_identifier("offset")?; + out.push_sql(" LEFT OUTER JOIN "); + table.walk_ast(out.reborrow()) +} + +// Push the final where clause shared by scoped and unscoped next item queries. +// +// ```sql +// WHERE IS NULL LIMIT 1 +// ``` +fn push_next_item_where_clause( + mut out: AstPass, +) -> diesel::QueryResult<()> +where + Table: diesel::Table + + diesel::associations::HasTable
+ + QueryFragment, + ItemColumn: Column
, +{ + out.push_sql(" WHERE "); + out.push_identifier(ItemColumn::NAME)?; + out.push_sql(" IS NULL LIMIT 1"); + Ok(()) +} + +/// A macro used to delegate the implementation of [`QueryFragment`] to a field +/// of `self` called `inner`. +macro_rules! delegate_query_fragment_impl { + ($parent:ty) => { + impl QueryFragment for $parent { + fn walk_ast<'a>( + &'a self, + out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + self.inner.walk_ast(out) + } + } + }; +} + +/// A marker trait used to identify types that can be used as scope keys. +pub(crate) trait ScopeKeyType: Sized + std::fmt::Debug {} +impl ScopeKeyType for Uuid {} + +/// A type indicating that a [`NextItem`] query has no scope key. +#[derive(Debug, Clone, Copy)] +pub(crate) struct NoScopeKey; +impl ScopeKeyType for NoScopeKey {} + +/// A marker trait used to identify types that identify the table column for a +/// scope key. +pub(crate) trait ScopeColumnType: Sized + std::fmt::Debug {} +impl ScopeColumnType for T where T: Column + std::fmt::Debug {} + +/// A type indicating that a [`NextItem`] query has no scope column. +#[derive(Debug, Clone, Copy)] +pub(crate) struct NoScopeColumn; +impl ScopeColumnType for NoScopeColumn {} diff --git a/nexus/src/db/queries/vni.rs b/nexus/src/db/queries/vni.rs new file mode 100644 index 00000000000..8b4f7368dae --- /dev/null +++ b/nexus/src/db/queries/vni.rs @@ -0,0 +1,5 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Allocation of unique VPC Geneve Virtual Network Identifiers (VNI) diff --git a/nexus/src/db/queries/vpc_subnet.rs b/nexus/src/db/queries/vpc_subnet.rs new file mode 100644 index 00000000000..b49d52d79d3 --- /dev/null +++ b/nexus/src/db/queries/vpc_subnet.rs @@ -0,0 +1,549 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Diesel query used for VPC Subnet allocation and insertion + +use crate::db; +use crate::db::identity::Resource; +use crate::db::model::VpcSubnet; +use chrono::{DateTime, Utc}; +use diesel::pg::Pg; +use diesel::prelude::*; +use diesel::query_builder::*; +use diesel::sql_types; +use omicron_common::api::external; +use uuid::Uuid; + +/// Errors related to allocating VPC Subnets. +#[derive(Debug, PartialEq)] +pub enum SubnetError { + /// An IPv4 or IPv6 subnet overlaps with an existing VPC Subnet + OverlappingIpRange(ipnetwork::IpNetwork), + /// An other error + External(external::Error), +} + +impl SubnetError { + /// Construct a `SubnetError` from a Diesel error, catching the desired + /// cases and building useful errors. + pub fn from_pool( + e: async_bb8_diesel::PoolError, + subnet: &VpcSubnet, + ) -> Self { + use crate::db::error; + use async_bb8_diesel::ConnectionError; + use async_bb8_diesel::PoolError; + use diesel::result::DatabaseErrorKind; + use diesel::result::Error; + const IPV4_OVERLAP_ERROR_MESSAGE: &str = + r#"null value in column "ipv4_block" violates not-null constraint"#; + const IPV6_OVERLAP_ERROR_MESSAGE: &str = + r#"null value in column "ipv6_block" violates not-null constraint"#; + const NAME_CONFLICT_CONSTRAINT: &str = "vpc_subnet_vpc_id_name_key"; + match e { + // Attempt to insert overlapping IPv4 subnet + PoolError::Connection(ConnectionError::Query( + Error::DatabaseError( + DatabaseErrorKind::NotNullViolation, + ref info, + ), + )) if info.message() == IPV4_OVERLAP_ERROR_MESSAGE => { + SubnetError::OverlappingIpRange(subnet.ipv4_block.0 .0.into()) + } + + // Attempt to insert overlapping IPv6 subnet + PoolError::Connection(ConnectionError::Query( + Error::DatabaseError( + DatabaseErrorKind::NotNullViolation, + ref info, + ), + )) if info.message() == IPV6_OVERLAP_ERROR_MESSAGE => { + SubnetError::OverlappingIpRange(subnet.ipv6_block.0 .0.into()) + } + + // Conflicting name for the subnet within a VPC + PoolError::Connection(ConnectionError::Query( + Error::DatabaseError( + DatabaseErrorKind::UniqueViolation, + ref info, + ), + )) if info.constraint_name() == Some(NAME_CONFLICT_CONSTRAINT) => { + SubnetError::External(error::public_error_from_diesel_pool( + e, + error::ErrorHandler::Conflict( + external::ResourceType::VpcSubnet, + subnet.identity().name.as_str(), + ), + )) + } + + // Any other error at all is a bug + _ => SubnetError::External(error::public_error_from_diesel_pool( + e, + error::ErrorHandler::Server, + )), + } + } + + /// Convert into a public error + pub fn into_external(self) -> external::Error { + match self { + SubnetError::OverlappingIpRange(ip) => { + external::Error::invalid_request( + format!("IP address range '{}' conflicts with an existing subnet", ip).as_str() + ) + }, + SubnetError::External(e) => e, + } + } +} + +/// Generate a subquery that selects any overlapping address ranges of the same +/// type as the input IP subnet. +/// +/// This generates a query that, in full, looks like: +/// +/// ```sql +/// SELECT +/// +/// FROM +/// vpc_subnet +/// WHERE +/// vpc_id = AND +/// time_deleted IS NULL AND +/// inet_contains_or_equals(ipv*_block, ) +/// LIMIT 1 +/// ``` +/// +/// The input may be either an IPv4 or IPv6 subnet, and the corresponding column +/// is compared against. Note that the exact input IP range is returned on +/// purpose. +fn push_select_overlapping_ip_range<'a>( + mut out: AstPass<'_, 'a, Pg>, + vpc_id: &'a Uuid, + ip: &'a ipnetwork::IpNetwork, +) -> diesel::QueryResult<()> { + use crate::db::schema::vpc_subnet::dsl; + out.push_sql("SELECT "); + out.push_bind_param::(ip)?; + out.push_sql(" FROM "); + VPC_SUBNET_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(dsl::vpc_id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(vpc_id)?; + out.push_sql(" AND "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(" IS NULL AND inet_contains_or_equals("); + if ip.is_ipv4() { + out.push_identifier(dsl::ipv4_block::NAME)?; + } else { + out.push_identifier(dsl::ipv6_block::NAME)?; + } + out.push_sql(", "); + out.push_bind_param::(ip)?; + out.push_sql(")"); + Ok(()) +} + +/// Generate a subquery that returns NULL if there is an overlapping IP address +/// range of any type. +/// +/// This specifically generates a query that looks like: +/// +/// ```sql +/// SELECT NULLIF( +/// , +/// push_select_overlapping_ip_range(, ) +/// ) +/// ``` +/// +/// The `NULLIF` function returns NULL if those two expressions are equal, and +/// the first expression otherwise. That is, this returns NULL if there exists +/// an overlapping IP range already in the VPC Subnet table, and the requested +/// IP range if not. +fn push_null_if_overlapping_ip_range<'a>( + mut out: AstPass<'_, 'a, Pg>, + vpc_id: &'a Uuid, + ip: &'a ipnetwork::IpNetwork, +) -> diesel::QueryResult<()> { + out.push_sql("SELECT NULLIF("); + out.push_bind_param::(ip)?; + out.push_sql(", ("); + push_select_overlapping_ip_range(out.reborrow(), vpc_id, ip)?; + out.push_sql("))"); + Ok(()) +} + +/// Generate a CTE that can be used to insert a VPC Subnet, only if the IP +/// address ranges of that subnet don't overlap with existing Subnets in the +/// same VPC. +/// +/// In particular, this generates a CTE like so: +/// +/// ```sql +/// WITH candidate( +/// id, +/// name, +/// description, +/// time_created, +/// time_modified, +/// time_deleted, +/// vpc_id +/// ) AS (VALUES ( +/// , +/// , +/// , +/// , +/// , +/// NULL::TIMESTAMPTZ, +/// , +/// )), +/// candidate_ipv4(ipv4_block) AS ( +/// SELECT( +/// NULLIF( +/// , +/// ( +/// SELECT +/// ipv4_block +/// FROM +/// vpc_subnet +/// WHERE +/// vpc_id = AND +/// time_deleted IS NULL AND +/// inet_contains_or_equals(, ipv4_block) +/// LIMIT 1 +/// ) +/// ) +/// ) +/// ), +/// candidate_ipv6(ipv6_block) AS ( +/// +/// ) +/// SELECT * +/// FROM candidate, candidate_ipv4, candidate_ipv6 +/// ``` +pub struct FilterConflictingVpcSubnetRangesQuery { + // TODO: update with random one if the insertion fails. + subnet: VpcSubnet, + + // The following fields are derived from the previous field. This begs the + // question: "Why bother storing them at all?" + // + // Diesel's [`diesel::query_builder::ast_pass::AstPass:push_bind_param`] method + // requires that the provided value now live as long as the entire AstPass + // type. By storing these values in the struct, they'll live at least as + // long as the entire call to [`QueryFragment::walk_ast`]. + ipv4_block: ipnetwork::IpNetwork, + ipv6_block: ipnetwork::IpNetwork, +} + +impl FilterConflictingVpcSubnetRangesQuery { + pub fn new(subnet: VpcSubnet) -> Self { + let ipv4_block = ipnetwork::IpNetwork::from(subnet.ipv4_block.0 .0); + let ipv6_block = ipnetwork::IpNetwork::from(subnet.ipv6_block.0 .0); + Self { subnet, ipv4_block, ipv6_block } + } +} + +impl QueryId for FilterConflictingVpcSubnetRangesQuery { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for FilterConflictingVpcSubnetRangesQuery { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + use db::schema::vpc_subnet::dsl; + + // Create the base `candidate` from values provided that need no + // verificiation. + out.push_sql("SELECT * FROM (WITH candidate("); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::name::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::description::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::time_created::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::time_modified::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::vpc_id::NAME)?; + out.push_sql(") AS (VALUES ("); + out.push_bind_param::(&self.subnet.identity.id)?; + out.push_sql(", "); + out.push_bind_param::( + &self.subnet.name(), + )?; + out.push_sql(", "); + out.push_bind_param::( + &self.subnet.identity.description, + )?; + out.push_sql(", "); + out.push_bind_param::>( + &self.subnet.identity.time_created, + )?; + out.push_sql(", "); + out.push_bind_param::>( + &self.subnet.identity.time_modified, + )?; + out.push_sql(", "); + out.push_sql("NULL::TIMESTAMPTZ, "); + out.push_bind_param::(&self.subnet.vpc_id)?; + out.push_sql(")), "); + + // Push the candidate IPv4 and IPv6 selection subqueries, which return + // NULL if the corresponding address range overlaps. + out.push_sql("candidate_ipv4("); + out.push_identifier(dsl::ipv4_block::NAME)?; + out.push_sql(") AS ("); + push_null_if_overlapping_ip_range( + out.reborrow(), + &self.subnet.vpc_id, + &self.ipv4_block, + )?; + + out.push_sql("), candidate_ipv6("); + out.push_identifier(dsl::ipv6_block::NAME)?; + out.push_sql(") AS ("); + push_null_if_overlapping_ip_range( + out.reborrow(), + &self.subnet.vpc_id, + &self.ipv6_block, + )?; + out.push_sql(") "); + + // Select the entire set of candidate columns. + out.push_sql( + "SELECT * FROM candidate, candidate_ipv4, candidate_ipv6)", + ); + Ok(()) + } +} + +impl Insertable + for FilterConflictingVpcSubnetRangesQuery +{ + type Values = FilterConflictingVpcSubnetRangesQueryValues; + + fn values(self) -> Self::Values { + FilterConflictingVpcSubnetRangesQueryValues(self) + } +} + +/// Used to allow inserting the result of the +/// `FilterConflictingVpcSubnetRangesQuery`, as in +/// `diesel::insert_into(foo).values(_). Should not be used directly. +pub struct FilterConflictingVpcSubnetRangesQueryValues( + pub FilterConflictingVpcSubnetRangesQuery, +); + +impl QueryId for FilterConflictingVpcSubnetRangesQueryValues { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl diesel::insertable::CanInsertInSingleQuery + for FilterConflictingVpcSubnetRangesQueryValues +{ + fn rows_to_insert(&self) -> Option { + Some(1) + } +} + +impl QueryFragment for FilterConflictingVpcSubnetRangesQueryValues { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + use db::schema::vpc_subnet::dsl; + out.push_sql("("); + out.push_identifier(dsl::id::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::name::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::description::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::time_created::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::time_modified::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::time_deleted::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::vpc_id::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::ipv4_block::NAME)?; + out.push_sql(", "); + out.push_identifier(dsl::ipv6_block::NAME)?; + out.push_sql(") "); + self.0.walk_ast(out) + } +} + +type FromClause = + diesel::internal::table_macro::StaticQueryFragmentInstance; +type VpcSubnetFromClause = FromClause; +const VPC_SUBNET_FROM_CLAUSE: VpcSubnetFromClause = VpcSubnetFromClause::new(); + +#[cfg(test)] +mod test { + use super::SubnetError; + use crate::db::model::VpcSubnet; + use ipnetwork::IpNetwork; + use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external::IdentityMetadataCreateParams; + use omicron_common::api::external::Ipv4Net; + use omicron_common::api::external::Ipv6Net; + use omicron_common::api::external::Name; + use omicron_test_utils::dev; + use std::convert::TryInto; + use std::sync::Arc; + use uuid::Uuid; + + #[tokio::test] + async fn test_filter_conflicting_vpc_subnet_ranges_query() { + let make_id = + |name: &Name, description: &str| IdentityMetadataCreateParams { + name: name.clone(), + description: description.to_string(), + }; + let ipv4_block = Ipv4Net("172.30.0.0/22".parse().unwrap()); + let other_ipv4_block = Ipv4Net("172.31.0.0/22".parse().unwrap()); + let ipv6_block = Ipv6Net("fd12:3456:7890::/64".parse().unwrap()); + let other_ipv6_block = Ipv6Net("fd00::/64".parse().unwrap()); + let name = "a-name".to_string().try_into().unwrap(); + let other_name = "b-name".to_string().try_into().unwrap(); + let description = "some description".to_string(); + let identity = make_id(&name, &description); + let vpc_id = "d402369d-c9ec-c5ad-9138-9fbee732d53e".parse().unwrap(); + let other_vpc_id = + "093ad2db-769b-e3c2-bc1c-b46e84ce5532".parse().unwrap(); + let subnet_id = "093ad2db-769b-e3c2-bc1c-b46e84ce5532".parse().unwrap(); + let other_subnet_id = + "695debcc-e197-447d-ffb2-976150a7b7cf".parse().unwrap(); + let row = + VpcSubnet::new(subnet_id, vpc_id, identity, ipv4_block, ipv6_block); + + // Setup the test database + let logctx = + dev::test_setup_log("test_filter_conflicting_vpc_subnet_ranges"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(crate::db::Pool::new(&cfg)); + let db_datastore = + Arc::new(crate::db::DataStore::new(Arc::clone(&pool))); + + // We should be able to insert anything into an empty table. + assert!( + matches!(db_datastore.vpc_create_subnet_raw(row).await, Ok(_)), + "Should be able to insert VPC subnet into empty table" + ); + + // We shouldn't be able to insert a row with the same IP ranges, even if + // the other data does not conflict. + let new_row = VpcSubnet::new( + other_subnet_id, + vpc_id, + make_id(&other_name, &description), + ipv4_block, + ipv6_block, + ); + assert!( + matches!( + db_datastore.vpc_create_subnet_raw(new_row).await, + Err(SubnetError::OverlappingIpRange(IpNetwork::V4(_))) + ), + "Should not be able to insert new VPC subnet with the same IPv4 and IPv6 ranges" + ); + + // We should be able to insert data with the same ranges, if we change + // the VPC ID. + let new_row = VpcSubnet::new( + other_subnet_id, + other_vpc_id, + make_id(&name, &description), + ipv4_block, + ipv6_block, + ); + assert!( + matches!(db_datastore.vpc_create_subnet_raw(new_row).await, Ok(_)), + "Should be able to insert a VPC Subnet with the same ranges in a different VPC", + ); + + // We shouldn't be able to insert a subnet if we change only the + // IPv4 or IPv6 block. They must _both_ be non-overlapping. + let new_row = VpcSubnet::new( + other_subnet_id, + vpc_id, + make_id(&other_name, &description), + other_ipv4_block, + ipv6_block, + ); + let err = db_datastore + .vpc_create_subnet_raw(new_row) + .await + .expect_err("Should not be able to insert VPC Subnet with overlapping IPv6 range"); + assert_eq!( + err, + SubnetError::OverlappingIpRange(IpNetwork::from(ipv6_block.0)), + "SubnetError variant should include the exact IP range that overlaps" + ); + let new_row = VpcSubnet::new( + other_subnet_id, + vpc_id, + make_id(&other_name, &description), + ipv4_block, + other_ipv6_block, + ); + let err = db_datastore + .vpc_create_subnet_raw(new_row) + .await + .expect_err("Should not be able to insert VPC Subnet with overlapping IPv4 range"); + assert_eq!( + err, + SubnetError::OverlappingIpRange(IpNetwork::from(ipv4_block.0)), + "SubnetError variant should include the exact IP range that overlaps" + ); + + // We should get an _external error_ if the IP address ranges are OK, + // but the name conflicts. + let new_row = VpcSubnet::new( + other_subnet_id, + vpc_id, + make_id(&name, &description), + other_ipv4_block, + other_ipv6_block, + ); + assert!( + matches!( + db_datastore.vpc_create_subnet_raw(new_row).await, + Err(SubnetError::External(_)) + ), + "Should get an error inserting a VPC Subnet with unique IP ranges, but the same name" + ); + + // We should be able to insert the row if _both ranges_ are different, + // and the name is unique as well. + let new_row = VpcSubnet::new( + Uuid::new_v4(), + vpc_id, + make_id(&other_name, &description), + other_ipv4_block, + other_ipv6_block, + ); + assert!( + matches!(db_datastore.vpc_create_subnet_raw(new_row).await, Ok(_)), + "Should be able to insert new VPC Subnet with non-overlapping IP ranges" + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +}