diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 537d27e5b19..b38875cbc7c 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -1151,6 +1151,20 @@ def _init_instance(self, context, instance): 'updated.', instance=instance) self._set_instance_obj_error_state(instance) return + except exception.PciDeviceNotFoundById: + # This is bug 1981813 where the bound port vnic_type has changed + # from direct to macvtap. Nova does not support that and it + # already printed an ERROR when the change is detected during + # _heal_instance_info_cache. Now we print an ERROR again and skip + # plugging the vifs but let the service startup continue to init + # the other instances + LOG.exception( + 'Virtual interface plugging failed for instance. Probably the ' + 'vnic_type of the bound port has been changed. Nova does not ' + 'support such change.', + instance=instance + ) + return if instance.task_state == task_states.RESIZE_MIGRATING: # We crashed during resize/migration, so roll back for safety diff --git a/nova/network/neutron.py b/nova/network/neutron.py index 8ea3f7e6bb2..5b68d5ab102 100644 --- a/nova/network/neutron.py +++ b/nova/network/neutron.py @@ -3236,6 +3236,25 @@ def _build_vif_model(self, context, client, current_neutron_port, delegate_create=True, ) + def _log_error_if_vnic_type_changed( + self, port_id, old_vnic_type, new_vnic_type, instance + ): + if old_vnic_type and old_vnic_type != new_vnic_type: + LOG.error( + 'The vnic_type of the bound port %s has ' + 'been changed in neutron from "%s" to ' + '"%s". Changing vnic_type of a bound port ' + 'is not supported by Nova. To avoid ' + 'breaking the connectivity of the instance ' + 'please change the port vnic_type back to ' + '"%s".', + port_id, + old_vnic_type, + new_vnic_type, + old_vnic_type, + instance=instance + ) + def _build_network_info_model(self, context, instance, networks=None, port_ids=None, admin_client=None, preexisting_port_ids=None, @@ -3309,6 +3328,12 @@ def _build_network_info_model(self, context, instance, networks=None, preexisting_port_ids) for index, vif in enumerate(nw_info): if vif['id'] == refresh_vif_id: + self._log_error_if_vnic_type_changed( + vif['id'], + vif['vnic_type'], + refreshed_vif['vnic_type'], + instance, + ) # Update the existing entry. nw_info[index] = refreshed_vif LOG.debug('Updated VIF entry in instance network ' @@ -3358,6 +3383,7 @@ def _build_network_info_model(self, context, instance, networks=None, networks, port_ids = self._gather_port_ids_and_networks( context, instance, networks, port_ids, client) + old_nw_info = instance.get_network_info() nw_info = network_model.NetworkInfo() for port_id in port_ids: current_neutron_port = current_neutron_port_map.get(port_id) @@ -3365,6 +3391,14 @@ def _build_network_info_model(self, context, instance, networks=None, vif = self._build_vif_model( context, client, current_neutron_port, networks, preexisting_port_ids) + for old_vif in old_nw_info: + if old_vif['id'] == port_id: + self._log_error_if_vnic_type_changed( + port_id, + old_vif['vnic_type'], + vif['vnic_type'], + instance, + ) nw_info.append(vif) elif nw_info_refresh: LOG.info('Port %s from network info_cache is no ' diff --git a/nova/tests/functional/libvirt/test_pci_sriov_servers.py b/nova/tests/functional/libvirt/test_pci_sriov_servers.py index 976015a1acd..9d8d0a6f8c8 100644 --- a/nova/tests/functional/libvirt/test_pci_sriov_servers.py +++ b/nova/tests/functional/libvirt/test_pci_sriov_servers.py @@ -979,6 +979,14 @@ def test_change_bound_port_vnic_type_kills_compute_at_restart(self): self.host_mappings['compute1'].cell_mapping ) as cctxt: compute.manager._heal_instance_info_cache(cctxt) + self.assertIn( + 'The vnic_type of the bound port %s has been changed in ' + 'neutron from "direct" to "macvtap". Changing vnic_type of a ' + 'bound port is not supported by Nova. To avoid breaking the ' + 'connectivity of the instance please change the port ' + 'vnic_type back to "direct".' % port['id'], + self.stdlog.logger.output, + ) def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False): # we want to fail the netdev lookup only if the pci_address is @@ -1006,17 +1014,18 @@ def fake_get_ifname_by_pci_address(pci_addr: str, pf_interface=False): 'nova.pci.utils.get_ifname_by_pci_address', side_effect=fake_get_ifname_by_pci_address, ): - # This is bug 1981813 as the compute service fails to start with an - # exception. # Nova cannot prevent the vnic_type change on a bound port. Neutron # should prevent that instead. But the nova-compute should still # be able to start up and only log an ERROR for this instance in # inconsistent state. - self.assertRaises( - exception.PciDeviceNotFoundById, - self.restart_compute_service, - 'compute1', - ) + self.restart_compute_service('compute1') + + self.assertIn( + 'Virtual interface plugging failed for instance. Probably the ' + 'vnic_type of the bound port has been changed. Nova does not ' + 'support such change.', + self.stdlog.logger.output, + ) class SRIOVAttachDetachTest(_PCIServersTestBase): diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 152649a2e57..6601a2975db 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -1306,6 +1306,36 @@ def test_init_instance_with_binding_failed_vif_type(self): self.compute._init_instance(self.context, instance) set_error_state.assert_called_once_with(instance) + def test_init_instance_vif_plug_fails_missing_pci(self): + instance = fake_instance.fake_instance_obj( + self.context, + uuid=uuids.instance, + info_cache=None, + power_state=power_state.RUNNING, + vm_state=vm_states.ACTIVE, + task_state=None, + host=self.compute.host, + expected_attrs=['info_cache']) + + with test.nested( + mock.patch.object(context, 'get_admin_context', + return_value=self.context), + mock.patch.object(objects.Instance, 'get_network_info', + return_value=network_model.NetworkInfo()), + mock.patch.object(self.compute.driver, 'plug_vifs', + side_effect=exception.PciDeviceNotFoundById("pci-addr")), + mock.patch("nova.compute.manager.LOG.exception"), + ) as (get_admin_context, get_nw_info, plug_vifs, log_exception): + # as this does not raise, we are sure that the compute service + # continues initializing the rest of the instances + self.compute._init_instance(self.context, instance) + log_exception.assert_called_once_with( + "Virtual interface plugging failed for instance. Probably the " + "vnic_type of the bound port has been changed. Nova does not " + "support such change.", + instance=instance + ) + def _test__validate_pinning_configuration(self, supports_pcpus=True): instance_1 = fake_instance.fake_instance_obj( self.context, uuid=uuids.instance_1) diff --git a/nova/tests/unit/network/test_neutron.py b/nova/tests/unit/network/test_neutron.py index 488deabfbad..879f8a9ed6e 100644 --- a/nova/tests/unit/network/test_neutron.py +++ b/nova/tests/unit/network/test_neutron.py @@ -3392,6 +3392,155 @@ def test_build_network_info_model_empty( mocked_client.list_ports.assert_called_once_with( tenant_id=uuids.fake, device_id=uuids.instance) + @mock.patch.object( + neutronapi.API, + '_get_physnet_tunneled_info', + new=mock.Mock(return_value=(None, False))) + @mock.patch.object( + neutronapi.API, + '_get_preexisting_port_ids', + new=mock.Mock(return_value=[])) + @mock.patch.object( + neutronapi.API, + '_get_subnets_from_port', + new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')])) + @mock.patch.object( + neutronapi.API, + '_get_floating_ips_by_fixed_and_port', + new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}])) + @mock.patch.object(neutronapi, 'get_client') + def test_build_network_info_model_full_vnic_type_change( + self, mock_get_client + ): + mocked_client = mock.create_autospec(client.Client) + mock_get_client.return_value = mocked_client + fake_inst = objects.Instance() + fake_inst.project_id = uuids.fake + fake_inst.uuid = uuids.instance + fake_ports = [ + { + "id": "port1", + "network_id": "net-id", + "tenant_id": uuids.fake, + "admin_state_up": True, + "status": "ACTIVE", + "fixed_ips": [{"ip_address": "1.1.1.1"}], + "mac_address": "de:ad:be:ef:00:01", + "binding:vif_type": model.VIF_TYPE_BRIDGE, + "binding:vnic_type": model.VNIC_TYPE_DIRECT, + "binding:vif_details": {}, + }, + ] + mocked_client.list_ports.return_value = {'ports': fake_ports} + fake_inst.info_cache = objects.InstanceInfoCache.new( + self.context, uuids.instance) + fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([]) + + # build the network info first + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + force_refresh=True, + ) + + self.assertEqual(1, len(nw_infos)) + fake_inst.info_cache.network_info = nw_infos + + # change the vnic_type of the port and rebuild the network info + fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP + with mock.patch( + "nova.network.neutron.API._log_error_if_vnic_type_changed" + ) as mock_log: + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + force_refresh=True, + ) + + mock_log.assert_called_once_with( + fake_ports[0]["id"], "direct", "macvtap", fake_inst) + self.assertEqual(1, len(nw_infos)) + + @mock.patch.object( + neutronapi.API, + '_get_physnet_tunneled_info', + new=mock.Mock(return_value=(None, False))) + @mock.patch.object( + neutronapi.API, + '_get_preexisting_port_ids', + new=mock.Mock(return_value=[])) + @mock.patch.object( + neutronapi.API, + '_get_subnets_from_port', + new=mock.Mock(return_value=[model.Subnet(cidr='1.0.0.0/8')])) + @mock.patch.object( + neutronapi.API, + '_get_floating_ips_by_fixed_and_port', + new=mock.Mock(return_value=[{'floating_ip_address': '10.0.0.1'}])) + @mock.patch.object(neutronapi, 'get_client') + def test_build_network_info_model_single_vnic_type_change( + self, mock_get_client + ): + mocked_client = mock.create_autospec(client.Client) + mock_get_client.return_value = mocked_client + fake_inst = objects.Instance() + fake_inst.project_id = uuids.fake + fake_inst.uuid = uuids.instance + fake_ports = [ + { + "id": "port1", + "network_id": "net-id", + "tenant_id": uuids.fake, + "admin_state_up": True, + "status": "ACTIVE", + "fixed_ips": [{"ip_address": "1.1.1.1"}], + "mac_address": "de:ad:be:ef:00:01", + "binding:vif_type": model.VIF_TYPE_BRIDGE, + "binding:vnic_type": model.VNIC_TYPE_DIRECT, + "binding:vif_details": {}, + }, + ] + fake_nets = [ + { + "id": "net-id", + "name": "foo", + "tenant_id": uuids.fake, + } + ] + mocked_client.list_ports.return_value = {'ports': fake_ports} + fake_inst.info_cache = objects.InstanceInfoCache.new( + self.context, uuids.instance) + fake_inst.info_cache.network_info = model.NetworkInfo.hydrate([]) + + # build the network info first + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + fake_nets, + [fake_ports[0]["id"]], + refresh_vif_id=fake_ports[0]["id"], + ) + + self.assertEqual(1, len(nw_infos)) + fake_inst.info_cache.network_info = nw_infos + + # change the vnic_type of the port and rebuild the network info + fake_ports[0]["binding:vnic_type"] = model.VNIC_TYPE_MACVTAP + with mock.patch( + "nova.network.neutron.API._log_error_if_vnic_type_changed" + ) as mock_log: + nw_infos = self.api._build_network_info_model( + self.context, + fake_inst, + fake_nets, + [fake_ports[0]["id"]], + refresh_vif_id=fake_ports[0]["id"], + ) + + mock_log.assert_called_once_with( + fake_ports[0]["id"], "direct", "macvtap", fake_inst) + self.assertEqual(1, len(nw_infos)) + @mock.patch.object(neutronapi, 'get_client') def test_get_subnets_from_port(self, mock_get_client): mocked_client = mock.create_autospec(client.Client) diff --git a/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml b/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml new file mode 100644 index 00000000000..a5a3b7c8c2c --- /dev/null +++ b/releasenotes/notes/bug-1981813-vnic-type-change-9f3e16fae885b57f.yaml @@ -0,0 +1,9 @@ +--- +fixes: + - | + `Bug #1981813 `_: Now nova + detects if the ``vnic_type`` of a bound port has been changed in neutron + and leaves an ERROR message in the compute service log as such change on a + bound port is not supported. Also the restart of the nova-compute service + will not crash any more after such port change. Nova will log an ERROR and + skip the initialization of the instance with such port during the startup.