From 3be0b532ebe07aff5fa1e92914e4409aeefb608a Mon Sep 17 00:00:00 2001 From: Keith Mattix II Date: Mon, 18 Mar 2024 19:54:34 +0000 Subject: [PATCH 1/5] xds: Ignore SDS removal Signed-off-by: Keith Mattix II --- source/common/secret/sds_api.cc | 32 ++++++--- source/common/secret/sds_api.h | 3 +- test/common/secret/sds_api_test.cc | 15 ++-- test/integration/ads_integration_test.cc | 87 ++++++++++++++++++++++++ 4 files changed, 122 insertions(+), 15 deletions(-) diff --git a/source/common/secret/sds_api.cc b/source/common/secret/sds_api.cc index 7c67f76ecda6..a9dc19855a62 100644 --- a/source/common/secret/sds_api.cc +++ b/source/common/secret/sds_api.cc @@ -82,7 +82,7 @@ void SdsApi::onWatchUpdate() { absl::Status SdsApi::onConfigUpdate(const std::vector& resources, const std::string& version_info) { - const absl::Status status = validateUpdateSize(resources.size()); + const absl::Status status = validateUpdateSize(resources.size(), 0); if (!status.ok()) { return status; } @@ -143,13 +143,24 @@ absl::Status SdsApi::onConfigUpdate(const std::vector& added_resources, - const Protobuf::RepeatedPtrField&, - const std::string&) { - const absl::Status status = validateUpdateSize(added_resources.size()); +absl::Status +SdsApi::onConfigUpdate(const std::vector& added_resources, + const Protobuf::RepeatedPtrField& removed_resources, + const std::string&) { + const absl::Status status = validateUpdateSize(added_resources.size(), removed_resources.size()); if (!status.ok()) { return status; } + + if (removed_resources.size() == 1) { + // We only removed a resource here; don't go through the SotW init process + // and expect this to be cleaned up by the owning cluster or listener. + ENVOY_LOG_MISC( + trace, + "Server sent a delta SDS update attempting to remove a resource (name: {}). Ignoring.", + removed_resources[0]); + return absl::OkStatus(); + } return onConfigUpdate(added_resources, added_resources[0].get().version()); } @@ -160,14 +171,17 @@ void SdsApi::onConfigUpdateFailed(Envoy::Config::ConfigUpdateFailureReason reaso init_target_.ready(); } -absl::Status SdsApi::validateUpdateSize(int num_resources) { - if (num_resources == 0) { +absl::Status SdsApi::validateUpdateSize(uint32_t added_resources_num, + uint32_t removed_resources_num) { + if (added_resources_num == 0 && removed_resources_num == 0) { return absl::InvalidArgumentError( fmt::format("Missing SDS resources for {} in onConfigUpdate()", sds_config_name_)); } - if (num_resources != 1) { + if (added_resources_num > 1 || removed_resources_num > 1) { return absl::InvalidArgumentError( - fmt::format("Unexpected SDS secrets length: {}", num_resources)); + fmt::format("Unexpected SDS secrets length for {}, number of added resources " + "{}, number of removed resources {}. Expected sum is 1", + sds_config_name_, added_resources_num, removed_resources_num)); } return absl::OkStatus(); } diff --git a/source/common/secret/sds_api.h b/source/common/secret/sds_api.h index e01f30b13983..24fc54e91231 100644 --- a/source/common/secret/sds_api.h +++ b/source/common/secret/sds_api.h @@ -92,7 +92,8 @@ class SdsApi : public Envoy::Config::SubscriptionBase< Api::Api& api_; private: - absl::Status validateUpdateSize(int num_resources); + absl::Status validateUpdateSize(uint32_t added_resources_num, + uint32_t removed_resources_num) const; void initialize(); FileContentMap loadFiles(); uint64_t getHashForFiles(const FileContentMap& files); diff --git a/test/common/secret/sds_api_test.cc b/test/common/secret/sds_api_test.cc index 9ce7ae73fdbf..6b13c71f7833 100644 --- a/test/common/secret/sds_api_test.cc +++ b/test/common/secret/sds_api_test.cc @@ -550,17 +550,20 @@ TEST_F(SdsApiTest, Delta) { EXPECT_CALL(sds, onConfigUpdate(DecodedResourcesEq(resources), "version1")); EXPECT_TRUE(subscription_factory_.callbacks_->onConfigUpdate(resources, {}, "ignored").ok()); - // An attempt to remove a resource logs an error, but otherwise just carries on (ignoring the - // removal attempt). + // An attempt to remove a resource while adding a resource should not error. auto secret_again = std::make_unique(); secret_again->set_name("secret_1"); Config::DecodedResourceImpl resource_v2(std::move(secret_again), "name", {}, "version2"); std::vector resources_v2{resource_v2}; - EXPECT_CALL(sds, onConfigUpdate(DecodedResourcesEq(resources_v2), "version2")); Protobuf::RepeatedPtrField removals; *removals.Add() = "route_0"; EXPECT_TRUE( subscription_factory_.callbacks_->onConfigUpdate(resources_v2, removals, "ignored").ok()); + removals.RemoveLast(); + + // An attempt to remove a resource without adding another resource should also not error + *removals.Add() = "route_1"; + EXPECT_TRUE(subscription_factory_.callbacks_->onConfigUpdate({}, removals, "ignored").ok()); } // Tests SDS's use of the delta variant of onConfigUpdate(). @@ -833,11 +836,13 @@ TEST_F(SdsApiTest, SecretUpdateWrongSize) { initialize(); EXPECT_EQ( subscription_factory_.callbacks_->onConfigUpdate(decoded_resources.refvec_, "").message(), - "Unexpected SDS secrets length: 2"); + "Unexpected SDS secrets length for abc.com, number of added resources " + "2, number of removed resources 0. Expected sum is 1"); Protobuf::RepeatedPtrField unused; EXPECT_EQ(subscription_factory_.callbacks_->onConfigUpdate(decoded_resources.refvec_, unused, "") .message(), - "Unexpected SDS secrets length: 2"); + "Unexpected SDS secrets length for abc.com, number of added resources " + "2, number of removed resources 0. Expected sum is 1"); } // Validate that SdsApi throws exception if secret name passed to onConfigUpdate() diff --git a/test/integration/ads_integration_test.cc b/test/integration/ads_integration_test.cc index 0fb8c4ecb006..e2d619b57c3c 100644 --- a/test/integration/ads_integration_test.cc +++ b/test/integration/ads_integration_test.cc @@ -236,6 +236,93 @@ TEST_P(AdsIntegrationTest, ClusterInitializationUpdateOneOfThe2Warming) { test_server_->waitForGaugeGe("cluster_manager.active_clusters", 4); } +TEST_P(AdsIntegrationTest, DeltaSdsRemovals) { + // This test is for delta only + if (sotw_or_delta_ != Grpc::SotwOrDelta::Delta && + sotw_or_delta_ != Grpc::SotwOrDelta::UnifiedDelta) { + GTEST_SKIP_("This test is for delta only"); + } + initialize(); + const auto cds_type_url = Config::getTypeUrl(); + const auto sds_type_url = + Config::getTypeUrl(); + const auto lds_type_url = Config::getTypeUrl(); + + // First get the cluster sent to the test proxy + envoy::config::core::v3::TransportSocket sds_transport_socket; + TestUtility::loadFromYaml(R"EOF( + name: envoy.transport_sockets.tls + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + common_tls_context: + validation_context_sds_secret_config: + name: validation_context + sds_config: + resource_api_version: V3 + ads: {} + )EOF", + sds_transport_socket); + auto cluster = ConfigHelper::buildStaticCluster("cluster", 8000, "127.0.0.1"); + *cluster.mutable_transport_socket() = sds_transport_socket; + cluster.set_name("cluster_0"); + + // Initial * Delta ADS subscription + EXPECT_TRUE(compareDeltaDiscoveryRequest(cds_type_url, {}, {})); + sendDeltaDiscoveryResponse(cds_type_url, {cluster}, {}, "1"); + + // The cluster needs this secret, so it's going to request it + EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {"validation_context"}, {}, {})); + + // Ack the original CDS sub + EXPECT_TRUE(compareDeltaDiscoveryRequest(cds_type_url, {}, {})); + + // Before we send the secret, we'll send a delta removal to make sure we don't get a NACK + sendDeltaDiscoveryResponse( + sds_type_url, {}, {"validation_context"}, "2"); + + // Ack the removal + EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {}, {})); + + // Cluster should still be warming + test_server_->waitForGaugeGe("cluster_manager.warming_clusters", 1); + test_server_->waitForGaugeEq("cluster.cluster_0.warming_state", 1); + test_server_->waitForCounterEq("cluster_manager.cluster_removed", 0); + + envoy::extensions::transport_sockets::tls::v3::Secret validation_context; + TestUtility::loadFromYaml(fmt::format(R"EOF( + name: validation_context + validation_context: + trusted_ca: + filename: {} + )EOF", + TestEnvironment::runfilesPath( + "test/config/integration/certs/upstreamcacert.pem")), + validation_context); + + // Now actually send the secret + sendDeltaDiscoveryResponse( + sds_type_url, {validation_context}, {}, "2"); + + // we're no longer warming + test_server_->waitForGaugeEq("cluster_manager.warming_clusters", 0); + // Ack the original LDS (now that the cluster is warmed?) + EXPECT_TRUE(compareDeltaDiscoveryRequest(lds_type_url, {}, {})); + // Ack the secret we just sent + EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {}, {})); + + // Remove the cluster that owns the secret + sendDeltaDiscoveryResponse(cds_type_url, {}, {"cluster_0"}, + "1"); + // Follow that up with a secret removal + sendDeltaDiscoveryResponse( + sds_type_url, {}, {"validation_context"}, "3"); + test_server_->waitForCounterEq("cluster_manager.cluster_removed", 1); + // Ack the CDS removal + EXPECT_TRUE(compareDeltaDiscoveryRequest(cds_type_url, {}, {})); + // Should be an ACK, not a NACK since the SDS removal is ignored + EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {}, {})); +} + // Make sure two clusters sharing same secret are both kept warming before secret // arrives. Verify that the clusters are eventually initialized. // This is a regression test of #11120. From 9f349196f3024ec6a580167f23a7ef06b3ed3b3d Mon Sep 17 00:00:00 2001 From: Keith Mattix II Date: Mon, 25 Mar 2024 16:25:25 +0000 Subject: [PATCH 2/5] Add comment to test Signed-off-by: Keith Mattix II --- test/integration/ads_integration_test.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/integration/ads_integration_test.cc b/test/integration/ads_integration_test.cc index e2d619b57c3c..2e31bf3c38b3 100644 --- a/test/integration/ads_integration_test.cc +++ b/test/integration/ads_integration_test.cc @@ -236,8 +236,10 @@ TEST_P(AdsIntegrationTest, ClusterInitializationUpdateOneOfThe2Warming) { test_server_->waitForGaugeGe("cluster_manager.active_clusters", 4); } +// Verify that Delta SDS Removals don't result in a NACK. TEST_P(AdsIntegrationTest, DeltaSdsRemovals) { - // This test is for delta only + // SOTW delivery doesn't track individual resources, so only + // run this test for delta xDS. if (sotw_or_delta_ != Grpc::SotwOrDelta::Delta && sotw_or_delta_ != Grpc::SotwOrDelta::UnifiedDelta) { GTEST_SKIP_("This test is for delta only"); From c1d54ab9242f53b113d05988de52f0b712efee32 Mon Sep 17 00:00:00 2001 From: Keith Mattix II Date: Mon, 25 Mar 2024 16:34:29 +0000 Subject: [PATCH 3/5] Update validateUpdateSize impl Signed-off-by: Keith Mattix II --- source/common/secret/sds_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/secret/sds_api.cc b/source/common/secret/sds_api.cc index a9dc19855a62..2ded978ac8f5 100644 --- a/source/common/secret/sds_api.cc +++ b/source/common/secret/sds_api.cc @@ -172,7 +172,7 @@ void SdsApi::onConfigUpdateFailed(Envoy::Config::ConfigUpdateFailureReason reaso } absl::Status SdsApi::validateUpdateSize(uint32_t added_resources_num, - uint32_t removed_resources_num) { + uint32_t removed_resources_num) const { if (added_resources_num == 0 && removed_resources_num == 0) { return absl::InvalidArgumentError( fmt::format("Missing SDS resources for {} in onConfigUpdate()", sds_config_name_)); From 89d1faaeedc79c3ea5b6c5a2ad5b68fad88f14bf Mon Sep 17 00:00:00 2001 From: Keith Mattix II Date: Fri, 29 Mar 2024 12:32:47 +0000 Subject: [PATCH 4/5] Add clarifying comments Signed-off-by: Keith Mattix II --- source/common/secret/sds_api.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/source/common/secret/sds_api.cc b/source/common/secret/sds_api.cc index 2ded978ac8f5..3afab7e6dfc8 100644 --- a/source/common/secret/sds_api.cc +++ b/source/common/secret/sds_api.cc @@ -153,8 +153,9 @@ SdsApi::onConfigUpdate(const std::vector& added_reso } if (removed_resources.size() == 1) { - // We only removed a resource here; don't go through the SotW init process - // and expect this to be cleaned up by the owning cluster or listener. + // SDS is a singleton (e.g. single-resource) resource subscription, so it should never be + // removed except by the modification of the referenced cluster/listener. Therefore, since the + // server indicates a removal, ignore it (via an ACK). ENVOY_LOG_MISC( trace, "Server sent a delta SDS update attempting to remove a resource (name: {}). Ignoring.", @@ -177,6 +178,11 @@ absl::Status SdsApi::validateUpdateSize(uint32_t added_resources_num, return absl::InvalidArgumentError( fmt::format("Missing SDS resources for {} in onConfigUpdate()", sds_config_name_)); } + + // This conditional technically allows a response with added=1 removed=1 + // which is nonsensical since SDS is a singleton resource subscription. + // It is, however, preferred to ignore these nonsensical responses rather + // than NACK them, so it is allowed here. if (added_resources_num > 1 || removed_resources_num > 1) { return absl::InvalidArgumentError( fmt::format("Unexpected SDS secrets length for {}, number of added resources " From 63c646cc39fa8f13a146a89ef3b0409d5ec1b5c8 Mon Sep 17 00:00:00 2001 From: Keith Mattix II Date: Fri, 29 Mar 2024 16:58:04 +0000 Subject: [PATCH 5/5] Add changelog entry Signed-off-by: Keith Mattix II --- changelogs/current.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 9ecf88138ad4..3c3d8f04dfc4 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -94,6 +94,10 @@ minor_behavior_changes: Disables recvmmsg (multi-message) for reading packets from a client QUIC UDP socket, if GRO is not set or not supported. recvmsg will be used instead. This behavior change can be reverted by setting ``envoy.reloadable_features.disallow_quic_client_udp_mmsg`` to ``false``. +- area: xds + change: | + Delta SDS removals will no longer result in a "Missing SDS resources" NACK + and instead will be ignored. bug_fixes: # *Changes expected to improve the state of the world and are unlikely to have negative effects*