Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sds: Ignore Delta Removals #32961

Merged
merged 5 commits into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ minor_behavior_changes:
Disables recvmmsg (multi-message) for reading packets from a client QUIC UDP socket, if GRO
is not set or not supported. recvmsg will be used instead. This behavior change can be
reverted by setting ``envoy.reloadable_features.disallow_quic_client_udp_mmsg`` to ``false``.
- area: xds
change: |
Delta SDS removals will no longer result in a "Missing SDS resources" NACK
and instead will be ignored.

bug_fixes:
# *Changes expected to improve the state of the world and are unlikely to have negative effects*
Expand Down
38 changes: 29 additions & 9 deletions source/common/secret/sds_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void SdsApi::onWatchUpdate() {

absl::Status SdsApi::onConfigUpdate(const std::vector<Config::DecodedResourceRef>& resources,
const std::string& version_info) {
const absl::Status status = validateUpdateSize(resources.size());
const absl::Status status = validateUpdateSize(resources.size(), 0);
if (!status.ok()) {
return status;
}
Expand Down Expand Up @@ -143,13 +143,25 @@ absl::Status SdsApi::onConfigUpdate(const std::vector<Config::DecodedResourceRef
return absl::OkStatus();
}

absl::Status SdsApi::onConfigUpdate(const std::vector<Config::DecodedResourceRef>& added_resources,
const Protobuf::RepeatedPtrField<std::string>&,
const std::string&) {
const absl::Status status = validateUpdateSize(added_resources.size());
absl::Status
SdsApi::onConfigUpdate(const std::vector<Config::DecodedResourceRef>& added_resources,
const Protobuf::RepeatedPtrField<std::string>& removed_resources,
const std::string&) {
const absl::Status status = validateUpdateSize(added_resources.size(), removed_resources.size());
if (!status.ok()) {
return status;
}

if (removed_resources.size() == 1) {
// SDS is a singleton (e.g. single-resource) resource subscription, so it should never be
// removed except by the modification of the referenced cluster/listener. Therefore, since the
// server indicates a removal, ignore it (via an ACK).
ENVOY_LOG_MISC(
trace,
"Server sent a delta SDS update attempting to remove a resource (name: {}). Ignoring.",
removed_resources[0]);
return absl::OkStatus();
}
return onConfigUpdate(added_resources, added_resources[0].get().version());
}

Expand All @@ -160,14 +172,22 @@ void SdsApi::onConfigUpdateFailed(Envoy::Config::ConfigUpdateFailureReason reaso
init_target_.ready();
}

absl::Status SdsApi::validateUpdateSize(int num_resources) {
if (num_resources == 0) {
absl::Status SdsApi::validateUpdateSize(uint32_t added_resources_num,
uint32_t removed_resources_num) const {
if (added_resources_num == 0 && removed_resources_num == 0) {
return absl::InvalidArgumentError(
fmt::format("Missing SDS resources for {} in onConfigUpdate()", sds_config_name_));
}
if (num_resources != 1) {

// This conditional technically allows a response with added=1 removed=1
// which is nonsensical since SDS is a singleton resource subscription.
// It is, however, preferred to ignore these nonsensical responses rather
// than NACK them, so it is allowed here.
if (added_resources_num > 1 || removed_resources_num > 1) {
return absl::InvalidArgumentError(
fmt::format("Unexpected SDS secrets length: {}", num_resources));
fmt::format("Unexpected SDS secrets length for {}, number of added resources "
"{}, number of removed resources {}. Expected sum is 1",
sds_config_name_, added_resources_num, removed_resources_num));
}
return absl::OkStatus();
keithmattix marked this conversation as resolved.
Show resolved Hide resolved
}
Expand Down
3 changes: 2 additions & 1 deletion source/common/secret/sds_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ class SdsApi : public Envoy::Config::SubscriptionBase<
Api::Api& api_;

private:
absl::Status validateUpdateSize(int num_resources);
absl::Status validateUpdateSize(uint32_t added_resources_num,
uint32_t removed_resources_num) const;
void initialize();
FileContentMap loadFiles();
uint64_t getHashForFiles(const FileContentMap& files);
Expand Down
15 changes: 10 additions & 5 deletions test/common/secret/sds_api_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -550,17 +550,20 @@ TEST_F(SdsApiTest, Delta) {
EXPECT_CALL(sds, onConfigUpdate(DecodedResourcesEq(resources), "version1"));
EXPECT_TRUE(subscription_factory_.callbacks_->onConfigUpdate(resources, {}, "ignored").ok());

// An attempt to remove a resource logs an error, but otherwise just carries on (ignoring the
// removal attempt).
// An attempt to remove a resource while adding a resource should not error.
auto secret_again = std::make_unique<envoy::extensions::transport_sockets::tls::v3::Secret>();
secret_again->set_name("secret_1");
Config::DecodedResourceImpl resource_v2(std::move(secret_again), "name", {}, "version2");
std::vector<Config::DecodedResourceRef> resources_v2{resource_v2};
EXPECT_CALL(sds, onConfigUpdate(DecodedResourcesEq(resources_v2), "version2"));
Protobuf::RepeatedPtrField<std::string> removals;
*removals.Add() = "route_0";
EXPECT_TRUE(
subscription_factory_.callbacks_->onConfigUpdate(resources_v2, removals, "ignored").ok());
removals.RemoveLast();

// An attempt to remove a resource without adding another resource should also not error
*removals.Add() = "route_1";
EXPECT_TRUE(subscription_factory_.callbacks_->onConfigUpdate({}, removals, "ignored").ok());
}

// Tests SDS's use of the delta variant of onConfigUpdate().
Expand Down Expand Up @@ -833,11 +836,13 @@ TEST_F(SdsApiTest, SecretUpdateWrongSize) {
initialize();
EXPECT_EQ(
subscription_factory_.callbacks_->onConfigUpdate(decoded_resources.refvec_, "").message(),
"Unexpected SDS secrets length: 2");
"Unexpected SDS secrets length for abc.com, number of added resources "
"2, number of removed resources 0. Expected sum is 1");
Protobuf::RepeatedPtrField<std::string> unused;
EXPECT_EQ(subscription_factory_.callbacks_->onConfigUpdate(decoded_resources.refvec_, unused, "")
.message(),
"Unexpected SDS secrets length: 2");
"Unexpected SDS secrets length for abc.com, number of added resources "
"2, number of removed resources 0. Expected sum is 1");
}

// Validate that SdsApi throws exception if secret name passed to onConfigUpdate()
Expand Down
89 changes: 89 additions & 0 deletions test/integration/ads_integration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,95 @@ TEST_P(AdsIntegrationTest, ClusterInitializationUpdateOneOfThe2Warming) {
test_server_->waitForGaugeGe("cluster_manager.active_clusters", 4);
}

// Verify that Delta SDS Removals don't result in a NACK.
TEST_P(AdsIntegrationTest, DeltaSdsRemovals) {
adisuissa marked this conversation as resolved.
Show resolved Hide resolved
// SOTW delivery doesn't track individual resources, so only
// run this test for delta xDS.
if (sotw_or_delta_ != Grpc::SotwOrDelta::Delta &&
sotw_or_delta_ != Grpc::SotwOrDelta::UnifiedDelta) {
GTEST_SKIP_("This test is for delta only");
}
initialize();
const auto cds_type_url = Config::getTypeUrl<envoy::config::cluster::v3::Cluster>();
const auto sds_type_url =
Config::getTypeUrl<envoy::extensions::transport_sockets::tls::v3::Secret>();
const auto lds_type_url = Config::getTypeUrl<envoy::config::listener::v3::Listener>();

// First get the cluster sent to the test proxy
envoy::config::core::v3::TransportSocket sds_transport_socket;
TestUtility::loadFromYaml(R"EOF(
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
common_tls_context:
validation_context_sds_secret_config:
name: validation_context
sds_config:
resource_api_version: V3
ads: {}
)EOF",
sds_transport_socket);
auto cluster = ConfigHelper::buildStaticCluster("cluster", 8000, "127.0.0.1");
*cluster.mutable_transport_socket() = sds_transport_socket;
cluster.set_name("cluster_0");

// Initial * Delta ADS subscription
EXPECT_TRUE(compareDeltaDiscoveryRequest(cds_type_url, {}, {}));
sendDeltaDiscoveryResponse<envoy::config::cluster::v3::Cluster>(cds_type_url, {cluster}, {}, "1");

// The cluster needs this secret, so it's going to request it
EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {"validation_context"}, {}, {}));

// Ack the original CDS sub
EXPECT_TRUE(compareDeltaDiscoveryRequest(cds_type_url, {}, {}));

// Before we send the secret, we'll send a delta removal to make sure we don't get a NACK
sendDeltaDiscoveryResponse<envoy::extensions::transport_sockets::tls::v3::Secret>(
sds_type_url, {}, {"validation_context"}, "2");

// Ack the removal
EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {}, {}));

// Cluster should still be warming
test_server_->waitForGaugeGe("cluster_manager.warming_clusters", 1);
test_server_->waitForGaugeEq("cluster.cluster_0.warming_state", 1);
test_server_->waitForCounterEq("cluster_manager.cluster_removed", 0);

envoy::extensions::transport_sockets::tls::v3::Secret validation_context;
TestUtility::loadFromYaml(fmt::format(R"EOF(
name: validation_context
validation_context:
trusted_ca:
filename: {}
)EOF",
TestEnvironment::runfilesPath(
"test/config/integration/certs/upstreamcacert.pem")),
validation_context);

// Now actually send the secret
sendDeltaDiscoveryResponse<envoy::extensions::transport_sockets::tls::v3::Secret>(
sds_type_url, {validation_context}, {}, "2");

// we're no longer warming
test_server_->waitForGaugeEq("cluster_manager.warming_clusters", 0);
// Ack the original LDS (now that the cluster is warmed?)
EXPECT_TRUE(compareDeltaDiscoveryRequest(lds_type_url, {}, {}));
// Ack the secret we just sent
EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {}, {}));

// Remove the cluster that owns the secret
sendDeltaDiscoveryResponse<envoy::config::cluster::v3::Cluster>(cds_type_url, {}, {"cluster_0"},
"1");
// Follow that up with a secret removal
sendDeltaDiscoveryResponse<envoy::extensions::transport_sockets::tls::v3::Secret>(
sds_type_url, {}, {"validation_context"}, "3");
test_server_->waitForCounterEq("cluster_manager.cluster_removed", 1);
// Ack the CDS removal
EXPECT_TRUE(compareDeltaDiscoveryRequest(cds_type_url, {}, {}));
// Should be an ACK, not a NACK since the SDS removal is ignored
EXPECT_TRUE(compareDeltaDiscoveryRequest(sds_type_url, {}, {}));
}

// Make sure two clusters sharing same secret are both kept warming before secret
// arrives. Verify that the clusters are eventually initialized.
// This is a regression test of #11120.
Expand Down
Loading