From 7aa622a635c42af23b60511cdcbf2ab2f43f7ddb Mon Sep 17 00:00:00 2001 From: Evgeniy Naydanov Date: Mon, 25 Nov 2024 07:11:27 +0000 Subject: [PATCH] fix(upgrade_test): ignore topology change coordinator errors during upgrades The error messages reported in scylladb#20754 and scylladb#20950 can be ignored until a proper fix is merged, because they are mostly just bad UX. --- sdcm/sct_events/group_common_events.py | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/sdcm/sct_events/group_common_events.py b/sdcm/sct_events/group_common_events.py index f1bb32e95ee..9d4c97140ea 100644 --- a/sdcm/sct_events/group_common_events.py +++ b/sdcm/sct_events/group_common_events.py @@ -97,6 +97,36 @@ def ignore_upgrade_schema_errors(): line="cql_server - exception while processing connection: seastar::nested_exception " "(seastar::nested_exception)", )) + if SkipPerIssues( + issues=[ + "https://github.com/scylladb/scylladb/issues/20754", + "https://github.com/scylladb/scylladb/issues/20950", + ], + params=TestConfig().tester_obj().params, + ): + # @piodul: + # + # The upgrade-to-view-build-status-on-raft procedure runs right after the VIEW_BUILD_STATUS_ON_GROUP0 + # feature is enabled. Enabling a cluster feature on raft requires all nodes to be alive. Most likely + # the node being restarted wasn't yet seen as such, but the upgrade procedure started anyway. + # + # The error is not critical. The topology coordinator node will retry the operation in short intervals + # until it succeeds. The operation shouldn't have any harmful side effects if it fails, so it's mostly + # bad UX because we can avoid the busywork and error messages by appropriately delaying the moment when + # the operation is executed. + # + # Therefore, it is OK to ignore this particular error until a proper fix is merged.
+ stack.enter_context(DbEventsFilter( + db_event=DatabaseLogEvent.DATABASE_ERROR, + line="raft_topology - topology change coordinator fiber got error exceptions::unavailable_exception" + " (Cannot achieve consistency level for cl ALL.", + )) + stack.enter_context(DbEventsFilter( + db_event=DatabaseLogEvent.RUNTIME_ERROR, + line="raft_topology - topology change coordinator fiber got error std::runtime_error" + " (raft topology: exec_global_command(barrier) failed with seastar::rpc::closed_error" + " (connection is closed))", + )) yield