From 3c49cd291b363c4b3d4834d55948bd2e052c86e9 Mon Sep 17 00:00:00 2001 From: Julia Yakovlev Date: Tue, 4 Jun 2024 17:58:58 +0300 Subject: [PATCH] fix(longevity): get table for 'add_drop_column' If the disrupt_add_remove_dc nemesis runs in parallel with the disrupt_add_drop_column one, we can get the following error: KeyError: 'keyspace_new_dc' It is caused by a race between the addition of a new keyspace (disrupt_add_remove_dc nemesis) and the driver session update, combined with unsafe code that assumes the driver's session (disrupt_add_drop_column nemesis) already knows about the newly added keyspace. Another possible problem: the newly added keyspace is dropped at exactly the moment we run describe on the table. Fixes: https://github.com/scylladb/scylla-cluster-tests/issues/7240 --- sdcm/nemesis.py | 7 +++++-- sdcm/utils/common.py | 23 ++++++++++++++++------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/sdcm/nemesis.py b/sdcm/nemesis.py index b817932688..e7402b5eb7 100644 --- a/sdcm/nemesis.py +++ b/sdcm/nemesis.py @@ -2040,7 +2040,7 @@ def _get_all_tables_with_no_compact_storage(self, tables_to_skip=None): if tables_to_skip is None: tables_to_skip = {} to_be_skipped_default = tables_to_skip.get('*', '').split(',') - with self.cluster.cql_connection_patient(self.tester.db_cluster.nodes[0]) as session: + with self.cluster.cql_connection_patient(self.target_node) as session: query_result = session.execute('SELECT keyspace_name FROM system_schema.keyspaces;') for result_rows in query_result: keyspaces.extend([row.lower() @@ -2055,7 +2055,10 @@ def _get_all_tables_with_no_compact_storage(self, tables_to_skip=None): to_be_skipped = [] else: to_be_skipped = to_be_skipped.split(',') + to_be_skipped_default - tables = get_db_tables(session, ks, with_compact_storage=False) + tables = get_db_tables(session=session, + keyspace_name=ks, + node=self.target_node, + with_compact_storage=False) if to_be_skipped: tables = [table for table in tables if table not in to_be_skipped] if not 
tables: diff --git a/sdcm/utils/common.py b/sdcm/utils/common.py index 784e07842f..d1bbfe8c03 100644 --- a/sdcm/utils/common.py +++ b/sdcm/utils/common.py @@ -54,6 +54,7 @@ import requests import boto3 +from invoke import UnexpectedExit from mypy_boto3_s3 import S3Client, S3ServiceResource from mypy_boto3_ec2 import EC2Client, EC2ServiceResource from mypy_boto3_ec2.service_resource import Image as EC2Image @@ -67,6 +68,7 @@ from sdcm.provision.aws.capacity_reservation import SCTCapacityReservation from sdcm.provision.azure.provisioner import AzureProvisioner +from sdcm.remote.libssh2_client import UnexpectedExit as Libssh2_UnexpectedExit from sdcm.sct_events import Severity from sdcm.sct_events.system import CpuNotHighEnoughEvent, SoftTimeoutEvent from sdcm.utils.argus import ArgusError, get_argus_client, terminate_resource_in_argus @@ -2106,7 +2108,7 @@ def get_ami_tags(ami_id, region_name): return {} -def get_db_tables(session, ks, with_compact_storage=True): +def get_db_tables(session, keyspace_name, node, with_compact_storage=True): """ Return tables from keystore based on their compact storage feature Arguments: @@ -2116,12 +2118,19 @@ def get_db_tables(session, ks, with_compact_storage=True): """ output = [] - for table in list(session.cluster.metadata.keyspaces[ks].tables.keys()): - table_code = session.cluster.metadata.keyspaces[ks].tables[table].as_cql_query() - if with_compact_storage is None: - output.append(table) - elif ("with compact storage" in table_code.lower()) == with_compact_storage: - output.append(table) + for row in list(session.execute(f"select table_name from system_schema.tables where keyspace_name='{keyspace_name}'")): + try: + create_table_statement = node.run_cqlsh(f"describe {keyspace_name}.{row.table_name}").stdout.upper() + except (UnexpectedExit, Libssh2_UnexpectedExit) as err: + # SCT issue https://github.com/scylladb/scylla-cluster-tests/issues/7240 + # May happen when disrupt_add_remove_dc nemesis run in parallel to the 
disrupt_add_drop_column + LOGGER.error("Failed to describe '%s.%s' table. Maybe the table has been deleted. Error: %s", + keyspace_name, row.table_name, err.result.stderr) + continue + + if with_compact_storage is None or (("WITH COMPACT STORAGE" in create_table_statement) == with_compact_storage): + output.append(row.table_name) + return output