diff --git a/configurations/nemesis/additional_configs/docker_backend_local.yaml b/configurations/nemesis/additional_configs/docker_backend_local.yaml index 2dd5ebb002..8da79b042f 100644 --- a/configurations/nemesis/additional_configs/docker_backend_local.yaml +++ b/configurations/nemesis/additional_configs/docker_backend_local.yaml @@ -1,8 +1,8 @@ test_duration: 90 prepare_write_cmd: - - "cassandra-stress write cl=QUORUM n=1048576 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=80 -pop seq=1..5048570 -col 'n=FIXED(8) size=FIXED(128)' -log interval=5" - - "scylla-bench -workload=sequential -mode=write -replication-factor=3 -partition-count=10000 -clustering-row-count=100 -clustering-row-size=uniform:128..2048 -concurrency=10 -connection-count=10 -consistency-level=quorum -rows-per-request=10 -timeout=30s -validate-data" + - "cassandra-stress write cl=QUORUM n=1048576 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=30 throttle=2000/s -pop seq=1..5048570 -col 'n=FIXED(8) size=FIXED(128)' -log interval=5" + - "scylla-bench -workload=sequential -mode=write -replication-factor=3 -partition-count=10000 -clustering-row-count=100 -clustering-row-size=uniform:128..2048 -concurrency=10 -connection-count=10 -consistency-level=quorum -rows-per-request=10 -timeout=30s -validate-data -max-rate=1000" stress_cmd: - "cassandra-stress write cl=QUORUM duration=60m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=30 throttle=2000/s -pop seq=1..5048570 -col 'n=FIXED(8) size=FIXED(128)' -log interval=5" @@ -21,7 +21,7 @@ nemesis_filter_seeds: false nemesis_during_prepare: true # NOTE: the parameters reduce footprint of scylla cluster for docker backend, comparing to default values -append_scylla_args: '--smp 1 --memory 5G' +append_scylla_args: '--smp 1 --memory 2G' use_mgmt: false monitor_swap_size: 0 @@ 
-29,3 +29,9 @@ monitor_swap_size: 0 # https://github.com/scylladb/scylla-cluster-tests/issues/7287 is fixed server_encrypt: false client_encrypt: false + +# TODO: remove this once this configuration runs in Jenkins +enable_argus: false + +# because of issue https://github.com/scylladb/scylla-monitoring/issues/2246, we should use the older monitoring version +monitor_branch: 'branch-4.5' diff --git a/sdcm/cluster.py b/sdcm/cluster.py index 538da11710..798f5deea0 100644 --- a/sdcm/cluster.py +++ b/sdcm/cluster.py @@ -12,6 +12,8 @@ # Copyright (c) 2016 ScyllaDB # pylint: disable=too-many-lines +from __future__ import annotations + import contextlib import queue import logging @@ -2602,7 +2604,7 @@ def check_node_health(self, retries: int = CHECK_NODE_HEALTH_RETRIES) -> None: CHECK_NODE_HEALTH_RETRY_DELAY, self.name) time.sleep(CHECK_NODE_HEALTH_RETRY_DELAY) - def get_nodes_status(self): + def get_nodes_status(self) -> dict[BaseNode, dict]: nodes_status = {} try: statuses = self.parent_cluster.get_nodetool_status(verification_node=self) @@ -2660,7 +2662,7 @@ def get_peers_info(self): return peers_details @retrying(n=5, sleep_time=10, raise_on_exceeded=False) - def get_gossip_info(self): + def get_gossip_info(self) -> dict[BaseNode, dict]: gossip_info = self.run_nodetool('gossipinfo', verbose=False, warning_event_on_exception=(Exception,), publish_event=False) LOGGER.debug("get_gossip_info: %s", gossip_info) @@ -3180,7 +3182,7 @@ def tags(self) -> Dict[str, str]: def dead_nodes_ip_address_list(self): return [node.ip_address for node in self.dead_nodes_list] - def get_ip_to_node_map(self): + def get_ip_to_node_map(self) -> dict[str, BaseNode]: """returns {ip: node} map for all nodes in cluster to get node by ip""" return {ip: node for node in self.nodes for ip in node.get_all_ip_addresses()} @@ -3219,8 +3221,12 @@ def get_rack_names_per_datacenter_and_rack_idx(self, db_nodes: list[BaseNode] | db_nodes = db_nodes if db_nodes else self.nodes status = db_nodes[0].get_nodes_status() + 
# intersection of asked nodes and nodes returned by nodetool status + # since topology might change during this command execution + actual_db_nodes = set(status.keys()).intersection(db_nodes) + rack_names_mapping = {} - for (region, rack), nodes in self.nodes_by_racks_idx_and_regions(nodes=db_nodes).items(): + for (region, rack), nodes in self.nodes_by_racks_idx_and_regions(nodes=actual_db_nodes).items(): rack_names_mapping[(region, rack)] = status[nodes[0]]['rack'] return rack_names_mapping diff --git a/sdcm/utils/remote_logger.py b/sdcm/utils/remote_logger.py index eed47dc7be..1740dd57d9 100644 --- a/sdcm/utils/remote_logger.py +++ b/sdcm/utils/remote_logger.py @@ -275,13 +275,6 @@ def __init__(self, node, target_log_file: str): super().__init__(target_log_file) -class DockerScyllaLogger(CommandNodeLoggerBase): - # pylint: disable=invalid-overridden-method - @cached_property - def _logger_cmd(self) -> str: - return f'docker logs -f {self._node.name} 2>&1 | grep scylla >>{self._target_log_file}' - - class DockerGeneralLogger(CommandNodeLoggerBase): # pylint: disable=invalid-overridden-method @cached_property @@ -577,8 +570,6 @@ def _logger_cmd(self) -> str: def get_system_logging_thread(logs_transport, node, target_log_file): # pylint: disable=too-many-return-statements if logs_transport == 'docker': - if 'db-node' in node.name: - return DockerScyllaLogger(node, target_log_file) return DockerGeneralLogger(node, target_log_file) if logs_transport == 'kubectl': return KubectlGeneralLogger(node, target_log_file) diff --git a/sdcm/utils/remotewebbrowser.py b/sdcm/utils/remotewebbrowser.py index 2a3502e169..478f8a6cba 100644 --- a/sdcm/utils/remotewebbrowser.py +++ b/sdcm/utils/remotewebbrowser.py @@ -64,7 +64,8 @@ def web_driver_docker_client(self) -> Optional[DockerClient]: class RemoteBrowser: def __init__(self, node, use_tunnel=True): self.node = node - self.use_tunnel = bool(self.node.ssh_login_info and use_tunnel) + backend = 
self.node.parent_cluster.params.get("cluster_backend") + self.use_tunnel = bool(self.node.ssh_login_info and use_tunnel and backend not in ('docker',)) @cached_property def browser(self):