From 00c604e4bfaf961f82853c2710aa8726c37ee97d Mon Sep 17 00:00:00 2001
From: Pawan Dhiran
Date: Wed, 9 Oct 2024 15:12:58 +0530
Subject: [PATCH] Addition of tests for 3AZ Cluster Scenarios - Netsplit b/w DCs

Signed-off-by: Pawan Dhiran
---
 ceph/rados/core_workflows.py                  | 235 ++++++++++++++
 conf/squid/rados/3AZ-cluster.yaml             |  42 ++-
 .../rados/tier-3_rados_test-3-AZ-Cluster.yaml |  38 +--
 .../test_stretch_n-az_netsplit_scenarios.py   | 286 ++++++++++++++++++
 .../rados/test_stretch_netsplit_scenarios.py  |   2 +-
 5 files changed, 568 insertions(+), 35 deletions(-)
 create mode 100644 tests/rados/test_stretch_n-az_netsplit_scenarios.py

diff --git a/ceph/rados/core_workflows.py b/ceph/rados/core_workflows.py
index 28be98a171..0dce5f84f1 100644
--- a/ceph/rados/core_workflows.py
+++ b/ceph/rados/core_workflows.py
@@ -11,8 +11,10 @@
 import datetime
 import json
+import math
 import re
 import time
+from collections import namedtuple
 
 from ceph.ceph_admin import CephAdmin
 from ceph.parallel import parallel
@@ -2271,6 +2273,239 @@ def run_pool_sanity_check(self):
         log.info("Completed check on the cluster. Pass!")
         return True
 
+    def create_n_az_stretch_pool(
+        self,
+        pool_name: str,
+        rule_name: str,
+        rule_id: int,
+        peer_bucket_barrier: str = "datacenter",
+        num_sites: int = 3,
+        num_copies_per_site: int = 2,
+        total_buckets: int = 3,
+        req_peering_buckets: int = 2,
+    ) -> bool:
+        """Method to create a replicated pool and enable stretch mode on the pool
+
+        Note: Most of the params have a default value. When created with defaults, the pool is created
+        for a 3AZ cluster, with 2 copies per site.
+        Args:
+            pool_name: name of the pool
+            rule_id: rule ID
+            rule_name: rule name
+            peer_bucket_barrier: CRUSH level at which failures are accepted
+            num_sites: number of "peer_bucket_barrier" buckets across which the data should be stored.
+                eg: if data has to be stored across 3 DCs, num_sites is 3
+            num_copies_per_site: number of copies of data to be stored in each site
+            total_buckets: total number of "peer_bucket_barrier" buckets present on the cluster.
+                note: In most cases, total_buckets = num_sites. This changes when the customer does not
+                want each site to hold a data copy
+            req_peering_buckets: number of "peer_bucket_barrier" buckets required for a successful peering process
+        Returns:
+            bool. Pass -> True, Fail -> False
+        """
+
+        # Creating test pool to check the effect of Netsplit scenarios on the Pool IO
+        if not self.create_pool(pool_name=pool_name):
+            log.error(f"Failed to create pool : {pool_name}")
+            return False
+
+        rules = f"""id {rule_id}
+type replicated
+step take default
+step choose firstn {num_sites} type {peer_bucket_barrier}
+step chooseleaf firstn {num_copies_per_site} type host
+step emit"""
+        log.debug(f"Rule to be added :\n {rules}\n")
+
+        if not self.add_custom_crush_rules(rule_name=rule_name, rules=rules):
+            log.error("Failed to add the new crush rule")
+            return False
+
+        size = num_sites * num_copies_per_site
+        min_size = math.ceil(size / 2)
+
+        # Enabling stretch mode on the pool
+        if not self.enable_nsite_stretch_pool(
+            pool_name=pool_name,
+            peering_crush_bucket_count=req_peering_buckets,
+            peering_crush_bucket_target=total_buckets,
+            peering_crush_bucket_barrier=peer_bucket_barrier,
+            crush_rule=rule_name,
+            size=size,
+            min_size=min_size,
+        ):
+            log.error(f"Unable to enable stretch mode on the pool : {pool_name}")
+            return False
+        return True
+
+    def get_multi_az_stretch_site_hosts(
+        self, num_data_sites, stretch_bucket: str = "datacenter"
+    ) -> tuple:
+        """
+        Method to get the site hosts from the stretch cluster.
+        Uses osd tree and mon dump commands to prepare a set of all the hosts from each DC.
+        Args:
+            num_data_sites: number of data sites in the cluster
+            stretch_bucket: bucket level at which the stretch rules are set
+        Returns:
+            Hosts: A named tuple containing information about the hosts.
+            - {site_name} (list): A list of hosts in the respective data center.
+        """
+
+        # Getting the CRUSH buckets added into the cluster via osd tree
+        osd_tree_cmd = "ceph osd tree"
+        buckets = self.run_ceph_command(cmd=osd_tree_cmd)
+        dc_buckets = [d for d in buckets["nodes"] if d.get("type") == stretch_bucket]
+        dc_names = [name["name"] for name in dc_buckets]
+        log.debug(
+            f"DC names obtained from OSD tree : {dc_names}, count : {len(dc_names)}"
+        )
+
+        # Dynamically create named tuple fields based on data center names (site names)
+        fields = [dc["name"] for dc in dc_buckets[:num_data_sites]]
+
+        # Create a namedtuple class dynamically based on the site names
+        Hosts = namedtuple("Hosts", fields)
+
+        # Initialize all fields with empty lists
+        hosts = Hosts(**{field: [] for field in fields})
+
+        # Fetching the Mon daemon placement in each CRUSH location
+        def get_mon_from_dc(site_name) -> list:
+            """
+            Returns the list of mon daemon entries (dicts) that are part of the site_name passed.
+            Args:
+                site_name: Name of the site, whose mons have to be fetched.
+            Return:
+                List of mon daemon entries (dicts) present in the given site.
+ """ + mon_dump = "ceph mon dump" + mons = self.run_ceph_command(cmd=mon_dump) + site_mons = [ + d + for d in mons["mons"] + if d.get("crush_location") + == "{" + stretch_bucket + "=" + site_name + "}" + ] + return site_mons + + for i in range(num_data_sites): + dc = dc_buckets.pop() + dc_name = dc["name"] # Use the actual data center name (site name) + osd_hosts = [] + + # Fetching the OSD hosts of the DCs + for crush_id in dc["children"]: + for entry in buckets["nodes"]: + if entry.get("id") == crush_id: + osd_hosts.append(entry.get("name")) + + # Fetch MON hosts for the site + dc_mons = [ + entry.get("name") for entry in get_mon_from_dc(site_name=dc_name) + ] + + # Combine each DC's OSD & MON hosts and update the respective field in the namedtuple + combined_hosts = list(set(osd_hosts + dc_mons)) + field_name = dc_name # Use the site name as the field name + + # Using _replace to update the field + hosts = hosts._replace(**{field_name: combined_hosts}) + + log.debug(f"Hosts present in Datacenter : {dc_name} : {combined_hosts}") + + log.info(f"Hosts present in Cluster : {hosts}") + return hosts + + def enable_nsite_stretch_pool( + self, + pool_name, + peering_crush_bucket_count, + peering_crush_bucket_target, + peering_crush_bucket_barrier, + crush_rule, + size, + min_size, + ) -> bool: + """ + Module to enable stretch mode on the pools in a multi AZ setup + Args: + pool_name: name of the pool + peering_crush_bucket_count: number of buckets for peering to happen + peering_crush_bucket_target: number of peering buckets + peering_crush_bucket_barrier: CRUSH object used for various AZs + crush_rule: name of the crush rule. Make sure the crush rule already exists on the cluster + size: size for the pool + min_size: min_size for the pool + """ + cmd = ( + f"ceph osd pool stretch set {pool_name} {peering_crush_bucket_count} {peering_crush_bucket_target} " + f"{peering_crush_bucket_barrier} {crush_rule} {size} {min_size}" + ) + + try: + self.run_ceph_command(cmd=cmd) + time.sleep(5) + log.debug(f"Checking if the stretch mode op the pool : {pool_name}") + cmd = f"ceph osd pool stretch show {pool_name}" + out = self.run_ceph_command(cmd=cmd) + log.debug(out) + return True + except Exception as err: + log.error( + f"hit exception while enabling/ checking stretch pool details. 
Error : {err}" + ) + return False + + def add_custom_crush_rules(self, rule_name: str, rules: str) -> bool: + """ + Adds the given crush rules into the crush map + Args: + rule_name: Name of the crush rule to add + rules: The rules for crush + Returns: True -> pass, False -> fail + """ + try: + # Getting the crush map + cmd = "ceph osd getcrushmap > /tmp/crush.map.bin" + self.client.exec_command(cmd=cmd, sudo=True) + + # changing it to text for editing + cmd = "crushtool -d /tmp/crush.map.bin -o /tmp/crush.map.txt" + self.client.exec_command(cmd=cmd, sudo=True) + + # Adding the crush rules into the file + cmd = f"""cat <> /tmp/crush.map.txt +rule {rule_name} {"{"} +{rules} +{"}"} +EOF""" + log.debug(f"Command to add crush rules : \n {cmd} \n") + self.client.exec_command(cmd=cmd, sudo=True) + + # Changing back the text file into bin + cmd = "crushtool -c /tmp/crush.map.txt -o /tmp/crush2.map.bin" + self.client.exec_command(cmd=cmd, sudo=True) + + # Setting the new crush map + cmd = "ceph osd setcrushmap -i /tmp/crush2.map.bin" + self.client.exec_command(cmd=cmd, sudo=True) + + time.sleep(5) + + out = self.run_ceph_command(cmd="ceph osd crush rule ls", client_exec=True) + if rule_name not in out: + log.error( + f"New rule added in the cluster is not listed in the cluster." + f"rule added : {rule_name}, \n" + f"rules present on cluster : {out}" + ) + return False + + log.info(f"Crush rule: {rule_name} added successfully") + return True + except Exception as err: + log.error("Failed to set the crush rules") + log.error(err) + return False + def check_inactive_pgs_on_pool(self, pool_name) -> bool: """ Method to check if the provided pool has any PGs in inactive state diff --git a/conf/squid/rados/3AZ-cluster.yaml b/conf/squid/rados/3AZ-cluster.yaml index f66d582cee..4db39e9352 100644 --- a/conf/squid/rados/3AZ-cluster.yaml +++ b/conf/squid/rados/3AZ-cluster.yaml @@ -5,6 +5,8 @@ globals: - ceph-cluster: name: ceph node1: + networks: + - provider_net_cci_15 role: - _admin - mon @@ -15,68 +17,90 @@ globals: - prometheus - osd no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node2: + networks: + - provider_net_cci_15 role: - mon - mgr - rgw - osd no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node3: + networks: + - provider_net_cci_15 role: - osd - mon - mds no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node4: + networks: + - provider_net_cci_13 role: - _admin - mon - mgr - osd + - alertmanager + - grafana + - prometheus no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node5: + networks: + - provider_net_cci_13 role: - mon - mgr - rgw - osd no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node6: + networks: + - provider_net_cci_13 role: - osd - mon - mds no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node7: + networks: + - provider_net_cci_16 role: - _admin - mon - mgr - osd + - alertmanager + - grafana + - prometheus no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node8: + networks: + - provider_net_cci_16 role: - mon - mgr - rgw - osd no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node9: + networks: + - provider_net_cci_16 role: - osd - mon - mds no-of-volumes: 4 - disk-size: 25 + disk-size: 15 node10: role: - client diff --git a/suites/squid/rados/tier-3_rados_test-3-AZ-Cluster.yaml b/suites/squid/rados/tier-3_rados_test-3-AZ-Cluster.yaml index 9e0583ca52..8ddd4b04b8 100644 --- a/suites/squid/rados/tier-3_rados_test-3-AZ-Cluster.yaml +++ b/suites/squid/rados/tier-3_rados_test-3-AZ-Cluster.yaml @@ -105,31 +105,6 @@ tests: args: # display OSD tree - "ceph osd tree" - - test: - name: MDS 
Service deployment with spec
-      desc: Add MDS services using spec file
-      module: test_cephadm.py
-      polarion-id: CEPH-83574728
-      config:
-        steps:
-          - config:
-              command: shell
-              args: # arguments to ceph orch
-                - ceph
-                - fs
-                - volume
-                - create
-                - cephfs
-          - config:
-              command: apply_spec
-              service: orch
-              validate-spec-services: true
-              specs:
-                - service_type: mds
-                  service_id: cephfs
-                  placement:
-                    label: mds
-
   - test:
       name: RGW Service deployment
       desc: RGW Service deployment
@@ -157,6 +132,7 @@ tests:
         node: node10
         install_packages:
           - ceph-common
+          - ceph-base
         copy_admin_keyring: true  # Copy admin keyring to node
         caps: # authorize client capabilities
           mon: "allow *"
@@ -171,3 +147,15 @@ tests:
       config:
         log_to_file: true
       desc: Change config options to enable logging to file
+
+  - test:
+      name: Netsplit Scenarios data-data sites
+      module: test_stretch_n-az_netsplit_scenarios.py
+      polarion-id: CEPH-83574979
+      config:
+        pool_name: test_stretch_pool7
+        stretch_bucket: datacenter
+        netsplit_site_1: DC1
+        netsplit_site_2: DC3
+        delete_pool: true
+      desc: Test 3 AZ stretch cluster netsplit scenario between two data sites
diff --git a/tests/rados/test_stretch_n-az_netsplit_scenarios.py b/tests/rados/test_stretch_n-az_netsplit_scenarios.py
new file mode 100644
index 0000000000..bafca4bc88
--- /dev/null
+++ b/tests/rados/test_stretch_n-az_netsplit_scenarios.py
@@ -0,0 +1,286 @@
+"""
+This test module is used to test net-split scenarios with recovery in a 3 AZ stretch pool environment.
+Includes:
+1. Netsplit b/w data sites in a 3 AZ cluster with post test checks.
+
+"""
+
+import time
+
+from ceph.ceph_admin import CephAdmin
+from ceph.rados.core_workflows import RadosOrchestrator
+from ceph.rados.pool_workflows import PoolFunctions
+from utility.log import Log
+
+log = Log(__name__)
+
+
+def run(ceph_cluster, **kw):
+    """
+    Performs netsplit scenarios between data sites on a 3 AZ stretch cluster
+    Args:
+        ceph_cluster (ceph.ceph.Ceph): ceph cluster
+    """
+
+    log.info(run.__doc__)
+    config = kw.get("config")
+    cephadm = CephAdmin(cluster=ceph_cluster, **config)
+    rados_obj = RadosOrchestrator(node=cephadm)
+    pool_obj = PoolFunctions(node=cephadm)
+    client_node = ceph_cluster.get_nodes(role="client")[0]
+    pool_name = config.get("pool_name", "test_stretch_io")
+    stretch_bucket = config.get("stretch_bucket", "datacenter")
+    netsplit_site_1 = config.get("netsplit_site_1", "DC1")
+    netsplit_site_2 = config.get("netsplit_site_2", "DC2")
+    set_debug = config.get("set_debug", False)
+    rule_name = config.get("rule_name", "3az_rule_netsplit")
+    cluster_nodes = ceph_cluster.get_nodes()
+    installer = ceph_cluster.get_nodes(role="installer")[0]
+    init_time, _ = installer.exec_command(cmd="sudo date '+%Y-%m-%d %H:%M:%S'")
+    log.debug(f"Initial time when test was started : {init_time}")
+
+    try:
+
+        osd_tree_cmd = "ceph osd tree"
+        buckets = rados_obj.run_ceph_command(osd_tree_cmd)
+        dc_buckets = [d for d in buckets["nodes"] if d.get("type") == stretch_bucket]
+        dc_names = [name["name"] for name in dc_buckets]
+
+        if netsplit_site_1 not in dc_names or netsplit_site_2 not in dc_names:
+            log.error(
+                f"Passed DC names do not exist on the cluster. 
+ f"DC's on cluster : {dc_names}" + f"Passed names : {netsplit_site_1} & {netsplit_site_2}" + ) + raise Exception("DC names not found to test netsplit") + + # Starting to flush IP table rules on all hosts + for host in cluster_nodes: + log.debug(f"Proceeding to flush iptable rules on host : {host.hostname}") + host.exec_command(sudo=True, cmd="iptables -F", long_running=True) + time.sleep(60) + + if not rados_obj.run_pool_sanity_check(): + log.error( + "Cluster PGs not in active + clean state before starting the tests" + ) + # raise Exception("Post execution checks failed on the Stretch cluster") + + # log cluster health + rados_obj.log_cluster_health() + + all_hosts = rados_obj.get_multi_az_stretch_site_hosts( + num_data_sites=len(dc_names), stretch_bucket=stretch_bucket + ) + for site in dc_names: + log.debug( + f"Hosts present in Datacenter : {site} : {getattr(all_hosts, site)}" + ) + + log.info( + f"Starting Netsplit scenario in the cluster B/W site {netsplit_site_1} & " + f"{netsplit_site_2}." + f" Pre-checks Passed and IP tables flushed on the cluster" + ) + + if set_debug: + log.debug("Setting up debug configs on the cluster for mon & osd") + rados_obj.run_ceph_command( + cmd="ceph config set mon debug_mon 30", client_exec=True + ) + rados_obj.run_ceph_command( + cmd="ceph config set osd debug_osd 20", client_exec=True + ) + + # Creating test pool to check the effect of Netsplit scenarios on the Pool IO + + if not rados_obj.create_n_az_stretch_pool( + pool_name=pool_name, + rule_name=rule_name, + rule_id=101, + peer_bucket_barrier=stretch_bucket, + num_sites=3, + num_copies_per_site=2, + total_buckets=3, + req_peering_buckets=2, + ): + log.error(f"Unable to Create/Enable stretch mode on the pool : {pool_name}") + raise Exception("Unable to enable stretch pool") + + # Sleeping for 10 seconds for pool to be populated in the cluster + time.sleep(10) + + # Collecting the init no of objects on the pool, before site down + pool_stat = rados_obj.get_cephdf_stats(pool_name=pool_name) + init_objects = pool_stat["stats"]["objects"] + log.debug( + f"initial number of objects on the pool : {pool_name} is {init_objects}" + ) + + # Starting test to induce netsplit b/w + log.debug( + f"Proceeding to induce netsplit scenario b/w the two data sites. Adding IPs of {netsplit_site_1} hosts" + f"into other site, i.e {netsplit_site_2} for blocking Incoming and Outgoing " + f"packets between the two sites" + ) + + for host1 in getattr(all_hosts, netsplit_site_1): + target_host_obj = rados_obj.get_host_object(hostname=host1) + if not target_host_obj: + log.error(f"target host : {host1} not found . Exiting...") + raise Exception("Test execution Failed") + log.debug( + f"Proceeding to add IPtables rules to block incoming - outgoing traffic to host {host1} " + ) + for host2 in getattr(all_hosts, netsplit_site_2): + source_host_obj = rados_obj.get_host_object(hostname=host2) + log.debug( + f"Proceeding to add IPtables rules to block incoming - outgoing traffic to host {host1} " + f"Applying rules on host : {host2}" + ) + if not source_host_obj: + log.error(f"Source host : {host2} not found . 
Exiting...") + if not rados_obj.block_in_out_packets_on_host( + source_host=source_host_obj, target_host=target_host_obj + ): + log.error( + f"Failed to add IPtable rules to block {host1} on {host2}" + ) + raise Exception("Test execution Failed") + + log.info( + f"Completed adding IPtable rules into all hosts of {netsplit_site_1} to {netsplit_site_2}" + ) + + # sleeping for 120 seconds for the DC to be identified as down and proceeding to next checks + time.sleep(120) + + # log cluster health + rados_obj.log_cluster_health() + + # Checking the health status of the cluster and the active alerts for site down + # These should be generated on the cluster + status_report = rados_obj.run_ceph_command(cmd="ceph report", client_exec=True) + ceph_health_status = list(status_report["health"]["checks"].keys()) + expected_health_warns = ( + "OSD_HOST_DOWN", + "OSD_DOWN", + "OSD_DATACENTER_DOWN", + "MON_DOWN", + ) + if not all(elem in ceph_health_status for elem in expected_health_warns): + log.error( + f"We do not have the expected health warnings generated on the cluster.\n" + f" Warns on cluster : {ceph_health_status}\n" + f"Expected Warnings : {expected_health_warns}\n" + ) + # raise execption() + + log.info( + f"The expected health warnings are generated on the cluster. Warnings : {ceph_health_status}" + ) + + log.debug( + "Checking is the cluster is marked degraded and " + "operating in degraded mode post Netsplit b/w data sites" + ) + + log.debug("sleeping for 4 minutes before starting writes.") + time.sleep(600) + + # log cluster health + rados_obj.log_cluster_health() + + # Starting checks to see availability of cluster during netsplit scenario + # perform rados put to check if write ops is possible + pool_obj.do_rados_put(client=client_node, pool=pool_name, nobj=200, timeout=100) + # rados_obj.bench_write(pool_name=pool_name, rados_write_duration=100) + + log.debug("sleeping for 4 minutes for the objects to be displayed in ceph df") + time.sleep(600) + + # Getting the number of objects post write, to check if writes were successful + pool_stat_final = rados_obj.get_cephdf_stats(pool_name=pool_name) + log.debug(pool_stat_final) + final_objects = pool_stat_final["stats"]["objects"] + log.debug( + f"Final number of objects on the pool : {pool_name} is {final_objects}" + ) + + # Objects should be more than the initial no of objects + if int(final_objects) <= int(init_objects): + log.error( + "Write ops should be possible, number of objects in the pool has not changed" + ) + raise Exception( + f"Pool {pool_name} has {pool_stat['stats']['objects']} objs" + ) + + log.info( + f"Successfully wrote {int(final_objects) - int(init_objects)} on pool {pool_name} in degraded mode\n" + f"Proceeding to remove the IPtable rules and recover the cluster from degraded mode" + ) + + time.sleep(5) + + # Starting to flush IP table rules on all hosts + for host in cluster_nodes: + log.debug(f"Proceeding to flush iptable rules on host : {host.hostname}") + host.exec_command(sudo=True, cmd="iptables -F", long_running=True) + log.debug( + "Observed that just IP tables flush did not work to bring back the nodes to cluster." + f"rebooting the nodes post testing. 
Rebooting node : {host.hostname}" + ) + host.exec_command(sudo=True, cmd="reboot") + log.debug("Sleeping for 30 seconds...") + time.sleep(30) + + log.info("Proceeding to do checks post Stretch mode netsplit scenarios") + + if not rados_obj.run_pool_sanity_check(): + log.error("Checks failed post Site Netsplit scenarios") + raise Exception("Post execution checks failed on the Stretch cluster") + + except Exception as err: + log.error(f"Hit an exception: {err}. Test failed") + log.debug( + "Test case expected to fail until bug fix : https://bugzilla.redhat.com/show_bug.cgi?id=2265116" + ) + return 1 + finally: + log.debug("---------------- In Finally Block -------------") + # Starting to flush IP table rules on all hosts + for host in cluster_nodes: + log.debug(f"Proceeding to flush iptable rules on host : {host.hostname}") + host.exec_command(sudo=True, cmd="iptables -F", long_running=True) + log.debug( + "Observed that just IP tables flush did not work to bring back the nodes to cluster." + f"rebooting the nodes post testing. Rebooting node : {host.hostname}" + ) + host.exec_command(sudo=True, cmd="reboot") + + rados_obj.rados_pool_cleanup() + cmd = f"ceph osd crush rule rm {rule_name}" + rados_obj.client.exec_command(cmd=cmd, sudo=True) + + init_time, _ = installer.exec_command(cmd="sudo date '+%Y-%m-%d %H:%M:%S'") + log.debug(f"time when test was Ended : {init_time}") + if set_debug: + log.debug("Removing debug configs on the cluster for mon & osd") + rados_obj.run_ceph_command( + cmd="ceph config rm mon debug_mon", client_exec=True + ) + rados_obj.run_ceph_command( + cmd="ceph config rm osd debug_osd", client_exec=True + ) + + time.sleep(60) + # log cluster health + rados_obj.log_cluster_health() + # check for crashes after test execution + if rados_obj.check_crash_status(): + log.error("Test failed due to crash at the end of test") + return 1 + + log.info("All the tests completed on the cluster, Pass!!!") + return 0 diff --git a/tests/rados/test_stretch_netsplit_scenarios.py b/tests/rados/test_stretch_netsplit_scenarios.py index df110c3230..5569537dd7 100644 --- a/tests/rados/test_stretch_netsplit_scenarios.py +++ b/tests/rados/test_stretch_netsplit_scenarios.py @@ -275,7 +275,7 @@ def run(ceph_cluster, **kw): f"Pool {pool_name} has {pool_stat['stats']['objects']} objs" ) log.info( - f"Successfully wrote {pool_stat['stats']['objects']} on pool {pool_name} in degraded mode\n" + f"Successfully wrote {int(final_objects) - int(init_objects)} on pool {pool_name} in degraded mode\n" f"Proceeding to remove the IPtable rules and recover the cluster from degraded mode" )
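
Usage sketch (illustrative, not part of the patch above): the snippet below shows how the multi-AZ helpers added to ceph/rados/core_workflows.py are expected to be driven from a test module, mirroring the calls made in tests/rados/test_stretch_n-az_netsplit_scenarios.py. The pool name, rule name and rule ID are arbitrary example values.

# Illustrative usage sketch of the new multi-AZ stretch helpers (example values only).
from ceph.ceph_admin import CephAdmin
from ceph.rados.core_workflows import RadosOrchestrator
from utility.log import Log

log = Log(__name__)


def demo_multi_az_stretch(ceph_cluster, config):
    cephadm = CephAdmin(cluster=ceph_cluster, **config)
    rados_obj = RadosOrchestrator(node=cephadm)

    # Create a replicated pool and enable stretch mode across 3 datacenters,
    # keeping 2 copies per site (size=6, min_size=3 are derived by the helper).
    if not rados_obj.create_n_az_stretch_pool(
        pool_name="demo_3az_pool",
        rule_name="demo_3az_rule",
        rule_id=111,
        peer_bucket_barrier="datacenter",
        num_sites=3,
        num_copies_per_site=2,
        total_buckets=3,
        req_peering_buckets=2,
    ):
        raise Exception("Unable to enable stretch mode on the demo pool")

    # Collect the combined OSD + MON hosts, grouped per datacenter bucket.
    all_hosts = rados_obj.get_multi_az_stretch_site_hosts(
        num_data_sites=3, stretch_bucket="datacenter"
    )
    for site in all_hosts._fields:
        log.info(f"Hosts in {site} : {getattr(all_hosts, site)}")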