From 112adfacfdd79d3cbc60bb4953f8969fcd2eed0c Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Wed, 3 Mar 2021 00:11:41 -0500
Subject: [PATCH 01/37] DAOS-6923 test: Offline Reintegration - More tests
 Test-tag-hw-medium: pr,hw,medium,ib2 offline_reintegration

Summary:
   - Moved more common methods to osa_utils.py
   - Added the 200-pool test method
   - Added a test with different object classes
   - Excluded and reintegrated more ranks

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py    | 128 +++++++++---------
 .../ftest/osa/osa_offline_reintegration.yaml  |   8 +-
 src/tests/ftest/util/osa_utils.py             |  43 ++++++
 3 files changed, 112 insertions(+), 67 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 8201d35a8ca..f456a369ea1 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -11,12 +11,6 @@
 from write_host_file import write_host_file
 from apricot import skipForTicket
 
-try:
-    # python 3.x
-    import queue as queue
-except ImportError:
-    # python 2.7
-    import Queue as queue
 
 class OSAOfflineReintegration(OSAUtils):
     # pylint: disable=too-many-ancestors
@@ -30,51 +24,18 @@ def setUp(self):
         """Set up for test case."""
         super(OSAOfflineReintegration, self).setUp()
         self.dmg_command = self.get_dmg_command()
-        self.ior_w_flags = self.params.get("write_flags", '/run/ior/iorflags/*')
-        self.ior_r_flags = self.params.get("read_flags", '/run/ior/iorflags/*')
         self.ior_apis = self.params.get("ior_api", '/run/ior/iorflags/*')
         self.ior_test_sequence = self.params.get(
             "ior_test_sequence", '/run/ior/iorflags/*')
         self.ior_dfs_oclass = self.params.get(
             "obj_class", '/run/ior/iorflags/*')
+        self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
         # Recreate the client hostfile without slots defined
         self.hostfile_clients = write_host_file(
             self.hostlist_clients, self.workdir, None)
-        self.out_queue = queue.Queue()
-
-    def run_ior_thread(self, action, oclass, api, test):
-        """Start the IOR thread for either writing or
-        reading data to/from a container.
-        Args:
-            action (str): Start the IOR thread with Read or
-                          Write
-            oclass (str): IOR object class
-            API (str): IOR API
-            test (list): IOR test sequence
-            flags (str): IOR flags
-        """
-        if action == "Write":
-            flags = self.ior_w_flags
-        else:
-            flags = self.ior_r_flags
-
-        # Add a thread for these IOR arguments
-        process = threading.Thread(target=self.ior_thread,
-                                   kwargs={"pool": self.pool,
-                                           "oclass": oclass,
-                                           "api": api,
-                                           "test": test,
-                                           "flags": flags,
-                                           "results":
-                                           self.out_queue})
-        # Launch the IOR thread
-        process.start()
-        # Wait for the thread to finish
-        process.join()
-
 
     def run_offline_reintegration_test(self, num_pool, data=False,
-                                       server_boot=False):
+                                       server_boot=False, oclass=None):
         """Run the offline reintegration without data.
             Args:
             num_pool (int) : total pools to create for testing purposes.
@@ -82,45 +43,44 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                           some data in pool. Defaults to False.
             server_boot (bool) : Perform system stop/start on a rank.
                                 Defaults to False.
+            oclass (str) : DAOS object class string (e.g., "RP_2G8")
         """
         # Create a pool
         pool = {}
         pool_uuid = []
-        exclude_servers = (len(self.hostlist_servers) * 2) - 1
 
-        # Exclude rank : two ranks other than rank 0.
-        rank = random.randint(1, exclude_servers)
+        # Exclude ranks [0, 3, 4]
+        rank = [0, 3, 4]
+        if oclass is None:
+            oclass = self.ior_dfs_oclass[0]
 
         for val in range(0, num_pool):
             pool[val] = TestPool(self.context,
                                  dmg_command=self.get_dmg_command())
             pool[val].get_params(self)
-            # Split total SCM and NVME size for creating multiple pools.
-            pool[val].scm_size.value = int(pool[val].scm_size.value /
-                                           num_pool)
-            pool[val].nvme_size.value = int(pool[val].nvme_size.value /
-                                            num_pool)
             pool[val].create()
             pool_uuid.append(pool[val].uuid)
             self.pool = pool[val]
             if data:
-                self.run_ior_thread("Write", self.ior_dfs_oclass[0],
-                                    self.ior_apis[0], self.ior_test_sequence[0])
-
-        # Exclude and reintegrate the pool_uuid, rank and targets
-        for val in range(0, num_pool):
-            self.pool = pool[val]
+                self.run_ior_thread("Write", oclass,
+                                    self.ior_apis[0],
+                                    self.ior_test_sequence[0])
+
+        # Exclude all the ranks
+        random_pool = random.randint(0, (num_pool-1))
+        for val in range(len(rank)):
+            self.pool = pool[random_pool]
             self.pool.display_pool_daos_space("Pool space: Beginning")
             pver_begin = self.get_pool_version()
             self.log.info("Pool Version at the beginning %s", pver_begin)
             if server_boot is False:
                 output = self.dmg_command.pool_exclude(self.pool.uuid,
-                                                       rank)
+                                                       rank[val])
             else:
-                output = self.dmg_command.system_stop(ranks=rank)
+                output = self.dmg_command.system_stop(ranks=rank[val])
                 self.pool.wait_for_rebuild(True)
                 self.log.info(output)
-                output = self.dmg_command.system_start(ranks=rank)
+                output = self.dmg_command.system_start(ranks=rank[val])
 
             self.log.info(output)
             self.is_rebuild_done(3)
@@ -133,8 +93,15 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             # pver_begin + 8 targets.
             self.assertTrue(pver_exclude > (pver_begin + 8),
                             "Pool Version Error:  After exclude")
-            output = self.dmg_command.pool_reintegrate(self.pool.uuid,
-                                                       rank)
+
+        # Reintegrate the ranks that were excluded
+        for val in range(0, len(rank)):
+            if val == 2:
+                output = self.dmg_command.pool_reintegrate(self.pool.uuid,
+                                                           rank[val], "0,2")
+            else:
+                output = self.dmg_command.pool_reintegrate(self.pool.uuid,
+                                                           rank[val])
             self.log.info(output)
             self.is_rebuild_done(3)
             self.assert_on_rebuild_failure()
@@ -145,13 +112,12 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             self.assertTrue(pver_reint > (pver_exclude + 1),
                             "Pool Version Error:  After reintegrate")
 
-        for val in range(0, num_pool):
-            display_string = "Pool{} space at the End".format(val)
-            self.pool = pool[val]
-            self.pool.display_pool_daos_space(display_string)
+        display_string = "Pool{} space at the End".format(random_pool)
+        self.pool = pool[random_pool]
+        self.pool.display_pool_daos_space(display_string)
 
         if data:
-            self.run_ior_thread("Read", self.ior_dfs_oclass[0],
+            self.run_ior_thread("Read", oclass,
                                 self.ior_apis[0], self.ior_test_sequence[0])
 
     def test_osa_offline_reintegration(self):
@@ -160,6 +126,7 @@ def test_osa_offline_reintegration(self):
 
         :avocado: tags=all,daily_regression,hw,medium,ib2
         :avocado: tags=osa,offline_reintegration
+        :avocado: tags=offline_reintegration_exclude
         """
         # Perform reintegration testing with a pool
         self.run_offline_reintegration_test(1, True)
@@ -168,7 +135,36 @@ def test_osa_offline_reintegration(self):
     def test_osa_offline_reintegration_server_stop(self):
         """Test ID: DAOS-6748.
         Test Description: Validate Offline Reintegration with server stop
-        :avocado: tags=all,pr,daily_regression,hw,medium,ib2,osa
+        :avocado: tags=all,pr,daily_regression,hw,medium,ib2
+        :avocado: tags=osa,offline_reintegration
         :avocado: tags=offline_reintegration_srv_stop
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
+
+    @skipForTicket("DAOS-6505")
+    def test_osa_offline_reintegration_200_pools(self):
+        """Test ID: DAOS-6923
+        Test Description: Validate Offline Reintegration
+        with 200 pools
+
+        :avocado: tags=all,full_regression,hw,medium,ib2
+        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=offline_reintegration_200
+        """
+        # Perform reintegration testing with a pool
+        self.run_offline_reintegration_test(200, True)
+
+    def test_osa_offline_reintegration_oclass(self):
+        """Test ID: DAOS-6923
+        Test Description: Validate Offline Reintegration
+        with different object class
+
+        :avocado: tags=all,full_regression,hw,medium,ib2
+        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=offline_reintegration_oclass
+        """
+        # Perform reintegration testing with a pool
+        for oclass in self.test_oclass:
+            self.run_offline_reintegration_test(1, data=True,
+                                                server_boot=False,
+                                                oclass=oclass)
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.yaml b/src/tests/ftest/osa/osa_offline_reintegration.yaml
index d426d879a65..49773c16732 100644
--- a/src/tests/ftest/osa/osa_offline_reintegration.yaml
+++ b/src/tests/ftest/osa/osa_offline_reintegration.yaml
@@ -77,4 +77,10 @@ ior:
     #   - [scmsize, nvmesize, transfersize, blocksize]
    #    The values are set in multiples of 10.
    #    Values are approximate GB.
-      - [6000000000, 54000000000, 500000, 500000000]
\ No newline at end of file
+      - [6000000000, 54000000000, 500000, 500000000]
+test_obj_class:
+    oclass:
+      - RP_2G8
+      - RP_3G6
+      - EC_2P2G4
+      - RP_4G1
\ No newline at end of file
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 9ee0b78ddcc..906bf719cc0 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -6,6 +6,7 @@
 """
 import ctypes
 import time
+import threading
 
 from avocado import fail_on
 from ior_test_base import IorTestBase
@@ -16,6 +17,14 @@
 from pydaos.raw import (DaosContainer, IORequest,
                         DaosObj, DaosApiError)
 
+try:
+    # python 3.x
+    import queue as queue
+except ImportError:
+    # python 2.7
+    import Queue as queue
+
+
 class OSAUtils(IorTestBase):
     # pylint: disable=too-many-ancestors
     """
@@ -37,6 +46,10 @@ def setUp(self):
                                            default=[0])[0]
         self.record_length = self.params.get("length", '/run/record/*',
                                              default=[0])[0]
+        self.ior_w_flags = self.params.get("write_flags", '/run/ior/iorflags/*',
+                                           default="")
+        self.ior_r_flags = self.params.get("read_flags", '/run/ior/iorflags/*')
+        self.out_queue = queue.Queue()
 
     @fail_on(CommandFailure)
     def get_pool_leader(self):
@@ -163,6 +176,36 @@ def verify_single_object(self):
         self.obj.close()
         self.container.close()
 
+    def run_ior_thread(self, action, oclass, api, test):
+        """Start the IOR thread for either writing or
+        reading data to/from a container.
+        Args:
+            action (str): Start the IOR thread with Read or
+                          Write
+            oclass (str): IOR object class
+            api (str): IOR API
+            test (list): IOR test sequence
+            flags (str): IOR flags
+        """
+        if action == "Write":
+            flags = self.ior_w_flags
+        else:
+            flags = self.ior_r_flags
+
+        # Add a thread for these IOR arguments
+        process = threading.Thread(target=self.ior_thread,
+                                   kwargs={"pool": self.pool,
+                                           "oclass": oclass,
+                                           "api": api,
+                                           "test": test,
+                                           "flags": flags,
+                                           "results":
+                                           self.out_queue})
+        # Launch the IOR thread
+        process.start()
+        # Wait for the thread to finish
+        process.join()
+
     def ior_thread(self, pool, oclass, api, test, flags, results):
         """Start threads and wait until all threads are finished.
 

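The move above centralizes the Python 2/3 queue shim and the run_ior_thread()
launcher in osa_utils.py, where every OSA test can reuse them. A minimal,
self-contained sketch of that launcher pattern, standard library only
(fake_ior_thread is a hypothetical stand-in for OSAUtils.ior_thread):

import threading

try:
    # python 3.x
    import queue as queue
except ImportError:
    # python 2.7
    import Queue as queue

out_queue = queue.Queue()

def fake_ior_thread(pool, oclass, api, test, flags, results):
    # Stand-in worker: a real run launches IOR; status goes through the queue.
    results.put("PASS {} {} {} {} {}".format(pool, oclass, api, test, flags))

# Mirror run_ior_thread(): one thread per set of IOR arguments.
process = threading.Thread(target=fake_ior_thread,
                           kwargs={"pool": "pool-0",
                                   "oclass": "RP_2G1",
                                   "api": "DFS",
                                   "test": [6000000000, 54000000000,
                                            500000, 500000000],
                                   "flags": "-w -F -k -G 1",
                                   "results": out_queue})
process.start()
process.join()   # run_ior_thread() also blocks until the worker finishes
print(out_queue.get())
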
From 9bc230256d1bef2792bd4bb2100a758169a22bb5 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Fri, 5 Mar 2021 17:33:55 -0500
Subject: [PATCH 02/37] DAOS-6923 test: Added mdtest feature
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_drain.py      | 46 ++++++---
 src/tests/ftest/osa/osa_offline_drain.yaml    | 47 ++++++++-
 .../ftest/osa/osa_offline_reintegration.py    | 96 ++++++++++++++-----
 .../ftest/osa/osa_offline_reintegration.yaml  | 85 +++++++++-------
 src/tests/ftest/util/mdtest_test_base.py      |  4 +-
 src/tests/ftest/util/osa_utils.py             | 71 +++++++-------
 6 files changed, 235 insertions(+), 114 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_drain.py b/src/tests/ftest/osa/osa_offline_drain.py
index 25c9fb72827..746200e9595 100644
--- a/src/tests/ftest/osa/osa_offline_drain.py
+++ b/src/tests/ftest/osa/osa_offline_drain.py
@@ -5,9 +5,10 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 import random
+import time
 from osa_utils import OSAUtils
 from test_utils_pool import TestPool
-from apricot import skipForTicket
+from write_host_file import write_host_file
 
 
 class OSAOfflineDrain(OSAUtils):
@@ -22,19 +23,29 @@ def setUp(self):
         """Set up for test case."""
         super(OSAOfflineDrain, self).setUp()
         self.dmg_command = self.get_dmg_command()
+        self.ior_test_sequence = self.params.get(
+            "ior_test_sequence", '/run/ior/iorflags/*')
+        # Recreate the client hostfile without slots defined
+        self.hostfile_clients = write_host_file(
+            self.hostlist_clients, self.workdir, None)
 
-    def run_offline_drain_test(self, num_pool, data=False):
+    def run_offline_drain_test(self, num_pool, data=False,
+                               oclass=None, drain_during_aggregation=False):
         """Run the offline drain without data.
             Args:
             num_pool (int) : total pools to create for testing purposes.
             data (bool) : whether pool has no data or to create
                           some data in pool. Defaults to False.
+            oclass (str): DAOS object class (e.g., RP_2G1)
+            drain_during_aggregation (bool) : Perform drain and aggregation
+                                              in parallel
         """
         # Create a pool
         pool = {}
-        pool_uuid = []
         target_list = []
-        drain_servers = (len(self.hostlist_servers) * 2) - 1
+
+        if oclass is None:
+            oclass = self.ior_cmd.dfs_oclass.value
 
         # Exclude target : random two targets  (target idx : 0-7)
         n = random.randint(0, 6)
@@ -42,8 +53,8 @@ def run_offline_drain_test(self, num_pool, data=False):
         target_list.append(n+1)
         t_string = "{},{}".format(target_list[0], target_list[1])
 
-        # Drain a rank (or server)
-        rank = random.randint(1, drain_servers)
+        # Drain rank 1 (or server)
+        rank = 1
 
         for val in range(0, num_pool):
             pool[val] = TestPool(self.context, dmg_command=self.dmg_command)
@@ -54,17 +65,27 @@ def run_offline_drain_test(self, num_pool, data=False):
             pool[val].nvme_size.value = int(pool[val].nvme_size.value /
                                             num_pool)
             pool[val].create()
-            pool_uuid.append(pool[val].uuid)
             self.pool = pool[val]
+            if drain_during_aggregation is True:
+                test_seq = self.ior_test_sequence[1]
+                self.pool.set_property("reclaim", "disabled")
+            else:
+                test_seq = self.ior_test_sequence[0]
+
             if data:
-                self.write_single_object()
+                self.run_ior_thread("Write", oclass, test_seq)
+                self.run_mdtest_thread()
 
-        # Drain the pool_uuid, rank and targets
+        # Drain rank and targets
         for val in range(0, num_pool):
             self.pool = pool[val]
+            rank = rank + val
             self.pool.display_pool_daos_space("Pool space: Beginning")
             pver_begin = self.get_pool_version()
             self.log.info("Pool Version at the beginning %s", pver_begin)
+            if drain_during_aggregation is True:
+                self.pool.set_property("reclaim", "time")
+                time.sleep(90)
             output = self.dmg_command.pool_drain(self.pool.uuid,
                                                  rank, t_string)
             self.log.info(output)
@@ -82,9 +103,9 @@ def run_offline_drain_test(self, num_pool, data=False):
             pool[val].display_pool_daos_space(display_string)
 
         if data:
-            self.verify_single_object()
+            self.run_ior_thread("Read", oclass, test_seq)
+            self.run_mdtest_thread()
 
-    @skipForTicket("DAOS-6668")
     def test_osa_offline_drain(self):
         """
         JIRA ID: DAOS-4750
@@ -94,5 +115,4 @@ def test_osa_offline_drain(self):
         :avocado: tags=all,daily_regression,hw,medium,ib2
         :avocado: tags=osa,osa_drain,offline_drain
         """
-        for pool_num in range(1, 3):
-            self.run_offline_drain_test(pool_num, True)
+        self.run_offline_drain_test(1, True)
diff --git a/src/tests/ftest/osa/osa_offline_drain.yaml b/src/tests/ftest/osa/osa_offline_drain.yaml
index c1ecf210a30..d8c6a1a52bd 100644
--- a/src/tests/ftest/osa/osa_offline_drain.yaml
+++ b/src/tests/ftest/osa/osa_offline_drain.yaml
@@ -48,8 +48,10 @@ pool:
     svcn: 4
     control_method: dmg
 container:
-  properties:
-    enable_checksum: True
+    type: POSIX
+    control_method: daos
+    oclass: RP_2G1
+    properties: cksum:crc64,cksum_size:16384,srv_cksum:on
 dkeys:
   single:
     no_of_dkeys:
@@ -62,3 +64,44 @@ record:
   1KB:
     length:
       - 1024
+ior:
+    clientslots:
+      slots: 48
+    test_file: /testFile
+    repetitions: 1
+    dfs_destroy: False
+    iorflags:
+      write_flags: "-w -F -k -G 1"
+      read_flags: "-F -r -R -k -G 1"
+      api: DFS
+      dfs_oclass: RP_2G1
+      dfs_dir_oclass: RP_2G1
+    ior_test_sequence:
+    #   - [scmsize, nvmesize, transfersize, blocksize]
+    #    The values are set in multiples of 10.
+    #    Values are approximate GB.
+      - [6000000000, 54000000000, 500000, 500000000]
+      - [6000000000, 54000000000, 1000, 500000000]
+mdtest:
+  api: DFS
+  client_processes:
+    np: 30
+  num_of_files_dirs: 4067         # creating a total of ~120K files
+  test_dir: "/"
+  iteration: 1
+  dfs_destroy: False
+  dfs_oclass: RP_2G1
+  dfs_dir_oclass: RP_2G1
+  manager: "MPICH"
+  flags: "-u"
+  wr_size:
+    32K:
+      write_bytes: 32768
+      read_bytes: 32768
+  verbosity_value: 1
+  depth: 0
+test_obj_class:
+  oclass:
+    - RP_2G8
+    - RP_3G6
+    - RP_4G1
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index f456a369ea1..2fa108cddfb 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -5,7 +5,7 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 import random
-import threading
+import time
 from osa_utils import OSAUtils
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
@@ -24,18 +24,17 @@ def setUp(self):
         """Set up for test case."""
         super(OSAOfflineReintegration, self).setUp()
         self.dmg_command = self.get_dmg_command()
-        self.ior_apis = self.params.get("ior_api", '/run/ior/iorflags/*')
         self.ior_test_sequence = self.params.get(
             "ior_test_sequence", '/run/ior/iorflags/*')
-        self.ior_dfs_oclass = self.params.get(
-            "obj_class", '/run/ior/iorflags/*')
         self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
         # Recreate the client hostfile without slots defined
         self.hostfile_clients = write_host_file(
             self.hostlist_clients, self.workdir, None)
 
     def run_offline_reintegration_test(self, num_pool, data=False,
-                                       server_boot=False, oclass=None):
+                                       server_boot=False, oclass=None,
+                                       reint_during_rebuild=False,
+                                       reint_during_aggregation=False):
         """Run the offline reintegration without data.
             Args:
             num_pool (int) : total pools to create for testing purposes.
@@ -44,15 +43,20 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             server_boot (bool) : Perform system stop/start on a rank.
                                 Defaults to False.
             oclass (str) : DAOS object class string (e.g., "RP_2G8")
+            reint_during_rebuild (bool) : Perform reintegration during
+                                          rebuild (Defaults to False).
+            reint_during_aggregation (bool) : Perform reintegration
+                                              during aggregation
+                                              (Defaults to False).
         """
         # Create a pool
         pool = {}
         pool_uuid = []
+        if oclass is None:
+            oclass = self.ior_cmd.dfs_oclass.value
 
         # Exclude ranks [0, 3, 4]
         rank = [0, 3, 4]
-        if oclass is None:
-            oclass = self.ior_dfs_oclass[0]
 
         for val in range(0, num_pool):
             pool[val] = TestPool(self.context,
@@ -61,19 +65,33 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             pool[val].create()
             pool_uuid.append(pool[val].uuid)
             self.pool = pool[val]
+            if reint_during_aggregation is True:
+                test_seq = self.ior_test_sequence[1]
+                self.pool.set_property("reclaim", "disabled")
+            else:
+                test_seq = self.ior_test_sequence[0]
             if data:
-                self.run_ior_thread("Write", oclass,
-                                    self.ior_apis[0],
-                                    self.ior_test_sequence[0])
+                self.run_ior_thread("Write", oclass, test_seq)
+                self.run_mdtest_thread()
 
         # Exclude all the ranks
         random_pool = random.randint(0, (num_pool-1))
-        for val in range(len(rank)):
+        for val in range(0, len(rank)):
             self.pool = pool[random_pool]
             self.pool.display_pool_daos_space("Pool space: Beginning")
             pver_begin = self.get_pool_version()
             self.log.info("Pool Version at the beginning %s", pver_begin)
             if server_boot is False:
+                if (reint_during_rebuild is True and val == 0):
+                    # Exclude rank 5
+                    output = self.dmg_command.pool_exclude(self.pool.uuid,
+                                                           "5")
+                    self.log.info(output)
+                    self.is_rebuild_done(3)
+                    self.assert_on_rebuild_failure()
+                if reint_during_aggregation is True:
+                    self.pool.set_property("reclaim", "time")
+                    time.sleep(90)
                 output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                        rank[val])
             else:
@@ -81,7 +99,12 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                 self.pool.wait_for_rebuild(True)
                 self.log.info(output)
                 output = self.dmg_command.system_start(ranks=rank[val])
-
+            # Just try to reintegrate rank 5
+            if (reint_during_rebuild is True and val == 2):
+                # Reintegrate rank 5
+                time.sleep(3)
+                output = self.dmg_command.pool_reintegrate(self.pool.uuid,
+                                                           "5")
             self.log.info(output)
             self.is_rebuild_done(3)
             self.assert_on_rebuild_failure()
@@ -96,7 +119,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
 
         # Reintegrate the ranks that were excluded
         for val in range(0, len(rank)):
-            if val == 2:
+            if (val == 2 and "RP_2G" in oclass):
                 output = self.dmg_command.pool_reintegrate(self.pool.uuid,
                                                            rank[val], "0,2")
             else:
@@ -117,8 +140,8 @@ def run_offline_reintegration_test(self, num_pool, data=False,
         self.pool.display_pool_daos_space(display_string)
 
         if data:
-            self.run_ior_thread("Read", oclass,
-                                self.ior_apis[0], self.ior_test_sequence[0])
+            self.run_ior_thread("Read", oclass, test_seq)
+            self.run_mdtest_thread()
 
     def test_osa_offline_reintegration(self):
         """Test ID: DAOS-4749
@@ -128,8 +151,7 @@ def test_osa_offline_reintegration(self):
         :avocado: tags=osa,offline_reintegration
         :avocado: tags=offline_reintegration_exclude
         """
-        # Perform reintegration testing with a pool
-        self.run_offline_reintegration_test(1, True)
+        self.run_offline_reintegration_test(1, data=True)
 
     @skipForTicket("DAOS-6766, DAOS-6783")
     def test_osa_offline_reintegration_server_stop(self):
@@ -141,19 +163,19 @@ def test_osa_offline_reintegration_server_stop(self):
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
-    @skipForTicket("DAOS-6505")
-    def test_osa_offline_reintegration_200_pools(self):
+    def test_osa_offline_reintegrate_during_rebuild(self):
         """Test ID: DAOS-6923
-        Test Description: Validate Offline Reintegration
-        with 200 pools
+        Test Description: Reintegrate rank while rebuild
+        is happening in parallel
 
-        :avocado: tags=all,full_regression,hw,medium,ib2
+        :avocado: tags=all,daily_regression,hw,medium,ib2
         :avocado: tags=osa,offline_reintegration
-        :avocado: tags=offline_reintegration_200
+        :avocado: tags=offline_reintegrate_during_rebuild
         """
-        # Perform reintegration testing with a pool
-        self.run_offline_reintegration_test(200, True)
+        self.run_offline_reintegration_test(1, data=True,
+                                            reint_during_rebuild=True)
 
+    @skipForTicket("DAOS-6905")
     def test_osa_offline_reintegration_oclass(self):
         """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration
@@ -163,8 +185,30 @@ def test_osa_offline_reintegration_oclass(self):
         :avocado: tags=osa,offline_reintegration
         :avocado: tags=offline_reintegration_oclass
         """
-        # Perform reintegration testing with a pool
         for oclass in self.test_oclass:
             self.run_offline_reintegration_test(1, data=True,
                                                 server_boot=False,
                                                 oclass=oclass)
+
+    def test_osa_offline_reintegrate_during_aggregation(self):
+        """Test ID: DAOS-6923
+        Test Description: Reintegrate rank while aggregation
+        is happening in parallel
+
+        :avocado: tags=all,full_regression,hw,medium,ib2
+        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=offline_reintegrate_during_aggregation
+        """
+        self.run_offline_reintegration_test(1, data=True,
+                                            reint_during_aggregation=True)
+
+    @skipForTicket("DAOS-6505")
+    def test_osa_offline_reintegration_multiple_pools(self):
+        """Test ID: DAOS-6923
+        Test Description: Validate Offline Reintegration
+        with multiple pools
+
+        :avocado: tags=all,hw,medium,ib2,osa,offline_reintegration
+        :avocado: tags=offline_reintegration_multiple_pools
+        """
+        self.run_offline_reintegration_test(200, data=True)
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.yaml b/src/tests/ftest/osa/osa_offline_reintegration.yaml
index 49773c16732..b29145ef6ad 100644
--- a/src/tests/ftest/osa/osa_offline_reintegration.yaml
+++ b/src/tests/ftest/osa/osa_offline_reintegration.yaml
@@ -47,40 +47,57 @@ dmg:
   transport_config:
     allow_insecure: True
 pool:
-    mode: 146
-    name: daos_server
-    scm_size: 6000000000
-    nvme_size: 54000000000
-    svcn: 4
-    control_method: dmg
-    rebuild_timeout: 120
-    pool_query_timeout: 30
+  mode: 146
+  name: daos_server
+  scm_size: 6000000000
+  nvme_size: 54000000000
+  svcn: 4
+  control_method: dmg
+  rebuild_timeout: 120
+  pool_query_timeout: 30
 container:
-    type: POSIX
-    control_method: daos
-    oclass: RP_2G1
-    properties: cksum:crc64,cksum_size:16384,srv_cksum:on
+  type: POSIX
+  control_method: daos
+  oclass: RP_2G1
+  properties: cksum:crc64,cksum_size:16384,srv_cksum:on,rf:1
 ior:
-    clientslots:
-      slots: 48
-    test_file: /testFile
-    repetitions: 1
-    dfs_destroy: False
-    iorflags:
-      write_flags: "-w -F -k -G 1"
-      read_flags: "-F -r -R -k -G 1"
-      ior_api:
-        - DFS
-      obj_class:
-        - RP_2G1
-    ior_test_sequence:
-    #   - [scmsize, nvmesize, transfersize, blocksize]
-    #    The values are set in multiples of 10.
-    #    Values are approximate GB.
-      - [6000000000, 54000000000, 500000, 500000000]
+  clientslots:
+    slots: 48
+  test_file: /testFile
+  repetitions: 2
+  dfs_destroy: False
+  iorflags:
+    write_flags: "-w -F -k -G 1"
+    read_flags: "-F -r -R -k -G 1"
+    api: DFS
+    dfs_oclass: RP_2G1
+    dfs_dir_oclass: RP_2G1
+  ior_test_sequence:
+  #   - [scmsize, nvmesize, transfersize, blocksize]
+  #    The values are set in multiples of 10.
+  #    Values are approximate GB.
+    - [6000000000, 54000000000, 500000, 500000000]
+    - [6000000000, 54000000000, 1000, 500000000]
+mdtest:
+  api: DFS
+  client_processes:
+    np: 30
+  num_of_files_dirs: 4067         # creating a total of ~120K files
+  test_dir: "/"
+  iteration: 1
+  dfs_destroy: False
+  dfs_oclass: RP_2G1
+  dfs_dir_oclass: RP_2G1
+  manager: "MPICH"
+  flags: "-u"
+  wr_size:
+    32K:
+      write_bytes: 32768
+      read_bytes: 32768
+  verbosity_value: 1
+  depth: 0
 test_obj_class:
-    oclass:
-      - RP_2G8
-      - RP_3G6
-      - EC_2P2G4
-      - RP_4G1
\ No newline at end of file
+  oclass:
+    - RP_2G8
+    - RP_3G6
+    - RP_4G1
diff --git a/src/tests/ftest/util/mdtest_test_base.py b/src/tests/ftest/util/mdtest_test_base.py
index a60ab73b27a..a0c6570c76c 100755
--- a/src/tests/ftest/util/mdtest_test_base.py
+++ b/src/tests/ftest/util/mdtest_test_base.py
@@ -28,7 +28,7 @@ def __init__(self, *args, **kwargs):
     def setUp(self):
         """Set up each test case."""
         # obtain separate logs
-        self.update_log_file_names()
+        self.update_log_file_names()g
         # Start the servers and agents
         super(MdtestBase, self).setUp()
 
@@ -62,7 +62,7 @@ def execute_mdtest(self):
         self.run_mdtest(self.get_mdtest_job_manager_command(self.manager),
                         self.processes)
         # reset self.container if dfs_destroy is True
-        if self.mdtest_cmd.dfs_destroy:
+        if self.mdtest_cmd.dfs_destroy is True:
             self.container = None
 
         self.stop_dfuse()
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 906bf719cc0..c4897951412 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -10,6 +10,7 @@
 
 from avocado import fail_on
 from ior_test_base import IorTestBase
+from mdtest_test_base import MdtestBase
 from command_utils import CommandFailure
 from ior_utils import IorCommand
 from job_manager_utils import Mpirun
@@ -19,13 +20,13 @@
 
 try:
     # python 3.x
-    import queue as queue
+    import queue as test_queue
 except ImportError:
     # python 2.7
-    import Queue as queue
+    import Queue as test_queue
 
 
-class OSAUtils(IorTestBase):
+class OSAUtils(IorTestBase, MdtestBase):
     # pylint: disable=too-many-ancestors
     """
     Test Class Description: This test runs
@@ -49,7 +50,8 @@ def setUp(self):
         self.ior_w_flags = self.params.get("write_flags", '/run/ior/iorflags/*',
                                            default="")
         self.ior_r_flags = self.params.get("read_flags", '/run/ior/iorflags/*')
-        self.out_queue = queue.Queue()
+        self.out_queue = test_queue.Queue()
+        self.dmg_command.exit_status_exception = False
 
     @fail_on(CommandFailure)
     def get_pool_leader(self):
@@ -176,14 +178,13 @@ def verify_single_object(self):
         self.obj.close()
         self.container.close()
 
-    def run_ior_thread(self, action, oclass, api, test):
+    def run_ior_thread(self, action, oclass, test):
         """Start the IOR thread for either writing or
         reading data to/from a container.
         Args:
             action (str): Start the IOR thread with Read or
                           Write
             oclass (str): IOR object class
-            api (str): IOR API
             test (list): IOR test sequence
             flags (str): IOR flags
         """
@@ -196,7 +197,6 @@ def run_ior_thread(self, action, oclass, api, test):
         process = threading.Thread(target=self.ior_thread,
                                    kwargs={"pool": self.pool,
                                            "oclass": oclass,
-                                           "api": api,
                                            "test": test,
                                            "flags": flags,
                                            "results":
@@ -206,47 +206,44 @@ def run_ior_thread(self, action, oclass, api, test):
         # Wait for the thread to finish
         process.join()
 
-    def ior_thread(self, pool, oclass, api, test, flags, results):
+    def ior_thread(self, pool, oclass, test, flags, results):
         """Start threads and wait until all threads are finished.
 
         Args:
             pool (object): pool handle
             oclass (str): IOR object class
-            api (str): IOR api
             test (list): IOR test sequence
             flags (str): IOR flags
             results (queue): queue for returning thread results
 
         """
-        mpio_util = MpioUtils()
-        if mpio_util.mpich_installed(self.hostlist_clients) is False:
-            self.fail("Exiting Test : Mpich not installed on :"
-                      " {}".format(self.hostfile_clients[0]))
         self.pool = pool
-        # Define the arguments for the ior_runner_thread method
-        ior_cmd = IorCommand()
-        ior_cmd.get_params(self)
-        ior_cmd.set_daos_params(self.server_group, self.pool)
-        ior_cmd.dfs_oclass.update(oclass)
-        ior_cmd.dfs_dir_oclass.update(oclass)
-        ior_cmd.api.update(api)
-        ior_cmd.transfer_size.update(test[2])
-        ior_cmd.block_size.update(test[3])
-        ior_cmd.flags.update(flags)
-
-        # Define the job manager for the IOR command
-        self.job_manager = Mpirun(ior_cmd, mpitype="mpich")
+        self.ior_cmd.get_params(self)
+        self.ior_cmd.set_daos_params(self.server_group, self.pool)
+        self.ior_cmd.dfs_oclass.update(oclass)
+        self.ior_cmd.dfs_dir_oclass.update(oclass)
         # Create container only
         if self.container is None:
             self.add_container(self.pool)
-        self.job_manager.job.dfs_cont.update(self.container.uuid)
-        env = ior_cmd.get_default_env(str(self.job_manager))
-        self.job_manager.assign_hosts(self.hostlist_clients, self.workdir, None)
-        self.job_manager.assign_processes(self.processes)
-        self.job_manager.assign_environment(env, True)
-
-        # run IOR Command
-        try:
-            self.job_manager.run()
-        except CommandFailure as _error:
-            results.put("FAIL")
+        job_manager = self.get_ior_job_manager_command()
+        job_manager.job.dfs_cont.update(self.container.uuid)
+        self.ior_cmd.transfer_size.update(test[2])
+        self.ior_cmd.block_size.update(test[3])
+        self.ior_cmd.flags.update(flags)
+        self.run_ior_with_pool(create_pool=False, create_cont=False)
+    
+    def run_mdtest_thread(self):
+        """Start mdtest thread and wait until thread completes.
+        """
+        # Create container only
+        self.mdtest_cmd.dfs_destroy = False
+        if self.container is None:
+            self.add_container(self.pool)
+        job_manager = self.get_mdtest_job_manager_command(self.manager)
+        job_manager.job.dfs_cont.update(self.container.uuid)
+        # Add a thread for these IOR arguments
+        process = threading.Thread(target=self.execute_mdtest)
+        # Launch the MDtest thread
+        process.start()
+        # Wait for the thread to finish
+        process.join()

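The *_during_aggregation paths added in this patch share one sequencing idea:
write with aggregation held off (reclaim disabled), then re-enable it and
give it time (the tests wait 90 seconds) so that drain/exclude and
aggregation overlap. A runnable sketch of just that sequencing, with a
hypothetical FakePool standing in for TestPool (set_property mirrors the
call used in the patch; nothing here is the DAOS API):

import time

class FakePool(object):
    """Hypothetical pool stand-in that only tracks properties."""
    def __init__(self):
        self.props = {"reclaim": "time"}
    def set_property(self, name, value):
        self.props[name] = value
        print("set {} = {}".format(name, value))

pool = FakePool()
pool.set_property("reclaim", "disabled")  # hold aggregation while data lands
# ... run_ior_thread("Write", ...) / run_mdtest_thread() happen here ...
pool.set_property("reclaim", "time")      # let aggregation start
time.sleep(1)                             # the tests sleep 90 s; shortened
# ... dmg pool drain/exclude now runs while aggregation is in flight ...
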
From 7225a5b0b775b7bd189983142d9f1778a3fa1322 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Fri, 5 Mar 2021 17:58:44 -0500
Subject: [PATCH 03/37] DAOS-6923 test: Fix checkpatch issues.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 4 ++--
 src/tests/ftest/util/mdtest_test_base.py         | 2 +-
 src/tests/ftest/util/osa_utils.py                | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 2fa108cddfb..6bd23596f6f 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -76,7 +76,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
 
         # Exclude all the ranks
         random_pool = random.randint(0, (num_pool-1))
-        for val in range(0, len(rank)):
+        for val, _ in enumerate(rank)::
             self.pool = pool[random_pool]
             self.pool.display_pool_daos_space("Pool space: Beginning")
             pver_begin = self.get_pool_version()
@@ -118,7 +118,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                             "Pool Version Error:  After exclude")
 
         # Reintegrate the ranks that were excluded
-        for val in range(0, len(rank)):
+        for val, _ in enumerate(rank):
             if (val == 2 and "RP_2G" in oclass):
                 output = self.dmg_command.pool_reintegrate(self.pool.uuid,
                                                            rank[val], "0,2")
diff --git a/src/tests/ftest/util/mdtest_test_base.py b/src/tests/ftest/util/mdtest_test_base.py
index a0c6570c76c..54e84d74106 100755
--- a/src/tests/ftest/util/mdtest_test_base.py
+++ b/src/tests/ftest/util/mdtest_test_base.py
@@ -28,7 +28,7 @@ def __init__(self, *args, **kwargs):
     def setUp(self):
         """Set up each test case."""
         # obtain separate logs
-        self.update_log_file_names()g
+        self.update_log_file_names()
         # Start the servers and agents
         super(MdtestBase, self).setUp()
 
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index c4897951412..06166850a2f 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -26,7 +26,7 @@
     import Queue as test_queue
 
 
-class OSAUtils(IorTestBase, MdtestBase):
+class OSAUtils(MdtestBase, IorTestBase):
     # pylint: disable=too-many-ancestors
     """
     Test Class Description: This test runs
@@ -206,7 +206,7 @@ def run_ior_thread(self, action, oclass, test):
         # Wait for the thread to finish
         process.join()
 
-    def ior_thread(self, pool, oclass, test, flags, results):
+    def ior_thread(self, pool, oclass, test, flags):
         """Start threads and wait until all threads are finished.
 
         Args:
@@ -231,7 +231,7 @@ def ior_thread(self, pool, oclass, test, flags, results):
         self.ior_cmd.block_size.update(test[3])
         self.ior_cmd.flags.update(flags)
         self.run_ior_with_pool(create_pool=False, create_cont=False)
-    
+
     def run_mdtest_thread(self):
         """Start mdtest thread and wait until thread completes.
         """

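One change in this patch deserves a closer look: the base order flip to
OSAUtils(MdtestBase, IorTestBase). With cooperative super() calls, Python's
method resolution order decides whose setUp() runs and in what order, so
base order is behavior, not style. A runnable toy illustration (these
classes are stand-ins, not the real test bases):

class Base(object):
    def setUp(self):
        print("Base.setUp")

class IorTestBase(Base):
    def setUp(self):
        super(IorTestBase, self).setUp()
        print("IorTestBase.setUp")

class MdtestBase(Base):
    def setUp(self):
        super(MdtestBase, self).setUp()
        print("MdtestBase.setUp")

class OSAUtils(MdtestBase, IorTestBase):
    pass

print([c.__name__ for c in OSAUtils.__mro__])
# ['OSAUtils', 'MdtestBase', 'IorTestBase', 'Base', 'object']
OSAUtils().setUp()   # prints Base, then IorTestBase, then MdtestBase
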
From 77f8c65bea7d76f448fa5a495bcc24eeee8d8c11 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Fri, 5 Mar 2021 18:03:29 -0500
Subject: [PATCH 04/37] DAOS-6923 test: Fix the typo
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 6bd23596f6f..115868c2dd3 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -76,7 +76,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
 
         # Exclude all the ranks
         random_pool = random.randint(0, (num_pool-1))
-        for val, _ in enumerate(rank)::
+        for val, _ in enumerate(rank):
             self.pool = pool[random_pool]
             self.pool.display_pool_daos_space("Pool space: Beginning")
             pver_begin = self.get_pool_version()

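For reference, the loop form this typo fix completes is equivalent to the
range(len(...)) form it replaced; enumerate() is simply the idiom that
checkpatch/pylint prefer when both the index and the element are in play.
A tiny runnable comparison:

rank = [0, 3, 4]

for val in range(0, len(rank)):     # original form
    print(val, rank[val])

for val, _ in enumerate(rank):      # checkpatch-clean form, same output
    print(val, rank[val])
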
From b2cb8c6e4363aa8635ce81d47eb3e38efa762df8 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Sun, 7 Mar 2021 15:00:21 -0500
Subject: [PATCH 05/37] DAOS-6923 test: Removed unused results parameter
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 06166850a2f..d6ac08c719b 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -198,9 +198,7 @@ def run_ior_thread(self, action, oclass, test):
                                    kwargs={"pool": self.pool,
                                            "oclass": oclass,
                                            "test": test,
-                                           "flags": flags,
-                                           "results":
-                                           self.out_queue})
+                                           "flags": flags)
         # Launch the IOR thread
         process.start()
         # Wait for the thread to finish
@@ -214,7 +212,6 @@ def ior_thread(self, pool, oclass, test, flags):
             oclass (str): IOR object class
             test (list): IOR test sequence
             flags (str): IOR flags
-            results (queue): queue for returning thread results
 
         """
         self.pool = pool

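Worth recording why a results queue existed in the first place: an exception
inside a Thread never propagates to join(), so a worker must report failures
explicitly -- the ior_thread() code removed in patch 02 did exactly that with
results.put("FAIL") on CommandFailure. A stdlib-only illustration of the
pattern (worker and the simulated error are hypothetical):

import threading

try:
    import queue as queue            # python 3.x
except ImportError:
    import Queue as queue            # python 2.7

results = queue.Queue()

def worker(results):
    try:
        raise RuntimeError("simulated IOR failure")
    except RuntimeError:
        results.put("FAIL")          # the old ior_thread() reported this way

process = threading.Thread(target=worker, kwargs={"results": results})
process.start()
process.join()   # join() returns normally either way; the queue is the signal
print("worker reported: " + results.get())
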
From 078065e4d028c0f47b560b81aae594432e166be3 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Sun, 7 Mar 2021 20:48:58 -0500
Subject: [PATCH 06/37] DAOS-6923 test: Address checkpatch issues.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 4 +---
 src/tests/ftest/util/osa_utils.py                | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 115868c2dd3..2041dbbe2bc 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -51,19 +51,17 @@ def run_offline_reintegration_test(self, num_pool, data=False,
         """
         # Create a pool
         pool = {}
-        pool_uuid = []
+        random_pool = 0
         if oclass is None:
             oclass = self.ior_cmd.dfs_oclass.value
 
         # Exclude ranks [0, 3, 4]
         rank = [0, 3, 4]
-
         for val in range(0, num_pool):
             pool[val] = TestPool(self.context,
                                  dmg_command=self.get_dmg_command())
             pool[val].get_params(self)
             pool[val].create()
-            pool_uuid.append(pool[val].uuid)
             self.pool = pool[val]
             if reint_during_aggregation is True:
                 test_seq = self.ior_test_sequence[1]
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index d6ac08c719b..1f31a089be7 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -198,7 +198,7 @@ def run_ior_thread(self, action, oclass, test):
                                    kwargs={"pool": self.pool,
                                            "oclass": oclass,
                                            "test": test,
-                                           "flags": flags)
+                                           "flags": flags})
         # Launch the IOR thread
         process.start()
         # Wait for the thread to finish

From c072ccb813a43bb8ba7d4d1db43679fa7b1b53b0 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Tue, 9 Mar 2021 00:25:06 -0500
Subject: [PATCH 07/37] DAOS-6923 test: Run all the tests once now.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py    | 134 ++++++++++--------
 .../ftest/osa/osa_offline_reintegration.yaml  |   4 +-
 2 files changed, 77 insertions(+), 61 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 2041dbbe2bc..a3c78c7a115 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -9,7 +9,7 @@
 from osa_utils import OSAUtils
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
-from apricot import skipForTicket
+# from apricot import skipForTicket
 
 
 class OSAOfflineReintegration(OSAUtils):
@@ -27,9 +27,12 @@ def setUp(self):
         self.ior_test_sequence = self.params.get(
             "ior_test_sequence", '/run/ior/iorflags/*')
         self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
+        self.loop_test_cnt = self.params.get("iterations",
+                                             '/run/loop_test/*')
         # Recreate the client hostfile without slots defined
         self.hostfile_clients = write_host_file(
             self.hostlist_clients, self.workdir, None)
+        self.dmg_command.exit_status_exception = False
 
     def run_offline_reintegration_test(self, num_pool, data=False,
                                        server_boot=False, oclass=None,
@@ -74,68 +77,71 @@ def run_offline_reintegration_test(self, num_pool, data=False,
 
         # Exclude all the ranks
         random_pool = random.randint(0, (num_pool-1))
-        for val, _ in enumerate(rank):
-            self.pool = pool[random_pool]
-            self.pool.display_pool_daos_space("Pool space: Beginning")
-            pver_begin = self.get_pool_version()
-            self.log.info("Pool Version at the beginning %s", pver_begin)
-            if server_boot is False:
-                if (reint_during_rebuild is True and val == 0):
-                    # Exclude rank 5
+        for _ in range(0, self.loop_test_cnt):
+            for val, _ in enumerate(rank):
+                self.pool = pool[random_pool]
+                self.pool.display_pool_daos_space("Pool space: Beginning")
+                pver_begin = self.get_pool_version()
+                self.log.info("Pool Version at the beginning %s", pver_begin)
+                if server_boot is False:
+                    if (reint_during_rebuild is True and val == 0):
+                        # Exclude rank 5
+                        output = self.dmg_command.pool_exclude(self.pool.uuid,
+                                                               "5")
+                        self.log.info(output)
+                        self.is_rebuild_done(3)
+                        self.assert_on_rebuild_failure()
+                    if reint_during_aggregation is True:
+                        self.pool.set_property("reclaim", "time")
+                        time.sleep(90)
                     output = self.dmg_command.pool_exclude(self.pool.uuid,
-                                                           "5")
+                                                           rank[val])
+                else:
+                    output = self.dmg_command.system_stop(ranks=rank[val])
+                    self.pool.wait_for_rebuild(True)
                     self.log.info(output)
-                    self.is_rebuild_done(3)
-                    self.assert_on_rebuild_failure()
-                if reint_during_aggregation is True:
-                    self.pool.set_property("reclaim", "time")
-                    time.sleep(90)
-                output = self.dmg_command.pool_exclude(self.pool.uuid,
-                                                       rank[val])
-            else:
-                output = self.dmg_command.system_stop(ranks=rank[val])
-                self.pool.wait_for_rebuild(True)
+                    output = self.dmg_command.system_start(ranks=rank[val])
+                # Just try to reintegrate rank 5
+                if (reint_during_rebuild is True and val == 2):
+                    # Reintegrate rank 5
+                    time.sleep(3)
+                    output = self.dmg_command.pool_reintegrate(self.pool.uuid,
+                                                               "5")
                 self.log.info(output)
-                output = self.dmg_command.system_start(ranks=rank[val])
-            # Just try to reintegrate rank 5
-            if (reint_during_rebuild is True and val == 2):
-                # Reintegrate rank 5
-                time.sleep(3)
-                output = self.dmg_command.pool_reintegrate(self.pool.uuid,
-                                                           "5")
-            self.log.info(output)
-            self.is_rebuild_done(3)
-            self.assert_on_rebuild_failure()
-
-            pver_exclude = self.get_pool_version()
-            self.log.info("Pool Version after exclude %s", pver_exclude)
-            # Check pool version incremented after pool exclude
-            # pver_exclude should be greater than
-            # pver_begin + 8 targets.
-            self.assertTrue(pver_exclude > (pver_begin + 8),
-                            "Pool Version Error:  After exclude")
-
-        # Reintegrate the ranks that were excluded
-        for val, _ in enumerate(rank):
-            if (val == 2 and "RP_2G" in oclass):
-                output = self.dmg_command.pool_reintegrate(self.pool.uuid,
-                                                           rank[val], "0,2")
-            else:
-                output = self.dmg_command.pool_reintegrate(self.pool.uuid,
-                                                           rank[val])
-            self.log.info(output)
-            self.is_rebuild_done(3)
-            self.assert_on_rebuild_failure()
+                self.is_rebuild_done(3)
+                self.assert_on_rebuild_failure()
+
+                pver_exclude = self.get_pool_version()
+                self.log.info("Pool Version after exclude %s", pver_exclude)
+                # Check pool version incremented after pool exclude
+                # pver_exclude should be greater than
+                # pver_begin + 8 targets.
+                self.assertTrue(pver_exclude > (pver_begin + 8),
+                                "Pool Version Error:  After exclude")
+
+            # Reintegrate the ranks that were excluded
+            for val, _ in enumerate(rank):
+                time.sleep(5)
+                if (val == 2 and "RP_2G" in oclass):
+                    output = self.dmg_command.pool_reintegrate(self.pool.uuid,
+                                                               rank[val],
+                                                               "0,2")
+                else:
+                    output = self.dmg_command.pool_reintegrate(self.pool.uuid,
+                                                               rank[val])
+                self.log.info(output)
+                self.is_rebuild_done(3)
+                self.assert_on_rebuild_failure()
 
-            pver_reint = self.get_pool_version()
-            self.log.info("Pool Version after reintegrate %d", pver_reint)
-            # Check pool version incremented after pool reintegrate
-            self.assertTrue(pver_reint > (pver_exclude + 1),
-                            "Pool Version Error:  After reintegrate")
+                pver_reint = self.get_pool_version()
+                self.log.info("Pool Version after reintegrate %d", pver_reint)
+                # Check pool version incremented after pool reintegrate
+                self.assertTrue(pver_reint > (pver_exclude + 1),
+                                "Pool Version Error:  After reintegrate")
 
-        display_string = "Pool{} space at the End".format(random_pool)
-        self.pool = pool[random_pool]
-        self.pool.display_pool_daos_space(display_string)
+            display_string = "Pool{} space at the End".format(random_pool)
+            self.pool = pool[random_pool]
+            self.pool.display_pool_daos_space(display_string)
 
         if data:
             self.run_ior_thread("Read", oclass, test_seq)
@@ -151,7 +157,6 @@ def test_osa_offline_reintegration(self):
         """
         self.run_offline_reintegration_test(1, data=True)
 
-    @skipForTicket("DAOS-6766, DAOS-6783")
     def test_osa_offline_reintegration_server_stop(self):
         """Test ID: DAOS-6748.
         Test Description: Validate Offline Reintegration with server stop
@@ -173,7 +178,6 @@ def test_osa_offline_reintegrate_during_rebuild(self):
         self.run_offline_reintegration_test(1, data=True,
                                             reint_during_rebuild=True)
 
-    @skipForTicket("DAOS-6905")
     def test_osa_offline_reintegration_oclass(self):
         """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration
@@ -210,3 +214,13 @@ def test_osa_offline_reintegration_multiple_pools(self):
         :avocado: tags=offline_reintegration_multiple_pools
         """
         self.run_offline_reintegration_test(200, data=True)
+
+    def test_osa_offline_reintegration_loop_test(self):
+        """Test ID: DAOS-6923
+        Test Description: Validate Offline Reintegration
+        in a loop with a single pool
+
+        :avocado: tags=all,hw,medium,ib2,osa,offline_reintegration
+        :avocado: tags=offline_reintegration_loop_test
+        """
+        self.run_offline_reintegration_test(1, data=True)
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.yaml b/src/tests/ftest/osa/osa_offline_reintegration.yaml
index b29145ef6ad..0882e7da7d2 100644
--- a/src/tests/ftest/osa/osa_offline_reintegration.yaml
+++ b/src/tests/ftest/osa/osa_offline_reintegration.yaml
@@ -5,7 +5,7 @@ hosts:
     - server-C
   test_clients:
     - client-D
-timeout: 1000
+timeout: 3600
 server_config:
   name: daos_server
   engines_per_host: 2
@@ -101,3 +101,5 @@ test_obj_class:
     - RP_2G8
     - RP_3G6
     - RP_4G1
+loop_test:
+  iterations: 10

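A minimal, self-contained sketch of the pool-version assertion used in the
hunks above, with illustrative numbers (the helper name is hypothetical; the
threshold of 8 mirrors the "pver_begin + 8 targets" check):

    def pool_version_increased(pver_begin, pver_after, min_step=8):
        """Check that the pool map version advanced past the threshold."""
        return pver_after > (pver_begin + min_step)

    assert pool_version_increased(10, 19)        # 19 > 10 + 8: exclude accepted
    assert not pool_version_increased(10, 18)    # 18 == 10 + 8: would fail
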
From 1ed510eb3bc7a083e88a71c236e5052765ce3072 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Tue, 9 Mar 2021 00:36:23 -0500
Subject: [PATCH 08/37] DAOS-6923 test: Fix checkpatch issues.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 9ba638f3f83..22068863a26 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -24,22 +24,15 @@ def setUp(self):
         """Set up for test case."""
         super(OSAOfflineReintegration, self).setUp()
         self.dmg_command = self.get_dmg_command()
-<<<<<<< HEAD
-=======
-        self.ior_apis = self.params.get("ior_api", '/run/ior/iorflags/*')
->>>>>>> master
-        self.ior_test_sequence = self.params.get(
-            "ior_test_sequence", '/run/ior/iorflags/*')
+        self.ior_test_sequence = self.params.get("ior_test_sequence",
+                                                 '/run/ior/iorflags/*')
         self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
         self.loop_test_cnt = self.params.get("iterations",
                                              '/run/loop_test/*')
         # Recreate the client hostfile without slots defined
         self.hostfile_clients = write_host_file(
             self.hostlist_clients, self.workdir, None)
-<<<<<<< HEAD
         self.dmg_command.exit_status_exception = False
-=======
->>>>>>> master
 
     def run_offline_reintegration_test(self, num_pool, data=False,
                                        server_boot=False, oclass=None,

From f2cedae16335e25a73be1f6d5dd2d92b479e3ba0 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Tue, 9 Mar 2021 00:52:56 -0500
Subject: [PATCH 09/37] DAOS-6923 test: Added skipForTicket Test-tag-hw-medium:
 pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 22068863a26..43825a03a5c 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -9,7 +9,7 @@
 from osa_utils import OSAUtils
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
-# from apricot import skipForTicket
+from apricot import skipForTicket
 
 
 class OSAOfflineReintegration(OSAUtils):

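A hedged sketch of how the re-enabled decorator is applied in the patches
that follow; the test class and body here are placeholders:

    from apricot import skipForTicket

    class ExampleTest(object):
        @skipForTicket("DAOS-6505")    # skipped until the ticket is resolved
        def test_example(self):
            pass
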
From 3b2b7b7cb48225cd893b757fde872e15515a4110 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Tue, 9 Mar 2021 19:09:37 -0500
Subject: [PATCH 10/37] DAOS-6923 test: Just run offline (server stop)
 Test-tag-hw-medium: pr,hw,medium,ib2 osa Skip-unit-tests: true Skip-nlt: true
 Skip-unit-test: true Skip-unit-test-memcheck: true Skip-func-hw-test-large:
 true Skip-func-hw-test-small: true

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py   | 4 ++++
 src/tests/ftest/osa/osa_offline_reintegration.yaml | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 43825a03a5c..d4949714ab6 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -147,6 +147,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             self.run_ior_thread("Read", oclass, test_seq)
             self.run_mdtest_thread()
 
+    @skipForTicket("DAOS-6505")
     def test_osa_offline_reintegration(self):
         """Test ID: DAOS-4749
         Test Description: Validate Offline Reintegration
@@ -166,6 +167,7 @@ def test_osa_offline_reintegration_server_stop(self):
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
+    @skipForTicket("DAOS-6505")
     def test_osa_offline_reintegrate_during_rebuild(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while rebuild
@@ -178,6 +180,7 @@ def test_osa_offline_reintegrate_during_rebuild(self):
         self.run_offline_reintegration_test(1, data=True,
                                             reint_during_rebuild=True)
 
+    @skipForTicket("DAOS-6505")
     def test_osa_offline_reintegration_oclass(self):
         """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration
@@ -192,6 +195,7 @@ def test_osa_offline_reintegration_oclass(self):
                                                 server_boot=False,
                                                 oclass=oclass)
 
+    @skipForTicket("DAOS-6505")
     def test_osa_offline_reintegrate_during_aggregation(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while aggregation
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.yaml b/src/tests/ftest/osa/osa_offline_reintegration.yaml
index 0882e7da7d2..a31e002168e 100644
--- a/src/tests/ftest/osa/osa_offline_reintegration.yaml
+++ b/src/tests/ftest/osa/osa_offline_reintegration.yaml
@@ -81,8 +81,8 @@ ior:
 mdtest:
   api: DFS
   client_processes:
-    np: 30
-  num_of_files_dirs: 4067         # creating total of 120K files
+    np: 2
+  num_of_files_dirs: 100         # creating total of 120K files
   test_dir: "/"
   iteration: 1
   dfs_destroy: False

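The mdtest reduction above is easier to read with the file-count arithmetic
spelled out; assuming the total is client processes times num_of_files_dirs,
which is what the "creating total of 120K files" comment implies:

    old_total = 30 * 4067    # np * num_of_files_dirs = 122,010 (~120K files)
    new_total = 2 * 100      # the reduced run creates only 200 files
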
From 7b83df8b9517d5aa32e6b7ce31078cb5088e680f Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Wed, 10 Mar 2021 23:37:47 -0500
Subject: [PATCH 11/37] DAOS-6923 test: Add the loop testing methods.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa Skip-unit-tests: true Skip-nlt: true
 Skip-unit-test: true Skip-unit-test-memcheck: true Skip-coverity-test: true
 Skip-func-hw-test-small: true Skip-func-hw-test-large: true

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py    | 60 +++++++------------
 .../ftest/osa/osa_offline_reintegration.yaml  |  9 +--
 src/tests/ftest/util/osa_utils.py             | 34 +++++------
 3 files changed, 42 insertions(+), 61 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index d4949714ab6..bad272f8bee 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -27,12 +27,11 @@ def setUp(self):
         self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                  '/run/ior/iorflags/*')
         self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
-        self.loop_test_cnt = self.params.get("iterations",
-                                             '/run/loop_test/*')
+        self.loop_test_cnt = 1
         # Recreate the client hostfile without slots defined
         self.hostfile_clients = write_host_file(
             self.hostlist_clients, self.workdir, None)
-        self.dmg_command.exit_status_exception = False
+        self.dmg_command.exit_status_exception = True
 
     def run_offline_reintegration_test(self, num_pool, data=False,
                                        server_boot=False, oclass=None,
@@ -98,8 +97,9 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                                                            rank[val])
                 else:
                     output = self.dmg_command.system_stop(ranks=rank[val])
-                    self.pool.wait_for_rebuild(True)
                     self.log.info(output)
+                    self.is_rebuild_done(3)
+                    self.assert_on_rebuild_failure()
                     output = self.dmg_command.system_start(ranks=rank[val])
                 # Just try to reintegrate rank 5
                 if (reint_during_rebuild is True and val == 2):
@@ -115,9 +115,9 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                 self.log.info("Pool Version after exclude %s", pver_exclude)
                 # Check pool version incremented after pool exclude
                 # pver_exclude should be greater than
-                # pver_begin + 8 targets.
-                self.assertTrue(pver_exclude > (pver_begin + 8),
-                                "Pool Version Error:  After exclude")
+                # pver_begin + 3 (2 targets + exclude)
+                self.assertTrue(pver_exclude > (pver_begin + 3),
+                                "Pool Version Error: After exclude")
 
             # Reintegrate the ranks which were excluded
             for val, _ in enumerate(rank):
@@ -143,20 +143,22 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             self.pool = pool[random_pool]
             self.pool.display_pool_daos_space(display_string)
 
-        if data:
-            self.run_ior_thread("Read", oclass, test_seq)
-            self.run_mdtest_thread()
+        for val in range(0, num_pool):
+            self.pool = pool[val]
+            if data:
+                self.run_ior_thread("Read", oclass, test_seq)
+                self.run_mdtest_thread()
 
-    @skipForTicket("DAOS-6505")
-    def test_osa_offline_reintegration(self):
-        """Test ID: DAOS-4749
+    def test_osa_offline_reintegration_multiple_pools(self):
+        """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration
+        with multiple pools
 
         :avocado: tags=all,daily_regression,hw,medium,ib2
         :avocado: tags=osa,offline_reintegration
-        :avocado: tags=offline_reintegration_exclude
+        :avocado: tags=offline_reintegration_multiple_pools
         """
-        self.run_offline_reintegration_test(1, data=True)
+        self.run_offline_reintegration_test(5, data=True)
 
     def test_osa_offline_reintegration_server_stop(self):
         """Test ID: DAOS-6748.
@@ -167,20 +169,20 @@ def test_osa_offline_reintegration_server_stop(self):
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
-    @skipForTicket("DAOS-6505")
     def test_osa_offline_reintegrate_during_rebuild(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while rebuild
         is happening in parallel
 
-        :avocado: tags=all,daily_regression,hw,medium,ib2
+        :avocado: tags=all,full_regression,hw,medium,ib2
         :avocado: tags=osa,offline_reintegration
         :avocado: tags=offline_reintegrate_during_rebuild
         """
+        self.loop_test_cnt = self.params.get("iterations",
+                                             '/run/loop_test/*')
         self.run_offline_reintegration_test(1, data=True,
                                             reint_during_rebuild=True)
 
-    @skipForTicket("DAOS-6505")
     def test_osa_offline_reintegration_oclass(self):
         """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration
@@ -195,7 +197,6 @@ def test_osa_offline_reintegration_oclass(self):
                                                 server_boot=False,
                                                 oclass=oclass)
 
-    @skipForTicket("DAOS-6505")
     def test_osa_offline_reintegrate_during_aggregation(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while aggregation
@@ -207,24 +208,3 @@ def test_osa_offline_reintegrate_during_aggregation(self):
         """
         self.run_offline_reintegration_test(1, data=True,
                                             reint_during_aggregation=True)
-
-    @skipForTicket("DAOS-6505")
-    def test_osa_offline_reintegration_multiple_pools(self):
-        """Test ID: DAOS-6923
-        Test Description: Validate Offline Reintegration
-        with multiple pools
-
-        :avocado: tags=all,hw,medium,ib2,osa,offline_reintegration
-        :avocado: tags=offline_reintegration_multiple_pools
-        """
-        self.run_offline_reintegration_test(200, data=True)
-
-    def test_osa_offline_reintegration_loop_test(self):
-        """Test ID: DAOS-6923
-        Test Description: Validate Offline Reintegration
-        in a loop with a single pool
-
-        :avocado: tags=all,hw,medium,ib2,osa,offline_reintegration
-        :avocado: tags=offline_reintegration_loop_test
-        """
-        self.run_offline_reintegration_test(1, data=True)
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.yaml b/src/tests/ftest/osa/osa_offline_reintegration.yaml
index a31e002168e..1e16e258983 100644
--- a/src/tests/ftest/osa/osa_offline_reintegration.yaml
+++ b/src/tests/ftest/osa/osa_offline_reintegration.yaml
@@ -5,7 +5,7 @@ hosts:
     - server-C
   test_clients:
     - client-D
-timeout: 3600
+timeout: 800
 server_config:
   name: daos_server
   engines_per_host: 2
@@ -77,12 +77,12 @@ ior:
   #    The values are set to be in the multiples of 10.
   #    Values are appx GB.
     - [6000000000, 54000000000, 500000, 500000000]
-    - [6000000000, 54000000000, 1000, 500000000]
+    - [6000000000, 54000000000, 1000, 5000000]
 mdtest:
   api: DFS
   client_processes:
     np: 2
-  num_of_files_dirs: 100         # creating total of 120K files
+  num_of_files_dirs: 100
   test_dir: "/"
   iteration: 1
   dfs_destroy: False
@@ -101,5 +101,6 @@ test_obj_class:
     - RP_2G8
     - RP_3G6
     - RP_4G1
+    - S1
 loop_test:
-  iterations: 10
+  iterations: 3
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 1f31a089be7..2e12d7aef60 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -37,6 +37,7 @@ class OSAUtils(MdtestBase, IorTestBase):
     def setUp(self):
         """Set up for test case."""
         super(OSAUtils, self).setUp()
+        self.pool_cont_dict = {}
         self.container = None
         self.obj = None
         self.ioreq = None
@@ -76,25 +77,16 @@ def get_rebuild_status(self):
         return data["rebuild"]["status"]
 
     @fail_on(CommandFailure)
-    def is_rebuild_done(self, time_interval):
+    def is_rebuild_done(self, time_interval,
+                        wait_for_rebuild_not_to_complete=False):
         """Rebuild is completed/done.
         Args:
             time_interval: Wait interval between checks
-        Returns:
-            False: If rebuild_status not "done" or "completed".
-            True: If rebuild status is "done" or "completed".
+            wait_for_rebuild_not_to_complete: Wait for rebuild to start,
+                                              not complete (Default: False)
         """
-        status = False
-        fail_count = 0
-        completion_flag = ["done", "completed"]
-        while fail_count <= 20:
-            rebuild_status = self.get_rebuild_status()
-            time.sleep(time_interval)
-            fail_count += 1
-            if rebuild_status in completion_flag:
-                status = True
-                break
-        return status
+        self.pool.wait_for_rebuild(wait_for_rebuild_not_to_complete,
+                                   interval=time_interval)
 
     @fail_on(CommandFailure)
     def assert_on_rebuild_failure(self):
@@ -219,9 +211,17 @@ def ior_thread(self, pool, oclass, test, flags):
         self.ior_cmd.set_daos_params(self.server_group, self.pool)
         self.ior_cmd.dfs_oclass.update(oclass)
         self.ior_cmd.dfs_dir_oclass.update(oclass)
-        # Create container only
-        if self.container is None:
+        # If pool is not in the dictionary, 
+        # initialize its container as None.
+        if self.pool not in self.pool_cont_dict:
+            self.pool_cont_dict[self.pool] = None
+        # Create container if the pool doesn't have one.
+        # Otherwise, use the existing container in the pool.
+        if self.pool_cont_dict[self.pool] is None:
             self.add_container(self.pool)
+            self.pool_cont_dict[self.pool] = self.container
+        else:
+            self.container = self.pool_cont_dict[self.pool]
         job_manager = self.get_ior_job_manager_command()
         job_manager.job.dfs_cont.update(self.container.uuid)
         self.ior_cmd.transfer_size.update(test[2])

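A self-contained sketch of the per-pool container cache these hunks
introduce: one lazily created container per pool, keyed by the pool object
(the helper and factory names are illustrative, not part of osa_utils.py):

    pool_cont_dict = {}

    def get_container(pool, create_container):
        """Return the pool's cached container, creating it on first use."""
        if pool not in pool_cont_dict:
            pool_cont_dict[pool] = None
        if pool_cont_dict[pool] is None:
            pool_cont_dict[pool] = create_container(pool)
        return pool_cont_dict[pool]

    cont = get_container("poolA", lambda p: "cont-of-" + p)
    assert get_container("poolA", None) is cont    # cached, factory unused
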
From 68f975321f8dfe2363d63ea2e08e99c4b05746b5 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 11 Mar 2021 16:06:10 -0500
Subject: [PATCH 12/37] DAOS-6923 test : Added daos cont check support
 Test-tag-hw-medium: pr,hw,medium,ib2 osa Skip-unit-tests: true Skip-nlt: true
 Skip-unit-test: true Skip-unit-test-memcheck: true Skip-coverity-test: true
 Skip-func-hw-test-small: true Skip-func-hw-test-medium: true
 Skip-func-hw-test-large: true

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py    |  9 ++++++++
 src/tests/ftest/util/daos_utils.py            | 22 +++++++++++++++++++
 src/tests/ftest/util/daos_utils_base.py       | 11 ++++++++++
 src/tests/ftest/util/osa_utils.py             |  2 +-
 4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index bad272f8bee..ab040efcba0 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -7,6 +7,7 @@
 import random
 import time
 from osa_utils import OSAUtils
+from daos_utils import DaosCommand
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
 from apricot import skipForTicket
@@ -24,6 +25,7 @@ def setUp(self):
         """Set up for test case."""
         super(OSAOfflineReintegration, self).setUp()
         self.dmg_command = self.get_dmg_command()
+        self.daos_command = DaosCommand(self.bin)
         self.ior_test_sequence = self.params.get("ior_test_sequence",
                                                  '/run/ior/iorflags/*')
         self.test_oclass = self.params.get("oclass", '/run/test_obj_class/*')
@@ -143,11 +145,18 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             self.pool = pool[random_pool]
             self.pool.display_pool_daos_space(display_string)
 
+        # Finally check whether the written data can be accessed.
+        # Also, run the daos cont check (for object integrity)
         for val in range(0, num_pool):
             self.pool = pool[val]
             if data:
                 self.run_ior_thread("Read", oclass, test_seq)
                 self.run_mdtest_thread()
+                self.container = self.pool_cont_dict[self.pool]
+                #kwargs = {"pool": self.pool.uuid,
+                #          "cont": self.container.uuid}
+                #output = self.daos_command.container_check(**kwargs)
+                #self.log.info(output)
 
     def test_osa_offline_reintegration_multiple_pools(self):
         """Test ID: DAOS-6923
diff --git a/src/tests/ftest/util/daos_utils.py b/src/tests/ftest/util/daos_utils.py
index 7d575170112..94166846313 100644
--- a/src/tests/ftest/util/daos_utils.py
+++ b/src/tests/ftest/util/daos_utils.py
@@ -131,6 +131,28 @@ def container_destroy(self, pool, cont, force=None, sys_name=None):
             ("container", "destroy"), pool=pool, sys_name=sys_name,
             cont=cont, force=force)
 
+    def container_check(self, pool, cont, sys_name=None, path=None):
+        """Check the integrity of container objects.
+
+        Args:
+            pool (str): UUID of the pool that contains the container.
+            cont (str): container UUID.
+            sys_name (str, optional):  DAOS system name context for servers.
+                Defaults to None.
+            path (str): Container namespace path. Defaults to None
+
+        Returns:
+            CmdResult: Object that contains exit status, stdout, and other
+                information.
+
+        Raises:
+            CommandFailure: if the daos container check command fails.
+
+        """
+        return self._get_result(
+            ("container", "check"), pool=pool, cont=cont,
+            sys_name=sys_name, path=path)
+
     def container_get_acl(self, pool, cont,
                           verbose=False, outfile=None):
         """Get the ACL for a given container.
diff --git a/src/tests/ftest/util/daos_utils_base.py b/src/tests/ftest/util/daos_utils_base.py
index bc010cb12f3..2bccaa05e9c 100644
--- a/src/tests/ftest/util/daos_utils_base.py
+++ b/src/tests/ftest/util/daos_utils_base.py
@@ -148,6 +148,8 @@ def get_sub_command_class(self):
                 self.sub_command_class = self.CreateSubCommand()
             elif self.sub_command.value == "destroy":
                 self.sub_command_class = self.DestroySubCommand()
+            elif self.sub_command.value == "check":
+                self.sub_command_class = self.CheckSubCommand()
             elif self.sub_command.value == "list-objects":
                 self.sub_command_class = self.ListObjectsSubCommand()
             elif self.sub_command.value == "query":
@@ -273,6 +275,15 @@ def __init__(self):
                     DaosCommandBase.ContainerSubCommand.QuerySubCommand,
                     self).__init__("query")
 
+        class CheckSubCommand(CommonContainerSubCommand):
+            """Defines an object for the daos container check command."""
+
+            def __init__(self):
+                """Create a daos container check command object."""
+                super(
+                    DaosCommandBase.ContainerSubCommand.CheckSubCommand,
+                    self).__init__("check")
+
         class GetAclSubCommand(CommonContainerSubCommand):
             """Defines an object for the daos container get-acl command."""
 
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 2e12d7aef60..e5c8bd9b7a1 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -211,7 +211,7 @@ def ior_thread(self, pool, oclass, test, flags):
         self.ior_cmd.set_daos_params(self.server_group, self.pool)
         self.ior_cmd.dfs_oclass.update(oclass)
         self.ior_cmd.dfs_dir_oclass.update(oclass)
-        # If pool is not in the dictionary, 
+        # If pool is not in the dictionary,
         # initialize its container as None.
         if self.pool not in self.pool_cont_dict:
             self.pool_cont_dict[self.pool] = None

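A hedged usage sketch for the new container_check helper; the bin path and
the pool/cont UUID variables are placeholders, and the returned CmdResult
exposes stdout per the docstring above:

    from daos_utils import DaosCommand

    daos_cmd = DaosCommand("/usr/bin")    # bin path is an assumption
    result = daos_cmd.container_check(pool=pool_uuid, cont=cont_uuid)
    print(result.stdout)
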
From 85434c393fbce0ad18843fd859bd1428310c1a87 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 11 Mar 2021 16:17:29 -0500
Subject: [PATCH 13/37] DAOS-6923 test: Merge with master, minor change.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa Skip-unit-tests: true Skip-nlt: true
 Skip-unit-test: true Skip-unit-test-memcheck: true Skip-coverity-test: true
 Skip-func-hw-test-small: true Skip-func-hw-test-medium: true
 Skip-func-hw-test-large: true

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index e5c8bd9b7a1..5c808224500 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -78,14 +78,14 @@ def get_rebuild_status(self):
 
     @fail_on(CommandFailure)
     def is_rebuild_done(self, time_interval,
-                        wait_for_rebuild_not_to_complete=False):
+                        wait_for_rebuild_to_complete=False):
         """Rebuild is completed/done.
         Args:
             time_interval: Wait interval between checks
-            wait_for_rebuild_not_to_complete: Wait for rebuild to start,
-                                              not complete (Default: False)
+            wait_for_rebuild_to_complete: Wait for rebuild to start,
+                                          not complete (Default: False)
         """
-        self.pool.wait_for_rebuild(wait_for_rebuild_not_to_complete,
+        self.pool.wait_for_rebuild(wait_for_rebuild_to_complete,
                                    interval=time_interval)
 
     @fail_on(CommandFailure)

From b07b5819522b26e303279e44f721285f373dad1d Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Sun, 14 Mar 2021 19:28:31 -0400
Subject: [PATCH 14/37] DAOS-6923 test: Code review script changes.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa Skip-unit-tests: true Skip-nlt: true
 Skip-unit-test: true Skip-unit-test-memcheck: true Skip-coverity-test: true
 Skip-func-hw-test-small: true Skip-func-hw-test-medium: true
 Skip-func-hw-test-large: true

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_drain.py      | 20 ++----
 src/tests/ftest/osa/osa_offline_drain.yaml    |  3 +
 .../ftest/osa/osa_offline_reintegration.py    | 70 ++++++++-----------
 .../ftest/osa/osa_offline_reintegration.yaml  |  5 +-
 src/tests/ftest/util/osa_utils.py             | 62 ++++++++++++++--
 5 files changed, 99 insertions(+), 61 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_drain.py b/src/tests/ftest/osa/osa_offline_drain.py
index 746200e9595..0e2aca0e1d2 100644
--- a/src/tests/ftest/osa/osa_offline_drain.py
+++ b/src/tests/ftest/osa/osa_offline_drain.py
@@ -30,15 +30,13 @@ def setUp(self):
             self.hostlist_clients, self.workdir, None)
 
     def run_offline_drain_test(self, num_pool, data=False,
-                               oclass=None, drain_during_aggregation=False):
+                               oclass=None):
         """Run the offline drain without data.
             Args:
             num_pool (int) : total pools to create for testing purposes.
             data (bool) : whether to create some data in the pool.
                           Defaults to False.
             oclass (str): DAOS object class (eg: RP_2G1,etc)
-            drain_during_aggregation (bool) : Perform drain and aggregation
-                                              in parallel
         """
         # Create a pool
         pool = {}
@@ -66,11 +64,8 @@ def run_offline_drain_test(self, num_pool, data=False,
                                             num_pool)
             pool[val].create()
             self.pool = pool[val]
-            if drain_during_aggregation is True:
-                test_seq = self.ior_test_sequence[1]
-                self.pool.set_property("reclaim", "disabled")
-            else:
-                test_seq = self.ior_test_sequence[0]
+            self.pool.set_property("reclaim", "disabled")
+            test_seq = self.ior_test_sequence[0]
 
             if data:
                 self.run_ior_thread("Write", oclass, test_seq)
@@ -83,14 +78,13 @@ def run_offline_drain_test(self, num_pool, data=False,
             self.pool.display_pool_daos_space("Pool space: Beginning")
             pver_begin = self.get_pool_version()
             self.log.info("Pool Version at the beginning %s", pver_begin)
-            if drain_during_aggregation is True:
+            if self.test_during_aggregation is True:
                 self.pool.set_property("reclaim", "time")
-                time.sleep(90)
+                self.delete_extra_container(self.pool)
+                self.simple_exclude_reintegrate_loop(rank)
             output = self.dmg_command.pool_drain(self.pool.uuid,
                                                  rank, t_string)
-            self.log.info(output)
-            self.is_rebuild_done(3)
-            self.assert_on_rebuild_failure()
+            self.print_and_assert_on_rebuild_failure(output)
 
             pver_drain = self.get_pool_version()
             self.log.info("Pool Version after drain %d", pver_drain)
diff --git a/src/tests/ftest/osa/osa_offline_drain.yaml b/src/tests/ftest/osa/osa_offline_drain.yaml
index d8c6a1a52bd..1acf88f6c19 100644
--- a/src/tests/ftest/osa/osa_offline_drain.yaml
+++ b/src/tests/ftest/osa/osa_offline_drain.yaml
@@ -105,3 +105,6 @@ test_obj_class:
     - RP_2G8
     - RP_3G6
     - RP_4G1
+aggregation:
+  test_with_aggregation: True
+
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index ab040efcba0..04f2fc939d4 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -36,9 +36,7 @@ def setUp(self):
         self.dmg_command.exit_status_exception = True
 
     def run_offline_reintegration_test(self, num_pool, data=False,
-                                       server_boot=False, oclass=None,
-                                       reint_during_rebuild=False,
-                                       reint_during_aggregation=False):
+                                       server_boot=False, oclass=None):
         """Run the offline reintegration without data.
             Args:
             num_pool (int) : total pools to create for testing purposes.
@@ -47,11 +45,6 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             server_boot (bool) : Perform system stop/start on a rank.
                                  Defaults to False.
             oclass (str) : daos object class string (eg: "RP_2G8")
-            reint_during_rebuild (bool) : Perform reintegration during
-                                          rebuild (Defaults to False).
-            reint_during_aggregation (bool) : Perform reintegration
-                                              during aggregation
-                                              (Defaults to False).
         """
         # Create a pool
         pool = {}
@@ -67,14 +60,13 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             pool[val].get_params(self)
             pool[val].create()
             self.pool = pool[val]
-            if reint_during_aggregation is True:
-                test_seq = self.ior_test_sequence[1]
-                self.pool.set_property("reclaim", "disabled")
-            else:
-                test_seq = self.ior_test_sequence[0]
+            self.pool.set_property("reclaim", "disabled")
+            test_seq = self.ior_test_sequence[0]
             if data:
                 self.run_ior_thread("Write", oclass, test_seq)
                 self.run_mdtest_thread()
+                if self.test_during_aggregation is True:
+                    self.run_ior_thread("Write", oclass, test_seq)
 
         # Exclude all the ranks
         random_pool = random.randint(0, (num_pool-1))
@@ -85,33 +77,26 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                 pver_begin = self.get_pool_version()
                 self.log.info("Pool Version at the beginning %s", pver_begin)
                 if server_boot is False:
-                    if (reint_during_rebuild is True and val == 0):
+                    if (self.test_during_rebuild is True and val == 0):
                         # Exclude rank 5
                         output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                                "5")
-                        self.log.info(output)
-                        self.is_rebuild_done(3)
-                        self.assert_on_rebuild_failure()
-                    if reint_during_aggregation is True:
-                        self.pool.set_property("reclaim", "time")
-                        time.sleep(90)
+                        self.print_and_assert_on_rebuild_failure(output)
+                    if self.test_during_aggregation is True:
+                        self.delete_extra_container(self.pool)
+                        self.simple_exclude_reintegrate_loop(rank[val])
                     output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                            rank[val])
                 else:
                     output = self.dmg_command.system_stop(ranks=rank[val])
-                    self.log.info(output)
-                    self.is_rebuild_done(3)
-                    self.assert_on_rebuild_failure()
+                    self.print_and_assert_on_rebuild_failure(output)
                     output = self.dmg_command.system_start(ranks=rank[val])
                 # Just try to reintegrate rank 5
-                if (reint_during_rebuild is True and val == 2):
-                    # Exclude rank 5
-                    time.sleep(3)
+                if (self.test_during_rebuild is True and val == 2):
+                    # Reintegrate rank 5
                     output = self.dmg_command.pool_reintegrate(self.pool.uuid,
                                                                "5")
-                self.log.info(output)
-                self.is_rebuild_done(3)
-                self.assert_on_rebuild_failure()
+                self.print_and_assert_on_rebuild_failure(output)
 
                 pver_exclude = self.get_pool_version()
                 self.log.info("Pool Version after exclude %s", pver_exclude)
@@ -123,7 +108,6 @@ def run_offline_reintegration_test(self, num_pool, data=False,
 
             # Reintegrate the ranks which were excluded
             for val, _ in enumerate(rank):
-                time.sleep(5)
                 if (val == 2 and "RP_2G" in oclass):
                     output = self.dmg_command.pool_reintegrate(self.pool.uuid,
                                                                rank[val],
@@ -131,9 +115,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                 else:
                     output = self.dmg_command.pool_reintegrate(self.pool.uuid,
                                                                rank[val])
-                self.log.info(output)
-                self.is_rebuild_done(3)
-                self.assert_on_rebuild_failure()
+                self.print_and_assert_on_rebuild_failure(output)
 
                 pver_reint = self.get_pool_version()
                 self.log.info("Pool Version after reintegrate %d", pver_reint)
@@ -152,11 +134,12 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             if data:
                 self.run_ior_thread("Read", oclass, test_seq)
                 self.run_mdtest_thread()
-                self.container = self.pool_cont_dict[self.pool]
-                #kwargs = {"pool": self.pool.uuid,
-                #          "cont": self.container.uuid}
-                #output = self.daos_command.container_check(**kwargs)
-                #self.log.info(output)
+                if self.test_during_rebuild is True:
+                    self.container = self.pool_cont_dict[self.pool]
+                    kwargs = {"pool": self.pool.uuid,
+                              "cont": self.container.uuid}
+                    output = self.daos_command.container_check(**kwargs)
+                    self.log.info(output)
 
     def test_osa_offline_reintegration_multiple_pools(self):
         """Test ID: DAOS-6923
@@ -178,6 +161,7 @@ def test_osa_offline_reintegration_server_stop(self):
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
+    @skipForTicket("DAOS-7013")
     def test_osa_offline_reintegrate_during_rebuild(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while rebuild
@@ -189,8 +173,9 @@ def test_osa_offline_reintegrate_during_rebuild(self):
         """
         self.loop_test_cnt = self.params.get("iterations",
                                              '/run/loop_test/*')
-        self.run_offline_reintegration_test(1, data=True,
-                                            reint_during_rebuild=True)
+        self.test_during_rebuild = self.params.get("test_with_rebuild",
+                                                   '/run/rebuild/*')
+        self.run_offline_reintegration_test(1, data=True)
 
     def test_osa_offline_reintegration_oclass(self):
         """Test ID: DAOS-6923
@@ -215,5 +200,6 @@ def test_osa_offline_reintegrate_during_aggregation(self):
         :avocado: tags=osa,offline_reintegration
         :avocado: tags=offline_reintegrate_during_aggregation
         """
-        self.run_offline_reintegration_test(1, data=True,
-                                            reint_during_aggregation=True)
+        self.test_during_aggregation = self.params.get("test_with_aggregation",
+                                                       '/run/aggregation/*')
+        self.run_offline_reintegration_test(1, data=True)
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.yaml b/src/tests/ftest/osa/osa_offline_reintegration.yaml
index 1e16e258983..fe52612e1b7 100644
--- a/src/tests/ftest/osa/osa_offline_reintegration.yaml
+++ b/src/tests/ftest/osa/osa_offline_reintegration.yaml
@@ -77,7 +77,6 @@ ior:
   #    The values are set to be in the multiples of 10.
   #    Values are appx GB.
     - [6000000000, 54000000000, 500000, 500000000]
-    - [6000000000, 54000000000, 1000, 5000000]
 mdtest:
   api: DFS
   client_processes:
@@ -104,3 +103,7 @@ test_obj_class:
     - S1
 loop_test:
   iterations: 3
+aggregation:
+  test_with_aggregation: True
+rebuild:
+  test_with_rebuild: True
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 5c808224500..ff3f31c19eb 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -53,6 +53,8 @@ def setUp(self):
         self.ior_r_flags = self.params.get("read_flags", '/run/ior/iorflags/*')
         self.out_queue = test_queue.Queue()
         self.dmg_command.exit_status_exception = False
+        self.test_during_aggregation = False
+        self.test_during_rebuild = False
 
     @fail_on(CommandFailure)
     def get_pool_leader(self):
@@ -99,6 +101,15 @@ def assert_on_rebuild_failure(self):
         self.assertTrue(rebuild_status not in rebuild_failed_string,
                         "Rebuild failed")
 
+    @fail_on(CommandFailure)
+    def print_and_assert_on_rebuild_failure(self, out, timeout=3):
+        """Print the out value (daos, dmg, etc) and check for rebuild
+        completion. If not, raise assert.
+        """
+        self.log.info(out)
+        self.is_rebuild_done(timeout)
+        self.assert_on_rebuild_failure()
+
     @fail_on(CommandFailure)
     def get_pool_version(self):
         """Get the pool version.
@@ -110,6 +121,21 @@ def get_pool_version(self):
         data = self.dmg_command.pool_query(self.pool.uuid)
         return int(data["version"])
 
+    def simple_exclude_reintegrate_loop(self, rank, loop_time=100):
+        """This method performs exclude and reintegration on a rank,
+        for a certain amount of time.
+        """
+        start_time = 0
+        finish_time = 0
+        while (int(finish_time - start_time) > loop_time):
+            start_time = time.time()
+            output = self.dmg_command.pool_exclude(self.pool.uuid,
+                                                   rank)
+            self.print_and_assert_on_rebuild_failure(output)
+            output = self.dmg_command.pool_reintegrate(self.pool.uuid,
+                                                       rank)
+            self.print_and_assert_on_rebuild_failure(output)
+
     @fail_on(DaosApiError)
     def write_single_object(self):
         """Write some data to the existing pool."""
@@ -170,6 +196,16 @@ def verify_single_object(self):
         self.obj.close()
         self.container.close()
 
+    def delete_extra_container(self, pool):
+        """Delete the extra container in the pool.
+        Args:
+            pool (object): pool handle
+        """
+        self.pool.set_property("reclaim", "time")
+        extra_container = self.pool_cont_dict[pool][2]
+        extra_container.destroy()
+        self.pool_cont_dict[pool][3] = None
+
     def run_ior_thread(self, action, oclass, test):
         """Start the IOR thread for either writing or
         reading data to/from a container.
@@ -211,17 +247,33 @@ def ior_thread(self, pool, oclass, test, flags):
         self.ior_cmd.set_daos_params(self.server_group, self.pool)
         self.ior_cmd.dfs_oclass.update(oclass)
         self.ior_cmd.dfs_dir_oclass.update(oclass)
+        self.log.info(self.pool_cont_dict)
         # If pool is not in the dictionary,
-        # initialize its container as None.
+        # initialize its container list to None
+        # {poolA: [None, None, None, None]}
         if self.pool not in self.pool_cont_dict:
-            self.pool_cont_dict[self.pool] = None
+            self.pool_cont_dict[self.pool] = [None] * 4
         # Create container if the pool doesn't have one.
         # Otherwise, use the existing container in the pool.
-        if self.pool_cont_dict[self.pool] is None:
+        # pool_cont_dict {pool A: [containerA, Updated,
+        #                          containerB, Updated],
+        #                 pool B: [containerA, Updated,
+        #                          containerB, None]}
+        if self.pool_cont_dict[self.pool][0] is None:
             self.add_container(self.pool)
-            self.pool_cont_dict[self.pool] = self.container
+            self.pool_cont_dict[self.pool][0] = self.container
+            self.pool_cont_dict[self.pool][1] = "Updated"
         else:
-            self.container = self.pool_cont_dict[self.pool]
+            if ((self.test_during_aggregation is True) and
+               (self.pool_cont_dict[self.pool][1] == "Updated") and
+               (self.pool_cont_dict[self.pool][3] is None) and
+               ("-w" in flags)):
+                # Write to the second container
+                self.add_container(self.pool)
+                self.pool_cont_dict[self.pool][2] = self.container
+                self.pool_cont_dict[self.pool][3] = "Updated"
+            else:
+                self.container = self.pool_cont_dict[self.pool][0]
         job_manager = self.get_ior_job_manager_command()
         job_manager.job.dfs_cont.update(self.container.uuid)
         self.ior_cmd.transfer_size.update(test[2])

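The exclude/reintegrate loop added to osa_utils.py is a time-bounded retry
pattern; a standalone sketch of the same control flow, where operation
stands in for the paired dmg calls:

    import time

    def run_for(loop_time, operation):
        """Repeat operation until loop_time seconds have elapsed."""
        start_time = time.time()
        finish_time = time.time()
        while int(finish_time - start_time) < loop_time:
            operation()
            finish_time = time.time()

    run_for(1, lambda: time.sleep(0.1))    # roughly ten iterations
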
From 82980768216783c6a43eff1f614136c6c4cef6b3 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Sun, 14 Mar 2021 19:37:23 -0400
Subject: [PATCH 15/37] DAOS-6923 test: Fix minor checkpatch issues.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa Skip-unit-tests: true Skip-nlt: true
 Skip-unit-test: true Skip-unit-test-memcheck: true Skip-coverity-test: true
 Skip-func-hw-test-small: true Skip-func-hw-test-medium: true
 Skip-func-hw-test-large: true

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_drain.py         | 1 -
 src/tests/ftest/osa/osa_offline_drain.yaml       | 1 -
 src/tests/ftest/osa/osa_offline_reintegration.py | 1 -
 src/tests/ftest/util/osa_utils.py                | 2 +-
 4 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_drain.py b/src/tests/ftest/osa/osa_offline_drain.py
index 0e2aca0e1d2..ec4bca6eb5e 100644
--- a/src/tests/ftest/osa/osa_offline_drain.py
+++ b/src/tests/ftest/osa/osa_offline_drain.py
@@ -5,7 +5,6 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 import random
-import time
 from osa_utils import OSAUtils
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
diff --git a/src/tests/ftest/osa/osa_offline_drain.yaml b/src/tests/ftest/osa/osa_offline_drain.yaml
index 1acf88f6c19..2d8cde44a73 100644
--- a/src/tests/ftest/osa/osa_offline_drain.yaml
+++ b/src/tests/ftest/osa/osa_offline_drain.yaml
@@ -81,7 +81,6 @@ ior:
     #    The values are set to be in the multiples of 10.
     #    Values are appx GB.
       - [6000000000, 54000000000, 500000, 500000000]
-      - [6000000000, 54000000000, 1000, 500000000]
 mdtest:
   api: DFS
   client_processes:
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 04f2fc939d4..f410bcbceb9 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -5,7 +5,6 @@
   SPDX-License-Identifier: BSD-2-Clause-Patent
 """
 import random
-import time
 from osa_utils import OSAUtils
 from daos_utils import DaosCommand
 from test_utils_pool import TestPool
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index ff3f31c19eb..cdc8c57db7a 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -127,7 +127,7 @@ def simple_exclude_reintegrate_loop(self, rank, loop_time=100):
         """
         start_time = time.time()
         finish_time = time.time()
-        while (int(finish_time - start_time) < loop_time):
+        while int(finish_time - start_time) < loop_time:
             output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                    rank)
             self.print_and_assert_on_rebuild_failure(output)

From 5ac7f7571805bb5537b2041fcb488352307fdc03 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Tue, 16 Mar 2021 17:36:20 -0400
Subject: [PATCH 16/37] DAOS-6923 test: Update the container class
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py    |  5 ++-
 src/tests/ftest/util/osa_utils.py             | 36 ++++++++++++++++---
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index f410bcbceb9..b54b268c9a0 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -9,7 +9,6 @@
 from daos_utils import DaosCommand
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
-from apricot import skipForTicket
 
 
 class OSAOfflineReintegration(OSAUtils):
@@ -87,7 +86,8 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                     output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                            rank[val])
                 else:
-                    output = self.dmg_command.system_stop(ranks=rank[val])
+                    output = self.dmg_command.system_stop(ranks=rank[val],
+                                                          force=True)
                     self.print_and_assert_on_rebuild_failure(output)
                     output = self.dmg_command.system_start(ranks=rank[val])
                 # Just try to reintegrate rank 5
@@ -160,7 +160,6 @@ def test_osa_offline_reintegration_server_stop(self):
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
-    @skipForTicket("DAOS-7013")
     def test_osa_offline_reintegrate_during_rebuild(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while rebuild
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index cdc8c57db7a..75d2ef116e2 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -7,6 +7,7 @@
 import ctypes
 import time
 import threading
+import re
 
 from avocado import fail_on
 from ior_test_base import IorTestBase
@@ -206,6 +207,25 @@ def delete_extra_container(self, pool):
         extra_container.destroy()
         self.pool_cont_dict[pool][3] = None
 
+    def set_cont_class_properties(self, cont, oclass="S1"):
+        """Update the container class to match the IOR object
+        class. Also, remove the redundancy factor for S type
+        object class.
+        Args:
+            cont (object): TestContainer object
+            oclass (str, optional): Container object class to be set.
+                                    Defaults to "S1".
+        """
+        self.container.oclass.value = oclass
+        # Set the container properties properly for S1, S2 class.
+        # rf should not be set to 1 for S type object class.
+
+        x = re.search("^S\\d$", oclass)
+        if x is not None:
+            prop = self.container.properties.value
+            prop = prop.replace("rf:1", "rf:0")
+            self.container.properties.value = prop
+
     def run_ior_thread(self, action, oclass, test):
         """Start the IOR thread for either writing or
         reading data to/from a container.
@@ -237,7 +257,7 @@ def ior_thread(self, pool, oclass, test, flags):
 
         Args:
             pool (object): pool handle
-            oclass (str): IOR object class
+            oclass (str): IOR object class (also used for the container).
             test (list): IOR test sequence
             flags (str): IOR flags
 
@@ -247,6 +267,7 @@ def ior_thread(self, pool, oclass, test, flags):
         self.ior_cmd.set_daos_params(self.server_group, self.pool)
         self.ior_cmd.dfs_oclass.update(oclass)
         self.ior_cmd.dfs_dir_oclass.update(oclass)
+        
         self.log.info(self.pool_cont_dict)
         # If pool is not in the dictionary,
         # initialize its container list to None
@@ -260,7 +281,9 @@ def ior_thread(self, pool, oclass, test, flags):
         #                 pool B : containerA, Updated,
         #                          containerB, None]}
         if self.pool_cont_dict[self.pool][0] is None:
-            self.add_container(self.pool)
+            self.add_container(self.pool, create=False)
+            self.set_cont_class_properties(self.container, oclass)
+            self.container.create()
             self.pool_cont_dict[self.pool][0] = self.container
             self.pool_cont_dict[self.pool][1] = "Updated"
         else:
@@ -269,7 +292,9 @@ def ior_thread(self, pool, oclass, test, flags):
                (self.pool_cont_dict[self.pool][3] is None) and
                ("-w" in flags)):
                 # Write to the second container
-                self.add_container(self.pool)
+                self.add_container(self.pool, create=False)
+                self.set_cont_class_properties(self.container, oclass)
+                self.container.create()
                 self.pool_cont_dict[self.pool][2] = self.container
                 self.pool_cont_dict[self.pool][3] = "Updated"
             else:
@@ -287,7 +312,10 @@ def run_mdtest_thread(self):
         # Create container only
         self.mdtest_cmd.dfs_destroy = False
         if self.container is None:
-            self.add_container(self.pool)
+            self.add_container(self.pool, create=False)
+            self.set_cont_class_properties(self.container,
+                                           self.mdtest_cmd.dfs_oclass)
+            self.container.create()
         job_manager = self.get_mdtest_job_manager_command(self.manager)
         job_manager.job.dfs_cont.update(self.container.uuid)
         # Add a thread for these IOR arguments

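The S-class handling above rewrites the container's redundancy-factor
property; a standalone sketch of that rule (the property string is a
made-up example):

    import re

    def adjust_rf(properties, oclass):
        """Drop rf:1 to rf:0 for non-redundant S-type object classes."""
        if re.search(r"^S\d$", oclass) is not None:
            return properties.replace("rf:1", "rf:0")
        return properties

    assert adjust_rf("cksum:crc64,rf:1", "S1") == "cksum:crc64,rf:0"
    assert adjust_rf("cksum:crc64,rf:1", "RP_2G1") == "cksum:crc64,rf:1"
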
From fecfde79c15757b23f0479c4a76d981c051e651b Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Tue, 16 Mar 2021 17:48:19 -0400
Subject: [PATCH 17/37] DAOS-6923 test: Fix checkpatch issues.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 75d2ef116e2..8bc6a6a01db 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -13,9 +13,6 @@
 from ior_test_base import IorTestBase
 from mdtest_test_base import MdtestBase
 from command_utils import CommandFailure
-from ior_utils import IorCommand
-from job_manager_utils import Mpirun
-from mpio_utils import MpioUtils
 from pydaos.raw import (DaosContainer, IORequest,
                         DaosObj, DaosApiError)
 
@@ -207,19 +204,17 @@ def delete_extra_container(self, pool):
         extra_container.destroy()
         self.pool_cont_dict[pool][3] = None
 
-    def set_cont_class_properties(self, cont, oclass="S1"):
+    def set_cont_class_properties(self, oclass="S1"):
         """Update the container class to match the IOR object
         class. Also, remove the redundancy factor for S type
         object class.
         Args:
-            cont (object): TestContainer object
             oclass (str, optional): Container object class to be set.
                                     Defaults to "S1".
         """
         self.container.oclass.value = oclass
         # Set the container properties properly for S1, S2 class.
         # rf should not be set to 1 for S type object class.
-
         x = re.search("^S\\d$", oclass)
         if x is not None:
             prop = self.container.properties.value
@@ -267,7 +262,7 @@ def ior_thread(self, pool, oclass, test, flags):
         self.ior_cmd.set_daos_params(self.server_group, self.pool)
         self.ior_cmd.dfs_oclass.update(oclass)
         self.ior_cmd.dfs_dir_oclass.update(oclass)
-        
+
         self.log.info(self.pool_cont_dict)
         # If pool is not in the dictionary,
         # initialize its container list to None
@@ -282,7 +277,7 @@ def ior_thread(self, pool, oclass, test, flags):
         #                          containerB, None]}
         if self.pool_cont_dict[self.pool][0] is None:
             self.add_container(self.pool, create=False)
-            self.set_cont_class_properties(self.container, oclass)
+            self.set_cont_class_properties(oclass)
             self.container.create()
             self.pool_cont_dict[self.pool][0] = self.container
             self.pool_cont_dict[self.pool][1] = "Updated"
@@ -293,7 +288,7 @@ def ior_thread(self, pool, oclass, test, flags):
                ("-w" in flags)):
                 # Write to the second container
                 self.add_container(self.pool, create=False)
-                self.set_cont_class_properties(self.container, oclass)
+                self.set_cont_class_properties(oclass)
                 self.container.create()
                 self.pool_cont_dict[self.pool][2] = self.container
                 self.pool_cont_dict[self.pool][3] = "Updated"
@@ -313,8 +308,7 @@ def run_mdtest_thread(self):
         self.mdtest_cmd.dfs_destroy = False
         if self.container is None:
             self.add_container(self.pool, create=False)
-            self.set_cont_class_properties(self.container,
-                                           self.mdtest_cmd.dfs_oclass)
+            self.set_cont_class_properties(self.mdtest_cmd.dfs_oclass)
             self.container.create()
         job_manager = self.get_mdtest_job_manager_command(self.manager)
         job_manager.job.dfs_cont.update(self.container.uuid)

From b507f47467b612d27835324a570360bed8b01173 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Wed, 17 Mar 2021 14:04:54 -0400
Subject: [PATCH 18/37] DAOS-6923 test: Support single/multiple containers
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 90 ++++++++++++++++++++-----------
 1 file changed, 58 insertions(+), 32 deletions(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 8bc6a6a01db..37e7860aaf6 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -194,8 +194,56 @@ def verify_single_object(self):
         self.obj.close()
         self.container.close()
 
+    def prepare_cont_ior_write_read(self, oclass, flags):
+        """This method prepares the containers for
+        IOR write and read invocations.
+            To enable aggregation:
+            - Create two containers and read always from
+              first container
+            Normal usage (use only a single container):
+            - Create a single container and use the same.
+        Args:
+            oclass (str): IOR object class
+            flags (str): IOR flags
+        """
+        self.log.info(self.pool_cont_dict)
+        # If the pool is not in the dictionary,
+        # initialize its container list entries to None
+        # {poolA : [None, None, None, None]}
+        if self.pool not in self.pool_cont_dict:
+            self.pool_cont_dict[self.pool] = [None] * 4
+        # Create container if the pool doesn't have one.
+        # Otherwise, use the existing container in the pool.
+        # pool_cont_dict {pool A: [containerA, Updated,
+        #                          containerB, Updated],
+        #                 pool B : [containerA, Updated,
+        #                          containerB, None]}
+        if self.pool_cont_dict[self.pool][0] is None:
+            self.add_container(self.pool, create=False)
+            self.set_cont_class_properties(oclass)
+            self.container.create()
+            self.pool_cont_dict[self.pool][0] = self.container
+            self.pool_cont_dict[self.pool][1] = "Updated"
+        else:
+            if ((self.test_during_aggregation is True) and
+               (self.pool_cont_dict[self.pool][1] == "Updated") and
+               (self.pool_cont_dict[self.pool][3] is None) and
+               ("-w" in flags)):
+                # Write to the second container
+                self.add_container(self.pool, create=False)
+                self.set_cont_class_properties(oclass)
+                self.container.create()
+                self.pool_cont_dict[self.pool][2] = self.container
+                self.pool_cont_dict[self.pool][3] = "Updated"
+            else:
+                self.container = self.pool_cont_dict[self.pool][0]
+
+
     def delete_extra_container(self, pool):
         """Delete the extra container in the pool.
         Refer to prepare_cont_ior_write_read. This method
+        should be called when OSA tests intend to
+        enable aggregation.
         Args:
             pool (object): pool handle
         """
@@ -247,7 +295,8 @@ def run_ior_thread(self, action, oclass, test):
         # Wait for the thread to finish
         process.join()
 
-    def ior_thread(self, pool, oclass, test, flags):
+    def ior_thread(self, pool, oclass, test, flags,
+                   single_cont_read=True):
         """Start threads and wait until all threads are finished.
 
         Args:
@@ -262,38 +311,15 @@ def ior_thread(self, pool, oclass, test, flags):
         self.ior_cmd.set_daos_params(self.server_group, self.pool)
         self.ior_cmd.dfs_oclass.update(oclass)
         self.ior_cmd.dfs_dir_oclass.update(oclass)
-
-        self.log.info(self.pool_cont_dict)
-        # If pool is not in the dictionary,
-        # initialize its container list to None
-        # {poolA : [None, None], [None, None]}
-        if self.pool not in self.pool_cont_dict:
-            self.pool_cont_dict[self.pool] = [None] * 4
-        # Create container if the pool doesn't have one.
-        # Otherwise, use the existing container in the pool.
-        # pool_cont_dict {pool A: [containerA, Updated,
-        #                          containerB, Updated],
-        #                 pool B : containerA, Updated,
-        #                          containerB, None]}
-        if self.pool_cont_dict[self.pool][0] is None:
-            self.add_container(self.pool, create=False)
-            self.set_cont_class_properties(oclass)
-            self.container.create()
-            self.pool_cont_dict[self.pool][0] = self.container
-            self.pool_cont_dict[self.pool][1] = "Updated"
+        if single_cont_read is True and self.container is None:
+            # Prepare the container(s) and use them in the specific
+            # way defined in prepare_cont_ior_write_read.
+            self.prepare_cont_ior_write_read(oclass, flags)
+        elif single_cont_read is False and self.container is not None:
+            # self.container already holds a valid container. Just use it.
+            self.log.info(self.container)
         else:
-            if ((self.test_during_aggregation is True) and
-               (self.pool_cont_dict[self.pool][1] == "Updated") and
-               (self.pool_cont_dict[self.pool][3] is None) and
-               ("-w" in flags)):
-                # Write to the second container
-                self.add_container(self.pool, create=False)
-                self.set_cont_class_properties(oclass)
-                self.container.create()
-                self.pool_cont_dict[self.pool][2] = self.container
-                self.pool_cont_dict[self.pool][3] = "Updated"
-            else:
-                self.container = self.pool_cont_dict[self.pool][0]
+            self.fail("Not supported option on ior_thread")
         job_manager = self.get_ior_job_manager_command()
         job_manager.job.dfs_cont.update(self.container.uuid)
         self.ior_cmd.transfer_size.update(test[2])

From f4b3a4353dd4989ff312e6e51230b7e08086880c Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 18 Mar 2021 18:24:31 -0400
Subject: [PATCH 19/37] DAOS-6923 test: Minor changes to osa_utils.py
 Test-tag-hw-medium: pr,hw,medium,ib2 osa
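
A minimal usage sketch of the new keyword arguments (illustration only;
the object class and test sequence values below are hypothetical):

    # Write first, then read back from the same (first) container,
    # without terminating the test on IOR warnings.
    self.run_ior_thread("Write", oclass="RP_2G1", test=test_seq)
    self.run_ior_thread("Read", oclass="RP_2G1", test=test_seq,
                        single_cont_read=True,
                        fail_on_warning=False)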

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 38 ++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 37e7860aaf6..ad79e80cc3b 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -118,6 +118,14 @@ def get_pool_version(self):
         """
         data = self.dmg_command.pool_query(self.pool.uuid)
         return int(data["version"])
+ 
+    def set_container(self, container):
+        """Set the OSA utils container object.
+        Args:
+            container (obj) : Container object to be used
+                              within OSA utils.
+        """
+        self.container = container
 
     def simple_exclude_reintegrate_loop(self, rank, loop_time=100):
         """This method performs exclude and reintegration on a rank,
@@ -238,7 +246,6 @@ def prepare_cont_ior_write_read(self, oclass, flags):
             else:
                 self.container = self.pool_cont_dict[self.pool][0]
 
-
     def delete_extra_container(self, pool):
         """Delete the extra container in the pool.
         Refer to prepare_cont_ior_write_read. This method
@@ -269,7 +276,9 @@ def set_cont_class_properties(self, oclass="S1"):
             prop = prop.replace("rf:1", "rf:0")
             self.container.properties.value = prop
 
-    def run_ior_thread(self, action, oclass, test):
+    def run_ior_thread(self, action, oclass, test,
+                       single_cont_read=True,
+                       fail_on_warning=True):
         """Start the IOR thread for either writing or
         reading data to/from a container.
         Args:
@@ -278,6 +287,12 @@ def run_ior_thread(self, action, oclass, test):
             oclass (str): IOR object class
             test (list): IOR test sequence
             flags (str): IOR flags
+            single_cont_read (bool) : Always read from the 
+                                      1st container.
+                                      Defaults to True.
+            fail_on_warning (bool)  : Test terminates
+                                      for IOR warnings.
+                                      Defaults to True.
         """
         if action == "Write":
             flags = self.ior_w_flags
@@ -289,14 +304,19 @@ def run_ior_thread(self, action, oclass, test):
                                    kwargs={"pool": self.pool,
                                            "oclass": oclass,
                                            "test": test,
-                                           "flags": flags})
+                                           "flags": flags,
+                                           "single_cont_read":
+                                           single_cont_read,
+                                           "fail_on_warning":
+                                           fail_on_warning})
         # Launch the IOR thread
         process.start()
         # Wait for the thread to finish
         process.join()
 
     def ior_thread(self, pool, oclass, test, flags,
-                   single_cont_read=True):
+                   single_cont_read=True,
+                   fail_on_warning=True):
         """Start threads and wait until all threads are finished.
 
         Args:
@@ -304,7 +324,12 @@ def ior_thread(self, pool, oclass, test, flags,
             oclass (str): IOR object class, container class.
             test (list): IOR test sequence
             flags (str): IOR flags
-
+            single_cont_read (bool) : Always read from the 
+                                      1st container.
+                                      Defaults to True.
+            fail_on_warning (bool)  : Test terminates
+                                      for IOR warnings.
+                                      Defaults to True.
         """
         self.pool = pool
         self.ior_cmd.get_params(self)
@@ -325,7 +350,8 @@ def ior_thread(self, pool, oclass, test, flags,
         self.ior_cmd.transfer_size.update(test[2])
         self.ior_cmd.block_size.update(test[3])
         self.ior_cmd.flags.update(flags)
-        self.run_ior_with_pool(create_pool=False, create_cont=False)
+        self.run_ior_with_pool(create_pool=False, create_cont=False,
+                               fail_on_warning=fail_on_warning)
 
     def run_mdtest_thread(self):
         """Start mdtest thread and wait until thread completes.

From ff074fc055ccaaae1060dfadc6cb097e5189f22a Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 18 Mar 2021 19:01:31 -0400
Subject: [PATCH 20/37] DAOS-6923 test: Fix minor checkpatch issues.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 266a263a0d6..6fc602d0edb 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -118,7 +118,7 @@ def get_pool_version(self):
         """
         data = self.dmg_command.pool_query(self.pool.uuid)
         return int(data["response"]["version"])
- 
+
     def set_container(self, container):
         """Set the OSA utils container object.
         Args:
@@ -287,7 +287,7 @@ def run_ior_thread(self, action, oclass, test,
             oclass (str): IOR object class
             test (list): IOR test sequence
             flags (str): IOR flags
-            single_cont_read (bool) : Always read from the 
+            single_cont_read (bool) : Always read from the
                                       1st container.
                                       Defaults to True.
             fail_on_warning (bool)  : Test terminates
@@ -324,7 +324,7 @@ def ior_thread(self, pool, oclass, test, flags,
             oclass (str): IOR object class, container class.
             test (list): IOR test sequence
             flags (str): IOR flags
-            single_cont_read (bool) : Always read from the 
+            single_cont_read (bool) : Always read from the
                                       1st container.
                                       Defaults to True.
             fail_on_warning (bool)  : Test terminates

From 189be593d1c7c3dfc4068a4fa202c43472dae113 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 18 Mar 2021 21:42:08 -0400
Subject: [PATCH 21/37] DAOS-6923 test: Fix the ior_thread issue.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa
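
The stale guard in ior_thread() skipped container preparation whenever
self.container was already set. A before/after sketch of the change
(illustration only):

    # Before: containers were only prepared on the very first call.
    if single_cont_read is True and self.container is None:
        self.prepare_cont_ior_write_read(oclass, flags)

    # After: prepare_cont_ior_write_read() runs on every call and
    # decides internally whether to create or reuse a container.
    if single_cont_read is True:
        self.prepare_cont_ior_write_read(oclass, flags)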

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 6fc602d0edb..e96ea81babb 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -336,7 +336,7 @@ def ior_thread(self, pool, oclass, test, flags,
         self.ior_cmd.set_daos_params(self.server_group, self.pool)
         self.ior_cmd.dfs_oclass.update(oclass)
         self.ior_cmd.dfs_dir_oclass.update(oclass)
-        if single_cont_read is True and self.container is None:
+        if single_cont_read is True:
             # Prepare the container(s) and use them in the specific
             # way defined in prepare_cont_ior_write_read.
             self.prepare_cont_ior_write_read(oclass, flags)

From a557e807ed287ad9e3cfdc97aa9fd2caf070679e Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Mon, 22 Mar 2021 15:42:01 -0400
Subject: [PATCH 22/37] DAOS-6923 test: Added skipForTicket (DAOS-6925)
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py    | 20 +++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index fd89a6f0a70..910a2c90bb6 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -9,6 +9,7 @@
 from daos_utils import DaosCommand
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
+from apricot import skipForTicket
 
 
 class OSAOfflineReintegration(OSAUtils):
@@ -34,6 +35,7 @@ def setUp(self):
         self.hostfile_clients = write_host_file(
             self.hostlist_clients, self.workdir, None)
         self.dmg_command.exit_status_exception = True
+        self.pool_cont_dict = {}
 
     def run_offline_reintegration_test(self, num_pool, data=False,
                                        server_boot=False, oclass=None):
@@ -88,10 +90,16 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                         self.simple_exclude_reintegrate_loop(rank[val])
                     output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                            rank[val])
+                    # Check the IOR data after exclude
+                    if data:
+                        self.run_ior_thread("Read", oclass, test_seq)
                 else:
                     output = self.dmg_command.system_stop(ranks=rank[val],
                                                           force=True)
                     self.print_and_assert_on_rebuild_failure(output)
+                    # Check the IOR data after system stop
+                    if data:
+                        self.run_ior_thread("Read", oclass, test_seq)
                     output = self.dmg_command.system_start(ranks=rank[val])
                 # Just try to reintegrate rank 5
                 if (self.test_during_rebuild is True and val == 2):
@@ -136,12 +144,11 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             if data:
                 self.run_ior_thread("Read", oclass, test_seq)
                 self.run_mdtest_thread()
-                if self.test_during_rebuild is True:
-                    self.container = self.pool_cont_dict[self.pool]
-                    kwargs = {"pool": self.pool.uuid,
-                              "cont": self.container.uuid}
-                    output = self.daos_command.container_check(**kwargs)
-                    self.log.info(output)
+                self.container = self.pool_cont_dict[self.pool][0]
+                kwargs = {"pool": self.pool.uuid,
+                          "cont": self.container.uuid}
+                output = self.daos_command.container_check(**kwargs)
+                self.log.info(output)
 
     def test_osa_offline_reintegration_multiple_pools(self):
         """Test ID: DAOS-6923
@@ -179,6 +186,7 @@ def test_osa_offline_reintegrate_during_rebuild(self):
                                                    '/run/rebuild/*')
         self.run_offline_reintegration_test(1, data=True)
 
+    @skipForTicket("DAOS-6925")
     def test_osa_offline_reintegration_oclass(self):
         """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration

From 76842210d5438f41a6bb2f982e3aa9a6c9ba1efd Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Mon, 22 Mar 2021 17:26:12 -0400
Subject: [PATCH 23/37] DAOS-6923 test: Removed unwanted variable.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 910a2c90bb6..3c7a3ba38aa 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -35,7 +35,6 @@ def setUp(self):
         self.hostfile_clients = write_host_file(
             self.hostlist_clients, self.workdir, None)
         self.dmg_command.exit_status_exception = True
-        self.pool_cont_dict = {}
 
     def run_offline_reintegration_test(self, num_pool, data=False,
                                        server_boot=False, oclass=None):

From 33406afa7c15ffc2d11e89b058da70b878e6404b Mon Sep 17 00:00:00 2001
From: Di Wang <di.wang@intel.com>
Date: Tue, 16 Mar 2021 06:28:51 +0000
Subject: [PATCH 24/37] DAOS-5758 pl: fixes for placement

1. Add allow_status to layout generation, so that only
targets whose status is allowed can exist in the
layout. Also remove op_type/for_reint and some
duplication to make the placement algorithms easier
to follow.

2. During the reintegration reclaim process, the shard
id needs to be compared as well, i.e. if the shard id
differs, the object needs to be deleted too.

3. In particular, fix the find_reint and main placement
APIs returning too many items when the pool map
contains simultaneous reintegration, drain, and
failure operations. This is a possible real-world
scenario that would be triggered when a reintegration
is running and something fails.

4. Re-enable a few placement tests and add tests for
multiple simultaneous states.
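
A small Python model of the allow_status idea in point 1 (illustration
only; the real PO_COMP_ST_* flags live in daos/pool_map.h):

    # Component states modelled as bit flags, as the ORed masks in
    # this patch imply.
    NEW, UP, UPIN, DOWN, DOWNOUT, DRAIN = (1 << i for i in range(6))

    def target_avail(co_status, allow_status):
        # Same test as the new pool_target_avail() helper below.
        return bool(co_status & allow_status)

    assert target_avail(UPIN, UPIN)             # healthy target kept
    assert not target_avail(UP, UPIN)           # reintegrating target skipped
    assert target_avail(UP, UPIN | UP | DRAIN)  # unless explicitly allowed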

Signed-off-by: Di Wang <di.wang@intel.com>
Signed-off-by: Byron Marohn <byron.marohn@intel.com>
---
 src/common/pool_map.c                    |   4 +
 src/include/daos/placement.h             |   3 +-
 src/include/daos/pool_map.h              |   6 +
 src/placement/jump_map.c                 | 524 +++++++++++------------
 src/placement/pl_map.c                   |   4 +-
 src/placement/pl_map.h                   |   4 +-
 src/placement/pl_map_common.c            |  21 +-
 src/placement/ring_map.c                 |   3 +-
 src/placement/tests/jump_map_place_obj.c | 145 ++++++-
 src/rebuild/scan.c                       |   3 +-
 src/tests/suite/daos_rebuild_simple.c    |   2 -
 11 files changed, 414 insertions(+), 305 deletions(-)

diff --git a/src/common/pool_map.c b/src/common/pool_map.c
index f39029727d9..7a678bc8277 100644
--- a/src/common/pool_map.c
+++ b/src/common/pool_map.c
@@ -104,6 +104,10 @@ static struct pool_comp_state_dict comp_state_dict[] = {
 		.sd_state	= PO_COMP_ST_NEW,
 		.sd_name	= "NEW",
 	},
+	{
+		.sd_state	= PO_COMP_ST_DRAIN,
+		.sd_name	= "DRAIN",
+	},
 	{
 		.sd_state	= PO_COMP_ST_UNKNOWN,
 		.sd_name	= "UNKNOWN",
diff --git a/src/include/daos/placement.h b/src/include/daos/placement.h
index 263032eb4b4..d9f5b1d4f95 100644
--- a/src/include/daos/placement.h
+++ b/src/include/daos/placement.h
@@ -121,7 +121,8 @@ void pl_obj_layout_free(struct pl_obj_layout *layout);
 int  pl_obj_layout_alloc(unsigned int grp_size, unsigned int grp_nr,
 			 struct pl_obj_layout **layout_pp);
 bool pl_obj_layout_contains(struct pool_map *map, struct pl_obj_layout *layout,
-			    uint32_t rank, uint32_t target_index);
+			    uint32_t rank, uint32_t target_index,
+			    uint32_t shard);
 
 int pl_obj_place(struct pl_map *map,
 		 struct daos_obj_md *md,
diff --git a/src/include/daos/pool_map.h b/src/include/daos/pool_map.h
index 614a7d2d3f1..13006ea42f7 100644
--- a/src/include/daos/pool_map.h
+++ b/src/include/daos/pool_map.h
@@ -313,6 +313,12 @@ pool_target_unavail(struct pool_target *tgt, bool for_reint)
 	return pool_component_unavail(&tgt->ta_comp, for_reint);
 }
 
+static inline bool
+pool_target_avail(struct pool_target *tgt, uint32_t allow_status)
+{
+	return tgt->ta_comp.co_status & allow_status;
+}
+
 /** Check if the target is in PO_COMP_ST_DOWN status */
 static inline bool
 pool_target_down(struct pool_target *tgt)
diff --git a/src/placement/jump_map.c b/src/placement/jump_map.c
index 9995a9b3ac5..8f2d6841d11 100644
--- a/src/placement/jump_map.c
+++ b/src/placement/jump_map.c
@@ -60,26 +60,6 @@ struct pl_jump_map {
 	pool_comp_type_t	jmp_redundant_dom;
 };
 
-/**
- * This functions determines whether the object layout should be extended or
- * not based on the operation performed and the target status.
- *
- * \param[in]	op	The operation being performed
- * \param[in]	status	The component status.
- *
- * \return		True if the layout should be extended,
- *			False otherwise.
- */
-static inline bool
-can_extend(enum PL_OP_TYPE op, enum pool_comp_state state)
-{
-	if (op != PL_PLACE_EXTENDED)
-		return false;
-	if (state != PO_COMP_ST_UP && state != PO_COMP_ST_DRAIN)
-		return false;
-	return true;
-}
-
 /**
  * This functions finds the pairwise differences in the two layouts provided
  * and appends them into the d_list provided. The function appends the targets
@@ -111,7 +91,28 @@ layout_find_diff(struct pl_jump_map *jmap, struct pl_obj_layout *original,
 		if (reint_tgt != original_target) {
 			pool_map_find_target(jmap->jmp_map.pl_poolmap,
 					     reint_tgt, &temp_tgt);
-			remap_alloc_one(diff, index, temp_tgt, true);
+			if (pool_target_avail(temp_tgt, PO_COMP_ST_UPIN |
+							PO_COMP_ST_UP |
+							PO_COMP_ST_DRAIN |
+							PO_COMP_ST_NEW))
+				remap_alloc_one(diff, index, temp_tgt, true);
+			else
+				/* XXX: This isn't desirable - but it can happen
+				 * while a reintegration is happening and
+				 * something else fails. Placement will do a
+				 * pass to determine what failed (good), and
+				 * then do another pass to figure out where
+				 * things moved to. But that 2nd pass will
+				 * re-find failed things, and this diff function
+				 * will cause the failed targets to be re-added
+				 * to the layout as rebuilding. This should be
+				 * removed when placement is able to handle
+				 * this situation better
+				 */
+				D_DEBUG(DB_PL,
+					"skip remap %d to unavail tgt %u\n",
+					index, reint_tgt);
+
 		}
 	}
 }
@@ -211,8 +212,20 @@ pl_map2jmap(struct pl_map *map)
 	return container_of(map, struct pl_jump_map, jmp_map);
 }
 
+static void debug_print_allow_status(uint32_t allow_status)
+{
+	D_DEBUG(DB_PL, "Allow status: [%s%s%s%s%s%s%s ]\n",
+		allow_status & PO_COMP_ST_UNKNOWN ? " UNKNOWN" : "",
+		allow_status & PO_COMP_ST_NEW ? " NEW" : "",
+		allow_status & PO_COMP_ST_UP ? " UP" : "",
+		allow_status & PO_COMP_ST_UPIN ? " UPIN" : "",
+		allow_status & PO_COMP_ST_DOWN ? " DOWN" : "",
+		allow_status & PO_COMP_ST_DOWNOUT ? " DOWNOUT" : "",
+		allow_status & PO_COMP_ST_DRAIN ? " DRAIN" : "");
+}
+
 static inline uint32_t
-get_num_domains(struct pool_domain *curr_dom, enum PL_OP_TYPE op_type)
+get_num_domains(struct pool_domain *curr_dom, uint32_t allow_status)
 {
 	struct pool_domain *next_dom;
 	struct pool_target *next_target;
@@ -224,7 +237,7 @@ get_num_domains(struct pool_domain *curr_dom, enum PL_OP_TYPE op_type)
 	else
 		num_dom = curr_dom->do_child_nr;
 
-	if (op_type == PL_ADD)
+	if (allow_status & PO_COMP_ST_NEW)
 		return num_dom;
 
 	if (curr_dom->do_children != NULL) {
@@ -281,7 +294,7 @@ get_num_domains(struct pool_domain *curr_dom, enum PL_OP_TYPE op_type)
 static void
 get_target(struct pool_domain *curr_dom, struct pool_target **target,
 	   uint64_t obj_key, uint8_t *dom_used, uint8_t *dom_occupied,
-	   uint8_t *tgts_used, int shard_num, enum PL_OP_TYPE op_type)
+	   uint8_t *tgts_used, int shard_num, uint32_t allow_status)
 {
 	int                     range_set;
 	uint8_t                 found_target = 0;
@@ -296,7 +309,7 @@ get_target(struct pool_domain *curr_dom, struct pool_target **target,
 		uint32_t        num_doms;
 
 		/* Retrieve number of nodes in this domain */
-		num_doms = get_num_domains(curr_dom, op_type);
+		num_doms = get_num_domains(curr_dom, allow_status);
 
 		/* If choosing target (lowest fault domain level) */
 		if (curr_dom->do_children == NULL) {
@@ -409,7 +422,6 @@ get_target(struct pool_domain *curr_dom, struct pool_target **target,
 	} while (!found_target);
 }
 
-
 uint32_t
 count_available_spares(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
 		uint32_t failed_in_layout)
@@ -452,9 +464,9 @@ count_available_spares(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
 static int
 obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md,
 		 struct pl_obj_layout *layout, struct jm_obj_placement *jmop,
-		 d_list_t *remap_list, enum PL_OP_TYPE op_type,
+		 d_list_t *remap_list, uint32_t allow_status,
 		 uint8_t *tgts_used, uint8_t *dom_used, uint8_t *dom_occupied,
-		 uint32_t failed_in_layout, d_list_t *extend_list)
+		 uint32_t failed_in_layout, bool *is_extending)
 {
 	struct failed_shard     *f_shard;
 	struct pl_obj_shard     *l_shard;
@@ -463,7 +475,6 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md,
 	d_list_t                *current;
 	daos_obj_id_t           oid;
 	bool                    spare_avail = true;
-	bool			for_reint;
 	uint64_t                key;
 	uint32_t		spares_left;
 	int                     rc;
@@ -471,7 +482,6 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md,
 
 	remap_dump(remap_list, md, "remap:");
 
-	for_reint = (op_type == PL_REINT);
 	current = remap_list->next;
 	spare_tgt = NULL;
 	oid = md->omd_id;
@@ -491,6 +501,7 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md,
 		l_shard = &layout->ol_shards[f_shard->fs_shard_idx];
 		D_DEBUG(DB_PL, "Attempting to remap failed shard: "
 			DF_FAILEDSHARD"\n", DP_FAILEDSHARD(*f_shard));
+		debug_print_allow_status(allow_status);
 
 		/*
 		 * If there are any targets left, there are potentially valid
@@ -505,23 +516,18 @@ obj_remap_shards(struct pl_jump_map *jmap, struct daos_obj_md *md,
 			rebuild_key = crc(key, f_shard->fs_shard_idx);
 			get_target(root, &spare_tgt, crc(key, rebuild_key),
 				   dom_used, dom_occupied, tgts_used,
-				   shard_id, op_type);
+				   shard_id, allow_status);
 			D_ASSERT(spare_tgt != NULL);
 			D_DEBUG(DB_PL, "Trying new target: "DF_TARGET"\n",
 				DP_TARGET(spare_tgt));
 			spares_left--;
 		}
 
-		determine_valid_spares(spare_tgt, md, spare_avail,
-				&current, remap_list, for_reint, f_shard,
-				l_shard);
+		determine_valid_spares(spare_tgt, md, spare_avail, &current,
+				       remap_list, allow_status, f_shard,
+				       l_shard, is_extending);
 	}
 
-	if (op_type == PL_PLACE_EXTENDED) {
-		rc = pl_map_extend(layout, extend_list);
-		if (rc != 0)
-			return rc;
-	}
 	return 0;
 }
 
@@ -544,7 +550,6 @@ jump_map_obj_spec_place_get(struct pl_jump_map *jmap, daos_obj_id_t oid,
 
 	*target = &(tgts[pos]);
 
-
 	rc = pool_map_find_domain(jmap->jmp_map.pl_poolmap, PO_COMP_TP_ROOT,
 				  PO_COMP_ID_ALL, &root);
 	D_ASSERT(rc == 1);
@@ -589,45 +594,43 @@ jump_map_obj_spec_place_get(struct pl_jump_map *jmap, daos_obj_id_t oid,
  * \param[in]   jmap            The placement map used for this placement.
  * \param[in]   jmop            The layout group size and count.
  * \param[in]   md              Object metadata.
+ * \param[in]	allow_status	target status allowed to be in the layout.
  * \param[out]  layout          This will contain the layout for the object
- * \param[out]  remap_list      This will contain the targets that need to
+ * \param[out]  out_list	This will contain the targets that need to
  *                              be rebuilt and in the case of rebuild, may be
  *                              returned during the rebuild process.
+ * \param[out]	is_extending	set if drain/extend/reintegrate targets
+ *                              exist in this layout, in which case extra
+ *                              shards might need to be inserted into it.
  *
  * \return                      An error code determining if the function
  *                              succeeded (0) or failed.
  */
 static int
 get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
-		  struct jm_obj_placement *jmop, d_list_t *remap_list,
-		  enum PL_OP_TYPE op_type, struct daos_obj_md *md)
+		  struct jm_obj_placement *jmop, d_list_t *out_list,
+		  uint32_t allow_status, struct daos_obj_md *md,
+		  bool *is_extending)
 {
 	struct pool_target      *target;
 	struct pool_domain      *root;
 	daos_obj_id_t           oid;
-	d_list_t		extend_list;
 	uint8_t                 *dom_used = NULL;
 	uint8_t                 *dom_occupied = NULL;
 	uint8_t                 *tgts_used = NULL;
-	uint32_t                dom_used_length;
+	uint32_t                dom_size;
 	uint64_t                key;
-	uint32_t		fail_tgt_cnt;
-	bool			for_reint;
-	enum pool_comp_state	state;
-	int i, j, k, rc;
+	uint32_t		fail_tgt_cnt = 0;
+	bool			spec_oid = false;
+	d_list_t		local_list;
+	d_list_t		*remap_list;
+	int			i, j, k;
+	int			rc = 0;
 
 	/* Set the pool map version */
 	layout->ol_ver = pl_map_version(&(jmap->jmp_map));
 	D_DEBUG(DB_PL, "Building layout. map version: %d\n", layout->ol_ver);
-
-	j = 0;
-	k = 0;
-	fail_tgt_cnt = 0;
-	oid = md->omd_id;
-	key = oid.hi ^ oid.lo;
-	target = NULL;
-	for_reint = (op_type == PL_REINT);
-	D_DEBUG(DB_PL, "for_reint: %s", for_reint ? "Yes" : "No");
+	debug_print_allow_status(allow_status);
 
 	rc = pool_map_find_domain(jmap->jmp_map.pl_poolmap, PO_COMP_TP_ROOT,
 				  PO_COMP_ID_ALL, &root);
@@ -635,64 +638,51 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
 		D_ERROR("Could not find root node in pool map.");
 		return -DER_NONEXIST;
 	}
+	rc = 0;
 
-	dom_used_length = (struct pool_domain *)(root->do_targets) - (root) + 1;
-
-	D_ALLOC_ARRAY(dom_used, (dom_used_length / 8) + 1);
-	D_ALLOC_ARRAY(dom_occupied, (dom_used_length / 8) + 1);
-	D_ALLOC_ARRAY(tgts_used, (root->do_target_nr / 8) + 1);
-	D_INIT_LIST_HEAD(&extend_list);
+	if (out_list != NULL) {
+		remap_list = out_list;
+	} else {
+		D_INIT_LIST_HEAD(&local_list);
+		remap_list = &local_list;
+	}
 
+	dom_size = (struct pool_domain *)(root->do_targets) - (root) + 1;
+	D_ALLOC_ARRAY(dom_used, (dom_size / NBBY) + 1);
+	D_ALLOC_ARRAY(dom_occupied, (dom_size / NBBY) + 1);
+	D_ALLOC_ARRAY(tgts_used, (root->do_target_nr / NBBY) + 1);
 	if (dom_used == NULL || dom_occupied == NULL || tgts_used == NULL)
 		D_GOTO(out, rc = -DER_NOMEM);
 
-	/**
-	 * If the object class is a special class then the first shard must be
-	 * hand picked because there is no other way to specify a starting
-	 * location.
-	 */
-	if (daos_obj_is_srank(oid)) {
-		rc = jump_map_obj_spec_place_get(jmap, oid, &target, dom_used,
-						 dom_used_length);
-		if (rc) {
-			D_ERROR("special oid "DF_OID" failed: rc %d\n",
-				DP_OID(oid), rc);
-			D_GOTO(out, rc);
-		}
-
-		layout->ol_shards[0].po_target = target->ta_comp.co_id;
-		layout->ol_shards[0].po_shard = 0;
-		layout->ol_shards[0].po_fseq = target->ta_comp.co_fseq;
-		setbit(tgts_used, target->ta_comp.co_id);
-
-		if (pool_target_unavail(target, for_reint)) {
-			fail_tgt_cnt++;
-			state = target->ta_comp.co_status;
-			rc = remap_alloc_one(remap_list, 0, target, false);
-			if (rc)
-				D_GOTO(out, rc);
-			if (can_extend(op_type, state)) {
-				rc = remap_alloc_one(&extend_list, k, target,
-						     true);
-				if (rc != 0)
+	oid = md->omd_id;
+	key = oid.hi ^ oid.lo;
+	if (daos_obj_is_srank(oid))
+		spec_oid = true;
+
+	for (i = 0, k = 0; i < jmop->jmop_grp_nr; i++) {
+		for (j = 0; j < jmop->jmop_grp_size; j++, k++) {
+			target = NULL;
+			if (spec_oid && i == 0 && j == 0) {
+				/**
+				 * If the object class is a special class then
+				 * the first shard must be picked specially.
+				 */
+				rc = jump_map_obj_spec_place_get(jmap, oid,
+								 &target,
+								 dom_used,
+								 dom_size);
+				if (rc) {
+					D_ERROR("special oid "DF_OID
+						" failed: rc %d\n",
+						DP_OID(oid), rc);
 					D_GOTO(out, rc);
+				}
+				setbit(tgts_used, target->ta_comp.co_id);
+			} else {
+				get_target(root, &target, key, dom_used,
+					   dom_occupied, tgts_used, k,
+					   allow_status);
 			}
-		}
-
-		/** skip the first shard because it's been
-		 * determined by Obj class
-		 */
-		j = 1;
-		k = 1;
-	}
-	for (i = 0; i < jmop->jmop_grp_nr; i++) {
-
-		for (; j < jmop->jmop_grp_size; j++, k++) {
-			uint32_t tgt_id;
-			uint32_t fseq;
-
-			get_target(root, &target, key, dom_used, dom_occupied,
-				   tgts_used, k, op_type);
 
 			if (target == NULL) {
 				D_DEBUG(DB_PL, "no targets for %d/%d/%d\n",
@@ -702,52 +692,44 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
 				layout->ol_shards[k].po_fseq = 0;
 				continue;
 			}
-
-			tgt_id = target->ta_comp.co_id;
-			fseq = target->ta_comp.co_fseq;
-
-			layout->ol_shards[k].po_target = tgt_id;
+			layout->ol_shards[k].po_target =
+				target->ta_comp.co_id;
+			layout->ol_shards[k].po_fseq =
+				target->ta_comp.co_fseq;
 			layout->ol_shards[k].po_shard = k;
-			layout->ol_shards[k].po_fseq = fseq;
 
 			/** If target is failed queue it for remap*/
-			if (pool_target_unavail(target, for_reint)) {
-				D_DEBUG(DB_PL, "Target unavailable " DF_TARGET
-					". Adding to remap_list:\n",
-					DP_TARGET(target));
+			if (!pool_target_avail(target, allow_status)) {
 				fail_tgt_cnt++;
-				state = target->ta_comp.co_status;
+				D_DEBUG(DB_PL, "Target unavailable " DF_TARGET
+					". Adding to remap_list: fail cnt %d\n",
+					DP_TARGET(target), fail_tgt_cnt);
 				rc = remap_alloc_one(remap_list, k, target,
-						false);
+						     false);
 				if (rc)
 					D_GOTO(out, rc);
 
-				if (can_extend(op_type, state)) {
-					D_DEBUG(DB_PL, "Adding "DF_TARGET" to"
-						" extend_list\n",
-						DP_TARGET(target));
-					remap_alloc_one(&extend_list, k,
-							target, true);
-				}
+				if (is_extending != NULL &&
+				    (target->ta_comp.co_status ==
+				     PO_COMP_ST_UP ||
+				     target->ta_comp.co_status ==
+				     PO_COMP_ST_DRAIN))
+					*is_extending = true;
 			}
 		}
-
-		j = 0;
 	}
 
-	rc = 0;
-	D_DEBUG(DB_PL, "Fail tgt cnt: %d\n", fail_tgt_cnt);
 	if (fail_tgt_cnt > 0)
 		rc = obj_remap_shards(jmap, md, layout, jmop, remap_list,
-				      op_type, tgts_used, dom_used,
-				      dom_occupied, fail_tgt_cnt,
-				      &extend_list);
+				      allow_status, tgts_used, dom_used,
+				      dom_occupied, fail_tgt_cnt, is_extending);
 out:
-	if (rc) {
+	if (rc)
 		D_ERROR("jump_map_obj_layout_fill failed, rc "DF_RC"\n",
 			DP_RC(rc));
-		remap_list_free_all(remap_list);
-	}
+	if (remap_list == &local_list)
+		remap_list_free_all(&local_list);
+
 	if (dom_used)
 		D_FREE(dom_used);
 	if (dom_occupied)
@@ -758,6 +740,42 @@ get_object_layout(struct pl_jump_map *jmap, struct pl_obj_layout *layout,
 	return rc;
 }
 
+static int
+obj_layout_alloc_and_get(struct pl_jump_map *jmap,
+			 struct jm_obj_placement *jmop, struct daos_obj_md *md,
+			 uint32_t allow_status, struct pl_obj_layout **layout_p,
+			 d_list_t *remap_list, bool *is_extending)
+{
+	int rc;
+
+	/* Allocate space to hold the layout */
+	D_ASSERT(jmop->jmop_grp_size > 0);
+	D_ASSERT(jmop->jmop_grp_nr > 0);
+	rc = pl_obj_layout_alloc(jmop->jmop_grp_size, jmop->jmop_grp_nr,
+				 layout_p);
+	if (rc) {
+		D_ERROR("pl_obj_layout_alloc failed, rc "DF_RC"\n",
+			DP_RC(rc));
+		return rc;
+	}
+
+	rc = get_object_layout(jmap, *layout_p, jmop, remap_list, allow_status,
+			       md, is_extending);
+	if (rc) {
+		D_ERROR("get object layout failed, rc "DF_RC"\n",
+			DP_RC(rc));
+		D_GOTO(out, rc);
+	}
+
+out:
+	if (rc != 0) {
+		if (*layout_p != NULL)
+			pl_obj_layout_free(*layout_p);
+		*layout_p = NULL;
+	}
+	return rc;
+}
+
 /**
  * Frees the placement map
  *
@@ -873,20 +891,21 @@ jump_map_obj_place(struct pl_map *map, struct daos_obj_md *md,
 		   struct pl_obj_layout **layout_pp)
 {
 	struct pl_jump_map	*jmap;
-	struct pl_obj_layout	*layout;
-	struct pl_obj_layout	*add_layout = NULL;
+	struct pl_obj_layout	*layout = NULL;
+	struct pl_obj_layout	*extend_layout = NULL;
 	struct jm_obj_placement	jmop;
-	struct pool_domain	*root;
-	d_list_t		remap_list;
-	d_list_t		add_list;
+	d_list_t		extend_list;
+	bool			is_extending = false;
+	bool			is_adding_new = false;
 	daos_obj_id_t		oid;
+	struct pool_domain	*root;
+	uint32_t		allow_status;
 	int			rc;
 
-	D_DEBUG(DB_PL, "Determining location for object: "DF_OID", ver: %d\n",
-		DP_OID(md->omd_id), md->omd_ver);
-
 	jmap = pl_map2jmap(map);
 	oid = md->omd_id;
+	D_DEBUG(DB_PL, "Determining location for object: "DF_OID", ver: %d\n",
+		DP_OID(oid), md->omd_ver);
 
 	rc = jm_obj_placement_get(jmap, md, shard_md, &jmop);
 	if (rc) {
@@ -894,66 +913,71 @@ jump_map_obj_place(struct pl_map *map, struct daos_obj_md *md,
 		return rc;
 	}
 
-	/* Allocate space to hold the layout */
-	rc = pl_obj_layout_alloc(jmop.jmop_grp_size, jmop.jmop_grp_nr,
-				 &layout);
-	if (rc) {
-		D_ERROR("pl_obj_layout_alloc failed, rc "DF_RC"\n", DP_RC(rc));
-		return rc;
-	}
-
-	D_INIT_LIST_HEAD(&remap_list);
-	rc = get_object_layout(jmap, layout, &jmop, &remap_list,
-				PL_PLACE_EXTENDED, md);
+	D_INIT_LIST_HEAD(&extend_list);
+	allow_status = PO_COMP_ST_UPIN;
+	rc = obj_layout_alloc_and_get(jmap, &jmop, md, allow_status, &layout,
+				      NULL, &is_extending);
 	if (rc != 0) {
 		D_ERROR("get_layout_alloc failed, rc "DF_RC"\n", DP_RC(rc));
-		pl_obj_layout_free(layout);
-		return rc;
+		D_GOTO(out, rc);
 	}
-	/* Needed to check if domains are being added to pool map */
-	rc = pool_map_find_domain(jmap->jmp_map.pl_poolmap, PO_COMP_TP_ROOT,
-				  PO_COMP_ID_ALL, &root);
-	D_ASSERT(rc == 1);
 
-	if (is_pool_adding(root)) {
-		/* Allocate space to hold the layout */
-		rc = pl_obj_layout_alloc(jmop.jmop_grp_size, jmop.jmop_grp_nr,
-					 &add_layout);
-		if (rc) {
-			D_ERROR("pl_obj_layout_alloc failed, rc "DF_RC"\n",
-				DP_RC(rc));
-			goto out;
-		}
+	obj_layout_dump(oid, layout);
 
-		remap_list_free_all(&remap_list);
-		D_INIT_LIST_HEAD(&remap_list);
+	rc = pool_map_find_domain(jmap->jmp_map.pl_poolmap,
+				  PO_COMP_TP_ROOT, PO_COMP_ID_ALL,
+				  &root);
+	D_ASSERT(rc == 1);
+	rc = 0;
+	if (is_pool_adding(root))
+		is_adding_new = true;
 
-		rc = get_object_layout(jmap, add_layout, &jmop, &remap_list,
-				       PL_ADD, md);
-		assert(rc == 0);
-		D_INIT_LIST_HEAD(&add_list);
-		layout_find_diff(jmap, layout, add_layout, &add_list);
+	/* The layout might be getting extended, i.e. extra shards need
+	 * to be added to the layout.
+	 */
+	if (unlikely(is_extending || is_adding_new)) {
+		/* Needed to check if domains are being added to pool map */
+		D_DEBUG(DB_PL, DF_OID"/%d is being extended.\n",
+			DP_OID(oid), md->omd_ver);
+		if (is_adding_new)
+			allow_status |= PO_COMP_ST_NEW;
+		else
+			allow_status |= PO_COMP_ST_UP | PO_COMP_ST_DRAIN;
+
+		/* Don't repeat remapping failed shards during this phase -
+		 * they have already been remapped.
+		 */
+		allow_status |= PO_COMP_ST_DOWN;
+		rc = obj_layout_alloc_and_get(jmap, &jmop, md, allow_status,
+					      &extend_layout, NULL, NULL);
+		if (rc)
+			D_GOTO(out, rc);
 
-		if (!d_list_empty(&add_list))
-			rc = pl_map_extend(layout, &add_list);
+		obj_layout_dump(oid, extend_layout);
+		layout_find_diff(jmap, layout, extend_layout, &extend_list);
+		if (!d_list_empty(&extend_list)) {
+			rc = pl_map_extend(layout, &extend_list);
+			if (rc)
+				D_GOTO(out, rc);
+		}
+		obj_layout_dump(oid, layout);
 	}
+
+	*layout_pp = layout;
 out:
-	remap_list_free_all(&remap_list);
+	remap_list_free_all(&extend_list);
 
-	if (add_layout != NULL)
-		pl_obj_layout_free(add_layout);
+	if (extend_layout != NULL)
+		pl_obj_layout_free(extend_layout);
 
 	if (rc < 0) {
 		D_ERROR("Could not generate placement layout, rc "DF_RC"\n",
 			DP_RC(rc));
-		pl_obj_layout_free(layout);
-		return rc;
+		if (layout != NULL)
+			pl_obj_layout_free(layout);
 	}
 
-	*layout_pp = layout;
-	obj_layout_dump(oid, layout);
-
-	return DER_SUCCESS;
+	return rc;
 }
 
 /**
@@ -990,7 +1014,7 @@ jump_map_obj_find_rebuild(struct pl_map *map, struct daos_obj_md *md,
 
 	int idx = 0;
 
-	D_DEBUG(DB_PL, "Finding Rebuild\n");
+	D_DEBUG(DB_PL, "Finding Rebuild at version: %u\n", rebuild_ver);
 
 	/* Caller should guarantee the pl_map is up-to-date */
 	if (pl_map_version(map) < rebuild_ver) {
@@ -1008,32 +1032,20 @@ jump_map_obj_find_rebuild(struct pl_map *map, struct daos_obj_md *md,
 		return rc;
 	}
 
-	/* Allocate space to hold the layout */
-	rc = pl_obj_layout_alloc(jmop.jmop_grp_size, jmop.jmop_grp_nr,
-				 &layout);
-	if (rc) {
-		D_ERROR("pl_obj_layout_alloc failed, rc "DF_RC"\n", DP_RC(rc));
-		return rc;
-	}
-
 	D_INIT_LIST_HEAD(&remap_list);
-	rc = get_object_layout(jmap, layout, &jmop, &remap_list, PL_REBUILD,
-				md);
-
-	if (rc < 0) {
-		D_ERROR("Could not generate placement layout, rc "DF_RC"\n",
-			DP_RC(rc));
-		goto out;
-	}
+	rc = obj_layout_alloc_and_get(jmap, &jmop, md, PO_COMP_ST_UPIN, &layout,
+				      &remap_list, NULL);
+	if (rc < 0)
+		D_GOTO(out, rc);
 
 	obj_layout_dump(oid, layout);
-
 	rc = remap_list_fill(map, md, shard_md, rebuild_ver, tgt_id, shard_idx,
 			     array_size, &idx, layout, &remap_list, false);
 
 out:
 	remap_list_free_all(&remap_list);
-	pl_obj_layout_free(layout);
+	if (layout != NULL)
+		pl_obj_layout_free(layout);
 	return rc < 0 ? rc : idx;
 }
 
@@ -1044,16 +1056,16 @@ jump_map_obj_find_reint(struct pl_map *map, struct daos_obj_md *md,
 			uint32_t *shard_id, unsigned int array_size)
 {
 	struct pl_jump_map              *jmap;
-	struct pl_obj_layout            *layout;
-	struct pl_obj_layout            *reint_layout;
-	d_list_t                        remap_list;
-	d_list_t                        reint_list;
+	struct pl_obj_layout            *layout = NULL;
+	struct pl_obj_layout            *reint_layout = NULL;
+	d_list_t			reint_list;
 	struct jm_obj_placement         jop;
+	uint32_t			allow_status;
 	int                             rc;
 
 	int idx = 0;
 
-	D_DEBUG(DB_PL, "Finding Rebuild\n");
+	D_DEBUG(DB_PL, "Finding Reint at version: %u\n", reint_ver);
 
 	/* Caller should guarantee the pl_map is up-to-date */
 	if (pl_map_version(map) < reint_ver) {
@@ -1063,41 +1075,27 @@ jump_map_obj_find_reint(struct pl_map *map, struct daos_obj_md *md,
 	}
 
 	jmap = pl_map2jmap(map);
-
 	rc = jm_obj_placement_get(jmap, md, shard_md, &jop);
 	if (rc) {
 		D_ERROR("jm_obj_placement_get failed, rc %d.\n", rc);
 		return rc;
 	}
 
-	/* Allocate space to hold the layout */
-	rc = pl_obj_layout_alloc(jop.jmop_grp_size, jop.jmop_grp_nr,
-			&layout);
-	if (rc)
-		return 0;
-
-	rc = pl_obj_layout_alloc(jop.jmop_grp_size, jop.jmop_grp_nr,
-			&reint_layout);
-	if (rc)
-		goto out;
-
-	D_INIT_LIST_HEAD(&remap_list);
+	/* Ignore DOWN and DRAIN objects here - this API is only for finding
+	 * reintegration candidates
+	 */
+	allow_status = PO_COMP_ST_UPIN | PO_COMP_ST_DOWN | PO_COMP_ST_DRAIN;
 	D_INIT_LIST_HEAD(&reint_list);
+	rc = obj_layout_alloc_and_get(jmap, &jop, md, allow_status, &layout,
+				      NULL, NULL);
+	if (rc < 0)
+		D_GOTO(out, rc);
 
-	/* Get original placement */
-	rc = get_object_layout(jmap, layout, &jop, &remap_list, PL_PLACE, md);
-	if (rc)
-		goto out;
-
-	/* Clear list for next placement operation. */
-	remap_list_free_all(&remap_list);
-	D_INIT_LIST_HEAD(&remap_list);
-
-	/* Get placement after reintegration. */
-	rc = get_object_layout(jmap, reint_layout, &jop, &remap_list, PL_REINT,
-			       md);
-	if (rc)
-		goto out;
+	allow_status |= PO_COMP_ST_UP;
+	rc = obj_layout_alloc_and_get(jmap, &jop, md, allow_status,
+				      &reint_layout, NULL, NULL);
+	if (rc < 0)
+		D_GOTO(out, rc);
 
 	layout_find_diff(jmap, layout, reint_layout, &reint_list);
 
@@ -1106,8 +1104,6 @@ jump_map_obj_find_reint(struct pl_map *map, struct daos_obj_md *md,
 			     false);
 out:
 	remap_list_free_all(&reint_list);
-	remap_list_free_all(&remap_list);
-
 	if (layout != NULL)
 		pl_obj_layout_free(layout);
 	if (reint_layout != NULL)
@@ -1123,15 +1119,14 @@ jump_map_obj_find_addition(struct pl_map *map, struct daos_obj_md *md,
 			   uint32_t *shard_id, unsigned int array_size)
 {
 	struct pl_jump_map              *jmap;
-	struct pl_obj_layout            *layout;
-	struct pl_obj_layout            *add_layout;
-	d_list_t                        remap_list;
+	struct pl_obj_layout            *layout = NULL;
+	struct pl_obj_layout            *add_layout = NULL;
 	d_list_t                        add_list;
 	struct jm_obj_placement         jop;
+	uint32_t			allow_status;
+	int				idx = 0;
 	int                             rc;
 
-	int idx = 0;
-
 	D_DEBUG(DB_PL, "Finding new layout for server addition\n");
 
 	/* Caller should guarantee the pl_map is up-to-date */
@@ -1149,41 +1144,24 @@ jump_map_obj_find_addition(struct pl_map *map, struct daos_obj_md *md,
 		return rc;
 	}
 
-	/* Allocate space to hold the layout */
-	rc = pl_obj_layout_alloc(jop.jmop_grp_size, jop.jmop_grp_nr, &layout);
-	if (rc)
-		return rc;
-
-	D_INIT_LIST_HEAD(&remap_list);
+	allow_status = PO_COMP_ST_UPIN;
 	D_INIT_LIST_HEAD(&add_list);
-
-	rc = pl_obj_layout_alloc(jop.jmop_grp_size, jop.jmop_grp_nr,
-				 &add_layout);
-	if (rc)
-		goto out;
-
-	/* Get original placement */
-	rc = get_object_layout(jmap, layout, &jop, &remap_list, PL_PLACE, md);
+	rc = obj_layout_alloc_and_get(jmap, &jop, md, allow_status,
+				      &layout, NULL, NULL);
 	if (rc)
-		goto out;
+		D_GOTO(out, rc);
 
-	/* Clear list for next placement operation. */
-	remap_list_free_all(&remap_list);
-	D_INIT_LIST_HEAD(&remap_list);
-
-	/* Get placement after server addition. */
-	rc = get_object_layout(jmap, add_layout, &jop, &remap_list, PL_ADD,
-			       md);
+	allow_status |= PO_COMP_ST_NEW;
+	rc = obj_layout_alloc_and_get(jmap, &jop, md, allow_status,
+				      &add_layout, NULL, NULL);
 	if (rc)
-		goto out;
+		D_GOTO(out, rc);
 
 	layout_find_diff(jmap, layout, add_layout, &add_list);
-
 	rc = remap_list_fill(map, md, shard_md, reint_ver, tgt_rank, shard_id,
 			     array_size, &idx, add_layout, &add_list, true);
 out:
 	remap_list_free_all(&add_list);
-	remap_list_free_all(&remap_list);
 
 	if (layout != NULL)
 		pl_obj_layout_free(layout);
diff --git a/src/placement/pl_map.c b/src/placement/pl_map.c
index ca985ccea2c..cb49a0c5049 100644
--- a/src/placement/pl_map.c
+++ b/src/placement/pl_map.c
@@ -199,7 +199,7 @@ pl_obj_layout_free(struct pl_obj_layout *layout)
 /* Returns whether or not a given layout contains the specified rank */
 bool
 pl_obj_layout_contains(struct pool_map *map, struct pl_obj_layout *layout,
-		       uint32_t rank, uint32_t target_index)
+		       uint32_t rank, uint32_t target_index, uint32_t id_shard)
 {
 	struct pool_target *target;
 	int i;
@@ -211,7 +211,7 @@ pl_obj_layout_contains(struct pool_map *map, struct pl_obj_layout *layout,
 		rc = pool_map_find_target(map, layout->ol_shards[i].po_target,
 					  &target);
 		if (rc != 0 && target->ta_comp.co_rank == rank &&
-		    target->ta_comp.co_index == target_index)
+		    target->ta_comp.co_index == target_index && i == id_shard)
 			return true; /* Found a target and rank matches */
 	}
 
diff --git a/src/placement/pl_map.h b/src/placement/pl_map.h
index f6a14f23c2d..c2d88daad72 100644
--- a/src/placement/pl_map.h
+++ b/src/placement/pl_map.h
@@ -119,9 +119,9 @@ remap_list_fill(struct pl_map *map, struct daos_obj_md *md,
 void
 determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md,
 		       bool spare_avail, d_list_t **current,
-		       d_list_t *remap_list, bool for_reint,
+		       d_list_t *remap_list, uint32_t allow_status,
 		       struct failed_shard *f_shard,
-		       struct pl_obj_shard *l_shard);
+		       struct pl_obj_shard *l_shard, bool *extending);
 
 int
 spec_place_rank_get(unsigned int *pos, daos_obj_id_t oid,
diff --git a/src/placement/pl_map_common.c b/src/placement/pl_map_common.c
index a52b35ea9a6..f15f0430a5a 100644
--- a/src/placement/pl_map_common.c
+++ b/src/placement/pl_map_common.c
@@ -94,10 +94,12 @@ inline void
 remap_list_free_all(d_list_t *remap_list)
 {
 	struct failed_shard *f_shard;
+	struct failed_shard *tmp;
 
-	while ((f_shard = d_list_pop_entry(remap_list, struct failed_shard,
-			fs_list)))
+	d_list_for_each_entry_safe(f_shard, tmp, remap_list, fs_list) {
+		d_list_del(&f_shard->fs_list);
 		D_FREE(f_shard);
+	}
 }
 
 /** dump remap list, for debug only */
@@ -232,8 +234,8 @@ remap_list_fill(struct pl_map *map, struct daos_obj_md *md,
 void
 determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md,
 		bool spare_avail, d_list_t **current, d_list_t *remap_list,
-		bool for_reint, struct failed_shard *f_shard,
-		struct pl_obj_shard *l_shard)
+		uint32_t allow_status, struct failed_shard *f_shard,
+		struct pl_obj_shard *l_shard, bool *is_extending)
 {
 	struct failed_shard *f_tmp;
 
@@ -241,7 +243,7 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md,
 		goto next_fail;
 
 	/* The selected spare target is down as well */
-	if (pool_target_unavail(spare_tgt, for_reint)) {
+	if (!pool_target_avail(spare_tgt, allow_status)) {
 		D_ASSERTF(spare_tgt->ta_comp.co_fseq !=
 			  f_shard->fs_fseq, "same fseq %u!\n",
 			  f_shard->fs_fseq);
@@ -294,6 +296,10 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md,
 		D_DEBUG(DB_PL, "failed shard ("DF_FAILEDSHARD") added to "
 			       "remamp_list\n", DP_FAILEDSHARD(*f_shard));
 		remap_add_one(remap_list, f_shard);
+		if (is_extending != NULL &&
+		    (spare_tgt->ta_comp.co_status == PO_COMP_ST_UP ||
+		     spare_tgt->ta_comp.co_status == PO_COMP_ST_DRAIN))
+			*is_extending = true;
 
 		/* Continue with the failed shard has minimal fseq */
 		if ((*current) == remap_list) {
@@ -310,6 +316,7 @@ determine_valid_spares(struct pool_target *spare_tgt, struct daos_obj_md *md,
 			spare_tgt->ta_comp.co_fseq);
 		return; /* try next spare */
 	}
+
 next_fail:
 	if (spare_avail) {
 		/* The selected spare target is up and ready */
@@ -468,7 +475,6 @@ pl_map_extend(struct pl_obj_layout *layout, d_list_t *extended_list)
 		D_FREE(grp_map);
 	if (grp_count != grp_cnt_array && grp_count != NULL)
 		D_FREE(grp_count);
-	remap_list_free_all(extended_list);
 	return rc;
 }
 
@@ -477,7 +483,8 @@ is_pool_adding(struct pool_domain *dom)
 {
 	uint32_t child_nr;
 
-	while (dom->do_children && dom->do_comp.co_status != PO_COMP_ST_NEW) {
+	while (dom->do_children &&
+	       dom->do_comp.co_status != PO_COMP_ST_NEW) {
 		child_nr = dom->do_child_nr;
 		dom = &dom->do_children[child_nr - 1];
 	}
diff --git a/src/placement/ring_map.c b/src/placement/ring_map.c
index b0877a62156..c57bbfd57a5 100644
--- a/src/placement/ring_map.c
+++ b/src/placement/ring_map.c
@@ -1022,7 +1022,8 @@ ring_obj_remap_shards(struct pl_ring_map *rimap, struct daos_obj_md *md,
 		spare_tgt = &tgts[plts[spare_idx].pt_pos];
 
 		determine_valid_spares(spare_tgt, md, spare_avail, &current,
-				       remap_list, for_reint, f_shard, l_shard);
+				       remap_list, for_reint, f_shard, l_shard,
+				       NULL);
 	}
 
 	remap_dump(remap_list, md, "after remap:");
diff --git a/src/placement/tests/jump_map_place_obj.c b/src/placement/tests/jump_map_place_obj.c
index 5d79646bfa7..43730c09b52 100644
--- a/src/placement/tests/jump_map_place_obj.c
+++ b/src/placement/tests/jump_map_place_obj.c
@@ -921,6 +921,7 @@ jtc_snapshot_layout_targets(struct jm_test_ctx *ctx)
 	} while (0)
 
 #define UP	POOL_REINT
+#define UPIN	POOL_ADD_IN
 #define DOWN	POOL_EXCLUDE
 #define DOWNOUT	POOL_EXCLUDE_OUT
 #define DRAIN	POOL_DRAIN
@@ -1242,10 +1243,23 @@ down_back_to_up_in_same_order(void **state)
 	jtc_set_status_on_target(&ctx, UP, orig_shard_targets[0]);
 	jtc_assert_scan_and_layout(&ctx);
 
-	jtc_fini(&ctx);
-	skip_msg("DAOS-6519: too many things are in the reint scan");
-	assert_int_equal(1, ctx.reint.out_nr);
-	jtc_assert_rebuild_reint_new(ctx, 1, 0, 1, 0);
+	/* NOTE: This is a really important test case. Even though this test
+	 * seems like it should only move one shard (because only one target is
+	 * being reintegrated), this particular combination happens to trigger
+	 * extra data movement, resulting in two shards moving - one moving back
+	 * to the reintegrated target, and one moving between two otherwise
+	 * healthy targets because of the retry/collision mechanism of the jump
+	 * map algorithm.
+	 *
+	 * XXX This will likely break if the jump consistent hashing algorithm
+	 * is changed. It's just fortunate we happened to trigger this somewhat
+	 * rare case here. If you are reading this later and you find this
+	 * assert triggering because the value is 1 instead of 2, likely the
+	 * placement algorithm was modified so that this test no longer hits
+	 * this corner case.
+	 */
+	assert_int_equal(2, ctx.reint.out_nr);
+	jtc_assert_rebuild_reint_new(ctx, 2, 0, 2, 0);
 
 	/* Take second downed target up */
 	jtc_set_status_on_target(&ctx, UP, orig_shard_targets[1]);
@@ -1404,8 +1418,6 @@ down_up_sequences1(void **state)
 
 	jtc_set_status_on_target(&ctx, UP, shard_target_2);
 	jtc_assert_scan_and_layout(&ctx);
-	jtc_fini(&ctx);
-	skip_msg("Investigation into DAOS-6519 is similar/same issue.");
 	is_true(jtc_has_shard_moving_to_target(&ctx, 0, shard_target_2));
 
 	jtc_set_status_on_target(&ctx, UP, shard_target_1);
@@ -1446,8 +1458,6 @@ drain_all_with_extra_domains(void **state)
 	 */
 	assert_int_equal(8, jtc_get_layout_target_count(&ctx));
 
-	jtc_fini(&ctx);
-	skip_msg("DAOS-6300 - too many are marked as rebuild");
 	assert_int_equal(4, jtc_get_layout_rebuild_count(&ctx));
 	for (i = 0; i < shards_nr; i++) {
 		is_true(jtc_has_shard_with_target_rebuilding(&ctx, i, NULL));
@@ -1478,8 +1488,6 @@ drain_all_with_enough_targets(void **state)
 	 * rebuilding and one not
 	 */
 	for (i = 0; i < shards_nr; i++) {
-		jtc_fini(&ctx);
-		skip_msg("DAOS-6300 - Not drained to other target?");
 		assert_int_equal(0, jtc_get_layout_bad_count(&ctx));
 		is_true(jtc_has_shard_with_target_rebuilding(&ctx, i, NULL));
 		is_true(jtc_has_shard_with_rebuilding_not_set(&ctx, i));
@@ -1510,8 +1518,6 @@ drain_target_same_shard_repeatedly_for_all_shards(void **state)
 			is_true(jtc_has_shard_with_target_rebuilding(&ctx,
 				shard_id, &new_target));
 
-			jtc_fini(&ctx);
-			skip_msg("DAOS-6300: All are marked as rebuilding");
 			is_true(jtc_has_shard_target_not_rebuilding(&ctx,
 				shard_id, target));
 
@@ -1564,8 +1570,6 @@ one_server_is_added(void **state)
 	assert_int_equal(0, ctx.rebuild.out_nr);
 	assert_int_equal(0, ctx.reint.out_nr);
 
-	jtc_fini(&ctx);
-	skip_msg("DAOS-6303 - should have targets marked as rebuild");
 	assert_int_equal(ctx.new.out_nr, jtc_get_layout_rebuild_count(&ctx));
 
 	jtc_fini(&ctx);
@@ -1573,11 +1577,118 @@ one_server_is_added(void **state)
 
 /*
  * ------------------------------------------------
- * Leave in multiple states at same time
+ * Leave in multiple states at same time (no addition)
  * ------------------------------------------------
  */
 static void
 placement_handles_multiple_states(void **state)
+{
+	struct jm_test_ctx ctx;
+	int ver_after_reint;
+	int ver_after_fail;
+	int ver_after_drain;
+	int ver_after_reint_complete;
+	uint32_t reint_tgt_id;
+	uint32_t fail_tgt_id;
+	uint32_t rebuilding;
+
+	jtc_init_with_layout(&ctx, 4, 1, 8, OC_RP_3G1, g_verbose);
+
+	/* first shard goes down, rebuilt, then reintegrated */
+	jtc_set_status_on_shard_target(&ctx, DOWN, 0);
+	jtc_set_status_on_shard_target(&ctx, DOWNOUT, 0);
+	jtc_set_status_on_shard_target(&ctx, UP, 0);
+	reint_tgt_id = jtc_layout_shard_tgt(&ctx, 0);
+	assert_success(jtc_create_layout(&ctx));
+
+	rebuilding = jtc_get_layout_rebuild_count(&ctx);
+	/* One thing reintegrating */
+	assert_int_equal(1, rebuilding);
+
+	/*
+	 * Reintegration is now in progress. Grab the version from here
+	 * for find reint count
+	 */
+	ver_after_reint = ctx.ver;
+
+	/* second shard goes down */
+	jtc_set_status_on_shard_target(&ctx, DOWN, 1);
+	fail_tgt_id = jtc_layout_shard_tgt(&ctx, 1);
+	assert_success(jtc_create_layout(&ctx));
+
+	ver_after_fail = ctx.ver;
+
+	rebuilding = jtc_get_layout_rebuild_count(&ctx);
+	/* One reintegrating plus one failure recovery */
+	assert_int_equal(2, rebuilding);
+
+	/* third shard is queued for drain */
+	jtc_set_status_on_shard_target(&ctx, DRAIN, 2);
+	assert_success(jtc_create_layout(&ctx));
+
+	/*
+	 * Reintegration is still running, but these other operations have
+	 * happened too and are now queued.
+	 */
+	ver_after_drain = ctx.ver;
+
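+	/* sanity check: no target should appear twice in the layout */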
+	is_false(jtc_layout_has_duplicate(&ctx));
+
+	/*
+	 * Compute placement in this state. All three shards should
+	 * be moving around
+	 */
+	jtc_scan(&ctx);
+	rebuilding = jtc_get_layout_rebuild_count(&ctx);
+	assert_int_equal(3, rebuilding);
+
+	/*
+	 * Run find_reint() at the map version of the rebuild that would
+	 * have launched when reintegration started.
+	 *
+	 * At this version find_reint() should only find the one shard
+	 * to move.
+	 */
+	ctx.ver = ver_after_reint;
+	jtc_scan(&ctx);
+	assert_int_equal(ctx.reint.out_nr, 1);
+
+	/* Complete the reintegration */
+	ctx.ver = ver_after_drain; /* Restore the version first */
+	jtc_set_status_on_target(&ctx, UPIN, reint_tgt_id);
+	ver_after_reint_complete = ctx.ver;
+
+	/* This starts processing the failure, so check that it would
+	 * only move one thing
+	 */
+	ctx.ver = ver_after_fail;
+	jtc_scan(&ctx);
+	assert_int_equal(ctx.rebuild.out_nr, 1);
+
+	/* Complete the rebuild */
+	ctx.ver = ver_after_reint_complete; /* Restore the version first */
+	jtc_set_status_on_target(&ctx, DOWNOUT, fail_tgt_id);
+
+	/* This starts processing the drain, so check that it would
+	 * only move one thing
+	 */
+	ctx.ver = ver_after_drain;
+	jtc_scan(&ctx);
+	assert_int_equal(ctx.rebuild.out_nr, 1);
+
+	/* The remaining steps are simple and out of scope for this test */
+
+	jtc_fini(&ctx);
+}
+
+/*
+ * ------------------------------------------------
+ * Leave in multiple states at same time (including addition)
+ * ------------------------------------------------
+ */
+static void
+placement_handles_multiple_states_with_addition(void **state)
 {
 	struct jm_test_ctx	 ctx;
 
@@ -1773,8 +1884,10 @@ static const struct CMUnitTest tests[] = {
 	  "data movement to the new server",
 	  one_server_is_added),
 	/* Multiple */
-	T("Placement can handle multiple states",
+	T("Placement can handle multiple states (excluding addition)",
 	  placement_handles_multiple_states),
+	T("Placement can handle multiple states (including addition)",
+	  placement_handles_multiple_states_with_addition),
 	/* Non-standard system setups*/
 	T("Non-standard system configurations. All healthy",
 	  unbalanced_config),
diff --git a/src/rebuild/scan.c b/src/rebuild/scan.c
index 704d85fd192..aba3032f698 100644
--- a/src/rebuild/scan.c
+++ b/src/rebuild/scan.c
@@ -525,7 +525,8 @@ rebuild_obj_scan_cb(daos_handle_t ch, vos_iter_entry_t *ent,
 			D_GOTO(out, rc);
 
 		still_needed = pl_obj_layout_contains(rpt->rt_pool->sp_map,
-						      layout, myrank, mytarget);
+						      layout, myrank, mytarget,
+						      oid.id_shard);
 		if (!still_needed) {
 			struct rebuild_pool_tls *tls;
 
diff --git a/src/tests/suite/daos_rebuild_simple.c b/src/tests/suite/daos_rebuild_simple.c
index ef23f28c3c6..8d7e77f5944 100644
--- a/src/tests/suite/daos_rebuild_simple.c
+++ b/src/tests/suite/daos_rebuild_simple.c
@@ -914,8 +914,6 @@ rebuild_full_shards(void **state)
 	struct ioreq	req;
 	int		i;
 
-	skip(); /** DAOS-5758 */
-
 	if (!test_runable(arg, 4))
 		return;
 

From 6188da127179e761b45f6dc4cabb8e0f592a248f Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Wed, 24 Mar 2021 09:26:39 -0400
Subject: [PATCH 25/37] DAOS-6923 test: Merge with Di's branch.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 2 --
 src/tests/ftest/util/osa_utils.py                | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 3c7a3ba38aa..3c36cb53c72 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -9,7 +9,6 @@
 from daos_utils import DaosCommand
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
-from apricot import skipForTicket
 
 
 class OSAOfflineReintegration(OSAUtils):
@@ -185,7 +184,6 @@ def test_osa_offline_reintegrate_during_rebuild(self):
                                                    '/run/rebuild/*')
         self.run_offline_reintegration_test(1, data=True)
 
-    @skipForTicket("DAOS-6925")
     def test_osa_offline_reintegration_oclass(self):
         """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index fe76c51cde4..7a6e177450a 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -312,7 +312,7 @@ def run_ior_thread(self, action, oclass, test,
     def ior_thread(self, pool, oclass, test, flags,
                    single_cont_read=True,
                    fail_on_warning=True):
-        """Start threads and wait until all threads are finished.
+        """Start an IOR thread.
 
         Args:
             pool (object): pool handle

From 49830190389ed07b3968b71933f8b7f48efba595 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Wed, 24 Mar 2021 17:39:10 -0400
Subject: [PATCH 26/37] DAOS-6923 test: Add skipForTicket; guard daos cont check
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py      | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 3c36cb53c72..53df10b3abf 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -9,6 +9,7 @@
 from daos_utils import DaosCommand
 from test_utils_pool import TestPool
 from write_host_file import write_host_file
+from apricot import skipForTicket
 
 
 class OSAOfflineReintegration(OSAUtils):
@@ -89,14 +90,14 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                     output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                            rank[val])
                     # Check the IOR data after exclude
-                    if data:
+                    if data and (val == 0):
                         self.run_ior_thread("Read", oclass, test_seq)
                 else:
                     output = self.dmg_command.system_stop(ranks=rank[val],
                                                           force=True)
                     self.print_and_assert_on_rebuild_failure(output)
                     # Check the IOR data after system stop
-                    if data:
+                    if data and (val == 0):
                         self.run_ior_thread("Read", oclass, test_seq)
                     output = self.dmg_command.system_start(ranks=rank[val])
                 # Just try to reintegrate rank 5
@@ -142,11 +143,12 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             if data:
                 self.run_ior_thread("Read", oclass, test_seq)
                 self.run_mdtest_thread()
-                self.container = self.pool_cont_dict[self.pool][0]
-                kwargs = {"pool": self.pool.uuid,
-                          "cont": self.container.uuid}
-                output = self.daos_command.container_check(**kwargs)
-                self.log.info(output)
+                if self.test_during_rebuild is True:
+                    self.container = self.pool_cont_dict[self.pool][0]
+                    kwargs = {"pool": self.pool.uuid,
+                              "cont": self.container.uuid}
+                    output = self.daos_command.container_check(**kwargs)
+                    self.log.info(output)
 
     def test_osa_offline_reintegration_multiple_pools(self):
         """Test ID: DAOS-6923
@@ -169,6 +171,7 @@ def test_osa_offline_reintegration_server_stop(self):
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
+    @skipForTicket("DAOS-7042")
     def test_osa_offline_reintegrate_during_rebuild(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while rebuild

From 2e6461a8da5e6cdcc41b76f83cff99f46fd6b090 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Wed, 24 Mar 2021 20:09:10 -0400
Subject: [PATCH 27/37] DAOS-6923 test: Run all the tests including weekly
 Test-tag-hw-medium: hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 53df10b3abf..6e58041c90e 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -45,7 +45,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             data (bool) : whether to create data in the pool.
                 Defaults to False.
             server_boot (bool) : Perform system stop/start on a rank.
-                                 Defults to False.
+                                 Defaults to False.
             oclass (str) : daos object class string (eg: "RP_2G8")
         """
         # Create a pool

From 9fe0d4b71447cd28ccb3d2c899ec8cf1b6ef4dcb Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 25 Mar 2021 13:20:56 -0400
Subject: [PATCH 28/37] DAOS-6923 test: Offline reintegration no checksum
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py    | 13 ++++++
 .../ftest/osa/osa_offline_reintegration.yaml  |  2 +
 src/tests/ftest/util/osa_utils.py             | 43 +++++++++++++++++--
 3 files changed, 55 insertions(+), 3 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 6e58041c90e..e764396afda 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -150,6 +150,19 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                     output = self.daos_command.container_check(**kwargs)
                     self.log.info(output)
 
+    def test_osa_offline_reintegration_without_checksum(self):
+        """Test ID: DAOS-6923
+        Test Description: Validate Offline Reintegration
+        without enabling checksum in container properties.
+
+        :avocado: tags=all,pr,daily_regression,hw,medium,ib2
+        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=offline_reintegration_without_csum
+        """
+        self.test_with_checksum = self.params.get("checksum",
+                                                  '/run/test_with_checksum/*')
+        self.run_offline_reintegration_test(1, data=True)
+
     def test_osa_offline_reintegration_multiple_pools(self):
         """Test ID: DAOS-6923
         Test Description: Validate Offline Reintegration
diff --git a/src/tests/ftest/osa/osa_offline_reintegration.yaml b/src/tests/ftest/osa/osa_offline_reintegration.yaml
index fe52612e1b7..eb9a6b6f58c 100644
--- a/src/tests/ftest/osa/osa_offline_reintegration.yaml
+++ b/src/tests/ftest/osa/osa_offline_reintegration.yaml
@@ -107,3 +107,5 @@ aggregation:
   test_with_aggregation: True
 rebuild:
   test_with_rebuild: True
+checksum:
+  test_with_checksum: False
\ No newline at end of file
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index 7a6e177450a..d1d56b319c6 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -48,6 +48,7 @@ def setUp(self):
         self.dmg_command.exit_status_exception = False
         self.test_during_aggregation = False
         self.test_during_rebuild = False
+        self.test_with_checksum = True
 
     @fail_on(CommandFailure)
     def get_pool_leader(self):
@@ -224,6 +225,9 @@ def prepare_cont_ior_write_read(self, oclass, flags):
         if self.pool_cont_dict[self.pool][0] is None:
             self.add_container(self.pool, create=False)
             self.set_cont_class_properties(oclass)
+            if self.test_with_checksum is False:
+                rf_value = "rf:{}".format(self.get_object_replica_value - 1)
+                self.update_cont_properties(rf_value)
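+                # The container properties now hold only the rf value,
+                # so no checksum is enabled on this container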
             self.container.create()
             self.pool_cont_dict[self.pool][0] = self.container
             self.pool_cont_dict[self.pool][1] = "Updated"
@@ -235,6 +239,10 @@ def prepare_cont_ior_write_read(self, oclass, flags):
                 # Write to the second container
                 self.add_container(self.pool, create=False)
                 self.set_cont_class_properties(oclass)
+                if self.test_with_checksum is False:
+                    rf_value = "rf:{}".format(
+                        self.get_object_replica_value - 1)
+                    self.update_cont_properties(rf_value)
                 self.container.create()
                 self.pool_cont_dict[self.pool][2] = self.container
                 self.pool_cont_dict[self.pool][3] = "Updated"
@@ -254,9 +262,35 @@ def delete_extra_container(self, pool):
         extra_container.destroy()
         self.pool_cont_dict[pool][3] = None
 
+    def get_object_replica_value(self, oclass):
+        """ Get the object replica value for an object class.
+
+        Args:
+            oclass (str): Object Class (eg: RP_2G1,etc)
+
+        Returns:
+            value (int) : Object replica value
+        """
+        value = 0
+        if "_" in oclass:
+            replica_list = oclass.split("_")
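+            # e.g. "RP_2G1" splits into ["RP", "2G1"]; the first character
+            # of the second token ("2") is the replica count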
+            value = replica_list[1][0]
+        else:
+            self.log.info("Wrong Object Class. Cannot split")
+        return int(value)
+
+    def update_cont_properties(self, cont_prop):
+        """Update the existing container properties.
+        Args:
+            cont_prop (str): Replace existing cotainer properties
+                             with new value
+        """
+        self.container.properties.value = cont_prop
+
     def set_cont_class_properties(self, oclass="S1"):
         """Update the container class to match the IOR object
-        class. Also, remove the redundancy factor for S type
+        class. Set the rf property based on the object replica value.
+        Also, remove the redundancy factor for S type
         object class.
         Args:
             oclass (str, optional): Container object class to be set.
@@ -266,10 +300,13 @@ def set_cont_class_properties(self, oclass="S1"):
         # Set the container properties properly for S1, S2 class.
         # rf should not be set to 1 for S type object class.
         x = re.search("^S\\d$", oclass)
+        prop = self.container.properties.value
         if x is not None:
-            prop = self.container.properties.value
             prop = prop.replace("rf:1", "rf:0")
-            self.container.properties.value = prop
+        else:
+            rf_value = "rf:{}".format(self.get_object_replica_value - 1)
+            prop = prop.replace("rf:1", rf_value)
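+            # e.g. RP_3G1 has three replicas, so "rf:1" becomes "rf:2"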
+        self.container.properties.value = prop
 
     def run_ior_thread(self, action, oclass, test,
                        single_cont_read=True,

From 1ccb85e33f5af0e8095cdf6e7cdbf00f492aa878 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 25 Mar 2021 13:31:10 -0400
Subject: [PATCH 29/37] DAOS-6923 test: Fix spell check checkpatch issue.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/osa_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index d1d56b319c6..aa6fab5512b 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -282,7 +282,7 @@ def get_object_replica_value(self, oclass):
     def update_cont_properties(self, cont_prop):
         """Update the existing container properties.
         Args:
-            cont_prop (str): Replace existing cotainer properties
+            cont_prop (str): Replace existing container properties
                              with new value
         """
         self.container.properties.value = cont_prop

From aa50cc51d20d5748fd39e32004fc58e564178b74 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Thu, 25 Mar 2021 13:58:27 -0400
Subject: [PATCH 30/37] DAOS-6923 test: Add skipForTicket for DAOS-6807
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index e764396afda..414c691c7b4 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -174,6 +174,7 @@ def test_osa_offline_reintegration_multiple_pools(self):
         """
         self.run_offline_reintegration_test(5, data=True)
 
+    @skipForTicket("DAOS-6807")
     def test_osa_offline_reintegration_server_stop(self):
         """Test ID: DAOS-6748.
 

From f11419b8b63a03c8ffd64be2651ac289dc17b7bc Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Fri, 26 Mar 2021 00:02:04 -0400
Subject: [PATCH 31/37] DAOS-6923 test: Testing without enabling checksum
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py |  4 ++--
 src/tests/ftest/util/osa_utils.py                | 14 ++++++++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 414c691c7b4..0b965496b49 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -159,8 +159,8 @@ def test_osa_offline_reintegration_without_checksum(self):
         :avocado: tags=osa,offline_reintegration
         :avocado: tags=offline_reintegration_without_csum
         """
-        self.test_with_checksum = self.params.get("checksum",
-                                                  '/run/test_with_checksum/*')
+        self.test_with_checksum = self.params.get("test_with_checksum",
+                                                  '/run/checksum/*')
         self.run_offline_reintegration_test(1, data=True)
 
     def test_osa_offline_reintegration_multiple_pools(self):
diff --git a/src/tests/ftest/util/osa_utils.py b/src/tests/ftest/util/osa_utils.py
index aa6fab5512b..83997c047c8 100644
--- a/src/tests/ftest/util/osa_utils.py
+++ b/src/tests/ftest/util/osa_utils.py
@@ -226,7 +226,8 @@ def prepare_cont_ior_write_read(self, oclass, flags):
             self.add_container(self.pool, create=False)
             self.set_cont_class_properties(oclass)
             if self.test_with_checksum is False:
-                rf_value = "rf:{}".format(self.get_object_replica_value - 1)
+                tmp = self.get_object_replica_value(oclass)
+                rf_value = "rf:{}".format(tmp - 1)
                 self.update_cont_properties(rf_value)
             self.container.create()
             self.pool_cont_dict[self.pool][0] = self.container
@@ -240,8 +241,8 @@ def prepare_cont_ior_write_read(self, oclass, flags):
                 self.add_container(self.pool, create=False)
                 self.set_cont_class_properties(oclass)
                 if self.test_with_checksum is False:
-                    rf_value = "rf:{}".format(
-                        self.get_object_replica_value - 1)
+                    tmp = self.get_object_replica_value(oclass)
+                    rf_value = "rf:{}".format(tmp - 1)
                     self.update_cont_properties(rf_value)
                 self.container.create()
                 self.pool_cont_dict[self.pool][2] = self.container
@@ -304,7 +305,8 @@ def set_cont_class_properties(self, oclass="S1"):
         if x is not None:
             prop = prop.replace("rf:1", "rf:0")
         else:
-            rf_value = "rf:{}".format(self.get_object_replica_value - 1)
+            tmp = self.get_object_replica_value(oclass)
+            rf_value = "rf:{}".format(tmp - 1)
             prop = prop.replace("rf:1", rf_value)
         self.container.properties.value = prop
 
@@ -393,6 +395,10 @@ def run_mdtest_thread(self):
         if self.container is None:
             self.add_container(self.pool, create=False)
             self.set_cont_class_properties(self.mdtest_cmd.dfs_oclass)
+            if self.test_with_checksum is False:
+                tmp = self.get_object_replica_value(self.mdtest_cmd.dfs_oclass)
+                rf_value = "rf:{}".format(tmp - 1)
+                self.update_cont_properties(rf_value)
             self.container.create()
         job_manager = self.get_mdtest_job_manager_command(self.manager)
         job_manager.job.dfs_cont.update(self.container.uuid)

From ff2148dfbff57a514c802b5a8606e1b2c2e34177 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Fri, 26 Mar 2021 00:29:40 -0400
Subject: [PATCH 32/37] DAOS-6923 test: Perform IOR read after excludes
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 0b965496b49..eaf4859914d 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -90,7 +90,7 @@ def run_offline_reintegration_test(self, num_pool, data=False,
                     output = self.dmg_command.pool_exclude(self.pool.uuid,
                                                            rank[val])
                     # Check the IOR data after exclude
-                    if data and (val == 0):
+                    if data:
                         self.run_ior_thread("Read", oclass, test_seq)
                 else:
                     output = self.dmg_command.system_stop(ranks=rank[val],

From 8349efaa14711196583a1434a0a7553f637871ec Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Sun, 28 Mar 2021 10:46:08 -0400
Subject: [PATCH 33/37] DAOS-6923 test: Enable daos cont check
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index eaf4859914d..0b0eb8684ba 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -185,7 +185,6 @@ def test_osa_offline_reintegration_server_stop(self):
         """
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
-    @skipForTicket("DAOS-7042")
     def test_osa_offline_reintegrate_during_rebuild(self):
         """Test ID: DAOS-6923
         Test Description: Reintegrate rank while rebuild

From 3787a556625874a7bc4090f449f0c16e02a9ae0b Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Sun, 28 Mar 2021 19:56:03 -0400
Subject: [PATCH 34/37] DAOS-6923 test: Fix mdtest failures.
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/mdtest_test_base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/tests/ftest/util/mdtest_test_base.py b/src/tests/ftest/util/mdtest_test_base.py
index e7efa6c8fb8..029989aa923 100755
--- a/src/tests/ftest/util/mdtest_test_base.py
+++ b/src/tests/ftest/util/mdtest_test_base.py
@@ -61,12 +61,12 @@ def execute_mdtest(self):
         # Run Mdtest
         self.run_mdtest(self.get_mdtest_job_manager_command(self.manager),
                         self.processes)
+
+        self.stop_dfuse()
         # reset self.container if dfs_destroy is True
         if self.mdtest_cmd.dfs_destroy is True:
             self.container = None
 
-        self.stop_dfuse()
-
     def get_mdtest_job_manager_command(self, manager):
         """Get the MPI job manager command for Mdtest.
 

From de979b5e9209645829f25dc48ca79ec42d76b257 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Sun, 28 Mar 2021 20:04:30 -0400
Subject: [PATCH 35/37] DAOS-6923 test: Enable daos cont check for tests
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/osa/osa_offline_reintegration.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 0b0eb8684ba..3ee27c280db 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -143,12 +143,11 @@ def run_offline_reintegration_test(self, num_pool, data=False,
             if data:
                 self.run_ior_thread("Read", oclass, test_seq)
                 self.run_mdtest_thread()
-                if self.test_during_rebuild is True:
-                    self.container = self.pool_cont_dict[self.pool][0]
-                    kwargs = {"pool": self.pool.uuid,
-                              "cont": self.container.uuid}
-                    output = self.daos_command.container_check(**kwargs)
-                    self.log.info(output)
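+                # Verify container consistency after reintegration completes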
+                self.container = self.pool_cont_dict[self.pool][0]
+                kwargs = {"pool": self.pool.uuid,
+                          "cont": self.container.uuid}
+                output = self.daos_command.container_check(**kwargs)
+                self.log.info(output)
 
     def test_osa_offline_reintegration_without_checksum(self):
         """Test ID: DAOS-6923

From d17a081cc4d468a8985f6b47ee1680d0b3506129 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Mon, 29 Mar 2021 01:05:56 -0400
Subject: [PATCH 36/37] DAOS-6923 test: Fix mdtest_test_base
 Test-tag-hw-medium: pr,hw,medium,ib2 osa

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 src/tests/ftest/util/mdtest_test_base.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/tests/ftest/util/mdtest_test_base.py b/src/tests/ftest/util/mdtest_test_base.py
index 029989aa923..4d5f6a633ee 100755
--- a/src/tests/ftest/util/mdtest_test_base.py
+++ b/src/tests/ftest/util/mdtest_test_base.py
@@ -62,10 +62,10 @@ def execute_mdtest(self):
         self.run_mdtest(self.get_mdtest_job_manager_command(self.manager),
                         self.processes)
 
-        self.stop_dfuse()
-        # reset self.container if dfs_destroy is True
-        if self.mdtest_cmd.dfs_destroy is True:
+        # reset self.container if dfs_destroy is True or None.
+        if self.mdtest_cmd.dfs_destroy is not False:
             self.container = None
+        self.stop_dfuse()
 
     def get_mdtest_job_manager_command(self, manager):
         """Get the MPI job manager command for Mdtest.

From 3ef55fe23031e528d19130e760329c7329436841 Mon Sep 17 00:00:00 2001
From: rpadma2 <ravindran.padmanabhan@intel.com>
Date: Mon, 29 Mar 2021 10:29:15 -0400
Subject: [PATCH 37/37] DAOS-6923 test: Add log messages Test-tag-hw-medium:
 pr,hw,medium,ib2 offline_reintegration_daily

Signed-off-by: rpadma2 <ravindran.padmanabhan@intel.com>
---
 .../ftest/osa/osa_offline_reintegration.py     | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/tests/ftest/osa/osa_offline_reintegration.py b/src/tests/ftest/osa/osa_offline_reintegration.py
index 3ee27c280db..ccd5f3ceed2 100755
--- a/src/tests/ftest/osa/osa_offline_reintegration.py
+++ b/src/tests/ftest/osa/osa_offline_reintegration.py
@@ -155,11 +155,12 @@ def test_osa_offline_reintegration_without_checksum(self):
         without enabling checksum in container properties.
 
         :avocado: tags=all,pr,daily_regression,hw,medium,ib2
-        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=osa,offline_reintegration_daily
         :avocado: tags=offline_reintegration_without_csum
         """
         self.test_with_checksum = self.params.get("test_with_checksum",
                                                   '/run/checksum/*')
+        self.log.info("Offline Reintegration : Without Checksum")
         self.run_offline_reintegration_test(1, data=True)
 
     def test_osa_offline_reintegration_multiple_pools(self):
@@ -168,9 +169,10 @@ def test_osa_offline_reintegration_multiple_pools(self):
         with multiple pools
 
         :avocado: tags=all,daily_regression,hw,medium,ib2
-        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=osa,offline_reintegration_daily
         :avocado: tags=offline_reintegration_multiple_pools
         """
+        self.log.info("Offline Reintegration : Multiple Pools")
         self.run_offline_reintegration_test(5, data=True)
 
     @skipForTicket("DAOS-6807")
@@ -179,9 +181,10 @@ def test_osa_offline_reintegration_server_stop(self):
 
         Test Description: Validate Offline Reintegration with server stop
         :avocado: tags=all,pr,daily_regression,hw,medium,ib2
-        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=osa,offline_reintegration_daily
         :avocado: tags=offline_reintegration_srv_stop
         """
+        self.log.info("Offline Reintegration : System Start/Stop")
         self.run_offline_reintegration_test(1, data=True, server_boot=True)
 
     def test_osa_offline_reintegrate_during_rebuild(self):
@@ -190,13 +193,14 @@ def test_osa_offline_reintegrate_during_rebuild(self):
         is happening in parallel
 
         :avocado: tags=all,full_regression,hw,medium,ib2
-        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=osa,offline_reintegration_full
         :avocado: tags=offline_reintegrate_during_rebuild
         """
         self.loop_test_cnt = self.params.get("iterations",
                                              '/run/loop_test/*')
         self.test_during_rebuild = self.params.get("test_with_rebuild",
                                                    '/run/rebuild/*')
+        self.log.info("Offline Reintegration : Rebuild")
         self.run_offline_reintegration_test(1, data=True)
 
     def test_osa_offline_reintegration_oclass(self):
@@ -205,9 +209,10 @@ def test_osa_offline_reintegration_oclass(self):
         with different object class
 
         :avocado: tags=all,full_regression,hw,medium,ib2
-        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=osa,offline_reintegration_full
         :avocado: tags=offline_reintegration_oclass
         """
+        self.log.info("Offline Reintegration : Object Class")
         for oclass in self.test_oclass:
             self.run_offline_reintegration_test(1, data=True,
                                                 server_boot=False,
@@ -219,9 +224,10 @@ def test_osa_offline_reintegrate_during_aggregation(self):
         is happening in parallel
 
         :avocado: tags=all,full_regression,hw,medium,ib2
-        :avocado: tags=osa,offline_reintegration
+        :avocado: tags=osa,offline_reintegration_full
         :avocado: tags=offline_reintegrate_during_aggregation
         """
         self.test_during_aggregation = self.params.get("test_with_aggregation",
                                                        '/run/aggregation/*')
+        self.log.info("Offline Reintegration : Aggregation")
         self.run_offline_reintegration_test(1, data=True)