diff --git a/src/rdb/rdb_raft.c b/src/rdb/rdb_raft.c
index f1e2f59846b..c46754c4188 100644
--- a/src/rdb/rdb_raft.c
+++ b/src/rdb/rdb_raft.c
@@ -1341,6 +1341,36 @@ static raft_cbs_t rdb_raft_cbs = {
 	.log = rdb_raft_cb_debug
 };
 
+/*
+ * Take a snapshot at index so that the log can be compacted up to it. The
+ * raft_begin_snapshot() call is asserted to succeed. Returns 0 on success,
+ * or the raft_end_snapshot() error converted by rdb_raft_rc().
+ */
+static int
+rdb_raft_compact_to_index(struct rdb *db, uint64_t index)
+{
+	int rc;
+
+	D_DEBUG(DB_TRACE, DF_DB": snapping "DF_U64"\n", DP_DB(db), index);
+	rc = raft_begin_snapshot(db->d_raft, index);
+	D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc));
+	/*
+	 * VOS snaps every new index implicitly.
+	 *
+	 * raft_end_snapshot() only polls the log and wakes up
+	 * rdb_compactd(), which does the real compaction (i.e., VOS
+	 * aggregation) in the background.
+	 */
+	rc = raft_end_snapshot(db->d_raft);
+	if (rc != 0) {
+		D_ERROR(DF_DB": failed to poll entries: %d\n",
+			DP_DB(db), rc);
+		rc = rdb_raft_rc(rc);
+	}
+
+	return rc;
+}
+
 /*
  * Check if the log should be compacted. If so, trigger the compaction by
  * taking a snapshot (i.e., simply increasing the log base index in our
@@ -1375,23 +1405,8 @@ rdb_raft_trigger_compaction(struct rdb *db)
 			index = base + 1;
 		else
 			index = base + n / 2;
-		D_DEBUG(DB_TRACE, DF_DB": snapping "DF_U64"\n", DP_DB(db),
-			index);
-		rc = raft_begin_snapshot(db->d_raft, index);
-		D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc));
-		/*
-		 * VOS snaps every new index implicitly.
-		 *
-		 * raft_end_snapshot() only polls the log and wakes up
-		 * rdb_compactd(), which does the real compaction (i.e., VOS
-		 * aggregation) in the background.
-		 */
-		rc = raft_end_snapshot(db->d_raft);
-		if (rc != 0) {
-			D_ERROR(DF_DB": failed to poll %d entries: %d\n",
-				DP_DB(db), n, rc);
-			rc = rdb_raft_rc(rc);
-		}
+
+		rc = rdb_raft_compact_to_index(db, index);
 	}
 	return rc;
 }
@@ -1719,7 +1734,20 @@ rdb_raft_check_state(struct rdb *db, const struct rdb_raft_state *state,
 		rc = compaction_rc;
 	switch (rc) {
 	case -DER_NOMEM:
+	case -DER_NOSPACE:
 		if (leader) {
+			/*
+			 * Out of memory or space: compact to the committed
+			 * index before stepping down. The return value is
+			 * ignored; rdb_raft_compact_to_index() itself logs
+			 * a raft_end_snapshot() failure.
+			 *
+			 * NOTE(review): the helper asserts that
+			 * raft_begin_snapshot() succeeds; confirm committed
+			 * is always a valid snapshot index on this path.
+			 */
+			(void)rdb_raft_compact_to_index(db, committed);
+
 			raft_become_follower(db->d_raft);
 			leader = false;
 			/* If stepping up fails, don't step down. */
@@ -1729,7 +1757,6 @@ rdb_raft_check_state(struct rdb *db, const struct rdb_raft_state *state,
 		}
 		break;
 	case -DER_SHUTDOWN:
-	case -DER_NOSPACE:
 	case -DER_IO:
 		db->d_cbs->dc_stop(db, rc, db->d_arg);
 		break;
diff --git a/src/tests/ftest/server/metadata.py b/src/tests/ftest/server/metadata.py
index e91edf37f18..5f9bbfa8a35 100755
--- a/src/tests/ftest/server/metadata.py
+++ b/src/tests/ftest/server/metadata.py
@@ -45,5 +45,5 @@
 from write_host_file import write_host_file
 from test_utils_pool import TestPool
 
-NO_OF_MAX_CONTAINER = 13180
+NO_OF_MAX_CONTAINER = 13034
 
@@ -131,19 +131,20 @@ def test_metadata_fillup(self):
         Use Cases:
             ?
 
-        :avocado: tags=all,metadata,pr,small,metadatafill
+        :avocado: tags=all,metadata,large,metadatafill,hw
+        :avocado: tags=full_regression
         """
         self.pool.pool.connect(2)
         container = DaosContainer(self.context)
 
-        self.d_log.debug("Fillup Metadata....")
+        self.log.info("Fillup Metadata....")
         for _cont in range(NO_OF_MAX_CONTAINER):
             container.create(self.pool.pool.handle)
 
         # This should fail with no Metadata space Error.
-        self.d_log.debug("Metadata Overload...")
+        self.log.info("Metadata Overload...")
         try:
-            for _cont in range(250):
+            for _cont in range(400):
                 container.create(self.pool.pool.handle)
 
             self.fail("Test expected to fail with a no metadata space error")
@@ -163,7 +164,7 @@ def test_metadata_addremove(self):
         Use Cases:
             ?
 
-        :avocado: tags=metadata,metadata_free_space,nvme,medium,hw
+        :avocado: tags=metadata,metadata_free_space,nvme,large,hw
         :avocado: tags=full_regression
         """
         self.pool.pool.connect(2)
@@ -194,7 +195,7 @@ def test_metadata_server_restart(self):
         Use Cases:
             ?
 
-        :avocado: tags=metadata,metadata_ior,nvme,small
+        :avocado: tags=metadata,metadata_ior,nvme,large
         """
         files_per_thread = 400
         total_ior_threads = 5
diff --git a/src/tests/ftest/server/metadata.yaml b/src/tests/ftest/server/metadata.yaml
index c6ab2522651..7317dc8a407 100644
--- a/src/tests/ftest/server/metadata.yaml
+++ b/src/tests/ftest/server/metadata.yaml
@@ -5,6 +5,8 @@ hosts:
   test_servers:
     - server-A
     - server-B
+    - server-C
+    - server-D
   test_clients:
     - client-C
 timeout: 1800
@@ -22,7 +24,7 @@ pool:
   createset:
     group: daos_server
   createsvc:
-    svcn: 1
+    svcn: 3
   createsize:
     scm_size: 1073741824
     nvme_size: 1073741824