Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-1946 md: keep service up on metadata full condition #2077

Merged
merged 5 commits into from
Mar 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 31 additions & 18 deletions src/rdb/rdb_raft.c
Original file line number Diff line number Diff line change
Expand Up @@ -1341,6 +1341,31 @@ static raft_cbs_t rdb_raft_cbs = {
.log = rdb_raft_cb_debug
};

/*
 * Take a raft snapshot of db at the given log index.
 *
 * Calls raft_begin_snapshot() (asserting that it succeeds) followed by
 * raft_end_snapshot(). Returns 0 on success; if raft_end_snapshot() fails,
 * the failure is logged and its return value is passed through
 * rdb_raft_rc() before being returned to the caller.
 */
static int
rdb_raft_compact_to_index(struct rdb *db, uint64_t index)
{
	int rc;

	D_DEBUG(DB_TRACE, DF_DB": snapping "DF_U64"\n", DP_DB(db), index);

	rc = raft_begin_snapshot(db->d_raft, index);
	D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc));

	/*
	 * Nothing needs to happen between begin and end, because VOS snaps
	 * every new index implicitly.
	 *
	 * Ending the snapshot merely polls the log and wakes up
	 * rdb_compactd(); the real compaction (i.e., VOS aggregation) is done
	 * by rdb_compactd() in the background.
	 */
	rc = raft_end_snapshot(db->d_raft);
	if (rc == 0)
		return 0;

	D_ERROR(DF_DB": failed to poll entries: %d\n", DP_DB(db), rc);
	return rdb_raft_rc(rc);
}
/*
* Check if the log should be compacted. If so, trigger the compaction by
* taking a snapshot (i.e., simply increasing the log base index in our
Expand Down Expand Up @@ -1375,23 +1400,8 @@ rdb_raft_trigger_compaction(struct rdb *db)
index = base + 1;
else
index = base + n / 2;
D_DEBUG(DB_TRACE, DF_DB": snapping "DF_U64"\n", DP_DB(db),
index);
rc = raft_begin_snapshot(db->d_raft, index);
D_ASSERTF(rc == 0, ""DF_RC"\n", DP_RC(rc));
/*
* VOS snaps every new index implicitly.
*
* raft_end_snapshot() only polls the log and wakes up
* rdb_compactd(), which does the real compaction (i.e., VOS
* aggregation) in the background.
*/
rc = raft_end_snapshot(db->d_raft);
if (rc != 0) {
D_ERROR(DF_DB": failed to poll %d entries: %d\n",
DP_DB(db), n, rc);
rc = rdb_raft_rc(rc);
}

rc = rdb_raft_compact_to_index(db, index);
}
return rc;
}
Expand Down Expand Up @@ -1719,7 +1729,11 @@ rdb_raft_check_state(struct rdb *db, const struct rdb_raft_state *state,
rc = compaction_rc;
switch (rc) {
case -DER_NOMEM:
case -DER_NOSPACE:
if (leader) {
/* No space / desperation: compact to committed idx */
rdb_raft_compact_to_index(db, committed);

raft_become_follower(db->d_raft);
leader = false;
/* If stepping up fails, don't step down. */
Expand All @@ -1729,7 +1743,6 @@ rdb_raft_check_state(struct rdb *db, const struct rdb_raft_state *state,
}
break;
case -DER_SHUTDOWN:
case -DER_NOSPACE:
case -DER_IO:
db->d_cbs->dc_stop(db, rc, db->d_arg);
break;
Expand Down
16 changes: 8 additions & 8 deletions src/tests/ftest/server/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@
from write_host_file import write_host_file
from test_utils_pool import TestPool

NO_OF_MAX_CONTAINER = 13180

NO_OF_MAX_CONTAINER = 13034

def ior_runner_thread(manager, uuids, results):
"""IOR run thread method.
Expand Down Expand Up @@ -131,19 +130,20 @@ def test_metadata_fillup(self):
Use Cases:
?

:avocado: tags=all,metadata,pr,small,metadatafill
:avocado: tags=all,metadata,large,metadatafill,hw
:avocado: tags=full_regression
"""
self.pool.pool.connect(2)
container = DaosContainer(self.context)

self.d_log.debug("Fillup Metadata....")
self.log.info("Fillup Metadata....")
for _cont in range(NO_OF_MAX_CONTAINER):
container.create(self.pool.pool.handle)

# This should fail with no Metadata space Error.
self.d_log.debug("Metadata Overload...")
self.log.info("Metadata Overload...")
try:
for _cont in range(250):
for _cont in range(400):
container.create(self.pool.pool.handle)
self.fail("Test expected to fail with a no metadata space error")

Expand All @@ -163,7 +163,7 @@ def test_metadata_addremove(self):
Use Cases:
?

:avocado: tags=metadata,metadata_free_space,nvme,medium,hw
:avocado: tags=metadata,metadata_free_space,nvme,large,hw
:avocado: tags=full_regression
"""
self.pool.pool.connect(2)
Expand Down Expand Up @@ -194,7 +194,7 @@ def test_metadata_server_restart(self):
Use Cases:
?

:avocado: tags=metadata,metadata_ior,nvme,small
:avocado: tags=metadata,metadata_ior,nvme,large
"""
files_per_thread = 400
total_ior_threads = 5
Expand Down
4 changes: 3 additions & 1 deletion src/tests/ftest/server/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ hosts:
test_servers:
- server-A
- server-B
- server-C
- server-D
test_clients:
- client-C
timeout: 1800
Expand All @@ -22,7 +24,7 @@ pool:
createset:
group: daos_server
createsvc:
svcn: 1
svcn: 3
createsize:
scm_size: 1073741824
nvme_size: 1073741824
Expand Down