Skip to content

Commit

Permalink
Bug#36822559 AT: testSystemRestart -n Bug29167 T1 fails in .4node4rpl…
Browse files Browse the repository at this point in the history
… and .6node3rpl

Issue #1
 Problem:
   Test fails in 4node4rpl (1 node group).
 Solution:
   Skip test when there is only one NG.

Issue #2
  Problem:
    Test fails in 6node3rpl (2 node groups) with a timeout.
    The idea of the test is to restart, with the nostart option, *ALL*
    nodes in the same node group, to check whether QMGR wrongly handles
    it as "node group is missing".
    In the test only two nodes in the same node group are restarted.
    That works for 2 replica setups but, for 4 replica, the test
    hangs waiting for the cluster to enter a noStart state.
  Solution:
   Instead of restarting exactly 2 nodes, restart ALL nodes in a
   given node group.

Change-Id: Iafb0511992a553723013e73593ea10540cd03661
  • Loading branch information
vinc13e committed Jul 10, 2024
1 parent 680f83b commit 7ea3f1d
Showing 1 changed file with 17 additions and 10 deletions.
27 changes: 17 additions & 10 deletions storage/ndb/test/ndbapi/testSystemRestart.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2006,17 +2006,18 @@ int runBug27434(NDBT_Context *ctx, NDBT_Step *step) {
int runBug29167(NDBT_Context *ctx, NDBT_Step *step) {
int result = NDBT_OK;
NdbRestarter restarter;
const Uint32 nodeCount = restarter.getNumDbNodes();
const Uint32 nodeGroupCount = restarter.getNumNodeGroups();

if (nodeCount < 4) return NDBT_SKIPPED;
if (nodeGroupCount < 2) {
g_info << "Bug29167 - Needs atleast 2 node group to test" << endl;
return NDBT_SKIPPED;
}

struct ndb_logevent event;
int master = restarter.getMasterNodeId();
do {
int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());

ndbout_c("node1: %u node2: %u", node1, node2);
int node1_ng = restarter.getNodeGroup(node1);

int val2[] = {DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1};
restarter.dumpStateAllNodes(val2, 2);
Expand All @@ -2028,16 +2029,21 @@ int runBug29167(NDBT_Context *ctx, NDBT_Step *step) {
ndb_mgm_create_logevent_handle(restarter.handle, filter);

while (ndb_logevent_get_next(handle, &event, 0) >= 0 &&
event.type != NDB_LE_GlobalCheckpointCompleted)
;
event.type != NDB_LE_GlobalCheckpointCompleted) {
ndbout_c("waiting GCP to complete");
}

ndb_mgm_destroy_logevent_handle(&handle);

CHECK(restarter.insertErrorInAllNodes(932) == 0);

CHECK(restarter.insertErrorInNode(node1, 7183) == 0);
CHECK(restarter.insertErrorInNode(node2, 7183) == 0);

for (int i = 0; i < restarter.getNumDbNodes(); i++) {
int node_id = restarter.getDbNodeId(i);
if (restarter.getNodeGroup(node_id) == node1_ng) {
ndbout_c("Insert error 7183 in node : %i", node_id);
CHECK(restarter.insertErrorInNode(node_id, 7183) == 0);
}
}
const unsigned int timeout = 300;
CHECK(restarter.waitClusterNoStart(timeout) == 0);
restarter.startAll();
Expand All @@ -2046,6 +2052,7 @@ int runBug29167(NDBT_Context *ctx, NDBT_Step *step) {

return result;
}

int runOneNodeWithCleanFilesystem(NDBT_Context *ctx, NDBT_Step *step) {
int result = NDBT_OK;
NdbRestarter restarter;
Expand Down

0 comments on commit 7ea3f1d

Please sign in to comment.