Skip to content

Commit

Permalink
Added unit tests for offer operation feedback metrics.
Browse files Browse the repository at this point in the history
This adds a set of checks to verify the metrics introduced
in the previous commit are working as intended.

Review: https://reviews.apache.org/r/70117
  • Loading branch information
Benno Evers committed Mar 28, 2019
1 parent 18c4015 commit ede2a94
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 37 deletions.
10 changes: 10 additions & 0 deletions src/tests/agent_operation_feedback_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ TEST_P(AgentOperationFeedbackTest, RetryOperationStatusUpdate)
EXPECT_EQ(operationId, update->status().operation_id());
EXPECT_EQ(OPERATION_FINISHED, update->status().state());

// The master has already seen the update, so the operation is counted
// in the metrics.
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

// The agent should resend the unacknowledged operation status update once the
// status update retry interval lapses.
Future<scheduler::Event::UpdateOperationStatus> retriedUpdate;
Expand All @@ -172,6 +176,9 @@ TEST_P(AgentOperationFeedbackTest, RetryOperationStatusUpdate)

AWAIT_READY(acknowledgeOperationStatusMessage);

// Verify that the update has not been double-counted.
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

// The operation status update has been acknowledged, so the agent shouldn't
// send further status updates.
EXPECT_NO_FUTURE_PROTOBUFS(UpdateOperationStatusMessage(), _, _);
Expand Down Expand Up @@ -263,6 +270,7 @@ TEST_P(AgentOperationFeedbackTest, CleanupAcknowledgedTerminalOperation)

EXPECT_EQ(operationId, update->status().operation_id());
EXPECT_EQ(OPERATION_FINISHED, update->status().state());
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

{
master::Call call;
Expand Down Expand Up @@ -417,6 +425,7 @@ TEST_P(AgentOperationFeedbackTest, OperationUpdateContainsAgentID)

EXPECT_EQ(operationId, update->status().operation_id());
EXPECT_EQ(OPERATION_FINISHED, update->status().state());
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

ASSERT_TRUE(update->status().has_agent_id());
EXPECT_EQ(agentId, update->status().agent_id());
Expand Down Expand Up @@ -562,6 +571,7 @@ TEST_P(AgentOperationFeedbackTest, DroppedOperationStatusUpdate)
EXPECT_EQ(OPERATION_DROPPED, update->status().state());
EXPECT_FALSE(update->status().has_uuid());
EXPECT_FALSE(update->status().has_resource_provider_id());
EXPECT_TRUE(metricEquals("master/operations/dropped", 1));

const AgentID& agentId(offer.agent_id());
ASSERT_TRUE(update->status().has_agent_id());
Expand Down
10 changes: 10 additions & 0 deletions src/tests/api_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5114,6 +5114,11 @@ TEST_P(MasterAPITest, OperationUpdatesUponAgentGone)
EXPECT_EQ(
v1::OPERATION_GONE_BY_OPERATOR,
operationGoneUpdate->status().state());

// TODO(bevers): We have to reset the agent before we can access the
// metrics to work around MESOS-9644.
slave->reset();
EXPECT_TRUE(metricEquals("master/operations/gone_by_operator", 1));
}


Expand Down Expand Up @@ -5253,6 +5258,11 @@ TEST_P(MasterAPITest, OperationUpdatesUponUnreachable)
v1::OPERATION_UNREACHABLE,
operationUnreachableUpdate->status().state());

// TODO(bevers): We have to reset the agent before we can access the
// metrics to work around MESOS-9644.
slave->reset();
EXPECT_TRUE(metricEquals("master/operations/unreachable", 1));

Clock::resume();
}

Expand Down
1 change: 1 addition & 0 deletions src/tests/master_slave_reconciliation_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ TEST_F(

EXPECT_EQ(operationId, operationDroppedUpdate->status().operation_id());
EXPECT_EQ(v1::OPERATION_DROPPED, operationDroppedUpdate->status().state());
EXPECT_TRUE(metricEquals("master/operations/dropped", 1));
}

// This test verifies that the master reconciles tasks that are
Expand Down
11 changes: 11 additions & 0 deletions src/tests/master_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9247,6 +9247,9 @@ TEST_F(MasterTest, OperationUpdateDuringFailover)

AWAIT_READY(updateOperationStatusMessage1);
AWAIT_READY(updateOperationStatusMessage2);
EXPECT_TRUE(metricEquals("master/operations/pending", 2));
EXPECT_TRUE(metricEquals("master/operations/reserve/pending", 1));
EXPECT_TRUE(metricEquals("master/operations/create_disk/pending", 1));

// Fail over the master.
master->reset();
Expand Down Expand Up @@ -9314,6 +9317,14 @@ TEST_F(MasterTest, OperationUpdateDuringFailover)
EXPECT_EQ(receivedOperationUUIDs, expectedOperationUUIDs);
}

EXPECT_TRUE(metricEquals("master/operations/pending", 0));
EXPECT_TRUE(metricEquals("master/operations/reserve/pending", 0));
EXPECT_TRUE(metricEquals("master/operations/create_disk/pending", 0));

EXPECT_TRUE(metricEquals("master/operations/finished", 2));
EXPECT_TRUE(metricEquals("master/operations/reserve/finished", 1));
EXPECT_TRUE(metricEquals("master/operations/create_disk/finished", 1));

driver.stop();
driver.join();
}
Expand Down
8 changes: 8 additions & 0 deletions src/tests/operation_reconciliation_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1108,6 +1108,7 @@ TEST_P(OperationReconciliationTest, ReconcileUnackedAgentOperation)

EXPECT_EQ(operationId, update->status().operation_id());
EXPECT_EQ(OPERATION_FINISHED, update->status().state());
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

ASSERT_TRUE(update->status().has_agent_id());
EXPECT_EQ(agentId, update->status().agent_id());
Expand Down Expand Up @@ -1253,6 +1254,7 @@ TEST_P(OperationReconciliationTest, UnackedAgentOperationAfterMasterFailover)

EXPECT_EQ(operationId, update->status().operation_id());
EXPECT_EQ(OPERATION_FINISHED, update->status().state());
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

// Simulate master failover.
EXPECT_CALL(*scheduler, disconnected(_));
Expand Down Expand Up @@ -1440,6 +1442,7 @@ TEST_P(OperationReconciliationTest, ReconcileAgentOperationOnGoneAgent)

EXPECT_EQ(operationId, update->status().operation_id());
EXPECT_EQ(OPERATION_FINISHED, update->status().state());
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

ASSERT_TRUE(update->status().has_agent_id());
EXPECT_EQ(agentId, update->status().agent_id());
Expand Down Expand Up @@ -1473,6 +1476,11 @@ TEST_P(OperationReconciliationTest, ReconcileAgentOperationOnGoneAgent)
EXPECT_EQ(operationId, goneUpdate->status().operation_id());
EXPECT_EQ(OPERATION_GONE_BY_OPERATOR, goneUpdate->status().state());

// TODO(bevers): We have to reset the agent before we can access the
// metrics to work around MESOS-9644.
slave->reset();
EXPECT_TRUE(metricEquals("master/operations/gone_by_operator", 1));

ASSERT_TRUE(goneUpdate->status().has_agent_id());
EXPECT_EQ(agentId, goneUpdate->status().agent_id());

Expand Down
39 changes: 33 additions & 6 deletions src/tests/persistent_volume_endpoints_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1533,6 +1533,7 @@ TEST_F(PersistentVolumeEndpointsTest, OfferCreateThenEndpointRemove)
// Make sure the allocator processes the `RESERVE` operation before summoning
// an offer.
AWAIT_READY(updateOperationStatusMessage);
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

// Summon an offer.
EXPECT_CALL(sched, resourceOffers(&driver, _))
Expand Down Expand Up @@ -1567,6 +1568,7 @@ TEST_F(PersistentVolumeEndpointsTest, OfferCreateThenEndpointRemove)

// Make sure the master processes the accept call before summoning an offer.
AWAIT_READY(updateOperationStatusMessage);
EXPECT_TRUE(metricEquals("master/operations/finished", 2));

// Summon an offer.
EXPECT_CALL(sched, resourceOffers(&driver, _))
Expand Down Expand Up @@ -1679,13 +1681,17 @@ TEST_F(PersistentVolumeEndpointsTest, EndpointCreateThenOfferRemove)
unreserved.pushReservation(createDynamicReservationInfo(
frameworkInfo.roles(0), DEFAULT_CREDENTIAL.principal()));

Future<AcknowledgeOperationStatusMessage> acknowledgeOperationStatus1 =
FUTURE_PROTOBUF(AcknowledgeOperationStatusMessage(), _, _);

Future<Response> response = process::http::post(
master.get()->pid,
"reserve",
createBasicAuthHeaders(DEFAULT_CREDENTIAL),
createRequestBody(slaveId, "resources", dynamicallyReserved));

AWAIT_EXPECT_RESPONSE_STATUS_EQ(Accepted().status, response);
AWAIT_READY(acknowledgeOperationStatus1);

// Create a 1MB persistent volume.
Resources volume = createPersistentVolume(
Expand All @@ -1697,13 +1703,17 @@ TEST_F(PersistentVolumeEndpointsTest, EndpointCreateThenOfferRemove)
None(),
frameworkInfo.principal());

Future<AcknowledgeOperationStatusMessage> acknowledgeOperationStatus2 =
FUTURE_PROTOBUF(AcknowledgeOperationStatusMessage(), _, _);

response = process::http::post(
master.get()->pid,
"create-volumes",
createBasicAuthHeaders(DEFAULT_CREDENTIAL),
createRequestBody(slaveId, "volumes", volume));

AWAIT_EXPECT_RESPONSE_STATUS_EQ(Accepted().status, response);
AWAIT_READY(acknowledgeOperationStatus2);

MockScheduler sched;
MesosSchedulerDriver driver(
Expand All @@ -1727,13 +1737,18 @@ TEST_F(PersistentVolumeEndpointsTest, EndpointCreateThenOfferRemove)
EXPECT_TRUE(Resources(offer.resources()).contains(
allocatedResources(volume, frameworkInfo.roles(0))));

Future<UpdateOperationStatusMessage> updateOperationStatusMessage =
FUTURE_PROTOBUF(UpdateOperationStatusMessage(), _, _);
Future<AcknowledgeOperationStatusMessage> acknowledgeOperationStatus3 =
FUTURE_PROTOBUF(AcknowledgeOperationStatusMessage(), _, _);

driver.acceptOffers({offer.id()}, {DESTROY(volume)});

// Make sure the master processes the accept call before summoning an offer.
AWAIT_READY(updateOperationStatusMessage);
AWAIT_READY(acknowledgeOperationStatus3);

// We expect 3 finished operations, `Reserve`, `CreateVolume` and finally
// `DestroyVolume`. The first two are not checked individually because it
// would require additional heavy machinery to do it in a non-flaky way.
EXPECT_TRUE(metricEquals("master/operations/finished", 3));

// Summon an offer.
EXPECT_CALL(sched, resourceOffers(&driver, _))
Expand All @@ -1753,14 +1768,15 @@ TEST_F(PersistentVolumeEndpointsTest, EndpointCreateThenOfferRemove)
<< Resources(offer.resources()) << " vs "
<< allocatedResources(dynamicallyReserved, frameworkInfo.roles(0));

updateOperationStatusMessage =
FUTURE_PROTOBUF(UpdateOperationStatusMessage(), _, _);
Future<AcknowledgeOperationStatusMessage> acknowledgeOperationStatus4 =
FUTURE_PROTOBUF(AcknowledgeOperationStatusMessage(), _, _);

// Unreserve the resources.
driver.acceptOffers({offer.id()}, {UNRESERVE(dynamicallyReserved)});

// Make sure the master processes the accept call before summoning an offer.
AWAIT_READY(updateOperationStatusMessage);
AWAIT_READY(acknowledgeOperationStatus4);
EXPECT_TRUE(metricEquals("master/operations/finished", 4));

// Summon an offer.
EXPECT_CALL(sched, resourceOffers(&driver, _))
Expand Down Expand Up @@ -1829,13 +1845,17 @@ TEST_F(PersistentVolumeEndpointsTest, ReserveAndSlaveRemoval)
slave1Unreserved.pushReservation(createDynamicReservationInfo(
frameworkInfo.roles(0), DEFAULT_CREDENTIAL.principal()));

Future<AcknowledgeOperationStatusMessage> acknowledgeOperationStatus1 =
FUTURE_PROTOBUF(AcknowledgeOperationStatusMessage(), _, _);

Future<Response> response = process::http::post(
master.get()->pid,
"reserve",
createBasicAuthHeaders(DEFAULT_CREDENTIAL),
createRequestBody(slaveId1, "resources", slave1Reserved));

AWAIT_EXPECT_RESPONSE_STATUS_EQ(Accepted().status, response);
AWAIT_READY(acknowledgeOperationStatus1);

MockScheduler sched;
MesosSchedulerDriver driver(
Expand All @@ -1861,6 +1881,9 @@ TEST_F(PersistentVolumeEndpointsTest, ReserveAndSlaveRemoval)
Future<UpdateOperationStatusMessage> updateOperationStatusMessage =
FUTURE_PROTOBUF(UpdateOperationStatusMessage(), _, _);

Future<AcknowledgeOperationStatusMessage> acknowledgeOperationStatus2 =
FUTURE_PROTOBUF(AcknowledgeOperationStatusMessage(), _, _);

// Use the offers API to reserve all CPUs on `slave2`.
Resources slave2Unreserved = Resources::parse("cpus:3").get();
Resources slave2Reserved =
Expand Down Expand Up @@ -1889,6 +1912,10 @@ TEST_F(PersistentVolumeEndpointsTest, ReserveAndSlaveRemoval)
EXPECT_EQ(sentResources, slave2Reserved);

AWAIT_READY(updateOperationStatusMessage);
AWAIT_READY(acknowledgeOperationStatus2);

// Expect both `Reserve` operations to be finished.
EXPECT_TRUE(metricEquals("master/operations/finished", 2));

// Shutdown `slave2` with an explicit shutdown message.
EXPECT_CALL(sched, offerRescinded(_, _));
Expand Down
1 change: 1 addition & 0 deletions src/tests/reservation_endpoints_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1559,6 +1559,7 @@ TEST_F(ReservationEndpointsTest, AgentStateEndpointResources)
// the agent to receive and process the `ApplyOperationMessage` and respond
// with an initial operation status update.
AWAIT_READY(updateOperationStatusMessage);
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

// Make sure CheckpointResourcesMessage handling is completed
// before proceeding.
Expand Down
2 changes: 2 additions & 0 deletions src/tests/scheduler_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1246,6 +1246,8 @@ TEST_P(SchedulerTest, OperationFeedbackValidationNoResourceProviderCapability)
EXPECT_EQ(
mesos::v1::OPERATION_ERROR,
updateOperationStatus->status().state());

EXPECT_TRUE(metricEquals("master/operations/error", 1));
}


Expand Down
5 changes: 5 additions & 0 deletions src/tests/slave_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6549,6 +6549,7 @@ TEST_F(SlaveTest, UpdateOperationStatusRetry)

AWAIT_READY(updateOperationStatusMessage);
ASSERT_EQ(updateOperationStatusMessage->operation_uuid(), operationUuid);
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

Clock::resume();
}
Expand Down Expand Up @@ -10935,6 +10936,9 @@ TEST_F(SlaveTest, RemoveResourceProvider)
EXPECT_TRUE(updateOperationStatus->status().has_agent_id());
EXPECT_FALSE(updateOperationStatus->status().has_resource_provider_id());

// The associated metrics should have also been increased.
EXPECT_TRUE(metricEquals("master/operations/gone_by_operator", 1));

// The agent should also report a change to its resources.
AWAIT_READY(updateSlaveMessage);

Expand Down Expand Up @@ -11521,6 +11525,7 @@ TEST_F(SlaveTest, RetryOperationStatusUpdateAfterRecovery)

EXPECT_EQ(operationId, update->status().operation_id());
EXPECT_EQ(v1::OPERATION_FINISHED, update->status().state());
EXPECT_TRUE(metricEquals("master/operations/finished", 1));

// Restart the agent.
slave.get()->terminate();
Expand Down
Loading

0 comments on commit ede2a94

Please sign in to comment.