Skip to content

Commit

Permalink
Fixed race condition in the OCI Metrics integration test between retr…
Browse files Browse the repository at this point in the history
…ieval of metrics from registry and asserting that from expected results (#4897)

* Fixed race condition in the OCI Metrics integration test between retrieval of metrics from registry and asserting that from expected results. To fix the issue, here are the list

1. Used CountDownLatches to signal when to start testing, for example, test only after results has been retrieved.
2. Make OciMetricsCdiExtension Priority higher than MetricsCdiExtension so that it will only start after MetricsCdiExtension has completed.

* Modify testMetricUpdate to explicitly test the 1st and 2nd metric update and fail test if InterruptedException is received in delay()

* Additional changes to avoid race condition in testMetricUpdate
  • Loading branch information
klustria authored Sep 20, 2022
1 parent f7dc8a1 commit a18a856
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
*/
public class OciMetricsCdiExtension implements Extension {

void registerOciMetrics(@Observes @Priority(LIBRARY_BEFORE + 10) @Initialized(ApplicationScoped.class) Object adv) {
// Make Priority higher than MetricsCdiExtension so this will only start after MetricsCdiExtension has completed.
void registerOciMetrics(@Observes @Priority(LIBRARY_BEFORE + 20) @Initialized(ApplicationScoped.class) Object adv) {
org.eclipse.microprofile.config.Config config = ConfigProvider.getConfig();
Config helidonConfig = MpConfig.toHelidonConfig(config).get("ocimetrics");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
import org.eclipse.microprofile.metrics.MetricRegistry;

import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -62,29 +64,25 @@
@AddConfig(key = "ocimetrics.resourceGroup",
value = OciMetricsCdiExtensionTest.MetricDataDetailsOCIParams.resourceGroup)
@AddConfig(key = "ocimetrics.initialDelay", value = "1")
@AddConfig(key = "ocimetrics.delay", value = "20")
@AddConfig(key = "ocimetrics.delay", value = "2")
public class OciMetricsCdiExtensionTest {
private final RegistryFactory rf = RegistryFactory.getInstance();
private final MetricRegistry baseMetricRegistry = rf.getRegistry(MetricRegistry.Type.BASE);
private final MetricRegistry vendorMetricRegistry = rf.getRegistry(MetricRegistry.Type.VENDOR);
private final MetricRegistry appMetricRegistry = rf.getRegistry(MetricRegistry.Type.APPLICATION);
private static int testMetricCount = 0;
private static volatile int testMetricCount = 0;
// Use countDownLatch1 to signal the test that results to be asserted has been retrieved
private static CountDownLatch countDownLatch1 = new CountDownLatch(1);
private static PostMetricDataDetails postMetricDataDetails;

@Test
public void testRegisterOciMetrics() {
public void testRegisterOciMetrics() throws InterruptedException {
baseMetricRegistry.counter("baseDummyCounter").inc();
vendorMetricRegistry.counter("vendorDummyCounter").inc();
appMetricRegistry.counter("appDummyCounter").inc();
// Wait for signal from metric update that testMetricCount has been retrieved
countDownLatch1.await(10, TimeUnit.SECONDS);

Timer timer = new Timer(10);
while (testMetricCount <= 0) {
if (timer.expired()) {
fail(String.format("Timed out after %d sec. waiting for testMetricCount",
timer.getTimeout()));
}
delay(50L);
}
assertThat(testMetricCount, is(equalTo(3)));

MetricDataDetails metricDataDetails = postMetricDataDetails.getMetricData().get(0);
Expand Down Expand Up @@ -144,6 +142,8 @@ public ListAlarmsStatusResponse listAlarmsStatus(ListAlarmsStatusRequest listAla
public PostMetricDataResponse postMetricData(PostMetricDataRequest postMetricDataRequest) {
postMetricDataDetails = postMetricDataRequest.getPostMetricDataDetails();
testMetricCount = postMetricDataDetails.getMetricData().size();
// Give signal that testMetricCount was retrieved
countDownLatch1.countDown();
return PostMetricDataResponse.builder()
.__httpStatusCode__(200)
.build();
Expand Down Expand Up @@ -178,24 +178,6 @@ public interface MetricDataDetailsOCIParams {
String resourceGroup = "dummy_resourceGroup";
}

private static class Timer {
private final long endTime;
private final int timeOut;

public Timer(int timeOut) {
this.timeOut = timeOut;
this.endTime = System.currentTimeMillis() + 1_000L * timeOut;
}

public boolean expired() {
return System.currentTimeMillis() >= this.endTime;
}

int getTimeout() {
return this.timeOut;
}
}

private static void delay(long millis) {
try {
Thread.sleep(millis);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import io.helidon.config.Config;
Expand Down Expand Up @@ -52,9 +53,11 @@ public class OciMetricsSupportTest {
private static final MonitoringClient monitoringClient = mock(MonitoringClient.class);
private final Type[] types = {Type.BASE, Type.VENDOR, Type.APPLICATION};

private static volatile int testMetricUpdatePostMetricDataCallCount = 0;
private static volatile Double testMetricUpdateCounterValue;
private static volatile Double[] testMetricUpdateCounterValue = new Double[2];
private static volatile int testMetricCount = 0;
// Use CountDownLatch to signal when to start testing, for example, test only after results has been retrieved.
private static CountDownLatch countDownLatch1;
private static int noOfExecutions;

private final RegistryFactory rf = RegistryFactory.getInstance();
private final MetricRegistry baseMetricRegistry = rf.getRegistry(Type.BASE);
Expand All @@ -76,49 +79,55 @@ public boolean matches(MetricID metricID, Metric metric) {
}

@Test
public void testMetricUpdate() {
// mock monitoringClient.postMetricData()
public void testMetricUpdate() throws InterruptedException {
Counter counter = baseMetricRegistry.counter("DummyCounter");

countDownLatch1 = new CountDownLatch(1);
noOfExecutions = 0;

doAnswer(invocationOnMock -> {
noOfExecutions++;
PostMetricDataRequest postMetricDataRequest = invocationOnMock.getArgument(0);
PostMetricDataDetails postMetricDataDetails = postMetricDataRequest.getPostMetricDataDetails();
List<MetricDataDetails> allMetricDataDetails = postMetricDataDetails.getMetricData();
testMetricUpdateCounterValue = allMetricDataDetails.get(0).getDatapoints().get(0).getValue();
testMetricUpdatePostMetricDataCallCount++;
// put 1st result in testMetricUpdateCounterValue index 0 and succeeding update in index 1 to ensure
// that the 1st update result does not overwrite the 2nd update in rare situations where all metric
// updates have already completed before the process to assert results has even started
testMetricUpdateCounterValue[noOfExecutions == 1 ? 0 : 1] =
allMetricDataDetails.get(0).getDatapoints().get(0).getValue();
if (noOfExecutions == 1) {
counter.inc();
} else {
// Give signal that multiple metric updates have been triggered
countDownLatch1.countDown();
}
return PostMetricDataResponse.builder()
.__httpStatusCode__(200)
.build();
}).when(monitoringClient).postMetricData(any());

Counter counter = baseMetricRegistry.counter("DummyCounter");

OciMetricsSupport.Builder ociMetricsSupportBuilder = OciMetricsSupport.builder()
.compartmentId("compartmentId")
.namespace("namespace")
.resourceGroup("resourceGroup")
.initialDelay(1L)
.delay(2L)
.descriptionEnabled(false)

.monitoringClient(monitoringClient);

Routing routing = createRouting(ociMetricsSupportBuilder);

counter.inc();
WebServer webServer = createWebServer(routing);

delay(1000L);
counter.inc();
// Wait for metric updates to complete
countDownLatch1.await(10, java.util.concurrent.TimeUnit.SECONDS);

Timer timer = new Timer(10);
while (testMetricUpdatePostMetricDataCallCount < 2) {
if (timer.expired()) {
fail(String.format("Timed out after %d sec. waiting for 2 Monitoring.postMetricData() calls",
timer.getTimeout()));
}
delay(50L);
}
assertThat(testMetricUpdateCounterValue.intValue(), is(equalTo(2)));
webServer.shutdown();
// Test the 1st and 2nd metric counter updates
assertThat(testMetricUpdateCounterValue[0].intValue(), is(equalTo(1)));
assertThat(testMetricUpdateCounterValue[1].intValue(), is(equalTo(2)));

webServer.shutdown().await(10, java.util.concurrent.TimeUnit.SECONDS);
}

@Test
Expand Down Expand Up @@ -178,7 +187,7 @@ public void testDisableMetrics() {
.compartmentId("compartmentId")
.resourceGroup("resourceGroup")
.initialDelay(50L)
.delay(20000L)
.delay(500L)
.schedulingTimeUnit(TimeUnit.MILLISECONDS)
.descriptionEnabled(false)
.monitoringClient(monitoringClient)
Expand All @@ -190,9 +199,9 @@ public void testDisableMetrics() {

delay(1000L);

webServer.shutdown().await(10, java.util.concurrent.TimeUnit.SECONDS);
// metric count should remain 0 as metrics is disabled
assertThat(testMetricCount, is(equalTo(0)));
webServer.shutdown();

}

Expand All @@ -202,6 +211,8 @@ private void mockPostMetricDataAndGetTestMetricCount() {
PostMetricDataRequest postMetricDataRequest = invocationOnMock.getArgument(0);
PostMetricDataDetails postMetricDataDetails = postMetricDataRequest.getPostMetricDataDetails();
testMetricCount = postMetricDataDetails.getMetricData().size();
// Give signal that testMetricCount was retrieved
countDownLatch1.countDown();
return PostMetricDataResponse.builder()
.__httpStatusCode__(200)
.build();
Expand All @@ -214,7 +225,7 @@ private WebServer createWebServer(Routing routing) {
.port(8888)
.routing(routing)
.build();
webServer.start();
webServer.start().await(10L, TimeUnit.SECONDS);
return webServer;
}

Expand Down Expand Up @@ -251,7 +262,7 @@ private void validateMetricCount(String[] scopes, int expectedMetricCount) {
.compartmentId("compartmentId")
.resourceGroup("resourceGroup")
.initialDelay(50L)
.delay(20000L)
.delay(2000L)
.schedulingTimeUnit(TimeUnit.MILLISECONDS)
.descriptionEnabled(false)
.scopes(scopes)
Expand All @@ -262,45 +273,25 @@ private void validateMetricCount(String[] scopes, int expectedMetricCount) {

private void validateMetricCount(OciMetricsSupport.Builder ociMetricsSupportBuilder, int expectedMetricCount) {
testMetricCount = 0;
countDownLatch1 = new CountDownLatch(1);
Routing routing = createRouting(ociMetricsSupportBuilder);
WebServer webServer = createWebServer(routing);

Timer timer = new Timer(10);
while (testMetricCount <= 0) {
if (timer.expired()) {
fail(String.format("Timed out after %d sec. waiting for testMetricCount",
timer.getTimeout()));
}
delay(50L);
try {
// Wait for signal from metric update that testMetricCount has been retrieved
countDownLatch1.await(10, TimeUnit.SECONDS);
} catch(InterruptedException e) {
fail("Error while waiting for testMetricCount: " + e.getMessage());
}

webServer.shutdown().await(10, java.util.concurrent.TimeUnit.SECONDS);
assertThat(testMetricCount, is(equalTo(expectedMetricCount)));
webServer.shutdown();
}

private static class Timer {
private final long endTime;
private final int timeOut;

public Timer(int timeOut) {
this.timeOut = timeOut;
this.endTime = System.nanoTime() + 1_000_000_000L * timeOut;
}

public boolean expired() {
return System.nanoTime() >= this.endTime;
}

int getTimeout() {
return this.timeOut;
}
}

private static void delay(long millis) {
try {
Thread.sleep(millis);
} catch (InterruptedException ignore) {
Thread.currentThread().interrupt();
} catch (InterruptedException ie) {
fail("InterruptedException received in delay(" + millis + ") with message: " + ie.getMessage());
}
}
}

0 comments on commit a18a856

Please sign in to comment.