Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed race condition in the OCI Metrics integration test between retrieval of metrics from registry and asserting that from expected results #4897

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
*/
public class OciMetricsCdiExtension implements Extension {

void registerOciMetrics(@Observes @Priority(LIBRARY_BEFORE + 10) @Initialized(ApplicationScoped.class) Object adv) {
// Make Priority higher than MetricsCdiExtension so this will only start after MetricsCdiExtension has completed.
void registerOciMetrics(@Observes @Priority(LIBRARY_BEFORE + 20) @Initialized(ApplicationScoped.class) Object adv) {
org.eclipse.microprofile.config.Config config = ConfigProvider.getConfig();
Config helidonConfig = MpConfig.toHelidonConfig(config).get("ocimetrics");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
import org.eclipse.microprofile.metrics.MetricRegistry;

import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.fail;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
Expand All @@ -62,29 +64,25 @@
@AddConfig(key = "ocimetrics.resourceGroup",
value = OciMetricsCdiExtensionTest.MetricDataDetailsOCIParams.resourceGroup)
@AddConfig(key = "ocimetrics.initialDelay", value = "1")
@AddConfig(key = "ocimetrics.delay", value = "20")
@AddConfig(key = "ocimetrics.delay", value = "2")
public class OciMetricsCdiExtensionTest {
private final RegistryFactory rf = RegistryFactory.getInstance();
private final MetricRegistry baseMetricRegistry = rf.getRegistry(MetricRegistry.Type.BASE);
private final MetricRegistry vendorMetricRegistry = rf.getRegistry(MetricRegistry.Type.VENDOR);
private final MetricRegistry appMetricRegistry = rf.getRegistry(MetricRegistry.Type.APPLICATION);
private static int testMetricCount = 0;
private static volatile int testMetricCount = 0;
// Use countDownLatch1 to signal the test that results to be asserted has been retrieved
private static CountDownLatch countDownLatch1 = new CountDownLatch(1);
private static PostMetricDataDetails postMetricDataDetails;

@Test
public void testRegisterOciMetrics() {
public void testRegisterOciMetrics() throws InterruptedException {
baseMetricRegistry.counter("baseDummyCounter").inc();
vendorMetricRegistry.counter("vendorDummyCounter").inc();
appMetricRegistry.counter("appDummyCounter").inc();
// Wait for signal from metric update that testMetricCount has been retrieved
countDownLatch1.await(10, TimeUnit.SECONDS);

Timer timer = new Timer(10);
while (testMetricCount <= 0) {
if (timer.expired()) {
fail(String.format("Timed out after %d sec. waiting for testMetricCount",
timer.getTimeout()));
}
delay(50L);
}
assertThat(testMetricCount, is(equalTo(3)));

MetricDataDetails metricDataDetails = postMetricDataDetails.getMetricData().get(0);
Expand Down Expand Up @@ -144,6 +142,8 @@ public ListAlarmsStatusResponse listAlarmsStatus(ListAlarmsStatusRequest listAla
public PostMetricDataResponse postMetricData(PostMetricDataRequest postMetricDataRequest) {
postMetricDataDetails = postMetricDataRequest.getPostMetricDataDetails();
testMetricCount = postMetricDataDetails.getMetricData().size();
// Give signal that testMetricCount was retrieved
countDownLatch1.countDown();
return PostMetricDataResponse.builder()
.__httpStatusCode__(200)
.build();
Expand Down Expand Up @@ -178,24 +178,6 @@ public interface MetricDataDetailsOCIParams {
String resourceGroup = "dummy_resourceGroup";
}

private static class Timer {
private final long endTime;
private final int timeOut;

public Timer(int timeOut) {
this.timeOut = timeOut;
this.endTime = System.currentTimeMillis() + 1_000L * timeOut;
}

public boolean expired() {
return System.currentTimeMillis() >= this.endTime;
}

int getTimeout() {
return this.timeOut;
}
}

private static void delay(long millis) {
try {
Thread.sleep(millis);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import io.helidon.config.Config;
Expand Down Expand Up @@ -52,9 +53,11 @@ public class OciMetricsSupportTest {
private static final MonitoringClient monitoringClient = mock(MonitoringClient.class);
private final Type[] types = {Type.BASE, Type.VENDOR, Type.APPLICATION};

private static volatile int testMetricUpdatePostMetricDataCallCount = 0;
private static volatile Double testMetricUpdateCounterValue;
private static volatile Double[] testMetricUpdateCounterValue = new Double[2];
private static volatile int testMetricCount = 0;
// Use CountDownLatch to signal when to start testing, for example, test only after results has been retrieved.
private static CountDownLatch countDownLatch1;
private static int noOfExecutions;

private final RegistryFactory rf = RegistryFactory.getInstance();
private final MetricRegistry baseMetricRegistry = rf.getRegistry(Type.BASE);
Expand All @@ -76,49 +79,55 @@ public boolean matches(MetricID metricID, Metric metric) {
}

@Test
public void testMetricUpdate() {
// mock monitoringClient.postMetricData()
public void testMetricUpdate() throws InterruptedException {
Counter counter = baseMetricRegistry.counter("DummyCounter");

countDownLatch1 = new CountDownLatch(1);
noOfExecutions = 0;

doAnswer(invocationOnMock -> {
noOfExecutions++;
PostMetricDataRequest postMetricDataRequest = invocationOnMock.getArgument(0);
PostMetricDataDetails postMetricDataDetails = postMetricDataRequest.getPostMetricDataDetails();
List<MetricDataDetails> allMetricDataDetails = postMetricDataDetails.getMetricData();
testMetricUpdateCounterValue = allMetricDataDetails.get(0).getDatapoints().get(0).getValue();
testMetricUpdatePostMetricDataCallCount++;
// put 1st result in testMetricUpdateCounterValue index 0 and succeeding update in index 1 to ensure
// that the 1st update result does not overwrite the 2nd update in rare situations where all metric
// updates have already completed before the process to assert results has even started
testMetricUpdateCounterValue[noOfExecutions == 1 ? 0 : 1] =
allMetricDataDetails.get(0).getDatapoints().get(0).getValue();
if (noOfExecutions == 1) {
counter.inc();
} else {
// Give signal that multiple metric updates have been triggered
countDownLatch1.countDown();
}
return PostMetricDataResponse.builder()
.__httpStatusCode__(200)
.build();
}).when(monitoringClient).postMetricData(any());

Counter counter = baseMetricRegistry.counter("DummyCounter");

OciMetricsSupport.Builder ociMetricsSupportBuilder = OciMetricsSupport.builder()
.compartmentId("compartmentId")
.namespace("namespace")
.resourceGroup("resourceGroup")
.initialDelay(1L)
.delay(2L)
.descriptionEnabled(false)

.monitoringClient(monitoringClient);

Routing routing = createRouting(ociMetricsSupportBuilder);

counter.inc();
WebServer webServer = createWebServer(routing);

delay(1000L);
counter.inc();
// Wait for metric updates to complete
countDownLatch1.await(10, java.util.concurrent.TimeUnit.SECONDS);

Timer timer = new Timer(10);
while (testMetricUpdatePostMetricDataCallCount < 2) {
if (timer.expired()) {
fail(String.format("Timed out after %d sec. waiting for 2 Monitoring.postMetricData() calls",
timer.getTimeout()));
}
delay(50L);
}
assertThat(testMetricUpdateCounterValue.intValue(), is(equalTo(2)));
webServer.shutdown();
// Test the 1st and 2nd metric counter updates
assertThat(testMetricUpdateCounterValue[0].intValue(), is(equalTo(1)));
assertThat(testMetricUpdateCounterValue[1].intValue(), is(equalTo(2)));

webServer.shutdown().await(10, java.util.concurrent.TimeUnit.SECONDS);
}

@Test
Expand Down Expand Up @@ -178,7 +187,7 @@ public void testDisableMetrics() {
.compartmentId("compartmentId")
.resourceGroup("resourceGroup")
.initialDelay(50L)
.delay(20000L)
.delay(500L)
.schedulingTimeUnit(TimeUnit.MILLISECONDS)
.descriptionEnabled(false)
.monitoringClient(monitoringClient)
Expand All @@ -190,9 +199,9 @@ public void testDisableMetrics() {

delay(1000L);

webServer.shutdown().await(10, java.util.concurrent.TimeUnit.SECONDS);
// metric count should remain 0 as metrics is disabled
assertThat(testMetricCount, is(equalTo(0)));
webServer.shutdown();

}

Expand All @@ -202,6 +211,8 @@ private void mockPostMetricDataAndGetTestMetricCount() {
PostMetricDataRequest postMetricDataRequest = invocationOnMock.getArgument(0);
PostMetricDataDetails postMetricDataDetails = postMetricDataRequest.getPostMetricDataDetails();
testMetricCount = postMetricDataDetails.getMetricData().size();
// Give signal that testMetricCount was retrieved
countDownLatch1.countDown();
return PostMetricDataResponse.builder()
.__httpStatusCode__(200)
.build();
Expand All @@ -214,7 +225,7 @@ private WebServer createWebServer(Routing routing) {
.port(8888)
.routing(routing)
.build();
webServer.start();
webServer.start().await(10L, TimeUnit.SECONDS);
return webServer;
}

Expand Down Expand Up @@ -251,7 +262,7 @@ private void validateMetricCount(String[] scopes, int expectedMetricCount) {
.compartmentId("compartmentId")
.resourceGroup("resourceGroup")
.initialDelay(50L)
.delay(20000L)
.delay(2000L)
.schedulingTimeUnit(TimeUnit.MILLISECONDS)
.descriptionEnabled(false)
.scopes(scopes)
Expand All @@ -262,45 +273,25 @@ private void validateMetricCount(String[] scopes, int expectedMetricCount) {

private void validateMetricCount(OciMetricsSupport.Builder ociMetricsSupportBuilder, int expectedMetricCount) {
testMetricCount = 0;
countDownLatch1 = new CountDownLatch(1);
Routing routing = createRouting(ociMetricsSupportBuilder);
WebServer webServer = createWebServer(routing);

Timer timer = new Timer(10);
while (testMetricCount <= 0) {
if (timer.expired()) {
fail(String.format("Timed out after %d sec. waiting for testMetricCount",
timer.getTimeout()));
}
delay(50L);
try {
// Wait for signal from metric update that testMetricCount has been retrieved
countDownLatch1.await(10, TimeUnit.SECONDS);
} catch(InterruptedException e) {
fail("Error while waiting for testMetricCount: " + e.getMessage());
}

webServer.shutdown().await(10, java.util.concurrent.TimeUnit.SECONDS);
assertThat(testMetricCount, is(equalTo(expectedMetricCount)));
webServer.shutdown();
}

private static class Timer {
private final long endTime;
private final int timeOut;

public Timer(int timeOut) {
this.timeOut = timeOut;
this.endTime = System.nanoTime() + 1_000_000_000L * timeOut;
}

public boolean expired() {
return System.nanoTime() >= this.endTime;
}

int getTimeout() {
return this.timeOut;
}
}

private static void delay(long millis) {
try {
Thread.sleep(millis);
} catch (InterruptedException ignore) {
Thread.currentThread().interrupt();
klustria marked this conversation as resolved.
Show resolved Hide resolved
} catch (InterruptedException ie) {
fail("InterruptedException received in delay(" + millis + ") with message: " + ie.getMessage());
}
}
}