Skip to content

Commit

Permalink
Does not log while AgentHealthManager is locked. (#1599)
Browse files Browse the repository at this point in the history
There could have been a deadlock if AgentHealthManager is invalidated
while a change in Health is happening from different threads. Details
see #1597
  • Loading branch information
quandor authored Apr 24, 2023
1 parent aec7b1a commit 849900c
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -159,23 +159,32 @@ private void checkHealthAndSchedule() {

private void triggerEventAndMetricIfHealthChanged() {
if (getCurrentHealth() != lastNotifiedHealth) {
AgentHealthChangedEvent event = null;
synchronized (this) {
AgentHealth currHealth = getCurrentHealth();
if (currHealth != lastNotifiedHealth) {
AgentHealth lastHealth = lastNotifiedHealth;
lastNotifiedHealth = currHealth;
if (currHealth.isMoreSevereOrEqualTo(lastHealth)) {
log.warn(LOG_CHANGE_STATUS, lastHealth, currHealth);
} else {
log.info(LOG_CHANGE_STATUS, lastHealth, currHealth);
}

selfMonitoringService.recordMeasurement("health", currHealth.ordinal());

AgentHealthChangedEvent event = new AgentHealthChangedEvent(this, lastHealth, currHealth);
event = new AgentHealthChangedEvent(this, lastHealth, currHealth);
ctx.publishEvent(event);
}
}

// It is important that logging happens outside the synchronized block above.
// Otherwise, a deadlock may happen since _this_ is via Interface
// InternalProcessingAppender.LogEventConsumer indirectly a part of the logback infrastructure
if (event != null) {
AgentHealth newHealth = event.getNewHealth();
AgentHealth oldHealth = event.getOldHealth();
if(newHealth.isMoreSevereOrEqualTo(oldHealth)) {
log.warn(LOG_CHANGE_STATUS, oldHealth, newHealth);
} else {
log.info(LOG_CHANGE_STATUS, oldHealth, newHealth);
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package rocks.inspectit.ocelot.core.selfmonitoring;

import org.awaitility.Awaitility;
import org.checkerframework.checker.units.qual.A;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.test.annotation.DirtiesContext;
import rocks.inspectit.ocelot.core.SpringTestBase;
import rocks.inspectit.ocelot.core.logging.logback.LogbackInitializer;

import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

/**
* Regression test for <a href="https://github.com/inspectIT/inspectit-ocelot/issues/1597">Github issue 1597</a>.
*/
@DirtiesContext
public class AgentHealthManagerDeadlockGh1597IntTest extends SpringTestBase {

private static final Logger logger = LoggerFactory.getLogger("test-logger");

@Autowired
private AgentHealthManager cut;

@Test
void testLogging() throws Exception {
// This installs InternalProcessingAppender which together with AgentHealthManager caused a deadlock
LogbackInitializer.initDefaultLogging();

int millisToRun = 500;
Thread logThread = new Thread(() -> {
long start = System.currentTimeMillis();
boolean shouldLogError = true;
while (System.currentTimeMillis() - start < millisToRun) {
if (shouldLogError) {
logger.error("Reporting an error");
shouldLogError = false;
} else {
logger.info("Reporting an info");
shouldLogError = true;
}

}
});

AtomicBoolean isInvalidationThreadDone = new AtomicBoolean(false);

Thread invalidationThread = new Thread(() -> {
long start = System.currentTimeMillis();
while (System.currentTimeMillis() - start < millisToRun) {
cut.onInvalidationEvent(cut.getClass());
}
isInvalidationThreadDone.getAndSet(true);
});

// letting those two threads run for a while caused the deadlock on the developers machine before the fix.
// This is far from an ideal test, but the best we came up with.
logThread.start();
invalidationThread.start();

Awaitility.waitAtMost(millisToRun * 2, TimeUnit.MILLISECONDS).until(isInvalidationThreadDone::get);
}

}

0 comments on commit 849900c

Please sign in to comment.