Skip to content

Commit

Permalink
Recover from Service-Factory deadlock
Browse files Browse the repository at this point in the history
This assumes that factories are stateless.

Signed-off-by: Hannes Wellmann <wellmann.hannes1@gmx.net>
  • Loading branch information
HannesWell committed Oct 27, 2022
1 parent 2dbea48 commit fa69761
Show file tree
Hide file tree
Showing 11 changed files with 70 additions and 85 deletions.
1 change: 1 addition & 0 deletions bundles/org.eclipse.equinox.ds.tests/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/scr_test
/OSGI-INF/org.eclipse.*.xml

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

package org.eclipse.equinox.ds.tests;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -60,13 +60,6 @@ public T getService(Bundle bundle, ServiceRegistration<T> registration) {
@Override
public void ungetService(Bundle bundle, ServiceRegistration<T> registration, T service) {
}

}

class Service1 {
}

class Service2 {
}

CountDownLatch l1 = new CountDownLatch(1);
Expand All @@ -75,13 +68,13 @@ class Service2 {

ServiceFactory<Service1> factory1 = new SimpleServiceFactory<>((bundle, registration) -> {
countDownAndAwaitOther(l1, l2);
ctx.getService(ctx.getServiceReference(Service2.class));
return new Service1();
Service2 s = ctx.getService(ctx.getServiceReference(Service2.class));
return new Service1(s);
});
ServiceFactory<Service2> factory2 = new SimpleServiceFactory<>((bundle, registration) -> {
countDownAndAwaitOther(l2, l1);
ctx.getService(ctx.getServiceReference(Service1.class));
return new Service2();
Service1 s = ctx.getService(ctx.getServiceReference(Service1.class));
return new Service2(s);
});
ctx.registerService(Service1.class, factory1, null);
ctx.registerService(Service2.class, factory2, null);
Expand All @@ -92,16 +85,31 @@ class Service2 {
Future<Service2> service2 = executor.submit(() -> ctx.getService(ctx.getServiceReference(Service2.class)));
Service1 s1 = service1.get(5, TimeUnit.SECONDS); // times out in case of dead-lock
Service2 s2 = service2.get(5, TimeUnit.SECONDS); // times out in case of dead-lock
if (s1 != null && s2 != null) {
fail("At least one dead-locked component should be null");
}
// TODO: when we have a strategy to re-cover from the deadlock, check that both
// services are not null.
assertNotNull(s1);
assertNotNull(s2);
assertFalse(s1.s2 == null && s2.s1 == null);
assertTrue(s1.s2 == null || s2.s1 == null);
} finally {
executor.shutdown();
}
}

class Service1 {
final Service2 s2;

Service1(Service2 s2) {
this.s2 = s2;
}
}

class Service2 {
final Service1 s1;

Service2(Service1 s1) {
this.s1 = s1;
}
}

void countDownAndAwaitOther(CountDownLatch l1, CountDownLatch l2) {
l1.countDown();
try {
Expand All @@ -122,16 +130,12 @@ public void testLateBindingInSameBundleDeadLock() throws Exception {
// component might or might not be null depending on in which thread the
// DeadLock was detected and the SCR-ServiceFactory therefore threw an
// Exception.
if (c1 != null) {
// If the main-thread did not detect the dead-lock and therefore Component1 is
// successfully created, the multi-ref from its Comp2 to Comp3 must be empty and
// its Comp4 must have its Comp5 successfully created.
assertTrue(c1.a.c3s.isEmpty());
assertNotNull(c1.b.b);
}
// TODO: when we have a strategy to re-cover from the deadlock, check that c1 is
// never null and that c1.a.c3s is not empty and its only element has a Comp2
// and Comp5
assertNotNull(c1);
assertFalse(c1.a.c3s.isEmpty());
assertNotNull(c1.b.b);
Component3 c3 = c1.a.c3s.get(0);
assertNotNull(c3.a);
assertNotNull(c3.b);
} finally {
executor.shutdown();
// ctx.ungetService(reference);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,9 @@ public S run() {
}
});
} catch (Throwable t) {
if (t instanceof ServiceException && ((ServiceException) t).getType() == ServiceUse.DEADLOCK) {
throw t;
}
if (debug.DEBUG_SERVICES) {
Debug.println(factory + ".getService() exception: " + t.getMessage()); //$NON-NLS-1$
Debug.printStackTrace(t);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@

package org.eclipse.osgi.internal.serviceregistry;

import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Dictionary;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantLock;
import org.eclipse.osgi.internal.debug.Debug;
Expand Down Expand Up @@ -493,6 +498,8 @@ public Bundle getRegisteringBundle() {
return bundle;
}

private static final Random DEADLOCK_RETRY_DELAY = new Random();

/**
* Get a service object for the using BundleContext.
*
Expand All @@ -514,6 +521,7 @@ S getService(BundleContextImpl user, ServiceConsumer consumer) {
+ ")"); //$NON-NLS-1$
}
/* Use a while loop to support retry if a call to a ServiceFactory fails */
int deadLockRetry = 0;
while (true) {
final ServiceUse<S> use;
final boolean added;
Expand Down Expand Up @@ -579,10 +587,37 @@ S getService(BundleContextImpl user, ServiceConsumer consumer) {
}
}
return serviceObject;
} catch (ServiceException e) {
if ((e.getType() != ServiceUse.DEADLOCK || !isFirstCall()) && deadLockRetry < 10) {
throw e;
}
if (registry.debug.DEBUG_SERVICES) {
Debug.println("[" + Thread.currentThread().getName() + "] getServiceRetryAfterDeadlock[" //$NON-NLS-1$ //$NON-NLS-2$
+ user.getBundleImpl() + "](" + this //$NON-NLS-1$
+ ")"); //$NON-NLS-1$
}
try {
// Try again and again with some random delay to change the timings
Thread.sleep(DEADLOCK_RETRY_DELAY.nextInt(1000) * deadLockRetry++);
} catch (InterruptedException e1) { // ignore
Thread.currentThread().interrupt();
}
}
}
}

private static boolean isFirstCall() {
ThreadMXBean threadMXBean = ManagementFactory.getThreadMXBean();
ThreadInfo threadInfo = threadMXBean.getThreadInfo(Thread.currentThread().getId(), Integer.MAX_VALUE);
StackTraceElement[] stackTrace = threadInfo.getStackTrace();
int callerFrameIndex = 4;
return Arrays.stream(stackTrace).skip(callerFrameIndex + 1L)
.noneMatch(f -> isSameMethod(f, stackTrace[callerFrameIndex]));
}

private static boolean isSameMethod(StackTraceElement f1, StackTraceElement f2) {
return f1.getClassName().equals(f2.getClassName()) && f1.getMethodName().equals(f2.getMethodName());
}

/**
* Create a new ServiceObjects for the requesting bundle.
Expand Down

0 comments on commit fa69761

Please sign in to comment.