-
Notifications
You must be signed in to change notification settings - Fork 869
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable strict context check and fix some context issues. #2637
Changes from all commits
813fd73
8a6fc39
55aa488
b97c671
be61373
b2232ce
ecaa951
e49eef7
9d5adcd
be8bfe4
33a9fe6
2d32b4e
dd99146
b4b89ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -128,10 +128,6 @@ private void populateGenericAttributes(Span span, ExecutionAttributes attributes | |
@Override | ||
public void afterExecution( | ||
Context.AfterExecution context, ExecutionAttributes executionAttributes) { | ||
Scope scope = executionAttributes.getAttribute(SCOPE_ATTRIBUTE); | ||
if (scope != null) { | ||
scope.close(); | ||
} | ||
io.opentelemetry.context.Context otelContext = getContext(executionAttributes); | ||
clearAttributes(executionAttributes); | ||
Span span = Span.fromContext(otelContext); | ||
|
@@ -168,6 +164,10 @@ public void onExecutionFailure( | |
} | ||
|
||
private void clearAttributes(ExecutionAttributes executionAttributes) { | ||
Scope scope = executionAttributes.getAttribute(SCOPE_ATTRIBUTE); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 😱 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 💯 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't get it. Both why this change is significant and your reaction to it :D There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a real context leak with big damage - if sync AWS SDK request failed and a subsequent client request of some form was made, it would parent to the AWS SDK request (well it'd be suppressed as a result usually)! Hooray for strict context checking |
||
if (scope != null) { | ||
scope.close(); | ||
} | ||
executionAttributes.putAttribute(CONTEXT_ATTRIBUTE, null); | ||
executionAttributes.putAttribute(AWS_SDK_REQUEST_ATTRIBUTE, null); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,5 +18,3 @@ dependencies { | |
|
||
latestDepTestLibrary group: 'com.squareup.okhttp', name: 'okhttp', version: '[2.6,3)' | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,3 +20,8 @@ dependencies { | |
testImplementation group: 'com.sun.activation', name: 'jakarta.activation', version: '1.2.2' | ||
} | ||
} | ||
|
||
tasks.withType(Test) { | ||
// https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/2648 | ||
jvmArgs "-Dio.opentelemetry.javaagent.shaded.io.opentelemetry.context.enableStrictContext=false" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does this mean that our ratpack instrumentation actually leaks context, or is it an artifact of something else? If we are really leaking contexts, should we create issues to fix all of these, so we make sure to circle back and fix them? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point, filed an issue |
||
} |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,20 +27,18 @@ public void consumeMessageBefore(ConsumeMessageContext context) { | |
if (context == null || context.getMsgList() == null || context.getMsgList().isEmpty()) { | ||
return; | ||
} | ||
Context traceContext = tracer.startSpan(Context.current(), context.getMsgList()); | ||
ContextAndScope contextAndScope = new ContextAndScope(traceContext, traceContext.makeCurrent()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup, thanks |
||
context.setMqTraceContext(contextAndScope); | ||
Context otelContext = tracer.startSpan(Context.current(), context.getMsgList()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like similar netty libraries, there's no thread guarantee for before and end. Since there's no downstream instrumentation it's not a big deal anyways There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about e.g. user-provided message hooks that want to augment existing span? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We will need to provide something like I hope such hooks will be able to use our instrumentation-api instead of that though. |
||
context.setMqTraceContext(otelContext); | ||
} | ||
|
||
@Override | ||
public void consumeMessageAfter(ConsumeMessageContext context) { | ||
if (context == null || context.getMsgList() == null || context.getMsgList().isEmpty()) { | ||
return; | ||
} | ||
if (context.getMqTraceContext() instanceof ContextAndScope) { | ||
ContextAndScope contextAndScope = (ContextAndScope) context.getMqTraceContext(); | ||
contextAndScope.closeScope(); | ||
tracer.end(contextAndScope.getContext()); | ||
if (context.getMqTraceContext() instanceof Context) { | ||
Context otelContext = (Context) context.getMqTraceContext(); | ||
tracer.end(otelContext); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,43 @@ | |
/** Utils for concurrent instrumentations. */ | ||
public class ExecutorInstrumentationUtils { | ||
|
||
private static final ClassValue<Boolean> NOT_INSTRUMENTED_RUNNABLE_ENCLOSING_CLASS = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice, i'm trying to remember to look out for opportunities to use also 😄 |
||
new ClassValue<Boolean>() { | ||
@Override | ||
protected Boolean computeValue(Class<?> enclosingClass) { | ||
// Avoid context leak on jetty. Runnable submitted from SelectChannelEndPoint is used to | ||
// process a new request which should not have context from them current request. | ||
if (enclosingClass.getName().equals("org.eclipse.jetty.io.nio.SelectChannelEndPoint")) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Just an idea, but maybe those class names should be specified by instrumentation modules? Each instrumented library may bring its own set of excluded classes (also see There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah! That's the approach Datadog has implemented, it would be nice for us to follow it at some point. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice - I originally thought about adding some |
||
return true; | ||
} | ||
|
||
// Don't instrument the executor's own runnables. These runnables may never return until | ||
// netty shuts down. | ||
if (enclosingClass | ||
.getName() | ||
.equals("io.netty.util.concurrent.SingleThreadEventExecutor")) { | ||
return true; | ||
} | ||
|
||
// OkHttp task runner is a lazily-initialized shared pool of continuosly running threads | ||
// similar to an event loop. The submitted tasks themselves should already be instrumented | ||
// to | ||
// allow async propagation. | ||
if (enclosingClass.getName().equals("okhttp3.internal.concurrent.TaskRunner")) { | ||
return true; | ||
} | ||
|
||
// OkHttp connection pool lazily initializes a long running task to detect expired | ||
// connections | ||
// and should not itself be instrumented. | ||
if (enclosingClass.getName().equals("com.squareup.okhttp.ConnectionPool")) { | ||
return true; | ||
} | ||
|
||
return false; | ||
} | ||
}; | ||
|
||
/** | ||
* Checks if given task should get state attached. | ||
* | ||
|
@@ -28,22 +65,37 @@ public static boolean shouldAttachStateToTask(Object task) { | |
Class<?> taskClass = task.getClass(); | ||
Class<?> enclosingClass = taskClass.getEnclosingClass(); | ||
|
||
// not much point in propagating root context | ||
// plus it causes failures under otel.javaagent.testing.fail-on-context-leak=true | ||
return Context.current() != Context.root() | ||
// TODO Workaround for | ||
// https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/787 | ||
&& !taskClass.getName().equals("org.apache.tomcat.util.net.NioEndpoint$SocketProcessor") | ||
// Avoid context leak on jetty. Runnable submitted from SelectChannelEndPoint is used to | ||
// process a new request which should not have context from them current request. | ||
&& (enclosingClass == null | ||
|| !enclosingClass.getName().equals("org.eclipse.jetty.io.nio.SelectChannelEndPoint")) | ||
// Don't instrument the executor's own runnables. These runnables may never return until | ||
// netty shuts down. | ||
&& (enclosingClass == null | ||
|| !enclosingClass | ||
.getName() | ||
.equals("io.netty.util.concurrent.SingleThreadEventExecutor")); | ||
if (Context.current() == Context.root()) { | ||
// not much point in propagating root context | ||
// plus it causes failures under otel.javaagent.testing.fail-on-context-leak=true | ||
return false; | ||
} | ||
|
||
// ForkJoinPool threads are initialized lazily and continue to handle tasks similar to an event | ||
// loop. They should not have context propagated to the base of the thread, tasks themselves | ||
// will have it through other means. | ||
if (taskClass.getName().equals("java.util.concurrent.ForkJoinWorkerThread")) { | ||
return false; | ||
} | ||
|
||
// ThreadPoolExecutor worker threads may be initialized lazily and manage interruption of other | ||
// threads. The actual tasks being run on those threads will propagate context but we should not | ||
// propagate onto this management thread. | ||
if (taskClass.getName().equals("java.util.concurrent.ThreadPoolExecutor$Worker")) { | ||
return false; | ||
} | ||
|
||
// TODO Workaround for | ||
// https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/787 | ||
if (taskClass.getName().equals("org.apache.tomcat.util.net.NioEndpoint$SocketProcessor")) { | ||
return false; | ||
} | ||
|
||
if (enclosingClass != null && NOT_INSTRUMENTED_RUNNABLE_ENCLOSING_CLASS.get(enclosingClass)) { | ||
return false; | ||
} | ||
|
||
return true; | ||
} | ||
|
||
/** | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nice to see where this fails