Skip to content

Commit

Permalink
Request log tracing
Browse files Browse the repository at this point in the history
At the moment, we send trace ids and span ids with our requests. This is
excellent, and provides some coarse grained information. That said, it's
very limited - you can merely know that some number of requests were
triggered by the same underlying operation.

Here, when a trace is originated with a parent span, we keep track of
that parent (called an 'originating' span id) and also send it with
requests. This means that we can use request logs to piece together
accurate service level tracing as well as trace logs.

In other words, if I have a call graph which looks like:

```yml
service a/foo:
   check auth:
     call auth service:
       auth service/check auth:
   do work:
     call work service:
       work service/do operation:
         call auth service:
           auth service/check auth:
```

then I will see that in the trace logs (sampled) but I can derive

```yml
service a/foo:
   auth service/check auth:
   work service/do operation:
     auth service/check auth:
```

from the request logs on all requests. Currently this is possible
when requests are single-threaded, but not when people call services in
parallel.
  • Loading branch information
j-baker committed Jul 30, 2019
1 parent 1664198 commit 72a26f0
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 19 deletions.
23 changes: 21 additions & 2 deletions tracing-api/src/main/java/com/palantir/tracing/api/OpenSpan.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ public abstract class OpenSpan {
@Value.Parameter
public abstract Optional<String> getParentSpanId();

/**
* Returns the identifier of the 'originating' span if one exists.
* @see TraceHttpHeaders
*/
@Value.Parameter
public abstract Optional<String> getOriginatingSpanId();

/**
* Returns a globally unique identifier representing a single span within the call trace.
*/
Expand All @@ -86,8 +93,20 @@ public static Builder builder() {
/**
* Use this factory method to avoid allocate {@link Builder} in hot path.
*/
public static OpenSpan of(String operation, String spanId, SpanType type, Optional<String> parentSpanId) {
return ImmutableOpenSpan.of(operation, getNowInMicroSeconds(), System.nanoTime(), parentSpanId, spanId, type);
public static OpenSpan of(
String operation,
String spanId,
SpanType type,
Optional<String> parentSpanId,
Optional<String> originatingSpanId) {
return ImmutableOpenSpan.of(
operation,
getNowInMicroSeconds(),
System.nanoTime(),
parentSpanId,
originatingSpanId,
spanId,
type);
}

private static long getNowInMicroSeconds() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,16 @@ public interface TraceHttpHeaders {
String PARENT_SPAN_ID = "X-B3-ParentSpanId";
String SPAN_ID = "X-B3-SpanId";
String IS_SAMPLED = "X-B3-Sampled"; // Boolean (either “1” or “0”, can be absent)

/**
* Conceptually, a trace is a stack of spans. In implementation, this is actually many stacks, in many services,
* where a service's stack will typically contain a single parent span from a different service at the bottom,
* and many spans of its own above it.
*
* By communicating this deepest span id with future network calls as an 'originating' span id, this enables
* network-level tracing to be enabled always in a low-fidelity form, with request logs containing enough
* information to reconstruct a request-level trace. For service-internal tracing, the typical trace logs
* (with sampling) are still required.
*/
String ORIGINATING_SPAN_ID = "X-OrigSpanId";
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ public Response intercept(Chain chain) throws IOException {
if (span.getParentSpanId().isPresent()) {
tracedRequest.header(TraceHttpHeaders.PARENT_SPAN_ID, span.getParentSpanId().get());
}
if (span.getOriginatingSpanId().isPresent()) {
tracedRequest.header(TraceHttpHeaders.ORIGINATING_SPAN_ID, span.getOriginatingSpanId().get());
}

Response response;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,28 +36,22 @@
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.mockito.junit.MockitoJUnitRunner;

@RunWith(MockitoJUnitRunner.class)
public final class OkhttpTraceInterceptorTest {

@Mock
private Interceptor.Chain chain;

@Mock
private SpanObserver observer;

@Captor
private ArgumentCaptor<Request> requestCaptor;

@Captor
private ArgumentCaptor<Span> spanCaptor;
@Mock private Interceptor.Chain chain;
@Mock private SpanObserver observer;
@Captor private ArgumentCaptor<Request> requestCaptor;
@Captor private ArgumentCaptor<Span> spanCaptor;

@Before
public void before() {
MockitoAnnotations.initMocks(this);
Request request = new Request.Builder().url("http://localhost").build();
when(chain.request()).thenReturn(request);
Tracer.subscribe("", observer);
Expand All @@ -79,6 +73,7 @@ public void testPopulatesNewTrace_whenNoTraceIsPresentInGlobalState() throws IOE
Request intercepted = requestCaptor.getValue();
assertThat(intercepted.header(TraceHttpHeaders.SPAN_ID)).isNotNull();
assertThat(intercepted.header(TraceHttpHeaders.TRACE_ID)).isNotNull();
assertThat(intercepted.header(TraceHttpHeaders.ORIGINATING_SPAN_ID)).isNull();
assertThat(intercepted.header(TraceHttpHeaders.PARENT_SPAN_ID)).isNull();
}

Expand All @@ -101,6 +96,7 @@ public void testPopulatesNewTrace_whenParentTraceIsPresent() throws IOException
assertThat(intercepted.header(TraceHttpHeaders.SPAN_ID)).isNotEqualTo(parentState.getSpanId());
assertThat(intercepted.header(TraceHttpHeaders.TRACE_ID)).isEqualTo(traceId);
assertThat(intercepted.header(TraceHttpHeaders.PARENT_SPAN_ID)).isEqualTo(parentState.getSpanId());
assertThat(intercepted.header(TraceHttpHeaders.ORIGINATING_SPAN_ID)).isEqualTo(parentState.getSpanId());
}

@Test
Expand Down
56 changes: 52 additions & 4 deletions tracing/src/main/java/com/palantir/tracing/Trace.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@ private Trace(String traceId) {
final OpenSpan startSpan(String operation, String parentSpanId, SpanType type) {
checkState(isEmpty(), "Cannot start a span with explicit parent if the current thread's trace is non-empty");
checkArgument(!Strings.isNullOrEmpty(parentSpanId), "parentSpanId must be non-empty");
OpenSpan span = OpenSpan.of(operation, Tracers.randomId(), type, Optional.of(parentSpanId));
OpenSpan span = OpenSpan.of(
operation,
Tracers.randomId(),
type,
Optional.of(parentSpanId),
orElse(getOriginatingSpanId(), Optional.of(parentSpanId)));
push(span);
return span;
}
Expand All @@ -74,15 +79,32 @@ final OpenSpan startSpan(String operation, SpanType type) {
final OpenSpan span;
// Avoid lambda allocation in hot paths
if (prevState.isPresent()) {
span = OpenSpan.of(operation, Tracers.randomId(), type, Optional.of(prevState.get().getSpanId()));
span = OpenSpan.of(
operation,
Tracers.randomId(),
type,
Optional.of(prevState.get().getSpanId()),
orElse(getOriginatingSpanId(), prevState.get().getParentSpanId()));
} else {
span = OpenSpan.of(operation, Tracers.randomId(), type, Optional.empty());
span = OpenSpan.of(
operation,
Tracers.randomId(),
type,
Optional.empty(),
getOriginatingSpanId());
}

push(span);
return span;
}

private static <T> Optional<T> orElse(Optional<T> left, Optional<T> right) {
if (left.isPresent()) {
return left;
}
return right;
}

/**
* Like {@link #startSpan(String, String, SpanType)}, but does not return an {@link OpenSpan}.
*/
Expand Down Expand Up @@ -114,6 +136,8 @@ final String getTraceId() {
return traceId;
}

abstract Optional<String> getOriginatingSpanId();

/** Returns a copy of this Trace which can be independently mutated. */
abstract Trace deepCopy();

Expand Down Expand Up @@ -171,6 +195,14 @@ boolean isObservable() {
return true;
}

@Override
Optional<String> getOriginatingSpanId() {
if (stack.isEmpty()) {
return Optional.empty();
}
return stack.peekLast().getParentSpanId();
}

@Override
Trace deepCopy() {
return new Sampled(new ArrayDeque<>(stack), getTraceId());
Expand All @@ -188,6 +220,7 @@ private static final class Unsampled extends Trace {
* This allows thread trace state to be cleared when all "started" spans have been "removed".
*/
private int numberOfSpans;
private Optional<String> originatingSpanId = Optional.empty();

private Unsampled(int numberOfSpans, String traceId) {
super(traceId);
Expand All @@ -201,7 +234,7 @@ private Unsampled(String traceId) {

@Override
void fastStartSpan(String operation, String parentSpanId, SpanType type) {
fastStartSpan(operation, type);
startSpan(Optional.of(parentSpanId));
}

@Override
Expand All @@ -211,6 +244,13 @@ void fastStartSpan(String operation, SpanType type) {

@Override
protected void push(OpenSpan span) {
startSpan(span.getParentSpanId());
}

private void startSpan(Optional<String> parentSpanId) {
if (numberOfSpans == 0) {
originatingSpanId = parentSpanId;
}
numberOfSpans++;
}

Expand All @@ -225,6 +265,9 @@ Optional<OpenSpan> pop() {
if (numberOfSpans > 0) {
numberOfSpans--;
}
if (numberOfSpans == 0) {
originatingSpanId = Optional.empty();
}
return Optional.empty();
}

Expand All @@ -239,6 +282,11 @@ boolean isObservable() {
return false;
}

@Override
Optional<String> getOriginatingSpanId() {
return originatingSpanId;
}

@Override
Trace deepCopy() {
return new Unsampled(numberOfSpans, getTraceId());
Expand Down
35 changes: 35 additions & 0 deletions tracing/src/test/java/com/palantir/tracing/TraceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.junit.Test;

public final class TraceTest {
private static final String ORIGINATING_SPAN_ID = "originating span id";

@Test
public void constructTrace_emptyTraceId() {
Expand All @@ -40,4 +41,38 @@ public void testToString() {
.contains(span.getOperation())
.contains(span.getSpanId());
}

@Test
public void testOriginatingTraceId_slow_sampled() {
Trace trace = Trace.of(true, "traceId");
OpenSpan originating = trace.startSpan("1", ORIGINATING_SPAN_ID, SpanType.LOCAL);
assertThat(originating.getOriginatingSpanId()).contains(ORIGINATING_SPAN_ID);
OpenSpan span = trace.startSpan("2", SpanType.LOCAL);
assertThat(span.getOriginatingSpanId()).contains(ORIGINATING_SPAN_ID);
}

@Test
public void testOriginatingTraceId_fast_sampled() {
Trace trace = Trace.of(true, "traceId");
trace.fastStartSpan("1", ORIGINATING_SPAN_ID, SpanType.LOCAL);
OpenSpan span = trace.startSpan("2", SpanType.LOCAL);
assertThat(span.getOriginatingSpanId()).contains(ORIGINATING_SPAN_ID);
}

@Test
public void testOriginatingTraceId_slow_unsampled() {
Trace trace = Trace.of(false, "traceId");
OpenSpan originating = trace.startSpan("1", ORIGINATING_SPAN_ID, SpanType.LOCAL);
assertThat(originating.getOriginatingSpanId()).contains(ORIGINATING_SPAN_ID);
OpenSpan span = trace.startSpan("2", SpanType.LOCAL);
assertThat(span.getOriginatingSpanId()).contains(ORIGINATING_SPAN_ID);
}

@Test
public void testOriginatingTraceId_fast_unsampled() {
Trace trace = Trace.of(false, "traceId");
trace.fastStartSpan("1", ORIGINATING_SPAN_ID, SpanType.LOCAL);
OpenSpan span = trace.startSpan("2", SpanType.LOCAL);
assertThat(span.getOriginatingSpanId()).contains(ORIGINATING_SPAN_ID);
}
}

0 comments on commit 72a26f0

Please sign in to comment.