Skip to content

Commit

Permalink
[o11y] Provide OTel-compatible user span representation in trace
Browse files Browse the repository at this point in the history
  • Loading branch information
fhanau committed Nov 27, 2024
1 parent ba54ac7 commit 20b2330
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 55 deletions.
61 changes: 52 additions & 9 deletions src/workerd/api/trace.c++
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <workerd/util/uuid.h>

#include <capnp/schema.h>
#include <kj/encoding.h>

namespace workerd::api {

Expand Down Expand Up @@ -74,15 +75,11 @@ jsg::V8Ref<v8::Object> getTraceLogMessage(jsg::Lock& js, const tracing::Log& log
}

kj::Array<jsg::Ref<TraceLog>> getTraceLogs(jsg::Lock& js, const Trace& trace) {
auto builder = kj::heapArrayBuilder<jsg::Ref<TraceLog>>(trace.logs.size() + trace.spans.size());
for (auto i: kj::indices(trace.logs)) {
builder.add(jsg::alloc<TraceLog>(js, trace, trace.logs[i]));
}
// Add spans represented as logs to the logs object.
for (auto i: kj::indices(trace.spans)) {
builder.add(jsg::alloc<TraceLog>(js, trace, trace.spans[i]));
}
return builder.finish();
return KJ_MAP(x, trace.logs) -> jsg::Ref<TraceLog> { return jsg::alloc<TraceLog>(js, trace, x); };
}

kj::Array<jsg::Ref<OTelSpan>> getTraceSpanData(const Trace& trace) {
return KJ_MAP(x, trace.spans) -> jsg::Ref<OTelSpan> { return jsg::alloc<OTelSpan>(x); };
}

kj::Array<jsg::Ref<TraceDiagnosticChannelEvent>> getTraceDiagnosticChannelEvents(
Expand Down Expand Up @@ -205,6 +202,7 @@ TraceItem::TraceItem(jsg::Lock& js, const Trace& trace)
dispatchNamespace(trace.dispatchNamespace.map([](auto& ns) { return kj::str(ns); })),
scriptTags(getTraceScriptTags(trace)),
executionModel(enumToStr(trace.executionModel)),
spanData(getTraceSpanData(trace)),
outcome(enumToStr(trace.outcome)),
cpuTime(trace.cpuTime / kj::MILLISECONDS),
wallTime(trace.wallTime / kj::MILLISECONDS),
Expand Down Expand Up @@ -286,6 +284,10 @@ kj::StringPtr TraceItem::getExecutionModel() {
return executionModel;
}

kj::ArrayPtr<jsg::Ref<OTelSpan>> TraceItem::getSpanData() {
return spanData;
}

kj::StringPtr TraceItem::getOutcome() {
return outcome;
}
Expand Down Expand Up @@ -549,6 +551,47 @@ bool TraceItem::HibernatableWebSocketEventInfo::Close::getWasClean() {
return eventInfo.wasClean;
}

kj::StringPtr OTelSpan::getOperation() {
return operation;
}

kj::Date OTelSpan::getStartTime() {
return startTime;
}

kj::StringPtr OTelSpan::getSpanID() {
return spanId;
}
kj::StringPtr OTelSpan::getParentSpanID() {
return parentSpanId;
}

kj::Date OTelSpan::getEndTime() {
return endTime;
}

kj::ArrayPtr<OTelSpanTag> OTelSpan::getTags() {
return tags;
}

OTelSpan::OTelSpan(const CompleteSpan& span)
: operation(kj::str(span.operationName)),
startTime(span.startTime),
endTime(span.endTime),
tags(kj::heapArray<OTelSpanTag>(span.tags.size())) {
// IDs are represented as network-order hex strings.
uint64_t netSpanId = __builtin_bswap64(span.spanId);
uint64_t netParentSpanId = __builtin_bswap64(span.parentSpanId);
spanId = kj::encodeHex(kj::ArrayPtr<byte>((kj::byte*)&netSpanId, sizeof(uint64_t)));
parentSpanId = kj::encodeHex(kj::ArrayPtr<byte>((kj::byte*)&netParentSpanId, sizeof(uint64_t)));
uint32_t i = 0;
for (auto& tag: span.tags) {
tags[i].key = kj::str(tag.key);
tags[i].value = spanTagClone(tag.value);
i++;
}
}

TraceLog::TraceLog(jsg::Lock& js, const Trace& trace, const tracing::Log& log)
: timestamp(getTraceLogTimestamp(log)),
level(getTraceLogLevel(log)),
Expand Down
63 changes: 60 additions & 3 deletions src/workerd/api/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,57 @@ struct ScriptVersion {
}
};

struct OTelSpanTag final: public jsg::Object {
kj::String key;
kj::OneOf<bool, int64_t, double, kj::String> value;
JSG_STRUCT(key, value);
};

// OpenTelemetry-compatible span data exposed as part of the trace. Loosely based on https://github.com/open-telemetry/opentelemetry-js/blob/v1.28.0/experimental/packages/otlp-transformer/src/trace/types.ts#L64
class OTelSpan final: public jsg::Object {
public:
OTelSpan(const CompleteSpan& span);
kj::StringPtr getSpanID();
kj::StringPtr getParentSpanID();
kj::StringPtr getOperation();
kj::ArrayPtr<OTelSpanTag> getTags();
kj::Date getStartTime();
kj::Date getEndTime();

JSG_RESOURCE_TYPE(OTelSpan) {
JSG_LAZY_READONLY_INSTANCE_PROPERTY(spanId, getSpanID);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(parentSpanId, getParentSpanID);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(operation, getOperation);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(tags, getTags);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(startTime, getStartTime);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(endTime, getEndTime);
}

void visitForMemoryInfo(jsg::MemoryTracker& tracker) const {
tracker.trackField("operation", operation);
// TODO(o11y): Do IDs (hex strings with fixed size) and non-string tag values also need to be tracked?
for (const OTelSpanTag& tag: tags) {
tracker.trackField("key", tag.key);
KJ_SWITCH_ONEOF(tag.value) {
KJ_CASE_ONEOF(str, kj::String) {
tracker.trackField("value", str);
}
KJ_CASE_ONEOF_DEFAULT break;
}
}
}

private:
// TODO(o11y): Provide traceId
kj::String spanId;
kj::String parentSpanId;
kj::String operation;
// TODO(o11y): startTimeUnixNano for OTel compat
kj::Date startTime; // => JS Date – ms precision
kj::Date endTime; // Use end time instead of duration here (this makes C++ interop eaier).
kj::Array<OTelSpanTag> tags;
};

class TraceItem final: public jsg::Object {
public:
class FetchEventInfo;
Expand Down Expand Up @@ -102,16 +153,20 @@ class TraceItem final: public jsg::Object {
jsg::Optional<kj::StringPtr> getDispatchNamespace();
jsg::Optional<kj::Array<kj::StringPtr>> getScriptTags();
kj::StringPtr getExecutionModel();
kj::ArrayPtr<jsg::Ref<OTelSpan>> getSpanData();
kj::StringPtr getOutcome();

uint getCpuTime();
uint getWallTime();
bool getTruncated();

JSG_RESOURCE_TYPE(TraceItem) {
JSG_RESOURCE_TYPE(TraceItem, CompatibilityFlags::Reader flags) {
JSG_LAZY_READONLY_INSTANCE_PROPERTY(event, getEvent);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(eventTimestamp, getEventTimestamp);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(logs, getLogs);
if (flags.getTailWorkerUserSpans()) {
JSG_LAZY_READONLY_INSTANCE_PROPERTY(spanData, getSpanData);
}
JSG_LAZY_READONLY_INSTANCE_PROPERTY(exceptions, getExceptions);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(diagnosticsChannelEvents, getDiagnosticChannelEvents);
JSG_LAZY_READONLY_INSTANCE_PROPERTY(scriptName, getScriptName);
Expand All @@ -138,6 +193,7 @@ class TraceItem final: public jsg::Object {
kj::Maybe<kj::String> dispatchNamespace;
jsg::Optional<kj::Array<kj::String>> scriptTags;
kj::String executionModel;
kj::Array<jsg::Ref<OTelSpan>> spanData;
kj::String outcome;
uint cpuTime;
uint wallTime;
Expand Down Expand Up @@ -646,8 +702,9 @@ class TraceCustomEventImpl final: public WorkerInterface::CustomEvent {
api::TraceItem::HibernatableWebSocketEventInfo, \
api::TraceItem::HibernatableWebSocketEventInfo::Message, \
api::TraceItem::HibernatableWebSocketEventInfo::Close, \
api::TraceItem::HibernatableWebSocketEventInfo::Error, api::TraceLog, api::TraceException, \
api::TraceDiagnosticChannelEvent, api::TraceMetrics, api::UnsafeTraceMetrics
api::TraceItem::HibernatableWebSocketEventInfo::Error, api::TraceLog, api::OTelSpan, \
api::OTelSpanTag, api::TraceException, api::TraceDiagnosticChannelEvent, api::TraceMetrics, \
api::UnsafeTraceMetrics
// The list of trace.h types that are added to worker.c++'s JSG_DECLARE_ISOLATE_TYPE

} // namespace workerd::api
1 change: 1 addition & 0 deletions src/workerd/io/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ wd_capnp_library(
deps = [
":outcome_capnp",
":script-version_capnp",
":trace_capnp",
"@capnp-cpp//src/capnp/compat:http-over-capnp_capnp",
],
)
Expand Down
5 changes: 5 additions & 0 deletions src/workerd/io/compatibility-date.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -668,4 +668,9 @@ struct CompatibilityFlags @0x8f8c1b68151b6cef {
# A bug in the original implementation of TransformStream failed to apply backpressure
# correctly. The fix, however, can break existing implementations that don't account
# for the bug so we need to put the fix behind a compat flag.

# Experimental support for exporting user spans to tail worker.
tailWorkerUserSpans @69 :Bool
$compatEnableFlag("tail_worker_user_spans")
$experimental;
}
Loading

0 comments on commit 20b2330

Please sign in to comment.