From 471f82db180955e59d20e712b2bf6b8c8723d878 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Fri, 11 Oct 2019 10:07:48 -0400 Subject: [PATCH 01/11] Add OTLP Trace Data Format specification This is a continuation of OTLP RFC proposal https://github.com/open-telemetry/oteps/pull/35 This change defines the data format used by Span and Resource messages. The data format is a result of research of prior art (primarily OpenCensus and Jaeger), as well as experimentation and benchmarking done as part of OTLP RFC proposal. Go benchmark source code is available at https://github.com/tigrannajaryan/exp-otelproto (use `make benchmark-encoding` target). Benchmarking shows that depending on the payload composition this data format is about 4x-5x faster in encoding and 2x-3x faster in decoding equivalent data compared to OpenCensus data format (all benchmarks in Go). Notable differences from OpenCensus: - Attribute key/value pairs are represented as a list rather than as a map. This results in significant performance gains and at the same time changes the semantics of attributes because now it is possible to have multiple attributes with the same key. This is also in line with Jaeger's tags representation. - Removed unnecessary wrappers such as google.protobuf.Timestamp which resulted in significant performance improvements for certain payload compositions (e.g. lots of TimedEvents). - Resource labels use the same data type as Span attributes which now allows labels to have other data types (OpenCensus only allowed strings). 
--- text/0000-otlp-trace-data-format.md | 361 ++++++++++++++++++++++++++++ 1 file changed, 361 insertions(+) create mode 100644 text/0000-otlp-trace-data-format.md diff --git a/text/0000-otlp-trace-data-format.md b/text/0000-otlp-trace-data-format.md new file mode 100644 index 000000000..142df0dcc --- /dev/null +++ b/text/0000-otlp-trace-data-format.md @@ -0,0 +1,361 @@ +# OTLP Trace Data Format + +_Author: Tigran Najaryan, Splunk_ + +**Status:** `proposed` + +OTLP Trace Data Format specification describes the structure of the trace data that is transported by OpenTelemetry Protocol (RFC0035). + +## Motivation + +This document is a continuation of OpenTelemetry Protocol RFC0035 and is a necessary part of the OTLP specification. + +## Explanation + +OTLP Trace Data Format is primarily inherited from OpenCensus protocol. Several changes are introduced with the goal of more efficient serialization. Notable differences from OpenCensus protocol are: + +1. Removed `Node` as a concept. +2. Extended `Resource` to better describe the source of the telemetry data. +3. Replaced attribute maps by lists of key/value pairs. +4. Eliminated unnecessary additional nesting in various values. + +Changes 1-2 are conceptual, changes 3-4 improve performance. + +## Internal details + +This section specifies data format in Protocol Buffers. + +### Resource + +``` +// Resource information. This describes the source of telemetry data. +message Resource { + // Set of labels that describe the resource. See OpenTelemetry specification + // semantic conventions for standardized label names: + // https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/data-semantic-conventions.md + + repeated AttributeKeyValue labels = 3; + int32 dropped_labels_count = 11; +} +``` + +### Span + +``` +// A span represents a single operation within a trace. Spans can be +// nested to form a trace tree. Spans may also be linked to other spans +// from the same or different trace. 
And form graphs. Often, a trace +// contains a root span that describes the end-to-end latency, and one +// or more subspans for its sub-operations. A trace can also contain +// multiple root spans, or none at all. Spans do not need to be +// contiguous - there may be gaps or overlaps between spans in a trace. +// +// The next id is 17. +message Span { + // A unique identifier for a trace. All spans from the same trace share + // the same `trace_id`. The ID is a 16-byte array. An ID with all zeroes + // is considered invalid. + // + // This field is semantically required. Receiver should generate new + // random trace_id if empty or invalid trace_id was received. + // + // This field is required. + bytes trace_id = 1; + + // A unique identifier for a span within a trace, assigned when the span + // is created. The ID is an 8-byte array. An ID with all zeroes is considered + // invalid. + // + // This field is semantically required. Receiver should generate new + // random span_id if empty or invalid span_id was received. + // + // This field is required. + bytes span_id = 2; + + // This field conveys information about request position in multiple distributed tracing graphs. + // It is a list of Tracestate.Entry with a maximum of 32 members in the list. + // + // See the https://github.com/w3c/distributed-tracing for more details about this field. + message Tracestate { + message Entry { + // The key must begin with a lowercase letter, and can only contain + // lowercase letters 'a'-'z', digits '0'-'9', underscores '_', dashes + // '-', asterisks '*', and forward slashes '/'. + string key = 1; + + // The value is opaque string up to 256 characters printable ASCII + // RFC0020 characters (i.e., the range 0x20 to 0x7E) except ',' and '='. + // Note that this also excludes tabs, newlines, carriage returns, etc. + string value = 2; + } + + // A list of entries that represent the Tracestate. + repeated Entry entries = 1; + } + + // The Tracestate on the span. 
+ Tracestate tracestate = 3; + + // The `span_id` of this span's parent span. If this is a root span, then this + // field must be empty. The ID is an 8-byte array. + bytes parent_span_id = 4; + + // An optional resource that is associated with this span. If not set, this span + // should be part of a ResourceSpan that does include the resource information, unless resource + // information is unknown. + Resource resource = 5; + + // A description of the span's operation. + // + // For example, the name can be a qualified method name or a file name + // and a line number where the operation is called. A best practice is to use + // the same display name at the same call point in an application. + // This makes it easier to correlate spans in different traces. + // + // This field is semantically required to be set to non-empty string. + // When null or empty string received - receiver may use string "name" + // as a replacement. There might be smarted algorithms implemented by + // receiver to fix the empty span name. + // + // This field is required. + string name = 6; + + // Type of span. Can be used to specify additional relationships between spans + // in addition to a parent/child relationship. + enum SpanKind { + // Unspecified. Do NOT use as default. + // Implementations MAY assume SpanKind to be INTERNAL when receiving UNSPECIFIED. + SPAN_KIND_UNSPECIFIED = 0; + + // Indicates that the span is used internally. Default value. + INTERNAL = 1; + + // Indicates that the span covers server-side handling of an RPC or other + // remote network request. + SERVER = 2; + + // Indicates that the span covers the client-side wrapper around an RPC or + // other remote request. + CLIENT = 3; + + // Indicates that the span describes producer sending a message to a broker. + // Unlike client and server, there is no direct critical path latency relationship + // between producer and consumer spans. 
+ PRODUCER = 4; + + // Indicates that the span describes consumer receiving a message from a broker. + // Unlike client and server, there is no direct critical path latency relationship + // between producer and consumer spans. + CONSUMER = 5; + } + + // Distinguishes between spans generated in a particular context. For example, + // two spans with the same name may be distinguished using `CLIENT` (caller) + // and `SERVER` (callee) to identify queueing latency associated with the span. + SpanKind kind = 7; + + // The start time of the span. On the client side, this is the time kept by + // the local machine where the span execution starts. On the server side, this + // is the time when the server's application handler starts running. + // + // This field is semantically required. When not set on receive - + // receiver should set it to the value of end_time field if it was + // set. Or to the current time if neither was set. It is important to + // keep end_time > start_time for consistency. + // + // This field is required. + int64 start_time_unixnano = 8; + + // The end time of the span. On the client side, this is the time kept by + // the local machine where the span execution ends. On the server side, this + // is the time when the server application handler stops running. + // + // This field is semantically required. When not set on receive - + // receiver should set it to start_time value. It is important to + // keep end_time > start_time for consistency. + // + // This field is required. + int64 end_time_unixnano = 9; + + // The set of attributes. The value can be a string, an integer, a double + // or the Boolean values `true` or `false`. Note, global attributes like + // server name can be set as tags using resource API. 
Examples of attributes: + // + // "/http/user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" + // "/http/server_latency": 300 + // "abc.com/myattribute": true + // "abc.com/score": 10.239 + repeated AttributeKeyValue attributes = 10; + + // The number of attributes that were discarded. Attributes can be discarded + // because their keys are too long or because there are too many attributes. + // If this value is 0, then no attributes were dropped. + int32 dropped_attributes_count = 11; + + // A time-stamped event in the Span. + message TimedEvent { + // The time the event occurred. + int64 time_unixnano = 1; + + // A user-supplied name describing the event. + string name = 2; + + // A set of attributes on the event. + repeated AttributeKeyValue attributes = 3; + + int32 dropped_attributes_count = 4; + } + + // A collection of `TimedEvent`s. A `TimedEvent` is a time-stamped annotation + // on the span, consisting of either user-supplied key-value pairs, or + // details of a message sent/received between Spans. + message TimedEvents { + // A collection of `TimedEvent`s. + repeated TimedEvent timed_event = 1; + + // The number of dropped timed events. If the value is 0, then no events were dropped. + int32 dropped_timed_events_count = 2; + } + + // The included timed events. + TimedEvents timed_events = 12; + + // A pointer from the current span to another span in the same trace or in a + // different trace. For example, this can be used in batching operations, + // where a single batch handler processes multiple requests from different + // traces or when the handler receives a request from a different project. + message Link { + // A unique identifier of a trace that this linked span is part of. The ID is a + // 16-byte array. + bytes trace_id = 1; + + // A unique identifier for the linked span. The ID is an 8-byte array. 
+ bytes span_id = 2; + + // The Tracestate associated with the link. + Tracestate tracestate = 3; + + // A set of attributes on the link. + repeated AttributeKeyValue attributes = 4; + + int32 dropped_attributes_count = 5; + } + + // A collection of links, which are references from this span to a span + // in the same or different trace. + message Links { + // A collection of links. + repeated Link link = 1; + + // The number of dropped links after the maximum size was enforced. If + // this value is 0, then no links were dropped. + int32 dropped_links_count = 2; + } + + // The included links. + Links links = 13; + + // An optional final status for this span. Semantically when Status + // wasn't set it is means span ended without errors and assume + // Status.Ok (code = 0). + Status status = 14; + + // An optional number of child spans that were generated while this span + // was active. If set, allows an implementation to detect missing child spans. + google.protobuf.UInt32Value child_span_count = 15; +} + +// The `Status` type defines a logical error model that is suitable for different +// programming environments, including REST APIs and RPC APIs. This proto's fields +// are a subset of those of +// [google.rpc.Status](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto), +// which is used by [gRPC](https://github.com/grpc). +message Status { + // The status code. This is optional field. It is safe to assume 0 (OK) + // when not set. + int32 code = 1; + + // A developer-facing error message, which should be in English. + string message = 2; +} +``` + +### AttributeKeyValue + +``` +message AttributeKeyValue { + enum ValueType { + STRING = 0; + BOOL = 1; + INT64 = 2; + FLOAT64 = 3; + BINARY = 4; + }; + + string key = 1; + // The type of the value. + ValueType type = 2; + // A string up to 256 bytes long. + string string_value = 3; + // A 64-bit signed integer. + int64 int_value = 4; + // A Boolean value represented by `true` or `false`. 
+ bool bool_value = 5; + // A double value. + double double_value = 6; + // A binary value of bytes. + bytes binary_value = 7; +} + +``` + +## Trade-offs and mitigations + +Timestamps were changed from google.protobuf.Timestamp to an int64 representation in Unix epoch nanoseconds. This change reduces the type-safety but benchmarks show that for small spans there is 15-20% encoding/decoding CPU speed gain. This is the right trade-off to make because encoding/decoding CPU consumption tends to dominate many workloads (particularly in OpenTelemetry Service). + +## Prior art and alternatives + +OpenCensus and Jaeger protocol buffer data schemas were used as the inspiration for this specification. OpenCensus was the starting point, Jaeger provided performance improvement ideas. + +## Open questions + +A follow up RFC is required to define the data format for metrics. + +## Appendix A - Benchmarking + +The following shows [benchmarking of encoding/decoding in Go](https://github.com/tigrannajaryan/exp-otelproto/) using various schemas. + +Legend: +- OpenCensus - OpenCensus protocol schema. +- OTLP/AttrMap - OTLP schema using map for attributes (`OTLP/AttrAsMap` in the output below). +- OTLP/AttrList - OTLP schema using list of key/values for attributes and with reduced nesting for values (`OTLP/AttrAsList` in the output below). +- OTLP/AttrList/TimeWrapped - Same as OTLP/AttrList, except using google.protobuf.Timestamp instead of int64 for timestamps (`OTLP/AttrAsList/TimeWrapped` in the output below). + +Suffixes: +- Attributes - a span with 3 attributes. +- TimedEvent - a span with 3 timed events. 
+ +``` +BenchmarkEncode/OpenCensus/Attributes-8 10 605614915 ns/op +BenchmarkEncode/OpenCensus/TimedEvent-8 10 1025026687 ns/op +BenchmarkEncode/OTLP/AttrAsMap/Attributes-8 10 519539723 ns/op +BenchmarkEncode/OTLP/AttrAsMap/TimedEvent-8 10 841371163 ns/op +BenchmarkEncode/OTLP/AttrAsList/Attributes-8 50 128790429 ns/op +BenchmarkEncode/OTLP/AttrAsList/TimedEvent-8 50 175874878 ns/op +BenchmarkEncode/OTLP/AttrAsList/TimeWrapped/Attributes-8 50 153184772 ns/op +BenchmarkEncode/OTLP/AttrAsList/TimeWrapped/TimedEvent-8 30 232705272 ns/op +BenchmarkDecode/OpenCensus/Attributes-8 10 644103382 ns/op +BenchmarkDecode/OpenCensus/TimedEvent-8 5 1132059855 ns/op +BenchmarkDecode/OTLP/AttrAsMap/Attributes-8 10 529679038 ns/op +BenchmarkDecode/OTLP/AttrAsMap/TimedEvent-8 10 867364162 ns/op +BenchmarkDecode/OTLP/AttrAsList/Attributes-8 50 228834160 ns/op +BenchmarkDecode/OTLP/AttrAsList/TimedEvent-8 20 321160309 ns/op +BenchmarkDecode/OTLP/AttrAsList/TimeWrapped/Attributes-8 30 277597851 ns/op +BenchmarkDecode/OTLP/AttrAsList/TimeWrapped/TimedEvent-8 20 443386880 ns/op +``` + +The benchmark encodes/decodes 1000 batches of 100 spans, each span containing 3 attributes or 3 timed events. The total uncompressed, encoded size of each batch is around 20KBytes. + +The results show OTLP/AttrList is 5-6 times faster than OpenCensus in encoding and about 3 times faster in decoding. + +Using google.protobuf.Timestamp instead of int64-encoded unix timestamp results in 1.18-1.32 times slower encoding and 1.21-1.38 times slower decoding (depending on what the span contains). 
From 09a4a3fa370c9e05a02c0a9347bd9c516efa12c7 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Fri, 18 Oct 2019 13:39:12 -0400 Subject: [PATCH 02/11] Address review comments --- ...rmat.md => 0059-otlp-trace-data-format.md} | 172 ++++++++---------- 1 file changed, 80 insertions(+), 92 deletions(-) rename text/{0000-otlp-trace-data-format.md => 0059-otlp-trace-data-format.md} (67%) diff --git a/text/0000-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md similarity index 67% rename from text/0000-otlp-trace-data-format.md rename to text/0059-otlp-trace-data-format.md index 142df0dcc..617a05665 100644 --- a/text/0000-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -30,29 +30,31 @@ This section specifies data format in Protocol Buffers. ``` // Resource information. This describes the source of telemetry data. message Resource { - // Set of labels that describe the resource. See OpenTelemetry specification + // labels is a list of attributes that describe the resource. See OpenTelemetry specification // semantic conventions for standardized label names: // https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/data-semantic-conventions.md + repeated AttributeKeyValue labels = 1; - repeated AttributeKeyValue labels = 3; - int32 dropped_labels_count = 11; + // dropped_labels_count is the number of dropped labels. If the value is 0, then + // no labels were dropped. + int32 dropped_labels_count = 2; } ``` ### Span ``` -// A span represents a single operation within a trace. Spans can be +// Span represents a single operation within a trace. Spans can be // nested to form a trace tree. Spans may also be linked to other spans -// from the same or different trace. And form graphs. Often, a trace +// from the same or different trace and form graphs. Often, a trace // contains a root span that describes the end-to-end latency, and one // or more subspans for its sub-operations. 
A trace can also contain // multiple root spans, or none at all. Spans do not need to be // contiguous - there may be gaps or overlaps between spans in a trace. // -// The next id is 17. +// The next field id is 18. message Span { - // A unique identifier for a trace. All spans from the same trace share + // trace_id is the unique identifier of a trace. All spans from the same trace share // the same `trace_id`. The ID is a 16-byte array. An ID with all zeroes // is considered invalid. // @@ -62,7 +64,7 @@ message Span { // This field is required. bytes trace_id = 1; - // A unique identifier for a span within a trace, assigned when the span + // span_id is a unique identifier for a span within a trace, assigned when the span // is created. The ID is an 8-byte array. An ID with all zeroes is considered // invalid. // @@ -72,40 +74,35 @@ message Span { // This field is required. bytes span_id = 2; - // This field conveys information about request position in multiple distributed tracing graphs. + // TraceEntry is the entry that is repeated in tracestate field (see below). + message TraceEntry { + // key must begin with a lowercase letter, and can only contain + // lowercase letters 'a'-'z', digits '0'-'9', underscores '_', dashes + // '-', asterisks '*', and forward slashes '/'. + string key = 1; + + // value is opaque string up to 256 characters printable ASCII + // RFC0020 characters (i.e., the range 0x20 to 0x7E) except ',' and '='. + // Note that this also excludes tabs, newlines, carriage returns, etc. + string value = 2; + } + + // TraceState conveys information about request position in multiple distributed tracing graphs. // It is a list of Tracestate.Entry with a maximum of 32 members in the list. // // See the https://github.com/w3c/distributed-tracing for more details about this field. 
- message Tracestate { - message Entry { - // The key must begin with a lowercase letter, and can only contain - // lowercase letters 'a'-'z', digits '0'-'9', underscores '_', dashes - // '-', asterisks '*', and forward slashes '/'. - string key = 1; - - // The value is opaque string up to 256 characters printable ASCII - // RFC0020 characters (i.e., the range 0x20 to 0x7E) except ',' and '='. - // Note that this also excludes tabs, newlines, carriage returns, etc. - string value = 2; - } - - // A list of entries that represent the Tracestate. - repeated Entry entries = 1; - } - - // The Tracestate on the span. - Tracestate tracestate = 3; + repeated TraceEntry tracestate = 3; - // The `span_id` of this span's parent span. If this is a root span, then this - // field must be empty. The ID is an 8-byte array. + // parent_span_id is the `span_id` of this span's parent span. If this is a root span, then this + // field must be omitted. The ID is an 8-byte array. bytes parent_span_id = 4; - // An optional resource that is associated with this span. If not set, this span - // should be part of a ResourceSpan that does include the resource information, unless resource - // information is unknown. + // resource that is associated with this span. Optional. If not set, this span + // should be part of a ResourceSpans message that does include the resource information, + // unless resource information is unknown. Resource resource = 5; - // A description of the span's operation. + // name describes the span's operation. // // For example, the name can be a qualified method name or a file name // and a line number where the operation is called. A best practice is to use @@ -113,49 +110,47 @@ message Span { // This makes it easier to correlate spans in different traces. // // This field is semantically required to be set to non-empty string. - // When null or empty string received - receiver may use string "name" - // as a replacement. 
There might be smarted algorithms implemented by - // receiver to fix the empty span name. // // This field is required. string name = 6; - // Type of span. Can be used to specify additional relationships between spans + // SpanKind is the type of span. Can be used to specify additional relationships between spans // in addition to a parent/child relationship. enum SpanKind { // Unspecified. Do NOT use as default. // Implementations MAY assume SpanKind to be INTERNAL when receiving UNSPECIFIED. SPAN_KIND_UNSPECIFIED = 0; - // Indicates that the span is used internally. Default value. + // Indicates that the span represents an internal operation within an application, + // as opposed to an operations happening at the boundaries. Default value. INTERNAL = 1; // Indicates that the span covers server-side handling of an RPC or other // remote network request. SERVER = 2; - // Indicates that the span covers the client-side wrapper around an RPC or - // other remote request. + // Indicates that the span describes a request to some remote service. CLIENT = 3; - // Indicates that the span describes producer sending a message to a broker. - // Unlike client and server, there is no direct critical path latency relationship - // between producer and consumer spans. + // Indicates that the span describes a producer sending a message to a broker. + // Unlike CLIENT and SERVER, there is often no direct critical path latency relationship + // between producer and consumer spans. A PRODUCER span ends when the message was accepted + // by the broker while the logical processing of the message might span a much longer time. PRODUCER = 4; // Indicates that the span describes consumer receiving a message from a broker. - // Unlike client and server, there is no direct critical path latency relationship + // Like the PRODUCER kind, there is often no direct critical path latency relationship // between producer and consumer spans. 
CONSUMER = 5; } - // Distinguishes between spans generated in a particular context. For example, + // kind field distinguishes between spans generated in a particular context. For example, // two spans with the same name may be distinguished using `CLIENT` (caller) - // and `SERVER` (callee) to identify queueing latency associated with the span. + // and `SERVER` (callee) to identify network latency associated with the span. SpanKind kind = 7; - // The start time of the span. On the client side, this is the time kept by - // the local machine where the span execution starts. On the server side, this + // start_time_unixnano is the start time of the span. On the client side, this is the time + // kept by the local machine where the span execution starts. On the server side, this // is the time when the server's application handler starts running. // // This field is semantically required. When not set on receive - @@ -166,8 +161,8 @@ message Span { // This field is required. int64 start_time_unixnano = 8; - // The end time of the span. On the client side, this is the time kept by - // the local machine where the span execution ends. On the server side, this + // end_time_unixnano is the end time of the span. On the client side, this is the time + // kept by the local machine where the span execution ends. On the server side, this // is the time when the server application handler stops running. // // This field is semantically required. When not set on receive - @@ -177,7 +172,7 @@ message Span { // This field is required. int64 end_time_unixnano = 9; - // The set of attributes. The value can be a string, an integer, a double + // attributes is a list of AttributeKeyValue. The value can be a string, an integer, a double // or the Boolean values `true` or `false`. Note, global attributes like // server name can be set as tags using resource API. Examples of attributes: // @@ -192,7 +187,8 @@ message Span { // If this value is 0, then no attributes were dropped. 
int32 dropped_attributes_count = 11; - // A time-stamped event in the Span. + // TimedEvent is a time-stamped annotation of the span, consisting of either + // user-supplied key-value pairs, or details of a message sent/received between Spans. message TimedEvent { // The time the event occurred. int64 time_unixnano = 1; @@ -200,27 +196,21 @@ message Span { // A user-supplied name describing the event. string name = 2; - // A set of attributes on the event. + // A list of attributes of the event. repeated AttributeKeyValue attributes = 3; + // The number of dropped attributes. If the value is 0, then no attributes were dropped. int32 dropped_attributes_count = 4; } - // A collection of `TimedEvent`s. A `TimedEvent` is a time-stamped annotation - // on the span, consisting of either user-supplied key-value pairs, or - // details of a message sent/received between Spans. - message TimedEvents { - // A collection of `TimedEvent`s. - repeated TimedEvent timed_event = 1; - - // The number of dropped timed events. If the value is 0, then no events were dropped. - int32 dropped_timed_events_count = 2; - } + // timed_events is a collection of `TimedEvent`s. + repeated TimedEvent timed_events = 12; - // The included timed events. - TimedEvents timed_events = 12; + // dropped_timed_events_count is the number of dropped timed events. If the value is 0, then + // no events were dropped. + int32 dropped_timed_events_count = 13; - // A pointer from the current span to another span in the same trace or in a + // Link is a pointer from the current span to another span in the same trace or in a // different trace. For example, this can be used in batching operations, // where a single batch handler processes multiple requests from different // traces or when the handler receives a request from a different project. @@ -233,43 +223,36 @@ message Span { bytes span_id = 2; // The Tracestate associated with the link. 
- Tracestate tracestate = 3; + repeated TraceEntry tracestate = 3; - // A set of attributes on the link. + // A list of attributes of the link. repeated AttributeKeyValue attributes = 4; + // dropped_attributes_count is the number of dropped attributes. If the value is 0, then + // no attributes were dropped. int32 dropped_attributes_count = 5; } - // A collection of links, which are references from this span to a span + // links is a collection of Links, which are references from this span to a span // in the same or different trace. - message Links { - // A collection of links. - repeated Link link = 1; + repeated Link links = 14; - // The number of dropped links after the maximum size was enforced. If - // this value is 0, then no links were dropped. - int32 dropped_links_count = 2; - } - - // The included links. - Links links = 13; + // dropped_links_count is the number of dropped links after the maximum size was enforced. + // If this value is 0, then no links were dropped. + int32 dropped_links_count = 15; // An optional final status for this span. Semantically when Status // wasn't set it is means span ended without errors and assume // Status.Ok (code = 0). - Status status = 14; + Status status = 16; // An optional number of child spans that were generated while this span // was active. If set, allows an implementation to detect missing child spans. - google.protobuf.UInt32Value child_span_count = 15; + int32 child_span_count = 17; } // The `Status` type defines a logical error model that is suitable for different -// programming environments, including REST APIs and RPC APIs. This proto's fields -// are a subset of those of -// [google.rpc.Status](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto), -// which is used by [gRPC](https://github.com/grpc). +// programming environments, including REST APIs and RPC APIs. message Status { // The status code. This is optional field. It is safe to assume 0 (OK) // when not set. 
@@ -283,30 +266,35 @@ message Status { ### AttributeKeyValue ``` +// AttributeKeyValue is a key-value pair that is used to store Span attributes, Resource +// labels, etc. message AttributeKeyValue { + // ValueType is the enumeration of possible types that value can have. enum ValueType { STRING = 0; BOOL = 1; INT64 = 2; - FLOAT64 = 3; - BINARY = 4; + DOUBLE = 3; }; + // key part of the key-value pair. string key = 1; + // The type of the value. ValueType type = 2; + + // Only one of the following fields is supposed to contain data (determined by `type` field value). + // This is deliberately not using Protobuf `oneof` for performance reasons (verified by benchmarks). + // A string up to 256 bytes long. string string_value = 3; // A 64-bit signed integer. - int64 int_value = 4; + int64 int64_value = 4; // A Boolean value represented by `true` or `false`. bool bool_value = 5; // A double value. double double_value = 6; - // A binary value of bytes. - bytes binary_value = 7; } - ``` ## Trade-offs and mitigations From 98cf4e576db3381772fd0fc3c0a3626d03a8c121 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Mon, 21 Oct 2019 20:04:49 -0400 Subject: [PATCH 03/11] Add protobuf type qualifier to pre-formatted blocks --- text/0059-otlp-trace-data-format.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index 617a05665..ea43fb15a 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -27,7 +27,7 @@ This section specifies data format in Protocol Buffers. ### Resource -``` +```protobuf // Resource information. This describes the source of telemetry data. message Resource { // labels is a list of attributes that describe the resource. See OpenTelemetry specification @@ -43,7 +43,7 @@ message Resource { ### Span -``` +```protobuf // Span represents a single operation within a trace. Spans can be // nested to form a trace tree. 
Spans may also be linked to other spans // from the same or different trace and form graphs. Often, a trace @@ -265,7 +265,7 @@ message Status { ### AttributeKeyValue -``` +```protobuf // AttributeKeyValue is a key-value pair that is used to store Span attributes, Resource // labels, etc. message AttributeKeyValue { From f2e36c041b47f842d87f3c6edefca168b51240f7 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Tue, 22 Oct 2019 15:06:48 -0400 Subject: [PATCH 04/11] Add a note about future goals for the protocol --- text/0059-otlp-trace-data-format.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index ea43fb15a..c3c8ccda1 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -309,6 +309,8 @@ OpenCensus and Jaeger protocol buffer data schemas were used as the inspiration A follow up RFC is required to define the data format for metrics. +One of the original aspirational goals for OTLP was to _"support very fast pass-through mode (when no modifications to the data are needed), fast augmenting or tagging of data and partial inspection of data"_. This particular goal was not met directly (although performance improvements over OpenCensus encoding make OTLP more suitable for these tasks). This goal remains a good direction for future research and improvement. + ## Appendix A - Benchmarking The following shows [benchmarking of encoding/decoding in Go](https://github.com/tigrannajaryan/exp-otelproto/) using various schemas.
From 40aa4ed33bd8c625cb71b954ca2daaf50e7e8ca9 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Tue, 22 Oct 2019 15:27:37 -0400 Subject: [PATCH 05/11] Address review comments --- text/0059-otlp-trace-data-format.md | 50 ++++++++++++++--------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index c3c8ccda1..29f99a976 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -30,8 +30,8 @@ This section specifies data format in Protocol Buffers. ```protobuf // Resource information. This describes the source of telemetry data. message Resource { - // labels is a list of attributes that describe the resource. See OpenTelemetry specification - // semantic conventions for standardized label names: + // labels is a collection of attributes that describe the resource. See OpenTelemetry + // specification semantic conventions for standardized label names: // https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/data-semantic-conventions.md repeated AttributeKeyValue labels = 1; @@ -58,24 +58,24 @@ message Span { // the same `trace_id`. The ID is a 16-byte array. An ID with all zeroes // is considered invalid. // - // This field is semantically required. Receiver should generate new - // random trace_id if empty or invalid trace_id was received. - // - // This field is required. + // This field is semantically required. If empty or invalid trace_id was received: + // - The receiver MAY reject the invalid data and respond with the appropriate error + // code to the sender. + // - The receiver MAY accept the invalid data and attempt to correct it. bytes trace_id = 1; // span_id is a unique identifier for a span within a trace, assigned when the span // is created. The ID is an 8-byte array. An ID with all zeroes is considered // invalid. // - // This field is semantically required. 
Receiver should generate new - // random span_id if empty or invalid span_id was received. - // - // This field is required. + // This field is semantically required. If empty or invalid span_id was received: + // - The receiver MAY reject the invalid data and respond with the appropriate error + // code to the sender. + // - The receiver MAY accept the invalid data and attempt to correct it. bytes span_id = 2; - // TraceEntry is the entry that is repeated in tracestate field (see below). - message TraceEntry { + // TraceStateEntry is the entry that is repeated in tracestate field (see below). + message TraceStateEntry { // key must begin with a lowercase letter, and can only contain // lowercase letters 'a'-'z', digits '0'-'9', underscores '_', dashes // '-', asterisks '*', and forward slashes '/'. @@ -88,10 +88,10 @@ message Span { } // TraceState conveys information about request position in multiple distributed tracing graphs. - // It is a list of Tracestate.Entry with a maximum of 32 members in the list. + // It is a collection of TracestateEntry with a maximum of 32 members in the list. // // See the https://github.com/w3c/distributed-tracing for more details about this field. - repeated TraceEntry tracestate = 3; + repeated TraceStateEntry tracestate = 3; // parent_span_id is the `span_id` of this span's parent span. If this is a root span, then this // field must be omitted. The ID is an 8-byte array. @@ -172,8 +172,8 @@ message Span { // This field is required. int64 end_time_unixnano = 9; - // attributes is a list of AttributeKeyValue. The value can be a string, an integer, a double - // or the Boolean values `true` or `false`. Note, global attributes like + // attributes is a collection of attributes. The value can be a string, an integer, + // a double or the Boolean values `true` or `false`. Note, global attributes like // server name can be set as tags using resource API. 
Examples of attributes: // // "/http/user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" @@ -196,7 +196,7 @@ message Span { // A user-supplied name describing the event. string name = 2; - // A list of attributes of the event. + // attributes is a collection of attributes on the event. repeated AttributeKeyValue attributes = 3; // The number of dropped attributes. If the value is 0, then no attributes were dropped. @@ -206,8 +206,8 @@ message Span { // timed_events is a collection of `TimedEvent`s. repeated TimedEvent timed_events = 12; - // dropped_timed_events_count is the number of dropped timed events. If the value is 0, then - // no events were dropped. + // dropped_timed_events_count is the number of dropped timed events. If the value is 0, + // then no events were dropped. int32 dropped_timed_events_count = 13; // Link is a pointer from the current span to another span in the same trace or in a @@ -222,14 +222,12 @@ message Span { // A unique identifier for the linked span. The ID is an 8-byte array. bytes span_id = 2; - // The Tracestate associated with the link. - repeated TraceEntry tracestate = 3; + // tracestate is the Tracestate associated with the link. + repeated TraceStateEntry tracestate = 3; - // A list of attributes of the link. + // attributes is a collection of attributes on the link. repeated AttributeKeyValue attributes = 4; - // dropped_attributes_count is the number of dropped attributes. If the value is 0, then - // no attributes were dropped. int32 dropped_attributes_count = 5; } @@ -237,8 +235,8 @@ message Span { // in the same or different trace. repeated Link links = 14; - // dropped_links_count is the number of dropped links after the maximum size was enforced. - // If this value is 0, then no links were dropped. + // dropped_links_count is the number of dropped links after the maximum size was + // enforced. 
If this value is 0, then no links were dropped. int32 dropped_links_count = 15; // An optional final status for this span. Semantically when Status From da9e9f16e4581ca99c2e05edcf4cf620c7148c24 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Tue, 22 Oct 2019 15:34:04 -0400 Subject: [PATCH 06/11] Make sure all field comments start with field name --- text/0059-otlp-trace-data-format.md | 48 +++++++++++++++-------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index 29f99a976..fa2f1a24c 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -87,8 +87,8 @@ message Span { string value = 2; } - // TraceState conveys information about request position in multiple distributed tracing graphs. - // It is a collection of TracestateEntry with a maximum of 32 members in the list. + // tracestate conveys information about request position in multiple distributed tracing graphs. + // It is a collection of TraceStateEntry with a maximum of 32 members in the collection. // // See the https://github.com/w3c/distributed-tracing for more details about this field. repeated TraceStateEntry tracestate = 3; @@ -172,9 +172,9 @@ message Span { // This field is required. int64 end_time_unixnano = 9; - // attributes is a collection of attributes. The value can be a string, an integer, - // a double or the Boolean values `true` or `false`. Note, global attributes like - // server name can be set as tags using resource API. Examples of attributes: + // attributes is a collection of attribute key/value pairs. The value can be a string, + // an integer, a double or the Boolean values `true` or `false`. Note, global attributes + // like server name can be set as tags using resource API.
Examples of attributes: // // "/http/user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" // "/http/server_latency": 300 @@ -182,28 +182,29 @@ message Span { // "abc.com/score": 10.239 repeated AttributeKeyValue attributes = 10; - // The number of attributes that were discarded. Attributes can be discarded - // because their keys are too long or because there are too many attributes. - // If this value is 0, then no attributes were dropped. + // dropped_attributes_count is the number of attributes that were discarded. Attributes + // can be discarded because their keys are too long or because there are too many + // attributes. If this value is 0, then no attributes were dropped. int32 dropped_attributes_count = 11; // TimedEvent is a time-stamped annotation of the span, consisting of either // user-supplied key-value pairs, or details of a message sent/received between Spans. message TimedEvent { - // The time the event occurred. + // time_unixnano is the time the event occurred. int64 time_unixnano = 1; - // A user-supplied name describing the event. + // name is a user-supplied description of the event. string name = 2; - // attributes is a collection of attributes on the event. + // attributes is a collection of attribute key/value pairs on the event. repeated AttributeKeyValue attributes = 3; - // The number of dropped attributes. If the value is 0, then no attributes were dropped. + // dropped_attributes_count is the number of dropped attributes. If the value is 0, + // then no attributes were dropped. int32 dropped_attributes_count = 4; } - // timed_events is a collection of `TimedEvent`s. + // timed_events is a collection of TimedEvent items. repeated TimedEvent timed_events = 12; // dropped_timed_events_count is the number of dropped timed events. 
If the value is 0, @@ -215,19 +216,21 @@ message Span { // where a single batch handler processes multiple requests from different // traces or when the handler receives a request from a different project. message Link { - // A unique identifier of a trace that this linked span is part of. The ID is a - // 16-byte array. + // trace_id is a unique identifier of a trace that this linked span is part of. + // The ID is a 16-byte array. bytes trace_id = 1; - // A unique identifier for the linked span. The ID is an 8-byte array. + // span_id is a unique identifier for the linked span. The ID is an 8-byte array. bytes span_id = 2; - // tracestate is the Tracestate associated with the link. + // tracestate is the trace state associated with the link. repeated TraceStateEntry tracestate = 3; - // attributes is a collection of attributes on the link. + // attributes is a collection of attribute key/value pairs on the link. repeated AttributeKeyValue attributes = 4; + // dropped_attributes_count is the number of dropped attributes. If the value is 0, + // then no attributes were dropped. int32 dropped_attributes_count = 5; } @@ -239,13 +242,12 @@ message Span { // enforced. If this value is 0, then no links were dropped. int32 dropped_links_count = 15; - // An optional final status for this span. Semantically when Status - // wasn't set it is means span ended without errors and assume - // Status.Ok (code = 0). + // status is an optional final status for this span. Semantically when status + // wasn't set it is means span ended without errors and assume Status.Ok (code = 0). Status status = 16; - // An optional number of child spans that were generated while this span - // was active. If set, allows an implementation to detect missing child spans. + // child_span_count is an optional number of child spans that were generated while this + // span was active. If set, allows an implementation to detect missing child spans. 
int32 child_span_count = 17; } From 0b8ae1d292063c8081a8a957a7b1db4c516eb6b1 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Tue, 22 Oct 2019 23:11:42 -0400 Subject: [PATCH 07/11] Change status to approved --- text/0059-otlp-trace-data-format.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index fa2f1a24c..bc9ec49b2 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -2,7 +2,7 @@ _Author: Tigran Najaryan, Splunk_ -**Status:** `proposed` +**Status:** `approved` OTLP Trace Data Format specification describes the structure of the trace data that is transported by OpenTelemetry Protocol (RFC0035). From 9bef7a618ec5e6aad9876b7aa2336ee73241c9e7 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Wed, 23 Oct 2019 11:27:04 -0400 Subject: [PATCH 08/11] Clarify start and end time expectations. --- text/0059-otlp-trace-data-format.md | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index bc9ec49b2..bca35e182 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -153,10 +153,7 @@ message Span { // kept by the local machine where the span execution starts. On the server side, this // is the time when the server's application handler starts running. // - // This field is semantically required. When not set on receive - - // receiver should set it to the value of end_time field if it was - // set. Or to the current time if neither was set. It is important to - // keep end_time > start_time for consistency. + // This field is semantically required and it is expected that end_time >= start_time. // // This field is required. int64 start_time_unixnano = 8; @@ -165,9 +162,7 @@ message Span { // kept by the local machine where the span execution ends. 
On the server side, this // is the time when the server application handler stops running. // - // This field is semantically required. When not set on receive - - // receiver should set it to start_time value. It is important to - // keep end_time > start_time for consistency. + // This field is semantically required and it is expected that end_time >= start_time. // // This field is required. int64 end_time_unixnano = 9; From f04fa744c8258a276a65044b6b1e345e9d1e8caa Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Sat, 26 Oct 2019 22:42:24 -0400 Subject: [PATCH 09/11] Address Sergey's comments --- text/0059-otlp-trace-data-format.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index bca35e182..5f46a1b17 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -32,7 +32,7 @@ This section specifies data format in Protocol Buffers. message Resource { // labels is a collection of attributes that describe the resource. See OpenTelemetry // specification semantic conventions for standardized label names: - // https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/data-semantic-conventions.md + // https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/data-resource-semantic-conventions.md repeated AttributeKeyValue labels = 1; // dropped_labels_count is the number of dropped labels. If the value is 0, then @@ -210,6 +210,8 @@ message Span { // different trace. For example, this can be used in batching operations, // where a single batch handler processes multiple requests from different // traces or when the handler receives a request from a different project. 
+ // See also Links specification: + // https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/overview.md#links-between-spans message Link { // trace_id is a unique identifier of a trace that this linked span is part of. // The ID is a 16-byte array. @@ -253,7 +255,7 @@ message Status { // when not set. int32 code = 1; - // A developer-facing error message, which should be in English. + // A developer-facing human readable error message. string message = 2; } ``` From ad0381e75e142311ccf0ed96ec4607b02c600b4d Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Tue, 29 Oct 2019 13:18:49 -0400 Subject: [PATCH 10/11] Remove string length requirement --- text/0059-otlp-trace-data-format.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index 5f46a1b17..f653dcb40 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -283,7 +283,7 @@ message AttributeKeyValue { // Only one of the following fields is supposed to contain data (determined by `type` field value). // This is deliberately not using Protobuf `oneof` for performance reasons (verified by benchmarks). - // A string up to 256 bytes long. + // A string value. string string_value = 3; // A 64-bit signed integer. int64 int64_value = 4; From 1f0ee7bd9404df3ffb174257d269373196dce840 Mon Sep 17 00:00:00 2001 From: Tigran Najaryan Date: Mon, 4 Nov 2019 18:33:01 -0500 Subject: [PATCH 11/11] Add StatusCode enum --- text/0059-otlp-trace-data-format.md | 37 +++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/text/0059-otlp-trace-data-format.md b/text/0059-otlp-trace-data-format.md index f653dcb40..bca74d813 100644 --- a/text/0059-otlp-trace-data-format.md +++ b/text/0059-otlp-trace-data-format.md @@ -58,7 +58,7 @@ message Span { // the same `trace_id`. The ID is a 16-byte array. 
An ID with all zeroes // is considered invalid. // - // This field is semantically required. If empty or invalid trace_id was received: + // This field is semantically required. If empty or invalid trace_id is received: // - The receiver MAY reject the invalid data and respond with the appropriate error // code to the sender. // - The receiver MAY accept the invalid data and attempt to correct it. @@ -68,7 +68,7 @@ message Span { // is created. The ID is an 8-byte array. An ID with all zeroes is considered // invalid. // - // This field is semantically required. If empty or invalid span_id was received: + // This field is semantically required. If empty or invalid span_id is received: // - The receiver MAY reject the invalid data and respond with the appropriate error // code to the sender. // - The receiver MAY accept the invalid data and attempt to correct it. @@ -167,9 +167,9 @@ message Span { // This field is required. int64 end_time_unixnano = 9; - // attributes is a collection of attribute key/value pairs. The value can be a string, + // attributes is a collection of key/value pairs. The value can be a string, // an integer, a double or the Boolean values `true` or `false`. Note, global attributes - // like server name can be set as tags using resource API. Examples of attributes: + // like server name can be set using the resource API. Examples of attributes: // // "/http/user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36" // "/http/server_latency": 300 @@ -243,17 +243,40 @@ message Span { // wasn't set it is means span ended without errors and assume Status.Ok (code = 0). Status status = 16; - // child_span_count is an optional number of child spans that were generated while this + // child_span_count is an optional number of local child spans that were generated while this // span was active. If set, allows an implementation to detect missing child spans. 
int32 child_span_count = 17; } -// The `Status` type defines a logical error model that is suitable for different +// The Status type defines a logical error model that is suitable for different // programming environments, including REST APIs and RPC APIs. message Status { + + // StatusCode mirrors the codes defined at + // https://github.com/open-telemetry/opentelemetry-specification/blob/master/specification/api-tracing.md#statuscanonicalcode + enum StatusCode { + Ok = 0; + Cancelled = 1; + UnknownError = 2; + InvalidArgument = 3; + DeadlineExceeded = 4; + NotFound = 5; + AlreadyExists = 6; + PermissionDenied = 7; + ResourceExhausted = 8; + FailedPrecondition = 9; + Aborted = 10; + OutOfRange = 11; + Unimplemented = 12; + InternalError = 13; + Unavailable = 14; + DataLoss = 15; + Unauthenticated = 16; + }; + // The status code. This is optional field. It is safe to assume 0 (OK) // when not set. - int32 code = 1; + StatusCode code = 1; // A developer-facing human readable error message. string message = 2;