diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 000000000..49cdd668e --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +2.7.6 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..99ae43020 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +FROM ruby:2.7-alpine + +RUN apk add --no-cache \ + build-base \ + git \ + libxml2 \ + libxslt \ + libxml2-dev \ + libxslt-dev \ + libc-dev \ + libgcrypt-dev \ + bash \ + curl + +# Keep the project out of /lib: on Alpine that directory holds the musl loader and system libraries +WORKDIR /app +RUN gem install bundler:2.1.4 google-protobuf + +COPY . . +RUN bundle install diff --git a/bin/generate-protos.sh b/bin/generate-protos.sh index 5aead510a..93e9687d5 100755 --- a/bin/generate-protos.sh +++ b/bin/generate-protos.sh @@ -5,11 +5,11 @@ set -eo pipefail DIR=`dirname "$0"` OUTPUT_DIR=$DIR/../lib/apollo-studio-tracing/proto -echo "Removing old client" -rm -f $OUTPUT_DIR/apollo.proto $OUTPUT_DIR/apollo_pb.rb +# echo "Removing old client" +# rm -f $OUTPUT_DIR/apollo.proto $OUTPUT_DIR/apollo_pb.rb -echo "Downloading latest Apollo Protobuf IDL" -curl --silent --output $OUTPUT_DIR/apollo.proto https://raw.githubusercontent.com/apollographql/apollo-server/main/packages/apollo-reporting-protobuf/src/reports.proto +# echo "Downloading latest Apollo Protobuf IDL" +# curl --silent --output $OUTPUT_DIR/apollo.proto https://usage-reporting.api.apollographql.com/proto/reports.proto echo "Generating Ruby client stubs" protoc -I $OUTPUT_DIR --ruby_out $OUTPUT_DIR $OUTPUT_DIR/apollo.proto diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..aaa0b6bc2 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,7 @@ +version: '3.9' +name: 'apollo-studio-tracing-ruby' +services: + lib: + build: . 
+ volumes: + - '.:/app' diff --git a/lib/apollo-studio-tracing/api.rb b/lib/apollo-studio-tracing/api.rb index ead06946b..265464e68 100644 --- a/lib/apollo-studio-tracing/api.rb +++ b/lib/apollo-studio-tracing/api.rb @@ -9,7 +9,7 @@ module ApolloStudioTracing module API extend self - APOLLO_URL = 'https://engine-report.apollodata.com/api/ingress/traces' + APOLLO_URL = 'https://usage-reporting.api.apollographql.com/api/ingress/traces' APOLLO_URI = ::URI.parse(APOLLO_URL) UploadAttemptError = Class.new(StandardError) RetryableUploadAttemptError = Class.new(UploadAttemptError) @@ -36,7 +36,7 @@ def upload(report_data, max_attempts:, min_retry_delay_secs:, **options) def attempt_upload(report_data, compress:, api_key:) body = compress ? gzip(report_data) : report_data - headers = { 'X-Api-Key' => api_key } + headers = { 'X-Api-Key' => api_key, 'user-agent' => 'ApolloServerPluginUsageReporting', 'accept' => 'application/json' } headers['Content-Encoding'] = 'gzip' if compress result = Net::HTTP.post(APOLLO_URI, body, headers) diff --git a/lib/apollo-studio-tracing/proto/apollo.proto b/lib/apollo-studio-tracing/proto/apollo.proto index b5c70dcff..ddaaf320a 100644 --- a/lib/apollo-studio-tracing/proto/apollo.proto +++ b/lib/apollo-studio-tracing/proto/apollo.proto @@ -1,251 +1,288 @@ syntax = "proto3"; - package mdg.engine.proto; + import "google/protobuf/timestamp.proto"; message Trace { - message CachePolicy { - enum Scope { - UNKNOWN = 0; - PUBLIC = 1; - PRIVATE = 2; - } - - Scope scope = 1; - int64 max_age_ns = 2; // use 0 for absent, -1 for 0 - } - - message Details { - // The variables associated with this query (unless the reporting agent is - // configured to keep them all private). Values are JSON: ie, strings are - // enclosed in double quotes, etc. The value of a private variable is - // the empty string. - map<string, string> variables_json = 4; - // Deprecated. Engineproxy did not encode variable values as JSON, so you - // couldn't tell numbers from numeric strings. 
Send variables_json instead. - map<string, bytes> deprecated_variables = 1; - // This is deprecated and only used for legacy applications - // don't include this in traces inside a FullTracesReport; the operation - // name for these traces comes from the key of the traces_per_query map. - string operation_name = 3; - } - - message Error { - string message = 1; // required - repeated Location location = 2; - uint64 time_ns = 3; - string json = 4; - } - - message HTTP { - message Values { - repeated string value = 1; - } - - enum Method { - UNKNOWN = 0; - OPTIONS = 1; - GET = 2; - HEAD = 3; - POST = 4; - PUT = 5; - DELETE = 6; - TRACE = 7; - CONNECT = 8; - PATCH = 9; - } - Method method = 1; - string host = 2; - string path = 3; - - // Should exclude manual blacklist ("Auth" by default) - map<string, Values> request_headers = 4; - map<string, Values> response_headers = 5; - - uint32 status_code = 6; - - bool secure = 8; // TLS was used - string protocol = 9; // by convention "HTTP/1.0", "HTTP/1.1", "HTTP/2" or "h2" - } - - message Location { - uint32 line = 1; - uint32 column = 2; - } - - // We store information on each resolver execution as a Node on a tree. - // The structure of the tree corresponds to the structure of the GraphQL - // response; it does not indicate the order in which resolvers were - // invoked. Note that nodes representing indexes (and the root node) - // don't contain all Node fields (eg types and times). - message Node { - // The name of the field (for Nodes representing a resolver call) or the - // index in a list (for intermediate Nodes representing elements of a list). - // field_name is the name of the field as it appears in the GraphQL - // response: ie, it may be an alias. (In that case, the original_field_name - // field holds the actual field name from the schema.) In any context where - // we're building up a path, we use the response_name rather than the - // original_field_name. 
- oneof id { - string response_name = 1; - uint32 index = 2; - } - - string original_field_name = 14; - - // The field's return type; e.g. "String!" for User.email:String! - string type = 3; - - // The field's parent type; e.g. "User" for User.email:String! - string parent_type = 13; - - CachePolicy cache_policy = 5; - - // relative to the trace's start_time, in ns - uint64 start_time = 8; - // relative to the trace's start_time, in ns - uint64 end_time = 9; - - repeated Error error = 11; - repeated Node child = 12; - - reserved 4; - } - - // represents a node in the query plan, under which there is a trace tree for that service fetch. - // In particular, each fetch node represents a call to an implementing service, and calls to implementing - // services may not be unique. See https://github.com/apollographql/apollo-server/blob/main/packages/apollo-gateway/src/QueryPlan.ts - // for more information and details. - message QueryPlanNode { - // This represents a set of nodes to be executed sequentially by the Gateway executor - message SequenceNode { - repeated QueryPlanNode nodes = 1; - } - // This represents a set of nodes to be executed in parallel by the Gateway executor - message ParallelNode { - repeated QueryPlanNode nodes = 1; - } - // This represents a node to send an operation to an implementing service - message FetchNode { - // XXX When we want to include more details about the sub-operation that was - // executed against this service, we should include that here in each fetch node. - // This might include an operation signature, requires directive, reference resolutions, etc. - string service_name = 1; - - bool trace_parsing_failed = 2; - - // This Trace only contains start_time, end_time, duration_ns, and root; - // all timings were calculated **on the federated service**, and clock skew - // will be handled by the ingress server. - Trace trace = 3; - - // relative to the outer trace's start_time, in ns, measured in the gateway. 
- uint64 sent_time_offset = 4; - - // Wallclock times measured in the gateway for when this operation was - // sent and received. - google.protobuf.Timestamp sent_time = 5; - google.protobuf.Timestamp received_time = 6; - } - - // This node represents a way to reach into the response path and attach related entities. - // XXX Flatten is really not the right name and this node may be renamed in the query planner. - message FlattenNode { - repeated ResponsePathElement response_path = 1; - QueryPlanNode node = 2; - } - message ResponsePathElement { - oneof id { - string field_name = 1; - uint32 index = 2; - } - } - oneof node { - SequenceNode sequence = 1; - ParallelNode parallel = 2; - FetchNode fetch = 3; - FlattenNode flatten = 4; - } - } - - // Wallclock time when the trace began. - google.protobuf.Timestamp start_time = 4; // required - // Wallclock time when the trace ended. - google.protobuf.Timestamp end_time = 3; // required - // High precision duration of the trace; may not equal end_time-start_time - // (eg, if your machine's clock changed during the trace). - uint64 duration_ns = 11; // required - // A tree containing information about all resolvers run directly by this - // service, including errors. - Node root = 14; - - // ------------------------------------------------------------------------- - // Fields below this line are *not* included in federated traces (the traces - // sent from federated services to the gateway). - - // In addition to details.raw_query, we include a "signature" of the query, - // which can be normalized: for example, you may want to discard aliases, drop - // unused operations and fragments, sort fields, etc. The most important thing - // here is that the signature match the signature in StatsReports. In - // StatsReports signatures show up as the key in the per_query map (with the - // operation name prepended). The signature should be a valid GraphQL query. 
- // All traces must have a signature; if this Trace is in a FullTracesReport - // that signature is in the key of traces_per_query rather than in this field. - // Engineproxy provides the signature in legacy_signature_needs_resigning - // instead. - string signature = 19; - - Details details = 6; - - // Note: engineproxy always sets client_name, client_version, and client_address to "none". - // apollo-engine-reporting allows for them to be set by the user. - string client_name = 7; - string client_version = 8; - string client_address = 9; - string client_reference_id = 23; - - HTTP http = 10; - - CachePolicy cache_policy = 18; - - // If this Trace was created by a gateway, this is the query plan, including - // sub-Traces for federated services. Note that the 'root' tree on the - // top-level Trace won't contain any resolvers (though it could contain errors - // that occurred in the gateway itself). - QueryPlanNode query_plan = 26; - - // Was this response served from a full query response cache? (In that case - // the node tree will have no resolvers.) - bool full_query_cache_hit = 20; - - // Was this query specified successfully as a persisted query hash? - bool persisted_query_hit = 21; - // Did this query contain both a full query string and a persisted query hash? - // (This typically means that a previous request was rejected as an unknown - // persisted query.) - bool persisted_query_register = 22; - - // Was this operation registered and a part of the safelist? - bool registered_operation = 24; - - // Was this operation forbidden due to lack of safelisting? - bool forbidden_operation = 25; - - // -------------------------------------------------------------- - // Fields below this line are only set by the old Go engineproxy. - - // Older agents (eg the Go engineproxy) relied to some degree on the Engine - // backend to run their own semi-compatible implementation of a specific - // variant of query signatures. 
The backend does not do this for new agents (which - // set the above 'signature' field). It used to still "re-sign" signatures - // from engineproxy, but we've now simplified the backend to no longer do this. - // Deprecated and ignored in FullTracesReports. - string legacy_signature_needs_resigning = 5; - - // removed: Node parse = 12; Node validate = 13; - // Id128 server_id = 1; Id128 client_id = 2; - reserved 12, 13, 1, 2; + message CachePolicy { + enum Scope { + UNKNOWN = 0; + PUBLIC = 1; + PRIVATE = 2; + } + + Scope scope = 1; + int64 max_age_ns = 2; // use 0 for absent, -1 for 0 + } + + message Details { + // The variables associated with this query (unless the reporting agent is + // configured to keep them all private). Values are JSON: ie, strings are + // enclosed in double quotes, etc. The value of a private variable is + // the empty string. + map<string, string> variables_json = 4; + + + // This is deprecated and only used for legacy applications + // don't include this in traces inside a FullTracesReport; the operation + // name for these traces comes from the key of the traces_per_query map. + string operation_name = 3; + } + + message Error { + string message = 1; // required + repeated Location location = 2; + uint64 time_ns = 3; + string json = 4; + } + + message HTTP { + message Values { + repeated string value = 1; + } + + enum Method { + UNKNOWN = 0; + OPTIONS = 1; + GET = 2; + HEAD = 3; + POST = 4; + PUT = 5; + DELETE = 6; + TRACE = 7; + CONNECT = 8; + PATCH = 9; + } + Method method = 1; + + // Should exclude manual blacklist ("Auth" by default) + map<string, Values> request_headers = 4; + map<string, Values> response_headers = 5; + + uint32 status_code = 6; + + reserved 2, 3, 8, 9; + } + + message Location { + uint32 line = 1; + uint32 column = 2; + } + + // We store information on each resolver execution as a Node on a tree. + // The structure of the tree corresponds to the structure of the GraphQL + // response; it does not indicate the order in which resolvers were + // invoked. 
Note that nodes representing indexes (and the root node) + // don't contain all Node fields (eg types and times). + message Node { + // The name of the field (for Nodes representing a resolver call) or the + // index in a list (for intermediate Nodes representing elements of a list). + // field_name is the name of the field as it appears in the GraphQL + // response: ie, it may be an alias. (In that case, the original_field_name + // field holds the actual field name from the schema.) In any context where + // we're building up a path, we use the response_name rather than the + // original_field_name. + oneof id { + string response_name = 1; + uint32 index = 2; + } + + string original_field_name = 14; + + // The field's return type; e.g. "String!" for User.email:String! + string type = 3; + + // The field's parent type; e.g. "User" for User.email:String! + string parent_type = 13; + + CachePolicy cache_policy = 5; + + // relative to the trace's start_time, in ns + uint64 start_time = 8; + // relative to the trace's start_time, in ns + uint64 end_time = 9; + + repeated Error error = 11; + repeated Node child = 12; + + reserved 4; + } + + // represents a node in the query plan, under which there is a trace tree for that service fetch. + // In particular, each fetch node represents a call to an implementing service, and calls to implementing + // services may not be unique. See https://github.com/apollographql/federation/blob/main/query-planner-js/src/QueryPlan.ts + // for more information and details. 
+ message QueryPlanNode { + // This represents a set of nodes to be executed sequentially by the Gateway executor + message SequenceNode { + repeated QueryPlanNode nodes = 1; + } + // This represents a set of nodes to be executed in parallel by the Gateway executor + message ParallelNode { + repeated QueryPlanNode nodes = 1; + } + // This represents a node to send an operation to an implementing service + message FetchNode { + // XXX When we want to include more details about the sub-operation that was + // executed against this service, we should include that here in each fetch node. + // This might include an operation signature, requires directive, reference resolutions, etc. + string service_name = 1; + + bool trace_parsing_failed = 2; + + // This Trace only contains start_time, end_time, duration_ns, and root; + // all timings were calculated **on the federated service**, and clock skew + // will be handled by the ingress server. + Trace trace = 3; + + // relative to the outer trace's start_time, in ns, measured in the gateway. + uint64 sent_time_offset = 4; + + // Wallclock times measured in the gateway for when this operation was + // sent and received. + google.protobuf.Timestamp sent_time = 5; + google.protobuf.Timestamp received_time = 6; + } + + // This node represents a way to reach into the response path and attach related entities. + // XXX Flatten is really not the right name and this node may be renamed in the query planner. + message FlattenNode { + repeated ResponsePathElement response_path = 1; + QueryPlanNode node = 2; + } + + // A `DeferNode` corresponds to one or more @defer at the same level of "nestedness" in the planned query. 
+ message DeferNode { + DeferNodePrimary primary = 1; + repeated DeferredNode deferred = 2; + } + + message ConditionNode { + string condition = 1; + QueryPlanNode if_clause = 2; + QueryPlanNode else_clause = 3; + } + + message DeferNodePrimary { + QueryPlanNode node = 1; + } + message DeferredNode { + repeated DeferredNodeDepends depends = 1; + string label = 2; + ResponsePathElement path = 3; + QueryPlanNode node = 4; + } + message DeferredNodeDepends { + string id = 1; + string defer_label = 2; + } + + message ResponsePathElement { + oneof id { + string field_name = 1; + uint32 index = 2; + } + } + oneof node { + SequenceNode sequence = 1; + ParallelNode parallel = 2; + FetchNode fetch = 3; + FlattenNode flatten = 4; + DeferNode defer = 5; + ConditionNode condition = 6; + } + } + + // Wallclock time when the trace began. + google.protobuf.Timestamp start_time = 4; // required + // Wallclock time when the trace ended. + google.protobuf.Timestamp end_time = 3; // required + // High precision duration of the trace; may not equal end_time-start_time + // (eg, if your machine's clock changed during the trace). + uint64 duration_ns = 11; // required + // A tree containing information about all resolvers run directly by this + // service, including errors. + Node root = 14; + + // If this is true, the trace is potentially missing some nodes that were + // present on the query plan. This can happen if the trace span buffer used + // in the router fills up and some spans have to be dropped. In these cases + // the overall trace timing will still be correct, but the trace data could + // be missing some referenced or executed fields, and some nodes may be + // missing. If this is true we should display a warning to the user when they + // view the trace in Explorer. 
+ bool is_incomplete = 33; + + // ------------------------------------------------------------------------- + // Fields below this line are *not* included in federated traces (the traces + // sent from federated services to the gateway). + + // In addition to details.raw_query, we include a "signature" of the query, + // which can be normalized: for example, you may want to discard aliases, drop + // unused operations and fragments, sort fields, etc. The most important thing + // here is that the signature match the signature in StatsReports. In + // StatsReports signatures show up as the key in the per_query map (with the + // operation name prepended). The signature should be a valid GraphQL query. + // All traces must have a signature; if this Trace is in a FullTracesReport + // that signature is in the key of traces_per_query rather than in this field. + // Engineproxy provides the signature in legacy_signature_needs_resigning + // instead. + string signature = 19; + + // Optional: when GraphQL parsing or validation against the GraphQL schema fails, these fields + // can include reference to the operation being sent for users to dig into the set of operations + // that are failing validation. + string unexecutedOperationBody = 27; + string unexecutedOperationName = 28; + + Details details = 6; + + string client_name = 7; + string client_version = 8; + + HTTP http = 10; + + CachePolicy cache_policy = 18; + + // If this Trace was created by a gateway, this is the query plan, including + // sub-Traces for federated services. Note that the 'root' tree on the + // top-level Trace won't contain any resolvers (though it could contain errors + // that occurred in the gateway itself). + QueryPlanNode query_plan = 26; + + // Was this response served from a full query response cache? (In that case + // the node tree will have no resolvers.) + bool full_query_cache_hit = 20; + + // Was this query specified successfully as a persisted query hash? 
+ bool persisted_query_hit = 21; + // Did this query contain both a full query string and a persisted query hash? + // (This typically means that a previous request was rejected as an unknown + // persisted query.) + bool persisted_query_register = 22; + + // Was this operation registered and a part of the safelist? + bool registered_operation = 24; + + // Was this operation forbidden due to lack of safelisting? + bool forbidden_operation = 25; + + // Some servers don't do field-level instrumentation for every request and assign + // a "weight" to each request that they do instrument. When this + // trace is aggregated into field usage stats, it should count as this value + // towards the estimated_execution_count rather than just 1. This value should + // typically be at least 1. + // + // 0 is treated as 1 for backwards compatibility. + double field_execution_weight = 31; + + + + // removed: Node parse = 12; Node validate = 13; + // Id128 server_id = 1; Id128 client_id = 2; + // String client_reference_id = 23; String client_address = 9; + reserved 1, 2, 9, 12, 13, 23; } // The `service` value embedded within the header key is not guaranteed to contain an actual service, @@ -256,88 +293,120 @@ message Trace { // agent_version, etc.) is sent by the Apollo Engine Reporting agent, but we do not currently save that // information to any of our persistent storage. 
message ReportHeader { - // eg "host-01.example.com" - string hostname = 5; - - // eg "engineproxy 0.1.0" - string agent_version = 6; // required - // eg "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from `git describe --tags`) - string service_version = 7; - // eg "node v4.6.0" - string runtime_version = 8; - // eg "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64 GNU/Linux" - string uname = 9; - // eg "current", "prod" - string schema_tag = 10; - // An id that is used to represent the schema to Apollo Graph Manager - // Using this in place of what used to be schema_hash, since that is no longer - // attached to a schema in the backend. - string executable_schema_id = 11; - - reserved 3; // removed string service = 3; + // eg "mygraph@myvariant" + string graph_ref = 12; + + // eg "host-01.example.com" + string hostname = 5; + + // eg "engineproxy 0.1.0" + string agent_version = 6; // required + // eg "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from `git describe --tags`) + string service_version = 7; + // eg "node v4.6.0" + string runtime_version = 8; + // eg "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64 GNU/Linux" + string uname = 9; + // An id that is used to represent the schema to Apollo Graph Manager + // Using this in place of what used to be schema_hash, since that is no longer + // attached to a schema in the backend. 
+ string executable_schema_id = 11; + + reserved 3; // removed string service = 3; } message PathErrorStats { - map<string, PathErrorStats> children = 1; - uint64 errors_count = 4; - uint64 requests_with_errors_count = 5; + map<string, PathErrorStats> children = 1; + uint64 errors_count = 4; + uint64 requests_with_errors_count = 5; } message QueryLatencyStats { - repeated int64 latency_count = 1; - uint64 request_count = 2; - uint64 cache_hits = 3; - uint64 persisted_query_hits = 4; - uint64 persisted_query_misses = 5; - repeated int64 cache_latency_count = 6; - PathErrorStats root_error_stats = 7; - uint64 requests_with_errors_count = 8; - repeated int64 public_cache_ttl_count = 9; - repeated int64 private_cache_ttl_count = 10; - uint64 registered_operation_count = 11; - uint64 forbidden_operation_count = 12; + repeated sint64 latency_count = 13; + uint64 request_count = 2; + uint64 cache_hits = 3; + uint64 persisted_query_hits = 4; + uint64 persisted_query_misses = 5; + repeated sint64 cache_latency_count = 14; + PathErrorStats root_error_stats = 7; + uint64 requests_with_errors_count = 8; + repeated sint64 public_cache_ttl_count = 15; + repeated sint64 private_cache_ttl_count = 16; + uint64 registered_operation_count = 11; + uint64 forbidden_operation_count = 12; + // The number of requests that were executed without field-level + // instrumentation (and thus do not contribute to `observed_execution_count` + // fields on this message's cousin-twice-removed FieldStats). 
+ uint64 requests_without_field_instrumentation = 17; + // 1, 6, 9, and 10 were old int64 histograms + reserved 1, 6, 9, 10; } message StatsContext { - string client_reference_id = 1; - string client_name = 2; - string client_version = 3; + // string client_reference_id = 1; + reserved 1; + string client_name = 2; + string client_version = 3; } message ContextualizedQueryLatencyStats { - QueryLatencyStats query_latency_stats = 1; - StatsContext context = 2; + QueryLatencyStats query_latency_stats = 1; + StatsContext context = 2; } message ContextualizedTypeStats { - StatsContext context = 1; - map<string, TypeStat> per_type_stat = 2; + StatsContext context = 1; + map<string, TypeStat> per_type_stat = 2; } message FieldStat { - string return_type = 3; // required; eg "String!" for User.email:String! - uint64 errors_count = 4; - uint64 count = 5; - uint64 requests_with_errors_count = 6; - repeated int64 latency_count = 8; // Duration histogram; see docs/histograms.md - reserved 1, 2, 7; + string return_type = 3; // required; eg "String!" for User.email:String! + // Number of errors whose path is this field. Note that we assume that error + // tracking does *not* require field-level instrumentation so this *will* + // include errors from requests that don't contribute to the + // `observed_execution_count` field (and does not need to be scaled by + // field_execution_weight). + uint64 errors_count = 4; + // Number of times that the resolver for this field is directly observed being + // executed. + uint64 observed_execution_count = 5; + // Same as `observed_execution_count` but potentially scaled upwards if the server was only + // performing field-level instrumentation on a sampling of operations. For + // example, if the server randomly instruments 1% of requests for this + // operation, this number will be 100 times greater than + // `observed_execution_count`. 
(When aggregating a Trace into FieldStats, + // this number goes up by the trace's `field_execution_weight` for each + // observed field execution, while `observed_execution_count` above goes + // up by 1.) + uint64 estimated_execution_count = 10; + // Number of times the resolver for this field is executed that resulted in + // at least one error. "Request" is a misnomer here as this corresponds to + // resolver calls, not overall operations. Like `errors_count` above, this + // includes all requests rather than just requests with field-level + // instrumentation. + uint64 requests_with_errors_count = 6; + // Duration histogram for the latency of this field. Note that it is scaled in + // the same way as estimated_execution_count so its "total count" might be + // greater than `observed_execution_count` and may not exactly equal + // `estimated_execution_count` due to rounding. + repeated sint64 latency_count = 9; + reserved 1, 2, 7, 8; } message TypeStat { - // Key is (eg) "email" for User.email:String! - map<string, FieldStat> per_field_stat = 3; - reserved 1, 2; + // Key is (eg) "email" for User.email:String! + map<string, FieldStat> per_field_stat = 3; + reserved 1, 2; } -message Field { - string name = 2; // required; eg "email" for User.email:String! - string return_type = 3; // required; eg "String!" for User.email:String! +message ReferencedFieldsForType { + // Contains (eg) "email" for User.email:String! + repeated string field_names = 1; + // True if this type is an interface. + bool is_interface = 2; } -message Type { - string name = 1; // required; eg "User" for User.email:String! - repeated Field field = 2; -} + + // This is the top-level message used by the new traces ingress. This // is designed for the apollo-engine-reporting TypeScript agent and will @@ -348,34 +417,51 @@ message Type { // size has been reached (say, 4MB) or 5-10 seconds has passed is appropriate. 
// This message used to be know as FullTracesReport, but got renamed since it isn't just for traces anymore message Report { - ReportHeader header = 1; - - // key is statsReportKey (# operationName\nsignature) Note that the nested - // traces will *not* have a signature or details.operationName (because the - // key is adequate). - // - // We also assume that traces don't have - // legacy_per_query_implicit_operation_name, and we don't require them to have - // details.raw_query (which would consume a lot of space and has privacy/data - // access issues, and isn't currently exposed by our app anyway). - map traces_per_query = 5; - - // This is the time that the requests in this trace are considered to have taken place - // If this field is not present the max of the end_time of each trace will be used instead. - // If there are no traces and no end_time present the report will not be able to be processed. - // Note: This will override the end_time from traces. - google.protobuf.Timestamp end_time = 2; // required if no traces in this message + ReportHeader header = 1; + + // key is statsReportKey (# operationName\nsignature) Note that the nested + // traces will *not* have a signature or details.operationName (because the + // key is adequate). + // + // We also assume that traces don't have + // legacy_per_query_implicit_operation_name, and we don't require them to have + // details.raw_query (which would consume a lot of space and has privacy/data + // access issues, and isn't currently exposed by our app anyway). + map traces_per_query = 5; + + // This is the time that the requests in this trace are considered to have taken place + // If this field is not present the max of the end_time of each trace will be used instead. + // If there are no traces and no end_time present the report will not be able to be processed. + // Note: This will override the end_time from traces. 
+ google.protobuf.Timestamp end_time = 2; // required if no traces in this message + + // Total number of operations processed during this period. + uint64 operation_count = 6; } message ContextualizedStats { - StatsContext context = 1; - QueryLatencyStats query_latency_stats = 2; - // Key is type name. - map per_type_stat = 3; + StatsContext context = 1; + QueryLatencyStats query_latency_stats = 2; + // Key is type name. This structure provides data for the count and latency of individual + // field executions and thus only reflects operations for which field-level tracing occurred. + map per_type_stat = 3; + } -// A sequence of traces and stats. An individual trace should either be counted as a stat or trace +// A sequence of traces and stats. An individual operation should either be described as a trace +// or as part of stats, but not both. message TracesAndStats { - repeated Trace trace = 1; - repeated ContextualizedStats stats_with_context = 2; + repeated Trace trace = 1; + repeated ContextualizedStats stats_with_context = 2; + // This describes the fields referenced in the operation. Note that this may + // include fields that don't show up in FieldStats (due to being interface fields, + // being nested under null fields or empty lists or non-matching fragments or + // `@include` or `@skip`, etc). It also may be missing fields that show up in FieldStats + // (as FieldStats will include the concrete object type for fields referenced + // via an interface type). + map referenced_fields_by_type = 4; + // This field is used to validate that the algorithm used to construct `stats_with_context` + // matches similar algorithms in Apollo's servers. It is otherwise ignored and should not + // be included in reports. 
+ repeated Trace internal_traces_contributing_to_stats = 3; } diff --git a/lib/apollo-studio-tracing/proto/apollo_pb.rb b/lib/apollo-studio-tracing/proto/apollo_pb.rb index 59fe76ea9..82f539b2a 100644 --- a/lib/apollo-studio-tracing/proto/apollo_pb.rb +++ b/lib/apollo-studio-tracing/proto/apollo_pb.rb @@ -4,6 +4,7 @@ require 'google/protobuf' require 'google/protobuf/timestamp_pb' + Google::Protobuf::DescriptorPool.generated_pool.build do add_file("apollo.proto", :syntax => :proto3) do add_message "mdg.engine.proto.Trace" do @@ -11,12 +12,13 @@ optional :end_time, :message, 3, "google.protobuf.Timestamp" optional :duration_ns, :uint64, 11 optional :root, :message, 14, "mdg.engine.proto.Trace.Node" + optional :is_incomplete, :bool, 33 optional :signature, :string, 19 + optional :unexecutedOperationBody, :string, 27 + optional :unexecutedOperationName, :string, 28 optional :details, :message, 6, "mdg.engine.proto.Trace.Details" optional :client_name, :string, 7 optional :client_version, :string, 8 - optional :client_address, :string, 9 - optional :client_reference_id, :string, 23 optional :http, :message, 10, "mdg.engine.proto.Trace.HTTP" optional :cache_policy, :message, 18, "mdg.engine.proto.Trace.CachePolicy" optional :query_plan, :message, 26, "mdg.engine.proto.Trace.QueryPlanNode" @@ -25,7 +27,7 @@ optional :persisted_query_register, :bool, 22 optional :registered_operation, :bool, 24 optional :forbidden_operation, :bool, 25 - optional :legacy_signature_needs_resigning, :string, 5 + optional :field_execution_weight, :double, 31 end add_message "mdg.engine.proto.Trace.CachePolicy" do optional :scope, :enum, 1, "mdg.engine.proto.Trace.CachePolicy.Scope" @@ -38,7 +40,6 @@ end add_message "mdg.engine.proto.Trace.Details" do map :variables_json, :string, :string, 4 - map :deprecated_variables, :string, :bytes, 1 optional :operation_name, :string, 3 end add_message "mdg.engine.proto.Trace.Error" do @@ -49,13 +50,9 @@ end add_message "mdg.engine.proto.Trace.HTTP" do 
optional :method, :enum, 1, "mdg.engine.proto.Trace.HTTP.Method" - optional :host, :string, 2 - optional :path, :string, 3 map :request_headers, :string, :message, 4, "mdg.engine.proto.Trace.HTTP.Values" map :response_headers, :string, :message, 5, "mdg.engine.proto.Trace.HTTP.Values" optional :status_code, :uint32, 6 - optional :secure, :bool, 8 - optional :protocol, :string, 9 end add_message "mdg.engine.proto.Trace.HTTP.Values" do repeated :value, :string, 1 @@ -96,6 +93,8 @@ optional :parallel, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode.ParallelNode" optional :fetch, :message, 3, "mdg.engine.proto.Trace.QueryPlanNode.FetchNode" optional :flatten, :message, 4, "mdg.engine.proto.Trace.QueryPlanNode.FlattenNode" + optional :defer, :message, 5, "mdg.engine.proto.Trace.QueryPlanNode.DeferNode" + optional :condition, :message, 6, "mdg.engine.proto.Trace.QueryPlanNode.ConditionNode" end end add_message "mdg.engine.proto.Trace.QueryPlanNode.SequenceNode" do @@ -116,6 +115,28 @@ repeated :response_path, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement" optional :node, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode" end + add_message "mdg.engine.proto.Trace.QueryPlanNode.DeferNode" do + optional :primary, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode.DeferNodePrimary" + repeated :deferred, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode.DeferredNode" + end + add_message "mdg.engine.proto.Trace.QueryPlanNode.ConditionNode" do + optional :condition, :string, 1 + optional :if_clause, :message, 2, "mdg.engine.proto.Trace.QueryPlanNode" + optional :else_clause, :message, 3, "mdg.engine.proto.Trace.QueryPlanNode" + end + add_message "mdg.engine.proto.Trace.QueryPlanNode.DeferNodePrimary" do + optional :node, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode" + end + add_message "mdg.engine.proto.Trace.QueryPlanNode.DeferredNode" do + repeated :depends, :message, 1, "mdg.engine.proto.Trace.QueryPlanNode.DeferredNodeDepends" + optional 
:label, :string, 2 + optional :path, :message, 3, "mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement" + optional :node, :message, 4, "mdg.engine.proto.Trace.QueryPlanNode" + end + add_message "mdg.engine.proto.Trace.QueryPlanNode.DeferredNodeDepends" do + optional :id, :string, 1 + optional :defer_label, :string, 2 + end add_message "mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement" do oneof :id do optional :field_name, :string, 1 @@ -123,12 +144,12 @@ end end add_message "mdg.engine.proto.ReportHeader" do + optional :graph_ref, :string, 12 optional :hostname, :string, 5 optional :agent_version, :string, 6 optional :service_version, :string, 7 optional :runtime_version, :string, 8 optional :uname, :string, 9 - optional :schema_tag, :string, 10 optional :executable_schema_id, :string, 11 end add_message "mdg.engine.proto.PathErrorStats" do @@ -137,21 +158,21 @@ optional :requests_with_errors_count, :uint64, 5 end add_message "mdg.engine.proto.QueryLatencyStats" do - repeated :latency_count, :int64, 1 + repeated :latency_count, :sint64, 13 optional :request_count, :uint64, 2 optional :cache_hits, :uint64, 3 optional :persisted_query_hits, :uint64, 4 optional :persisted_query_misses, :uint64, 5 - repeated :cache_latency_count, :int64, 6 + repeated :cache_latency_count, :sint64, 14 optional :root_error_stats, :message, 7, "mdg.engine.proto.PathErrorStats" optional :requests_with_errors_count, :uint64, 8 - repeated :public_cache_ttl_count, :int64, 9 - repeated :private_cache_ttl_count, :int64, 10 + repeated :public_cache_ttl_count, :sint64, 15 + repeated :private_cache_ttl_count, :sint64, 16 optional :registered_operation_count, :uint64, 11 optional :forbidden_operation_count, :uint64, 12 + optional :requests_without_field_instrumentation, :uint64, 17 end add_message "mdg.engine.proto.StatsContext" do - optional :client_reference_id, :string, 1 optional :client_name, :string, 2 optional :client_version, :string, 3 end @@ -166,25 +187,23 @@ add_message 
"mdg.engine.proto.FieldStat" do optional :return_type, :string, 3 optional :errors_count, :uint64, 4 - optional :count, :uint64, 5 + optional :observed_execution_count, :uint64, 5 + optional :estimated_execution_count, :uint64, 10 optional :requests_with_errors_count, :uint64, 6 - repeated :latency_count, :int64, 8 + repeated :latency_count, :sint64, 9 end add_message "mdg.engine.proto.TypeStat" do map :per_field_stat, :string, :message, 3, "mdg.engine.proto.FieldStat" end - add_message "mdg.engine.proto.Field" do - optional :name, :string, 2 - optional :return_type, :string, 3 - end - add_message "mdg.engine.proto.Type" do - optional :name, :string, 1 - repeated :field, :message, 2, "mdg.engine.proto.Field" + add_message "mdg.engine.proto.ReferencedFieldsForType" do + repeated :field_names, :string, 1 + optional :is_interface, :bool, 2 end add_message "mdg.engine.proto.Report" do optional :header, :message, 1, "mdg.engine.proto.ReportHeader" map :traces_per_query, :string, :message, 5, "mdg.engine.proto.TracesAndStats" optional :end_time, :message, 2, "google.protobuf.Timestamp" + optional :operation_count, :uint64, 6 end add_message "mdg.engine.proto.ContextualizedStats" do optional :context, :message, 1, "mdg.engine.proto.StatsContext" @@ -194,6 +213,8 @@ add_message "mdg.engine.proto.TracesAndStats" do repeated :trace, :message, 1, "mdg.engine.proto.Trace" repeated :stats_with_context, :message, 2, "mdg.engine.proto.ContextualizedStats" + map :referenced_fields_by_type, :string, :message, 4, "mdg.engine.proto.ReferencedFieldsForType" + repeated :internal_traces_contributing_to_stats, :message, 3, "mdg.engine.proto.Trace" end end end @@ -216,6 +237,11 @@ module Proto Trace::QueryPlanNode::ParallelNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.ParallelNode").msgclass Trace::QueryPlanNode::FetchNode = 
::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.FetchNode").msgclass Trace::QueryPlanNode::FlattenNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.FlattenNode").msgclass + Trace::QueryPlanNode::DeferNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.DeferNode").msgclass + Trace::QueryPlanNode::ConditionNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.ConditionNode").msgclass + Trace::QueryPlanNode::DeferNodePrimary = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.DeferNodePrimary").msgclass + Trace::QueryPlanNode::DeferredNode = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.DeferredNode").msgclass + Trace::QueryPlanNode::DeferredNodeDepends = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.DeferredNodeDepends").msgclass Trace::QueryPlanNode::ResponsePathElement = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Trace.QueryPlanNode.ResponsePathElement").msgclass ReportHeader = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ReportHeader").msgclass PathErrorStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.PathErrorStats").msgclass @@ -225,8 +251,7 @@ module Proto ContextualizedTypeStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedTypeStats").msgclass FieldStat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.FieldStat").msgclass TypeStat = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.TypeStat").msgclass - Field = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Field").msgclass - Type = 
::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Type").msgclass + ReferencedFieldsForType = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ReferencedFieldsForType").msgclass Report = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.Report").msgclass ContextualizedStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.ContextualizedStats").msgclass TracesAndStats = ::Google::Protobuf::DescriptorPool.generated_pool.lookup("mdg.engine.proto.TracesAndStats").msgclass diff --git a/lib/apollo-studio-tracing/tracer.rb b/lib/apollo-studio-tracing/tracer.rb index d9a643973..17a3a23ce 100644 --- a/lib/apollo-studio-tracing/tracer.rb +++ b/lib/apollo-studio-tracing/tracer.rb @@ -47,7 +47,7 @@ class Tracer attr_reader :trace_prepare, :query_signature def initialize( - schema_tag: nil, + graph_ref: nil, executable_schema_id: nil, service_version: nil, trace_prepare: nil, @@ -69,7 +69,7 @@ def initialize( service_version: service_version, runtime_version: RUBY_DESCRIPTION, uname: uname, - schema_tag: schema_tag || ENV.fetch('ENGINE_SCHEMA_TAG', 'current'), + graph_ref: graph_ref || ENV.fetch('ENGINE_SCHEMA_TAG', 'current'), executable_schema_id: executable_schema_id, ) @trace_channel = ApolloStudioTracing::TraceChannel.new( diff --git a/spec/apollo-studio-tracing/tracing_spec.rb b/spec/apollo-studio-tracing/tracing_spec.rb index 5f91bc22c..2fefecf47 100644 --- a/spec/apollo-studio-tracing/tracing_spec.rb +++ b/spec/apollo-studio-tracing/tracing_spec.rb @@ -41,7 +41,7 @@ def clear_reports service_version: '1', runtime_version: '1', uname: 'test', - schema_tag: 'test', + graph_ref: 'test', executable_schema_id: 'test', ) end @@ -649,7 +649,7 @@ def items service_version: '1', runtime_version: '1', uname: 'test', - schema_tag: 'test', + graph_ref: 'test', executable_schema_id: 'test', ) end