Skip to content

Commit

Permalink
[BE-308] Federated metrics support
Browse files Browse the repository at this point in the history
Augments apollo-engine-reporting and @apollo/gateway to provide federated
metrics to Engine from the gateway.

Extend trace reports to contain a QueryPlan tree when traces are received from
a gateway, mimicking the query plan that the gateway makes internally.
FetchNodes on the tree correspond to GraphQL operations executed against
federated services; each FetchNode contains its own tree of Trace.Nodes.

- apollo-engine-reporting
  - Factor out core tree building logic to EngineReportingTreeBuilder class
  - New EngineFederatedTracingExtension adds a base64-encoded protobuf of a
    Trace (with just the node tree and top level timestamps) to an extension
    on federated service responses (if requested to do so via an HTTP header
    from gateway).
  - Implement new originalFieldName trace field (similar goal to #2401, BE-158).
  - If @apollo/gateway built us a query plan trace, add it to the trace

- @apollo/gateway
  - Builds up a protobuf representation of the query plan, including sub-traces
    from federated services on FetchNodes

- apollo-engine-reporting-protobuf
  - Copy reports.proto from internal repo
  - Set editorconfig settings to match internal repo
  - Always use normal numbers, not long.js objects

- apollo-server-core
  - Thread some data through between reporting and gateway modules.

- apollo-server-integration-testsuite
  - Clean up some ts warnings

TODO record __resolveReference as multiple nodes?
TODO is it actually OK to include the operation text in fetch trace nodes?
TODO more tests, especially of error cases
TODO internal docs (eg FIXME comment in reports.proto)
TODO external docs
  • Loading branch information
glasser committed Jul 3, 2019
1 parent a060162 commit a9d4ece
Show file tree
Hide file tree
Showing 20 changed files with 1,093 additions and 320 deletions.
143 changes: 75 additions & 68 deletions package-lock.json

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@
"@types/memcached": "2.2.6",
"@types/micro": "7.3.3",
"@types/multer": "1.3.7",
"@types/node": "8.10.49",
"@types/nock": "9.1.3",
"@types/node": "8.10.49",
"@types/node-fetch": "2.3.2",
"@types/parseurl": "1.3.1",
"@types/redis": "2.8.12",
Expand All @@ -98,6 +98,7 @@
"@types/test-listen": "1.1.0",
"@types/type-is": "1.6.2",
"@types/ws": "6.0.1",
"@types/yup": "0.26.18",
"apollo-fetch": "0.7.0",
"apollo-link": "1.2.12",
"apollo-link-http": "1.5.15",
Expand Down Expand Up @@ -131,7 +132,7 @@
"meteor-promise": "0.8.7",
"mock-req": "0.2.0",
"multer": "1.4.1",
"nock": "^10.0.6",
"nock": "10.0.6",
"node-fetch": "2.3.0",
"prettier": "1.18.2",
"prettier-check": "2.0.0",
Expand Down
12 changes: 12 additions & 0 deletions packages/apollo-engine-reporting-protobuf/src/.editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# reports.proto is copied from an internal Apollo repository which applies these
# editorconfig standards.

root = true

[reports.proto]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = tab
insert_final_newline = true
trim_trailing_whitespace = true
7 changes: 7 additions & 0 deletions packages/apollo-engine-reporting-protobuf/src/index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
const protobuf = require('./protobuf');
const protobufJS = require('protobufjs/minimal');

// Remove Long support. Our uint64s tend to be small (less
// than 104 days).
// https://github.com/protobufjs/protobuf.js/issues/1253
protobufJS.util.Long = undefined;
protobufJS.configure();

// Override the generated protobuf Traces.encode function so that it will look
// for Traces that are already encoded to Buffer as well as unencoded
Expand Down
109 changes: 89 additions & 20 deletions packages/apollo-engine-reporting-protobuf/src/reports.proto
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,26 @@ message Trace {
uint32 column = 2;
}

// We store information on each resolver execution as a Node on a tree.
// The structure of the tree corresponds to the structure of the GraphQL
// response; it does not indicate the order in which resolvers were
// invoked. Note that nodes representing indexes (and the root node)
// don't contain all Node fields (eg types and times).
message Node {
// The name of the field (for Nodes representing a resolver call) or
// the index in a list (for intermediate Nodes representing elements of
// a list). Note that nodes representing indexes (and the root node)
// don't contain all Node fields (eg types and times).
// The name of the field (for Nodes representing a resolver call) or the
// index in a list (for intermediate Nodes representing elements of a list).
// field_name is the name of the field as it appears in the GraphQL
// response: ie, it may be an alias. (In that case, the original_field_name
// field holds the actual field name from the schema.) In any context where
// we're building up a path, we use the response_name rather than the
// original_field_name.
oneof id {
string field_name = 1;
string response_name = 1;
uint32 index = 2;
}

string original_field_name = 14;

// The field's return type; e.g. "String!" for User.email:String!
string type = 3;

Expand All @@ -109,18 +119,65 @@ message Trace {
reserved 4;
}

// FIXME: document all new fields
message QueryPlanNode {
message SequenceNode {
repeated QueryPlanNode nodes = 1;
}
message ParallelNode {
repeated QueryPlanNode nodes = 1;
}
message FetchNode {
string serviceName = 1;
string operation = 2;
// FIXME: do we want selectionSet to be structured instead of a string?
// FIXME: do we need variableUsages?
// FIXME: do we need requires?

// This Trace only contains start_time, end_time, duration_ns, and root;
// all timings were calculated on the federated service.
Trace trace = 3;

// relative to the outer trace's start_time, in ns, measured in the gateway.
uint64 sent_time_offset = 4;

// Wallclock times measured in the gateway for when this operation was
// sent and recieved.
google.protobuf.Timestamp sent_time = 5;
google.protobuf.Timestamp received_time = 6;
}
message FlattenNode {
repeated ResponsePathElement response_path = 1;
QueryPlanNode node = 2;
}
message ResponsePathElement {
oneof id {
string field_name = 1;
uint32 index = 2;
}
}
oneof node {
SequenceNode sequence = 1;
ParallelNode parallel = 2;
FetchNode fetch = 3;
FlattenNode flatten = 4;
}
}

// Wallclock time when the trace began.
google.protobuf.Timestamp start_time = 4; // required
// Wallclock time when the trace ended.
google.protobuf.Timestamp end_time = 3; // required
// High precision duration of the trace; may not equal end_time-start_time
// (eg, if your machine's clock changed during the trace).
uint64 duration_ns = 11; // required
// A tree containing information about all resolvers run directly by this
// service, including errors.
Node root = 14;

// These fields are specific to engineproxy.
google.protobuf.Timestamp origin_reported_start_time = 15;
google.protobuf.Timestamp origin_reported_end_time = 16;
uint64 origin_reported_duration_ns = 17;
// -------------------------------------------------------------------------
// Fields below this line are *not* included in federated traces (the traces
// sent from federated services to the gateway).

// In addition to details.raw_query, we include a "signature" of the query,
// which can be normalized: for example, you may want to discard aliases, drop
Expand All @@ -134,14 +191,6 @@ message Trace {
// instead.
string signature = 19;

// Older agents (eg the Go engineproxy) relied to some degree on the Engine
// backend to run their own semi-compatible implementation of a specific
// variant of query signatures. The backend does not do this for new agents (which
// set the above 'signature' field). It used to still "re-sign" signatures
// from engineproxy, but we've now simplified the backend to no longer do this.
// Deprecated and ignored in FullTracesReports.
string legacy_signature_needs_resigning = 5;

Details details = 6;

// Note: engineproxy always sets client_name, client_version, and client_address to "none".
Expand All @@ -155,7 +204,11 @@ message Trace {

CachePolicy cache_policy = 18;

Node root = 14;
// If this Trace was created by a gateway, this is the query plan, including
// sub-Traces for federated services. Note that the 'root' tree on the
// top-level Trace won't contain any resolvers (though it could contain errors
// that occurred in the gateway itself).
QueryPlanNode query_plan = 26;

// Was this response served from a full query response cache? (In that case
// the node tree will have no resolvers.)
Expand All @@ -174,6 +227,21 @@ message Trace {
// Was this operation forbidden due to lack of safelisting?
bool forbidden_operation = 25;

// --------------------------------------------------------------
// Fields below this line are only set by the old Go engineproxy.
google.protobuf.Timestamp origin_reported_start_time = 15;
google.protobuf.Timestamp origin_reported_end_time = 16;
uint64 origin_reported_duration_ns = 17;

// Older agents (eg the Go engineproxy) relied to some degree on the Engine
// backend to run their own semi-compatible implementation of a specific
// variant of query signatures. The backend does not do this for new agents (which
// set the above 'signature' field). It used to still "re-sign" signatures
// from engineproxy, but we've now simplified the backend to no longer do this.
// Deprecated and ignored in FullTracesReports.
string legacy_signature_needs_resigning = 5;


// removed: Node parse = 12; Node validate = 13;
// Id128 server_id = 1; Id128 client_id = 2;
reserved 12, 13, 1, 2;
Expand Down Expand Up @@ -280,7 +348,7 @@ message FieldStat {

message TypeStat {
string name = 1; // deprecated; only set when stored in QueryStats.per_type
repeated FieldStat field = 2; // deprecated; use per_field_stat instead
repeated FieldStat field = 2; // deprecated; use per_field_stat instead
// Key is (eg) "email" for User.email:String!
map<string, FieldStat> per_field_stat = 3;
}
Expand Down Expand Up @@ -358,6 +426,7 @@ message StatsReport {
// FullTracesReports.
uint64 realtime_duration = 10;


// Maps from query descriptor to QueryStats. Required unless
// legacy_per_query_missing_operation_name is set. The keys are strings of the
// form `# operationName\nsignature` (literal hash and space), with
Expand Down Expand Up @@ -405,4 +474,4 @@ message Traces {
message TraceV1 {
ReportHeader header = 1;
Trace trace = 2;
}
}
Loading

0 comments on commit a9d4ece

Please sign in to comment.