-
-
Notifications
You must be signed in to change notification settings - Fork 439
/
reports.proto
641 lines (532 loc) · 25.2 KB
/
reports.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
syntax = "proto3";
option php_namespace = "Nuwave\\Lighthouse\\Tracing\\FederatedTracing\\Proto";
option php_metadata_namespace = "Nuwave\\Lighthouse\\Tracing\\FederatedTracing\\Proto\\Metadata";
// Note: The Apollo usage reporting API is subject to change. We strongly encourage developers to contact Apollo support
// at support@apollographql.com to discuss their use case prior to building their own reporting agent using this module.
import "google/protobuf/timestamp.proto";
// A trace of one GraphQL operation execution: wallclock timing, a tree of
// resolver executions (including errors) and, outside of inline traces,
// request-level metadata such as client identity, HTTP details, cache policy
// and the query plan.
message Trace {
  // Cache policy, attached either to a whole trace or to a single Node.
  message CachePolicy {
    enum Scope {
      UNKNOWN = 0;
      PUBLIC = 1;
      PRIVATE = 2;
    }

    Scope scope = 1;

    // Sentinel encoding: use 0 for "absent" and -1 for a max age of 0.
    int64 max_age_ns = 2;
  }

  message Details {
    // The variables sent with this query, unless the reporting agent is
    // configured to keep them all private. Every value is JSON text (so
    // strings are enclosed in double quotes, etc.). A private variable is
    // reported as the empty string.
    map<string, string> variables_json = 4;

    // Deprecated; only used by legacy applications. Do not set this on traces
    // inside a FullTracesReport: the operation name for those traces comes
    // from the key of the traces_per_query map.
    string operation_name = 3;
  }

  // An error recorded on a Node (see Node.error).
  message Error {
    string message = 1;  // required
    repeated Location location = 2;
    uint64 time_ns = 3;
    string json = 4;
  }

  message HTTP {
    // Wrapper around a list of strings; it is the value type of the header
    // maps below.
    message Values {
      repeated string value = 1;
    }

    enum Method {
      UNKNOWN = 0;
      OPTIONS = 1;
      GET = 2;
      HEAD = 3;
      POST = 4;
      PUT = 5;
      DELETE = 6;
      TRACE = 7;
      CONNECT = 8;
      PATCH = 9;
    }

    Method method = 1;

    // Should exclude manual blacklist ("Auth" by default).
    map<string, Values> request_headers = 4;
    map<string, Values> response_headers = 5;

    uint32 status_code = 6;

    reserved 2, 3, 8, 9;
  }

  message Location {
    uint32 line = 1;
    uint32 column = 2;
  }

  // Information about each resolver execution is stored as a Node in a tree.
  // The shape of the tree mirrors the shape of the GraphQL response; it says
  // nothing about the order in which resolvers were invoked. Nodes that
  // represent list indexes (and the root node) don't carry every Node field
  // (eg types and times).
  message Node {
    // Either the name of the field (for Nodes representing a resolver call) or
    // the index in a list (for intermediate Nodes representing list elements).
    // response_name is the name of the field as it appears in the GraphQL
    // response: ie, it may be an alias. (In that case, original_field_name
    // holds the actual field name from the schema.) Wherever a path is being
    // built up, response_name is used rather than original_field_name.
    oneof id {
      string response_name = 1;
      uint32 index = 2;
    }

    string original_field_name = 14;

    // The field's return type; e.g. "String!" for User.email:String!
    string type = 3;

    // The field's parent type; e.g. "User" for User.email:String!
    string parent_type = 13;

    CachePolicy cache_policy = 5;

    // Relative to the trace's start_time, in ns.
    uint64 start_time = 8;
    // Relative to the trace's start_time, in ns.
    uint64 end_time = 9;

    repeated Error error = 11;
    repeated Node child = 12;

    reserved 4;
  }

  // Represents a node in the query plan, under which there is a trace tree for
  // that service fetch. In particular, each fetch node represents a call to an
  // implementing service, and calls to implementing services may not be
  // unique. See
  // https://github.com/apollographql/federation/blob/main/query-planner-js/src/QueryPlan.ts
  // for more information and details.
  message QueryPlanNode {
    // A set of nodes to be executed sequentially by the Router/Gateway
    // executor.
    message SequenceNode {
      repeated QueryPlanNode nodes = 1;
    }

    // A set of nodes to be executed in parallel by the Router/Gateway
    // executor.
    message ParallelNode {
      repeated QueryPlanNode nodes = 1;
    }

    // A node that sends an operation to an implementing service.
    message FetchNode {
      // XXX When we want to include more details about the sub-operation that
      // was executed against this service, we should include that here in each
      // fetch node. This might include an operation signature, requires
      // directive, reference resolutions, etc.
      string service_name = 1;

      bool trace_parsing_failed = 2;

      // This Trace only contains start_time, end_time, duration_ns, and root;
      // all timings were calculated **on the subgraph**, and clock skew will
      // be handled by the ingress server.
      Trace trace = 3;

      // Relative to the outer trace's start_time, in ns, measured in the
      // Router/Gateway.
      uint64 sent_time_offset = 4;

      // Wallclock times measured in the Router/Gateway for when this operation
      // was sent and received.
      google.protobuf.Timestamp sent_time = 5;
      google.protobuf.Timestamp received_time = 6;
    }

    // A way to reach into the response path and attach related entities.
    // XXX Flatten is really not the right name and this node may be renamed in
    // the query planner.
    message FlattenNode {
      repeated ResponsePathElement response_path = 1;
      QueryPlanNode node = 2;
    }

    // A `DeferNode` corresponds to one or more @defer at the same level of
    // "nestedness" in the planned query.
    message DeferNode {
      DeferNodePrimary primary = 1;
      repeated DeferredNode deferred = 2;
    }

    message ConditionNode {
      string condition = 1;
      QueryPlanNode if_clause = 2;
      QueryPlanNode else_clause = 3;
    }

    message DeferNodePrimary {
      QueryPlanNode node = 1;
    }

    message DeferredNode {
      repeated DeferredNodeDepends depends = 1;
      string label = 2;
      repeated ResponsePathElement path = 3;
      QueryPlanNode node = 4;
    }

    message DeferredNodeDepends {
      string id = 1;
      string defer_label = 2;
    }

    message ResponsePathElement {
      oneof id {
        string field_name = 1;
        uint32 index = 2;
      }
    }

    // Exactly one kind of plan node is carried by each QueryPlanNode.
    oneof node {
      SequenceNode sequence = 1;
      ParallelNode parallel = 2;
      FetchNode fetch = 3;
      FlattenNode flatten = 4;
      DeferNode defer = 5;
      ConditionNode condition = 6;
    }
  }

  // The cost of the operation.
  message Limits {
    // The result of the operation.
    string result = 1;
    // The strategy used in cost calculations.
    string strategy = 2;
    // The estimated cost, as calculated via the strategy named in `strategy`.
    uint64 cost_estimated = 3;
    // The actual cost, using the strategy named in `strategy`.
    uint64 cost_actual = 4;
    // The depth of the query.
    uint64 depth = 5;
    // The height of the query.
    uint64 height = 6;
    // The number of aliases in the query.
    uint64 alias_count = 7;
    // The number of root fields in the query.
    uint64 root_field_count = 8;
  }

  // Wallclock time when the trace began.
  google.protobuf.Timestamp start_time = 4;  // required

  // Wallclock time when the trace ended.
  google.protobuf.Timestamp end_time = 3;  // required

  // High precision duration of the trace; may not equal end_time-start_time
  // (eg, if your machine's clock changed during the trace).
  uint64 duration_ns = 11;  // required

  // A tree containing information about all resolvers run directly by this
  // service, including errors.
  Node root = 14;

  // If this is true, the trace is potentially missing some nodes that were
  // present on the query plan. This can happen if the trace span buffer used
  // in the Router fills up and some spans have to be dropped. In these cases
  // the overall trace timing will still be correct, but the trace data could
  // be missing some referenced or executed fields, and some nodes may be
  // missing. If this is true we should display a warning to the user when
  // they view the trace in Explorer.
  bool is_incomplete = 33;

  // -------------------------------------------------------------------------
  // Fields below this line are *not* included in inline traces (the traces
  // sent from subgraphs to the Router/Gateway).

  // In addition to details.raw_query, we include a "signature" of the query,
  // which can be normalized: for example, you may want to discard aliases,
  // drop unused operations and fragments, sort fields, etc. The most important
  // thing here is that the signature match the signature in StatsReports. In
  // StatsReports signatures show up as the key in the per_query map (with the
  // operation name prepended). The signature should be a valid GraphQL query.
  // All traces must have a signature; if this Trace is in a FullTracesReport
  // that signature is in the key of traces_per_query rather than in this
  // field. Engineproxy provides the signature in
  // legacy_signature_needs_resigning instead.
  // NOTE(review): neither `details.raw_query` nor
  // `legacy_signature_needs_resigning` is declared in this message; these
  // references look stale - confirm.
  string signature = 19;

  // Optional: when GraphQL parsing or validation against the GraphQL schema
  // fails, these fields can include reference to the operation being sent for
  // users to dig into the set of operations that are failing validation.
  string unexecutedOperationBody = 27;
  string unexecutedOperationName = 28;

  Details details = 6;

  string client_name = 7;
  string client_version = 8;

  string operation_type = 35;
  string operation_subtype = 36;

  HTTP http = 10;

  CachePolicy cache_policy = 18;

  // If this Trace was created by a Router/Gateway, this is the query plan,
  // including sub-Traces for subgraphs. Note that the 'root' tree on the
  // top-level Trace won't contain any resolvers (though it could contain
  // errors that occurred in the Router/Gateway itself).
  QueryPlanNode query_plan = 26;

  // Was this response served from a full query response cache? (In that case
  // the node tree will have no resolvers.)
  bool full_query_cache_hit = 20;

  // Was this query specified successfully as a persisted query hash?
  bool persisted_query_hit = 21;

  // Did this query contain both a full query string and a persisted query
  // hash? (This typically means that a previous request was rejected as an
  // unknown persisted query.)
  bool persisted_query_register = 22;

  // Was this operation registered and a part of the safelist?
  bool registered_operation = 24;

  // Was this operation forbidden due to lack of safelisting?
  bool forbidden_operation = 25;

  // Some servers don't do field-level instrumentation for every request, and
  // assign a "weight" to each request that they do instrument. When this trace
  // is aggregated into field usage stats, it should count as this value
  // towards the estimated_execution_count rather than just 1. This value
  // should typically be at least 1.
  //
  // 0 is treated as 1 for backwards compatibility.
  double field_execution_weight = 31;

  // The limits information of the query.
  Limits limits = 32;

  // removed: Node parse = 12; Node validate = 13;
  //          Id128 server_id = 1; Id128 client_id = 2;
  //          String client_reference_id = 23; String client_address = 9;
  // 29 and 30 were internal fields.
  reserved 1, 2, 9, 12, 13, 23, 29, 30;
}
// The `service` value embedded within the header key is not guaranteed to
// contain an actual service, and, in most cases, the service information is
// trusted to come from upstream processing. If the service _is_ specified in
// this header, then it is checked to match the context that is reporting it.
// Otherwise, the service information is deduced from the token context of the
// reporter and then sent along via other mechanisms (in Kafka, the
// `ReportKafkaKey`). The other information (hostname, agent_version, etc.) is
// sent by the Apollo Engine Reporting agent, but we do not currently save that
// information to any of our persistent storage.
message ReportHeader {
  // eg "mygraph@myvariant"
  string graph_ref = 12;

  // eg "host-01.example.com"
  string hostname = 5;

  // eg "engineproxy 0.1.0"
  string agent_version = 6;  // required

  // eg "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from
  // `git describe --tags`)
  string service_version = 7;

  // eg "node v4.6.0"
  string runtime_version = 8;

  // eg "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64
  // GNU/Linux"
  string uname = 9;

  // An id that is used to represent the schema to Apollo Graph Manager.
  // This is used in place of what used to be schema_hash, since that is no
  // longer attached to a schema in the backend.
  string executable_schema_id = 11;

  reserved 3;  // removed string service = 3;
}
// Error counters arranged as a tree: every node carries its own counters plus
// a string-keyed map of child nodes of the same type.
// NOTE(review): the map key is presumably a response path segment - confirm.
message PathErrorStats {
  map<string, PathErrorStats> children = 1;

  uint64 errors_count = 4;
  uint64 requests_with_errors_count = 5;
}
// Aggregated request counters and latency/TTL histograms for a query.
message QueryLatencyStats {
  // The latencies of all non-cached requests, so the sum of all counts should
  // equal request_count minus cache_hits. This is an array of counts within a
  // logarithmic range of 384 latency buckets. To calculate the bucket from a
  // microsecond, use the formula: max(0, min(ceil(ln(x)/ln(1.1)), 383)). So
  // for example, 323424 microseconds (323.424 ms) corresponds to bucket 134.
  // Buckets can be skipped using a negative number, so one request on that
  // bucket could be represented as [-134, 1] (skip buckets numbered 0 to 133
  // and set a 1 in bucket 134).
  repeated sint64 latency_count = 13;

  // The total number of requests, including both cache hits and cache misses.
  uint64 request_count = 2;

  // The total number of requests that were cache hits. Each request should be
  // represented in cache_latency_count.
  uint64 cache_hits = 3;

  uint64 persisted_query_hits = 4;
  uint64 persisted_query_misses = 5;

  // The latency buckets for all operations included in cache_hits.
  // See comment on latency_count for details.
  repeated sint64 cache_latency_count = 14;

  // Paths and counts for each error. The total number of requests with errors
  // within this object should be the same as requests_with_errors_count below.
  PathErrorStats root_error_stats = 7;

  // Total number of requests that contained at least one error.
  uint64 requests_with_errors_count = 8;

  repeated sint64 public_cache_ttl_count = 15;
  repeated sint64 private_cache_ttl_count = 16;

  uint64 registered_operation_count = 11;
  uint64 forbidden_operation_count = 12;

  // The number of requests that were executed without field-level
  // instrumentation (and thus do not contribute to `observed_execution_count`
  // fields on this message's cousin-twice-removed FieldStats).
  uint64 requests_without_field_instrumentation = 17;

  // 1, 6, 9, and 10 were old int64 histograms.
  reserved 1, 6, 9, 10;
}
// Stats on the query that can be populated by the gateway or router.
message LimitsStats {
  // The strategy used in cost calculations.
  string strategy = 1;

  // The estimated cost as calculated via the strategy specified in stats
  // context. This is a histogram rather than a fixed cost because the cost can
  // be affected by paging variables.
  repeated sint64 cost_estimated = 2;

  // The maximum estimated cost of the query.
  uint64 max_cost_estimated = 3;

  // The actual cost using the strategy specified in stats context.
  repeated sint64 cost_actual = 4;

  // The maximum actual cost of the query.
  uint64 max_cost_actual = 5;

  // The total depth of the query.
  uint64 depth = 6;

  // The height of the query.
  uint64 height = 7;

  // The number of aliases in the query.
  uint64 alias_count = 8;

  // The number of root fields in the query.
  uint64 root_field_count = 9;
}
// The context around a block of stats and traces, indicating from which client
// the operation was executed and its operation type. Operation type and
// subtype are only used by Apollo Router.
message StatsContext {
  reserved 1;  // string client_reference_id = 1;

  string client_name = 2;
  string client_version = 3;

  string operation_type = 4;
  string operation_subtype = 5;

  // The result of the operation: either OK or the error code that caused the
  // operation to fail. This will not contain all errors from a query, only the
  // primary reason the operation failed, e.g. a limits failure or an auth
  // failure.
  string result = 6;
}
// A QueryLatencyStats paired with a StatsContext.
message ContextualizedQueryLatencyStats {
  QueryLatencyStats query_latency_stats = 1;
  StatsContext context = 2;
}
// A string-keyed map of TypeStat paired with a StatsContext.
// NOTE(review): the map key is presumably the type name - confirm.
message ContextualizedTypeStats {
  StatsContext context = 1;
  map<string, TypeStat> per_type_stat = 2;
}
// Execution, error and latency statistics for a single field.
message FieldStat {
  string return_type = 3;  // required; eg "String!" for User.email:String!

  // Number of errors whose path is this field. Note that we assume that error
  // tracking does *not* require field-level instrumentation so this *will*
  // include errors from requests that don't contribute to the
  // `observed_execution_count` field (and does not need to be scaled by
  // field_execution_weight).
  uint64 errors_count = 4;

  // Number of times that the resolver for this field is directly observed
  // being executed.
  uint64 observed_execution_count = 5;

  // Same as `observed_execution_count` but potentially scaled upwards if the
  // server was only performing field-level instrumentation on a sampling of
  // operations. For example, if the server randomly instruments 1% of requests
  // for this operation, this number will be 100 times greater than
  // `observed_execution_count`. (When aggregating a Trace into FieldStats,
  // this number goes up by the trace's `field_execution_weight` for each
  // observed field execution, while `observed_execution_count` above goes up
  // by 1.)
  uint64 estimated_execution_count = 10;

  // Number of times the resolver for this field is executed that resulted in
  // at least one error. "Request" is a misnomer here as this corresponds to
  // resolver calls, not overall operations. Like `errors_count` above, this
  // includes all requests rather than just requests with field-level
  // instrumentation.
  uint64 requests_with_errors_count = 6;

  // Duration histogram for the latency of this field. Note that it is scaled
  // in the same way as estimated_execution_count so its "total count" might be
  // greater than `observed_execution_count` and may not exactly equal
  // `estimated_execution_count` due to rounding.
  // See comment on QueryLatencyStats's latency_count for details.
  repeated sint64 latency_count = 9;

  reserved 1, 2, 7, 8;
}
// As FieldStat only gets returned for FTV1 payloads, this is a separate
// message that can be used to collect stats in the router or gateway, obtained
// directly from the request schema and response.
message LocalFieldStat {
  string return_type = 1;  // required; eg "String!" for User.email:String!

  // Histogram of returned array sizes.
  repeated sint64 array_size = 2;
}
// Per-field statistics for one type.
message TypeStat {
  // Key is (eg) "email" for User.email:String!
  map<string, FieldStat> per_field_stat = 3;

  reserved 1, 2;
}
// Per-field LocalFieldStat entries for one type.
message LocalTypeStat {
  // Key is (eg) "email" for User.email:String!
  // Unlike FieldStat, this is populated outside of FTV1 requests.
  map<string, LocalFieldStat> local_per_field_stat = 1;
}
// The field names referenced on one type, plus whether that type is an
// interface.
message ReferencedFieldsForType {
  // Contains (eg) "email" for User.email:String!
  repeated string field_names = 1;

  // True if this type is an interface.
  bool is_interface = 2;
}
// This is the top-level message used by Apollo Server, Apollo Router, and
// other libraries to report usage information to Apollo. This message consists
// of traces and stats for operations. By default, each individual operation
// execution should be either represented as a trace or within stats, but not
// both. However if the "traces_pre_aggregated" field is set to true, all
// operations should be included in stats and anything specified as a trace is
// not added in to the aggregate stats. For performance reasons, we recommend
// that traces are sampled so that only somewhere around 1% of operation
// executions are sent as traces. Additionally, buffering operations up until a
// large size has been reached (say, 4MB) or 5-10 seconds has passed is
// appropriate.
// This message used to be known as FullTracesReport, but got renamed since it
// isn't just for traces anymore.
message Report {
  // An operation count for one (type, subtype) pair.
  message OperationCountByType {
    string type = 1;
    string subtype = 2;
    uint64 operation_count = 3;
  }

  ReportHeader header = 1;

  // If QueryMetadata isn't provided, this key should be a statsReportKey
  // (# operationName\nsignature). If the operation name, signature, and
  // persisted query IDs are provided in the QueryMetadata, and this operation
  // was requested via a persisted query, this key can be
  // "pq# <persisted query id>" instead of the signature and operation.
  map<string, TracesAndStats> traces_per_query = 5;

  // This is the time that the requests in this trace are considered to have
  // taken place. If this field is not present the max of the end_time of each
  // trace will be used instead. If there are no traces and no end_time present
  // the report will not be able to be processed.
  // Note: This will override the end_time from traces.
  google.protobuf.Timestamp end_time = 2;  // required if no traces in this message

  // Total number of operations processed during this period. This includes all
  // operations, even if they are sampled and not included in the query latency
  // stats.
  uint64 operation_count = 6;

  // Total number of operations broken up by operation type and operation
  // subtype. Only either this or operation_count should be populated, but if
  // both are present, the total across all types and subtypes should match the
  // overall operation_count.
  repeated OperationCountByType operation_count_by_type = 8;

  // If this is set to true, the stats in TracesAndStats.stats_with_context
  // represent all of the operations described from this report, and the
  // traces in TracesAndStats.trace are a sampling of some of the same
  // operations. If this is false, each operation is described in precisely
  // one of those two fields.
  bool traces_pre_aggregated = 7;

  // This indicates whether or not extended references are enabled, which are
  // within the stats with context and contain input type and enum value
  // references. We need this flag so we can tell if the option is enabled even
  // when there are no extended references to report.
  bool extended_references_enabled = 9;
}
// Stats for one StatsContext: query latency, per-type field stats, extended
// references, locally collected per-type stats and limits stats.
message ContextualizedStats {
  StatsContext context = 1;

  QueryLatencyStats query_latency_stats = 2;

  // Key is type name. This structure provides data for the count and latency
  // of individual field executions and thus only reflects operations for which
  // field-level tracing occurred.
  map<string, TypeStat> per_type_stat = 3;

  // Extended references including input types and enum values.
  ExtendedReferences extended_references = 6;

  // Per type stats that are obtained directly by the router or gateway rather
  // than FTV1.
  map<string, LocalTypeStat> local_per_type_stat = 7;

  // Stats that contain limits information for the query.
  LimitsStats limits_stats = 8;

  // Total number of operations processed during this period for this context.
  // This includes all operations, even if they are sampled and not included in
  // the query latency stats.
  uint64 operation_count = 9;

  reserved 4, 5;
}
// Operation name, signature and optional persisted query ID for a query.
message QueryMetadata {
  // The operation name. For now this is a required field if QueryMetadata is
  // present.
  string name = 1;

  // The operation signature. For now this is a required field if QueryMetadata
  // is present.
  string signature = 2;

  // (Optional) Persisted query ID that was used to request this operation.
  string pq_id = 3;
}
// Reference stats for input types and enum values.
message ExtendedReferences {
  // NOTE(review): key is presumably the input object type name - confirm.
  map<string, InputTypeStats> input_types = 1;

  // Map of enum name to stats about that enum.
  map<string, EnumStats> enum_values = 2;
}
// Stats about the fields of an input object type.
message InputTypeStats {
  // Map of input object type to the stats about the fields within that object.
  // NOTE(review): given the map's name and its InputFieldStats values, the key
  // may actually be the input field name - confirm.
  map<string, InputFieldStats> field_names = 1;
}
// Reference counters for one input object field.
message InputFieldStats {
  // The total number of operations that reference the input object field.
  uint64 refs = 1;

  // The number of operations that reference the input object field as a null
  // value.
  uint64 null_refs = 2;

  // The number of operations that don't reference this input object field (the
  // field is missing or undefined).
  uint64 missing = 3;
}
// Reference counts for the values of one enum.
message EnumStats {
  // Map of enum value name to the number of referencing operations.
  map<string, uint64> enum_values = 1;
}
// A sequence of traces and stats. If Report.traces_pre_aggregated (at the top
// level of the report) is false, an individual operation should either be
// described as a trace or as part of stats, but not both. If that flag is
// true, then all operations are described as stats and some are also described
// as traces.
message TracesAndStats {
  repeated Trace trace = 1;

  repeated ContextualizedStats stats_with_context = 2;

  // This describes the fields referenced in the operation. Note that this may
  // include fields that don't show up in FieldStats (due to being interface
  // fields, being nested under null fields or empty lists or non-matching
  // fragments or `@include` or `@skip`, etc). It also may be missing fields
  // that show up in FieldStats (as FieldStats will include the concrete object
  // type for fields referenced via an interface type).
  map<string, ReferencedFieldsForType> referenced_fields_by_type = 4;

  // This is an optional field that is used to provide more context to the key
  // of this object within the traces_per_query map. If it's omitted, we assume
  // the key is a standard operation name and signature key.
  QueryMetadata query_metadata = 5;

  reserved 3;
}