diff --git a/CHANGELOG.md b/CHANGELOG.md index ad0f98a67fb..c5731c3d767 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ * [CHANGE] Return 200 instead of 206 when blocks failed is < tolerate_failed_blocks. [#1725](https://github.com/grafana/tempo/pull/1725) (@joe-elliott) * [CHANGE] Update Go to 1.19 [#1665](https://github.com/grafana/tempo/pull/1665) (@ie-pham) * [CHANGE] Remove unsued scheduler frontend code [#1734](https://github.com/grafana/tempo/pull/1734) (@mapno) +* [CHANGE] Deprecated `query-frontend.query_shards` in favor of `query_frontend.trace_by_id.query_shards`. +Old config will still work but will be removed in a future release. [#1735](https://github.com/grafana/tempo/pull/1735) (@mapno) * [FEATURE] Add capability to configure the used S3 Storage Class [#1697](https://github.com/grafana/tempo/pull/1714) (@amitsetty) * [ENHANCEMENT] cache: expose username and sentinel_username redis configuration options for ACL-based Redis Auth support [#1708](https://github.com/grafana/tempo/pull/1708) (@jsievenpiper) * [ENHANCEMENT] metrics-generator: expose span size as a metric [#1662](https://github.com/grafana/tempo/pull/1662) (@ie-pham) @@ -17,6 +19,14 @@ * [ENHANCEMENT] Add cli command an existing file to tempodb's current parquet schema. [#1706](https://github.com/grafana/tempo/pull/1707) (@joe-elliott) * [ENHANCEMENT] Add query parameter to search API for traceQL queries [#1729](https://github.com/grafana/tempo/pull/1729) (@kvrhdn) * [ENHANCEMENT] metrics-generator: filter out older spans before metrics are aggregated [#1612](https://github.com/grafana/tempo/pull/1612) (@ie-pham) +* [ENHANCEMENT] Add hedging to trace by ID lookups created by the frontend. 
[#1735](https://github.com/grafana/tempo/pull/1735) (@mapno) + New config options and defaults: +``` +query_frontend: + trace_by_id: + hedge_requests_at: 2s + hedge_requests_up_to: 2 +``` * [BUGFIX] Honor caching and buffering settings when finding traces by id [#1697](https://github.com/grafana/tempo/pull/1697) (@joe-elliott) * [BUGFIX] Correctly propagate errors from the iterator layer up through the queriers [#1723](https://github.com/grafana/tempo/pull/1723) (@joe-elliott) diff --git a/docs/tempo/website/configuration/_index.md b/docs/tempo/website/configuration/_index.md index 8530b816844..f972a21a5da 100644 --- a/docs/tempo/website/configuration/_index.md +++ b/docs/tempo/website/configuration/_index.md @@ -340,6 +340,17 @@ query_frontend: # (default: 1h) [query_ingesters_until: ] + + # Trace by ID lookup configuration + trace_by_id: + + # If set to a non-zero value, a second request will be issued at the provided duration. + # Recommended to be set to p99 of search requests to reduce long-tail latency. + [hedge_requests_at: | default = 2s ] + + # The maximum number of requests to execute when hedging. + # Requires hedge_requests_at to be set. Must be greater than 0. + [hedge_requests_up_to: | default = 2 ] ``` ## Querier diff --git a/docs/tempo/website/configuration/manifest.md b/docs/tempo/website/configuration/manifest.md index f3b50499818..bfd9cc063f8 100644 --- a/docs/tempo/website/configuration/manifest.md +++ b/docs/tempo/website/configuration/manifest.md @@ -16,7 +16,7 @@ go run ./cmd/tempo --storage.trace.backend=local --storage.trace.local.path=/tmp ## Complete configuration -> **Note**: This manifest was generated on 2022-03-29. +> **Note**: This manifest was generated on 2022-09-13. 
```yaml target: all @@ -46,8 +46,8 @@ server: http_server_read_timeout: 30s http_server_write_timeout: 30s http_server_idle_timeout: 2m0s - grpc_server_max_recv_msg_size: 4194304 - grpc_server_max_send_msg_size: 4194304 + grpc_server_max_recv_msg_size: 16777216 + grpc_server_max_send_msg_size: 16777216 grpc_server_max_concurrent_streams: 100 grpc_server_max_connection_idle: 2562047h47m16.854775807s grpc_server_max_connection_age: 2562047h47m16.854775807s @@ -61,6 +61,7 @@ server: log_source_ips_enabled: false log_source_ips_header: "" log_source_ips_regex: "" + log_request_at_info_level_enabled: false http_path_prefix: "" distributor: ring: @@ -74,6 +75,7 @@ distributor: consistent_reads: false watch_rate_limit: 1 watch_burst_size: 1 + cas_retry_delay: 1s etcd: endpoints: [] dial_timeout: 10s @@ -101,8 +103,7 @@ distributor: instance_addr: "" receivers: {} override_ring_key: distributor - log_received_spans: - enabled: false + log_received_traces: false extend_writes: true search_tags_deny_list: [] ingester_client: @@ -184,6 +185,7 @@ querier: tls_ca_path: "" tls_server_name: "" tls_insecure_skip_verify: false + query_relevant_ingesters: false query_frontend: log_queries_longer_than: 0s max_body_size: 0 @@ -224,6 +226,9 @@ query_frontend: max_duration: 1h1m0s query_backend_after: 15m0s query_ingesters_until: 1h0m0s + trace_by_id: + hedge_requests_at: 2s + hedge_requests_up_to: 2 compactor: ring: kvstore: @@ -236,6 +241,7 @@ compactor: consistent_reads: false watch_rate_limit: 1 watch_burst_size: 1 + cas_retry_delay: 1s etcd: endpoints: [] dial_timeout: 10s @@ -266,7 +272,7 @@ compactor: wait_active_instance_timeout: 10m0s compaction: chunk_size_bytes: 5242880 - flush_size_bytes: 31457280 + flush_size_bytes: 20971520 compaction_window: 1h0m0s max_compaction_objects: 6000000 max_block_bytes: 107374182400 @@ -290,6 +296,7 @@ ingester: consistent_reads: false watch_rate_limit: 1 watch_burst_size: 1 + cas_retry_delay: 1s etcd: endpoints: [] dial_timeout: 10s @@ -313,16 
+320,12 @@ ingester: excluded_zones: "" num_tokens: 128 heartbeat_period: 5s + heartbeat_timeout: 1m0s observe_period: 0s join_after: 0s min_ready_duration: 15s interface_names: - - wlp2s0 - - docker0 - - br-f163873defd4 - - br-f56e9de73d01 - - br-16536cce4aa3 - - br-3bc02eb7efdd + - en0 final_sleep: 0s tokens_file_path: "" availability_zone: "" @@ -351,6 +354,7 @@ metrics_generator: consistent_reads: false watch_rate_limit: 1 watch_burst_size: 1 + cas_retry_delay: 1s etcd: endpoints: [] dial_timeout: 10s @@ -404,6 +408,8 @@ metrics_generator: - 1.024 - 2.048 - 4.096 + - 8.192 + - 16.384 dimensions: [] registry: collection_interval: 15s @@ -419,6 +425,7 @@ metrics_generator: max_wal_time: 14400000 no_lockfile: false remote_write_flush_deadline: 1m0s + metrics_ingestion_time_range_slack: 30s storage: trace: pool: @@ -436,17 +443,26 @@ storage: index_page_size_bytes: 256000 bloom_filter_false_positive: 0.01 bloom_filter_shard_size_bytes: 102400 + version: v2 encoding: zstd search_encoding: snappy search_page_size_bytes: 1048576 + row_group_size_bytes: 30000000 search: chunk_size_bytes: 1000000 prefetch_trace_count: 1000 + read_buffer_count: 8 + read_buffer_size_bytes: 4194304 + cache_control: + footer: false + column_index: false + offset_index: false blocklist_poll: 5m0s blocklist_poll_concurrency: 50 blocklist_poll_fallback: true blocklist_poll_tenant_index_builders: 2 blocklist_poll_stale_tenant_index: 0s + blocklist_poll_jitter_ms: 0 backend: local local: path: /tmp/tempo/traces @@ -466,14 +482,20 @@ storage: access_key: "" secret_key: "" insecure: false + insecure_skip_verify: false part_size: 0 hedge_requests_at: 0s hedge_requests_up_to: 2 signature_v2: false forcepathstyle: false + tags: {} + storage_class: "" + metadata: {} azure: storage-account-name: "" storage-account-key: "" + use-managed-identity: false + user-assigned-id: "" container-name: "" endpoint-suffix: blob.core.windows.net max-buffers: 4 @@ -503,6 +525,10 @@ overrides: 
metrics_generator_disable_collection: false metrics_generator_forwarder_queue_size: 0 metrics_generator_forwarder_workers: 0 + metrics_generator_processor_service_graphs_histogram_buckets: [] + metrics_generator_processor_service_graphs_dimensions: [] + metrics_generator_processor_span_metrics_histogram_buckets: [] + metrics_generator_processor_span_metrics_dimensions: [] block_retention: 0s max_bytes_per_tag_values_query: 5000000 max_search_duration: 0s @@ -522,18 +548,20 @@ memberlist: compression_enabled: false advertise_addr: "" advertise_port: 7946 + cluster_label: "" + cluster_label_verification_disabled: false join_members: [] min_join_backoff: 1s max_join_backoff: 1m0s max_join_retries: 10 - abort_if_cluster_join_fails: true + abort_if_cluster_join_fails: false rejoin_interval: 0s left_ingesters_timeout: 5m0s - leave_timeout: 5s + leave_timeout: 20s message_history_buffer_bytes: 0 bind_addr: [] bind_port: 7946 - packet_dial_timeout: 5s + packet_dial_timeout: 2s packet_write_timeout: 5s tls_enabled: false tls_cert_path: "" @@ -541,4 +569,10 @@ memberlist: tls_ca_path: "" tls_server_name: "" tls_insecure_skip_verify: false +usage_report: + reporting_enabled: true + backoff: + min_period: 100ms + max_period: 10s + max_retries: 0 ``` diff --git a/modules/frontend/config.go b/modules/frontend/config.go index 5ed86074a5c..038de84d754 100644 --- a/modules/frontend/config.go +++ b/modules/frontend/config.go @@ -21,19 +21,31 @@ var ( type Config struct { Config v1.Config `yaml:",inline"` MaxRetries int `yaml:"max_retries,omitempty"` - QueryShards int `yaml:"query_shards,omitempty"` TolerateFailedBlocks int `yaml:"tolerate_failed_blocks,omitempty"` Search SearchConfig `yaml:"search"` + // Deprecated: Use TraceByID.QueryShards instead. 
+ // TODO: Remove QueryShards with Tempo v2 + QueryShards int `yaml:"query_shards,omitempty"` + TraceByID TraceByIDConfig `yaml:"trace_by_id"` } type SearchConfig struct { Sharder SearchSharderConfig `yaml:",inline"` } +type TraceByIDConfig struct { + QueryShards int `yaml:"query_shards,omitempty"` + Hedging HedgingConfig `yaml:",inline"` +} + +type HedgingConfig struct { + HedgeRequestsAt time.Duration `yaml:"hedge_requests_at"` + HedgeRequestsUpTo int `yaml:"hedge_requests_up_to"` +} + func (cfg *Config) RegisterFlagsAndApplyDefaults(string, *flag.FlagSet) { cfg.Config.MaxOutstandingPerTenant = 100 cfg.MaxRetries = 2 - cfg.QueryShards = 20 cfg.TolerateFailedBlocks = 0 cfg.Search = SearchConfig{ Sharder: SearchSharderConfig{ @@ -46,6 +58,13 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(string, *flag.FlagSet) { TargetBytesPerRequest: defaultTargetBytesPerRequest, }, } + cfg.TraceByID = TraceByIDConfig{ + QueryShards: 20, + Hedging: HedgingConfig{ + HedgeRequestsAt: 2 * time.Second, + HedgeRequestsUpTo: 2, + }, + } } type CortexNoQuerierLimits struct{} diff --git a/modules/frontend/frontend.go b/modules/frontend/frontend.go index c0d9e7d7e62..9e35f02d1da 100644 --- a/modules/frontend/frontend.go +++ b/modules/frontend/frontend.go @@ -42,7 +42,12 @@ type QueryFrontend struct { func New(cfg Config, next http.RoundTripper, o *overrides.Overrides, store storage.Store, logger log.Logger, registerer prometheus.Registerer) (*QueryFrontend, error) { level.Info(logger).Log("msg", "creating middleware in query frontend") - if cfg.QueryShards < minQueryShards || cfg.QueryShards > maxQueryShards { + if cfg.QueryShards != 0 { + cfg.TraceByID.QueryShards = cfg.QueryShards + level.Warn(logger).Log("msg", "query_shards is deprecated, use trace_by_id.query_shards instead") + } + + if cfg.TraceByID.QueryShards < minQueryShards || cfg.TraceByID.QueryShards > maxQueryShards { return nil, fmt.Errorf("frontend query shards should be between %d and %d (both inclusive)", 
minQueryShards, maxQueryShards) } @@ -95,7 +100,12 @@ func newTraceByIDMiddleware(cfg Config, logger log.Logger) Middleware { // - the Deduper dedupes Span IDs for Zipkin support // - the ShardingWare shards queries by splitting the block ID space // - the RetryWare retries requests that have failed (error or http status 500) - rt := NewRoundTripper(next, newDeduper(logger), newTraceByIDSharder(cfg.QueryShards, cfg.TolerateFailedBlocks, logger)) + rt := NewRoundTripper( + next, + newDeduper(logger), + newTraceByIDSharder(cfg.TraceByID.QueryShards, cfg.TolerateFailedBlocks, logger), + newHedgedRequestWare(cfg.TraceByID.Hedging), + ) return RoundTripperFunc(func(r *http.Request) (*http.Response, error) { // validate traceID @@ -108,7 +118,7 @@ func newTraceByIDMiddleware(cfg Config, logger log.Logger) Middleware { }, nil } - //validate start and end parameter + // validate start and end parameter _, _, _, _, _, reqErr := api.ValidateAndSanitizeRequest(r) if reqErr != nil { return &http.Response{ diff --git a/modules/frontend/hedged_requests.go b/modules/frontend/hedged_requests.go new file mode 100644 index 00000000000..354097f76aa --- /dev/null +++ b/modules/frontend/hedged_requests.go @@ -0,0 +1,53 @@ +package frontend + +import ( + "net/http" + "time" + + "github.com/cristalhq/hedgedhttp" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +const ( + hedgedMetricsPublishDuration = 10 * time.Second +) + +var ( + hedgedRequestsMetrics = promauto.NewGauge( + prometheus.GaugeOpts{ + Namespace: "tempo", + Name: "query_frontend_hedged_roundtrips_total", + Help: "Total number of hedged trace by ID requests. Registered as a gauge for code sanity. 
This is a counter.", }, ) ) +func newHedgedRequestWare(cfg HedgingConfig) Middleware { + return MiddlewareFunc(func(next http.RoundTripper) http.RoundTripper { + if cfg.HedgeRequestsAt == 0 { + return next + } + ret, stats, err := hedgedhttp.NewRoundTripperAndStats(cfg.HedgeRequestsAt, cfg.HedgeRequestsUpTo, next) + if err != nil { + panic(err) + } + publishHedgedMetrics(stats) + return ret + }) +} + +// publishHedgedMetrics flushes metrics from hedged requests every 10 seconds +func publishHedgedMetrics(s *hedgedhttp.Stats) { + ticker := time.NewTicker(hedgedMetricsPublishDuration) + go func() { + for range ticker.C { + snap := s.Snapshot() + hedgedRequests := int64(snap.ActualRoundTrips) - int64(snap.RequestedRoundTrips) + if hedgedRequests < 0 { + hedgedRequests = 0 + } + hedgedRequestsMetrics.Set(float64(hedgedRequests)) + } + }() +}