-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add metrics instrumentation to collector endpoints (#2664)
* Add instrumentation handler to collector endpoints Currently we do not expose any HTTP metrics on any of the collector endpoints. This makes it harder to detect when the collector is having issues ingesting bad spans for example. This change adds a metric that reports the duration of each request and adds HTTP status and path labels to it. Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Fix static check problems Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Add godoc for NewMetricsHandler Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Fix import formatting Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Up test coverage Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Rename packages and update license statements Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Up test coverage for server package We weren't testing the starting of collector's HTTP and HTTP zipkin servers. Added sanity tests for that. Also those two handlers exited with log.Fatal, which made them hard to test; this has been changed as well Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Cache metric timers in httpmetrics Only the prometheus metrics.Factory implementaiton caches the instantiated Timers, Gauges, etc. The other implementations allocate new objects each time. We want to prevent that because this is in the hot path. This adds caching on the request duration timers. This shouldn't lead to noticeable increase in memory usage. Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Add synchronisation in httpmetrics It was possible that we get concurrent map read-write panics. Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Change server tests to use ephemeral ports It's more robust if we use the next available port instead of hardcoding a large port. Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Replace string with struct keys in httpmetrics We can use directly struct keys instead of building strings out of each struct we get. Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Increase await timeout in httmetrics test Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Simplify network calls in server tests Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Consolidate port strings in server tests Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Clean up a Stringer.String() and zero value init Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev> * Use httptest in collector/app/server tests Signed-off-by: Dimitar Dimitrov <hi@dimitardimitrov.dev>
- Loading branch information
1 parent
221ce20
commit 5c3b53b
Showing
7 changed files
with
298 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
// Copyright (c) 2020 The Jaeger Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package server | ||
|
||
import ( | ||
"net/http" | ||
"net/http/httptest" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/uber/jaeger-lib/metrics/metricstest" | ||
"go.uber.org/zap" | ||
|
||
"github.com/jaegertracing/jaeger/cmd/collector/app/handler" | ||
"github.com/jaegertracing/jaeger/pkg/healthcheck" | ||
) | ||
|
||
// test wrong port number | ||
func TestFailToListenHTTP(t *testing.T) { | ||
logger, _ := zap.NewDevelopment() | ||
server, err := StartHTTPServer(&HTTPServerParams{ | ||
HostPort: ":-1", | ||
Logger: logger, | ||
}) | ||
assert.Nil(t, server) | ||
assert.EqualError(t, err, "listen tcp: address -1: invalid port") | ||
} | ||
|
||
func TestSpanCollectorHTTP(t *testing.T) { | ||
logger, _ := zap.NewDevelopment() | ||
params := &HTTPServerParams{ | ||
Handler: handler.NewJaegerSpanHandler(logger, &mockSpanProcessor{}), | ||
SamplingStore: &mockSamplingStore{}, | ||
MetricsFactory: metricstest.NewFactory(time.Hour), | ||
HealthCheck: healthcheck.New(), | ||
Logger: logger, | ||
} | ||
|
||
server := httptest.NewServer(nil) | ||
defer server.Close() | ||
|
||
serveHTTP(server.Config, server.Listener, params) | ||
|
||
response, err := http.Post(server.URL, "", nil) | ||
assert.NoError(t, err) | ||
assert.NotNil(t, response) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// Copyright (c) 2020 The Jaeger Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package server | ||
|
||
import ( | ||
"net/http" | ||
"net/http/httptest" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/uber/jaeger-lib/metrics/metricstest" | ||
"go.uber.org/zap" | ||
|
||
"github.com/jaegertracing/jaeger/cmd/collector/app/handler" | ||
"github.com/jaegertracing/jaeger/pkg/healthcheck" | ||
) | ||
|
||
// test wrong port number | ||
func TestFailToListenZipkin(t *testing.T) { | ||
logger, _ := zap.NewDevelopment() | ||
server, err := StartZipkinServer(&ZipkinServerParams{ | ||
HostPort: ":-1", | ||
Logger: logger, | ||
}) | ||
assert.Nil(t, server) | ||
assert.EqualError(t, err, "listen tcp: address -1: invalid port") | ||
} | ||
|
||
func TestSpanCollectorZipkin(t *testing.T) { | ||
logger, _ := zap.NewDevelopment() | ||
params := &ZipkinServerParams{ | ||
Handler: handler.NewZipkinSpanHandler(logger, nil, nil), | ||
MetricsFactory: metricstest.NewFactory(time.Hour), | ||
HealthCheck: healthcheck.New(), | ||
Logger: logger, | ||
} | ||
|
||
server := httptest.NewServer(nil) | ||
defer server.Close() | ||
|
||
serveZipkin(server.Config, server.Listener, params) | ||
|
||
response, err := http.Post(server.URL, "", nil) | ||
assert.NoError(t, err) | ||
assert.NotNil(t, response) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// Copyright (c) 2020 The Jaeger Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package httpmetrics | ||
|
||
import ( | ||
"net/http" | ||
"strconv" | ||
"sync" | ||
"time" | ||
|
||
"github.com/uber/jaeger-lib/metrics" | ||
) | ||
|
||
type statusRecorder struct { | ||
http.ResponseWriter | ||
status int | ||
wroteHeader bool | ||
} | ||
|
||
func (r *statusRecorder) WriteHeader(status int) { | ||
if r.wroteHeader { | ||
return | ||
} | ||
r.status = status | ||
r.wroteHeader = true | ||
r.ResponseWriter.WriteHeader(status) | ||
} | ||
|
||
// Wrap returns a handler that wraps the provided one and emits metrics based on the HTTP requests and responses. | ||
// It will record the HTTP response status, HTTP method, duration and path of the call. | ||
// The duration will be reported in metrics.Timer and the rest will be labels on that timer. | ||
// | ||
// Do not use with HTTP endpoints that take parameters from URL path, such as `/user/{user_id}`, | ||
// because they will result in high cardinality metrics. | ||
func Wrap(h http.Handler, metricsFactory metrics.Factory) http.Handler { | ||
timers := newRequestDurations(metricsFactory) | ||
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { | ||
start := time.Now() | ||
recorder := &statusRecorder{ResponseWriter: w} | ||
|
||
h.ServeHTTP(recorder, r) | ||
|
||
req := recordedRequest{ | ||
key: recordedRequestKey{ | ||
status: strconv.Itoa(recorder.status), | ||
path: r.URL.Path, | ||
method: r.Method, | ||
}, | ||
duration: time.Since(start), | ||
} | ||
timers.record(req) | ||
}) | ||
} | ||
|
||
type recordedRequestKey struct { | ||
method string | ||
path string | ||
status string | ||
} | ||
|
||
type recordedRequest struct { | ||
key recordedRequestKey | ||
duration time.Duration | ||
} | ||
|
||
type requestDurations struct { | ||
lock sync.RWMutex | ||
metrics metrics.Factory | ||
timers map[recordedRequestKey]metrics.Timer | ||
} | ||
|
||
func newRequestDurations(metricsFactory metrics.Factory) *requestDurations { | ||
return &requestDurations{ | ||
timers: make(map[recordedRequestKey]metrics.Timer), | ||
metrics: metricsFactory, | ||
} | ||
} | ||
|
||
func (r *requestDurations) record(request recordedRequest) { | ||
cacheKey := request.key | ||
|
||
r.lock.RLock() | ||
timer, ok := r.timers[cacheKey] | ||
r.lock.RUnlock() | ||
if !ok { | ||
r.lock.Lock() | ||
timer, ok = r.timers[cacheKey] | ||
if !ok { | ||
timer = buildTimer(r.metrics, cacheKey) | ||
r.timers[cacheKey] = timer | ||
} | ||
r.lock.Unlock() | ||
} | ||
|
||
timer.Record(request.duration) | ||
} | ||
|
||
func buildTimer(metricsFactory metrics.Factory, key recordedRequestKey) metrics.Timer { | ||
return metricsFactory.Timer(metrics.TimerOptions{ | ||
Name: "http.request.duration", | ||
Help: "Duration of HTTP requests", | ||
Tags: map[string]string{ | ||
"status": key.status, | ||
"path": key.path, | ||
"method": key.method, | ||
}, | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// Copyright (c) 2020 The Jaeger Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package httpmetrics | ||
|
||
import ( | ||
"net/http" | ||
"net/http/httptest" | ||
"testing" | ||
"time" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/uber/jaeger-lib/metrics/metricstest" | ||
) | ||
|
||
func TestNewMetricsHandler(t *testing.T) { | ||
dummyHandlerFunc := http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { | ||
time.Sleep(time.Millisecond) | ||
w.WriteHeader(http.StatusAccepted) | ||
w.WriteHeader(http.StatusTeapot) // any subsequent statuses should be ignored | ||
}) | ||
|
||
mb := metricstest.NewFactory(time.Hour) | ||
handler := Wrap(dummyHandlerFunc, mb) | ||
|
||
req, err := http.NewRequest(http.MethodGet, "/subdir/qwerty", nil) | ||
assert.NoError(t, err) | ||
handler.ServeHTTP(httptest.NewRecorder(), req) | ||
|
||
for i := 0; i < 1000; i++ { | ||
_, gauges := mb.Snapshot() | ||
if _, ok := gauges["http.request.duration|method=GET|path=/subdir/qwerty|status=202.P999"]; ok { | ||
return | ||
} | ||
time.Sleep(15 * time.Millisecond) | ||
} | ||
|
||
assert.Fail(t, "gauge hasn't been updated within a reasonable amount of time") | ||
} |