-
Notifications
You must be signed in to change notification settings - Fork 327
Initial metrics spike for gathering metrics about Waypoint operations #3440
Changes from 4 commits
3d12a33
c06d49b
e4dd5df
4529402
8d73a87
3b7452b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```release-note:improvement | ||
server: Introduce basic server-side metric collections around operations | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
package metrics | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"os" | ||
"time" | ||
|
||
"go.opencensus.io/stats" | ||
"go.opencensus.io/stats/view" | ||
"go.opencensus.io/tag" | ||
) | ||
|
||
func init() { | ||
// Register OpenCensus views. | ||
if err := view.Register(statsViews...); err != nil { | ||
fmt.Fprintf(os.Stderr, "error registering OpenCensus views: %v", err) | ||
} | ||
} | ||
|
||
var ( | ||
// TagOperation is a tag for capturing the operation type, e.g. build, | ||
// deploy, release | ||
TagOperation = tag.MustNewKey("operation") | ||
|
||
operationDurationMeasure = stats.Float64( | ||
"operation_duration", | ||
"The duration for this operation, measured in seconds", | ||
stats.UnitSeconds, | ||
) | ||
|
||
operationCountMeasure = stats.Int64( | ||
"operation_count", | ||
"count of operations", | ||
stats.UnitDimensionless, | ||
) | ||
|
||
// views aggregate measurements | ||
waypointOperationCounts = &view.View{ | ||
Name: operationCountMeasure.Name(), | ||
Description: operationCountMeasure.Description(), | ||
TagKeys: []tag.Key{TagOperation}, | ||
Measure: operationCountMeasure, | ||
Aggregation: view.Count(), | ||
} | ||
|
||
waypointOperationDurations = &view.View{ | ||
Name: operationDurationMeasure.Name(), | ||
Description: operationDurationMeasure.Description(), | ||
TagKeys: []tag.Key{TagOperation}, | ||
Measure: operationDurationMeasure, | ||
// add a custom distribution bucket of 10 second intervals between | ||
// 0 and 240 seconds, then 10 minute intervals up to 60 minutes | ||
Aggregation: view.Distribution( | ||
(10 * time.Second).Seconds(), | ||
(20 * time.Second).Seconds(), | ||
(30 * time.Second).Seconds(), | ||
(40 * time.Second).Seconds(), | ||
(50 * time.Second).Seconds(), | ||
(60 * time.Second).Seconds(), | ||
(70 * time.Second).Seconds(), | ||
(80 * time.Second).Seconds(), | ||
(90 * time.Second).Seconds(), | ||
(100 * time.Second).Seconds(), | ||
(110 * time.Second).Seconds(), | ||
(120 * time.Second).Seconds(), | ||
(130 * time.Second).Seconds(), | ||
(140 * time.Second).Seconds(), | ||
(150 * time.Second).Seconds(), | ||
(160 * time.Second).Seconds(), | ||
(170 * time.Second).Seconds(), | ||
(180 * time.Second).Seconds(), | ||
(190 * time.Second).Seconds(), | ||
(200 * time.Second).Seconds(), | ||
(210 * time.Second).Seconds(), | ||
(220 * time.Second).Seconds(), | ||
(230 * time.Second).Seconds(), | ||
(240 * time.Second).Seconds(), | ||
(10 * time.Minute).Seconds(), | ||
(20 * time.Minute).Seconds(), | ||
(30 * time.Minute).Seconds(), | ||
(40 * time.Minute).Seconds(), | ||
(50 * time.Minute).Seconds(), | ||
(60 * time.Minute).Seconds(), | ||
), | ||
} | ||
|
||
// statsViews is a list of all stats views for | ||
// measurements emitted by this package. | ||
statsViews = []*view.View{ | ||
waypointOperationDurations, | ||
waypointOperationCounts, | ||
} | ||
) | ||
|
||
// MeasureOperation records the duration of an operation and upserts the | ||
// operation value into the TagOperation tag | ||
func MeasureOperation(ctx context.Context, lastWriteAt time.Time, operationName string) { | ||
_ = stats.RecordWithTags(ctx, []tag.Mutator{ | ||
tag.Upsert(TagOperation, operationName), | ||
}, operationDurationMeasure.M(time.Since(lastWriteAt).Seconds())) | ||
} | ||
|
||
// CountOperation records a single incremental value for an operation | ||
func CountOperation(ctx context.Context, operationName string) { | ||
_ = stats.RecordWithTags(ctx, []tag.Mutator{ | ||
tag.Upsert(TagOperation, operationName), | ||
}, operationCountMeasure.M(1)) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ import ( | |
"context" | ||
"io" | ||
"strings" | ||
"time" | ||
|
||
"github.com/hashicorp/go-hclog" | ||
"github.com/hashicorp/go-memdb" | ||
|
@@ -14,6 +15,7 @@ import ( | |
"google.golang.org/protobuf/types/known/emptypb" | ||
empty "google.golang.org/protobuf/types/known/emptypb" | ||
|
||
"github.com/hashicorp/waypoint/internal/telemetry/metrics" | ||
pb "github.com/hashicorp/waypoint/pkg/server/gen" | ||
"github.com/hashicorp/waypoint/pkg/server/logstream" | ||
serverptypes "github.com/hashicorp/waypoint/pkg/server/ptypes" | ||
|
@@ -535,7 +537,11 @@ func (s *Service) RunnerJobStream( | |
} | ||
log.Trace("loaded config sources for job", "total_sourcers", len(cfgSrcs)) | ||
|
||
log.Debug("sending job assignment to runner") | ||
catsby marked this conversation as resolved.
Show resolved
Hide resolved
|
||
operation := operationString(job.Job) | ||
defer func(start time.Time) { | ||
metrics.MeasureOperation(ctx, start, operation) | ||
}(time.Now()) | ||
metrics.CountOperation(ctx, operation) | ||
// Send the job assignment. | ||
// | ||
// If this has an error, we continue to accumulate the error until | ||
|
@@ -874,3 +880,50 @@ func (s *Service) runnerVerifyToken( | |
|
||
return nil | ||
} | ||
|
||
func operationString(job *pb.Job) string { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we seem to do this in more than one place in Waypoint, so I'm open to suggestions to where we could place this and use it. Maybe /pkg/server/ptypes/job.go? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The CLI client needs something like this too, so maybe |
||
// Types that are assignable to Operation: | ||
switch job.Operation.(type) { | ||
case *pb.Job_Noop_: | ||
return "noop" | ||
case *pb.Job_Build: | ||
return "build" | ||
case *pb.Job_Push: | ||
return "push" | ||
case *pb.Job_Deploy: | ||
return "deploy" | ||
case *pb.Job_Destroy: | ||
return "destroy" | ||
case *pb.Job_Release: | ||
return "release" | ||
case *pb.Job_Validate: | ||
return "validate" | ||
case *pb.Job_Auth: | ||
return "auth" | ||
case *pb.Job_Docs: | ||
return "docs" | ||
case *pb.Job_ConfigSync: | ||
return "config_sync" | ||
case *pb.Job_Exec: | ||
return "exec" | ||
case *pb.Job_Up: | ||
return "up" | ||
case *pb.Job_Logs: | ||
return "logs" | ||
case *pb.Job_QueueProject: | ||
return "queue_project" | ||
case *pb.Job_Poll: | ||
return "poll" | ||
case *pb.Job_StatusReport: | ||
return "status_report" | ||
case *pb.Job_StartTask: | ||
return "start_task" | ||
case *pb.Job_StopTask: | ||
return "stop_task" | ||
case *pb.Job_WatchTask: | ||
return "watch_task" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you think about making these PascalCase so they match the proto go definition? Like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From what I've seen internally (in other HashiCorp projects that use OpenCensus), lowercase with EDIT: clarify "where" internally I have seen this style There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ooh ok! Makes sense from that standpoint 👍🏻 Sounds good. |
||
case *pb.Job_Init: | ||
return "init" | ||
} | ||
return "unknown" | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here I simply re-use the current trace address, but we could easily add a separate flag for stats.