diff --git a/apis/metrics/metrics.go b/apis/metrics/metrics.go index fc3c24701..dbd969294 100644 --- a/apis/metrics/metrics.go +++ b/apis/metrics/metrics.go @@ -1,6 +1,7 @@ package metrics import ( + "fmt" "sync" "time" @@ -13,19 +14,34 @@ func init() { } const ( - pouchSubsystem = "pouch" + namespace = "engine" + subsystem = "daemon" ) var ( // ImagePullSummary records the summary of pulling image latency. - ImagePullSummary = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Subsystem: pouchSubsystem, - Name: "image_pull_latency_microseconds", - Help: "Latency in microseconds to pull a image.", - }, - []string{"image"}, - ) + ImagePullSummary = newLabelSummary("image_pull_latency_microseconds", "Latency in microseconds to pull a image.", "image") + + // ContainerActionsCounter records the number of container operations. + ContainerActionsCounter = newLabelCounter("container_actions_counter", "The number of container operations", "action") + + // ContainerSuccessActionsCounter records the number of container success operations. + ContainerSuccessActionsCounter = newLabelCounter("container_success_actions_counter", "The number of container success operations", "action") + + // ImageActionsCounter records the number of image operations. + ImageActionsCounter = newLabelCounter("image_actions_counter", "The number of image operations", "action") + + // ImageSuccessActionsCounter records the number of image success operations. + ImageSuccessActionsCounter = newLabelCounter("image_success_actions_counter", "The number of image success operations", "action") + + // ContainerActionsTimer records the time cost of each container action. + ContainerActionsTimer = newLabelTimer("container_actions", "The number of seconds it takes to process each container action", "action") + + // ImageActionsTimer records the time cost of each image action.
+ ImageActionsTimer = newLabelTimer("image_actions", "The number of seconds it takes to process each image action", "action") + + // EngineVersion records the version and commit information of the engine process. + EngineVersion = newLabelGauge("engine", "The version and commit information of the engine process", "commit") ) var registerMetrics sync.Once @@ -35,6 +51,13 @@ func Register() { // Register the metrics. registerMetrics.Do(func() { prometheus.MustRegister(ImagePullSummary) + prometheus.MustRegister(EngineVersion) + prometheus.MustRegister(ContainerActionsCounter) + prometheus.MustRegister(ContainerSuccessActionsCounter) + prometheus.MustRegister(ImageActionsCounter) + prometheus.MustRegister(ImageSuccessActionsCounter) + prometheus.MustRegister(ContainerActionsTimer) + prometheus.MustRegister(ImageActionsTimer) }) } @@ -42,3 +65,47 @@ func Register() { func SinceInMicroseconds(start time.Time) float64 { return float64(time.Since(start).Nanoseconds() / time.Microsecond.Nanoseconds()) } + +func newLabelSummary(name, help string, labels ...string) *prometheus.SummaryVec { + return prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: name, + Help: help, + ConstLabels: nil, + }, labels) +} + +func newLabelCounter(name, help string, labels ...string) *prometheus.CounterVec { + return prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: fmt.Sprintf("%s_%s", name, total), + Help: help, + ConstLabels: nil, + }, labels) +} + +func newLabelGauge(name, help string, labels ...string) *prometheus.GaugeVec { + return prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: subsystem, + Name: fmt.Sprintf("%s_%s", name, Unit("info")), + Help: help, + ConstLabels: nil, + }, labels) +} + +func newLabelTimer(name, help string, labels ...string) *prometheus.HistogramVec { + return prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + 
Namespace: namespace, + Subsystem: subsystem, + Name: fmt.Sprintf("%s_%s", name, seconds), + Help: help, + ConstLabels: nil, + }, labels) +} diff --git a/apis/metrics/unit.go b/apis/metrics/unit.go new file mode 100644 index 000000000..97d407f61 --- /dev/null +++ b/apis/metrics/unit.go @@ -0,0 +1,12 @@ +package metrics + +// Unit represents the type or precision of a metric that is appended to +// the metrics fully qualified name +type Unit string + +const ( + nanoseconds Unit = "nanoseconds" + seconds Unit = "seconds" + bytes Unit = "bytes" + total Unit = "total" +) diff --git a/apis/server/container_bridge.go b/apis/server/container_bridge.go index 8d3036fb6..68b73967c 100644 --- a/apis/server/container_bridge.go +++ b/apis/server/container_bridge.go @@ -9,6 +9,7 @@ import ( "strings" "time" + "github.com/alibaba/pouch/apis/metrics" "github.com/alibaba/pouch/apis/types" "github.com/alibaba/pouch/daemon/mgr" "github.com/alibaba/pouch/pkg/httputils" @@ -22,6 +23,12 @@ import ( ) func (s *Server) createContainer(ctx context.Context, rw http.ResponseWriter, req *http.Request) error { + label := "create" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + config := &types.ContainerCreateConfig{} reader := req.Body var ex error @@ -57,6 +64,8 @@ func (s *Server) createContainer(ctx context.Context, rw http.ResponseWriter, re return err } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + return EncodeResponse(rw, http.StatusCreated, container) } @@ -150,6 +159,12 @@ func (s *Server) getContainers(ctx context.Context, rw http.ResponseWriter, req } func (s *Server) startContainer(ctx context.Context, rw http.ResponseWriter, req *http.Request) error { + label := "start" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + 
metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + name := mux.Vars(req)["name"] options := &types.ContainerStartOptions{ @@ -162,6 +177,8 @@ func (s *Server) startContainer(ctx context.Context, rw http.ResponseWriter, req return err } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + rw.WriteHeader(http.StatusNoContent) return nil } @@ -171,6 +188,11 @@ func (s *Server) restartContainer(ctx context.Context, rw http.ResponseWriter, r t int err error ) + label := "restart" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) if v := req.FormValue("t"); v != "" { if t, err = strconv.Atoi(v); err != nil { @@ -184,6 +206,8 @@ func (s *Server) restartContainer(ctx context.Context, rw http.ResponseWriter, r return err } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + rw.WriteHeader(http.StatusNoContent) return nil } @@ -194,6 +218,12 @@ func (s *Server) stopContainer(ctx context.Context, rw http.ResponseWriter, req err error ) + label := "stop" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + if v := req.FormValue("t"); v != "" { if t, err = strconv.Atoi(v); err != nil { return httputils.NewHTTPError(err, http.StatusBadRequest) @@ -206,6 +236,8 @@ func (s *Server) stopContainer(ctx context.Context, rw http.ResponseWriter, req return err } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + rw.WriteHeader(http.StatusNoContent) return nil } @@ -233,6 +265,12 @@ func (s *Server) unpauseContainer(ctx context.Context, rw http.ResponseWriter, r } func (s *Server) renameContainer(ctx context.Context, rw http.ResponseWriter, req *http.Request) error { + 
label := "rename" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + oldName := mux.Vars(req)["name"] newName := req.FormValue("name") @@ -240,6 +278,8 @@ func (s *Server) renameContainer(ctx context.Context, rw http.ResponseWriter, re return err } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + rw.WriteHeader(http.StatusNoContent) return nil } @@ -270,6 +310,12 @@ func (s *Server) attachContainer(ctx context.Context, rw http.ResponseWriter, re } func (s *Server) updateContainer(ctx context.Context, rw http.ResponseWriter, req *http.Request) error { + label := "update" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + config := &types.UpdateConfig{} // set pre update hook plugin @@ -293,11 +339,19 @@ func (s *Server) updateContainer(ctx context.Context, rw http.ResponseWriter, re return httputils.NewHTTPError(err, http.StatusInternalServerError) } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + rw.WriteHeader(http.StatusOK) return nil } func (s *Server) upgradeContainer(ctx context.Context, rw http.ResponseWriter, req *http.Request) error { + label := "upgrade" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + config := &types.ContainerUpgradeConfig{} // decode request body if err := json.NewDecoder(req.Body).Decode(config); err != nil { @@ -314,6 +368,8 @@ func (s *Server) upgradeContainer(ctx context.Context, rw http.ResponseWriter, r return err } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + rw.WriteHeader(http.StatusOK) return nil } 
@@ -399,6 +455,12 @@ func (s *Server) resizeContainer(ctx context.Context, rw http.ResponseWriter, re } func (s *Server) removeContainers(ctx context.Context, rw http.ResponseWriter, req *http.Request) error { + label := "delete" + metrics.ContainerActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ContainerActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + name := mux.Vars(req)["name"] option := &types.ContainerRemoveOptions{ @@ -412,6 +474,8 @@ func (s *Server) removeContainers(ctx context.Context, rw http.ResponseWriter, r return err } + metrics.ContainerSuccessActionsCounter.WithLabelValues(label).Inc() + rw.WriteHeader(http.StatusNoContent) return nil } diff --git a/apis/server/image_bridge.go b/apis/server/image_bridge.go index b6a686916..17f10e799 100644 --- a/apis/server/image_bridge.go +++ b/apis/server/image_bridge.go @@ -34,9 +34,13 @@ func (s *Server) pullImage(ctx context.Context, rw http.ResponseWriter, req *htt image = image + ":" + tag } + label := "pull" + metrics.ImageActionsCounter.WithLabelValues(label).Inc() + // record the time spent during image pull procedure. 
defer func(start time.Time) { metrics.ImagePullSummary.WithLabelValues(image).Observe(metrics.SinceInMicroseconds(start)) + metrics.ImageActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) }(time.Now()) // get registry auth from Request header @@ -53,6 +57,7 @@ func (s *Server) pullImage(ctx context.Context, rw http.ResponseWriter, req *htt logrus.Errorf("failed to pull image %s: %v", image, err) return nil } + metrics.ImageSuccessActionsCounter.WithLabelValues(label).Inc() return nil } @@ -105,6 +110,12 @@ func (s *Server) removeImage(ctx context.Context, rw http.ResponseWriter, req *h return err } + label := "delete" + metrics.ImageActionsCounter.WithLabelValues(label).Inc() + defer func(start time.Time) { + metrics.ImageActionsTimer.WithLabelValues(label).Observe(time.Since(start).Seconds()) + }(time.Now()) + isForce := httputils.BoolValue(req, "force") // We only should check the image whether used by container when there is only one primary reference. if len(refs) == 1 { @@ -126,6 +137,7 @@ func (s *Server) removeImage(ctx context.Context, rw http.ResponseWriter, req *h return err } + metrics.ImageSuccessActionsCounter.WithLabelValues(label).Inc() rw.WriteHeader(http.StatusNoContent) return nil } diff --git a/main.go b/main.go index b9fd7d5a0..6727bee77 100644 --- a/main.go +++ b/main.go @@ -9,6 +9,7 @@ import ( "syscall" "time" + "github.com/alibaba/pouch/apis/metrics" "github.com/alibaba/pouch/apis/opts" optscfg "github.com/alibaba/pouch/apis/opts/config" "github.com/alibaba/pouch/apis/types" @@ -150,7 +151,7 @@ func runDaemon(cmd *cobra.Command) error { fmt.Printf("pouchd version: %s, build: %s, build at: %s\n", version.Version, version.GitCommit, version.BuildTime) return nil } - + metrics.EngineVersion.WithLabelValues(version.GitCommit).Set(1) // initialize log. 
initLog() diff --git a/test/api_container_metrics_test.go b/test/api_container_metrics_test.go new file mode 100644 index 000000000..370a426e4 --- /dev/null +++ b/test/api_container_metrics_test.go @@ -0,0 +1,78 @@ +package main + +import ( + "fmt" + + "github.com/alibaba/pouch/test/environment" + "github.com/alibaba/pouch/test/request" + + "github.com/go-check/check" +) + +// APIContainerMetricsSuite is the test suite for container metrics API. +type APIContainerMetricsSuite struct { + cname string +} + +func init() { + check.Suite(&APIContainerMetricsSuite{}) +} + +// SetUpTest does common setup in the beginning of each test. +func (suite *APIContainerMetricsSuite) SetUpTest(c *check.C) { + SkipIfFalse(c, environment.IsLinux) + +} + +// SetUpSuite does common setup in the beginning of each suite. +func (suite *APIContainerMetricsSuite) SetUpSuite(c *check.C) { + SkipIfFalse(c, environment.IsLinux) + suite.cname = "TestMetricsContainer" + PullImage(c, busyboxImage) +} + +// TearDownSuite runs after each suite to do cleanup work for the whole suite. +func (suite *APIContainerMetricsSuite) TearDownSuite(c *check.C) { + SkipIfFalse(c, environment.IsLinux) + + request.Delete("/containers/" + suite.cname + "?force=1") +} + +// TestContainerMetrics tests metrics of container.
+func (suite *APIContainerMetricsSuite) TestContainerMetrics(c *check.C) { + cname := suite.cname + + suite.checkAction(c, cname, "create") + suite.checkAction(c, cname, "start") + suite.checkAction(c, cname, "stop") + suite.checkAction(c, cname, "delete") +} + +func (suite *APIContainerMetricsSuite) checkAction(c *check.C, cname string, label string) { + key := fmt.Sprintf(`engine_daemon_container_actions_counter_total{action="%s"}`, label) + keySuccess := fmt.Sprintf(`engine_daemon_container_success_actions_counter_total{action="%s"}`, label) + countBefore, countSuccessBefore := GetMetric(c, + key, + keySuccess) + switch label { + case "create": + CreateBusyboxContainerOk(c, cname) + case "start": + StartContainerOk(c, cname) + case "stop": + StopContainerOk(c, cname) + case "delete": + resp, err := request.Delete("/containers/" + cname) + c.Assert(err, check.IsNil) + CheckRespStatus(c, resp, 204) + default: + fmt.Println("error") + c.Fatal("error") + } + + count, successCount := GetMetric(c, + key, + keySuccess) + c.Assert(count, check.Equals, countBefore+1) + c.Assert(successCount, check.Equals, countSuccessBefore+1) +} diff --git a/test/api_image_metrics_test.go b/test/api_image_metrics_test.go new file mode 100644 index 000000000..d179e5327 --- /dev/null +++ b/test/api_image_metrics_test.go @@ -0,0 +1,62 @@ +package main + +import ( + "fmt" + + "github.com/alibaba/pouch/test/environment" + "github.com/alibaba/pouch/test/request" + + "github.com/go-check/check" +) + +// APIImageMetricsSuite is the test suite for image metrics API. +type APIImageMetricsSuite struct{} + +func init() { + check.Suite(&APIImageMetricsSuite{}) +} + +// SetUpTest does common setup in the beginning of each test. +func (suite *APIImageMetricsSuite) SetUpTest(c *check.C) { + SkipIfFalse(c, environment.IsLinux) +} + +// SetUpSuite does common setup in the beginning of each suite.
+func (suite *APIImageMetricsSuite) SetUpSuite(c *check.C) { + SkipIfFalse(c, environment.IsLinux) + PullImage(c, helloworldImage) + DelImageForceOk(c, helloworldImage) +} + +// TearDownSuite runs after each suite to do cleanup work for the whole suite. +func (suite *APIImageMetricsSuite) TearDownSuite(c *check.C) { + SkipIfFalse(c, environment.IsLinux) +} + +// TestImageMetrics tests metrics of image. +func (suite *APIImageMetricsSuite) TestImageMetrics(c *check.C) { + suite.checkAction(c, "pull") + suite.checkAction(c, "delete") +} + +func (suite *APIImageMetricsSuite) checkAction(c *check.C, label string) { + key := fmt.Sprintf(`engine_daemon_image_actions_counter_total{action="%s"}`, label) + keySuccess := fmt.Sprintf(`engine_daemon_image_success_actions_counter_total{action="%s"}`, label) + countBefore, countSuccessBefore := GetMetric(c, + key, + keySuccess) + switch label { + case "pull": + PullImage(c, helloworldImage) + case "delete": + resp, err := request.Delete("/images/" + helloworldImage) + c.Assert(err, check.IsNil) + CheckRespStatus(c, resp, 204) + } + + count, successCount := GetMetric(c, + key, + keySuccess) + c.Assert(count, check.Equals, countBefore+1) + c.Assert(successCount, check.Equals, countSuccessBefore+1) +} diff --git a/test/util_api.go b/test/util_api.go index 67500ffb3..54788d9d4 100644 --- a/test/util_api.go +++ b/test/util_api.go @@ -8,6 +8,8 @@ import ( "net" "net/http" "net/url" + "strconv" + "strings" "time" "github.com/alibaba/pouch/apis/types" @@ -306,3 +308,41 @@ func discardPullStatus(r io.ReadCloser) error { } return nil } + +// GetMetric gets metrics from prometheus server and returns the total count and the success count.
+func GetMetric(c *check.C, key string, keySuccess string) (int, int) { + resp, err := request.Get("/metrics") + c.Assert(err, check.IsNil) + defer resp.Body.Close() + scanner := bufio.NewScanner(resp.Body) + value := "" + valueSuccess := "" + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, key) { + kv := strings.Split(line, " ") + if len(kv) == 2 { + value = kv[1] + } + } else if strings.Contains(line, keySuccess) { + kv := strings.Split(line, " ") + if len(kv) == 2 { + valueSuccess = kv[1] + } + } + } + + iCount := 0 + if value != "" { + iCount, err = strconv.Atoi(value) + c.Assert(err, check.IsNil) + } + + iCountSuccess := 0 + if valueSuccess != "" { + iCountSuccess, err = strconv.Atoi(valueSuccess) + c.Assert(err, check.IsNil) + } + + return iCount, iCountSuccess +}