From 63eaecec3c3ef0ff7b4560a81e3c06d1a8b6099f Mon Sep 17 00:00:00 2001 From: yuyufei Date: Tue, 12 Jul 2022 13:56:35 +0800 Subject: [PATCH] 1. Add GUI interface for debugging DSP hyper-parameters 2. Support workload metrics for gRPC --- cmd/craned/app/manager.go | 13 ++ examples/tsp-workload-dsp.yaml | 1 + examples/tsp-workload-resource-dsp.yaml | 1 + go.mod | 1 - go.sum | 4 - .../time_series_prediction_controller.go | 3 +- pkg/prediction/dsp/debug.go | 76 +++++++ pkg/providers/grpc/grpc.go | 10 + pkg/server/handler/prediction/prediction.go | 186 ++++++++++++++++++ pkg/server/server.go | 11 +- 10 files changed, 299 insertions(+), 7 deletions(-) create mode 100644 pkg/prediction/dsp/debug.go create mode 100644 pkg/server/handler/prediction/prediction.go diff --git a/cmd/craned/app/manager.go b/cmd/craned/app/manager.go index a30cadf58..3168c41f9 100644 --- a/cmd/craned/app/manager.go +++ b/cmd/craned/app/manager.go @@ -46,6 +46,7 @@ import ( _ "github.com/gocrane/crane/pkg/querybuilder-providers/prometheus" "github.com/gocrane/crane/pkg/server" serverconfig "github.com/gocrane/crane/pkg/server/config" + "github.com/gocrane/crane/pkg/server/handler/prediction" "github.com/gocrane/crane/pkg/utils/target" "github.com/gocrane/crane/pkg/webhooks" ) @@ -351,6 +352,18 @@ func runAll(ctx context.Context, mgr ctrl.Manager, predictorMgr predictor.Manage if err != nil { klog.Exit(err) } + + discoveryClientSet, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig()) + if err != nil { + klog.Exit(err, "Unable to create discover client") + } + + scaleKindResolver := scale.NewDiscoveryScaleKindResolver(discoveryClientSet) + scaleClient := scale.New(discoveryClientSet.RESTClient(), mgr.GetRESTMapper(), dynamic.LegacyAPIPathResolverFunc, scaleKindResolver) + selectorFetcher := target.NewSelectorFetcher(mgr.GetScheme(), mgr.GetRESTMapper(), scaleClient, mgr.GetClient()) + ctx = context.WithValue(ctx, prediction.PredictorManagerKey, predictorMgr) + ctx = context.WithValue(ctx, prediction.SelectorFetcherKey, selectorFetcher) + craneServer.Run(ctx) return nil }) diff --git a/examples/tsp-workload-dsp.yaml b/examples/tsp-workload-dsp.yaml index 256cc29c2..f4ff4931f 100644 --- a/examples/tsp-workload-dsp.yaml +++ b/examples/tsp-workload-dsp.yaml @@ -5,6 +5,7 @@ metadata: namespace: default spec: targetRef: + apiVersion: apps/v1 kind: Deployment name: dep-1-100m-500mib namespace: default diff --git a/examples/tsp-workload-resource-dsp.yaml b/examples/tsp-workload-resource-dsp.yaml index 1df8bb1fb..4ec98406a 100644 --- a/examples/tsp-workload-resource-dsp.yaml +++ b/examples/tsp-workload-resource-dsp.yaml @@ -8,6 +8,7 @@ spec: kind: Deployment name: dep-1-100m-500mib namespace: default + apiVersion: apps/v1 predictionWindowSeconds: 3600 predictionMetrics: - resourceIdentifier: workload-cpu diff --git a/go.mod b/go.mod index 7379e3690..834d00ee7 100644 --- a/go.mod +++ b/go.mod @@ -181,7 +181,6 @@ require ( golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect golang.org/x/tools v0.1.8 // indirect google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect - google.golang.org/protobuf v1.27.1 ) replace ( diff --git a/go.sum b/go.sum index 4afc15c88..535491850 100644 --- a/go.sum +++ b/go.sum @@ -310,10 +310,6 @@ github.com/gobwas/pool v0.2.1 h1:xfeeEhW7pwmX8nuLVlqbzVc7udMDrwetjEv+TZIz1og= github.com/gobwas/pool v0.2.1/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= github.com/gobwas/ws v1.1.0-rc.5 h1:QOAag7FoBaBYYHRqzqkhhd8fq5RTubvI4v3Ft/gDVVQ= github.com/gobwas/ws v1.1.0-rc.5/go.mod h1:nzvNcVha5eUziGrbxFCo6qFIojQHjJV5cLYIbezhfL0= -github.com/gocrane/api v0.4.1-0.20220507041258-d376db2b4ad4 h1:vGDg3G6y661KAlhjf/8/r8JCjaIi6aV8szCP+MZRU3Y= -github.com/gocrane/api v0.4.1-0.20220507041258-d376db2b4ad4/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk= -github.com/gocrane/api v0.4.1-0.20220520134105-09d430d903ac h1:lBKVVOA4del0Plj80PCE+nglxaJxaXanCv5N6a3laVY= -github.com/gocrane/api v0.4.1-0.20220520134105-09d430d903ac/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk= github.com/gocrane/api v0.5.1-0.20220706040335-eaadbb4b99ed h1:aARCU+Hs1ZKTqJFJT/4/or/iGR6qYwMcG99CGmBFJpg= github.com/gocrane/api v0.5.1-0.20220706040335-eaadbb4b99ed/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= diff --git a/pkg/controller/timeseriesprediction/time_series_prediction_controller.go b/pkg/controller/timeseriesprediction/time_series_prediction_controller.go index fe82607de..460bfb73d 100644 --- a/pkg/controller/timeseriesprediction/time_series_prediction_controller.go +++ b/pkg/controller/timeseriesprediction/time_series_prediction_controller.go @@ -102,6 +102,7 @@ func (tc *Controller) syncTimeSeriesPrediction(ctx context.Context, tsp *predict c, err := NewMetricContext(tc.TargetFetcher, tsp, tc.predictorMgr) if err != nil { + klog.ErrorS(err, "Failed to NewMetricContext.") return ctrl.Result{}, err } @@ -116,7 +117,7 @@ func (tc *Controller) syncTimeSeriesPrediction(ctx context.Context, tsp *predict c.WithApiConfigs(tsp.Spec.PredictionMetrics) return } - // predictor need an interface to query the config and then diff. + // predictor needs an interface to query the config and then diff. // now just diff the cache in the controller to decide, it can not cover all the cases when users modify the spec for _, oldMetricConf := range old.Spec.PredictionMetrics { if !ExistsPredictionMetric(oldMetricConf, tsp.Spec.PredictionMetrics) { diff --git a/pkg/prediction/dsp/debug.go b/pkg/prediction/dsp/debug.go new file mode 100644 index 000000000..91985d1d1 --- /dev/null +++ b/pkg/prediction/dsp/debug.go @@ -0,0 +1,76 @@ +package dsp + +import ( + "fmt" + "time" + + "k8s.io/klog/v2" + + "github.com/gocrane/crane/pkg/common" + "github.com/gocrane/crane/pkg/metricnaming" + "github.com/gocrane/crane/pkg/prediction" + "github.com/gocrane/crane/pkg/prediction/config" +) + +func Debug(predictor prediction.Interface, namer metricnaming.MetricNamer, config *config.Config) (*Signal, *Signal, *Signal, error) { + internalConfig, err := makeInternalConfig(config.DSP) + if err != nil { + return nil, nil, nil, err + } + + historyTimeSeriesList, err := queryHistoryTimeSeries(predictor.(*periodicSignalPrediction), namer, internalConfig) + if err != nil { + return nil, nil, nil, err + } + + queryExpr := namer.BuildUniqueKey() + + var signal, history, test, estimate *Signal + var nPeriods int + var chosenEstimator Estimator + for _, ts := range historyTimeSeriesList { + periodLength := findPeriod(ts, internalConfig.historyResolution) + if periodLength == Day || periodLength == Week { + signal = SamplesToSignal(ts.Samples, internalConfig.historyResolution) + signal, nPeriods = signal.Truncate(periodLength) + if nPeriods >= 2 { + chosenEstimator = bestEstimator(queryExpr, internalConfig.estimators, signal, nPeriods, periodLength) + } + if chosenEstimator != nil { + samplesPerPeriod := len(signal.Samples) / nPeriods + history = &Signal{ + SampleRate: signal.SampleRate, + Samples: signal.Samples[:(nPeriods-1)*samplesPerPeriod], + } + test = &Signal{ + SampleRate: signal.SampleRate, + Samples: signal.Samples[(nPeriods-1)*samplesPerPeriod:], + } + estimate = chosenEstimator.GetEstimation(history, periodLength) + return history, test, estimate, nil + } + } + } + + return nil, nil, nil, fmt.Errorf("no prediction result") +} + +func queryHistoryTimeSeries(predictor *periodicSignalPrediction, namer metricnaming.MetricNamer, config *internalConfig) ([]*common.TimeSeries, error) { + p := predictor.GetHistoryProvider() + if p == nil { + return nil, fmt.Errorf("history provider not provisioned") + } + + end := time.Now().Truncate(config.historyResolution) + start := end.Add(-config.historyDuration - time.Hour) + + tsList, err := p.QueryTimeSeries(namer, start, end, config.historyResolution) + if err != nil { + klog.ErrorS(err, "Failed to query history time series.") + return nil, err + } + + klog.V(4).InfoS("DSP debug | queryHistoryTimeSeries", "timeSeriesList", tsList, "config", *config) + + return preProcessTimeSeriesList(tsList, config) +} diff --git a/pkg/providers/grpc/grpc.go b/pkg/providers/grpc/grpc.go index 9de781405..d04434c9f 100644 --- a/pkg/providers/grpc/grpc.go +++ b/pkg/providers/grpc/grpc.go @@ -104,6 +104,16 @@ func grpcMetric(namer metricnaming.MetricNamer) (*pb.Metric, error) { Name: c.Name, }, } + case metricquery.WorkloadMetricType: + w := q.GenericQuery.Metric.Workload + m.Info = &pb.Metric_Workload{ + Workload: &pb.Workload{ + Namespace: w.Namespace, + Name: w.Name, + Kind: w.Kind, + ApiVersion: w.APIVersion, + }, + } default: return nil, fmt.Errorf("%s not supported", q.GenericQuery.Metric.Type) } diff --git a/pkg/server/handler/prediction/prediction.go b/pkg/server/handler/prediction/prediction.go new file mode 100644 index 000000000..f2f84c03c --- /dev/null +++ b/pkg/server/handler/prediction/prediction.go @@ -0,0 +1,186 @@ +package prediction + +import ( + "context" + "fmt" + "net/http" + + "github.com/gin-gonic/gin" + "github.com/go-echarts/go-echarts/v2/charts" + "github.com/go-echarts/go-echarts/v2/components" + "github.com/go-echarts/go-echarts/v2/opts" + "github.com/go-echarts/go-echarts/v2/types" + "github.com/gocrane/crane/pkg/prediction/dsp" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" + + craneclientset "github.com/gocrane/api/pkg/generated/clientset/versioned" + "github.com/gocrane/api/prediction/v1alpha1" + + "github.com/gocrane/crane/pkg/controller/timeseriesprediction" + predictormgr "github.com/gocrane/crane/pkg/predictor" + "github.com/gocrane/crane/pkg/server/ginwrapper" + "github.com/gocrane/crane/pkg/utils/target" +) + +type DebugHandler struct { + craneClient *craneclientset.Clientset + predictorManager predictormgr.Manager + selectorFetcher target.SelectorFetcher +} + +type ContextKey string + +var ( + PredictorManagerKey ContextKey = "predictorManager" + SelectorFetcherKey ContextKey = "selectorFetcher" +) + +func NewDebugHandler(ctx context.Context) *DebugHandler { + config, err := rest.InClusterConfig() + if err != nil { + klog.Fatalf("Failed to get InClusterConfig, %v.", err) + } + + val := ctx.Value(PredictorManagerKey) + if val == nil { + klog.Fatalf("predictorManager not found") + } + predictorManager := val.(predictormgr.Manager) + + val = ctx.Value(SelectorFetcherKey) + if val == nil { + klog.Fatalf("selectorFetcher not found") + } + selectorFetcher := val.(target.SelectorFetcher) + + return &DebugHandler{ + craneClient: craneclientset.NewForConfigOrDie(config), + predictorManager: predictorManager, + selectorFetcher: selectorFetcher, + } +} + +func (dh *DebugHandler) Display(c *gin.Context) { + namespace := c.Param("namespace") + name := c.Param("tsp") + if len(namespace) == 0 || len(name) == 0 { + c.Writer.WriteHeader(http.StatusBadRequest) + return + } + + tsp, err := dh.craneClient.PredictionV1alpha1().TimeSeriesPredictions(namespace).Get(context.TODO(), name, metav1.GetOptions{}) + if err != nil { + ginwrapper.WriteResponse(c, err, nil) + return + } + + if len(tsp.Spec.PredictionMetrics) > 0 { + if tsp.Spec.PredictionMetrics[0].Algorithm.AlgorithmType == v1alpha1.AlgorithmTypeDSP && tsp.Spec.PredictionMetrics[0].Algorithm.DSP != nil { + mc, err := timeseriesprediction.NewMetricContext(dh.selectorFetcher, tsp, dh.predictorManager) + if err != nil { + ginwrapper.WriteResponse(c, err, nil) + return + } + + internalConf := mc.ConvertApiMetric2InternalConfig(&tsp.Spec.PredictionMetrics[0]) + namer := mc.GetMetricNamer(&tsp.Spec.PredictionMetrics[0]) + pred := dh.predictorManager.GetPredictor(v1alpha1.AlgorithmTypeDSP) + history, test, estimate, err := dsp.Debug(pred, namer, internalConf) + if err != nil { + ginwrapper.WriteResponse(c, err, nil) + return + } + + page := components.NewPage() + page.AddCharts(plot(history, "history", "green", charts.WithTitleOpts(opts.Title{Title: "history"}))) + page.AddCharts(plots([]*dsp.Signal{test, estimate}, []string{"actual", "forecasted"}, + charts.WithTitleOpts(opts.Title{Title: "actual/forecasted"}))) + err = page.Render(c.Writer) + if err != nil { + klog.ErrorS(err, "Failed to display debug time series") + } + + return + } + } + + c.Writer.WriteHeader(http.StatusBadRequest) + return +} + +func plot(s *dsp.Signal, name string, color string, o ...charts.GlobalOpts) *charts.Line { + x := make([]string, 0) + y := make([]opts.LineData, 0) + for i := 0; i < s.Num(); i++ { + x = append(x, fmt.Sprintf("%.1f", float64(i)/s.SampleRate)) + y = append(y, opts.LineData{Value: s.Samples[i], Symbol: "none"}) + } + + line := charts.NewLine() + line.SetGlobalOptions( + charts.WithInitializationOpts(opts.Initialization{Width: "3000px", Theme: types.ThemeRoma}), + charts.WithLegendOpts( + opts.Legend{ + Show: true, + Data: name, + }), + charts.WithTooltipOpts(opts.Tooltip{ + Show: true, + Trigger: "axis", + TriggerOn: "mousemove", + }), + charts.WithTitleOpts(opts.Title{Title: s.String()})) + if o != nil { + line.SetGlobalOptions(o...) + } + line.SetXAxis(x).AddSeries(name, y, charts.WithLineStyleOpts(opts.LineStyle{Color: color})) + + return line +} + +func plots(signals []*dsp.Signal, names []string, o ...charts.GlobalOpts) *charts.Line { + if len(signals) < 1 { + return nil + } + s := signals[0] + n := signals[0].Num() + x := make([]string, 0) + y := make([][]opts.LineData, len(signals)) + for j := 0; j < len(signals); j++ { + y[j] = make([]opts.LineData, 0) + } + for i := 0; i < n; i++ { + x = append(x, fmt.Sprintf("%.1f", float64(i)/s.SampleRate)) + for j := 0; j < len(signals); j++ { + y[j] = append(y[j], opts.LineData{Value: signals[j].Samples[i], Symbol: "none"}) + } + } + + line := charts.NewLine() + line.SetGlobalOptions( + charts.WithInitializationOpts(opts.Initialization{Width: "3000px", Theme: types.ThemeShine}), + charts.WithLegendOpts( + opts.Legend{ + Show: true, + Data: names, + }), + charts.WithTooltipOpts(opts.Tooltip{ + Show: true, + Trigger: "axis", + TriggerOn: "mousemove", + })) + if o != nil { + line.SetGlobalOptions(o...) + } + line.SetXAxis(x) + for j := 0; j < len(signals); j++ { + line.AddSeries(names[j], y[j], charts.WithAreaStyleOpts( + opts.AreaStyle{ + Opacity: 0.1, + }), + ) + } + return line +} diff --git a/pkg/server/server.go b/pkg/server/server.go index eac0a9b38..59e49f7fe 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -17,6 +17,7 @@ import ( "github.com/gocrane/crane/pkg/server/config" "github.com/gocrane/crane/pkg/server/ginwrapper" + "github.com/gocrane/crane/pkg/server/handler/prediction" "github.com/gocrane/crane/pkg/server/middleware" clustersrv "github.com/gocrane/crane/pkg/server/service/cluster" dashboardsrv "github.com/gocrane/crane/pkg/server/service/dashboard" @@ -54,6 +55,14 @@ func NewServer(cfg *config.Config) (*apiServer, error) { return server, nil } +func (s *apiServer) installPredictionDebugAPIs(ctx context.Context) { + debugHandler := prediction.NewDebugHandler(ctx) + debug := s.Group("/api/prediction/debug") + { + debug.GET(":namespace/:tsp", debugHandler.Display) + } +} + func (s *apiServer) installGenericAPIs() { // install metric handler if s.config.EnableMetrics { @@ -136,7 +145,7 @@ func (s *apiServer) Run(ctx context.Context) { s.installDefaultMiddlewares() s.installGenericAPIs() s.initRouter() - + s.installPredictionDebugAPIs(ctx) s.startGracefulShutDownManager(ctx) go func() {