diff --git a/cmd/craned/app/manager.go b/cmd/craned/app/manager.go index 4350c0c58..ee37d1fb8 100644 --- a/cmd/craned/app/manager.go +++ b/cmd/craned/app/manager.go @@ -23,6 +23,7 @@ import ( autoscalingapi "github.com/gocrane/api/autoscaling/v1alpha1" ensuranceapi "github.com/gocrane/api/ensurance/v1alpha1" predictionapi "github.com/gocrane/api/prediction/v1alpha1" + "github.com/gocrane/crane/cmd/craned/app/options" "github.com/gocrane/crane/pkg/controller/analytics" "github.com/gocrane/crane/pkg/controller/cnp" @@ -282,29 +283,30 @@ func initializationControllers(ctx context.Context, mgr ctrl.Manager, opts *opti } if utilfeature.DefaultMutableFeatureGate.Enabled(features.CraneAnalysis) { - if err := (&analytics.Controller{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - RestMapper: mgr.GetRESTMapper(), - Recorder: mgr.GetEventRecorderFor("analytics-controller"), - }).SetupWithManager(mgr); err != nil { - klog.Exit(err, "unable to create controller", "controller", "AnalyticsController") - } - configSet, err := recommend.LoadConfigSetFromFile(opts.RecommendationConfigFile) if err != nil { klog.Errorf("Failed to load recommendation config file: %v", err) os.Exit(1) } - if err := (&recommendation.Controller{ + + if err := (&analytics.Controller{ Client: mgr.GetClient(), - ConfigSet: configSet, Scheme: mgr.GetScheme(), RestMapper: mgr.GetRESTMapper(), - Recorder: mgr.GetEventRecorderFor("recommendation-controller"), + Recorder: mgr.GetEventRecorderFor("analytics-controller"), + ConfigSet: configSet, ScaleClient: scaleClient, PredictorMgr: predictorMgr, Provider: historyDataSource, + }).SetupWithManager(mgr); err != nil { + klog.Exit(err, "unable to create controller", "controller", "AnalyticsController") + } + + if err := (&recommendation.Controller{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + RestMapper: mgr.GetRESTMapper(), + Recorder: mgr.GetEventRecorderFor("recommendation-controller"), }).SetupWithManager(mgr); err != nil { klog.Exit(err, "unable to create controller", "controller", "RecommendationController") } diff --git a/docs/images/analytics-arch.png b/docs/images/analytics-arch.png new file mode 100644 index 000000000..262f17cb4 Binary files /dev/null and b/docs/images/analytics-arch.png differ diff --git a/docs/tutorials/analytics-and-recommendation.zh.md b/docs/tutorials/analytics-and-recommendation.zh.md new file mode 100644 index 000000000..081792aa7 --- /dev/null +++ b/docs/tutorials/analytics-and-recommendation.zh.md @@ -0,0 +1,34 @@ +# 分析和推荐 + +分析和推荐提供了一套自动化的成本优化能力,它帮助用户自动的发现问题并提供优化的方案。就像电脑/手机助手一样,它会定期的扫描、分析你的集群并给出推荐建议。目前,我们提供了两种优化能力: + +- **资源推荐**: 基于应用的历史资源使用推荐 Container 合适的 requests 和 limits +- **弹性推荐**: 筛选所有的工作负载,推荐出适合做弹性的工作负载并给出弹性建议 + +## 架构 + +![analytics-arch](../images/analytics-arch.png) + +## 一次分析的过程 + +1. 用户创建 Analytics 对象,通过 ResourceSelector 选择需要分析的资源,支持选择多类型(基于Group,Kind,Version)的批量选择 +2. 并行分析每个选择的资源,尝试进行分析推荐,每次分析过程分成筛选和推荐两个阶段: + 1. 筛选:排除不满足推荐条件的资源。比如对于弹性推荐,排除没有 running pod 的workload + 2. 推荐:通过算法计算分析,给出推荐结果 +3. 如果通过筛选,创建 Recommendation 对象,将推荐结果展示在 Recommendation.Status +4. 未通过筛选的原因和状态展示在 Analytics.Status +5. 根据运行间隔等待下次分析 + +## 名词解释 + +### 分析 + +分析定义了一个扫描分析任务。支持两种任务类型:资源推荐和弹性推荐。Crane 定期运行分析任务,并产生推荐结果。 + +### 推荐 + +推荐展示了一个优化推荐的结果。推荐的结果是一段 YAML 配置,根据结果用户可以进行相应的优化动作,比如调整应用的资源配置。 + +### 参数配置 + +不同的分析采用不同的计算模型,Crane 提供了一套默认的计算模型以及一套配套的配置,用户可以通过修改配置来定制推荐的效果。支持修改全局的默认配置和修改单个分析任务的配置。 diff --git a/docs/tutorials/hpa-recommendation.zh.md b/docs/tutorials/hpa-recommendation.zh.md new file mode 100644 index 000000000..a16f790ba --- /dev/null +++ b/docs/tutorials/hpa-recommendation.zh.md @@ -0,0 +1,83 @@ +# 弹性推荐 + +通过弹性推荐,你可以发现集群中适合弹性的资源,并使用 Crane 推荐的弹性配置创建自动弹性器: EffectiveHPA + +## 创建弹性分析 + +创建一个**弹性分析** `Analytics`,这里我们通过 deployment: `craned` and `metric-adapter` 作为一个例子 + +```bash +kubectl apply -f https://raw.githubusercontent.com/gocrane/crane/main/examples/analytics/analytics-hpa.yaml +kubectl get analytics -n crane-system +``` + +## 查看分析结果 + + + +## 弹性推荐计算模型 + +### 筛选阶段 + +1. 低副本数的工作负载: 过低的副本数可能弹性需求不高,关联配置: ehpa.deployment-min-replicas | ehpa.statefulset-min-replicas | ehpa.workload-min-replicas +2. 存在一定比例非 Running Pod 的工作负载: 如果工作负载的 Pod 大多不能正常运行,可能不适合弹性,关联配置: ehpa.pod-min-ready-seconds | ehpa.pod-available-ratio +3. 低 CPU 使用量的工作负载: 过低使用量的工作负载意味着没有业务压力,此时通过使用率推荐弹性不准,关联配置: ehpa.min-cpu-usage-threshold +4. CPU 使用量的波动率过低: 使用量的最大值和最小值的倍数定义为波动率,波动率过低的工作负载通过弹性降本的收益不大,关联配置: ehpa.fluctuation-threshold + +### 推荐 + +推荐阶段通过以下模型推荐一个 EffectiveHPA 的 Spec。每个字段的推荐逻辑如下: + +**推荐 TargetUtilization** + +原理: 使用 Pod P99 资源利用率推荐弹性的目标。因为如果应用可以在 P99 时间内接受这个利用率,可以推断出可作为弹性的目标。 + + 1. 通过 Percentile 算法得到 Pod 过去七天 的 P99 使用量: pod_cpu_usage_p99 + 2. 对应的利用率:target_pod_cpu_utilization = pod_cpu_usage_p99 / pod_cpu_request + 3. 为了防止利用率过大或过小,target_pod_cpu_utilization 需要小于 ehpa.min-cpu-target-utilization 和大于 ehpa.max-cpu-target-utilization + +**推荐 minReplicas** + +原理: 使用 workload 过去七天内每小时负载最低的利用率推荐 minReplicas。 + +1. 计算过去7天 workload 每小时使用量中位数的最低值: workload_cpu_usage_medium_min +2. 对应的最低利用率对应的副本数: minReplicas = workload_cpu_usage_medium_min / pod_cpu_request / ehpa.max-cpu-target-utilization +3. 为了防止 minReplicas 过小,minReplicas 需要大于等于 ehpa.default-min-replicas + +**推荐 maxReplicas** + +原理: 使用 workload 过去和未来七天的负载推荐最大副本数。 + +1. 计算过去七天和未来七天 workload cpu 使用量的 P95: workload_cpu_usage_p95 +2. 对应的副本数: max_replicas_origin = workload_cpu_usage_p95 / pod_cpu_request / target_cpu_utilization +3. 为了应对流量洪峰,放大一定倍数: max_replicas = max_replicas_origin * ehpa.max-replicas-factor + +**推荐CPU以外 MetricSpec** + +1. 如果 workload 配置了 HPA,继承相应除 CpuUtilization 以外的其他 MetricSpec + +**推荐 Behavior** + +1. 如果 workload 配置了 HPA,继承相应的 Behavior 配置 + +**预测** + +1. 尝试预测工作负载未来七天的 CPU 使用量,算法是 DSP +2. 如果预测成功则添加预测配置 +3. 如果不可预测则不添加预测配置,退化成不具有预测功能的 EffectiveHPA + + +## 弹性分析计算配置 + +- ehpa.deployment-min-replicas: 默认值 1,小于该值的工作负载不做弹性推荐 +- ehpa.statefulset-min-replicas: 默认值 1,小于该值的工作负载不做弹性推荐 +- ehpa.workload-min-replicas: 默认值 1,小于该值的工作负载不做弹性推荐 +- ehpa.pod-min-ready-seconds: 默认值 30,定义了 Pod 是否 Ready 的秒数 +- ehpa.pod-available-ratio: 默认值 0.5,Ready Pod 比例小于该值的工作负载不做弹性推荐 +- ehpa.default-min-replicas: 默认值 2,最小 minReplicas +- ehpa.max-replicas-factor: 默认值 3,计算 maxReplicas 的倍数 +- ehpa.min-cpu-usage-threshold: 默认值 10, 小于该值的工作负载不做弹性推荐 +- ehpa.fluctuation-threshold: 默认值 1.5, 小于该值的工作负载不做弹性推荐 +- ehpa.min-cpu-target-utilization: 默认值 30 +- ehpa.max-cpu-target-utilization: 默认值 75 +- ehpa.reference-hpa: 默认值 true,继承现有的 HPA 配置 \ No newline at end of file diff --git a/go.mod b/go.mod index 337ef666e..9a5b1a7aa 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.17 require ( github.com/go-echarts/go-echarts/v2 v2.2.4 - github.com/gocrane/api v0.4.0 + github.com/gocrane/api v0.4.1-0.20220507041258-d376db2b4ad4 github.com/google/cadvisor v0.39.2 github.com/mjibson/go-dsp v0.0.0-20180508042940-11479a337f12 github.com/prometheus/client_golang v1.11.0 diff --git a/go.sum b/go.sum index 732f29695..e4ee71bad 100644 --- a/go.sum +++ b/go.sum @@ -310,6 +310,8 @@ github.com/gocrane/api v0.3.0 h1:ziH+zYQy/shiqQ6yskMs67e+bQ9WmPp8eCVhLW85NFQ= github.com/gocrane/api v0.3.0/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk= github.com/gocrane/api v0.4.0 h1:1IWP3gbkp3T4kX68w4+PfqUr4Cb/gaJrihLYg6aKOLY= github.com/gocrane/api v0.4.0/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk= +github.com/gocrane/api v0.4.1-0.20220507041258-d376db2b4ad4 h1:vGDg3G6y661KAlhjf/8/r8JCjaIi6aV8szCP+MZRU3Y= +github.com/gocrane/api v0.4.1-0.20220507041258-d376db2b4ad4/go.mod h1:GxI+t9AW8+NsHkz2JkPBIJN//9eLUjTZl1ScYAbXMbk= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4 h1:9349emZab16e7zQvpmsbtjc18ykshndd8y2PG3sgJbA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= diff --git a/pkg/controller/analytics/analytics_controller.go b/pkg/controller/analytics/analytics_controller.go index eaf4bd7a9..d6f6e7e51 100644 --- a/pkg/controller/analytics/analytics_controller.go +++ b/pkg/controller/analytics/analytics_controller.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "sync" "time" corev1 "k8s.io/api/core/v1" @@ -19,11 +20,13 @@ import ( "k8s.io/client-go/discovery" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/scale" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/yaml" analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" craneclient "github.com/gocrane/api/pkg/generated/clientset/versioned" @@ -31,6 +34,13 @@ import ( analysislister "github.com/gocrane/api/pkg/generated/listers/analysis/v1alpha1" "github.com/gocrane/crane/pkg/known" + predictormgr "github.com/gocrane/crane/pkg/predictor" + "github.com/gocrane/crane/pkg/providers" + "github.com/gocrane/crane/pkg/recommend" +) + +const ( + RecommendationMissionMessageSuccess = "Success" ) type Controller struct { @@ -43,6 +53,10 @@ type Controller struct { discoveryClient discovery.DiscoveryInterface recommLister analysislister.RecommendationLister K8SVersion *version.Version + ScaleClient scale.ScalesGetter + PredictorMgr predictormgr.Manager + Provider providers.History + ConfigSet *analysisv1alph1.ConfigSet } func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { @@ -69,9 +83,9 @@ func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu return ctrl.Result{}, nil } - c.DoAnalytics(ctx, analytics) + finished := c.DoAnalytics(ctx, analytics) - if analytics.Spec.CompletionStrategy.CompletionStrategyType == analysisv1alph1.CompletionStrategyPeriodical { + if finished && analytics.Spec.CompletionStrategy.CompletionStrategyType == analysisv1alph1.CompletionStrategyPeriodical { if analytics.Spec.CompletionStrategy.PeriodSeconds != nil { d := time.Second * time.Duration(*analytics.Spec.CompletionStrategy.PeriodSeconds) klog.V(4).InfoS("Will re-sync", "after", d) @@ -81,7 +95,7 @@ func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } } - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: time.Second * 1}, nil } // ShouldAnalytics decide if we need do analytics according to status @@ -105,7 +119,7 @@ func (c *Controller) ShouldAnalytics(analytics *analysisv1alph1.Analytics) bool return true } -func (c *Controller) DoAnalytics(ctx context.Context, analytics *analysisv1alph1.Analytics) { +func (c *Controller) DoAnalytics(ctx context.Context, analytics *analysisv1alph1.Analytics) bool { newStatus := analytics.Status.DeepCopy() identities, err := c.GetIdentities(ctx, analytics) @@ -115,14 +129,34 @@ func (c *Controller) DoAnalytics(ctx context.Context, analytics *analysisv1alph1 klog.Errorf(msg) setReadyCondition(newStatus, metav1.ConditionFalse, "FailedSelectResource", msg) c.UpdateStatus(ctx, analytics, newStatus) - return + return false + } + + timeNow := metav1.Now() + + // if the first mission start time is last round, reset currMissions here + currMissions := newStatus.Recommendations + if currMissions != nil && len(currMissions) > 0 { + planingTime := currMissions[0].LastStartTime.Add(time.Duration(*analytics.Spec.CompletionStrategy.PeriodSeconds) * time.Second) + if time.Now().After(planingTime) { + currMissions = nil // reset missions to trigger creation for missions + } + } + + if currMissions == nil { + // create recommendation Missions for this round + for _, id := range identities { + currMissions = append(currMissions, analysisv1alph1.RecommendationMission{ + TargetRef: corev1.ObjectReference{Kind: id.Kind, APIVersion: id.APIVersion, Namespace: id.Namespace, Name: id.Name}, + }) + } } - var recommendations []*analysisv1alph1.Recommendation + var currRecommendations []*analysisv1alph1.Recommendation if analytics.Namespace == known.CraneSystemNamespace { - recommendations, err = c.recommLister.List(labels.Everything()) + currRecommendations, err = c.recommLister.List(labels.Everything()) } else { - recommendations, err = c.recommLister.Recommendations(analytics.Namespace).List(labels.Everything()) + currRecommendations, err = c.recommLister.Recommendations(analytics.Namespace).List(labels.Everything()) } if err != nil { c.Recorder.Event(analytics, corev1.EventTypeNormal, "FailedSelectResource", err.Error()) @@ -130,11 +164,11 @@ func (c *Controller) DoAnalytics(ctx context.Context, analytics *analysisv1alph1 klog.Errorf(msg) setReadyCondition(newStatus, metav1.ConditionFalse, "FailedSelectResource", msg) c.UpdateStatus(ctx, analytics, newStatus) - return + return false } recommendationMap := map[string]*analysisv1alph1.Recommendation{} - for _, r := range recommendations { + for _, r := range currRecommendations { k := objRefKey(r.Spec.TargetRef.Kind, r.Spec.TargetRef.APIVersion, r.Spec.TargetRef.Namespace, r.Spec.TargetRef.Name, string(r.Spec.Type)) recommendationMap[k] = r.DeepCopy() } @@ -150,63 +184,59 @@ func (c *Controller) DoAnalytics(ctx context.Context, analytics *analysisv1alph1 } } - var refs []analysisv1alph1.RecommendationReference - - for k, id := range identities { - if r, exists := recommendationMap[k]; exists { - refs = append(refs, analysisv1alph1.RecommendationReference{ - ObjectReference: corev1.ObjectReference{ - Kind: recommendationMap[k].Kind, - Name: recommendationMap[k].Name, - Namespace: recommendationMap[k].Namespace, - APIVersion: recommendationMap[k].APIVersion, - UID: recommendationMap[k].UID, - }, - TargetRef: recommendationMap[k].Spec.TargetRef, - }) - found := false - for _, or := range r.OwnerReferences { - if or.Name == analytics.Name && or.Kind == analytics.Kind && or.APIVersion == analytics.APIVersion { - found = true - break - } - } - if !found { - rCopy := r.DeepCopy() - rCopy.OwnerReferences = append(rCopy.OwnerReferences, *newOwnerRef(analytics)) - if err = c.Update(ctx, rCopy); err != nil { - c.Recorder.Event(analytics, corev1.EventTypeNormal, "FailedUpdateRecommendation", err.Error()) - msg := fmt.Sprintf("Failed to update ownerReferences for recommendation %s, Analytics %s error %v", klog.KObj(rCopy), klog.KObj(analytics), err) - klog.Errorf(msg) - setReadyCondition(newStatus, metav1.ConditionFalse, "FailedUpdateRecommendation", msg) - c.UpdateStatus(ctx, analytics, newStatus) - return - } - klog.InfoS("Successful to update ownerReferences", "Recommendation", rCopy, "Analytics", analytics) - } - } else { - if err = c.CreateRecommendation(ctx, analytics, id, &refs); err != nil { - c.Recorder.Event(analytics, corev1.EventTypeNormal, "FailedCreateRecommendation", err.Error()) - msg := fmt.Sprintf("Failed to create recommendation, Analytics %s error %v", klog.KObj(analytics), err) - klog.Errorf(msg) - setReadyCondition(newStatus, metav1.ConditionFalse, "FailedCreateRecommendation", msg) - c.UpdateStatus(ctx, analytics, newStatus) - return + maxConcurrency := 10 + executeIndex := -1 + var concurrency int + for index, mission := range currMissions { + if mission.LastStartTime != nil { + continue + } + if executeIndex == -1 { + executeIndex = index + } + if concurrency < maxConcurrency { + concurrency++ + } + } + + wg := sync.WaitGroup{} + wg.Add(concurrency) + for index := range currMissions { + if index < executeIndex || index >= concurrency+executeIndex { + continue + } + + var existRecommendation *analysisv1alph1.Recommendation + for _, r := range currRecommendations { + if currMissions[index].UID == r.UID { + existRecommendation = r } } + + go c.ExecuteMission(ctx, &wg, analytics, identities, &currMissions[index], existRecommendation, timeNow) } - newStatus.Recommendations = refs - timeNow := metav1.Now() - newStatus.LastUpdateTime = &timeNow + + wg.Wait() + + finished := false + if executeIndex+concurrency == len(currMissions) { + finished = true + } + + if finished { + newStatus.LastUpdateTime = &timeNow + } + + newStatus.Recommendations = currMissions setReadyCondition(newStatus, metav1.ConditionTrue, "AnalyticsReady", "Analytics is ready") c.UpdateStatus(ctx, analytics, newStatus) + return finished } -func (c *Controller) CreateRecommendation(ctx context.Context, analytics *analysisv1alph1.Analytics, - id ObjectIdentity, refs *[]analysisv1alph1.RecommendationReference) error { +func (c *Controller) CreateRecommendationObject(ctx context.Context, analytics *analysisv1alph1.Analytics, + target corev1.ObjectReference, id ObjectIdentity) *analysisv1alph1.Recommendation { - targetRef := corev1.ObjectReference{Kind: id.Kind, APIVersion: id.APIVersion, Namespace: id.Namespace, Name: id.Name} recommendation := &analysisv1alph1.Recommendation{ ObjectMeta: metav1.ObjectMeta{ GenerateName: fmt.Sprintf("%s-%s-", analytics.Name, strings.ToLower(string(analytics.Spec.Type))), @@ -217,31 +247,19 @@ func (c *Controller) CreateRecommendation(ctx context.Context, analytics *analys Labels: id.Labels, }, Spec: analysisv1alph1.RecommendationSpec{ - TargetRef: targetRef, - Type: analytics.Spec.Type, - CompletionStrategy: analytics.Spec.CompletionStrategy, + TargetRef: target, + Type: analytics.Spec.Type, }, } - if err := c.Create(ctx, recommendation); err != nil { - klog.Error(err, "Failed to create Recommendation") - return err + if recommendation.Labels == nil { + recommendation.Labels = map[string]string{} } + recommendation.Labels[known.AnalyticsNameLabel] = analytics.Name + recommendation.Labels[known.AnalyticsUidLabel] = string(analytics.UID) + recommendation.Labels[known.AnalyticsTypeLabel] = string(analytics.Spec.Type) - klog.InfoS("Successful to create", "Recommendation", klog.KObj(recommendation), "Analytics", klog.KObj(analytics)) - - *refs = append(*refs, analysisv1alph1.RecommendationReference{ - ObjectReference: corev1.ObjectReference{ - Kind: recommendation.Kind, - Name: recommendation.Name, - Namespace: recommendation.Namespace, - APIVersion: recommendation.APIVersion, - UID: recommendation.UID, - }, - TargetRef: targetRef, - }) - - return nil + return recommendation } func (c *Controller) SetupWithManager(mgr ctrl.Manager) error { @@ -352,9 +370,97 @@ func (c *Controller) GetIdentities(ctx context.Context, analytics *analysisv1alp } } + if len(identities) == 0 { + return nil, fmt.Errorf("no resource matched resource selector") + } + return identities, nil } +func (c *Controller) ExecuteMission(ctx context.Context, wg *sync.WaitGroup, analytics *analysisv1alph1.Analytics, identities map[string]ObjectIdentity, mission *analysisv1alph1.RecommendationMission, existRecommendation *analysisv1alph1.Recommendation, timeNow metav1.Time) { + defer func() { + mission.LastStartTime = &timeNow + if mission.Message != RecommendationMissionMessageSuccess { + if existRecommendation != nil { + err := c.Client.Delete(ctx, existRecommendation) + if err != nil { + klog.Errorf("Delete recommendation %s failed: %v", klog.KObj(existRecommendation), err) + } + } + + klog.Errorf("Mission failed: %s", mission.Message) + } + + wg.Done() + }() + + k := objRefKey(mission.TargetRef.Kind, mission.TargetRef.APIVersion, mission.TargetRef.Namespace, mission.TargetRef.Name, string(analytics.Spec.Type)) + if id, exist := identities[k]; !exist { + mission.Message = fmt.Sprintf("Failed to get identity, key %s. ", k) + return + } else { + recommendation := existRecommendation + if recommendation == nil { + recommendation = c.CreateRecommendationObject(ctx, analytics, mission.TargetRef, id) + } + // do recommendation + recommender, err := recommend.NewRecommender(c.Client, c.RestMapper, c.ScaleClient, recommendation, c.PredictorMgr, c.Provider, c.ConfigSet) + if err != nil { + mission.Message = fmt.Sprintf("Failed to create recommender, Recommendation %s error %v", klog.KObj(recommendation), err) + return + } + + proposed, err := recommender.Offer() + if err != nil { + mission.Message = fmt.Sprintf("Failed to offer recommend, Recommendation %s: %v", klog.KObj(recommendation), err) + return + } + + var value string + if proposed.ResourceRequest != nil { + valueBytes, err := yaml.Marshal(proposed.ResourceRequest) + if err != nil { + mission.Message = err.Error() + return + } + value = string(valueBytes) + } else if proposed.EffectiveHPA != nil { + valueBytes, err := yaml.Marshal(proposed.EffectiveHPA) + if err != nil { + mission.Message = err.Error() + return + } + value = string(valueBytes) + } + + recommendation.Status.RecommendedValue = value + if existRecommendation != nil { + klog.Infof("Update recommendation %s", klog.KObj(recommendation)) + if err := c.Update(ctx, recommendation); err != nil { + mission.Message = fmt.Sprintf("Failed to create recommendation %s: %v", klog.KObj(recommendation), err) + return + } + + klog.Infof("Successful to update Recommendation %s", klog.KObj(recommendation)) + } else { + klog.Infof("Create recommendation %s", klog.KObj(recommendation)) + if err := c.Create(ctx, recommendation); err != nil { + mission.Message = fmt.Sprintf("Failed to create recommendation %s: %v", klog.KObj(recommendation), err) + return + } + + klog.Infof("Successful to create Recommendation %s", klog.KObj(recommendation)) + } + + mission.Message = "Success" + mission.UID = recommendation.UID + mission.Name = recommendation.Name + mission.Namespace = recommendation.Namespace + mission.Kind = recommendation.Kind + mission.APIVersion = recommendation.APIVersion + } +} + func (c *Controller) UpdateStatus(ctx context.Context, analytics *analysisv1alph1.Analytics, newStatus *analysisv1alph1.AnalyticsStatus) { if !equality.Semantic.DeepEqual(&analytics.Status, newStatus) { analytics.Status = *newStatus diff --git a/pkg/controller/recommendation/recommendation_controller.go b/pkg/controller/recommendation/recommendation_controller.go index e4c3a1077..09d80565f 100644 --- a/pkg/controller/recommendation/recommendation_controller.go +++ b/pkg/controller/recommendation/recommendation_controller.go @@ -3,7 +3,6 @@ package recommendation import ( "context" "fmt" - "time" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/equality" @@ -17,9 +16,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" analysisv1alph1 "github.com/gocrane/api/analysis/v1alpha1" + predictormgr "github.com/gocrane/crane/pkg/predictor" "github.com/gocrane/crane/pkg/providers" - "github.com/gocrane/crane/pkg/recommend" ) // Controller is responsible for reconcile Recommendation @@ -47,89 +46,28 @@ func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu return ctrl.Result{}, nil } - shouldRecommend := c.ShouldRecommend(recommendation) - if !shouldRecommend { - klog.V(4).Infof("Nothing happens for Recommendation %s", req.NamespacedName) - return ctrl.Result{}, nil - } - // defaulting for TargetRef.Namespace if recommendation.Spec.TargetRef.Namespace == "" { recommendation.Spec.TargetRef.Namespace = recommendation.Namespace } - c.DoRecommend(ctx, recommendation) - - if recommendation.Spec.CompletionStrategy.CompletionStrategyType == analysisv1alph1.CompletionStrategyPeriodical { - if recommendation.Spec.CompletionStrategy.PeriodSeconds != nil { - d := time.Second * time.Duration(*recommendation.Spec.CompletionStrategy.PeriodSeconds) - klog.V(4).InfoS("Will re-sync", "after", d) - return ctrl.Result{ - RequeueAfter: d, - }, nil - } - } - return ctrl.Result{}, nil -} - -// ShouldRecommend decide if we need do recommendation according to status -func (c *Controller) ShouldRecommend(recommendation *analysisv1alph1.Recommendation) bool { - lastUpdateTime := recommendation.Status.LastUpdateTime - - if recommendation.Spec.CompletionStrategy.CompletionStrategyType == analysisv1alph1.CompletionStrategyOnce { - if lastUpdateTime != nil { - // already finish recommendation - return false - } - } else { - if lastUpdateTime != nil { - planingTime := lastUpdateTime.Add(time.Duration(*recommendation.Spec.CompletionStrategy.PeriodSeconds) * time.Second) - if time.Now().Before(planingTime) { - return false - } - } - } - - return true -} - -func (c *Controller) DoRecommend(ctx context.Context, recommendation *analysisv1alph1.Recommendation) { - klog.V(4).Infof("Starting to process Recommendation %s", klog.KObj(recommendation)) - newStatus := recommendation.Status.DeepCopy() - recommender, err := recommend.NewRecommender(c.Client, c.RestMapper, c.ScaleClient, recommendation, c.PredictorMgr, c.Provider, c.ConfigSet) - if err != nil { - c.Recorder.Event(recommendation, v1.EventTypeWarning, "FailedCreateRecommender", err.Error()) - msg := fmt.Sprintf("Failed to create recommender, Recommendation %s error %v", klog.KObj(recommendation), err) - klog.Errorf(msg) - setReadyCondition(newStatus, metav1.ConditionFalse, "FailedCreateRecommender", msg) - c.UpdateStatus(ctx, recommendation, newStatus) - return - } - - proposed, err := recommender.Offer() - if err != nil { - c.Recorder.Event(recommendation, v1.EventTypeWarning, "FailedOfferRecommendation", err.Error()) - msg := fmt.Sprintf("Failed to offer recommend, Recommendation %s: %v", klog.KObj(recommendation), err) - klog.Errorf(msg) - setReadyCondition(newStatus, metav1.ConditionFalse, "FailedOfferRecommend", msg) - c.UpdateStatus(ctx, recommendation, newStatus) - return - } - - err = c.UpdateRecommendation(ctx, recommendation, proposed, newStatus) + err = c.UpdateRecommendation(ctx, recommendation) if err != nil { c.Recorder.Event(recommendation, v1.EventTypeWarning, "FailedUpdateRecommendationValue", err.Error()) msg := fmt.Sprintf("Failed to update recommendation value, Recommendation %s: %v", klog.KObj(recommendation), err) klog.Errorf(msg) setReadyCondition(newStatus, metav1.ConditionFalse, "FailedUpdateRecommendationValue", msg) c.UpdateStatus(ctx, recommendation, newStatus) - return + return ctrl.Result{}, err } + c.Recorder.Event(recommendation, v1.EventTypeNormal, "UpdatedRecommendationValue", "") + setReadyCondition(newStatus, metav1.ConditionTrue, "RecommendationReady", "Recommendation is ready") c.UpdateStatus(ctx, recommendation, newStatus) + return ctrl.Result{}, nil } func (c *Controller) UpdateStatus(ctx context.Context, recommendation *analysisv1alph1.Recommendation, newStatus *analysisv1alph1.RecommendationStatus) { diff --git a/pkg/controller/recommendation/updater.go b/pkg/controller/recommendation/updater.go index 5dd79b8f0..8d35dc781 100644 --- a/pkg/controller/recommendation/updater.go +++ b/pkg/controller/recommendation/updater.go @@ -18,27 +18,28 @@ import ( autoscalingapi "github.com/gocrane/api/autoscaling/v1alpha1" "github.com/gocrane/crane/pkg/known" - "github.com/gocrane/crane/pkg/recommend/types" + recommendtypes "github.com/gocrane/crane/pkg/recommend/types" "github.com/gocrane/crane/pkg/utils" ) -func (c *Controller) UpdateRecommendation(ctx context.Context, recommendation *analysisapi.Recommendation, proposed *types.ProposedRecommendation, status *analysisapi.RecommendationStatus) error { - var value string - if proposed.ResourceRequest != nil { - valueBytes, err := yaml.Marshal(proposed.ResourceRequest) +func (c *Controller) UpdateRecommendation(ctx context.Context, recommendation *analysisapi.Recommendation) error { + var proposedEHPA *recommendtypes.EffectiveHorizontalPodAutoscalerRecommendation + var proposedResource *recommendtypes.ProposedRecommendation + + if recommendation.Spec.Type == analysisapi.AnalysisTypeResource { + err := yaml.Unmarshal([]byte(recommendation.Status.RecommendedValue), proposedResource) if err != nil { return err } - value = string(valueBytes) - } else if proposed.EffectiveHPA != nil { - valueBytes, err := yaml.Marshal(proposed.EffectiveHPA) + } + + if recommendation.Spec.Type == analysisapi.AnalysisTypeHPA { + err := yaml.Unmarshal([]byte(recommendation.Status.RecommendedValue), proposedEHPA) if err != nil { return err } - value = string(valueBytes) } - status.RecommendedValue = value if recommendation.Spec.AdoptionType == analysisapi.AdoptionTypeStatus { return nil } @@ -59,9 +60,9 @@ func (c *Controller) UpdateRecommendation(ctx context.Context, recommendation *a switch recommendation.Spec.Type { case analysisapi.AnalysisTypeResource: - annotation[known.ResourceRecommendationValueAnnotation] = value + annotation[known.ResourceRecommendationValueAnnotation] = recommendation.Status.RecommendedValue case analysisapi.AnalysisTypeHPA: - annotation[known.HPARecommendationValueAnnotation] = value + annotation[known.HPARecommendationValueAnnotation] = recommendation.Status.RecommendedValue } unstructed.SetAnnotations(annotation) @@ -73,7 +74,7 @@ func (c *Controller) UpdateRecommendation(ctx context.Context, recommendation *a // Only support Auto Type for EHPA recommendation if recommendation.Spec.AdoptionType == analysisapi.AdoptionTypeAuto { - if proposed.EffectiveHPA != nil { + if proposedEHPA != nil { ehpa, err := utils.GetEHPAFromScaleTarget(ctx, c.Client, recommendation.Spec.TargetRef.Namespace, recommendation.Spec.TargetRef) if err != nil { return fmt.Errorf("get EHPA from target failed: %v. ", err) @@ -85,11 +86,11 @@ func (c *Controller) UpdateRecommendation(ctx context.Context, recommendation *a Name: recommendation.Spec.TargetRef.Name, }, Spec: autoscalingapi.EffectiveHorizontalPodAutoscalerSpec{ - MinReplicas: proposed.EffectiveHPA.MinReplicas, - MaxReplicas: *proposed.EffectiveHPA.MaxReplicas, - Metrics: proposed.EffectiveHPA.Metrics, + MinReplicas: proposedEHPA.MinReplicas, + MaxReplicas: *proposedEHPA.MaxReplicas, + Metrics: proposedEHPA.Metrics, ScaleStrategy: autoscalingapi.ScaleStrategyPreview, - Prediction: proposed.EffectiveHPA.Prediction, + Prediction: proposedEHPA.Prediction, ScaleTargetRef: autoscalingv2.CrossVersionObjectReference{ Kind: recommendation.Spec.TargetRef.Kind, APIVersion: recommendation.Spec.TargetRef.APIVersion, @@ -109,9 +110,9 @@ func (c *Controller) UpdateRecommendation(ctx context.Context, recommendation *a // if user change it, we don't want to override it. // The reason for Prediction is the same. ehpaUpdate := ehpa.DeepCopy() - ehpaUpdate.Spec.MinReplicas = proposed.EffectiveHPA.MinReplicas - ehpaUpdate.Spec.MaxReplicas = *proposed.EffectiveHPA.MaxReplicas - ehpaUpdate.Spec.Metrics = proposed.EffectiveHPA.Metrics + ehpaUpdate.Spec.MinReplicas = proposedEHPA.MinReplicas + ehpaUpdate.Spec.MaxReplicas = *proposedEHPA.MaxReplicas + ehpaUpdate.Spec.Metrics = proposedEHPA.Metrics if !equality.Semantic.DeepEqual(&ehpaUpdate.Spec, &ehpa.Spec) { err = c.Client.Update(ctx, ehpaUpdate) @@ -124,7 +125,7 @@ func (c *Controller) UpdateRecommendation(ctx context.Context, recommendation *a } } - if proposed.ResourceRequest != nil { + if proposedResource != nil { evpa, err := utils.GetEVPAFromScaleTarget(ctx, c.Client, recommendation.Spec.TargetRef.Namespace, recommendation.Spec.TargetRef) if err != nil { return fmt.Errorf("get EVPA from target failed: %v. ", err) diff --git a/pkg/known/label.go b/pkg/known/label.go index c33988061..f3a1aa4be 100644 --- a/pkg/known/label.go +++ b/pkg/known/label.go @@ -9,3 +9,9 @@ const ( EnsuranceAnalyzedPressureTaintKey = "ensurance.crane.io/analyzed-pressure" EnsuranceAnalyzedPressureConditionKey = "analyzed-pressure" ) + +const ( + AnalyticsNameLabel = "analysis.crane.io/analytics-name" + AnalyticsUidLabel = "analysis.crane.io/analytics-uid" + AnalyticsTypeLabel = "analysis.crane.io/analytics-type" +)