Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
zsnmwy authored Jun 1, 2022
2 parents bd3b535 + c594dde commit e111f0b
Show file tree
Hide file tree
Showing 61 changed files with 1,551 additions and 492 deletions.
2 changes: 1 addition & 1 deletion cmd/crane-agent/app/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func Run(ctx context.Context, opts *options.Options) error {
actionInformer := craneInformerFactory.Ensurance().V1alpha1().AvoidanceActions()
tspInformer := craneInformerFactory.Prediction().V1alpha1().TimeSeriesPredictions()

newAgent, err := agent.NewAgent(ctx, hostname, opts.RuntimeEndpoint, kubeClient, craneClient,
newAgent, err := agent.NewAgent(ctx, hostname, opts.RuntimeEndpoint, opts.CgroupDriver, kubeClient, craneClient,
podInformer, nodeInformer, nepInformer, actionInformer, tspInformer, opts.NodeResourceReserved, opts.Ifaces, healthCheck, opts.CollectInterval)

if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions cmd/crane-agent/app/options/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ type Options struct {
HostnameOverride string
// RuntimeEndpoint is the endpoint of runtime
RuntimeEndpoint string
// driver that the kubelet uses to manipulate cgroups on the host (cgroupfs or systemd)
CgroupDriver string
// Is debug/pprof endpoint enabled
EnableProfiling bool
// BindAddr is the address the endpoint binds to.
Expand Down Expand Up @@ -45,6 +47,7 @@ func (o *Options) Validate() error {
func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.StringVar(&o.HostnameOverride, "hostname-override", "", "Which is the name of k8s node be used to filtered.")
flags.StringVar(&o.RuntimeEndpoint, "runtime-endpoint", "", "The runtime endpoint docker: unix:///var/run/dockershim.sock, containerd: unix:///run/containerd/containerd.sock, cri-o: unix:///run/crio/crio.sock, k3s: unix:///run/k3s/containerd/containerd.sock.")
flags.StringVar(&o.CgroupDriver, "cgroup-driver", "cgroupfs", "Driver that the kubelet uses to manipulate cgroups on the host. Possible values: 'cgroupfs', 'systemd'. Default to 'cgroupfs'")
flags.Bool("enable-profiling", false, "Is debug/pprof endpoint enabled, default: false")
flags.StringVar(&o.BindAddr, "bind-address", "0.0.0.0:8081", "The address the agent binds to for metrics, health-check and pprof, default: 0.0.0.0:8081.")
flags.DurationVar(&o.CollectInterval, "collect-interval", 10*time.Second, "Period for the state collector to collect metrics, default: 10s")
Expand Down
30 changes: 15 additions & 15 deletions cmd/craned/app/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,20 @@ func Run(ctx context.Context, opts *options.Options) error {
return err
}
// initialize data sources and predictor
realtimeDataSources, histroyDataSources, _ := initializationDataSource(mgr, opts)
predictorMgr := initializationPredictorManager(opts, realtimeDataSources, histroyDataSources)

initializationScheme()
initializationWebhooks(mgr, opts)
initializationControllers(ctx, mgr, opts, predictorMgr, histroyDataSources[providers.PrometheusDataSource])
// initialization custom collector metrics
initializationMetricCollector(mgr)
realtimeDataSources, histroyDataSources, _ := initDataSources(mgr, opts)
predictorMgr := initPredictorManager(opts, realtimeDataSources, histroyDataSources)

initScheme()
initWebhooks(mgr, opts)
initControllers(ctx, mgr, opts, predictorMgr, histroyDataSources[providers.PrometheusDataSource])
// initialize custom collector metrics
initMetricCollector(mgr)
runAll(ctx, mgr, predictorMgr, opts)

return nil
}

func initializationScheme() {
func initScheme() {
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
if utilfeature.DefaultFeatureGate.Enabled(features.CraneAutoscaling) {
utilruntime.Must(autoscalingapi.AddToScheme(scheme))
Expand All @@ -135,12 +135,12 @@ func initializationScheme() {
}
}

func initializationMetricCollector(mgr ctrl.Manager) {
func initMetricCollector(mgr ctrl.Manager) {
// register as prometheus metric collector
metrics.CustomCollectorRegister(metrics.NewTspMetricCollector(mgr.GetClient()))
}

func initializationWebhooks(mgr ctrl.Manager, opts *options.Options) {
func initWebhooks(mgr ctrl.Manager, opts *options.Options) {
if !opts.WebhookConfig.Enabled {
return
}
Expand All @@ -159,7 +159,7 @@ func initializationWebhooks(mgr ctrl.Manager, opts *options.Options) {
}
}

func initializationDataSource(mgr ctrl.Manager, opts *options.Options) (map[providers.DataSourceType]providers.RealTime, map[providers.DataSourceType]providers.History, map[providers.DataSourceType]providers.Interface) {
func initDataSources(mgr ctrl.Manager, opts *options.Options) (map[providers.DataSourceType]providers.RealTime, map[providers.DataSourceType]providers.History, map[providers.DataSourceType]providers.Interface) {
realtimeDataSources := make(map[providers.DataSourceType]providers.RealTime)
historyDataSources := make(map[providers.DataSourceType]providers.History)
hybridDataSources := make(map[providers.DataSourceType]providers.Interface)
Expand Down Expand Up @@ -193,12 +193,12 @@ func initializationDataSource(mgr ctrl.Manager, opts *options.Options) (map[prov
return realtimeDataSources, historyDataSources, hybridDataSources
}

func initializationPredictorManager(opts *options.Options, realtimeDataSources map[providers.DataSourceType]providers.RealTime, historyDataSources map[providers.DataSourceType]providers.History) predictor.Manager {
func initPredictorManager(opts *options.Options, realtimeDataSources map[providers.DataSourceType]providers.RealTime, historyDataSources map[providers.DataSourceType]providers.History) predictor.Manager {
return predictor.NewManager(realtimeDataSources, historyDataSources, predictor.DefaultPredictorsConfig(opts.AlgorithmModelConfig))
}

// initializationControllers setup controllers with manager
func initializationControllers(ctx context.Context, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) {
// initControllers sets up controllers with the manager
func initControllers(ctx context.Context, mgr ctrl.Manager, opts *options.Options, predictorMgr predictor.Manager, historyDataSource providers.History) {
discoveryClientSet, err := discovery.NewDiscoveryClientForConfig(mgr.GetConfig())
if err != nil {
klog.Exit(err, "Unable to create discover client")
Expand Down
24 changes: 12 additions & 12 deletions deploy/craned/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -101,18 +101,18 @@ data:
- targets: []
properties:
resource.cpu-request-percentile: "0.98"
ehpa.deployment-min-replicas: "1"
ehpa.statefulset-min-replicas: "1"
ehpa.workload-min-replicas: "1"
ehpa.pod-min-ready-seconds: "30"
ehpa.pod-available-ratio: "0.5"
ehpa.default-min-replicas: "2"
ehpa.max-replicas-factor: "3"
ehpa.min-cpu-usage-threshold: "10"
ehpa.fluctuation-threshold: "1.5"
ehpa.min-cpu-target-utilization: "30"
ehpa.max-cpu-target-utilization: "75"
ehpa.reference-hpa: "true"
replicas.workload-min-replicas: "3"
replicas.pod-min-ready-seconds: "30"
replicas.pod-available-ratio: "0.5"
replicas.default-min-replicas: "3"
replicas.max-replicas-factor: "3"
replicas.min-cpu-usage-threshold: "1"
replicas.fluctuation-threshold: "1.5"
replicas.min-cpu-target-utilization: "30"
replicas.max-cpu-target-utilization: "75"
replicas.cpu-target-utilization: "50"
replicas.cpu-percentile: "95"
replicas.reference-hpa: "true"
---
apiVersion: v1
Expand Down
17 changes: 15 additions & 2 deletions deploy/manifests/analysis.crane.io_analytics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ spec:
format: int64
type: integer
type: object
config:
additionalProperties:
type: string
description: Override Recommendation configs
type: object
resourceSelectors:
description: ResourceSelector indicates how to select resources(e.g.
a set of Deployments) for an Analytics.
Expand Down Expand Up @@ -210,8 +215,8 @@ spec:
format: date-time
type: string
recommendations:
description: Recommendations is a list of pointers to recommendations
that are updated by this analytics.
description: Recommendations is a list of RecommendationMission that
run parallel.
items:
properties:
apiVersion:
Expand All @@ -233,6 +238,14 @@ spec:
kind:
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
type: string
lastStartTime:
description: LastStartTime is last time we start a recommendation
mission.
format: date-time
type: string
message:
description: Message presents the running message for this mission
type: string
name:
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,8 @@ spec:
type: string
sampleInterval:
type: string
targetUtilization:
type: string
type: object
type: object
predictionWindowSeconds:
Expand Down
2 changes: 2 additions & 0 deletions deploy/manifests/ensurance.crane.io_avoidanceactions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ spec:
kind: AvoidanceAction
listKind: AvoidanceActionList
plural: avoidanceactions
shortNames:
- avoid
singular: avoidanceaction
scope: Cluster
versions:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ spec:
kind: NodeQOSEnsurancePolicy
listKind: NodeQOSEnsurancePolicyList
plural: nodeqosensurancepolicies
shortNames:
- nep
singular: nodeqosensurancepolicy
scope: Cluster
versions:
Expand Down Expand Up @@ -95,7 +97,8 @@ spec:
type: integer
type: object
timeoutSeconds:
description: TimeoutSeconds is the timeout for request
description: TimeoutSeconds is the timeout for the request. Defaults
to 0, meaning no timeout.
format: int32
type: integer
type: object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ spec:
kind: PodQOSEnsurancePolicy
listKind: PodQOSEnsurancePolicyList
plural: podqosensurancepolicies
shortNames:
- qep
singular: podqosensurancepolicy
scope: Namespaced
versions:
Expand Down Expand Up @@ -182,12 +184,9 @@ spec:
required:
- port
type: object
initialDelaySeconds:
description: Init delay time for handler Defaults to 5
format: int32
type: integer
timeoutSeconds:
description: Timeout for request. Defaults to 0, instead not timeout
description: TimeoutSeconds is the timeout for the request. Defaults
to 0, meaning no timeout.
format: int32
type: integer
type: object
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ spec:
type: string
sampleInterval:
type: string
targetUtilization:
type: string
type: object
type: object
expressionQuery:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ spec:
type: string
sampleInterval:
type: string
targetUtilization:
type: string
type: object
type: object
expressionQuery:
Expand Down
4 changes: 4 additions & 0 deletions docs/assets/util.css
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@
.arithmatex {
font-size: 0.85rem;
}

foreignObject > div {
font-size: 0.85rem;
}
Binary file added docs/images/crane-keda-ali-compare-cron.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 10 additions & 8 deletions docs/roadmaps/roadmap-1h-2022.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,17 @@ Please let us know if you have urgent needs which are not presented in the plan.
- Node QoS Ensurance for Mem
- Prediction with CPU, Memory, and Business Metrics
- Scalability to support 1K TSP and 1K EPA
### 0.4.0 [April]
- EVPA support
- Dynamic Scheduler
### 0.4.0 [released]
- UI to support EPA.
### 0.5.0 [May]
- HPC open source
- Node & Pod QoS Ensurance for DiskIO and Network
- Prediction with DiskIO, Network
- Resource and Replicas Recommendation
- Load-aware Scheduler
### 0.6.0 [June]
- Scalability to support 3k TSP and 3k EPA
- Application Portrait
- SLO based Application QoS for CPU and Mem
- Algorithm and QoS Documentation
- EHPA grafana dashboard
### 0.7.0 [July]
- Support apiservice router for multiple metric adapters
- Prediction with Business Metrics
### 0.8.0 [August]
- Algorithm estimate notebook
39 changes: 39 additions & 0 deletions docs/tutorials/dynamic-scheduler-plugin.zh.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Dynamic Scheduler:负载感知调度器插件

## 介绍
kubernetes 的原生调度器只能通过资源请求来调度 pod,这很容易造成一系列负载不均的问题:

- 对于某些节点,实际负载与资源请求相差不大,这会导致很大概率出现稳定性问题。
- 对于其他节点来说,实际负载远小于资源请求,这将导致资源的巨大浪费。

为了解决这些问题,动态调度器根据实际的节点利用率构建了一个简单但高效的模型,并过滤掉那些负载高的节点来平衡集群。

## 设计细节

### 架构
![](./../images/dynamic-scheduler-plugin.png)


如上图，动态调度器依赖于`Prometheus`和`Node-exporter`收集和汇总指标数据，它由两个组件组成：

!!! note "Note"
`Node-annotator` 目前是 `Crane-scheduler-controller`的一个模块.

- `Node-annotator`定期从 Prometheus 拉取数据,并以注释的形式在节点上用时间戳标记它们。
- `Dynamic plugin`直接从节点的注释中读取负载数据,过滤并基于简单的算法对候选节点进行评分。

### 调度策略
动态调度器提供了一个默认的[调度策略](../deploy/manifests/policy.yaml)，并支持用户自定义策略。默认策略依赖于以下指标：

- `cpu_usage_avg_5m`
- `cpu_usage_max_avg_1h`
- `cpu_usage_max_avg_1d`
- `mem_usage_avg_5m`
- `mem_usage_max_avg_1h`
- `mem_usage_max_avg_1d`

在调度的`Filter`阶段,如果该节点的实际使用率大于上述任一指标的阈值,则该节点将被过滤。而在`Score`阶段,最终得分是这些指标值的加权和。

### Hot Value

在生产集群中，可能会频繁出现调度热点，因为创建 Pod 后节点的负载不能立即增加。因此，我们定义了一个额外的指标，名为`Hot Value`，表示节点最近几次的调度频率。节点的最终优先级是最终得分减去`Hot Value`。
Loading

0 comments on commit e111f0b

Please sign in to comment.