diff --git a/docs/effective-hpa-with-prometheus-adapter.zh.md b/docs/effective-hpa-with-prometheus-adapter.zh.md deleted file mode 100644 index 2546e4091..000000000 --- a/docs/effective-hpa-with-prometheus-adapter.zh.md +++ /dev/null @@ -1,362 +0,0 @@ -# 基于 Effective HPA 实现自定义指标的智能弹性实践 - -Kubernetes HPA 支持了丰富的弹性扩展能力,Kubernetes 平台开发者部署服务实现自定义 Metric 的服务,Kubernetes 用户配置多项内置的资源指标或者自定义 Metric 指标实现自定义水平弹性。 -Crane Effective HPA 兼容社区的 Kubernetes HPA 的能力,提供了更智能的弹性策略,比如基于预测的弹性和基于 Cron 周期的弹性等。 -Prometheus 是当下流行的开源监控系统,通过 Prometheus 可以获取到用户的自定义指标配置。 - -本文将通过一个例子介绍了如何基于 Effective HPA 实现自定义指标的智能弹性。 - -## 部署环境要求 - -- Kubernetes 1.18+ -- Helm 3.1.0 -- Crane v0.6.0+ - -## 环境搭建 - -### 集群版本 - -版本:v1.22.9 -注:为了接入阿里云ECI以降低成本,需要指定节点进行副本扩缩,故采用该版本作为基础环境 - -```bash -# kubectl version - -Client Version: version.Info{Major:"1", Minor:"22", GitVersion:"v1.22.9", GitCommit:"6df4433e288edc9c40c2e344eb336f63fad45cd2", GitTreeState:"clean", BuildDate:"2022-04-13T19:57:43Z", GoVersion:"go1.16.15", Compiler:"gc", Platform:"linux/amd64"} -Server Version: version.Info{Major:"1", Minor:"22", GitVersion:"v1.22.9", GitCommit:"6df4433e288edc9c40c2e344eb336f63fad45cd2", GitTreeState:"clean", BuildDate:"2022-04-13T19:52:02Z", GoVersion:"go1.16.15", Compiler:"gc", Platform:"linux/amd64"} -``` - -### Prometheus - -镜像及版本:registry.cn-hangzhou.aliyuncs.com/istios/prometheus-adapter-amd64:v0.9.0 -注:EHPA需要兼容目前已有的外部指标,该部分通过prometheus-adapter实现 - -#### 指标采集 - -服务设置 - -```bash -# kubectl get deployment metric-external-service -o yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: metric-external-service - namespace: default -spec: - template: - metadata: - annotations: - prometheus.aispeech.com/metric_path: /metrics - prometheus.aispeech.com/scrape_port: "28002" - prometheus.aispeech.com/scrape_scheme: http - prometheus/should_be_scraped: "true" -``` - -接口验证 - -```bash -# kubectl get pods -owide -NAME READY STATUS RESTARTS AGE -metric-external-service-6c6b4b4648-n7bmc 1/1 Running 0 14d -# curl 
10.244.0.59:28002/metrics -mock_traffic{} 13820 -``` - -#### 外部指标创建 - -注:指标来源与需要应用该指标实现扩缩容的服务跨namespace,设置为false - -```bash -# kubectl -n ${prometheus-adapter-namespace} get configmap prometheus-adapter-config -o yaml -apiVersion: v1 -data: - config.yaml: | - externalRules: - - seriesQuery:'{__name__="mock_traffic",pod_name!=""}' - metricsQuery: max(<<.Series>>{<<.LabelMatchers>>}) by (pod_name) - resources: - namespaced: false - -``` - -配置添加后重启prometheus-adapter -查询外部指标状态 - -external-apiservice - -```bash -# kubectl get apiservice v1beta1.external.metrics.k8s.io -NAME SERVICE AVAILABLE AGE -v1beta1.external.metrics.k8s.io monitoring/prometheus-adapter True 36d -# kubectl get --raw /apis/external.metrics.k8s.io/v1beta1|tee |python -m json.tool -{ - "apiVersion": "v1", - "groupVersion": "external.metrics.k8s.io/v1beta1", - "kind":"APIResourceList", - "resources": [ - { - "kind": "ExternalMetricValueList", - "name": "mock_traffic", - "namespaced": true, - "singularName":"", - "verbs": [ - "get" - ] - } - ] -} - -``` - -### Metric-Adapter - -镜像及版本:docker.io/gocrane/metric-adapter:v0.5.0-tke.1-7-g10ddeb6 -注:时序预测模型功能需要通过metric-adapter获取,因此需要对prometheus-adapter与metric-adapter进行指标集成 - -#### 配置remote-adapter -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: metric-adapter - namespace: crane-system -spec: - template: - spec: - containers: - - args: -#添加外部Adapter - - --remote-adapter=true - - --remote-adapter-service-namespace=monitoring - - --remote-adapter-service-name=prometheus-adapter - - --remote-adapter-service-port=443 -``` - -#### 修改apiservice - -指定外部指标源为metric-adapter,prometheus-adapter指标通过metric-adapter代理 - -```bash -# kubectl edit apiservice v1beta1.external.metrics.k8s.io -# kubectl get apiservice v1beta1.external.metrics.k8s.io -o yaml - -#外部指标正常 -# kubectl get --raw /apis/external.metrics.k8s.io/v1beta1|tee |python -m json.tool -{ - "apiVersion":"v1", - "groupVersion":"external.metrics.k8s.io/v1beta1", - "kind":"APIResourceList", - 
"resources": [ - { - "kind":"ExternalMetricValueList", - "name":"mock_traffic", - "namespaced":true, - "singularName":"", - "verbs": [ - "get" - ] - } -] -} -``` - -## 配置弹性 - -### EHPA - -#### 设置EHPA - -```yaml -apiVersion: autoscaling.crane.io/v1alpha1 -kind: EffectiveHorizontalPodAutoscaler -metadata: - name: metric-source-service - annotations: - metric-name.autoscaling.crane.io/mock_traffic: | -#添加注解,当前版本需要配置查询语句 -#metric-name.autoscaling.crane.io/${需要获取时序模型的指标名}: - mock_traffic{job="metrics-service1-lyg-test", pod_namespace="cloud", pod_project="metric-external-service"} -spec: - behavior: - scaleDown: - stabilizationWindowSeconds: 6 - policies: - - type: Percent - value: 100 - periodSeconds: 15 - scaleUp: - stabilizationWindowSeconds: 0 - policies: - - type: Percent - value: 100 - periodSeconds: 15 - - type: Pods - value: 2 - periodSeconds: 15 - selectPolicy: Max - scaleTargetRef: -#指定需要实现扩缩容的deployment - apiVersion: apps/v1 - kind: Deployment - name: metric-source-service - minReplicas: 2 - maxReplicas: 20 -#Auto为应用,Previoew为DryRun模式 - scaleStrategy: Auto - metrics: -#控制扩缩容的外部指标 - - type: External - external: - metric: - name: mock_traffic - target: - averageValue: 2000 - type: AverageValue -#设置时序预测模型 - prediction: - predictionWindowSeconds: 3600 - predictionAlgorithm: - algorithmType: dsp - dsp: - sampleInterval: "60s" - historyLength: "7d" -``` - -应用并查看EHPA状态 - -```bash -# kubectl apply -f ehpa.yaml -#查询ehpa状态 -# kubectl get ehpa -NAME STRATEGY MINPODS MAXPODS SPECIFICPODS REPLICAS AGE -metric-source-service Auto 2 20 10 1m -``` - -#### 时序预测模型 -注:craned定义TimeSeriesPrediction资源作为时序预测模型,命名定义ehpa-${ehpa-name} -增加时序预测指标,命名定义crane-${extrenal-metric-name} - -```bash -# kubectl get tsp -NAME TARGETREFNAME TARGETREFKIND PREDICTIONWINDOWSECONDS AGE -ehpa-metric-source-service metric-source-service Deployment 3600 1m -# kubectl get tsp ehpa-metric-source-service -o yaml -apiVersion: prediction.crane.io/v1alpha1 -kind: TimeSeriesPrediction -metadata: - name: 
ehpa-metric-source-service - namespace: default -spec: - predictionMetrics: - - algorithm: - algorithmType: dsp - dsp: - estimators: {} - historyLength: 7d - sampleInterval: 60s - expressionQuery: - expression: | - mock_traffic{job="metrics-service1-lyg-test", pod_namespace="cloud", pod_project="metric-external-service"} - resourceIdentifier: crane-mock_traffic - type: ExpressionQuery - targetRef: - apiVersion: apps/v1 - kind: Deployment - name: metric-source-service - namespace: default -status: - predictionMetrics: - - prediction: - - labels: - samples: - - timestamp: 1656402060 - value: "15767.77871" - ... - ... - - timestamp: 1656409200 - value: "22202.37755" - resourceIdentifier: crane-mock_traffic -``` - -#### HPA - -HPA通过相关指标实现扩缩 - -```bash -# kubectl get hpa -NAME REFERENCE TARGETS MINPODS MAXPODS REPLICAS AGE -metric-source-service Deployment/metric-source-service 1480200m/2k (avg), 2020300m/2k (avg) 2 20 10 1m -# kubectl describe hpa ehpa-metric-source-service -Name: ehpa-metric-source-service -Namespace: default -Labels: app.kubernetes.io/managed-by=effective-hpa-controller - app.kubernetes.io/name=ehpa-metric-source-service - app.kubernetes.io/part-of=metric-source-service - autoscaling.crane.io/effective-hpa-uid=b2cb76db-61c9-4d00-b333-af67d36bbd65 -Annotations: -CreationTimestamp: Tue, 28 Jun 2022 13:38:06 +0800 -Reference: Deployment/metric-source-service -Metrics: ( current / target ) - "mock_traffic" (target average value): 1470600m / 2k - "crane-mock_traffic" (target average value): 2032500m / 2k -Min replicas: 2 -Max replicas: 20 -Behavior: - Scale Up: - Stabilization Window: 0 seconds - Select Policy: Max - Policies: - - Type: Percent Value: 100 Period: 15 seconds - - Type: Pods Value: 2 Period: 15 seconds - Scale Down: - Stabilization Window: 6 seconds - Select Policy: Max - Policies: - - Type: Percent Value: 100 Period: 15 seconds -Deployment pods: 10 current / 10 desired -``` - -### craned指标采集 - -#### 副本指标 - -```bash -# kubectl -n 
crane-system get deployment craned -o yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: craned - namespace: crane-system -spec: - template: - metadata: - annotations: - prometheus.aispeech.com/metric_path: /metrics - prometheus.aispeech.com/scrape_port: "8080" - prometheus.aispeech.com/scrape_scheme: http - prometheus.aispeech.com/should_be_scraped: "true" - -# kubectl -n crane-system get pods craned-854bcdb88b-d5fgx -o wide -NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES -craned-854bcdb88b-d5fgx 2/2 Running 0 96m 10.244.0.177 d2-node-012 -#指标查询 -# curl -sL 10.244.0.177:8080/metrics | grep ehpa -crane_autoscaling_effective_hpa_replicas{name="metric-source-service",namespace="default"} 10 -crane_autoscaling_hpa_replicas{name="ehpa-metric-source-service",namespace="default"} 10 -crane_autoscaling_hpa_scale_count{name="ehpa-metric-source-service",namespace="default",type="hpa"} 3 -``` - -#### TSP指标 - -```bash -# curl -sL 10.244.0.177:8080/metrics | grep ^crane_prediction_time_series_prediction -crane_prediction_time_series_prediction_external_by_window{algorithm="dsp",resourceIdentifier="crane-mock_traffic",targetKind="Deployment",targetName="metric-source-service",targetNamespace="default",type="ExpressionQuery"} 23011 1657270905000 -crane_prediction_time_series_prediction_resource{algorithm="dsp",resourceIdentifier="crane_pod_cpu_usage",targetKind="Deployment",targetName="metric-source-service",namespace="default"} 10 -``` - -## 总结: - -基于历史指标预测功能实现原理: -● EHPA开启预测,建立相应指标的时序预测模型【TimeSeriesPrediction】 -● 创建HPA,在原有指标基础上,增加时序预测模型指标 -● HPA基于metric-adapter服务获取时序预测模型指标,实现服务提前扩容 diff --git a/docs/images/remote-adapter.png b/docs/images/remote-adapter.png new file mode 100644 index 000000000..aa70270f9 Binary files /dev/null and b/docs/images/remote-adapter.png differ diff --git a/docs/tutorials/effective-hpa-with-prometheus-adapter.zh.md b/docs/tutorials/effective-hpa-with-prometheus-adapter.zh.md new file mode 100644 index 
000000000..3ed7a2b80 --- /dev/null +++ b/docs/tutorials/effective-hpa-with-prometheus-adapter.zh.md @@ -0,0 +1,402 @@ +# 基于 Effective HPA 实现自定义指标的智能弹性实践 + +Kubernetes HPA 支持了丰富的弹性扩展能力,Kubernetes 平台开发者部署服务实现自定义 Metric 的服务,Kubernetes 用户配置多项内置的资源指标或者自定义 Metric 指标实现自定义水平弹性。 +Effective HPA 兼容社区的 Kubernetes HPA 的能力,提供了更智能的弹性策略,比如基于预测的弹性和基于 Cron 周期的弹性等。 +Prometheus 是当下流行的开源监控系统,通过 Prometheus 可以获取到用户的自定义指标配置。 + +本文将通过一个例子介绍如何基于 Effective HPA 实现自定义指标的智能弹性。部分配置来自于 [官方文档](https://github.com/kubernetes-sigs/prometheus-adapter/blob/master/docs/walkthrough.md) + +## 部署环境要求 + +- Kubernetes 1.18+ +- Helm 3.1.0 +- Crane v0.6.0+ +- Prometheus + +参考 [安装文档](https://docs.gocrane.io/dev/installation/) 在集群中安装 Crane,Prometheus 可以使用安装文档中的也可以是已部署的 Prometheus。 + +## 环境搭建 + +### 安装 PrometheusAdapter + +Crane 组件 Metric-Adapter 和 PrometheusAdapter 都基于 [custom-metric-apiserver](https://github.com/kubernetes-sigs/custom-metrics-apiserver) 实现了 Custom Metric 和 External Metric 的 ApiService。在安装 Crane 时会将对应的 ApiService 安装为 Crane 的 Metric-Adapter,因此安装 PrometheusAdapter 前需要删除 ApiService 以确保 Helm 安装成功。 + +```bash +# 查看当前集群 ApiService +kubectl get apiservice +``` + +因为安装了 Crane, 结果如下: + +```bash +NAME SERVICE AVAILABLE AGE +v1beta1.batch Local True 35d +v1beta1.custom.metrics.k8s.io crane-system/metric-adapter True 18d +v1beta1.discovery.k8s.io Local True 35d +v1beta1.events.k8s.io Local True 35d +v1beta1.external.metrics.k8s.io crane-system/metric-adapter True 18d +v1beta1.flowcontrol.apiserver.k8s.io Local True 35d +v1beta1.metrics.k8s.io kube-system/metrics-service True 35d +``` + +删除 Crane 安装的 ApiService + +```bash +kubectl delete apiservice v1beta1.custom.metrics.k8s.io +kubectl delete apiservice v1beta1.external.metrics.k8s.io +``` + +通过 Helm 安装 PrometheusAdapter + +```bash +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +helm install prometheus-adapter -n crane-system prometheus-community/prometheus-adapter +``` + +再将 ApiService 改回 Crane 的 
Metric-Adapter + +```bash +kubectl apply -f https://raw.githubusercontent.com/gocrane/crane/main/deploy/metric-adapter/apiservice.yaml +``` + +### 配置 Metric-Adapter 开启 RemoteAdapter 功能 + +在安装 PrometheusAdapter 时没有将 ApiService 指向 PrometheusAdapter,因此为了让 PrometheusAdapter 也可以提供自定义 Metric,通过 Crane Metric Adapter 的 `RemoteAdapter` 功能将请求转发给 PrometheusAdapter。 + +修改 Metric-Adapter 的配置,将 PrometheusAdapter 的 Service 配置成 Crane Metric Adapter 的 RemoteAdapter + +```bash +# 编辑 Metric-Adapter 的 Deployment 配置 +kubectl edit deploy metric-adapter -n crane-system +``` + +根据 PrometheusAdapter 的配置做以下修改: + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: metric-adapter + namespace: crane-system +spec: + template: + spec: + containers: + - args: + #添加外部 Adapter 配置 + - --remote-adapter=true + - --remote-adapter-service-namespace=crane-system + - --remote-adapter-service-name=prometheus-adapter + - --remote-adapter-service-port=443 +``` + +#### RemoteAdapter 能力 + +![](../images/remote-adapter.png) + +Kubernetes 限制一个 ApiService 只能配置一个后端服务,因此,为了在一个集群内使用 Crane 提供的 Metric 和 PrometheusAdapter 提供的 Metric,Crane 支持了 RemoteAdapter 解决此问题 + +- Crane Metric-Adapter 支持配置一个 Kubernetes Service 作为一个远程 Adapter +- Crane Metric-Adapter 处理请求时会先检查是否是 Crane 提供的 Local Metric,如果不是,则转发给远程 Adapter + +## 运行例子 + +### 准备应用 + +将以下应用部署到集群中,应用暴露了 Metric 展示每秒收到的 http 请求数量。 + +sample-app.deploy.yaml + +```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sample-app + labels: + app: sample-app +spec: + replicas: 1 + selector: + matchLabels: + app: sample-app + template: + metadata: + labels: + app: sample-app + spec: + containers: + - image: luxas/autoscale-demo:v0.1.2 + name: metrics-provider + ports: + - name: http + containerPort: 8080 +``` + +sample-app.service.yaml + +```yaml +apiVersion: v1 +kind: Service +metadata: + labels: + app: sample-app + name: sample-app +spec: + ports: + - name: http + port: 80 + protocol: TCP + targetPort: 8080 + selector: + app: sample-app + type: ClusterIP +``` + +```bash 
+kubectl create -f sample-app.deploy.yaml +kubectl create -f sample-app.service.yaml +``` + +当应用部署完成后,您可以通过命令检查 `http_requests_total` Metric: + +```bash +curl http://$(kubectl get service sample-app -o jsonpath='{ .spec.clusterIP }')/metrics +``` + +### 配置采集规则 + +配置 Prometheus 的 ScrapeConfig,收集应用的 Metric: http_requests_total + +```bash +kubectl edit configmap -n crane-system prometheus-server +``` + +添加以下配置 + +```yaml + - job_name: sample-app + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: default;sample-app-(.+) + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_pod_name + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod +``` + +此时,您可以在 Prometheus 中使用 PromQL 查询:sum(rate(http_requests_total[5m])) by (pod) + +### 验证 PrometheusAdapter + +PrometheusAdapter 默认的 Rule 配置支持将 http_requests_total 转换成 Pods 类型的 Custom Metric,通过命令验证: + +```bash +kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1 | jq . 
+``` + +结果应包括 `pods/http_requests`: + +```bash +{ + "name": "pods/http_requests", + "singularName": "", + "namespaced": true, + "kind": "MetricValueList", + "verbs": [ + "get" + ] +} +``` + +这表明现在可以通过 Pod Metric 配置 HPA。 + +### 配置弹性 + +现在我们可以创建 Effective HPA。此时 Effective HPA 可以通过 Pod Metric `http_requests` 进行弹性: + +#### 如何定义一个自定义指标开启预测功能 + +在 Effective HPA 的 Annotation 按以下规则添加配置: + +```yaml +annotations: + # metric-query.autoscaling.crane.io 是固定的前缀,后面是 Metric 名字,需跟 spec.metrics 中的 Metric.name 相同,支持 Pods 类型和 External 类型 + metric-query.autoscaling.crane.io/http_requests: "sum(rate(http_requests_total[5m])) by (pod)" +``` + +sample-app-hpa.yaml + +```yaml +apiVersion: autoscaling.crane.io/v1alpha1 +kind: EffectiveHorizontalPodAutoscaler +metadata: + name: php-apache + annotations: + # metric-query.autoscaling.crane.io 是固定的前缀,后面是 Metric 名字,需跟 spec.metrics 中的 Metric.name 相同,支持 Pods 类型和 External 类型 + metric-query.autoscaling.crane.io/http_requests: "sum(rate(http_requests_total[5m])) by (pod)" +spec: + # ScaleTargetRef is the reference to the workload that should be scaled. + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: sample-app + minReplicas: 1 # MinReplicas is the lower limit replicas to the scale target which the autoscaler can scale down to. + maxReplicas: 10 # MaxReplicas is the upper limit replicas to the scale target which the autoscaler can scale up to. + scaleStrategy: Auto # ScaleStrategy indicates the strategy for scaling the target, value can be "Auto" or "Preview". + # Metrics contains the specifications for which to use to calculate the desired replica count. + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 50 + - type: Pods + pods: + metric: + name: http_requests + target: + type: AverageValue + averageValue: 500m + # Prediction defines configurations for predicting resources. + # If unspecified, prediction is disabled by default. 
+ prediction: + predictionWindowSeconds: 3600 # PredictionWindowSeconds is the time window to predict metrics in the future. + predictionAlgorithm: + algorithmType: dsp + dsp: + sampleInterval: "60s" + historyLength: "7d" +``` + +```bash +kubectl create -f sample-app-hpa.yaml +``` + +查看 TimeSeriesPrediction 状态,如果应用运行时间较短,可能会无法预测: + +```yaml +apiVersion: prediction.crane.io/v1alpha1 +kind: TimeSeriesPrediction +metadata: + creationTimestamp: "2022-07-11T16:10:09Z" + generation: 1 + labels: + app.kubernetes.io/managed-by: effective-hpa-controller + app.kubernetes.io/name: ehpa-php-apache + app.kubernetes.io/part-of: php-apache + autoscaling.crane.io/effective-hpa-uid: 1322c5ac-a1c6-4c71-98d6-e85d07b22da0 + name: ehpa-php-apache + namespace: default +spec: + predictionMetrics: + - algorithm: + algorithmType: dsp + dsp: + estimators: {} + historyLength: 7d + sampleInterval: 60s + resourceIdentifier: crane_pod_cpu_usage + resourceQuery: cpu + type: ResourceQuery + - algorithm: + algorithmType: dsp + dsp: + estimators: {} + historyLength: 7d + sampleInterval: 60s + expressionQuery: + expression: sum(rate(http_requests_total[5m])) by (pod) + resourceIdentifier: crane_custom.pods_http_requests + type: ExpressionQuery + predictionWindowSeconds: 3600 + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: sample-app + namespace: default +status: + conditions: + - lastTransitionTime: "2022-07-12T06:54:42Z" + message: not all metric predicted + reason: PredictPartial + status: "False" + type: Ready + predictionMetrics: + - ready: false + resourceIdentifier: crane_pod_cpu_usage + - prediction: + - labels: + - name: pod + value: sample-app-7cfb596f98-8h5vv + samples: + - timestamp: 1657608900 + value: "0.01683" + - timestamp: 1657608960 + value: "0.01683" + ...... 
+ ready: true + resourceIdentifier: crane_custom.pods_http_requests +``` + +查看 Effective HPA 创建的 HPA 对象,可以观测到已经创建出基于自定义指标预测的 Metric: `crane_custom.pods_http_requests` + +```yaml +apiVersion: autoscaling/v2beta2 +kind: HorizontalPodAutoscaler +metadata: + creationTimestamp: "2022-07-11T16:10:10Z" + labels: + app.kubernetes.io/managed-by: effective-hpa-controller + app.kubernetes.io/name: ehpa-php-apache + app.kubernetes.io/part-of: php-apache + autoscaling.crane.io/effective-hpa-uid: 1322c5ac-a1c6-4c71-98d6-e85d07b22da0 + name: ehpa-php-apache + namespace: default +spec: + maxReplicas: 10 + metrics: + - pods: + metric: + name: http_requests + target: + averageValue: 500m + type: AverageValue + type: Pods + - pods: + metric: + name: crane_custom.pods_http_requests + selector: + matchLabels: + autoscaling.crane.io/effective-hpa-uid: 1322c5ac-a1c6-4c71-98d6-e85d07b22da0 + target: + averageValue: 500m + type: AverageValue + type: Pods + - resource: + name: cpu + target: + averageUtilization: 50 + type: Utilization + type: Resource + minReplicas: 1 + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: sample-app +``` + +## 总结 + +由于生产环境的复杂性,基于多指标的弹性(CPU/Memory/自定义指标)往往是生产应用的常见选择,因此 Effective HPA 通过预测算法覆盖了多指标的弹性,达到了帮助更多业务在生产环境落地水平弹性的成效。 diff --git a/mkdocs.yml b/mkdocs.yml index d80e64ab7..314504085 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -56,6 +56,7 @@ plugins: Roadmap: 路线图 Overview: 概述 Load-aware Scheduling: 负载感知调度 + Custom Metric Prediction With Prometheus: 基于自定义指标的弹性预测 zh_TW: Getting Started: 入門 Introduction: 介紹 @@ -111,6 +112,8 @@ nav: - Crane-scheduler: - Overview: tutorials/scheduling-pods-based-on-actual-node-load.md - Load-aware Scheduling: tutorials/dynamic-scheduler-plugin.md + - Best Practices: + - Custom Metric Prediction With Prometheus: tutorials/effective-hpa-with-prometheus-adapter.zh.md - Proposals: - Advanced CpuSet Manager: proposals/20220228-advanced-cpuset-manger.md - Pod Sorting And Precise Execution For Crane Agent: 
proposals/Pod-Sorting-And-Precise-Execution-For-Crane-Agent.md diff --git a/pkg/metricprovider/custom_metric_provider.go b/pkg/metricprovider/custom_metric_provider.go index 896683a22..54c827e67 100644 --- a/pkg/metricprovider/custom_metric_provider.go +++ b/pkg/metricprovider/custom_metric_provider.go @@ -22,7 +22,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/custom-metrics-apiserver/pkg/provider" - autoscalingapi "github.com/gocrane/api/autoscaling/v1alpha1" predictionapi "github.com/gocrane/api/prediction/v1alpha1" "github.com/gocrane/crane/pkg/known" @@ -182,33 +181,24 @@ func ListAllLocalMetrics(client client.Client) []provider.CustomMetricInfo { Metric: known.MetricNamePodCpuUsage, }) - var ehpaList autoscalingapi.EffectiveHorizontalPodAutoscalerList - err := client.List(context.TODO(), &ehpaList) + var hpaList autoscalingv2.HorizontalPodAutoscalerList + err := client.List(context.TODO(), &hpaList) if err != nil { - klog.Errorf("Failed to list ehpa: %v", err) + klog.Errorf("Failed to list hpa: %v", err) return metricInfos } - for _, ehpa := range ehpaList.Items { - for _, metric := range ehpa.Spec.Metrics { + for _, hpa := range hpaList.Items { + if !strings.HasPrefix(hpa.Name, "ehpa-") { + // filter hpa that not created by ehpa + continue + } + for _, metric := range hpa.Spec.Metrics { if metric.Type == autoscalingv2.PodsMetricSourceType && metric.Pods != nil && metric.Pods.Metric.Selector != nil && metric.Pods.Metric.Selector.MatchLabels != nil { if _, exist := metric.Pods.Metric.Selector.MatchLabels[known.EffectiveHorizontalPodAutoscalerUidLabel]; exist { - metricName := utils.GetGeneralPredictionMetricName(autoscalingv2.PodsMetricSourceType, false, metric.Pods.Metric.Name) - metricInfos = append(metricInfos, provider.CustomMetricInfo{Metric: metricName}) - } - } - } - - for _, metric := range ehpa.Spec.Metrics { - if metric.Type == autoscalingv2.ObjectMetricSourceType && - metric.Object != nil && - metric.Object.Metric.Selector != 
nil && - metric.Object.Metric.Selector.MatchLabels != nil { - if _, exist := metric.Object.Metric.Selector.MatchLabels[known.EffectiveHorizontalPodAutoscalerUidLabel]; exist { - metricName := utils.GetGeneralPredictionMetricName(autoscalingv2.ObjectMetricSourceType, false, metric.Object.Metric.Name) - metricInfos = append(metricInfos, provider.CustomMetricInfo{Metric: metricName}) + metricInfos = append(metricInfos, provider.CustomMetricInfo{Metric: metric.Pods.Metric.Name, Namespaced: true, GroupResource: schema.GroupResource{Group: "", Resource: "pods"}}) } } } diff --git a/pkg/metricprovider/external_metric_provider.go b/pkg/metricprovider/external_metric_provider.go index 751e75070..62a9c7335 100644 --- a/pkg/metricprovider/external_metric_provider.go +++ b/pkg/metricprovider/external_metric_provider.go @@ -229,14 +229,26 @@ func ListAllLocalExternalMetrics(client client.Client) []provider.ExternalMetric metricName := utils.GetGeneralPredictionMetricName(autoscalingv2.PodsMetricSourceType, true, ehpa.Name) metricInfos = append(metricInfos, provider.ExternalMetricInfo{Metric: metricName}) } - for _, metric := range ehpa.Spec.Metrics { + } + + var hpaList autoscalingv2.HorizontalPodAutoscalerList + err = client.List(context.TODO(), &hpaList) + if err != nil { + klog.Errorf("Failed to list hpa: %v", err) + return metricInfos + } + for _, hpa := range hpaList.Items { + if !strings.HasPrefix(hpa.Name, "ehpa-") { + // filter hpa that not created by ehpa + continue + } + for _, metric := range hpa.Spec.Metrics { if metric.Type == autoscalingv2.ExternalMetricSourceType && metric.External != nil && metric.External.Metric.Selector != nil && metric.External.Metric.Selector.MatchLabels != nil { if _, exist := metric.External.Metric.Selector.MatchLabels[known.EffectiveHorizontalPodAutoscalerUidLabel]; exist { - metricName := utils.GetGeneralPredictionMetricName(autoscalingv2.ExternalMetricSourceType, false, ehpa.Name) - metricInfos = append(metricInfos, 
provider.ExternalMetricInfo{Metric: metricName}) + metricInfos = append(metricInfos, provider.ExternalMetricInfo{Metric: metric.External.Metric.Name}) } } }