Skip to content

Commit

Permalink
Fix hot region label setting for tikv auto-scaling (#1833)
Browse files Browse the repository at this point in the history
* mutate

* fix log

* add admission configuration

* remove useless log

* format by comment

* use tikv cli

* remove useless code

* remove cmlister

* fix lint

* fix tpl
  • Loading branch information
cofyc authored Mar 5, 2020
1 parent fffef5f commit 1308c28
Show file tree
Hide file tree
Showing 14 changed files with 226 additions and 33 deletions.
2 changes: 2 additions & 0 deletions charts/tidb-cluster/templates/scripts/_start_tikv.sh.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ ARGS="--pd={{ template "cluster.scheme" . }}://${CLUSTER_NAME}-pd:2379 \
--config=/etc/tikv/tikv.toml
"

{{ .Values.tikv.postArgScript }}

echo "starting tikv-server ..."
echo "/tikv-server ${ARGS}"
exec /tikv-server ${ARGS}
7 changes: 7 additions & 0 deletions charts/tidb-cluster/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,13 @@ tikv:
# After waiting for 5 minutes, TiDB Operator creates a new TiKV node if this TiKV node is still down.
# maxFailoverCount is used to configure the maximum number of TiKV nodes that TiDB Operator can create when failover occurs.
maxFailoverCount: 3
# postArgscript is the script executed after the normal tikv instance start args is built,
# it is recommended to modify the args constructor logic if you have any special needs.
postArgScript: |
if [ ! -z "${STORE_LABELS:-}" ]; then
LABELS=" --labels ${STORE_LABELS} "
ARGS="${ARGS}${LABELS}"
fi
tidb:
# Please refer to https://github.com/pingcap/tidb/blob/master/config/config.toml.example for the default
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ rules:
resources: ["pods"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: [""]
resources: ["secrets"]
resources: ["secrets","configmaps"]
verbs: ["get", "list"]
- apiGroups: [""]
resources: ["events"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,41 @@ webhooks:
apiVersions: ["v1alpha1"]
resources: ["tidbclusters"]
{{- end }}
---
{{- if .Values.admissionWebhook.mutation.pods }}
apiVersion: admissionregistration.k8s.io/v1beta1
kind: MutatingWebhookConfiguration
metadata:
name: mutation-tidb-pod-webhook-cfg
labels:
app.kubernetes.io/name: {{ template "chart.name" . }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/component: admission-webhook
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
webhooks:
- name: podadmission.tidb.pingcap.com
{{- if semverCompare ">=1.15-0" .Capabilities.KubeVersion.GitVersion }}
objectSelector:
matchLabels:
"app.kubernetes.io/managed-by": "tidb-operator"
"app.kubernetes.io/name": "tidb-cluster"
{{- end }}
failurePolicy: {{ .Values.admissionWebhook.failurePolicy.mutation | default "Ignore" }}
clientConfig:
service:
name: kubernetes
namespace: default
path: "/apis/admission.tidb.pingcap.com/v1alpha1/mutatingreviews"
{{- if .Values.admissionWebhook.cabundle }}
caBundle: {{ .Values.admissionWebhook.cabundle | b64enc }}
{{- else }}
caBundle: null
{{- end }}
rules:
- operations: ["CREATE"]
apiGroups: [""]
apiVersions: ["v1"]
resources: ["pods"]
{{- end }}
{{- end }}
3 changes: 3 additions & 0 deletions charts/tidb-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ admissionWebhook:
pingcapResources: false
## mutation webhook would mutate the given request for the specific resource and operation
mutation:
## pods mutation hook would mutate the pod. Currently It is used for TiKV Auto-Scaling.
## refer to https://github.com/pingcap/tidb-operator/issues/1651
pods: true
## defaulting hook set default values for the the resources under pingcap.com group
pingcapResources: true
## failurePolicy are applied to ValidatingWebhookConfiguration which affect tidb-admission-webhook
Expand Down
7 changes: 0 additions & 7 deletions pkg/manager/member/scaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,6 @@ const (
skipReasonScalerAnnDeferDeletingIsEmpty = "scaler: pvc annotations defer deleting is empty"
)

// TODO: add document to explain the hot region label
var (
hostRegionLabel = map[string]string{
"specialUse": "hotRegion",
}
)

// Scaler implements the logic for scaling out or scaling in the cluster.
type Scaler interface {
// Scale scales the cluster. It does nothing if scaling is not needed.
Expand Down
5 changes: 5 additions & 0 deletions pkg/manager/member/template.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,11 @@ ARGS="--pd={{ .Scheme }}://${CLUSTER_NAME}-pd:2379 \
--config=/etc/tikv/tikv.toml
"
if [ ! -z "${STORE_LABELS:-}" ]; then
LABELS=" --labels ${STORE_LABELS} "
ARGS="${ARGS}${LABELS}"
fi
echo "starting tikv-server ..."
echo "/tikv-server ${ARGS}"
exec /tikv-server ${ARGS}
Expand Down
3 changes: 2 additions & 1 deletion pkg/manager/member/tikv_member_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ type tikvMemberManager struct {
}

// NewTiKVMemberManager returns a *tikvMemberManager
func NewTiKVMemberManager(pdControl pdapi.PDControlInterface,
func NewTiKVMemberManager(
pdControl pdapi.PDControlInterface,
setControl controller.StatefulSetControlInterface,
svcControl controller.ServiceControlInterface,
certControl controller.CertControlInterface,
Expand Down
23 changes: 0 additions & 23 deletions pkg/manager/member/tikv_scaler.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,6 @@ func (tsd *tikvScaler) ScaleIn(tc *v1alpha1.TidbCluster, oldSet *apps.StatefulSe
}

// SyncAutoScalerAnn would reclaim the auto-scaling out slots if the target pod is no longer existed
// For the auto-scaling slots, we would add the special hot region label to the store with pdapi.
func (tsd *tikvScaler) SyncAutoScalerAnn(tc *v1alpha1.TidbCluster, actual *apps.StatefulSet) error {
currentScalingSlots := util.GetAutoScalingOutSlots(tc, v1alpha1.TiKVMemberType)
if currentScalingSlots.Len() < 1 {
Expand All @@ -217,28 +216,6 @@ func (tsd *tikvScaler) SyncAutoScalerAnn(tc *v1alpha1.TidbCluster, actual *apps.
tc.Annotations[label.AnnTiKVAutoScalingOutOrdinals] = v
return nil
}

// For the auto-scaling slots, we would add the special hot region label to the store with pdapi.
pdClient := tsd.pdControl.GetPDClient(pdapi.Namespace(tc.Namespace), tc.Name, *tc.Spec.EnableTLSCluster)
for k := range currentScalingSlots {
podName := util.GetPodName(tc, v1alpha1.TiKVMemberType, k)
for _, store := range tc.Status.TiKV.Stores {
if store.PodName == podName {
id, err := strconv.ParseUint(store.ID, 10, 64)
if err != nil {
return err
}
ok, err := pdClient.SetStoreLabels(id, hostRegionLabel)
if err != nil {
return err
}
if !ok {
return fmt.Errorf("tc[%s/%s]'s pod[%s] failed to add special hot region label", tc.Namespace, tc.Name, podName)
}
break
}
}
}
return nil
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/manager/member/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ func MarshalTOML(v interface{}) ([]byte, error) {
return data, nil
}

func UnmarshalTOML(b []byte, obj interface{}) error {
return toml.Unmarshal(b, obj)
}

func Sha256Sum(v interface{}) (string, error) {
data, err := json.Marshal(v)
if err != nil {
Expand Down
20 changes: 19 additions & 1 deletion pkg/webhook/admission_hooks.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,25 @@ func (a *AdmissionHook) MutatingResource() (plural schema.GroupVersionResource,
}

func (a *AdmissionHook) Admit(ar *admission.AdmissionRequest) *admission.AdmissionResponse {
return a.strategyAC.Mutate(ar)
name := ar.Name
namespace := ar.Namespace
kind := ar.Kind.Kind
klog.Infof("receive mutation request for %s[%s/%s]", kind, namespace, name)

resp := a.strategyAC.Mutate(ar)
if !resp.Allowed {
return resp
}
// see if other ACs are interested in this resource
switch ar.Kind.Kind {
case "Pod":
if "" != ar.Kind.Group {
return a.unknownAdmissionRequest(ar)
}
return a.podAC.MutatePods(ar)
default:
return resp
}
}

// any special initialization goes here
Expand Down
116 changes: 116 additions & 0 deletions pkg/webhook/pod/pod_mutater.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// Copyright 2020 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package pod

import (
"encoding/json"
"fmt"
"github.com/BurntSushi/toml"
"github.com/pingcap/tidb-operator/pkg/apis/pingcap/v1alpha1"
"github.com/pingcap/tidb-operator/pkg/controller"
"github.com/pingcap/tidb-operator/pkg/features"
"github.com/pingcap/tidb-operator/pkg/label"
operatorUtils "github.com/pingcap/tidb-operator/pkg/util"
"github.com/pingcap/tidb-operator/pkg/webhook/util"
admissionv1beta1 "k8s.io/api/admission/v1beta1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog"
)

func (pc *PodAdmissionControl) mutatePod(ar *admissionv1beta1.AdmissionRequest) *admissionv1beta1.AdmissionResponse {
pod := &corev1.Pod{}
if err := json.Unmarshal(ar.Object.Raw, pod); err != nil {
return util.ARFail(err)
}
original := pod.DeepCopy()
l := label.Label(pod.Labels)
if !l.IsManagedByTiDBOperator() {
return util.ARSuccess()
}
if !l.IsTiKV() {
return util.ARSuccess()
}
tcName, exist := pod.Labels[label.InstanceLabelKey]
if !exist {
return util.ARSuccess()
}
namespace := ar.Namespace

tc, err := pc.operatorCli.PingcapV1alpha1().TidbClusters(namespace).Get(tcName, metav1.GetOptions{})
if err != nil {
if errors.IsNotFound(err) {
return util.ARSuccess()
}
return util.ARFail(err)
}

if features.DefaultFeatureGate.Enabled(features.AutoScaling) {
err := pc.tikvHotRegionSchedule(tc, pod)
if err != nil {
return util.ARFail(err)
}
}

patch, err := util.CreateJsonPatch(original, pod)
if err != nil {
return util.ARFail(err)
}
return util.ARPatch(patch)
}

func (pc *PodAdmissionControl) tikvHotRegionSchedule(tc *v1alpha1.TidbCluster, pod *corev1.Pod) error {
podName := pod.Name
ordinal, err := operatorUtils.GetOrdinalFromPodName(podName)
if err != nil {
return err
}
sets := operatorUtils.GetAutoScalingOutSlots(tc, v1alpha1.TiKVMemberType)
if !sets.Has(ordinal) {
return nil
}

cmName := controller.TiKVMemberName(tc.Name)
cm, err := pc.kubeCli.CoreV1().ConfigMaps(tc.Namespace).Get(cmName, metav1.GetOptions{})
if err != nil {
klog.Infof("cm[%s/%s] found error,err %v", tc.Namespace, cmName, err)
return err
}
v, ok := cm.Data["config-file"]
if !ok {
return fmt.Errorf("tc[%s/%s]'s tikv config[config-file] is missing", tc.Namespace, tc.Name)
}
config := &v1alpha1.TiKVConfig{}
err = toml.Unmarshal([]byte(v), config)
if err != nil {
return err
}
if config.Server == nil {
config.Server = &v1alpha1.TiKVServerConfig{}
}
if config.Server.Labels == nil {
config.Server.Labels = map[string]string{}
}
// TODO: add document to explain the hot region label
config.Server.Labels["specialUse"] = "hotRegion"
for id, c := range pod.Spec.Containers {
if c.Name == "tikv" {
appendExtraLabelsENVForTiKV(config.Server.Labels, &c)
pod.Spec.Containers[id] = c
break
}
}
return nil
}
7 changes: 7 additions & 0 deletions pkg/webhook/pod/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ type admitPayload struct {
pdClient pdapi.PDClient
}

func (pc *PodAdmissionControl) MutatePods(ar *admission.AdmissionRequest) *admission.AdmissionResponse {
if ar.Operation != admission.Create && ar.Operation != admission.Update {
return util.ARSuccess()
}
return pc.mutatePod(ar)
}

func (pc *PodAdmissionControl) AdmitPods(ar *admission.AdmissionRequest) *admission.AdmissionResponse {

name := ar.Name
Expand Down
23 changes: 23 additions & 0 deletions pkg/webhook/pod/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,26 @@ func checkFormerPodRestartStatus(kubeCli kubernetes.Interface, memberType v1alph
}
return false, nil
}

func appendExtraLabelsENVForTiKV(labels map[string]string, container *core.Container) {
s := ""
for k, v := range labels {
s = fmt.Sprintf("%s,%s", s, fmt.Sprintf("%s=%s", k, v))
}
s = s[1:]
existed := false
for id, env := range container.Env {
if env.Name == "STORE_LABELS" {
env.Value = fmt.Sprintf("%s,%s", env.Value, s)
container.Env[id] = env
existed = true
break
}
}
if !existed {
container.Env = append(container.Env, core.EnvVar{
Name: "STORE_LABELS",
Value: s,
})
}
}

0 comments on commit 1308c28

Please sign in to comment.