Skip to content

Commit

Permalink
Check for in progress failover and add events on cluster related to o…
Browse files Browse the repository at this point in the history
…rchestrator
  • Loading branch information
AMecea authored and calind committed Jun 4, 2019
1 parent 017acf9 commit 80e1ccd
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 17 deletions.
65 changes: 52 additions & 13 deletions cmd/orchestrator-helper/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,48 +17,87 @@ limitations under the License.
package main

import (
"log"
"os"

"github.com/presslabs/mysql-operator/pkg/orc-helper"
"github.com/spf13/cobra"
"k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"log"
kclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"

"github.com/presslabs/mysql-operator/pkg/apis"
"github.com/presslabs/mysql-operator/pkg/orc-helper"
)

var (
client kclient.Client
)

func main() {
// check command line args
if len(os.Args) != 2 {
log.Fatal("see usage: <cluster.name>")

cmd := &cobra.Command{
Use: "orchestrator-helper",
Short: "Helper for orchestrator.",
Long: `This command is a helper for updating MySQL cluster resources. Record events.`,
Run: func(cmd *cobra.Command, args []string) {
log.Fatal("you run orchestrator helper, see help section")
},
}

// Get a config to talk to the apiserver
cfg, err := config.GetConfig()
if err != nil {
log.Fatal("unable to get configuration", err)
log.Fatal("unable to get configuration: ", err)
}

// Setup Scheme for all resources
s := scheme.Scheme
if err = apis.AddToScheme(s); err != nil {
log.Fatal("unable to register types to scheme", err)
log.Fatal("unable to register types to scheme: ", err)
}

// initialize k8s client
client, err = kclient.New(cfg, kclient.Options{Scheme: s})
if err != nil {
log.Fatal("unable to get the k8s client", err)
log.Fatal("unable to get the k8s client: ", err)
}

err = orchelper.UpdateClusterFailoverCond(client, os.Args[1], "orcFailoverInProgress", "Orc failover in progress", true)
if err != nil {
log.Fatal("error in updating cluster: ", err)
fipCmd := &cobra.Command{
Use: "failover-in-progress",
Short: "Set failover in progress condition for given cluster",
Run: func(cmd *cobra.Command, args []string) {
// check command line args
if len(args) != 2 {
log.Fatal("see usage: <cluster.name> <message>")
}

err = orchelper.UpdateClusterFailoverCond(client, args[0], "OrcFailoverInProgress", args[1], true)
if err != nil {
log.Fatal("error in updating cluster: ", err)
}
},
}
cmd.AddCommand(fipCmd)

evWarningType := false
evCmd := &cobra.Command{
Use: "event",
Short: "Set event on a given cluster",
Run: func(cmd *cobra.Command, args []string) {
// check command line args
if len(args) != 3 {
log.Fatal("see usage: <cluster.name> <event-name> <message> [-warning]")
}

err = orchelper.UpdateEventForCluster(client, s, args[0], args[1], args[2], evWarningType)
if err != nil {
log.Fatal("error in updating cluster: ", err)
}
},
}
evCmd.Flags().BoolVarP(&evWarningType, "warning", "w", false, "if it's a warning event in k8s")
cmd.AddCommand(evCmd)

if err := cmd.Execute(); err != nil {
log.Fatal("failed to execute command: ", err)
}
}
25 changes: 25 additions & 0 deletions hack/charts/mysql-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,28 @@ orchestrator:
FailMasterPromotionIfSQLThreadNotUpToDate: true
DetachLostReplicasAfterMasterFailover: true

# orchestrator hooks called in the following order
# for more information about template: https://github.com/github/orchestrator/blob/master/go/logic/topology_recovery.go#L256
ProcessesShellCommand: "sh"

OnFailureDetectionProcesses:
- "/usr/local/bin/orchestrator-helper event -w '{failureClusterAlias}' 'OrcFailureDetection' 'Failure: {failureType}, failed host: {failedHost}, lost replicas: {lostReplicas}' || true"
- "/usr/local/bin/orchestrator-helper failover-in-progress '{failureClusterAlias}' '{failureDescription}' || true"

# PreGracefulTakeoverProcesses:
PreFailoverProcesses:
# as backup in case the first request fails
- "/usr/local/bin/orchestrator-helper failover-in-progress '{failureClusterAlias}' '{failureDescription}' || true"
# PostFailoverProcesses:
# - "/usr/local/bin/orchestrator-helper event '{failureClusterAlias}' 'Orc{command}' 'Failure type: {failureType}, failed hosts: {failedHost}, slaves: {countSlaves}' || true"

PostUnsuccessfulFailoverProcesses:
- "/usr/local/bin/orchestrator-helper event -w '{failureClusterAlias}' 'OrcPostUnsuccessfulFailover' 'Failure: {failureType}, failed host: {failedHost} with {countSlaves} slaves' || true"

PostMasterFailoverProcesses:
- "/usr/local/bin/orchestrator-helper event '{failureClusterAlias}' 'OrcPostMasterFailover' 'Failure type: {failureType}, new master: {successorHost}, slaves: {slaveHosts}' || true"

PostIntermediateMasterFailoverProcesses:
- "/usr/local/bin/orchestrator-helper event '{failureClusterAlias}' 'OrcPostIntermediateMasterFailover' 'Failure type: {failureType}, failed hosts: {failedHost}, slaves: {countSlaves}' || true"

# PostGracefulTakeoverProcesses:
3 changes: 2 additions & 1 deletion hack/development/Dockerfile.orchestrator
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
###############################################################################
FROM golang:1.9-alpine as builder

RUN set -ex \
&& apk add --no-cache \
bash gcc git musl-dev openssl rsync
Expand All @@ -16,7 +18,6 @@ RUN set -ex \
&& ./script/build

###############################################################################

FROM alpine:3.7

# Create a group and user
Expand Down
9 changes: 9 additions & 0 deletions pkg/controller/node/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func Add(mgr manager.Manager) error {
// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager, sqlI sqlFactoryFunc) reconcile.Reconciler {
return &ReconcileMysqlNode{
// TODO(amecea): use client without cache here
Client: mgr.GetClient(),
scheme: mgr.GetScheme(),
recorder: mgr.GetRecorder(controllerName),
Expand Down Expand Up @@ -141,6 +142,7 @@ type ReconcileMysqlNode struct {
// and what is in the MysqlCluster.Spec
// Automatically generate RBAC rules to allow the Controller to read and write Deployments
// +kubebuilder:rbac:groups=core,resources=pods/status,verbs=get;list;watch;create;update;patch;delete
// nolint: gocyclo
func (r *ReconcileMysqlNode) Reconcile(request reconcile.Request) (reconcile.Result, error) {
ctx, cancel := context.WithTimeout(context.TODO(), mysqlReconciliationTimeout)
defer cancel()
Expand Down Expand Up @@ -173,6 +175,13 @@ func (r *ReconcileMysqlNode) Reconcile(request reconcile.Request) (reconcile.Res
return reconcile.Result{}, nil
}

// check if there is an in progress failover. K8s cluster resource may be inconsistent with what exists in k8s
fip := cluster.GetClusterCondition(api.ClusterConditionFailoverInProgress)
if fip != nil && fip.Status == corev1.ConditionTrue {
log.Info("cluster has failover in progress, given up to sync new node", "pod", pod.Spec.Hostname, "time", fip.LastTransitionTime)
return reconcile.Result{}, nil
}

// if it's an old version cluster then don't do anything
if shouldUpdateToVersion(cluster, 300) {
// if the cluster is upgraded then set on the cluster an annotations that skips the GTID configuration
Expand Down
7 changes: 7 additions & 0 deletions pkg/controller/orchestrator/orchestrator_reconcile.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,13 @@ func (ou *orcUpdater) updateStatusFromOrc(insts InstancesSet, master *orc.Instan
ou.cluster.UpdateStatusCondition(api.ClusterConditionReadOnly,
core.ConditionFalse, "ClusterReadOnlyFalse", "cluster is writable")
}

// if the master is up to date and not downtimed, clear the failover-in-progress condition
if master != nil && !master.IsDowntimed && master.IsUpToDate {
log.Info("cluster failover finished", "master", master.Key.Hostname)
ou.cluster.UpdateStatusCondition(api.ClusterConditionFailoverInProgress, core.ConditionFalse,
"ClusterMasterHealthy", "Master is healthy in orchestrator")
}
}

// updateNodesInOrc is the functions that tries to register
Expand Down
58 changes: 55 additions & 3 deletions pkg/orc-helper/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,18 @@ package orchelper
import (
"context"
"fmt"
"github.com/presslabs/controller-util/rand"
core "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/reference"
"sigs.k8s.io/controller-runtime/pkg/client"
"strings"
"time"

api "github.com/presslabs/mysql-operator/pkg/apis/mysql/v1alpha1"
"github.com/presslabs/mysql-operator/pkg/internal/mysqlcluster"
core "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// parse the orchestrator cluster name as NamespacedName
Expand Down Expand Up @@ -67,3 +72,50 @@ func UpdateClusterFailoverCond(c client.Client, clusterName, reason, msg string,

return nil
}

// UpdateEventForCluster records a Kubernetes event on the MysqlCluster
// resource that corresponds to the given orchestrator cluster name.
//
// clusterName is the cluster name as known by orchestrator; it is converted
// to a namespaced key with orcNameToKey. evReason and evMsg are stored as the
// event's Reason and Message. When warning is true the event is created with
// type Warning, otherwise with type Normal. The scheme s is used to build the
// object reference that is stored as the event's InvolvedObject.
//
// An error is returned when the cluster name can't be parsed, the cluster
// can't be fetched, the object reference or the random name suffix can't be
// generated, or the event can't be created.
func UpdateEventForCluster(c client.Client, s *runtime.Scheme, clusterName, evReason, evMsg string, warning bool) error {
	key, err := orcNameToKey(clusterName)
	if err != nil {
		return err
	}

	cluster := mysqlcluster.New(&api.MysqlCluster{})

	// get cluster from k8s
	if err = c.Get(context.TODO(), key, cluster.Unwrap()); err != nil {
		return err
	}

	evType := core.EventTypeNormal
	if warning {
		evType = core.EventTypeWarning
	}

	ref, err := reference.GetReference(s, cluster.Unwrap())
	if err != nil {
		return err
	}

	// a random suffix plus the current unix time keeps event names unique
	// when the helper is invoked several times for the same cluster
	randStr, err := rand.AlphaNumericString(5)
	if err != nil {
		return err
	}

	// use a single timestamp so that first and last occurrence agree
	now := metav1.Now()
	event := &core.Event{
		ObjectMeta: metav1.ObjectMeta{
			Name:      fmt.Sprintf("%s-%s.%d", cluster.Name, randStr, time.Now().Unix()),
			Namespace: cluster.Namespace,
		},
		FirstTimestamp: now,
		// LastTimestamp and Count are filled in the same way client-go's
		// event recorder does for a first occurrence; leaving them zero makes
		// consumers that read lastTimestamp/count see an empty time and 0.
		LastTimestamp:  now,
		Count:          1,
		Type:           evType,
		Reason:         evReason,
		Message:        evMsg,
		Source:         core.EventSource{Component: "orchestrator"},
		InvolvedObject: *ref,
	}

	return c.Create(context.TODO(), event)
}

0 comments on commit 80e1ccd

Please sign in to comment.