MetadataStore: Upgrade tool
dushyanthsc committed May 4, 2020
1 parent 9ade740 commit 1ac8d9e
Showing 2 changed files with 172 additions and 0 deletions.
tools/metadatastore-upgrade/README.md (new file, 35 additions)
# MetadataStore Upgrade Tool

The upgrade tool provides a mechanism for KFP users to upgrade the MetadataStore component in their KFP cluster. A MetadataStore upgrade consists of two aspects:
* Upgrading the image used in the `metadata-grpc-deployment` [K8s Deployment](https://github.com/kubeflow/pipelines/blob/master/manifests/kustomize/base/metadata/metadata-grpc-deployment.yaml). The user is expected to provide an image tag for this upgrade.
* Upgrading the MySQL database schema to match the MLMD library used in the `metadata-grpc-deployment` image. The tool handles the schema upgrade automatically by setting the `--enable_database_upgrade=true` [flag](https://github.com/kubeflow/pipelines/blob/master/manifests/kustomize/base/metadata/metadata-grpc-deployment.yaml) while invoking the metadata gRPC server.

**Note: This upgrade tool should be the only client interacting with the MetadataStore during the upgrade.**

The contract for this tool was published and shared with the Kubeflow Pipelines community in this [doc](https://docs.google.com/document/d/1gF-mx3lMyU9h7MAAOXP-KGV-BF-UabDsAlFrWNNhKBo/edit?usp=sharing).

To run the tool, execute the following command from this folder:

```
go run main.go --new_image_tag=<image-tag> --kubeconfig=<kubeconfig-path> --namespace=<namespace-name>
```

Arguments:
* `--new_image_tag` (Required) - The image tag of the gRPC server version to upgrade to. The list of available images can be found in the `gcr.io/tfx-oss-public/ml_metadata_store_server` repository.
* `--kubeconfig` (Optional) - Absolute path to a kubeconfig file. If this argument is not specified, `$HOME/.kube/config` is used.
* `--namespace` (Optional) - Namespace where `metadata-grpc-deployment` is deployed in the KFP cluster. Defaults to `kubeflow`.
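
For example, assuming an illustrative released tag `0.22.0` and the default kubeconfig and namespace:

```
go run main.go --new_image_tag=0.22.0
```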

**Note:**
1. Upgrade is supported from [ml-metadata v0.21.0](https://github.com/google/ml-metadata/releases/tag/v0.21.0) onwards.
2. The ML Metadata Store Server image version currently used by the `metadata-grpc-deployment` deployment of a KFP cluster can be found in the `Active revisions` section of the deployment details page.
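
Instead of the deployment details page, the current image can also be read directly off the Deployment with `kubectl` (a sketch, assuming the default `kubeflow` namespace):

```
kubectl -n kubeflow get deployment metadata-grpc-deployment -o jsonpath='{.spec.template.spec.containers[0].image}'
```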

## Execution Flow

Using the Kubernetes [client-go](https://github.com/kubernetes/client-go) library, the tool performs the upgrade in the following steps:

1. Queries the KFP cluster for the `metadata-grpc-deployment` K8s Deployment resource.
2. Updates the image in the deployment's pod spec using the image tag provided as an argument, and adds `--enable_database_upgrade=true` to the deployment's container arguments.
3. Uses [client-go's](https://github.com/kubernetes/client-go) `RetryOnConflict` API to update the Deployment.
4. If the update is successful, the `metadata-grpc-deployment` deployment is updated again to remove the `--enable_database_upgrade=true` argument. If this cleanup update fails, the tool logs the failure message to `stdout` with error details; the MetadataStore is likely still functional but needs verification (see the sketch after this list).
5. If the update in step 3 fails, the tool errors out by logging the failure message to `stdout` with error details. In this state, the user is expected to fix the MetadataStore deployment in the cluster.
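
After a successful run, one way to verify the cleanup in step 4 is to confirm the upgrade flag is no longer among the container arguments (a sketch, assuming the default `kubeflow` namespace):

```
kubectl -n kubeflow get deployment metadata-grpc-deployment -o jsonpath='{.spec.template.spec.containers[0].args}'
```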

tools/metadatastore-upgrade/main.go (new file, 137 additions)
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package main

import (
"flag"
"fmt"
"log"
"os"
"path/filepath"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
v1 "k8s.io/client-go/kubernetes/typed/apps/v1"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/util/homedir"
"k8s.io/client-go/util/retry"

_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
)

var (
kubeconfigPath = flag.String("kubeconfig", "", "Absolute path to kubeconfig file.")
imageTag = flag.String("new_image_tag", "", "Image tag of a released ML Metadata store server image in repository gcr.io/tfx-oss-public/ml_metadata_store_server.")
deploymentNamespace = flag.String("namespace", "kubeflow", "Namespace of the Metadata deployment in the cluster.")
)

const (
mlMetadataDeployment = "metadata-grpc-deployment"
mlMetadataImage = "gcr.io/tfx-oss-public/ml_metadata_store_server"
upgradeFlag = "--enable_database_upgrade=true"
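	// maxWaitTime bounds, in seconds, how long updateDeployment waits for the
	// updated Deployment's replicas to become ready.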
maxWaitTime = 30
)

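// updateDeployment fetches the metadata-grpc-deployment Deployment, points it at
// the given image and container args, applies the change with RetryOnConflict,
// and then waits for the rollout to report all replicas ready.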
func updateDeployment(deploymentsClient v1.DeploymentInterface, image string, containerArgs []string) error {
return retry.RetryOnConflict(retry.DefaultRetry, func() error {
var err error
// Retrieve the latest version of Deployment before attempting update
// RetryOnConflict uses exponential backoff to avoid exhausting the apiserver
result, err := deploymentsClient.Get(mlMetadataDeployment, metav1.GetOptions{})
if err != nil {
return fmt.Errorf("failed to get latest version of the deployment: %v", err)
}

result.Spec.Template.Spec.Containers[0].Image = image
result.Spec.Template.Spec.Containers[0].Args = containerArgs
if _, err := deploymentsClient.Update(result); err != nil {
return err
}

		// Poll the Deployment until the rollout completes, re-reading it on each
		// iteration so the replica status reflects the in-progress update.
		waitTime := 0
		updateSuccessful := false
		for waitTime < maxWaitTime && !updateSuccessful {
			result, err = deploymentsClient.Get(mlMetadataDeployment, metav1.GetOptions{})
			if err != nil {
				return fmt.Errorf("failed to get latest version of deployment after update: %v", err)
			}
			if *result.Spec.Replicas != result.Status.ReadyReplicas {
				time.Sleep(time.Second)
				waitTime++
			} else {
				updateSuccessful = true
			}
		}

		if !updateSuccessful {
			return fmt.Errorf("updated deployment failed to reach running state within %d seconds", maxWaitTime)
		}

		return nil
})
}

func main() {
flag.Parse()
log.SetOutput(os.Stdout)

	if *imageTag == "" {
		log.Printf("Missing new_image_tag flag")
		flag.Usage()
		os.Exit(1)
	}

if *kubeconfigPath == "" {
if home := homedir.HomeDir(); home != "" {
*kubeconfigPath = filepath.Join(home, ".kube", "config")
}
}

config, err := clientcmd.BuildConfigFromFlags("", *kubeconfigPath)
if err != nil {
log.Fatalf("Error reading kubeconfig file: %v", err)
}

clientset, err := kubernetes.NewForConfig(config)
if err != nil {
log.Fatalf("Error setting up client auth: %v", err)
}

deploymentsClient := clientset.AppsV1().Deployments(*deploymentNamespace)

originalDeployment, err := deploymentsClient.Get(mlMetadataDeployment, metav1.GetOptions{})
if err != nil {
log.Fatalf("Failed to get old Deployment: %v", err)
}

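	// The original args are reused by the cleanup pass to drop the upgrade flag;
	// the original image is recorded for the log line below.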
originalImage := originalDeployment.Spec.Template.Spec.Containers[0].Image
originalContainerArgs := originalDeployment.Spec.Template.Spec.Containers[0].Args

newImage := mlMetadataImage + ":" + *imageTag
log.Printf("Upgrading MetadataStore in Namespace: %s from Image: %s to Image: %s", *deploymentNamespace, originalImage, newImage)
	if err := updateDeployment(deploymentsClient, newImage, append(originalContainerArgs, upgradeFlag)); err == nil {
		log.Printf("MetadataStore successfully upgraded to Image: %s", newImage)
		log.Printf("Cleaning up Upgrade")
		// In a highly unlikely scenario upgrade cleanup can fail.
		if err := updateDeployment(deploymentsClient, newImage, originalContainerArgs); err != nil {
			log.Printf("Upgrade cleanup failed: %v. \nLikely MetadataStore is in a functioning state but needs verification.", err)
		}
	} else {
		log.Fatalf("Upgrade attempt failed: %v. MetadataStore deployment in the cluster needs attention.", err)
	}
}
