From 7675273a4d2ccdddf399cb737516a2e31d14f2c7 Mon Sep 17 00:00:00 2001 From: Yao Xiao Date: Thu, 10 Sep 2020 15:21:35 -0700 Subject: [PATCH] Remove redundant args and make as ENVS --- ...tatus.sh => check-load-balancer-status.sh} | 29 ++++++---- images/aws-scripts/create-eks-cluster.sh | 16 +++--- images/aws-scripts/delete-eks-cluster.sh | 4 +- images/aws-scripts/deploy-kubeflow.sh | 2 +- images/run_workflows.sh | 9 ++-- py/kubeflow/testing/run_e2e_workflow.py | 54 +++---------------- 6 files changed, 42 insertions(+), 72 deletions(-) rename images/aws-scripts/{check-deployment-status.sh => check-load-balancer-status.sh} (61%) diff --git a/images/aws-scripts/check-deployment-status.sh b/images/aws-scripts/check-load-balancer-status.sh similarity index 61% rename from images/aws-scripts/check-deployment-status.sh rename to images/aws-scripts/check-load-balancer-status.sh index 48e6fcf3b..0c4703401 100755 --- a/images/aws-scripts/check-deployment-status.sh +++ b/images/aws-scripts/check-load-balancer-status.sh @@ -20,15 +20,26 @@ set -o errexit set -o nounset set -o pipefail -EKS_CLUSTER_NAME="${EKS_CLUSTER}" - -# Allow 5 minutes to wait for kubeflow deployment to be ready -sleep 5m +EKS_CLUSTER_NAME="${CLUSTER_NAME}" aws eks update-kubeconfig --name=$EKS_CLUSTER_NAME -ingress_ip=$(kubectl get ingress istio-ingress -n istio-system -o json | jq '.status.loadBalancer.ingress' | grep aws) - -if [ ${#ingress_ip} -eq 0 ] ;then echo "ERROR" >&2 & exit 64; fi - -echo "The Kubeflow Deployment succeeded" +# Retry up to 10 times at 30-second intervals; "|| true" keeps errexit/pipefail from aborting the loop when the lookup finds nothing yet +retry_times=0 +retry_limit=10 +until [ "$retry_times" -ge "$retry_limit" ] +do + ingress_ip=$(kubectl get ingress istio-ingress -n istio-system -o json | jq '.status.loadBalancer.ingress' | grep aws || true) + if [ ${#ingress_ip} -eq 0 ] ; + then + sleep 30 + else + echo "The Kubeflow Deployment succeeded" + exit 0 + fi + + retry_times=$((retry_times+1)) +done + +echo "Kubeflow Deployment Status: ERROR" +exit 64 diff --git 
a/images/aws-scripts/create-eks-cluster.sh b/images/aws-scripts/create-eks-cluster.sh index f18cb1ca0..4da341080 100755 --- a/images/aws-scripts/create-eks-cluster.sh +++ b/images/aws-scripts/create-eks-cluster.sh @@ -20,16 +20,16 @@ set -o errexit set -o nounset set -o pipefail -EKS_CLUSTER_NAME="${EKS_CLUSTER}" +EKS_CLUSTER_NAME="${CLUSTER_NAME}" # Create EKS Cluster -# AI: Need to determine which NG template we need???? +# TODO (PatrickXYS): Need to determine which NG template we need eksctl create cluster \ --name $EKS_CLUSTER_NAME \ ---version 1.17 \ ---region us-west-2 \ +--version ${EKS_CLUSTER_VERSION:-"1.17"} \ +--region ${AWS_REGION:-"us-west-2"} \ --nodegroup-name linux-nodes \ ---node-type m5.xlarge \ ---nodes ${DESIRED_NODE} \ ---nodes-min ${MIN_NODE} \ ---nodes-max ${MAX_NODE} +--node-type ${EKS_NODE_TYPE:-"m5.xlarge"} \ +--nodes ${DESIRED_NODE:-"2"} \ +--nodes-min ${MIN_NODE:-"1"} \ +--nodes-max ${MAX_NODE:-"4"} diff --git a/images/aws-scripts/delete-eks-cluster.sh b/images/aws-scripts/delete-eks-cluster.sh index 61b5b5c69..279b3de96 100755 --- a/images/aws-scripts/delete-eks-cluster.sh +++ b/images/aws-scripts/delete-eks-cluster.sh @@ -14,13 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-# This shell script is used to delete an EKS cluste from our argo workflow +# This shell script is used to delete an EKS cluster from our argo workflow set -o errexit set -o nounset set -o pipefail -EKS_CLUSTER_NAME="${EKS_CLUSTER}" +EKS_CLUSTER_NAME="${CLUSTER_NAME}" # Delete EKS Cluster eksctl delete cluster $EKS_CLUSTER_NAME \ No newline at end of file diff --git a/images/aws-scripts/deploy-kubeflow.sh b/images/aws-scripts/deploy-kubeflow.sh index cd1ce072b..267a0f5ef 100755 --- a/images/aws-scripts/deploy-kubeflow.sh +++ b/images/aws-scripts/deploy-kubeflow.sh @@ -20,7 +20,7 @@ set -o errexit set -o nounset set -o pipefail -EKS_CLUSTER_NAME="${EKS_CLUSTER}" +EKS_CLUSTER_NAME="${CLUSTER_NAME}" EKS_NAMESPACE_NAME="${EKS_NAMESPACE}" # Load kubeconfig diff --git a/images/run_workflows.sh b/images/run_workflows.sh index bd4381e25..37550376c 100644 --- a/images/run_workflows.sh +++ b/images/run_workflows.sh @@ -23,14 +23,11 @@ else then echo "Triggering AWS Argo Workflows" python -m kubeflow.testing.run_e2e_workflow \ - --cluster=${AWS_EKS_CLUSTER} \ - --bucket=${ARTIFACTS_S3_BUCKET} \ + --cluster=${AWS_EKS_CLUSTER:-"kubeflow-prow-dev-test"} \ + --bucket=${ARTIFACTS_S3_BUCKET:-"aws-kubernetes-jenkins"} \ --config_file=/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml \ --repos_dir=/src \ --cloud_provider=aws \ - --aws_region=${AWS_DEFAULT_REGION} \ - --desired_node=${DESIRED_NODE} \ - --min_node=${MIN_NODE} \ - --max_node=${MAX_NODE} + --aws_region=${AWS_DEFAULT_REGION:-"us-west-2"} fi fi \ No newline at end of file diff --git a/py/kubeflow/testing/run_e2e_workflow.py b/py/kubeflow/testing/run_e2e_workflow.py index b9a9558c1..4cdb20975 100644 --- a/py/kubeflow/testing/run_e2e_workflow.py +++ b/py/kubeflow/testing/run_e2e_workflow.py @@ -60,6 +60,7 @@ import os import tempfile import six +import sys from kubernetes import client as k8s_client from kubeflow.testing import argo_client from kubeflow.testing import ks_util @@ -167,12 +168,6 @@ def run(args, 
file_handler): # pylint: disable=too-many-statements,too-many-bran base_branch_name = os.getenv("PULL_BASE_REF") pull_base_sha = os.getenv("PULL_BASE_SHA") - if args.cloud_provider == "aws": - args.bucket = "aws-kubernetes-jenkins" if not args.bucket else args.bucket - args.desired_node = "2" if not args.desired_node else args.desired_node - args.min_node = "1" if not args.min_node else args.min_node - args.max_node = "4" if not args.max_node else args.max_node - # For presubmit/postsubmit jobs, find the list of files changed by the PR. diff_command = [] if job_type == "presubmit": @@ -361,12 +356,6 @@ def run(args, file_handler): # pylint: disable=too-many-statements,too-many-bran args.bucket], cwd=w.app_dir) util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "cluster_name", "eks-cluster-{}".format(salt)], cwd=w.app_dir) - util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "desired_node", - args.desired_node], cwd=w.app_dir) - util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "min_node", - args.min_node], cwd=w.app_dir) - util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "max_node", - args.max_node], cwd=w.app_dir) if args.release: util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "versionTag", os.getenv("VERSION_TAG")], cwd=w.app_dir) @@ -374,24 +363,19 @@ def run(args, file_handler): # pylint: disable=too-many-statements,too-many-bran # Set any extra params. We do this in alphabetical order to make it easier to verify in # the unittest. 
param_names = w.params.keys() - # In python3, dict_keys.sort() not work given - # https://docs.python.org/3/whatsnew/3.0.html#views-and-iterators-instead-of-lists - param_names = sorted(param_names) + if sys.version_info >= (3, 0): + # In Python 3, dict.keys() returns a view that has no sort() method; see + # https://docs.python.org/3/whatsnew/3.0.html#views-and-iterators-instead-of-lists + param_names = sorted(param_names) + else: + param_names.sort() for k in param_names: util.run([ks_cmd, "param", "set", "--env=" + env, w.component, k, "{0}".format(w.params[k])], cwd=w.app_dir) # For debugging print out the manifest util.run([ks_cmd, "show", env, "-c", w.component], cwd=w.app_dir) - - if not args.cloud_provider: - util.run([ks_cmd, "apply", env, "-c", w.component], cwd=w.app_dir) - elif args.cloud_provider == "aws": - generated_workflow_name = "generated_workflow.yaml" - util.save_process_output([ks_cmd, "show", env, "-c", w.component], - cwd=w.app_dir, - output=w.app_dir + '/' + generated_workflow_name) - util.run(["kubectl", "apply", "-f", generated_workflow_name], cwd=w.app_dir) + util.run([ks_cmd, "apply", env, "-c", w.component], cwd=w.app_dir) ui_url = ("http://testing-argo.kubeflow.org/workflows/kubeflow-test-infra/{0}" "?tab=workflow".format(workflow_name)) @@ -657,28 +641,6 @@ def main(unparsed_args=None): # pylint: disable=too-many-locals help="region containing the EKS cluster to use to run the workflow." ) - parser.add_argument( - "--desired_node", - type=str, - default="2", - help="desired number of nodes lives in new EKS cluster" - ) - - parser.add_argument( - "--min_node", - type=str, - default="1", - help="minimum number of nodes lives in new EKS cluster" - ) - - parser.add_argument( - "--max_node", - type=str, - default="4", - help="maximum number of nodes lives in new EKS cluster" - ) - - ############################################################################# # Process the command line arguments.