From 7675273a4d2ccdddf399cb737516a2e31d14f2c7 Mon Sep 17 00:00:00 2001
From: Yao Xiao <XYSHOW@amazon.com>
Date: Thu, 10 Sep 2020 15:21:35 -0700
Subject: [PATCH] Remove redundant args and read them from env vars

---
 ...tatus.sh => check-load-balancer-status.sh} | 29 ++++++----
 images/aws-scripts/create-eks-cluster.sh      | 16 +++---
 images/aws-scripts/delete-eks-cluster.sh      |  4 +-
 images/aws-scripts/deploy-kubeflow.sh         |  2 +-
 images/run_workflows.sh                       |  9 ++--
 py/kubeflow/testing/run_e2e_workflow.py       | 54 +++----------------
 6 files changed, 42 insertions(+), 72 deletions(-)
 rename images/aws-scripts/{check-deployment-status.sh => check-load-balancer-status.sh} (61%)

diff --git a/images/aws-scripts/check-deployment-status.sh b/images/aws-scripts/check-load-balancer-status.sh
similarity index 61%
rename from images/aws-scripts/check-deployment-status.sh
rename to images/aws-scripts/check-load-balancer-status.sh
index 48e6fcf3b..0c4703401 100755
--- a/images/aws-scripts/check-deployment-status.sh
+++ b/images/aws-scripts/check-load-balancer-status.sh
@@ -20,15 +20,26 @@ set -o errexit
 set -o nounset
 set -o pipefail
 
-EKS_CLUSTER_NAME="${EKS_CLUSTER}"
-
-# Allow 5 minutes to wait for kubeflow deployment to be ready
-sleep 5m
+EKS_CLUSTER_NAME="${CLUSTER_NAME}"
 
 aws eks update-kubeconfig --name=$EKS_CLUSTER_NAME
 
-ingress_ip=$(kubectl get ingress istio-ingress -n istio-system  -o json | jq '.status.loadBalancer.ingress' | grep aws)
-
-if [ ${#ingress_ip} -eq 0 ] ;then echo "ERROR" >&2 & exit 64; fi
-
-echo "The Kubeflow Deployment succeeded"
+# Poll up to 10 times, 30 seconds apart, for an AWS load balancer hostname on the ingress.
+retry_times=0
+retry_limit=10
+until [ "$retry_times" -ge "$retry_limit" ]
+do
+  # "|| true": under errexit/pipefail a failing kubectl or a non-matching grep would abort the script instead of retrying.
+  ingress_ip=$(kubectl get ingress istio-ingress -n istio-system -o json | jq '.status.loadBalancer.ingress' | grep aws || true)
+  if [ -n "${ingress_ip}" ]
+  then
+    echo "The Kubeflow Deployment succeeded"
+    exit 0
+  fi
+
+  sleep 30
+  retry_times=$((retry_times+1))
+done
+
+echo "Kubeflow Deployment Status: ERROR" >&2
+exit 64
diff --git a/images/aws-scripts/create-eks-cluster.sh b/images/aws-scripts/create-eks-cluster.sh
index f18cb1ca0..4da341080 100755
--- a/images/aws-scripts/create-eks-cluster.sh
+++ b/images/aws-scripts/create-eks-cluster.sh
@@ -20,16 +20,16 @@ set -o errexit
 set -o nounset
 set -o pipefail
 
-EKS_CLUSTER_NAME="${EKS_CLUSTER}"
+EKS_CLUSTER_NAME="${CLUSTER_NAME}"
 
 # Create EKS Cluster
-# AI: Need to determine which NG template we need????
+# TODO (PatrickXYS): Need to determine which NG template we need
 eksctl create cluster \
 --name $EKS_CLUSTER_NAME \
---version 1.17 \
---region us-west-2 \
+--version ${EKS_CLUSTER_VERSION:-"1.17"} \
+--region ${AWS_REGION:-"us-west-2"} \
 --nodegroup-name linux-nodes \
---node-type m5.xlarge \
---nodes ${DESIRED_NODE} \
---nodes-min ${MIN_NODE} \
---nodes-max ${MAX_NODE}
+--node-type ${EKS_NODE_TYPE:-"m5.xlarge"} \
+--nodes ${DESIRED_NODE:-"2"} \
+--nodes-min ${MIN_NODE:-"1"} \
+--nodes-max ${MAX_NODE:-"4"}
diff --git a/images/aws-scripts/delete-eks-cluster.sh b/images/aws-scripts/delete-eks-cluster.sh
index 61b5b5c69..279b3de96 100755
--- a/images/aws-scripts/delete-eks-cluster.sh
+++ b/images/aws-scripts/delete-eks-cluster.sh
@@ -14,13 +14,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# This shell script is used to delete an EKS cluste from our argo workflow
+# This shell script is used to delete an EKS cluster from our argo workflow
 
 set -o errexit
 set -o nounset
 set -o pipefail
 
-EKS_CLUSTER_NAME="${EKS_CLUSTER}"
+EKS_CLUSTER_NAME="${CLUSTER_NAME}"
 
 # Delete EKS Cluster
 eksctl delete cluster $EKS_CLUSTER_NAME
\ No newline at end of file
diff --git a/images/aws-scripts/deploy-kubeflow.sh b/images/aws-scripts/deploy-kubeflow.sh
index cd1ce072b..267a0f5ef 100755
--- a/images/aws-scripts/deploy-kubeflow.sh
+++ b/images/aws-scripts/deploy-kubeflow.sh
@@ -20,7 +20,7 @@ set -o errexit
 set -o nounset
 set -o pipefail
 
-EKS_CLUSTER_NAME="${EKS_CLUSTER}"
+EKS_CLUSTER_NAME="${CLUSTER_NAME}"
 EKS_NAMESPACE_NAME="${EKS_NAMESPACE}"
 
 # Load kubeconfig
diff --git a/images/run_workflows.sh b/images/run_workflows.sh
index bd4381e25..37550376c 100644
--- a/images/run_workflows.sh
+++ b/images/run_workflows.sh
@@ -23,14 +23,11 @@ else
   then
     echo "Triggering AWS Argo Workflows"
     python -m kubeflow.testing.run_e2e_workflow \
-      --cluster=${AWS_EKS_CLUSTER} \
-      --bucket=${ARTIFACTS_S3_BUCKET} \
+      --cluster=${AWS_EKS_CLUSTER:-"kubeflow-prow-dev-test"} \
+      --bucket=${ARTIFACTS_S3_BUCKET:-"aws-kubernetes-jenkins"} \
       --config_file=/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml \
       --repos_dir=/src \
       --cloud_provider=aws \
-      --aws_region=${AWS_DEFAULT_REGION} \
-      --desired_node=${DESIRED_NODE} \
-      --min_node=${MIN_NODE} \
-      --max_node=${MAX_NODE}
+      --aws_region=${AWS_DEFAULT_REGION:-"us-west-2"}
   fi
 fi
\ No newline at end of file
diff --git a/py/kubeflow/testing/run_e2e_workflow.py b/py/kubeflow/testing/run_e2e_workflow.py
index b9a9558c1..4cdb20975 100644
--- a/py/kubeflow/testing/run_e2e_workflow.py
+++ b/py/kubeflow/testing/run_e2e_workflow.py
@@ -60,6 +60,7 @@
 import os
 import tempfile
 import six
+import sys
 from kubernetes import client as k8s_client
 from kubeflow.testing import argo_client
 from kubeflow.testing import ks_util
@@ -167,12 +168,6 @@ def run(args, file_handler): # pylint: disable=too-many-statements,too-many-bran
   base_branch_name = os.getenv("PULL_BASE_REF")
   pull_base_sha = os.getenv("PULL_BASE_SHA")
 
-  if args.cloud_provider == "aws":
-    args.bucket = "aws-kubernetes-jenkins" if not args.bucket else args.bucket
-    args.desired_node = "2" if not args.desired_node else args.desired_node
-    args.min_node = "1" if not args.min_node else args.min_node
-    args.max_node = "4" if not args.max_node else args.max_node
-
   # For presubmit/postsubmit jobs, find the list of files changed by the PR.
   diff_command = []
   if job_type == "presubmit":
@@ -361,12 +356,6 @@ def run(args, file_handler): # pylint: disable=too-many-statements,too-many-bran
                args.bucket], cwd=w.app_dir)
       util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "cluster_name",
                 "eks-cluster-{}".format(salt)], cwd=w.app_dir)
-      util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "desired_node",
-               args.desired_node], cwd=w.app_dir)
-      util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "min_node",
-               args.min_node], cwd=w.app_dir)
-      util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "max_node",
-               args.max_node], cwd=w.app_dir)
       if args.release:
         util.run([ks_cmd, "param", "set", "--env=" + env, w.component, "versionTag",
                   os.getenv("VERSION_TAG")], cwd=w.app_dir)
@@ -374,24 +363,19 @@ def run(args, file_handler): # pylint: disable=too-many-statements,too-many-bran
       # Set any extra params. We do this in alphabetical order to make it easier to verify in
       # the unittest.
       param_names = w.params.keys()
-      # In python3, dict_keys.sort() not work given
-      # https://docs.python.org/3/whatsnew/3.0.html#views-and-iterators-instead-of-lists
-      param_names = sorted(param_names)
+      if sys.version_info >= (3, 0):
+        # In python3, dict_keys.sort() not work given
+        # https://docs.python.org/3/whatsnew/3.0.html#views-and-iterators-instead-of-lists
+        param_names = sorted(param_names)
+      else:
+        param_names.sort()
       for k in param_names:
         util.run([ks_cmd, "param", "set", "--env=" + env, w.component, k,
                  "{0}".format(w.params[k])], cwd=w.app_dir)
 
       # For debugging print out the manifest
       util.run([ks_cmd, "show", env, "-c", w.component], cwd=w.app_dir)
-
-      if not args.cloud_provider:
-        util.run([ks_cmd, "apply", env, "-c", w.component], cwd=w.app_dir)
-      elif args.cloud_provider == "aws":
-        generated_workflow_name = "generated_workflow.yaml"
-        util.save_process_output([ks_cmd, "show", env, "-c", w.component],
-                                 cwd=w.app_dir,
-                                 output=w.app_dir + '/' + generated_workflow_name)
-        util.run(["kubectl", "apply", "-f", generated_workflow_name], cwd=w.app_dir)
+      util.run([ks_cmd, "apply", env, "-c", w.component], cwd=w.app_dir)
 
       ui_url = ("http://testing-argo.kubeflow.org/workflows/kubeflow-test-infra/{0}"
               "?tab=workflow".format(workflow_name))
@@ -657,28 +641,6 @@ def main(unparsed_args=None):  # pylint: disable=too-many-locals
     help="region containing the EKS cluster to use to run the workflow."
   )
 
-  parser.add_argument(
-    "--desired_node",
-    type=str,
-    default="2",
-    help="desired number of nodes lives in new EKS cluster"
-  )
-
-  parser.add_argument(
-    "--min_node",
-    type=str,
-    default="1",
-    help="minimum number of nodes lives in new EKS cluster"
-  )
-
-  parser.add_argument(
-    "--max_node",
-    type=str,
-    default="4",
-    help="maximum number of nodes lives in new EKS cluster"
-  )
-
-
   #############################################################################
   # Process the command line arguments.