Skip to content

Commit

Permalink
AWS Init Support for Kubeflow/Testing (#755)
Browse files Browse the repository at this point in the history
* Add init support for AWS Kubeflow Tests

* Clean up GCP resources in AWS Dockerfile

* Add AWS-CREDENTIALS ENV Support

* Clean Up Resources

* Make Params / Envs Configurable

* Code Clean-up

* Remove redundant args and make as ENVS

* Address PlaceHold Functions for AWS

* Put AWS Python Files in separate folder

* Add OWNERS files for AWS assets

* Remove added OWNERS

* Clean up

* Clean up

* Clean up aws_util functions
  • Loading branch information
Yao Xiao authored Oct 10, 2020
1 parent 6bd7f25 commit db508d7
Show file tree
Hide file tree
Showing 13 changed files with 992 additions and 44 deletions.
93 changes: 93 additions & 0 deletions images/Dockerfile.py3.aws
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Build the docker image used to trigger the Kubeflow test workflows on AWS
# (the entrypoint is run_workflows.sh and CLOUD_PROVIDER is set to aws).
#
# The context for this docker file should be the root of the kubeflow/testing repository.
FROM ubuntu:18.04

# Install the base tooling and make python3.8 the default `python`.
# The apt package lists are removed in the same layer so that they do not
# stay in the final image.
RUN apt-get update -y && \
    apt-get install -y curl git python3.8 python3-pip wget jq && \
    ln -sf /usr/bin/python3.8 /usr/bin/python && \
    rm -rf /var/lib/apt/lists/*

# Python dependencies used by the test tooling.
#
# kubernetes is pinned to 9.0.0.
# See https://github.com/kubeflow/gcp-blueprints/issues/52#issuecomment-645446088
# our libs seem to break with 11.0.0
RUN python3.8 -m pip install \
      filelock \
      fire \
      google-api-python-client \
      google-cloud \
      google-cloud-storage \
      junit-xml \
      kubernetes==9.0.0 \
      lint \
      oauth2client \
      pytest==5.4 \
      pytest-timeout==1.4 \
      python-dateutil \
      retrying \
      watchdog \
      awscli \
      boto3

# Install go
# The tarball is extracted into /usr/local/go and then deleted in the same
# layer so the archive does not remain in the image.
RUN cd /tmp && \
    wget -O /tmp/go.tar.gz https://dl.google.com/go/go1.14.2.linux-amd64.tar.gz && \
    tar -C /usr/local -xzf go.tar.gz && \
    rm -f /tmp/go.tar.gz

# Install the hub CLI for git
# -f makes curl exit non-zero on an HTTP error instead of saving the error
# page as the archive. The archive is removed once it has been unpacked.
RUN cd /tmp && \
    curl -fLO https://github.com/github/hub/releases/download/v2.13.0/hub-linux-amd64-2.13.0.tgz && \
    tar -xvf hub-linux-amd64-2.13.0.tgz && \
    mv hub-linux-amd64-2.13.0 /usr/local && \
    ln -sf /usr/local/hub-linux-amd64-2.13.0/bin/hub /usr/local/bin/hub && \
    rm -f /tmp/hub-linux-amd64-2.13.0.tgz

# Install kustomize
# -f makes curl exit non-zero on an HTTP error; without it the error body
# would be saved and installed as the kustomize "binary" and the build would
# still succeed.
RUN export KUSTOMIZE_VERSION=3.2.0 && \
    cd /tmp && \
    curl -fLO https://github.com/kubernetes-sigs/kustomize/releases/download/v${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_amd64 && \
    mv kustomize_${KUSTOMIZE_VERSION}_linux_amd64 /usr/local/bin/kustomize && \
    chmod a+x /usr/local/bin/kustomize

# Install kubectl
# -f makes curl exit non-zero on an HTTP error so that a failed download
# breaks the build instead of installing the error response as kubectl.
RUN curl -fLO https://amazon-eks.s3.us-west-2.amazonaws.com/1.17.9/2020-08-04/bin/linux/amd64/kubectl && \
    mv kubectl /usr/local/bin && \
    chmod a+x /usr/local/bin/kubectl

# Install aws-iam-authenticator
# -f makes curl exit non-zero on an HTTP error so that a failed download
# breaks the build instead of installing the error response as the binary.
RUN curl -f -o aws-iam-authenticator https://amazon-eks.s3.us-west-2.amazonaws.com/1.17.9/2020-08-04/bin/linux/amd64/aws-iam-authenticator && \
    mv aws-iam-authenticator /usr/local/bin && \
    chmod a+x /usr/local/bin/aws-iam-authenticator

# Install eksctl
# The release archive is streamed straight into tar and unpacked under /tmp.
# NOTE(review): the URL points at the "latest" release, so the eksctl version
# is whatever is current at build time - confirm whether it should be pinned.
RUN curl -sL "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" \
      | tar xz -C /tmp && \
    mv /tmp/eksctl /usr/local/bin && \
    chmod a+x /usr/local/bin/eksctl

# Install ks
# The archive is streamed into tar; the binary is installed as ks-13.
RUN curl -sL https://github.com/ksonnet/ksonnet/releases/download/v0.13.1/ks_0.13.1_linux_amd64.tar.gz \
      | tar xz -C /tmp && \
    mv /tmp/ks_0.13.1_linux_amd64/ks /usr/local/bin/ks-13 && \
    chmod a+x /usr/local/bin/ks-13

# Create go symlinks
# One ln call links all three tools into /usr/local/bin.
RUN ln -sf \
      /usr/local/go/bin/go \
      /usr/local/go/bin/gofmt \
      /usr/local/go/bin/godoc \
      /usr/local/bin

# Install the go-junit-report tool.
RUN go get -u github.com/jstemmer/go-junit-report

# Scripts that check out the repositories and trigger the workflows.
COPY ./images/checkout.sh /usr/local/bin
COPY ./images/checkout_repos.sh /usr/local/bin
COPY ./images/run_workflows.sh /usr/local/bin

# AWS BASH SCRIPTS
COPY ./images/aws-scripts/*.sh /usr/local/bin/

# Every script copied above ends in .sh, so a single chmod (one layer instead
# of three) makes all of them executable.
RUN chmod a+x /usr/local/bin/*.sh

# Python module search path used inside the container.
ENV PYTHONPATH=/src/kubeflow/testing/py

# Selects the AWS branch in run_workflows.sh.
ENV CLOUD_PROVIDER=aws

# NOTE(review): /root/go/bin is presumably where `go get` places its binaries
# (default GOPATH for root) - confirm.
ENV PATH=/root/go/bin:${PATH}

ENTRYPOINT ["/usr/local/bin/run_workflows.sh"]
3 changes: 3 additions & 0 deletions images/aws-scripts/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
approvers:
- Jeffwan
- PatrickXYS
49 changes: 49 additions & 0 deletions images/aws-scripts/check-load-balancer-status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script waits until the istio-ingress Ingress in the istio-system
# namespace reports a load balancer hostname, which is treated as the signal
# that the Kubeflow deployment succeeded.
#
# Required environment:
#   CLUSTER_NAME - name of the EKS cluster whose kubeconfig is loaded
#
# Exit codes:
#   0  - the ingress reported a load balancer hostname
#   64 - no hostname was reported after all retries

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"

aws eks update-kubeconfig --name="${EKS_CLUSTER_NAME}"

echo "Start Fetching Ingress IP Address"

# Retry 10 times w/ 30 seconds interval
retry_times=0
retry_limit=10
while [ "$retry_times" -lt "$retry_limit" ]
do
  echo "See if we can fetch ingress"
  # `// empty` makes jq print nothing when the hostname is missing; a plain
  # `jq -r` would print the literal string "null", which is non-empty and
  # would be mistaken for a hostname.
  # `|| true` keeps errexit/pipefail from aborting the script when kubectl
  # fails (e.g. the ingress does not exist yet) so the attempt is retried.
  ingress_ip=$(kubectl get ingress istio-ingress -n istio-system -o json \
    | jq -r '.status.loadBalancer.ingress[0].hostname // empty' || true)
  if [ -z "${ingress_ip}" ]
  then
    sleep 30
    echo "Retrying Fetching Ingress IP Address"
  else
    echo "The Kubeflow Deployment succeeded"
    exit 0
  fi

  retry_times=$((retry_times+1))
done

echo "Kubeflow Deployment Status: ERROR"
exit 64
36 changes: 36 additions & 0 deletions images/aws-scripts/create-eks-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to build an EKS cluster from our argo workflow
#
# Required environment:
#   CLUSTER_NAME        - name of the EKS cluster to create
# Optional environment (default used when unset or empty):
#   EKS_CLUSTER_VERSION - Kubernetes version (1.17)
#   AWS_REGION          - region to create the cluster in (us-west-2)
#   EKS_NODE_TYPE       - instance type of the node group (m5.xlarge)
#   DESIRED_NODE        - desired number of nodes (2)
#   MIN_NODE            - minimum number of nodes (1)
#   MAX_NODE            - maximum number of nodes (4)

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"

# Create EKS Cluster
# TODO (PatrickXYS): Need to determine which NG template we need
# Every expansion is quoted so that each value is passed to eksctl as exactly
# one argument, without word splitting or glob expansion.
eksctl create cluster \
  --name "${EKS_CLUSTER_NAME}" \
  --version "${EKS_CLUSTER_VERSION:-1.17}" \
  --region "${AWS_REGION:-us-west-2}" \
  --nodegroup-name linux-nodes \
  --node-type "${EKS_NODE_TYPE:-m5.xlarge}" \
  --nodes "${DESIRED_NODE:-2}" \
  --nodes-min "${MIN_NODE:-1}" \
  --nodes-max "${MAX_NODE:-4}" \
  --managed
26 changes: 26 additions & 0 deletions images/aws-scripts/delete-eks-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to delete an EKS cluster from our argo workflow
#
# Required environment:
#   CLUSTER_NAME - name of the EKS cluster to delete
# Optional environment (default used when unset or empty):
#   AWS_REGION   - region the cluster lives in (us-west-2)

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"

# Delete EKS Cluster
# The region is resolved with the same expression create-eks-cluster.sh uses,
# so the delete targets the region the cluster was created in.
eksctl delete cluster \
  --name "${EKS_CLUSTER_NAME}" \
  --region "${AWS_REGION:-us-west-2}"
55 changes: 55 additions & 0 deletions images/aws-scripts/deploy-kubeflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to deploy kubeflow by kfctl
#
# Required environment:
#   CLUSTER_NAME  - EKS cluster to deploy to; also used as the name of the
#                   deployment directory created in the working directory
#   EKS_NAMESPACE - namespace that is created before kfctl runs (see the
#                   workaround below)

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"
EKS_NAMESPACE_NAME="${EKS_NAMESPACE}"

# Load kubeconfig
aws eks update-kubeconfig --name="${EKS_CLUSTER_NAME}"

# Fetch v1.1-branch kfctl
wget https://github.com/PatrickXYS/kfctl/releases/download/test1/kfctl_v1.1.0-2-g08ee6e4_linux.tar.gz -O kfctl.tar.gz
tar -xvf kfctl.tar.gz

# Add kfctl to PATH, to make the kfctl binary easier to use.
# Both entries are absolute: a relative entry would stop resolving as soon as
# the script changes into the deployment directory below.
export PATH="${PATH}:${PWD}:${PWD}/kfctl"

echo "kfctl version: "
kfctl version

### Workaround to fix issue
## msg="Encountered error applying application bootstrap: (kubeflow.error): Code 500 with message: Apply.Run
## : error when creating \"/tmp/kout927048001\": namespaces \"kubeflow-test-infra\" not found" filename="kustomize/kustomize.go:266"
# Only create the namespace when it is missing, so that a rerun does not abort
# on "already exists".
if ! kubectl get namespace "${EKS_NAMESPACE_NAME}" >/dev/null 2>&1; then
  kubectl create namespace "${EKS_NAMESPACE_NAME}"
fi
###

# Use the following kfctl configuration file for the AWS setup without authentication:
export CONFIG_URI="https://raw.githubusercontent.com/kubeflow/manifests/v1.1-branch/kfdef/kfctl_aws.v1.1.0.yaml"

# Create the directory you want to store deployment, this has to be ${EKS_CLUSTER_NAME}
# -p keeps a rerun from failing when the directory already exists.
mkdir -p "${EKS_CLUSTER_NAME}" && cd "${EKS_CLUSTER_NAME}"

# Download your configuration files, so that you can customize the configuration before deploying Kubeflow.
wget -O kfctl_aws.yaml "${CONFIG_URI}"

# Deploy Kubeflow
kfctl apply -V -f kfctl_aws.yaml
42 changes: 42 additions & 0 deletions images/aws-scripts/uninstall-kubeflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to uninstall kubeflow by kfctl
#
# It expects to run in a working directory that contains the kfctl binary and
# a ${CLUSTER_NAME}/ directory holding kfctl_aws.yaml.
#
# Required environment:
#   CLUSTER_NAME - EKS cluster to uninstall from; also the name of the
#                  deployment directory
#
# EKS_NAMESPACE is accepted but not required: nothing in this script uses it.

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"

# Load kubeconfig
aws eks update-kubeconfig --name="${EKS_CLUSTER_NAME}"

# Add kfctl to PATH, to make the kfctl binary easier to use.
# Both entries are absolute: a relative entry would stop resolving as soon as
# the script changes into the deployment directory below.
export PATH="${PATH}:${PWD}:${PWD}/kfctl"

echo "kfctl version: "
kfctl version

# Cd directory ${EKS_CLUSTER_NAME}
cd "${EKS_CLUSTER_NAME}"

# Print YAML file
cat kfctl_aws.yaml

# Uninstall Kubeflow
kfctl delete -V -f kfctl_aws.yaml
29 changes: 22 additions & 7 deletions images/run_workflows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,25 @@ set -ex
/usr/local/bin/checkout.sh /src

# Trigger a workflow
python -m kubeflow.testing.run_e2e_workflow \
--project=kubeflow-ci \
--zone=us-east1-d \
--cluster=kubeflow-testing \
--bucket=kubernetes-jenkins \
--config_file=/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml \
--repos_dir=/src
# Dispatch on CLOUD_PROVIDER. An unset or empty value is treated as "gcp".
# Any other unknown value is an error: silently doing nothing would let the
# job report success without having triggered a workflow.
case "${CLOUD_PROVIDER:-gcp}" in
  gcp)
    python -m kubeflow.testing.run_e2e_workflow \
      --project=kubeflow-ci \
      --zone=us-east1-d \
      --cluster=kubeflow-testing \
      --bucket=kubernetes-jenkins \
      --config_file="/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml" \
      --repos_dir=/src
    ;;
  aws)
    echo "Triggering AWS Argo Workflows"
    python -m kubeflow.testing.run_e2e_workflow \
      --cluster="${AWS_EKS_CLUSTER:-kubeflow-shared-test-infra-poc-argo}" \
      --bucket="${ARTIFACTS_S3_BUCKET:-aws-kubernetes-jenkins}" \
      --config_file="/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml" \
      --repos_dir=/src \
      --cloud_provider=aws \
      --aws_region="${AWS_DEFAULT_REGION:-us-west-2}"
    ;;
  *)
    echo "Unsupported CLOUD_PROVIDER: ${CLOUD_PROVIDER}" >&2
    exit 1
    ;;
esac
3 changes: 3 additions & 0 deletions py/kubeflow/testing/cloudprovider/aws/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
approvers:
- Jeffwan
- PatrickXYS
Empty file.
Loading

0 comments on commit db508d7

Please sign in to comment.