Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AWS Init Support for Kuberflow/Testing #755

Merged
merged 14 commits into from
Oct 10, 2020
93 changes: 93 additions & 0 deletions images/Dockerfile.py3.aws
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Build the docker image used to run the scripts
# to continuously update our docker files.
#
# The context for this docker file should be the root of the kubeflow/testing repository.
FROM ubuntu:18.04

RUN apt-get update -y && \
apt-get install -y curl git python3.8 python3-pip wget jq && \
ln -sf /usr/bin/python3.8 /usr/bin/python

RUN python3.8 -m pip install \
filelock \
fire \
google-api-python-client \
google-cloud \
google-cloud-storage \
junit-xml \
# See https://github.com/kubeflow/gcp-blueprints/issues/52#issuecomment-645446088
# our libs seem to break with 11.0.0
kubernetes==9.0.0 \
lint \
oauth2client \
pytest==5.4 \
pytest-timeout==1.4 \
python-dateutil \
retrying \
watchdog \
awscli \
boto3

# Install go
RUN cd /tmp && \
wget -O /tmp/go.tar.gz https://dl.google.com/go/go1.14.2.linux-amd64.tar.gz && \
tar -C /usr/local -xzf go.tar.gz

# Install the hub CLI for git
RUN cd /tmp && \
curl -LO https://github.com/github/hub/releases/download/v2.13.0/hub-linux-amd64-2.13.0.tgz && \
tar -xvf hub-linux-amd64-2.13.0.tgz && \
mv hub-linux-amd64-2.13.0 /usr/local && \
ln -sf /usr/local/hub-linux-amd64-2.13.0/bin/hub /usr/local/bin/hub

RUN export KUSTOMIZE_VERSION=3.2.0 && \
cd /tmp && \
curl -LO https://github.com/kubernetes-sigs/kustomize/releases/download/v${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_amd64 && \
mv kustomize_${KUSTOMIZE_VERSION}_linux_amd64 /usr/local/bin/kustomize && \
chmod a+x /usr/local/bin/kustomize

# Install kubectl
RUN curl -LO https://amazon-eks.s3.us-west-2.amazonaws.com/1.17.9/2020-08-04/bin/linux/amd64/kubectl && \
mv kubectl /usr/local/bin && \
chmod a+x /usr/local/bin/kubectl

# Install aws-iam-authenticator
RUN curl -o aws-iam-authenticator https://amazon-eks.s3.us-west-2.amazonaws.com/1.17.9/2020-08-04/bin/linux/amd64/aws-iam-authenticator && \
mv aws-iam-authenticator /usr/local/bin && \
chmod a+x /usr/local/bin/aws-iam-authenticator

# Install eksctl
RUN curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && \
mv /tmp/eksctl /usr/local/bin && \
chmod a+x /usr/local/bin/eksctl

# Install ks
RUN curl --silent --location https://github.com/ksonnet/ksonnet/releases/download/v0.13.1/ks_0.13.1_linux_amd64.tar.gz | tar xz -C /tmp && \
mv /tmp/ks_0.13.1_linux_amd64/ks /usr/local/bin/ks-13 && \
chmod a+x /usr/local/bin/ks-13

# Create go symlinks
RUN ln -sf /usr/local/go/bin/go /usr/local/bin && \
ln -sf /usr/local/go/bin/gofmt /usr/local/bin && \
ln -sf /usr/local/go/bin/godoc /usr/local/bin

RUN go get -u github.com/jstemmer/go-junit-report

COPY ./images/checkout.sh /usr/local/bin
COPY ./images/checkout_repos.sh /usr/local/bin
RUN chmod a+x /usr/local/bin/checkout*

COPY ./images/run_workflows.sh /usr/local/bin
RUN chmod a+x /usr/local/bin/run_workflows.sh

# AWS BASH SCRIPTS
COPY ./images/aws-scripts/*.sh /usr/local/bin/
RUN chmod a+x /usr/local/bin/*.sh

ENV PYTHONPATH /src/kubeflow/testing/py

ENV CLOUD_PROVIDER aws

ENV PATH=/root/go/bin:${PATH}

ENTRYPOINT ["/usr/local/bin/run_workflows.sh"]
3 changes: 3 additions & 0 deletions images/aws-scripts/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
approvers:
- Jeffwan
- PatrickXYS
49 changes: 49 additions & 0 deletions images/aws-scripts/check-load-balancer-status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to check if all deployments are RUNNING

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"

aws eks update-kubeconfig --name=$EKS_CLUSTER_NAME

echo "Start Fetching Ingress IP Address"

# Retry 10 times w/ 30 seconds interval
retry_times=0
retry_limit=10
while [ "$retry_times" -lt "$retry_limit" ]
do
echo "See if we can fetch ingress"
ingress_ip=$(kubectl get ingress istio-ingress -n istio-system -o json | jq -r '.status.loadBalancer.ingress[0].hostname')
if [ ${#ingress_ip} -eq 0 ] ;
then
sleep 30
echo "Retrying Fetching Ingress IP Address"
else
echo "The Kubeflow Deployment succeeded"
exit 0
fi

retry_times=$((retry_times+1))
done

echo "Kubeflow Deployment Status: ERROR"
exit 64
36 changes: 36 additions & 0 deletions images/aws-scripts/create-eks-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to build an EKS cluster from our argo workflow

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"

# Create EKS Cluster
# TODO (PatrickXYS): Need to determine which NG template we need
eksctl create cluster \
--name $EKS_CLUSTER_NAME \
--version ${EKS_CLUSTER_VERSION:-"1.17"} \
--region ${AWS_REGION:-"us-west-2"} \
--nodegroup-name linux-nodes \
--node-type ${EKS_NODE_TYPE:-"m5.xlarge"} \
--nodes ${DESIRED_NODE:-"2"} \
--nodes-min ${MIN_NODE:-"1"} \
--nodes-max ${MAX_NODE:-"4"} \
--managed
26 changes: 26 additions & 0 deletions images/aws-scripts/delete-eks-cluster.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to delete an EKS cluster from our argo workflow

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"

# Delete EKS Cluster
eksctl delete cluster $EKS_CLUSTER_NAME
58 changes: 58 additions & 0 deletions images/aws-scripts/deploy-kubeflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to deploy kubeflow by kfctl

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"
EKS_NAMESPACE_NAME="${EKS_NAMESPACE}"

# Load kubeconfig
aws eks update-kubeconfig --name=$EKS_CLUSTER_NAME

# Fetch v1.1-branch kfctl
wget https://github.com/PatrickXYS/kfctl/releases/download/test1/kfctl_v1.1.0-2-g08ee6e4_linux.tar.gz -O kfctl.tar.gz
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this be changed later? Can you extract these into envs? If you have a follow up PR. I am fine to leave them here for now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might need to come up with a better plan to add Compatibility matrix later.

tar -xvf kfctl.tar.gz

# Add kfctl to PATH, to make the kfctl binary easier to use.
export PATH=$PATH:"$PWD:kfctl"

echo "kfctl version: "
kfctl version

### Workaround to fix issue
## msg="Encountered error applying application bootstrap: (kubeflow.error): Code 500 with message: Apply.Run
## : error when creating \"/tmp/kout927048001\": namespaces \"kubeflow-test-infra\" not found" filename="kustomize/kustomize.go:266"
kubectl create namespace $EKS_NAMESPACE_NAME
###

# Use the following kfctl configuration file for the AWS setup without authentication:
export CONFIG_URI="https://raw.githubusercontent.com/kubeflow/manifests/v1.1-branch/kfdef/kfctl_aws.v1.1.0.yaml"

# Set an environment variable for your AWS cluster name.
export AWS_CLUSTER_NAME=$EKS_CLUSTER_NAME

# Create the directory you want to store deployment, this has to be ${AWS_CLUSTER_NAME}
mkdir ${AWS_CLUSTER_NAME} && cd ${AWS_CLUSTER_NAME}

# Download your configuration files, so that you can customize the configuration before deploying Kubeflow.
wget -O kfctl_aws.yaml $CONFIG_URI

# Deploy Kubeflow
kfctl apply -V -f kfctl_aws.yaml
45 changes: 45 additions & 0 deletions images/aws-scripts/uninstall-kubeflow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash

# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This shell script is used to uninstall kubeflow by kfctl

set -o errexit
set -o nounset
set -o pipefail

EKS_CLUSTER_NAME="${CLUSTER_NAME}"
EKS_NAMESPACE_NAME="${EKS_NAMESPACE}"

# Load kubeconfig
aws eks update-kubeconfig --name=$EKS_CLUSTER_NAME

# Add kfctl to PATH, to make the kfctl binary easier to use.
export PATH=$PATH:"$PWD:kfctl"

echo "kfctl version: "
kfctl version

# Set an environment variable for your AWS cluster name.
export AWS_CLUSTER_NAME=$EKS_CLUSTER_NAME

# Cd directory ${AWS_CLUSTER_NAME}
cd ${AWS_CLUSTER_NAME}

# Print YAML file
cat kfctl_aws.yaml

# Uninstall Kubeflow
kfctl delete -V -f kfctl_aws.yaml
29 changes: 22 additions & 7 deletions images/run_workflows.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,25 @@ set -ex
/usr/local/bin/checkout.sh /src

# Trigger a workflow
python -m kubeflow.testing.run_e2e_workflow \
--project=kubeflow-ci \
--zone=us-east1-d \
--cluster=kubeflow-testing \
--bucket=kubernetes-jenkins \
--config_file=/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml \
--repos_dir=/src
if [ -z "$CLOUD_PROVIDER" ] || [ "$CLOUD_PROVIDER" == "gcp" ]
then
python -m kubeflow.testing.run_e2e_workflow \
--project=kubeflow-ci \
--zone=us-east1-d \
--cluster=kubeflow-testing \
--bucket=kubernetes-jenkins \
--config_file=/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml \
--repos_dir=/src
else
if [[ "$CLOUD_PROVIDER" == "aws" ]]
then
echo "Triggering AWS Argo Workflows"
python -m kubeflow.testing.run_e2e_workflow \
--cluster=${AWS_EKS_CLUSTER:-"kubeflow-shared-test-infra-poc-argo"} \
--bucket=${ARTIFACTS_S3_BUCKET:-"aws-kubernetes-jenkins"} \
--config_file=/src/${REPO_OWNER}/${REPO_NAME}/prow_config.yaml \
--repos_dir=/src \
--cloud_provider=aws \
--aws_region=${AWS_DEFAULT_REGION:-"us-west-2"}
fi
fi
3 changes: 3 additions & 0 deletions py/kubeflow/testing/cloudprovider/aws/OWNERS
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
approvers:
- Jeffwan
- PatrickXYS
Empty file.
Loading