Skip to content
This repository has been archived by the owner on Aug 28, 2024. It is now read-only.

Commit

Permalink
aws-up and aws-down working in a containerized context via makefile t…
Browse files Browse the repository at this point in the history
…argets
  • Loading branch information
brandonjbjelland committed Aug 8, 2023
1 parent 83d2ef0 commit c6b53ea
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 118 deletions.
79 changes: 50 additions & 29 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ all: build

.PHONY: help
help: ## Display this help.
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

##@ Development

Expand Down Expand Up @@ -120,40 +120,60 @@ skaffold-dev-gcpmanager: protoc skaffold protogen render-skaffold-manifests ## R
build: manifests generate fmt vet ## Build manager binary.
go build -o bin/manager cmd/controllermanager/main.go

.PHONY: dev-up
dev-up:
docker build ./install -t substratus-installer && \
.PHONY: gcp-dev-up
gcp-dev-up: build-installer
docker run -it \
-v ${HOME}/.kube:/root/.kube \
-e PROJECT=$(shell gcloud config get project) \
-e TOKEN=$(shell gcloud auth print-access-token) \
-e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \
-e INSTALL_OPERATOR=false \
substratus-installer gcp-up.sh
-v ${HOME}/.kube:/root/.kube \
-e PROJECT=$(shell gcloud config get project) \
-e TOKEN=$(shell gcloud auth print-access-token) \
-e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \
-e INSTALL_OPERATOR=false \
substratus-installer gcp-up.sh
mkdir -p secrets
gcloud iam service-accounts keys create --iam-account=substratus-gcp-manager@$(shell gcloud config get project).iam.gserviceaccount.com ./secrets/gcp-manager-key.json

.PHONY: dev-down
dev-down:
.PHONY: gcp-dev-down
gcp-dev-down: build-installer
docker run -it \
-v ${HOME}/.kube:/root/.kube \
-e PROJECT=$(shell gcloud config get project) \
-e TOKEN=$(shell gcloud auth print-access-token) \
-e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \
substratus-installer gcp-down.sh
-v ${HOME}/.kube:/root/.kube \
-e PROJECT=$(shell gcloud config get project) \
-e TOKEN=$(shell gcloud auth print-access-token) \
-e TF_VAR_attach_gpu_nodepools=${ATTACH_GPU_NODEPOOLS} \
substratus-installer gcp-down.sh
rm ./secrets/gcp-manager-key.json

.PHONY: dev-run
# Bring up the AWS dev environment by running aws-up.sh inside the
# substratus-installer image. The build-installer prerequisite is defined
# elsewhere in this Makefile (presumably it builds that image -- confirm).
# The host's ~/.kube directory is mounted at /root/.kube in the container, and
# INSTALL_OPERATOR=false is passed through to the installer script.
#
# AWS credentials are looked up by the recipe's shell (command substitution)
# instead of make's shell function, so make echoes only the command text and
# never prints the expanded secret values to the terminal or CI logs. The
# values are quoted so an empty or unusual value cannot split the argument.
.PHONY: aws-dev-up
aws-dev-up: build-installer
	docker run -it \
		-v "${HOME}/.kube:/root/.kube" \
		-e AWS_ACCOUNT_ID="$$(aws sts get-caller-identity --query Account --output text)" \
		-e AWS_ACCESS_KEY_ID="$$(aws configure get aws_access_key_id)" \
		-e AWS_SECRET_ACCESS_KEY="$$(aws configure get aws_secret_access_key)" \
		-e AWS_SESSION_TOKEN="$$(aws configure get aws_session_token)" \
		-e INSTALL_OPERATOR=false \
		substratus-installer aws-up.sh

# Tear down the AWS dev environment by running aws-down.sh inside the
# substratus-installer image. The build-installer prerequisite is defined
# elsewhere in this Makefile (presumably it builds that image -- confirm).
# The host's ~/.kube directory is mounted at /root/.kube in the container.
#
# AWS credentials are looked up by the recipe's shell (command substitution)
# instead of make's shell function, so make echoes only the command text and
# never prints the expanded secret values to the terminal or CI logs. The
# values are quoted so an empty or unusual value cannot split the argument.
.PHONY: aws-dev-down
aws-dev-down: build-installer
	docker run -it \
		-v "${HOME}/.kube:/root/.kube" \
		-e AWS_ACCOUNT_ID="$$(aws sts get-caller-identity --query Account --output text)" \
		-e AWS_ACCESS_KEY_ID="$$(aws configure get aws_access_key_id)" \
		-e AWS_SECRET_ACCESS_KEY="$$(aws configure get aws_secret_access_key)" \
		-e AWS_SESSION_TOKEN="$$(aws configure get aws_session_token)" \
		substratus-installer aws-down.sh

.PHONY: gcp-dev-run
# Controller manager configuration #
dev-run: export CLOUD=gcp
dev-run: export GPU_TYPE=nvidia-l4
dev-run: export PROJECT_ID=$(shell gcloud config get project)
dev-run: export CLUSTER_NAME=substratus
dev-run: export CLUSTER_LOCATION=us-central1
gcp-dev-run: export CLOUD=gcp
gcp-dev-run: export GPU_TYPE=nvidia-l4
gcp-dev-run: export PROJECT_ID=$(shell gcloud config get project)
gcp-dev-run: export CLUSTER_NAME=substratus
gcp-dev-run: export CLUSTER_LOCATION=us-central1
# Cloud manager configuration #
dev-run: export GOOGLE_APPLICATION_CREDENTIALS=./secrets/gcp-manager-key.json
gcp-dev-run: export GOOGLE_APPLICATION_CREDENTIALS=./secrets/gcp-manager-key.json
# Run the controller manager and the cloud manager.
dev-run: manifests kustomize install-crds
gcp-dev-run: manifests kustomize install-crds
go run ./cmd/gcpmanager & \
go run ./cmd/controllermanager/main.go \
--sci-address=localhost:10080 \
Expand All @@ -176,16 +196,17 @@ docker-push: ## Push docker image with the manager.

.PHONY: docs
docs: crd-ref-docs embedmd
$(CRD_REF_DOCS) --config=./docs/api/config.yaml \
$(CRD_REF_DOCS) \
--config=./docs/api/config.yaml \
--log-level=INFO \
--output-path=./docs/api/generated.md \
--source-path=./api \
--templates-dir=./docs/api/templates/markdown \
--templates-dir=./docs/api/templates/markdown \
--renderer=markdown
# TODO: Embed YAML examples into the generated API documentation.
# $(EMBEDMD) -w ./docs/api/generated.md

# PLATFORMS defines the target platforms for which the manager image is built, to provide support for multiple
# PLATFORMS defines the target platforms for which the manager image is built, to provide support for multiple
# architectures (e.g. make docker-buildx IMG=myregistry/mypoperator:0.0.1). To use this option you need to:
# - be able to use docker buildx. More info: https://docs.docker.com/build/buildx/
# - have BuildKit enabled. More info: https://docs.docker.com/develop/develop-images/build_enhancements/
Expand All @@ -209,7 +230,7 @@ protogen: protoc ## Generate protobuf files.
##@ Deployment

ifndef ignore-not-found
ignore-not-found = false
ignore-not-found=false
endif

.PHONY: install-crds
Expand Down
6 changes: 3 additions & 3 deletions docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,19 @@
Create a GCP environment.

```sh
make dev-up
make gcp-dev-up
```

Run Substratus control plane locally.

```sh
make dev-run
make gcp-dev-run
```

Delete GCP infra.

```sh
make dev-down
make gcp-dev-down
```

TODO: Automate the cleanup of PVs... Don't forget to manually clean them up for now.
Expand Down
1 change: 1 addition & 0 deletions install/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ RUN DEBIAN_FRONTEND="noninteractive" \
curl \
git \
tzdata \
gettext-base \
keyboard-configuration

# AWS CLI
Expand Down
17 changes: 8 additions & 9 deletions install/kubernetes/eks-cluster.yaml.tpl
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
name: substratus
region: us-west-2
name: ${CLUSTER_NAME}
region: ${REGION}
version: "1.27"
tags:
createdBy: eksctl
environment: dev
karpenter.sh/discovery: substratus
karpenter.sh/discovery: ${CLUSTER_NAME}

karpenter:
createServiceAccount: true
withSpotInterruptionQueue: true
defaultInstanceProfile: "KarpenterNodeInstanceProfile-substratus"
defaultInstanceProfile: "KarpenterNodeInstanceProfile-${CLUSTER_NAME}"
version: "v0.29.0"

# TODO(bjb): do we need mngs with karpenter?
# if karpenter doesn't suffice: https://github.com/eksctl-io/eksctl/blob/main/examples/23-kubeflow-spot-instance.yaml
managedNodeGroups:
- name: builder-ng
Expand All @@ -26,7 +25,7 @@ managedNodeGroups:
volumeSize: 100
minSize: 0
maxSize: 3
desiredCapacity: 2
desiredCapacity: 1
iam:
withAddonPolicies:
ebs: true
Expand Down Expand Up @@ -64,8 +63,8 @@ iam:
wellKnownPolicies:
ebsCSIController: true
- metadata:
name: substratus
namespace: substratus
name: ${CLUSTER_NAME}
namespace: ${CLUSTER_NAME}
attachPolicy:
Version: "2012-10-17"
Statement:
Expand All @@ -83,7 +82,7 @@ iam:
- "arn:aws:s3:::${ARTIFACTS_BUCKET_NAME}"
- metadata:
name: aws-manager
namespace: substratus
namespace: ${CLUSTER_NAME}
attachPolicy:
# https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-presigned-url.html
Version: "2012-10-17"
Expand Down
53 changes: 9 additions & 44 deletions install/kubernetes/karpenter-provisioner.yaml.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ spec:
karpenter.sh/discovery: ${CLUSTER_NAME}
securityGroupSelector:
karpenter.sh/discovery: ${CLUSTER_NAME}
ttlSecondsAfterEmpty: 30
consolidation:
enabled: true
taints:
Expand All @@ -34,46 +33,12 @@ spec:
- key: node.kubernetes.io/instance-type
operator: In
values:
# aws ec2 describe-instance-types --region us-west-2 --query "InstanceTypes[?GpuInfo!=null].InstanceType" --output json | jq -r '.[]' | sort | grep -v dl1 | grep -v inf | grep -v p5 | grep -v trn1 | awk '{print "\""$1"\","}'
[
"g2.2xlarge",
"g2.8xlarge",
"g3.16xlarge",
"g3.4xlarge",
"g3.8xlarge",
"g3s.xlarge",
"g4ad.16xlarge",
"g4ad.2xlarge",
"g4ad.4xlarge",
"g4ad.8xlarge",
"g4ad.xlarge",
"g4dn.12xlarge",
"g4dn.16xlarge",
"g4dn.2xlarge",
"g4dn.4xlarge",
"g4dn.8xlarge",
"g4dn.metal",
"g4dn.xlarge",
"g5.12xlarge",
"g5.16xlarge",
"g5.24xlarge",
"g5.2xlarge",
"g5.48xlarge",
"g5.4xlarge",
"g5.8xlarge",
"g5.xlarge",
"g5g.16xlarge",
"g5g.2xlarge",
"g5g.4xlarge",
"g5g.8xlarge",
"g5g.metal",
"g5g.xlarge",
"p2.16xlarge",
"p2.8xlarge",
"p2.xlarge",
"p3.16xlarge",
"p3.2xlarge",
"p3.8xlarge",
"p3dn.24xlarge",
"p4d.24xlarge",
]
- key: karpenter.k8s.aws/instance-category
operator: In
values: ["g", "p"]
- key: karpenter.k8s.aws/instance-family
operator: NotIn
values: ["p5"]
- key: "kubernetes.io/arch"
operator: In
values: ["amd64"]
34 changes: 25 additions & 9 deletions install/scripts/aws-down.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,37 @@ set -e
set -u

# Required env variables:
# : "$TOKEN $PROJECT"
: "$AWS_ACCOUNT_ID $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
KUBERENTES_DIR=${SCRIPT_DIR}/../kubernetes

export EKSCTL_ENABLE_CREDENTIAL_CACHE=1
EKSCTL_ENABLE_CREDENTIAL_CACHE=1
export CLUSTER_NAME=substratus
export REGION=us-west-2
export ARTIFACTS_REPO_NAME=substratus
export AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
export ARTIFACTS_BUCKET_NAME=${AWS_ACCOUNT_ID}-substratus-artifacts
export ARTIFACTS_REPO_NAME=${CLUSTER_NAME}
export ARTIFACTS_BUCKET_NAME=${AWS_ACCOUNT_ID}-${CLUSTER_NAME}-artifacts

aws eks update-kubeconfig \
--region ${REGION} \
--name ${CLUSTER_NAME} &&
kubectl delete deployments --namespace=karpenter --all &&
kubectl delete deployments --namespace=kube-system --all ||
true

aws iam delete-policy \
--policy-arn arn:aws:iam::${AWS_ACCOUNT_ID}:policy/KarpenterControllerPolicy-${CLUSTER_NAME} ||
true

aws s3 rb s3://${ARTIFACTS_BUCKET_NAME} --region ${REGION} >/dev/null || true
aws ecr delete-repository --repository-name ${ARTIFACTS_REPO_NAME} >/dev/null || true
aws cloudformation delete-stack \
--stack-name "Karpenter-${CLUSTER_NAME}" || true
--stack-name "Karpenter-${CLUSTER_NAME}" \
--region ${REGION} || true

envsubst <${KUBERENTES_DIR}/eks-cluster.yaml.tpl >${KUBERENTES_DIR}/eks-cluster.yaml
eksctl delete cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml
eksctl delete cluster -f ${KUBERENTES_DIR}/eks-cluster.yaml || true

aws ecr delete-repository \
--repository-name ${ARTIFACTS_REPO_NAME} \
--region ${REGION} >/dev/null || true

aws s3 rb s3://${ARTIFACTS_BUCKET_NAME} \
--region ${REGION} >/dev/null || true
Loading

0 comments on commit c6b53ea

Please sign in to comment.