Skip to content

Add ShutdownGuard to replace oneshot for shutdown (#1491) #102

Add ShutdownGuard to replace oneshot for shutdown (#1491)

Add ShutdownGuard to replace oneshot for shutdown (#1491) #102

Workflow file for this run

---
name: Local Remote Execution
on:
push:
branches: [main]
pull_request:
branches: [main]
paths-ignore:
- 'docs/**'
permissions: read-all
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
jobs:
local:
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04]
name: Local / ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 45
steps:
- name: Checkout
uses: >- # v4.1.1
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
- name: Install Nix
uses: >- # v10
DeterminateSystems/nix-installer-action@de22e16c4711fca50c816cc9081563429d1cf563
- name: Free disk space
uses: >- # v2.0.0
endersonmenezes/free-disk-space@3f9ec39ebae520864ac93467ee395f5237585c21
with:
remove_android: true
remove_dotnet: true
remove_haskell: true
remove_tool_cache: false
- name: Cache Nix derivations
uses: >- # v4
DeterminateSystems/magic-nix-cache-action@fc6aaceb40b9845a02b91e059ec147e78d1b4e41
- name: Build hello_lre with LRE toolchain.
run: >
nix develop --impure --command
bash -c "bazel run \
--verbose_failures \
@local-remote-execution//examples:hello_lre"
remote:
strategy:
fail-fast: false
matrix:
os: [large-ubuntu-22.04]
name: Remote / ${{ matrix.os }}
runs-on: ${{ matrix.os }}
timeout-minutes: 45
steps:
- name: Checkout
uses: >- # v4.1.1
actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
- name: Install Nix
uses: >- # v10
DeterminateSystems/nix-installer-action@de22e16c4711fca50c816cc9081563429d1cf563
- name: Free disk space
uses: >- # v2.0.0
endersonmenezes/free-disk-space@3f9ec39ebae520864ac93467ee395f5237585c21
with:
remove_android: true
remove_dotnet: true
remove_haskell: true
remove_tool_cache: false
- name: Cache Nix derivations
uses: >- # v4
DeterminateSystems/magic-nix-cache-action@fc6aaceb40b9845a02b91e059ec147e78d1b4e41
- name: Start Kubernetes cluster
run: >
nix run .#native up
- name: Start NativeLink operator
env:
REPO_URL: ${{ github.event.pull_request.head.repo.clone_url || format('https://github.com/{0}.git', github.repository) }}
BRANCH: ${{ github.event.pull_request.head.ref || github.ref_name }}
COMMIT: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
nix develop --impure --command bash -c 'cat > kustomization.yaml << EOF
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
components:
- kubernetes/components/operator
patches:
- patch: |-
- op: replace
path: /spec/path
value: ./kubernetes/overlays/lre
target:
kind: Kustomization
name: nativelink
- patch: |-
- op: replace
path: /spec/url
value: ${REPO_URL}
- op: replace
path: /spec/ref/branch
value: ${BRANCH}
- op: replace
path: /spec/ref/commit
value: ${COMMIT}
target:
kind: GitRepository
name: nativelink
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: ./src_root#image
target:
kind: Alert
name: nativelink-image-alert
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: ./src_root#nativelink-worker-init
target:
kind: Alert
name: nativelink-worker-init-alert
- patch: |-
- op: replace
path: /spec/eventMetadata/flakeOutput
value: ./src_root#nativelink-worker-lre-cc
target:
kind: Alert
name: nativelink-worker-alert
EOF
kubectl apply -k . &&
rm kustomization.yaml'
- name: Wait for Tekton resources
run: >
nix develop --impure --command
bash -c "flux reconcile kustomization -n default \
--timeout=15m \
nativelink-tekton-resources"
- name: Wait for Tekton pipelines
run: >
nix develop --impure --command bash << 'EOF'
until pr=$(kubectl get pipelinerun -o name | \
grep rebuild-nativelink-run-); do
echo "Waiting for pipeline to be created..."
sleep 1
done
echo "Found pipeline: $pr"
kubectl wait --for=create $pr
echo "Waiting for pipeline to succeed..."
kubectl wait \
--for=condition=Succeeded \
--timeout=45m \
pipelinerun \
-l tekton.dev/pipeline=rebuild-nativelink
EOF
- name: Wait for Configmaps
run: >
nix develop --impure --command
bash -c "flux reconcile kustomization -n default \
--timeout=15m \
nativelink-configmaps"
- name: Wait for NativeLink Kustomization
run: >
nix develop --impure --command
bash -c "flux reconcile kustomization -n default \
--timeout=15m \
nativelink"
- name: Wait for CAS
run: >
nix develop --impure --command
bash -c "kubectl rollout status deploy/nativelink-cas"
- name: Wait for scheduler
run: >
nix develop --impure --command
bash -c "kubectl rollout status deploy/nativelink-scheduler"
- name: Wait for worker
run: >
nix develop --impure --command
bash -c "kubectl rollout status deploy/nativelink-worker"
- name: Get gateway IPs
id: gateway-ips
run: |
echo "cache_ip=$(kubectl get gtw cache-gateway -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
echo "scheduler_ip=$(kubectl get gtw scheduler-gateway -o=jsonpath='{.status.addresses[0].value}')" >> "$GITHUB_ENV"
- name: Print cluster state
run: |
kubectl get svc -A
kubectl get pod -A
kubectl get svc -A
kubectl get deployments -A
kubectl describe gtw
echo "cas"
kubectl logs -l app=nativelink-cas
echo "scheduler"
kubectl logs -l app=nativelink-scheduler
echo "worker"
kubectl logs -l app=nativelink-worker
- name: Build hello_lre with LRE toolchain.
run: >
nix develop --impure --command
bash -c "bazel run \
--remote_instance_name=main \
--remote_cache=grpc://$cache_ip \
--remote_executor=grpc://$scheduler_ip \
--verbose_failures \
@local-remote-execution//examples:hello_lre"