Skip to content

Commit f356fe5

Browse files
PhilippMatthesSoWieMarkusmblos
authored
Adding scheduling decision CRD: "why/why not" api (#307)
This contribution adds an API to Cortex that describes why a virtual machine has been placed on a specific compute host. It exposes the raw pipeline weights through a Kubernetes custom resource, and the associated operator calculates a human-readable description from the provided values. The resource is also connected to the Nova visualizer so that decisions can be inspected in the browser, removing the dependency on the MQTT broker. --------- Co-authored-by: Markus Wieland <44964229+SoWieMarkus@users.noreply.github.com> Co-authored-by: mblos <156897072+mblos@users.noreply.github.com>
1 parent a25a10f commit f356fe5

File tree

90 files changed

+7214
-365
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

90 files changed

+7214
-365
lines changed

.github/workflows/push-charts.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,21 @@ jobs:
8080
CHART_PACKAGE=$(ls $CHART_DIR/*.tgz)
8181
helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
8282
done
83+
- name: Get all changed decisions Chart.yaml files
84+
id: changed-chart-yaml-files-decisions
85+
uses: tj-actions/changed-files@v47
86+
with:
87+
files: |
88+
decisions/dist/chart/Chart.yaml
89+
- name: Push decisions charts to registry
90+
if: steps.changed-chart-yaml-files-decisions.outputs.all_changed_files != ''
91+
shell: bash
92+
env:
93+
ALL_CHANGED_FILES: ${{ steps.changed-chart-yaml-files-decisions.outputs.all_changed_files }}
94+
run: |
95+
for CHART_FILE in ${ALL_CHANGED_FILES}; do
96+
CHART_DIR=$(dirname $CHART_FILE)
97+
helm package $CHART_DIR --dependency-update --destination $CHART_DIR
98+
CHART_PACKAGE=$(ls $CHART_DIR/*.tgz)
99+
helm push $CHART_PACKAGE oci://${{ env.REGISTRY }}/${{ github.repository }}/charts/
100+
done

.github/workflows/push-images.yaml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,3 +139,44 @@ jobs:
139139
subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-reservations-operator
140140
subject-digest: ${{ steps.push_cortex_reservations.outputs.digest }}
141141
push-to-registry: true
142+
# Only build and push the decisions operator image if there are changes
143+
# in the decisions directory.
144+
- name: Get all changed decisions/ files
145+
id: changed_decisions_files
146+
uses: tj-actions/changed-files@v47
147+
with:
148+
files: |
149+
decisions/**
150+
- name: Docker Meta (Cortex Decisions)
151+
if: steps.changed_decisions_files.outputs.all_changed_files != ''
152+
id: meta_cortex_decisions
153+
uses: docker/metadata-action@v5
154+
with:
155+
images: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator
156+
tags: |
157+
type=semver,pattern={{version}}
158+
type=semver,pattern={{major}}.{{minor}}
159+
type=sha
160+
latest
161+
- name: Build and Push Cortex Decisions Operator
162+
if: steps.changed_decisions_files.outputs.all_changed_files != ''
163+
id: push_cortex_decisions
164+
uses: docker/build-push-action@v6
165+
with:
166+
context: .
167+
file: Dockerfile.kubebuilder
168+
platforms: linux/amd64,linux/arm64
169+
push: true
170+
tags: ${{ steps.meta_cortex_decisions.outputs.tags }}
171+
labels: ${{ steps.meta_cortex_decisions.outputs.labels }}
172+
build-args: |
173+
GO_MOD_PATH=decisions
174+
GIT_TAG=${{ github.ref_name }}
175+
GIT_COMMIT=${{ github.sha }}
176+
- name: Generate Artifact Attestation for Cortex Decisions
177+
if: steps.changed_decisions_files.outputs.all_changed_files != ''
178+
uses: actions/attest-build-provenance@v3
179+
with:
180+
subject-name: ${{ env.REGISTRY }}/${{ github.repository }}-decisions-operator
181+
subject-digest: ${{ steps.push_cortex_decisions.outputs.digest }}
182+
push-to-registry: true

.github/workflows/test.yaml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ jobs:
2525
go test -v ./...
2626
echo "Testing reservations module..."
2727
cd reservations && go test -v ./...
28+
echo "Testing decisions module..."
29+
cd ../decisions && go test -v ./...
2830
2931
test-with-docker:
3032
# We don't need to run this longer test if the previous one already failed.
@@ -61,13 +63,22 @@ jobs:
6163
-coverprofile=reservations_profile.cov ./internal/...
6264
go tool cover -func reservations_profile.cov > reservations_func_coverage.txt
6365
cd ..
66+
67+
echo "Running tests for decisions module..."
68+
cd decisions
69+
go test -v \
70+
-coverpkg=./internal/... \
71+
-coverprofile=decisions_profile.cov ./internal/...
72+
go tool cover -func decisions_profile.cov > decisions_func_coverage.txt
73+
cd ..
6474
- name: Upload coverage files
6575
uses: actions/upload-artifact@v4
6676
with:
6777
name: pr-func-coverage
6878
path: |
6979
pr_func_coverage.txt
7080
reservations/reservations_func_coverage.txt
81+
decisions/decisions_func_coverage.txt
7182
# Steps below are only executed if the workflow is triggered by a pull request
7283
- name: Delete old coverage comments (PR only)
7384
if: ${{ github.event_name == 'pull_request' }}
@@ -123,6 +134,19 @@ jobs:
123134
reservationsCoverageReport = 'No coverage data available';
124135
}
125136
137+
// Read decisions module coverage report
138+
let decisionsCoverageReport = '';
139+
let decisionsCoveragePercentage = 'unknown';
140+
try {
141+
decisionsCoverageReport = fs.readFileSync('decisions/decisions_func_coverage.txt', 'utf8');
142+
const decisionsLines = decisionsCoverageReport.trim().split('\n');
143+
const decisionsLastLine = decisionsLines[decisionsLines.length - 1];
144+
const decisionsCoverageMatch = decisionsLastLine.match(/total:\s+\(statements\)\s+(\d+\.\d+)%/);
145+
decisionsCoveragePercentage = decisionsCoverageMatch ? decisionsCoverageMatch[1] : 'unknown';
146+
} catch (error) {
147+
decisionsCoverageReport = 'No coverage data available';
148+
}
149+
126150
let commentBody = '<!-- coverage-comment -->\n';
127151
commentBody += '## Test Coverage Report\n\n';
128152
@@ -144,6 +168,16 @@ jobs:
144168
commentBody += '```text\n';
145169
commentBody += reservationsCoverageReport;
146170
commentBody += '```\n';
171+
commentBody += '</details>\n\n';
172+
173+
// Decisions module coverage
174+
commentBody += '<details>\n';
175+
commentBody += '<summary>Coverage in decisions module (decisions/internal/): ';
176+
commentBody += decisionsCoveragePercentage;
177+
commentBody += '%</summary>\n\n';
178+
commentBody += '```text\n';
179+
commentBody += decisionsCoverageReport;
180+
commentBody += '```\n';
147181
commentBody += '</details>\n';
148182
149183
// Post the comment

.github/workflows/update-appversion.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ jobs:
3030
files: |
3131
postgres/**
3232
33+
- name: Get all changed decisions/ files
34+
id: changed_decisions_files
35+
uses: tj-actions/changed-files@v47
36+
with:
37+
files: |
38+
decisions/**
39+
3340
# Always bumped
3441
- name: Update appVersion in cortex-core Chart.yaml
3542
run: |
@@ -69,3 +76,17 @@ jobs:
6976
git add reservations/dist/chart/Chart.yaml
7077
git commit -m "Bump cortex-reservations chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
7178
git push origin HEAD:main
79+
80+
# Only bumped if there are changes in the decisions directory.
81+
- name: Update appVersion in cortex-decisions Chart.yaml
82+
if: steps.changed_decisions_files.outputs.all_changed_files != ''
83+
run: |
84+
sed -i 's/^\([ ]*appVersion:[ ]*\).*/\1"${{ steps.vars.outputs.sha }}"/' decisions/dist/chart/Chart.yaml
85+
- name: Commit and push changes for cortex-decisions
86+
if: steps.changed_decisions_files.outputs.all_changed_files != ''
87+
run: |
88+
git config user.name "github-actions[bot]"
89+
git config user.email "github-actions[bot]@users.noreply.github.com"
90+
git add decisions/dist/chart/Chart.yaml
91+
git commit -m "Bump cortex-decisions chart appVersions to ${{ steps.vars.outputs.sha }} [skip ci]" || echo "No changes to commit"
92+
git push origin HEAD:main

Tiltfile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,22 @@ def kubebuilder_binary_files(path):
3737
docker_build('ghcr.io/cobaltcore-dev/cortex-reservations-operator', '.',
3838
dockerfile='Dockerfile.kubebuilder',
3939
build_args={'GO_MOD_PATH': 'reservations'},
40-
only=kubebuilder_binary_files('reservations') + ['internal/', 'go.mod', 'go.sum'],
40+
only=kubebuilder_binary_files('reservations') + ['internal/', 'decisions/', 'go.mod', 'go.sum'],
4141
)
4242
local('sh helm/sync.sh reservations/dist/chart')
4343
k8s_yaml(helm('reservations/dist/chart', name='cortex-reservations', values=[tilt_values]))
4444
k8s_resource('reservations-controller-manager', labels=['Reservations'])
4545

46+
########### Decisions Operator & CRDs
47+
docker_build('ghcr.io/cobaltcore-dev/cortex-decisions-operator', '.',
48+
dockerfile='Dockerfile.kubebuilder',
49+
build_args={'GO_MOD_PATH': 'decisions'},
50+
only=kubebuilder_binary_files('decisions') + ['internal/', 'go.mod', 'go.sum'],
51+
)
52+
local('sh helm/sync.sh decisions/dist/chart')
53+
k8s_yaml(helm('decisions/dist/chart', name='cortex-decisions', values=[tilt_values]))
54+
k8s_resource('decisions-controller-manager', labels=['Decisions'])
55+
4656
########### Dev Dependencies
4757
local('sh helm/sync.sh helm/dev/cortex-prometheus-operator')
4858
k8s_yaml(helm('./helm/dev/cortex-prometheus-operator', name='cortex-prometheus-operator')) # Operator
@@ -82,6 +92,7 @@ k8s_resource('cortex-plutono', port_forwards=[
8292
docker_build('ghcr.io/cobaltcore-dev/cortex', '.', only=[
8393
'internal/', 'commands/', 'main.go', 'go.mod', 'go.sum', 'Makefile',
8494
'reservations/api/', # API module of the reservations operator needed for the scheduler.
95+
'decisions/api/', # API module of the decisions operator needed for the scheduler.
8596
])
8697
docker_build('ghcr.io/cobaltcore-dev/cortex-postgres', 'postgres')
8798

commands/checks/nova/checks.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ func randomRequest(dc datacenter, seed int) api.ExternalSchedulerRequest {
271271
slog.Info("using flavor extra specs", "extraSpecs", extraSpecs)
272272
request := api.ExternalSchedulerRequest{
273273
Spec: api.NovaObject[api.NovaSpec]{Data: api.NovaSpec{
274+
InstanceUUID: "cortex-e2e-tests",
274275
AvailabilityZone: az,
275276
ProjectID: project.ID,
276277
Flavor: api.NovaObject[api.NovaFlavor]{Data: api.NovaFlavor{

0 commit comments

Comments
 (0)