From 11b0b6abf3c0d1d94380296677a008dbc22f38ae Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 6 Mar 2025 17:58:25 +0000 Subject: [PATCH 01/11] dockerignore node_modules in subdirectories --- .dockerignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 4d8429b6dc..414b991945 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,7 +9,7 @@ # dependencies -node_modules +**/node_modules .pnp .pnp.js From e1eb2e4f41af77828de7828b3ab69f541ae3e058 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 6 Mar 2025 17:58:49 +0000 Subject: [PATCH 02/11] image tag action should handle re2 tags --- .github/actions/get-image-tag/action.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/actions/get-image-tag/action.yml b/.github/actions/get-image-tag/action.yml index b9fc3e565b..4180394d54 100644 --- a/.github/actions/get-image-tag/action.yml +++ b/.github/actions/get-image-tag/action.yml @@ -31,6 +31,11 @@ runs: sha=$(echo ${{ github.sha }} | head -c7) ts=$(date +%s) tag=${env}-${sha}-${ts} + if [[ "${{ github.ref_name }}" == re2-*-* ]]; then + env=$(echo ${{ github.ref_name }} | cut -d- -f2) + sha=$(echo ${{ github.sha }} | head -c7) + ts=$(date +%s) + tag=${env}-${sha}-${ts} elif [[ "${{ github.ref_name }}" == v.docker.* ]]; then version="${GITHUB_REF_NAME#v.docker.}" tag="v${version}" From d4100f97fd743946aec143154b7b918f09221423 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 6 Mar 2025 18:02:00 +0000 Subject: [PATCH 03/11] add supervisor containerfile --- apps/supervisor/Containerfile | 63 +++++++++++++++++++++++++++++++++++ apps/supervisor/package.json | 1 + 2 files changed, 64 insertions(+) create mode 100644 apps/supervisor/Containerfile diff --git a/apps/supervisor/Containerfile b/apps/supervisor/Containerfile new file mode 100644 index 0000000000..3e7fbd3781 --- /dev/null +++ b/apps/supervisor/Containerfile @@ -0,0 +1,63 @@ +FROM node:22-alpine@sha256:9bef0ef1e268f60627da9ba7d7605e8831d5b56ad07487d24d1aa386336d1944 AS node-22-alpine + +WORKDIR /app + +FROM node-22-alpine AS pruner + +COPY --chown=node:node . . +RUN npx -q turbo@1.10.9 prune --scope=supervisor --docker +RUN find . -name "node_modules" -type d -prune -exec rm -rf '{}' + + +FROM node-22-alpine AS base + +RUN apk add --no-cache dumb-init + +COPY --chown=node:node .gitignore .gitignore +COPY --from=pruner --chown=node:node /app/out/json/ . +COPY --from=pruner --chown=node:node /app/out/pnpm-lock.yaml ./pnpm-lock.yaml +COPY --from=pruner --chown=node:node /app/out/pnpm-workspace.yaml ./pnpm-workspace.yaml + +FROM base AS dev-deps +RUN corepack enable +ENV NODE_ENV development + +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store pnpm fetch --no-frozen-lockfile +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store pnpm install --ignore-scripts --no-frozen-lockfile + +FROM base AS prod-deps +RUN corepack enable +ENV NODE_ENV production + +RUN --mount=type=cache,id=pnpm,target=/root/.local/share/pnpm/store pnpm install --prod --no-frozen-lockfile + +COPY --from=pruner --chown=node:node /app/internal-packages/database/prisma/schema.prisma /app/internal-packages/database/prisma/schema.prisma + +ENV NPM_CONFIG_IGNORE_WORKSPACE_ROOT_CHECK true +RUN pnpx prisma@5.4.1 generate --schema /app/internal-packages/database/prisma/schema.prisma + +FROM base AS builder +RUN corepack enable + +COPY --from=pruner --chown=node:node /app/out/full/ . +COPY --from=dev-deps --chown=node:node /app/ . +COPY --chown=node:node turbo.json turbo.json +COPY --chown=node:node .configs/tsconfig.base.json .configs/tsconfig.base.json +COPY --chown=node:node scripts/updateVersion.ts scripts/updateVersion.ts + +RUN pnpm run generate && \ + pnpm run -r --filter supervisor... build + +FROM base AS runner + +RUN corepack enable +ENV NODE_ENV production + +COPY --from=pruner --chown=node:node /app/out/full/ . +COPY --from=prod-deps --chown=node:node /app . +COPY --from=builder --chown=node:node /app/apps/supervisor ./apps/supervisor + +EXPOSE 8000 + +USER node + +CMD [ "/usr/bin/dumb-init", "--", "pnpm", "run", "--filter", "supervisor", "start"] diff --git a/apps/supervisor/package.json b/apps/supervisor/package.json index af25633177..3718e6514d 100644 --- a/apps/supervisor/package.json +++ b/apps/supervisor/package.json @@ -5,6 +5,7 @@ "main": "dist/index.js", "type": "module", "scripts": { + "build": "tsc", "dev": "tsx --experimental-sqlite --require dotenv/config --watch src/index.ts", "start": "node --experimental-sqlite dist/index.js", "typecheck": "tsc --noEmit" From 35797f868e930eae306138f955e6527cc433148d Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 6 Mar 2025 18:02:38 +0000 Subject: [PATCH 04/11] add publish worker re2 workflow --- .github/workflows/publish-worker-re2.yml | 94 ++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 .github/workflows/publish-worker-re2.yml diff --git a/.github/workflows/publish-worker-re2.yml b/.github/workflows/publish-worker-re2.yml new file mode 100644 index 0000000000..9953f6f993 --- /dev/null +++ b/.github/workflows/publish-worker-re2.yml @@ -0,0 +1,94 @@ +name: "⚒️ Publish Worker RE2" + +on: + workflow_call: + inputs: + image_tag: + description: The image tag to publish + type: string + required: false + default: "" + push: + tags: + - "re2-test-*" + - "re2-prod-*" + +permissions: + packages: write + contents: read + +jobs: + check-branch: + runs-on: ubuntu-latest + steps: + - name: Fail if re2-prod-* is pushed from a non-main branch + if: startsWith(github.ref_name, 're2-prod-') && github.base_ref != 'main' + run: | + echo "🚫 re2-prod-* tags can only be pushed from the main branch." + exit 1 + build: + strategy: + matrix: + package: [supervisor] + runs-on: ubuntu-latest + env: + DOCKER_BUILDKIT: "1" + steps: + - name: ⬇️ Checkout git repo + uses: actions/checkout@v4 + + - name: 📦 Get image repo + id: get_repository + run: | + if [[ "${{ matrix.package }}" == *-provider ]]; then + provider_type=$(echo "${{ matrix.package }}" | cut -d- -f1) + repo=provider/${provider_type} + else + repo="${{ matrix.package }}" + fi + echo "repo=${repo}" >> "$GITHUB_OUTPUT" + + - id: get_tag + uses: ./.github/actions/get-image-tag + with: + tag: ${{ inputs.image_tag }} + + - name: 🐋 Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # ..to avoid rate limits when pulling images + - name: 🐳 Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: 🚢 Build Container Image + run: | + docker build -t infra_image -f ./apps/${{ matrix.package }}/Containerfile . + + # ..to push image + - name: 🐙 Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: 🐙 Push to GitHub Container Registry + run: | + docker tag infra_image "$REGISTRY/$REPOSITORY:$IMAGE_TAG" + docker push "$REGISTRY/$REPOSITORY:$IMAGE_TAG" + env: + REGISTRY: ghcr.io/triggerdotdev + REPOSITORY: ${{ steps.get_repository.outputs.repo }} + IMAGE_TAG: ${{ steps.get_tag.outputs.tag }} + + - name: 🐙 Push 'v3' tag to GitHub Container Registry + if: steps.get_tag.outputs.is_semver == 'true' + run: | + docker tag infra_image "$REGISTRY/$REPOSITORY:v3" + docker push "$REGISTRY/$REPOSITORY:v3" + env: + REGISTRY: ghcr.io/triggerdotdev + REPOSITORY: ${{ steps.get_repository.outputs.repo }} From 2fc49597f6f045672446ad89734c36b13f48e212 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 6 Mar 2025 18:05:21 +0000 Subject: [PATCH 05/11] fix copypasta --- .github/actions/get-image-tag/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/get-image-tag/action.yml b/.github/actions/get-image-tag/action.yml index 4180394d54..e064623046 100644 --- a/.github/actions/get-image-tag/action.yml +++ b/.github/actions/get-image-tag/action.yml @@ -31,7 +31,7 @@ runs: sha=$(echo ${{ github.sha }} | head -c7) ts=$(date +%s) tag=${env}-${sha}-${ts} - if [[ "${{ github.ref_name }}" == re2-*-* ]]; then + elif [[ "${{ github.ref_name }}" == re2-*-* ]]; then env=$(echo ${{ github.ref_name }} | cut -d- -f2) sha=$(echo ${{ github.sha }} | head -c7) ts=$(date +%s) From 4e27ced8d4df0987768317590007ad1c4d2dc575 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Thu, 6 Mar 2025 18:15:18 +0000 Subject: [PATCH 06/11] require branch check --- .github/workflows/publish-worker-re2.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish-worker-re2.yml b/.github/workflows/publish-worker-re2.yml index 9953f6f993..1f2b316afe 100644 --- a/.github/workflows/publish-worker-re2.yml +++ b/.github/workflows/publish-worker-re2.yml @@ -27,6 +27,7 @@ jobs: echo "🚫 re2-prod-* tags can only be pushed from the main branch." exit 1 build: + needs: check-branch strategy: matrix: package: [supervisor] From 8ee6b4986fbdbe0a0ad7a1a07f7b9cb6175e3d96 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Fri, 7 Mar 2025 10:41:06 +0000 Subject: [PATCH 07/11] add more granular service control to supervisor session --- .../v3/runEngineWorker/supervisor/session.ts | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/packages/core/src/v3/runEngineWorker/supervisor/session.ts b/packages/core/src/v3/runEngineWorker/supervisor/session.ts index fd6c81bebf..4ce3b312b4 100644 --- a/packages/core/src/v3/runEngineWorker/supervisor/session.ts +++ b/packages/core/src/v3/runEngineWorker/supervisor/session.ts @@ -11,6 +11,8 @@ import { getDefaultWorkerHeaders } from "./util.js"; import { HeartbeatService } from "../../utils/heartbeat.js"; type SupervisorSessionOptions = SupervisorClientCommonOptions & { + queueConsumerEnabled?: boolean; + runNotificationsEnabled?: boolean; heartbeatIntervalSeconds?: number; dequeueIntervalMs?: number; preDequeue?: PreDequeueFn; @@ -20,15 +22,21 @@ type SupervisorSessionOptions = SupervisorClientCommonOptions & { export class SupervisorSession extends EventEmitter { public readonly httpClient: SupervisorHttpClient; - private socket?: Socket; + private readonly runNotificationsEnabled: boolean; + private runNotificationsSocket?: Socket; + private readonly queueConsumerEnabled: boolean; private readonly queueConsumer: RunQueueConsumer; + private readonly heartbeatService: HeartbeatService; private readonly heartbeatIntervalSeconds: number; constructor(private opts: SupervisorSessionOptions) { super(); + this.runNotificationsEnabled = opts.runNotificationsEnabled ?? true; + this.queueConsumerEnabled = opts.queueConsumerEnabled ?? true; + this.httpClient = new SupervisorHttpClient(opts); this.queueConsumer = new RunQueueConsumer({ client: this.httpClient, @@ -76,12 +84,12 @@ export class SupervisorSession extends EventEmitter { subscribeToRunNotifications(runFriendlyIds: string[]) { console.log("[SupervisorSession] Subscribing to run notifications", { runFriendlyIds }); - if (!this.socket) { + if (!this.runNotificationsSocket) { console.error("[SupervisorSession] Socket not connected"); return; } - this.socket.emit("run:subscribe", { version: "1", runFriendlyIds }); + this.runNotificationsSocket.emit("run:subscribe", { version: "1", runFriendlyIds }); Promise.allSettled( runFriendlyIds.map((runFriendlyId) => @@ -96,12 +104,12 @@ export class SupervisorSession extends EventEmitter { unsubscribeFromRunNotifications(runFriendlyIds: string[]) { console.log("[SupervisorSession] Unsubscribing from run notifications", { runFriendlyIds }); - if (!this.socket) { + if (!this.runNotificationsSocket) { console.error("[SupervisorSession] Socket not connected"); return; } - this.socket.emit("run:unsubscribe", { version: "1", runFriendlyIds }); + this.runNotificationsSocket.emit("run:unsubscribe", { version: "1", runFriendlyIds }); Promise.allSettled( runFriendlyIds.map((runFriendlyId) => @@ -116,15 +124,15 @@ export class SupervisorSession extends EventEmitter { ); } - private createSocket() { + private createRunNotificationsSocket() { const wsUrl = new URL(this.opts.apiUrl); wsUrl.pathname = "/worker"; - this.socket = io(wsUrl.href, { + const socket = io(wsUrl.href, { transports: ["websocket"], extraHeaders: getDefaultWorkerHeaders(this.opts), }); - this.socket.on("run:notify", ({ version, run }) => { + socket.on("run:notify", ({ version, run }) => { console.log("[SupervisorSession][WS] Received run notification", { version, run }); this.emit("runNotification", { time: new Date(), run }); @@ -137,15 +145,17 @@ export class SupervisorSession extends EventEmitter { console.error("[SupervisorSession] Failed to send debug log", { error }); }); }); - this.socket.on("connect", () => { + socket.on("connect", () => { console.log("[SupervisorSession][WS] Connected to platform"); }); - this.socket.on("connect_error", (error) => { + socket.on("connect_error", (error) => { console.error("[SupervisorSession][WS] Connection error", { error }); }); - this.socket.on("disconnect", (reason, description) => { + socket.on("disconnect", (reason, description) => { console.log("[SupervisorSession][WS] Disconnected from platform", { reason, description }); }); + + return socket; } async start() { @@ -167,14 +177,25 @@ export class SupervisorSession extends EventEmitter { name: workerGroup.name, }); - this.queueConsumer.start(); - this.heartbeatService.start(); - this.createSocket(); + if (this.queueConsumerEnabled) { + console.log("[SupervisorSession] Queue consumer enabled"); + this.queueConsumer.start(); + this.heartbeatService.start(); + } else { + console.warn("[SupervisorSession] Queue consumer disabled"); + } + + if (this.runNotificationsEnabled) { + console.log("[SupervisorSession] Run notifications enabled"); + this.runNotificationsSocket = this.createRunNotificationsSocket(); + } else { + console.warn("[SupervisorSession] Run notifications disabled"); + } } async stop() { this.heartbeatService.stop(); - this.socket?.disconnect(); + this.runNotificationsSocket?.disconnect(); } private getHeartbeatBody(): WorkerApiHeartbeatRequestBody { From 22a6035ced7716cbb0e06521ba94f4ecd2d714b6 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Fri, 7 Mar 2025 11:39:30 +0000 Subject: [PATCH 08/11] fix supervisor api domain for split setups --- apps/supervisor/src/env.ts | 27 +++++++++++++----- apps/supervisor/src/index.ts | 16 ++++++++--- apps/supervisor/src/workloadManager/docker.ts | 14 ++++++++-- .../src/workloadManager/kubernetes.ts | 28 +++++++++++++++++-- apps/supervisor/src/workloadManager/types.ts | 4 ++- .../src/entryPoints/managed-run-controller.ts | 13 ++++++--- 6 files changed, 81 insertions(+), 21 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 8736ad038b..7786836923 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -4,18 +4,31 @@ import { z } from "zod"; import { getDockerHostDomain } from "./util.js"; const Env = z.object({ - // This will come from `status.hostIP` in k8s - WORKER_HOST_IP: z.string().default(getDockerHostDomain()), - TRIGGER_API_URL: z.string().url(), - TRIGGER_WORKER_TOKEN: z.string(), // This will come from `spec.nodeName` in k8s TRIGGER_WORKER_INSTANCE_NAME: z.string().default(randomUUID()), + + // Required settings + TRIGGER_API_URL: z.string().url(), + TRIGGER_WORKER_TOKEN: z.string(), MANAGED_WORKER_SECRET: z.string(), - TRIGGER_WORKLOAD_API_PORT: z.coerce.number().default(8020), - TRIGGER_WORKLOAD_API_PORT_EXTERNAL: z.coerce.number().default(8020), + + // Workload API settings (coordinator mode) - the workload API is what the run controller connects to + TRIGGER_WORKLOAD_API_ENABLED: z.coerce.boolean().default(true), + TRIGGER_WORKLOAD_API_PROTOCOL: z + .string() + .transform((s) => z.enum(["http", "https"]).parse(s.toLowerCase())) + .default("http"), + TRIGGER_WORKLOAD_API_DOMAIN: z.string().default(getDockerHostDomain()), // If unset, will use orchestrator-specific default + TRIGGER_WORKLOAD_API_PORT_INTERNAL: z.coerce.number().default(8020), // This is the port the workload API listens on + TRIGGER_WORKLOAD_API_PORT_EXTERNAL: z.coerce.number().default(8020), // This is the exposed port passed to the run controller + + // Dequeue settings (provider mode) + TRIGGER_DEQUEUE_ENABLED: z.coerce.boolean().default(true), + TRIGGER_DEQUEUE_INTERVAL_MS: z.coerce.number().int().default(1000), + + // Optional services TRIGGER_WARM_START_URL: z.string().optional(), TRIGGER_CHECKPOINT_URL: z.string().optional(), - TRIGGER_DEQUEUE_INTERVAL_MS: z.coerce.number().int().default(1000), // Used by the workload manager, e.g docker/k8s DOCKER_NETWORK: z.string().default("host"), diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index b7f2400aa6..ebab788970 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -29,7 +29,9 @@ class ManagedSupervisor { private readonly warmStartUrl = env.TRIGGER_WARM_START_URL; constructor() { - const workerApiUrl = `http://${env.WORKER_HOST_IP}:${env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL}`; + const workloadApiProtocol = env.TRIGGER_WORKLOAD_API_PROTOCOL; + const workloadApiDomain = env.TRIGGER_WORKLOAD_API_DOMAIN; + const workloadApiPortExternal = env.TRIGGER_WORKLOAD_API_PORT_EXTERNAL; if (this.warmStartUrl) { this.logger.log("[ManagedWorker] 🔥 Warm starts enabled", { @@ -40,13 +42,17 @@ class ManagedSupervisor { if (this.isKubernetes) { this.resourceMonitor = new KubernetesResourceMonitor(createK8sApi(), ""); this.workloadManager = new KubernetesWorkloadManager({ - workerApiUrl, + workloadApiProtocol, + workloadApiDomain, + workloadApiPort: workloadApiPortExternal, warmStartUrl: this.warmStartUrl, }); } else { this.resourceMonitor = new DockerResourceMonitor(new Docker()); this.workloadManager = new DockerWorkloadManager({ - workerApiUrl, + workloadApiProtocol, + workloadApiDomain, + workloadApiPort: workloadApiPortExternal, warmStartUrl: this.warmStartUrl, }); } @@ -57,6 +63,8 @@ class ManagedSupervisor { instanceName: env.TRIGGER_WORKER_INSTANCE_NAME, managedWorkerSecret: env.MANAGED_WORKER_SECRET, dequeueIntervalMs: env.TRIGGER_DEQUEUE_INTERVAL_MS, + queueConsumerEnabled: env.TRIGGER_DEQUEUE_ENABLED, + runNotificationsEnabled: env.TRIGGER_WORKLOAD_API_ENABLED, preDequeue: async () => { if (this.isKubernetes) { // TODO: Test k8s resource monitor and remove this @@ -180,7 +188,7 @@ class ManagedSupervisor { // Responds to workload requests only this.workloadServer = new WorkloadServer({ - port: env.TRIGGER_WORKLOAD_API_PORT, + port: env.TRIGGER_WORKLOAD_API_PORT_INTERNAL, workerClient: this.workerSession.httpClient, checkpointClient: this.checkpointClient, }); diff --git a/apps/supervisor/src/workloadManager/docker.ts b/apps/supervisor/src/workloadManager/docker.ts index 48c0b9b7f3..950ad2a343 100644 --- a/apps/supervisor/src/workloadManager/docker.ts +++ b/apps/supervisor/src/workloadManager/docker.ts @@ -6,12 +6,18 @@ import { } from "./types.js"; import { x } from "tinyexec"; import { env } from "../env.js"; -import { RunnerId } from "../util.js"; +import { getDockerHostDomain, RunnerId } from "../util.js"; export class DockerWorkloadManager implements WorkloadManager { private readonly logger = new SimpleStructuredLogger("docker-workload-provider"); - constructor(private opts: WorkloadManagerOptions) {} + constructor(private opts: WorkloadManagerOptions) { + if (opts.workloadApiDomain) { + this.logger.warn("[DockerWorkloadProvider] ⚠️ Custom workload API domain", { + domain: opts.workloadApiDomain, + }); + } + } async create(opts: WorkloadManagerCreateOptions) { this.logger.log("[DockerWorkloadProvider] Creating container", { opts }); @@ -24,7 +30,9 @@ export class DockerWorkloadManager implements WorkloadManager { `--env=TRIGGER_ENV_ID=${opts.envId}`, `--env=TRIGGER_RUN_ID=${opts.runFriendlyId}`, `--env=TRIGGER_SNAPSHOT_ID=${opts.snapshotFriendlyId}`, - `--env=TRIGGER_WORKER_API_URL=${this.opts.workerApiUrl}`, + `--env=TRIGGER_SUPERVISOR_API_PROTOCOL=${this.opts.workloadApiProtocol}`, + `--env=TRIGGER_SUPERVISOR_API_PORT=${this.opts.workloadApiPort}`, + `--env=TRIGGER_SUPERVISOR_API_DOMAIN=${this.opts.workloadApiDomain ?? getDockerHostDomain()}`, `--env=TRIGGER_WORKER_INSTANCE_NAME=${env.TRIGGER_WORKER_INSTANCE_NAME}`, `--env=OTEL_EXPORTER_OTLP_ENDPOINT=${env.OTEL_EXPORTER_OTLP_ENDPOINT}`, `--env=TRIGGER_RUNNER_ID=${runnerId}`, diff --git a/apps/supervisor/src/workloadManager/kubernetes.ts b/apps/supervisor/src/workloadManager/kubernetes.ts index ed8dd6ae8c..b4e4a7e19a 100644 --- a/apps/supervisor/src/workloadManager/kubernetes.ts +++ b/apps/supervisor/src/workloadManager/kubernetes.ts @@ -23,6 +23,12 @@ export class KubernetesWorkloadManager implements WorkloadManager { constructor(private opts: WorkloadManagerOptions) { this.k8s = createK8sApi(); + + if (opts.workloadApiDomain) { + this.logger.warn("[KubernetesWorkloadManager] ⚠️ Custom workload API domain", { + domain: opts.workloadApiDomain, + }); + } } async create(opts: WorkloadManagerCreateOptions) { @@ -72,8 +78,26 @@ export class KubernetesWorkloadManager implements WorkloadManager { value: opts.snapshotFriendlyId, }, { - name: "TRIGGER_WORKER_API_URL", - value: this.opts.workerApiUrl, + name: "TRIGGER_SUPERVISOR_API_PROTOCOL", + value: this.opts.workloadApiProtocol, + }, + { + name: "TRIGGER_SUPERVISOR_API_PORT", + value: `${this.opts.workloadApiPort}`, + }, + { + name: "TRIGGER_SUPERVISOR_API_DOMAIN", + ...(this.opts.workloadApiDomain + ? { + value: this.opts.workloadApiDomain, + } + : { + valueFrom: { + fieldRef: { + fieldPath: "status.hostIP", + }, + }, + }), }, { name: "TRIGGER_WORKER_INSTANCE_NAME", diff --git a/apps/supervisor/src/workloadManager/types.ts b/apps/supervisor/src/workloadManager/types.ts index 57874e5334..ea2046b631 100644 --- a/apps/supervisor/src/workloadManager/types.ts +++ b/apps/supervisor/src/workloadManager/types.ts @@ -1,7 +1,9 @@ import { type EnvironmentType, type MachinePreset } from "@trigger.dev/core/v3"; export interface WorkloadManagerOptions { - workerApiUrl: string; + workloadApiProtocol: "http" | "https"; + workloadApiDomain?: string; // If unset, will use orchestrator-specific default + workloadApiPort: number; warmStartUrl?: string; } diff --git a/packages/cli-v3/src/entryPoints/managed-run-controller.ts b/packages/cli-v3/src/entryPoints/managed-run-controller.ts index cf85dce006..a33e8382ff 100644 --- a/packages/cli-v3/src/entryPoints/managed-run-controller.ts +++ b/packages/cli-v3/src/entryPoints/managed-run-controller.ts @@ -36,7 +36,9 @@ const Env = z.object({ NODE_EXTRA_CA_CERTS: z.string().optional(), // Set at runtime - TRIGGER_WORKER_API_URL: z.string().url(), + TRIGGER_SUPERVISOR_API_PROTOCOL: z.enum(["http", "https"]), + TRIGGER_SUPERVISOR_API_DOMAIN: z.string(), + TRIGGER_SUPERVISOR_API_PORT: z.coerce.number(), TRIGGER_WORKLOAD_CONTROLLER_ID: z.string().default(`controller_${randomUUID()}`), TRIGGER_ENV_ID: z.string(), TRIGGER_RUN_ID: z.string().optional(), // This is only useful for cold starts @@ -84,6 +86,8 @@ class ManagedRunController { private readonly snapshotPoller: HeartbeatService; private readonly snapshotPollIntervalSeconds: number; + private readonly workerApiUrl: string; + private state: | { phase: "RUN"; @@ -246,8 +250,10 @@ class ManagedRunController { this.heartbeatIntervalSeconds = opts.heartbeatIntervalSeconds || 30; this.snapshotPollIntervalSeconds = 5; + this.workerApiUrl = `${env.TRIGGER_SUPERVISOR_API_PROTOCOL}://${env.TRIGGER_SUPERVISOR_API_DOMAIN}:${env.TRIGGER_SUPERVISOR_API_PORT}`; + this.httpClient = new WorkloadHttpClient({ - workerApiUrl: env.TRIGGER_WORKER_API_URL, + workerApiUrl: this.workerApiUrl, deploymentId: env.TRIGGER_DEPLOYMENT_ID, runnerId: env.TRIGGER_RUNNER_ID, }); @@ -746,8 +752,7 @@ class ManagedRunController { } createSocket() { - const wsUrl = new URL(env.TRIGGER_WORKER_API_URL); - wsUrl.pathname = "/workload"; + const wsUrl = new URL("/workload", this.workerApiUrl); this.socket = io(wsUrl.href, { transports: ["websocket"], From b0bee1d17463b9334117567aa2e66d04e06789b3 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Fri, 7 Mar 2025 11:46:06 +0000 Subject: [PATCH 09/11] remove default workload api domain --- apps/supervisor/src/env.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 7786836923..112bc790ff 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -1,7 +1,6 @@ import { randomUUID } from "crypto"; import { env as stdEnv } from "std-env"; import { z } from "zod"; -import { getDockerHostDomain } from "./util.js"; const Env = z.object({ // This will come from `spec.nodeName` in k8s @@ -18,7 +17,7 @@ const Env = z.object({ .string() .transform((s) => z.enum(["http", "https"]).parse(s.toLowerCase())) .default("http"), - TRIGGER_WORKLOAD_API_DOMAIN: z.string().default(getDockerHostDomain()), // If unset, will use orchestrator-specific default + TRIGGER_WORKLOAD_API_DOMAIN: z.string().optional(), // If unset, will use orchestrator-specific default TRIGGER_WORKLOAD_API_PORT_INTERNAL: z.coerce.number().default(8020), // This is the port the workload API listens on TRIGGER_WORKLOAD_API_PORT_EXTERNAL: z.coerce.number().default(8020), // This is the exposed port passed to the run controller From ff0154d773f3413df67762b9a3ea6319e542337d Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Fri, 7 Mar 2025 11:47:09 +0000 Subject: [PATCH 10/11] option to disable workload api --- apps/supervisor/src/index.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/supervisor/src/index.ts b/apps/supervisor/src/index.ts index ebab788970..7ea3f03e0a 100644 --- a/apps/supervisor/src/index.ts +++ b/apps/supervisor/src/index.ts @@ -246,7 +246,17 @@ class ManagedSupervisor { async start() { this.logger.log("[ManagedWorker] Starting up"); - await this.workloadServer.start(); + if (env.TRIGGER_WORKLOAD_API_ENABLED) { + this.logger.log("[ManagedWorker] Workload API enabled", { + protocol: env.TRIGGER_WORKLOAD_API_PROTOCOL, + domain: env.TRIGGER_WORKLOAD_API_DOMAIN, + port: env.TRIGGER_WORKLOAD_API_PORT_INTERNAL, + }); + await this.workloadServer.start(); + } else { + this.logger.warn("[ManagedWorker] Workload API disabled"); + } + await this.workerSession.start(); await this.httpServer.start(); From 94a4ff86c6523d6f090adf1b5cfcf9ab94f6e694 Mon Sep 17 00:00:00 2001 From: nicktrn <55853254+nicktrn@users.noreply.github.com> Date: Fri, 7 Mar 2025 12:11:44 +0000 Subject: [PATCH 11/11] fix bool env var coercion --- apps/supervisor/src/env.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/apps/supervisor/src/env.ts b/apps/supervisor/src/env.ts index 112bc790ff..2b4ca5eb5e 100644 --- a/apps/supervisor/src/env.ts +++ b/apps/supervisor/src/env.ts @@ -2,6 +2,14 @@ import { randomUUID } from "crypto"; import { env as stdEnv } from "std-env"; import { z } from "zod"; +const BoolEnv = z.preprocess((val) => { + if (typeof val !== "string") { + return val; + } + + return ["true", "1"].includes(val.toLowerCase().trim()); +}, z.boolean()); + const Env = z.object({ // This will come from `spec.nodeName` in k8s TRIGGER_WORKER_INSTANCE_NAME: z.string().default(randomUUID()), @@ -12,7 +20,7 @@ const Env = z.object({ MANAGED_WORKER_SECRET: z.string(), // Workload API settings (coordinator mode) - the workload API is what the run controller connects to - TRIGGER_WORKLOAD_API_ENABLED: z.coerce.boolean().default(true), + TRIGGER_WORKLOAD_API_ENABLED: BoolEnv.default("true"), TRIGGER_WORKLOAD_API_PROTOCOL: z .string() .transform((s) => z.enum(["http", "https"]).parse(s.toLowerCase())) @@ -22,7 +30,7 @@ const Env = z.object({ TRIGGER_WORKLOAD_API_PORT_EXTERNAL: z.coerce.number().default(8020), // This is the exposed port passed to the run controller // Dequeue settings (provider mode) - TRIGGER_DEQUEUE_ENABLED: z.coerce.boolean().default(true), + TRIGGER_DEQUEUE_ENABLED: BoolEnv.default("true"), TRIGGER_DEQUEUE_INTERVAL_MS: z.coerce.number().int().default(1000), // Optional services