diff --git a/components/ws-manager-bridge/src/app-cluster-instance-controller.ts b/components/ws-manager-bridge/src/app-cluster-instance-controller.ts
new file mode 100644
index 00000000000000..7b4587327a3d87
--- /dev/null
+++ b/components/ws-manager-bridge/src/app-cluster-instance-controller.ts
@@ -0,0 +1,100 @@
+/**
+ * Copyright (c) 2022 Gitpod GmbH. All rights reserved.
+ * Licensed under the GNU Affero General Public License (AGPL).
+ * See License-AGPL.txt in the project root for license information.
+ */
+
+import { WorkspaceDB } from "@gitpod/gitpod-db/lib/workspace-db";
+import { Disposable, DisposableCollection } from "@gitpod/gitpod-protocol";
+import { log } from "@gitpod/gitpod-protocol/lib/util/logging";
+import { repeat } from "@gitpod/gitpod-protocol/lib/util/repeat";
+import { TraceContext } from "@gitpod/gitpod-protocol/lib/util/tracing";
+import { inject, injectable } from "inversify";
+import { Configuration } from "./config";
+import { WorkspaceInstanceController } from "./workspace-instance-controller";
+
+/**
+ * The WorkspaceInstance lifecycle is split between application clusters and workspace clusters on the transition from
+ * pending/building -> starting (cmp. WorkspacePhases here:
+ * https://github.com/gitpod-io/gitpod/blob/008ea3fadc89d4817cf3effc8a5b30eaf469fb1c/components/gitpod-protocol/src/workspace-instance.ts#L111).
+ *
+ * Before the transition, WorkspaceInstances belong to the respective app cluster, denoted by "instance.region === 'eu02'", for example.
+ * After a WorkspaceInstance has been moved over to a workspace cluster, that moved "ownership" is reflected in said field.
+ * We maintain a constant connection (called "bridge") to all workspace clusters to be able to keep reality (workspace
+ * side) in sync with what we have in our DB/forward to clients.
+ *
+ * This class is meant to take the same responsibility for all WorkspaceInstances that have not (yet) been passed over
+ * to a workspace cluster for whatever reason. Here's a list of examples, prefixed by phase:
+ *  - "preparing": failed cleanup after failed call to wsManager.StartWorkspace
+ *  - "building": failed cleanup after failed image-build (which is still controlled by the application cluster,
+ *    although that might change in the future)
+ */
+@injectable()
+export class AppClusterWorkspaceInstancesController implements Disposable {
+    @inject(Configuration) protected readonly config: Configuration;
+
+    @inject(WorkspaceDB) protected readonly workspaceDb: WorkspaceDB;
+
+    @inject(WorkspaceInstanceController) protected readonly workspaceInstanceController: WorkspaceInstanceController;
+
+    /** Handles of everything start() has registered; released together in dispose(). */
+    protected readonly disposables = new DisposableCollection();
+
+    /**
+     * Registers the control run with repeat(), passing `config.controllerIntervalSeconds` converted to milliseconds.
+     * The handle returned by repeat() is tracked so that dispose() can release it.
+     */
+    public async start(): Promise<void> {
+        const disposable = repeat(
+            async () => this.controlAppClusterManagedWorkspaceInstances(),
+            this.config.controllerIntervalSeconds * 1000,
+        );
+        this.disposables.push(disposable);
+    }
+
+    /**
+     * A single control run: queries the DB via findRunningInstancesWithWorkspaces for this installation and
+     * hands the result to the WorkspaceInstanceController for timeout handling.
+     *
+     * Errors raised inside the run are logged and recorded on the tracing span instead of being
+     * rethrown, so a failed run does not reject the callback passed to repeat().
+     */
+    protected async controlAppClusterManagedWorkspaceInstances(): Promise<void> {
+        const appClusterInstallation = this.config.installation;
+
+        const span = TraceContext.startSpan("controlAppClusterManagedWorkspaceInstances");
+        const ctx = { span };
+        try {
+            log.info("Controlling app cluster instances", { installation: appClusterInstallation });
+
+            const notStoppedInstances = await this.workspaceDb.findRunningInstancesWithWorkspaces(
+                appClusterInstallation,
+                undefined,
+                false,
+            );
+            await this.workspaceInstanceController.controlNotStoppedAppClusterManagedInstanceTimeouts(
+                ctx,
+                notStoppedInstances,
+                appClusterInstallation,
+            );
+
+            log.info("Done controlling app cluster instances", {
+                installation: appClusterInstallation,
+                instancesCount: notStoppedInstances.length,
+            });
+        } catch (err) {
+            log.error("Error controlling app cluster instances", err, {
+                installation: appClusterInstallation,
+            });
+            TraceContext.setError(ctx, err);
+        } finally {
+            // always close the span, whether the run succeeded or failed
+            span.finish();
+        }
+    }
+
+    /** Releases everything registered by start(). */
+    public dispose(): void {
+        this.disposables.dispose();
+    }
+}
diff --git a/components/ws-manager-bridge/src/container-module.ts b/components/ws-manager-bridge/src/container-module.ts
index 0fab040fbefd4f..98d2bc7f33cf56 100644
--- a/components/ws-manager-bridge/src/container-module.ts
+++ b/components/ws-manager-bridge/src/container-module.ts
@@ -39,6 +39,7 @@ import { Client } from "@gitpod/gitpod-protocol/lib/experiments/types";
 import { getExperimentsClientForBackend } from "@gitpod/gitpod-protocol/lib/experiments/configcat-server";
 import { ClusterSyncService } from "./cluster-sync-service";
 import { WorkspaceInstanceController, WorkspaceInstanceControllerImpl } from "./workspace-instance-controller";
+import { AppClusterWorkspaceInstancesController } from "./app-cluster-instance-controller";
 
 export const containerModule = new ContainerModule((bind) => {
     bind(MessagebusConfiguration).toSelf().inSingletonScope();
@@ -95,4 +96,6 @@ export const containerModule = new ContainerModule((bind) => {
 
     // transient to make sure we're creating a separate instance every time we ask for it
     bind(WorkspaceInstanceController).to(WorkspaceInstanceControllerImpl).inTransientScope();
+
+    bind(AppClusterWorkspaceInstancesController).toSelf().inSingletonScope();
 });
diff --git a/components/ws-manager-bridge/src/main.ts b/components/ws-manager-bridge/src/main.ts
index 7fbf4b8c4b5ef3..8c3a9602bcbf07 100644
--- a/components/ws-manager-bridge/src/main.ts
+++ b/components/ws-manager-bridge/src/main.ts
@@ -15,6 +15,7 @@ import { TracingManager } from "@gitpod/gitpod-protocol/lib/util/tracing";
 import { ClusterServiceServer } from "./cluster-service-server";
 import { BridgeController } from "./bridge-controller";
 import { ClusterSyncService } from "./cluster-sync-service";
+import { AppClusterWorkspaceInstancesController } from "./app-cluster-instance-controller";
 
 log.enableJSONLogging("ws-manager-bridge", undefined, LogrusLogLevel.getFromEnv());
 
@@ -52,6 +53,11 @@ export const start = async (container: Container) => {
         const clusterSyncService = container.get(ClusterSyncService);
         clusterSyncService.start();
 
+        const appClusterInstanceController = container.get(
+            AppClusterWorkspaceInstancesController,
+        );
+        await appClusterInstanceController.start();
+
         process.on("SIGTERM", async () => {
             log.info("SIGTERM received, stopping");
             bridgeController.dispose();
@@ -64,6 +70,7 @@ export const start = async (container: Container) => {
                 });
             }
             clusterServiceServer.stop().then(() => log.info("gRPC shutdown completed"));
+            appClusterInstanceController.dispose();
         });
         log.info("ws-manager-bridge is up and running");
         await new Promise((rs, rj) => {});