From fd7720d5baf9ffecde42b4bb70f6165d25cb276b Mon Sep 17 00:00:00 2001
From: Gero Posmyk-Leinemann
Date: Mon, 29 Aug 2022 16:02:23 +0000
Subject: [PATCH] [server] Only alert on explicit system failures

---
 components/server/src/prometheus-metrics.ts        |  6 +++++-
 .../server/src/workspace/workspace-starter.ts      | 14 +++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/components/server/src/prometheus-metrics.ts b/components/server/src/prometheus-metrics.ts
index 1b3e79df359458..206d9787ad96e3 100644
--- a/components/server/src/prometheus-metrics.ts
+++ b/components/server/src/prometheus-metrics.ts
@@ -148,7 +148,11 @@ const instanceStartsFailedTotal = new prometheusClient.Counter({
     registers: [prometheusClient.register],
 });
 
-export type FailedInstanceStartReason = "clusterSelectionFailed" | "startOnClusterFailed" | "other";
+export type FailedInstanceStartReason =
+    | "clusterSelectionFailed"
+    | "startOnClusterFailed"
+    | "imageBuildFailed"
+    | "other";
 export function increaseFailedInstanceStartCounter(reason: FailedInstanceStartReason) {
     instanceStartsFailedTotal.inc({ reason });
 }
diff --git a/components/server/src/workspace/workspace-starter.ts b/components/server/src/workspace/workspace-starter.ts
index 108db903785f38..473944d2b4d040 100644
--- a/components/server/src/workspace/workspace-starter.ts
+++ b/components/server/src/workspace/workspace-starter.ts
@@ -591,12 +591,11 @@ export class WorkspaceStarter {
             if (rethrow) {
                 throw err;
             } else {
-                log.error("error starting instance", err, { instanceId: instance.id });
-                let failedReason: FailedInstanceStartReason = "other";
+                TraceContext.setError({ span }, err);
+                log.error({ userId: user.id, instanceId: instance.id }, "error starting instance", err);
                 if (err instanceof StartInstanceError) {
-                    failedReason = err.reason;
+                    increaseFailedInstanceStartCounter(err.reason);
                 }
-                increaseFailedInstanceStartCounter(failedReason);
             }
 
             return { instanceID: instance.id };
@@ -765,7 +764,7 @@ export class WorkspaceStarter {
         ideConfig: IDEConfig,
         pvcEnabledForPrebuilds: boolean,
     ): Promise {
-        const span = TraceContext.startSpan("buildWorkspaceImage", ctx);
+        const span = TraceContext.startSpan("newInstance", ctx);
         //#endregion IDE resolution TODO(ak) move to IDE service
         // TODO: Compatible with ide-config not deployed, need revert after ide-config deployed
         delete ideConfig.ideOptions.options["code-latest"];
@@ -1264,7 +1263,7 @@ export class WorkspaceStarter {
             TraceContext.setError({ span }, err);
 
             const looksLikeUserError = (msg: string): boolean => {
-                return msg.startsWith("build failed:");
+                return msg.startsWith("build failed:") || msg.startsWith("headless task failed:");
             };
             if (looksLikeUserError(message)) {
                 log.debug(
@@ -1272,10 +1271,11 @@ export class WorkspaceStarter {
                     `workspace image build failed: ${message}`,
                 );
             } else {
-                log.warn(
+                log.error(
                     { instanceId: instance.id, userId: user.id, workspaceId: workspace.id },
                     `workspace image build failed: ${message}`,
                 );
+                err = new StartInstanceError("imageBuildFailed", err);
             }
             this.analytics.track({
                 userId: user.id,