Skip to content

Commit

Permalink
[server] Only alert on explicit system failures
Browse files Browse the repository at this point in the history
  • Loading branch information
geropl authored and roboquat committed Aug 29, 2022
1 parent 912410c commit fd7720d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
6 changes: 5 additions & 1 deletion components/server/src/prometheus-metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,11 @@ const instanceStartsFailedTotal = new prometheusClient.Counter({
registers: [prometheusClient.register],
});

export type FailedInstanceStartReason = "clusterSelectionFailed" | "startOnClusterFailed" | "other";
/**
 * Closed set of reasons that can be reported when an instance start fails.
 * The chosen value is passed as the `reason` label of the
 * `instanceStartsFailedTotal` counter by `increaseFailedInstanceStartCounter`.
 */
export type FailedInstanceStartReason =
| "clusterSelectionFailed"
| "startOnClusterFailed"
| "imageBuildFailed"
| "other";
/**
 * Increments the `instanceStartsFailedTotal` counter for a failed instance start.
 *
 * @param reason the failure category; recorded as the counter's `reason` label
 */
export function increaseFailedInstanceStartCounter(reason: FailedInstanceStartReason) {
    // Build the label set first so the metric call reads as a single step.
    const labels = { reason };
    instanceStartsFailedTotal.inc(labels);
}
Expand Down
14 changes: 7 additions & 7 deletions components/server/src/workspace/workspace-starter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,11 @@ export class WorkspaceStarter {
if (rethrow) {
throw err;
} else {
log.error("error starting instance", err, { instanceId: instance.id });
let failedReason: FailedInstanceStartReason = "other";
TraceContext.setError({ span }, err);
log.error({ userId: user.id, instanceId: instance.id }, "error starting instance", err);
if (err instanceof StartInstanceError) {
failedReason = err.reason;
increaseFailedInstanceStartCounter(err.reason);
}
increaseFailedInstanceStartCounter(failedReason);
}

return { instanceID: instance.id };
Expand Down Expand Up @@ -765,7 +764,7 @@ export class WorkspaceStarter {
ideConfig: IDEConfig,
pvcEnabledForPrebuilds: boolean,
): Promise<WorkspaceInstance> {
const span = TraceContext.startSpan("buildWorkspaceImage", ctx);
const span = TraceContext.startSpan("newInstance", ctx);
//#endregion IDE resolution TODO(ak) move to IDE service
// TODO: Compatible with ide-config not deployed, need revert after ide-config deployed
delete ideConfig.ideOptions.options["code-latest"];
Expand Down Expand Up @@ -1264,18 +1263,19 @@ export class WorkspaceStarter {

TraceContext.setError({ span }, err);
const looksLikeUserError = (msg: string): boolean => {
return msg.startsWith("build failed:");
return msg.startsWith("build failed:") || msg.startsWith("headless task failed:");
};
if (looksLikeUserError(message)) {
log.debug(
{ instanceId: instance.id, userId: user.id, workspaceId: workspace.id },
`workspace image build failed: ${message}`,
);
} else {
log.warn(
log.error(
{ instanceId: instance.id, userId: user.id, workspaceId: workspace.id },
`workspace image build failed: ${message}`,
);
err = new StartInstanceError("imageBuildFailed", err);
}
this.analytics.track({
userId: user.id,
Expand Down

0 comments on commit fd7720d

Please sign in to comment.