diff --git a/core/services/workflows/engine.go b/core/services/workflows/engine.go index f1786d113da..8e98405eef1 100644 --- a/core/services/workflows/engine.go +++ b/core/services/workflows/engine.go @@ -689,7 +689,7 @@ func (e *Engine) finishExecution(ctx context.Context, executionID string, status e.stepUpdatesChMap.remove(executionID) metrics.updateTotalWorkflowsGauge(ctx, e.stepUpdatesChMap.len()) metrics.updateWorkflowExecutionLatencyGauge(ctx, executionDuration) - metrics.incrementWorkflowExecutionFinishedCounter(ctx) + metrics.incrementWorkflowExecutionFinishedCounter(ctx) // maybe just decide on `success` vs `failure` here based on status e.onExecutionFinished(executionID) return nil } @@ -733,6 +733,7 @@ func (e *Engine) worker(ctx context.Context) { if err != nil { e.logger.With(platform.KeyWorkflowExecutionID, executionID).Errorf("failed to start execution: %v", err) logCustMsg(ctx, cma, fmt.Sprintf("failed to start execution: %s", err), e.logger) + e.metrics.with(platform.KeyTriggerID, te.ID).incrementTriggerWorkflowStarterErrorCounter(ctx) } else { e.logger.With(platform.KeyWorkflowExecutionID, executionID).Debug("execution started") logCustMsg(ctx, cma, "execution started", e.logger) diff --git a/core/services/workflows/monitoring.go b/core/services/workflows/monitoring.go index 18d7d8fe179..f450f52a20b 100644 --- a/core/services/workflows/monitoring.go +++ b/core/services/workflows/monitoring.go @@ -13,6 +13,7 @@ import ( ) var registerTriggerFailureCounter metric.Int64Counter +var triggerWorkflowStarterErrorCounter metric.Int64Counter var workflowsRunningGauge metric.Int64Gauge var capabilityInvocationCounter metric.Int64Counter var capabilityFailureCounter metric.Int64Counter @@ -32,6 +33,11 @@ func initMonitoringResources() (err error) { return fmt.Errorf("failed to register trigger failure counter: %w", err) } + triggerWorkflowStarterErrorCounter, err = beholder.GetMeter().Int64Counter("platform_engine_triggerworkflow_starter_errors") + if err != nil { + return fmt.Errorf("failed to register trigger workflow starter error counter: %w", err) + } + workflowsRunningGauge, err = beholder.GetMeter().Int64Gauge("platform_engine_workflow_count") if err != nil { return fmt.Errorf("failed to register workflows running gauge: %w", err) @@ -110,6 +116,11 @@ func (c workflowsMetricLabeler) incrementRegisterTriggerFailureCounter(ctx conte registerTriggerFailureCounter.Add(ctx, 1, metric.WithAttributes(otelLabels...)) } +func (c workflowsMetricLabeler) incrementTriggerWorkflowStarterErrorCounter(ctx context.Context) { + otelLabels := localMonitoring.KvMapToOtelAttributes(c.Labels) + triggerWorkflowStarterErrorCounter.Add(ctx, 1, metric.WithAttributes(otelLabels...)) +} + func (c workflowsMetricLabeler) incrementCapabilityInvocationCounter(ctx context.Context) { otelLabels := localMonitoring.KvMapToOtelAttributes(c.Labels) capabilityInvocationCounter.Add(ctx, 1, metric.WithAttributes(otelLabels...))