Addressed Comments From @jessesuen
Signed-off-by: Rohit Agrawal <rohit.agrawal@databricks.com>
agrawroh committed Nov 30, 2021
1 parent f390173 commit aa6b320
Showing 15 changed files with 289 additions and 337 deletions.
analysis/analysis.go (41 additions, 20 deletions)
@@ -70,7 +70,7 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph
return run
}

- err = analysisutil.ValidateDryRunMetrics(resolvedMetrics, run.Spec.DryRun)
+ dryRunMetricsMap, err := analysisutil.GetDryRunMetrics(run.Spec.DryRun, resolvedMetrics)
if err != nil {
message := fmt.Sprintf("Analysis spec invalid: %v", err)
logger.Warn(message)
@@ -82,7 +82,7 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph

tasks := generateMetricTasks(run, resolvedMetrics)
logger.Infof("Taking %d Measurement(s)...", len(tasks))
- err = c.runMeasurements(run, tasks)
+ err = c.runMeasurements(run, tasks, dryRunMetricsMap)
if err != nil {
message := fmt.Sprintf("Unable to resolve metric arguments: %v", err)
logger.Warn(message)
@@ -92,7 +92,7 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph
return run
}

- newStatus, newMessage := c.assessRunStatus(run, resolvedMetrics)
+ newStatus, newMessage := c.assessRunStatus(run, resolvedMetrics, dryRunMetricsMap)
if newStatus != run.Status.Phase {
run.Status.Phase = newStatus
run.Status.Message = newMessage
@@ -289,7 +289,7 @@ func (c *Controller) resolveArgs(tasks []metricTask, args []v1alpha1.Argument, n
}

// runMeasurements iterates a list of metric tasks, and runs, resumes, or terminates measurements
- func (c *Controller) runMeasurements(run *v1alpha1.AnalysisRun, tasks []metricTask) error {
+ func (c *Controller) runMeasurements(run *v1alpha1.AnalysisRun, tasks []metricTask, dryRunMetricsMap map[string]bool) error {
var wg sync.WaitGroup
// resultsLock should be held whenever we are accessing or setting status.metricResults since
// we are performing queries in parallel
@@ -315,12 +315,11 @@ func (c *Controller) runMeasurements(run *v1alpha1.AnalysisRun, tasks []metricTa
metricResult := analysisutil.GetResult(run, t.metric.Name)
resultsLock.Unlock()

- dryRunMetricsMap, everythingRunningInDryRun := analysisutil.GetDryRunMetricNames(run.Spec.DryRun)
if metricResult == nil {
metricResult = &v1alpha1.MetricResult{
Name: t.metric.Name,
Phase: v1alpha1.AnalysisPhaseRunning,
- DryRun: everythingRunningInDryRun || dryRunMetricsMap[t.metric.Name],
+ DryRun: dryRunMetricsMap[t.metric.Name],
}
}

@@ -405,12 +404,11 @@ func (c *Controller) runMeasurements(run *v1alpha1.AnalysisRun, tasks []metricTa
// assessRunStatus assesses the overall status of this AnalysisRun
// If any metric is not yet completed, the AnalysisRun is still considered Running
// Once all metrics are complete, the worst status is used as the overall AnalysisRun status
- func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alpha1.Metric) (v1alpha1.AnalysisPhase, string) {
+ func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alpha1.Metric, dryRunMetricsMap map[string]bool) (v1alpha1.AnalysisPhase, string) {
var worstStatus v1alpha1.AnalysisPhase
var worstMessage string
terminating := analysisutil.IsTerminating(run)
everythingCompleted := true
- dryRunMetricsMap, everythingRunningInDryRun := analysisutil.GetDryRunMetricNames(run.Spec.DryRun)

if run.Status.StartedAt == nil {
now := metav1.Now()
@@ -420,20 +418,29 @@ func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alph
worstMessage = "Run Terminated"
}

- // Initialize Dry-Run summary object.
- dryRunSummary := v1alpha1.DryRunSummary{
+ // Initialize Run & Dry-Run summary object
+ runSummary := v1alpha1.RunSummary{
Count: 0,
Successful: 0,
Failed: 0,
Inconclusive: 0,
Error: 0,
+ DryRun: v1alpha1.DryRunSummary{
+ Count: 0,
+ Successful: 0,
+ Failed: 0,
+ Inconclusive: 0,
+ Error: 0,
+ },
}

// Iterate all metrics and update `MetricResult.Phase` fields based on latest measurement(s)
for _, metric := range metrics {
- if everythingRunningInDryRun || dryRunMetricsMap[metric.Name] {
+ if dryRunMetricsMap[metric.Name] {
log.Infof("Metric '%s' is running in the Dry-Run mode.", metric.Name)
- dryRunSummary.Count++
+ runSummary.DryRun.Count++
+ } else {
+ runSummary.Count++
}
if result := analysisutil.GetResult(run, metric.Name); result != nil {
logger := logutil.WithAnalysisRun(run).WithField("metric", metric.Name)
@@ -461,7 +468,7 @@ func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alph
phase, message := assessMetricFailureInconclusiveOrError(metric, *result)
// NOTE: We don't care about the status if the metric is marked as a Dry-Run
// otherwise, remember the worst status of all completed metric results
- if !(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]) {
+ if !dryRunMetricsMap[metric.Name] {
if worstStatus == "" || analysisutil.IsWorse(worstStatus, metricStatus) {
worstStatus = metricStatus
if message != "" {
@@ -471,6 +478,20 @@ func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alph
}
}
}
+ // Update Run Summary
+ switch phase {
+ case v1alpha1.AnalysisPhaseError:
+ runSummary.Error++
+ case v1alpha1.AnalysisPhaseFailed:
+ runSummary.Failed++
+ case v1alpha1.AnalysisPhaseInconclusive:
+ runSummary.Inconclusive++
+ case v1alpha1.AnalysisPhaseSuccessful:
+ runSummary.Successful++
+ default:
+ // We'll mark the status as success by default if it doesn't match anything.
+ runSummary.Successful++
+ }
} else {
// Update metric result message
if message != "" {
@@ -482,19 +503,19 @@ func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alph
}
analysisutil.SetResult(run, *result)
}
- // Update DryRun results
+ // Update DryRun Summary
switch phase {
case v1alpha1.AnalysisPhaseError:
- dryRunSummary.Error++
+ runSummary.DryRun.Error++
case v1alpha1.AnalysisPhaseFailed:
- dryRunSummary.Failed++
+ runSummary.DryRun.Failed++
case v1alpha1.AnalysisPhaseInconclusive:
- dryRunSummary.Inconclusive++
+ runSummary.DryRun.Inconclusive++
case v1alpha1.AnalysisPhaseSuccessful:
- dryRunSummary.Successful++
+ runSummary.DryRun.Successful++
default:
// We'll mark the status as success by default if it doesn't match anything.
- dryRunSummary.Successful++
+ runSummary.DryRun.Successful++
}
}
}
@@ -505,7 +526,7 @@ func (c *Controller) assessRunStatus(run *v1alpha1.AnalysisRun, metrics []v1alph
}
// Append Dry-Run metrics results if any.
worstMessage = strings.TrimSpace(worstMessage)
- run.Status.DryRunSummary = dryRunSummary
+ run.Status.RunSummary = runSummary
if terminating {
if worstStatus == "" {
// we have yet to take a single measurement, but have already been instructed to stop
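The new helper itself is not part of this diff, so for orientation here is a minimal, self-contained sketch of the behavior the call sites above appear to rely on. It assumes each `dryRun` rule's `metricName` is compiled as an anchored regular expression over the resolved metric names, and that a rule matching nothing is rejected with the error text the tests below expect; the local `DryRun`/`Metric` types are simplified stand-ins for the real `v1alpha1` ones.

```go
package main

import (
	"fmt"
	"regexp"
)

// Stand-ins for the v1alpha1 types used in the diff above (simplified).
type DryRun struct {
	MetricName string // interpreted as a regular expression
}

type Metric struct {
	Name string
}

// getDryRunMetrics sketches the assumed contract of analysisutil.GetDryRunMetrics:
// it returns a map from metric name to whether that metric runs in dry-run mode,
// and fails if a rule matches no metric (mirroring the test's error text).
func getDryRunMetrics(rules []DryRun, metrics []Metric) (map[string]bool, error) {
	dryRunMetricsMap := make(map[string]bool, len(metrics))
	for _, m := range metrics {
		dryRunMetricsMap[m.Name] = false
	}
	for i, rule := range rules {
		re, err := regexp.Compile("^" + rule.MetricName + "$")
		if err != nil {
			return nil, fmt.Errorf("dryRun[%d]: %v", i, err)
		}
		matched := false
		for _, m := range metrics {
			if re.MatchString(m.Name) {
				dryRunMetricsMap[m.Name] = true
				matched = true
			}
		}
		if !matched {
			return nil, fmt.Errorf("dryRun[%d]: Rule didn't match any metric name(s)", i)
		}
	}
	return dryRunMetricsMap, nil
}

func main() {
	rules := []DryRun{{MetricName: "total-.*"}}
	metrics := []Metric{{Name: "total-5xx-errors"}, {Name: "latency-p99"}}
	dryRunMetricsMap, err := getDryRunMetrics(rules, metrics)
	fmt.Println(dryRunMetricsMap, err) // map[latency-p99:false total-5xx-errors:true] <nil>
}
```

This reading also explains why the docs further down change the wildcard from `*` to `.*`: a bare `*` is not a valid regular expression.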
analysis/analysis_test.go (12 additions, 12 deletions)
@@ -447,7 +447,7 @@ func TestAssessRunStatus(t *testing.T) {
},
},
}
- status, message := c.assessRunStatus(run, run.Spec.Metrics)
+ status, message := c.assessRunStatus(run, run.Spec.Metrics, map[string]bool{})
assert.Equal(t, v1alpha1.AnalysisPhaseRunning, status)
assert.Equal(t, "", message)
}
@@ -466,7 +466,7 @@ func TestAssessRunStatus(t *testing.T) {
},
},
}
- status, message := c.assessRunStatus(run, run.Spec.Metrics)
+ status, message := c.assessRunStatus(run, run.Spec.Metrics, map[string]bool{})
assert.Equal(t, v1alpha1.AnalysisPhaseFailed, status)
assert.Equal(t, "", message)
}
@@ -520,7 +520,7 @@ func TestAssessRunStatusUpdateResult(t *testing.T) {
},
},
}
- status, message := c.assessRunStatus(run, run.Spec.Metrics)
+ status, message := c.assessRunStatus(run, run.Spec.Metrics, map[string]bool{})
assert.Equal(t, v1alpha1.AnalysisPhaseRunning, status)
assert.Equal(t, "", message)
assert.Equal(t, v1alpha1.AnalysisPhaseFailed, run.Status.MetricResults[1].Phase)
@@ -1419,8 +1419,8 @@ func StartAssessRunStatusErrorMessageAnalysisPhaseFail(t *testing.T, isDryRun bo

run := newTerminatingRun(v1alpha1.AnalysisPhaseFailed, isDryRun)
run.Status.MetricResults[0].Phase = v1alpha1.AnalysisPhaseSuccessful
- status, message := c.assessRunStatus(run, run.Spec.Metrics)
- return status, message, run.Status.DryRunSummary
+ status, message := c.assessRunStatus(run, run.Spec.Metrics, map[string]bool{"run-forever": isDryRun, "failed-metric": isDryRun})
+ return status, message, run.Status.RunSummary.DryRun
}

func TestAssessRunStatusErrorMessageAnalysisPhaseFail(t *testing.T) {
@@ -1459,8 +1459,8 @@ func StartAssessRunStatusErrorMessageFromProvider(t *testing.T, providerMessage
run := newTerminatingRun(v1alpha1.AnalysisPhaseFailed, isDryRun)
run.Status.MetricResults[0].Phase = v1alpha1.AnalysisPhaseSuccessful // All metrics must complete, or assessRunStatus will not return message
run.Status.MetricResults[1].Message = providerMessage
- status, message := c.assessRunStatus(run, run.Spec.Metrics)
- return status, message, run.Status.DryRunSummary
+ status, message := c.assessRunStatus(run, run.Spec.Metrics, map[string]bool{"run-forever": isDryRun, "failed-metric": isDryRun})
+ return status, message, run.Status.RunSummary.DryRun
}

// TestAssessRunStatusErrorMessageFromProvider verifies that the message returned by assessRunStatus
@@ -1504,8 +1504,8 @@ func StartAssessRunStatusMultipleFailures(t *testing.T, isDryRun bool) (v1alpha1
run := newTerminatingRun(v1alpha1.AnalysisPhaseFailed, isDryRun)
run.Status.MetricResults[0].Phase = v1alpha1.AnalysisPhaseFailed
run.Status.MetricResults[0].Failed = 1
- status, message := c.assessRunStatus(run, run.Spec.Metrics)
- return status, message, run.Status.DryRunSummary
+ status, message := c.assessRunStatus(run, run.Spec.Metrics, map[string]bool{"run-forever": isDryRun, "failed-metric": isDryRun})
+ return status, message, run.Status.RunSummary.DryRun
}

// TestAssessRunStatusMultipleFailures verifies that if there are multiple failed metrics, assessRunStatus returns the message
@@ -1571,7 +1571,7 @@ func TestAssessRunStatusWorstMessageInReconcileAnalysisRunInDryRunMode(t *testin
Inconclusive: 0,
Error: 0,
}
- assert.Equal(t, expectedDryRunSummary, newRun.Status.DryRunSummary)
+ assert.Equal(t, expectedDryRunSummary, newRun.Status.RunSummary.DryRun)
assert.Equal(t, "Metric assessed Failed due to failed (1) > failureLimit (0)", newRun.Status.MetricResults[0].Message)
assert.Equal(t, "Metric assessed Failed due to failed (1) > failureLimit (0)", newRun.Status.MetricResults[1].Message)
}
@@ -1633,7 +1633,7 @@ func TestTerminateAnalysisRunInDryRunMode(t *testing.T) {
Inconclusive: 0,
Error: 0,
}
- assert.Equal(t, expectedDryRunSummary, newRun.Status.DryRunSummary)
+ assert.Equal(t, expectedDryRunSummary, newRun.Status.RunSummary.DryRun)
}

func TestInvalidDryRunConfigThrowsError(t *testing.T) {
@@ -1674,5 +1674,5 @@ func TestInvalidDryRunConfigThrowsError(t *testing.T) {
}
newRun := c.reconcileAnalysisRun(run)
assert.Equal(t, v1alpha1.AnalysisPhaseError, newRun.Status.Phase)
assert.Equal(t, "Analysis spec invalid: dryRun[0]: Invalid metric name 'error-rate'", newRun.Status.Message)
assert.Equal(t, "Analysis spec invalid: dryRun[0]: Rule didn't match any metric name(s)", newRun.Status.Message)
}
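These tests now read `run.Status.RunSummary.DryRun`, which implies the dry-run tally moved inside a new run-level summary. A sketch of the assumed status types follows; the field types and JSON tags are guesses, not the actual `v1alpha1` definitions.

```go
package v1alpha1

// DryRunSummary tallies metrics that executed in dry-run mode.
type DryRunSummary struct {
	Count        int32 `json:"count,omitempty"`
	Successful   int32 `json:"successful,omitempty"`
	Failed       int32 `json:"failed,omitempty"`
	Inconclusive int32 `json:"inconclusive,omitempty"`
	Error        int32 `json:"error,omitempty"`
}

// RunSummary tallies the measured (non-dry-run) metrics and nests the
// dry-run tally, so both are reported from a single status field.
type RunSummary struct {
	Count        int32         `json:"count,omitempty"`
	Successful   int32         `json:"successful,omitempty"`
	Failed       int32         `json:"failed,omitempty"`
	Inconclusive int32         `json:"inconclusive,omitempty"`
	Error        int32         `json:"error,omitempty"`
	DryRun       DryRunSummary `json:"dryRun"`
}
```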
controller/metrics/analysis.go (8 additions, 8 deletions)
@@ -82,21 +82,21 @@ func collectAnalysisRuns(ch chan<- prometheus.Metric, ar *v1alpha1.AnalysisRun)
addGauge(MetricAnalysisRunPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseRunning), string(v1alpha1.AnalysisPhaseRunning))
addGauge(MetricAnalysisRunPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseInconclusive), string(v1alpha1.AnalysisPhaseInconclusive))

- dryRunMetricsMap, everythingRunningInDryRun := analysisutil.GetDryRunMetricNames(ar.Spec.DryRun)
+ dryRunMetricsMap, _ := analysisutil.GetDryRunMetrics(ar.Spec.DryRun, ar.Spec.Metrics)
for _, metric := range ar.Spec.Metrics {
metricType := metricproviders.Type(metric)
metricResult := analysisutil.GetResult(ar, metric.Name)
- addGauge(MetricAnalysisRunMetricType, 1, metric.Name, metricType, fmt.Sprint(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]))
+ addGauge(MetricAnalysisRunMetricType, 1, metric.Name, metricType)
calculatedPhase := v1alpha1.AnalysisPhase("")
if metricResult != nil {
calculatedPhase = metricResult.Phase
}
- addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhasePending || calculatedPhase == ""), metric.Name, metricType, fmt.Sprint(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhasePending))
- addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseError), metric.Name, metricType, fmt.Sprint(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseError))
- addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseFailed), metric.Name, metricType, fmt.Sprint(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseFailed))
- addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseSuccessful), metric.Name, metricType, fmt.Sprint(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseSuccessful))
- addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseRunning), metric.Name, metricType, fmt.Sprint(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseRunning))
- addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseInconclusive), metric.Name, metricType, fmt.Sprint(everythingRunningInDryRun || dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseInconclusive))
+ addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhasePending || calculatedPhase == ""), metric.Name, metricType, fmt.Sprint(dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhasePending))
+ addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseError), metric.Name, metricType, fmt.Sprint(dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseError))
+ addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseFailed), metric.Name, metricType, fmt.Sprint(dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseFailed))
+ addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseSuccessful), metric.Name, metricType, fmt.Sprint(dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseSuccessful))
+ addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseRunning), metric.Name, metricType, fmt.Sprint(dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseRunning))
+ addGauge(MetricAnalysisRunMetricPhase, boolFloat64(calculatedPhase == v1alpha1.AnalysisPhaseInconclusive), metric.Name, metricType, fmt.Sprint(dryRunMetricsMap[metric.Name]), string(v1alpha1.AnalysisPhaseInconclusive))
}
}

controller/metrics/analysis_test.go (7 additions, 7 deletions)
@@ -97,15 +97,15 @@ const expectedAnalysisRunResponse = `# HELP analysis_run_info Information about
analysis_run_info{name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Error"} 1
# HELP analysis_run_metric_phase Information on the duration of a specific metric in the Analysis Run
# TYPE analysis_run_metric_phase gauge
analysis_run_metric_phase{dryRun="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Error",type="Web"} 1
analysis_run_metric_phase{dryRun="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Failed",type="Web"} 0
analysis_run_metric_phase{dryRun="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Inconclusive",type="Web"} 0
analysis_run_metric_phase{dryRun="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Pending",type="Web"} 0
analysis_run_metric_phase{dryRun="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Running",type="Web"} 0
analysis_run_metric_phase{dryRun="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Successful",type="Web"} 0
analysis_run_metric_phase{dry_run="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Error",type="Web"} 1
analysis_run_metric_phase{dry_run="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Failed",type="Web"} 0
analysis_run_metric_phase{dry_run="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Inconclusive",type="Web"} 0
analysis_run_metric_phase{dry_run="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Pending",type="Web"} 0
analysis_run_metric_phase{dry_run="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Running",type="Web"} 0
analysis_run_metric_phase{dry_run="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Successful",type="Web"} 0
# HELP analysis_run_metric_type Information on the type of a specific metric in the Analysis Runs
# TYPE analysis_run_metric_type gauge
analysis_run_metric_type{dryRun="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",type="Web"} 1
analysis_run_metric_type{metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",type="Web"} 1
# HELP analysis_run_phase Information on the state of the Analysis Run (DEPRECATED - use analysis_run_info)
# TYPE analysis_run_phase gauge
analysis_run_phase{name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Error"} 1
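The expected exposition keeps its line order after the rename only because the Prometheus text format serializes labels alphabetically, and `dry_run`, like `dryRun`, still sorts first. A sketch of how such golden output can be checked with client_golang's `testutil` (the gauge and its values are illustrative, not the controller's real collector):

```go
package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// Illustrative gauge carrying the renamed dry_run label.
	phase := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: "analysis_run_metric_phase",
		Help: "Information on the duration of a specific metric in the Analysis Run",
	}, []string{"dry_run", "metric", "name", "namespace", "phase", "type"})
	phase.WithLabelValues("false", "webmetric", "http-benchmark-test-tr8rn", "jesse-test", "Error", "Web").Set(1)

	expected := `
# HELP analysis_run_metric_phase Information on the duration of a specific metric in the Analysis Run
# TYPE analysis_run_metric_phase gauge
analysis_run_metric_phase{dry_run="false",metric="webmetric",name="http-benchmark-test-tr8rn",namespace="jesse-test",phase="Error",type="Web"} 1
`
	// CollectAndCompare returns a non-nil error if the scraped text differs.
	fmt.Println(testutil.CollectAndCompare(phase, strings.NewReader(expected))) // <nil>
}
```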
controller/metrics/prommetrics.go (2 additions, 2 deletions)
@@ -111,14 +111,14 @@ var (
MetricAnalysisRunMetricType = prometheus.NewDesc(
"analysis_run_metric_type",
"Information on the type of a specific metric in the Analysis Runs",
append(namespaceNameLabels, "metric", "type", "dryRun"),
append(namespaceNameLabels, "metric", "type"),
nil,
)

MetricAnalysisRunMetricPhase = prometheus.NewDesc(
"analysis_run_metric_phase",
"Information on the duration of a specific metric in the Analysis Run",
append(namespaceNameLabels, "metric", "type", "dryRun", "phase"),
append(namespaceNameLabels, "metric", "type", "dry_run", "phase"),
nil,
)
)
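The label slice passed to `prometheus.NewDesc` must line up positionally with the values later supplied for each sample, which is why the `addGauge` calls in `collectAnalysisRuns` gained or lost an argument in lockstep with these descriptors. A minimal sketch of that pairing, assuming `namespaceNameLabels` is `{"namespace", "name"}` and using an illustrative collector:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

// Descriptor mirroring the diff: namespace/name labels first, then the rest.
var metricAnalysisRunMetricPhase = prometheus.NewDesc(
	"analysis_run_metric_phase",
	"Information on the duration of a specific metric in the Analysis Run",
	[]string{"namespace", "name", "metric", "type", "dry_run", "phase"},
	nil,
)

type analysisCollector struct{}

func (analysisCollector) Describe(ch chan<- *prometheus.Desc) { ch <- metricAnalysisRunMetricPhase }

func (analysisCollector) Collect(ch chan<- prometheus.Metric) {
	// Label values must be given in the same order as the label names above.
	ch <- prometheus.MustNewConstMetric(metricAnalysisRunMetricPhase, prometheus.GaugeValue, 1,
		"jesse-test", "http-benchmark-test-tr8rn", "webmetric", "Web", "false", "Error")
}

func main() {
	reg := prometheus.NewRegistry()
	reg.MustRegister(analysisCollector{})
	families, err := reg.Gather()
	fmt.Println(len(families), err) // 1 <nil>
}
```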
docs/features/analysis.md (10 additions, 10 deletions)
@@ -581,12 +581,12 @@ evaluation of the metric to monitor the 5XX error-rate fail, the analysis run wi
))
```

- A wildcard '*' can be used to make all the metrics run in the dry-run mode. In the following example, even if one or
- both metrics fail, the analysis run will pass.
+ RegEx matches are also supported. `.*` can be used to make all the metrics run in the dry-run mode. In the following
+ example, even if one or both metrics fail, the analysis run will pass.

```yaml hl_lines="1 2"
dryRun:
- - metricName: *
+ - metricName: .*
metrics:
- name: total-5xx-errors
interval: 5m
@@ -618,14 +618,14 @@ If one or more metrics are running in the dry-run mode, the summary of the dry-r
run message. Assuming that the `total-4xx-errors` metric fails in the above example but, the `total-5xx-errors`
succeeds, the final dry-run summary will look like this.

```yaml hl_lines="2 3 4 5 6 7"
```yaml hl_lines="4 5 6 7"
Message: Run Terminated
- Dry-Run Summary:
-   Count: 2
-   Successful: 1
-   Failed: 1
-   Inconclusive: 0
-   Error: 0
+ Run Summary:
+   ...
+   Dry Run:
+     Count: 2
+     Successful: 1
+     Failed: 1
Metric Results:
...
```
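Because the rules are regular expressions, they can also select just a subset of metrics. For example (metric names illustrative), the following would put only the error-count metrics into dry-run mode while any remaining metrics still gate the run:

```yaml
dryRun:
- metricName: total-.*-errors
```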