Skip to content

Commit

Permalink
feat: add audit_last_run_end_time metric
Browse files Browse the repository at this point in the history
Signed-off-by: Viktor Oreshkin <imselfish@stek29.rocks>
  • Loading branch information
stek29 committed Aug 22, 2022
1 parent 8f6d95a commit 95c04b2
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 22 deletions.
6 changes: 5 additions & 1 deletion pkg/audit/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,14 @@ func (am *Manager) audit(ctx context.Context) error {
// record audit latency
defer func() {
logFinish(am.log)
latency := time.Since(startTime)
endTime := time.Now()
latency := endTime.Sub(startTime)
if err := am.reporter.reportLatency(latency); err != nil {
am.log.Error(err, "failed to report latency")
}
if err := am.reporter.reportRunEnd(endTime); err != nil {
am.log.Error(err, "failed to report run end time")
}
}()

if err := am.reporter.reportRunStart(startTime); err != nil {
Expand Down
38 changes: 27 additions & 11 deletions pkg/audit/stats_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,17 @@ import (
)

const (
violationsMetricName = "violations"
auditDurationMetricName = "audit_duration_seconds"
lastRunTimeMetricName = "audit_last_run_time"
violationsMetricName = "violations"
auditDurationMetricName = "audit_duration_seconds"
lastRunStartTimeMetricName = "audit_last_run_time"
lastRunEndTimeMetricName = "audit_last_run_end_time"
)

var (
violationsM = stats.Int64(violationsMetricName, "Total number of audited violations", stats.UnitDimensionless)
auditDurationM = stats.Float64(auditDurationMetricName, "Latency of audit operation in seconds", stats.UnitSeconds)
lastRunTimeM = stats.Float64(lastRunTimeMetricName, "Timestamp of last audit run time", stats.UnitSeconds)
violationsM = stats.Int64(violationsMetricName, "Total number of audited violations", stats.UnitDimensionless)
auditDurationM = stats.Float64(auditDurationMetricName, "Latency of audit operation in seconds", stats.UnitSeconds)
lastRunStartTimeM = stats.Float64(lastRunStartTimeMetricName, "Timestamp of last audit run starting time", stats.UnitSeconds)
lastRunEndTimeM = stats.Float64(lastRunEndTimeMetricName, "Timestamp of last audit run ending time", stats.UnitSeconds)

enforcementActionKey = tag.MustNewKey("enforcement_action")
)
Expand All @@ -45,9 +47,13 @@ func register() error {
Aggregation: view.Distribution(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5),
},
{
Name: lastRunTimeMetricName,
Measure: lastRunTimeM,
Description: "Timestamp of last audit run time",
Name: lastRunStartTimeMetricName,
Measure: lastRunStartTimeM,
Aggregation: view.LastValue(),
},
{
Name: lastRunEndTimeMetricName,
Measure: lastRunEndTimeM,
Aggregation: view.LastValue(),
},
}
Expand Down Expand Up @@ -80,8 +86,18 @@ func (r *reporter) reportRunStart(t time.Time) error {
return err
}

val := float64(t.UnixNano()) / 1e9
return metrics.Record(ctx, lastRunTimeM.M(val))
val := float64(t.Unix())
return metrics.Record(ctx, lastRunStartTimeM.M(val))
}

// reportRunEnd records the end time of an audit run on the lastRunEndTimeM
// measure. The value is expressed in whole Unix seconds (t.Unix()), so any
// sub-second component of t is dropped.
//
// It returns the error from building the tag context, or otherwise whatever
// metrics.Record returns.
func (r *reporter) reportRunEnd(t time.Time) error {
	recordCtx, tagErr := tag.New(context.Background())
	if tagErr != nil {
		return tagErr
	}

	endSeconds := float64(t.Unix())
	return metrics.Record(recordCtx, lastRunEndTimeM.M(endSeconds))
}

// newStatsReporter creates a reporter for audit metrics.
Expand Down
36 changes: 28 additions & 8 deletions pkg/audit/stats_reporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,30 +101,50 @@ func checkData(t *testing.T, name string, wantRowLength int) *view.Row {
}

func TestLastRestartCheck(t *testing.T) {
wantTime := time.Now()
wantTs := float64(wantTime.UnixNano()) / 1e9
wantStartTime := time.Now()
wantEndTime := wantStartTime.Add(1 * time.Minute)
wantStartTs := float64(wantStartTime.Unix())
wantEndTs := float64(wantEndTime.Unix())
const wantRowLength = 1

r, err := newStatsReporter()
if err != nil {
t.Fatalf("got newStatsReporter() error %v", err)
}

err = r.reportRunStart(wantTime)
err = r.reportRunStart(wantStartTime)
if err != nil {
t.Fatalf("reportRunStart error %v", err)
}
row := checkData(t, lastRunTimeMetricName, wantRowLength)
row := checkData(t, lastRunStartTimeMetricName, wantRowLength)
got, ok := row.Data.(*view.LastValueData)
if !ok {
t.Error("lastRunTimeMetricName should have aggregation LastValue()")
t.Errorf("%s should have aggregation LastValue()", lastRunStartTimeMetricName)
}

if len(row.Tags) != 0 {
t.Errorf("got %q tags %v, want empty", lastRunTimeMetricName, row.Tags)
t.Errorf("got %q tags %v, want empty", lastRunStartTimeMetricName, row.Tags)
}

if got.Value != wantTs {
t.Errorf("got %q = %v, want %v", lastRunTimeMetricName, got.Value, wantTs)
if got.Value != wantStartTs {
t.Errorf("got %q = %v, want %v", lastRunStartTimeMetricName, got.Value, wantStartTs)
}

err = r.reportRunEnd(wantEndTime)
if err != nil {
t.Fatalf("reportRunEnd error %v", err)
}
row = checkData(t, lastRunEndTimeMetricName, wantRowLength)
got, ok = row.Data.(*view.LastValueData)
if !ok {
t.Errorf("%s should have aggregation LastValue()", lastRunEndTimeMetricName)
}

if len(row.Tags) != 0 {
t.Errorf("got %q tags %v, want empty", lastRunEndTimeMetricName, row.Tags)
}

if got.Value != wantEndTs {
t.Errorf("got %q = %v, want %v", lastRunEndTimeMetricName, got.Value, wantEndTs)
}
}
3 changes: 2 additions & 1 deletion website/docs/audit.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ There are three ways to gather audit results, depending on the level of detail n

Prometheus metrics provide an aggregated look at the number of audit violations:

* `gatekeeper_audit_last_run_time` provides the timestamp of the most recently completed audit run
* `gatekeeper_audit_last_run_time` provides the start timestamp of the most recent audit run
* `gatekeeper_audit_last_run_end_time` provides the end timestamp of the most recently completed audit run
* `gatekeeper_violations` provides the total number of audited violations for the last audit run, broken down by violation severity

### Constraint Status
Expand Down
8 changes: 7 additions & 1 deletion website/docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,13 @@ Below are the list of metrics provided by Gatekeeper:

- Name: `gatekeeper_audit_last_run_time`

Description: `Timestamp of last audit run time`
Description: `Timestamp of last audit run starting time`

Aggregation: `LastValue`

- Name: `gatekeeper_audit_last_run_end_time`

Description: `Timestamp of last audit run ending time`

Aggregation: `LastValue`

Expand Down

0 comments on commit 95c04b2

Please sign in to comment.