Skip to content

Commit

Permalink
ui/db-console: surface more job metrics around reverting and retrying…
Browse files Browse the repository at this point in the history
… in the

DBConsole Jobs Overview page

Fixes #68179

This commit surfaces the status `reverting`, annotates existing `running` and
`reverting` statuses UI with "retrying" where applicable, and adds the "Last
Execution Time (UTC)" and "Execution Count" columns to the jobs overview table
in db console. "Retrying" is defined as `status IN ('running', 'reverting') AND
next_run > now() AND num_runs > 1`.

Hovering a retrying status shows the next execution time. The "Status" column
was also moved left to the second column. Filtering using the dropdown by
`Status: Running` or `Status: Reverting` will include those that are
also "retrying". Users can also filter by `Status: Retrying`.

The `/jobs` endpoint was modified to add the `last_run`, `next_run`, and
`num_runs` fields required for the UI change. Jobs with status `running` or
`reverting` and are also "retrying" have their statuses sent as `retry-running`
and `retry-reverting` respectively. The endpoint was also modified to support
the value `retrying` for the `status` query parameter.

This commit also adds a storybook story for the jobs table, which showcases the
different possible statuses in permutations of information that could be
present for the `running` status.

Release note (ui change): The jobs overview table in DBConsole now shows when
jobs have the status "reverting", and shows the badge "retrying" when running
or reverting jobs are also retrying. Hovering the status for a "retrying" job
will show the "Next execution time" in UTC. Two new columns, "Last Execution
Time (UTC)" and "Execution Count", were also added to the jobs overview table
in DBConsole, and the "Status" column was moved left to the second column in
the table.

The `status` query parameter in the `/jobs` endpoint now supports the values
`reverting` and `retrying`.
  • Loading branch information
jocrl committed Dec 2, 2021
1 parent 2c014c4 commit f0d0467
Show file tree
Hide file tree
Showing 21 changed files with 1,321 additions and 414 deletions.
6 changes: 6 additions & 0 deletions docs/generated/http/full.md
Original file line number Diff line number Diff line change
Expand Up @@ -5142,6 +5142,9 @@ JobResponse contains the job record for a job.
| highwater_timestamp | [google.protobuf.Timestamp](#cockroach.server.serverpb.JobsResponse-google.protobuf.Timestamp) | | highwater_timestamp is the highwater timestamp returned as normal timestamp. This is appropriate for display to humans. | [reserved](#support-status) |
| highwater_decimal | [string](#cockroach.server.serverpb.JobsResponse-string) | | highwater_decimal is the highwater timestamp in the proprietary decimal form used by logical timestamps internally. This is appropriate to pass to a "AS OF SYSTEM TIME" SQL statement. | [reserved](#support-status) |
| running_status | [string](#cockroach.server.serverpb.JobsResponse-string) | | | [reserved](#support-status) |
| last_run | [google.protobuf.Timestamp](#cockroach.server.serverpb.JobsResponse-google.protobuf.Timestamp) | | | [reserved](#support-status) |
| next_run | [google.protobuf.Timestamp](#cockroach.server.serverpb.JobsResponse-google.protobuf.Timestamp) | | | [reserved](#support-status) |
| num_runs | [int64](#cockroach.server.serverpb.JobsResponse-int64) | | | [reserved](#support-status) |



Expand Down Expand Up @@ -5200,6 +5203,9 @@ JobResponse contains the job record for a job.
| highwater_timestamp | [google.protobuf.Timestamp](#cockroach.server.serverpb.JobResponse-google.protobuf.Timestamp) | | highwater_timestamp is the highwater timestamp returned as normal timestamp. This is appropriate for display to humans. | [reserved](#support-status) |
| highwater_decimal | [string](#cockroach.server.serverpb.JobResponse-string) | | highwater_decimal is the highwater timestamp in the proprietary decimal form used by logical timestamps internally. This is appropriate to pass to a "AS OF SYSTEM TIME" SQL statement. | [reserved](#support-status) |
| running_status | [string](#cockroach.server.serverpb.JobResponse-string) | | | [reserved](#support-status) |
| last_run | [google.protobuf.Timestamp](#cockroach.server.serverpb.JobResponse-google.protobuf.Timestamp) | | | [reserved](#support-status) |
| next_run | [google.protobuf.Timestamp](#cockroach.server.serverpb.JobResponse-google.protobuf.Timestamp) | | | [reserved](#support-status) |
| num_runs | [int64](#cockroach.server.serverpb.JobResponse-int64) | | | [reserved](#support-status) |



Expand Down
2 changes: 1 addition & 1 deletion pkg/server/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ go_library(

go_test(
name = "server_test",
size = "large",
size = "enormous",
srcs = [
"addjoin_test.go",
"admin_cluster_test.go",
Expand Down
23 changes: 18 additions & 5 deletions pkg/server/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -1847,15 +1847,24 @@ func (s *adminServer) Jobs(
return nil, err
}

retryRunningCondition := "status='running' AND next_run > now() AND num_runs > 1"
retryRevertingCondition := "status='reverting' AND next_run > now() AND num_runs > 1"

q := makeSQLQuery()
q.Append(`
SELECT job_id, job_type, description, statement, user_name, descriptor_ids, status,
running_status, created, started, finished, modified,
fraction_completed, high_water_timestamp, error
SELECT job_id, job_type, description, statement, user_name, descriptor_ids,
case
when ` + retryRunningCondition + `then 'retry-running'
when ` + retryRevertingCondition + ` then 'retry-reverting'
else status
end as status, running_status, created, started, finished, modified, fraction_completed,
high_water_timestamp, error, last_run, next_run, num_runs
FROM crdb_internal.jobs
WHERE true
`)
if req.Status != "" {
if req.Status == "retrying" {
q.Append(" AND ( ( " + retryRunningCondition + " ) OR ( " + retryRevertingCondition + " ) )")
} else if req.Status != "" {
q.Append(" AND status = $", req.Status)
}
if req.Type != jobspb.TypeUnspecified {
Expand Down Expand Up @@ -1929,6 +1938,9 @@ func scanRowIntoJob(scanner resultScanner, row tree.Datums, job *serverpb.JobRes
&fractionCompletedOrNil,
&highwaterOrNil,
&job.Error,
&job.LastRun,
&job.NextRun,
&job.NumRuns,
); err != nil {
return err
}
Expand Down Expand Up @@ -1972,7 +1984,8 @@ func (s *adminServer) Job(
const query = `
SELECT job_id, job_type, description, statement, user_name, descriptor_ids, status,
running_status, created, started, finished, modified,
fraction_completed, high_water_timestamp, error
fraction_completed, high_water_timestamp, error, last_run,
next_run, num_runs
FROM crdb_internal.jobs
WHERE job_id = $1`

Expand Down
140 changes: 128 additions & 12 deletions pkg/server/admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1460,18 +1460,30 @@ func TestAdminAPIJobs(t *testing.T) {
existingRunningIDs := getSystemJobIDs(t, sqlDB, jobs.StatusRunning)
existingIDs := append(existingSucceededIDs, existingRunningIDs...)

runningOnlyIds := []int64{1, 2, 4}
revertingOnlyIds := []int64{7, 8, 9}
retryRunningIds := []int64{6}
retryRevertingIds := []int64{10}

testJobs := []struct {
id int64
status jobs.Status
details jobspb.Details
progress jobspb.ProgressDetails
username security.SQLUsername
numRuns int64
lastRun time.Time
}{
{1, jobs.StatusRunning, jobspb.RestoreDetails{}, jobspb.RestoreProgress{}, security.RootUserName()},
{2, jobs.StatusRunning, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName()},
{3, jobs.StatusSucceeded, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName()},
{4, jobs.StatusRunning, jobspb.ChangefeedDetails{}, jobspb.ChangefeedProgress{}, security.RootUserName()},
{5, jobs.StatusSucceeded, jobspb.BackupDetails{}, jobspb.BackupProgress{}, authenticatedUserNameNoAdmin()},
{1, jobs.StatusRunning, jobspb.RestoreDetails{}, jobspb.RestoreProgress{}, security.RootUserName(), 1, time.Time{}},
{2, jobs.StatusRunning, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName(), 1, timeutil.Now().Add(10 * time.Minute)},
{3, jobs.StatusSucceeded, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName(), 1, time.Time{}},
{4, jobs.StatusRunning, jobspb.ChangefeedDetails{}, jobspb.ChangefeedProgress{}, security.RootUserName(), 2, time.Time{}},
{5, jobs.StatusSucceeded, jobspb.BackupDetails{}, jobspb.BackupProgress{}, authenticatedUserNameNoAdmin(), 1, time.Time{}},
{6, jobs.StatusRunning, jobspb.ImportDetails{}, jobspb.ImportProgress{}, security.RootUserName(), 2, timeutil.Now().Add(10 * time.Minute)},
{7, jobs.StatusReverting, jobspb.ImportDetails{}, jobspb.ImportProgress{}, security.RootUserName(), 1, time.Time{}},
{8, jobs.StatusReverting, jobspb.ImportDetails{}, jobspb.ImportProgress{}, security.RootUserName(), 1, timeutil.Now().Add(10 * time.Minute)},
{9, jobs.StatusReverting, jobspb.ImportDetails{}, jobspb.ImportProgress{}, security.RootUserName(), 2, time.Time{}},
{10, jobs.StatusReverting, jobspb.ImportDetails{}, jobspb.ImportProgress{}, security.RootUserName(), 2, timeutil.Now().Add(10 * time.Minute)},
}
for _, job := range testJobs {
payload := jobspb.Payload{UsernameProto: job.username.EncodeProto(), Details: jobspb.WrapPayloadDetails(job.details)}
Expand All @@ -1498,8 +1510,8 @@ func TestAdminAPIJobs(t *testing.T) {
t.Fatal(err)
}
sqlDB.Exec(t,
`INSERT INTO system.jobs (id, status, payload, progress) VALUES ($1, $2, $3, $4)`,
job.id, job.status, payloadBytes, progressBytes,
`INSERT INTO system.jobs (id, status, payload, progress, num_runs, last_run) VALUES ($1, $2, $3, $4, $5, $6)`,
job.id, job.status, payloadBytes, progressBytes, job.numRuns, job.lastRun,
)
}

Expand All @@ -1512,23 +1524,33 @@ func TestAdminAPIJobs(t *testing.T) {
}{
{
"jobs",
append([]int64{5, 4, 3, 2, 1}, existingIDs...),
append([]int64{10, 9, 8, 7, 6, 5, 4, 3, 2, 1}, existingIDs...),
[]int64{5},
},
{
"jobs?limit=1",
[]int64{10},
[]int64{5},
},
{
"jobs?status=succeeded",
append([]int64{5, 3}, existingSucceededIDs...),
[]int64{5},
},
{
"jobs?status=running",
append([]int64{4, 2, 1}, existingRunningIDs...),
append(append(append([]int64{}, runningOnlyIds...), retryRunningIds...), existingRunningIDs...),
[]int64{},
},
{
"jobs?status=succeeded",
append([]int64{5, 3}, existingSucceededIDs...),
[]int64{5},
"jobs?status=reverting",
append(append([]int64{}, revertingOnlyIds...), retryRevertingIds...),
[]int64{},
},
{
"jobs?status=retrying",
append(append([]int64{}, retryRunningIds...), retryRevertingIds...),
[]int64{},
},
{
"jobs?status=pending",
Expand Down Expand Up @@ -1588,10 +1610,104 @@ func TestAdminAPIJobs(t *testing.T) {
if e, a := expected, resIDs; !reflect.DeepEqual(e, a) {
t.Errorf("%d: expected job IDs %v, but got %v", i, e, a)
}

}
})
}

func TestAdminAPIJobsDetails(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

s, conn, _ := serverutils.StartServer(t, base.TestServerArgs{})
defer s.Stopper().Stop(context.Background())
sqlDB := sqlutils.MakeSQLRunner(conn)

runningOnlyIds := []int64{1, 3, 5}
revertingOnlyIds := []int64{2, 4, 6}
retryRunningIds := []int64{7}
retryRevertingIds := []int64{8}

testJobs := []struct {
id int64
status jobs.Status
details jobspb.Details
progress jobspb.ProgressDetails
username security.SQLUsername
numRuns int64
lastRun time.Time
}{
{1, jobs.StatusRunning, jobspb.RestoreDetails{}, jobspb.RestoreProgress{}, security.RootUserName(), 1, time.Time{}},
{2, jobs.StatusReverting, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName(), 1, time.Time{}},
{3, jobs.StatusRunning, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName(), 1, timeutil.Now().Add(10 * time.Minute)},
{4, jobs.StatusReverting, jobspb.ChangefeedDetails{}, jobspb.ChangefeedProgress{}, security.RootUserName(), 1, timeutil.Now().Add(10 * time.Minute)},
{5, jobs.StatusRunning, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName(), 2, time.Time{}},
{6, jobs.StatusReverting, jobspb.ChangefeedDetails{}, jobspb.ChangefeedProgress{}, security.RootUserName(), 2, time.Time{}},
{7, jobs.StatusRunning, jobspb.BackupDetails{}, jobspb.BackupProgress{}, security.RootUserName(), 2, timeutil.Now().Add(10 * time.Minute)},
{8, jobs.StatusReverting, jobspb.ChangefeedDetails{}, jobspb.ChangefeedProgress{}, security.RootUserName(), 2, timeutil.Now().Add(10 * time.Minute)},
}
for _, job := range testJobs {
payload := jobspb.Payload{UsernameProto: job.username.EncodeProto(), Details: jobspb.WrapPayloadDetails(job.details)}
payloadBytes, err := protoutil.Marshal(&payload)
if err != nil {
t.Fatal(err)
}

progress := jobspb.Progress{Details: jobspb.WrapProgressDetails(job.progress)}
// Populate progress.Progress field with a specific progress type based on
// the job type.
if _, ok := job.progress.(jobspb.ChangefeedProgress); ok {
progress.Progress = &jobspb.Progress_HighWater{
HighWater: &hlc.Timestamp{},
}
} else {
progress.Progress = &jobspb.Progress_FractionCompleted{
FractionCompleted: 1.0,
}
}

progressBytes, err := protoutil.Marshal(&progress)
if err != nil {
t.Fatal(err)
}
sqlDB.Exec(t,
`INSERT INTO system.jobs (id, status, payload, progress, num_runs, last_run) VALUES ($1, $2, $3, $4, $5, $6)`,
job.id, job.status, payloadBytes, progressBytes, job.numRuns, job.lastRun,
)

}

var res serverpb.JobsResponse
if err := getAdminJSONProto(s, "jobs", &res); err != nil {
t.Fatal(err)
}

// test that the select statement correctly converts expected jobs to retry-____ statuses
expectedStatuses := []struct {
status string
ids []int64
}{
{"running", runningOnlyIds},
{"reverting", revertingOnlyIds},
{"retry-running", retryRunningIds},
{"retry-reverting", retryRevertingIds},
}
for _, expected := range expectedStatuses {
jobsWithStatus := []serverpb.JobResponse{}
for _, job := range res.Jobs {
for _, expectedID := range expected.ids {
if job.ID == expectedID {
jobsWithStatus = append(jobsWithStatus, job)
}
}
}

for _, job := range jobsWithStatus {
assert.Equal(t, expected.status, job.Status)
}
}
}

func TestAdminAPILocations(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand Down
Loading

0 comments on commit f0d0467

Please sign in to comment.