diff --git a/pkg/jobs/registry.go b/pkg/jobs/registry.go index 101cf0a3fb6e..75e72bdc53c2 100644 --- a/pkg/jobs/registry.go +++ b/pkg/jobs/registry.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/security" + "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/sql/catalog" @@ -33,6 +34,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" "github.com/cockroachdb/cockroach/pkg/sql/sqlliveness" + "github.com/cockroachdb/cockroach/pkg/sql/sqltelemetry" "github.com/cockroachdb/cockroach/pkg/sql/sqlutil" "github.com/cockroachdb/cockroach/pkg/sql/types" "github.com/cockroachdb/cockroach/pkg/util/envutil" @@ -1218,6 +1220,7 @@ func (r *Registry) stepThroughStateMachine( // restarted during the next adopt loop and reverting will be retried. return errors.Wrapf(err, "job %d: could not mark as canceled: %v", job.ID(), jobErr) } + telemetry.Inc(sqltelemetry.SchemaJobCanceledCounter(jobType.String())) return errors.WithSecondaryError(errors.Errorf("job %s", status), jobErr) case StatusSucceeded: if jobErr != nil { @@ -1232,6 +1235,7 @@ func (r *Registry) stepThroughStateMachine( // better. return r.stepThroughStateMachine(ctx, execCtx, resumer, job, StatusReverting, errors.Wrapf(err, "could not mark job %d as succeeded", job.ID())) } + telemetry.Inc(sqltelemetry.SchemaJobSuccessCounter(jobType.String())) return nil case StatusReverting: if err := job.reverted(ctx, nil /* txn */, jobErr, nil /* fn */); err != nil { @@ -1286,6 +1290,7 @@ func (r *Registry) stepThroughStateMachine( // restarted during the next adopt loop and reverting will be retried. 
return errors.Wrapf(err, "job %d: could not mark as failed: %s", job.ID(), jobErr) } + telemetry.Inc(sqltelemetry.SchemaJobFailedCounter(jobType.String())) return jobErr default: return errors.NewAssertionErrorWithWrappedErrf(jobErr, diff --git a/pkg/sql/logictest/testdata/logic_test/alter_table b/pkg/sql/logictest/testdata/logic_test/alter_table index 0063789d8011..180cdb78d028 100644 --- a/pkg/sql/logictest/testdata/logic_test/alter_table +++ b/pkg/sql/logictest/testdata/logic_test/alter_table @@ -1688,3 +1688,14 @@ SELECT count(descriptor_id) WHERE descriptor_id = ('test.public.t45985'::REGCLASS)::INT8; ---- 0 + +# Validate that the schema_change job success and failure telemetry counters were incremented. +query T +SELECT feature_name FROM crdb_internal.feature_usage +WHERE feature_name IN ('sql.schema.job.schema_change.successful', +'sql.schema.job.schema_change.failed') AND +usage_count > 0 +ORDER BY feature_name DESC +---- +sql.schema.job.schema_change.successful +sql.schema.job.schema_change.failed diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats index cfb8ce4f3eb7..3cfe4f12a2f5 100644 --- a/pkg/sql/logictest/testdata/logic_test/distsql_stats +++ b/pkg/sql/logictest/testdata/logic_test/distsql_stats @@ -1065,3 +1065,18 @@ SHOW STATISTICS USING JSON FOR TABLE greeting_stats statement ok ALTER TABLE greeting_stats INJECT STATISTICS '$stats' + +# Validate that the job success telemetry counters were incremented for each listed job type. +query T +SELECT feature_name FROM crdb_internal.feature_usage +WHERE feature_name in ('sql.schema.job.typedesc_schema_change.successful', +'sql.schema.job.schema_change.successful', +'sql.schema.job.create_stats.successful', +'sql.schema.job.auto_create_stats.successful') AND +usage_count > 0 +ORDER BY feature_name DESC +---- +sql.schema.job.typedesc_schema_change.successful +sql.schema.job.schema_change.successful +sql.schema.job.create_stats.successful +sql.schema.job.auto_create_stats.successful diff --git 
a/pkg/sql/logictest/testdata/logic_test/jobs b/pkg/sql/logictest/testdata/logic_test/jobs index 21214acf5197..8266571ede86 100644 --- a/pkg/sql/logictest/testdata/logic_test/jobs +++ b/pkg/sql/logictest/testdata/logic_test/jobs @@ -130,3 +130,14 @@ user testuser # testuser should no longer have the ability to control jobs. statement error pq: user testuser does not have CONTROLJOB privilege PAUSE JOB (SELECT job_id FROM [SHOW JOBS] WHERE user_name = 'testuser2' AND job_type = 'SCHEMA CHANGE GC') + +user root + +# Validate that the schema_change job success telemetry counter was incremented. +query T +SELECT feature_name FROM crdb_internal.feature_usage +WHERE feature_name in ('sql.schema.job.schema_change.successful') AND +usage_count > 0 +ORDER BY feature_name DESC +---- +sql.schema.job.schema_change.successful diff --git a/pkg/sql/sqltelemetry/schema.go b/pkg/sql/sqltelemetry/schema.go index 8d7b6afcb998..da028c475a50 100644 --- a/pkg/sql/sqltelemetry/schema.go +++ b/pkg/sql/sqltelemetry/schema.go @@ -12,6 +12,7 @@ package sqltelemetry import ( "fmt" + "strings" "github.com/cockroachdb/cockroach/pkg/server/telemetry" ) @@ -146,3 +147,24 @@ var CreateUnloggedTableCounter = telemetry.GetCounterOnce("sql.schema.create_unl // SchemaRefreshMaterializedView is to be incremented every time a materialized // view is refreshed. var SchemaRefreshMaterializedView = telemetry.GetCounterOnce("sql.schema.refresh_materialized_view") + +// SchemaJobSuccessCounter returns the sql.schema.job.<type>.successful telemetry +// counter, where <type> is jobName lowercased with spaces replaced by underscores. +func SchemaJobSuccessCounter(jobName string) telemetry.Counter { + jobName = strings.ToLower(strings.Replace(jobName, " ", "_", -1)) + return telemetry.GetCounter(fmt.Sprintf("sql.schema.job.%s.successful", jobName)) +} + +// SchemaJobFailedCounter returns the sql.schema.job.<type>.failed telemetry +// counter, where <type> is jobName lowercased with spaces replaced by underscores. 
+func SchemaJobFailedCounter(jobName string) telemetry.Counter { + jobName = strings.ToLower(strings.Replace(jobName, " ", "_", -1)) + return telemetry.GetCounter(fmt.Sprintf("sql.schema.job.%s.failed", jobName)) +} + +// SchemaJobCanceledCounter returns the sql.schema.job.<type>.canceled telemetry +// counter, where <type> is jobName lowercased with spaces replaced by underscores. +func SchemaJobCanceledCounter(jobName string) telemetry.Counter { + jobName = strings.ToLower(strings.Replace(jobName, " ", "_", -1)) + return telemetry.GetCounter(fmt.Sprintf("sql.schema.job.%s.canceled", jobName)) +} diff --git a/pkg/sql/testdata/telemetry/schema b/pkg/sql/testdata/telemetry/schema index 3563a15fb29d..48f1c7240b61 100644 --- a/pkg/sql/testdata/telemetry/schema +++ b/pkg/sql/testdata/telemetry/schema @@ -55,6 +55,7 @@ sql.schema.alter_table sql.schema.alter_table.add_column sql.schema.alter_table.add_column.references sql.schema.alter_table.add_constraint +sql.schema.job.schema_change.successful sql.schema.new_column_type.int8 schema