From ab8f82c105287b47e310b9c9252c24b3b1ccd095 Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Fri, 14 Apr 2023 15:23:48 -0700 Subject: [PATCH 01/17] Worker versioning related updates (#272) Decided not to expose (the yet unused) `CompatibleVersionSet.version_set_id`, the set will have multiple IDs internally. The affected projects have been updated: - [x] [Server PR](https://github.com/temporalio/temporal/pull/4170) - [x] [Go SDK PR](https://github.com/temporalio/sdk-go/pull/1089) Also added a `use_versioning` flag to `RespondWorkflowTaskCompletedRequest` to differentiate between the version stamp being used for versioning (matching) purposes or just as a marker. --- temporal/api/taskqueue/v1/message.proto | 11 ++++------- .../api/workflowservice/v1/request_response.proto | 5 +++++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/temporal/api/taskqueue/v1/message.proto b/temporal/api/taskqueue/v1/message.proto index d790dac5..353e7f7a 100644 --- a/temporal/api/taskqueue/v1/message.proto +++ b/temporal/api/taskqueue/v1/message.proto @@ -87,12 +87,9 @@ message StickyExecutionAttributes { google.protobuf.Duration schedule_to_start_timeout = 2 [(gogoproto.stdduration) = true]; } -// Used by the worker versioning APIs, represents an ordering of one or more versions which are -// considered to be compatible with each other. Currently the versions are always worker build ids. +// Used by the worker versioning APIs, represents an unordered set of one or more versions which are +// considered to be compatible with each other. Currently the versions are always worker build IDs. message CompatibleVersionSet { - // A unique identifier for this version set. Users don't need to understand or care about this - // value, but it has value for debugging purposes. 
- string version_set_id = 1; - // All the compatible versions, ordered from oldest to newest - repeated string build_ids = 2; + // All the compatible versions, unordered, except for the last element, which is considered the set "default". + repeated string build_ids = 1; } diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index a4f0c5ad..7b8d23cb 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -327,6 +327,11 @@ message RespondWorkflowTaskCompletedRequest { temporal.api.sdk.v1.WorkflowTaskCompletedMetadata sdk_metadata = 12; // Local usage data collected for metering temporal.api.common.v1.MeteringMetadata metering_metadata = 13; + + // If set, the worker is opting in to worker versioning. Otherwise, worker_version_stamp is used as a marker for + // workflow reset points and the BuildId search attibute. + // This flag must only be set if worker_version_stamp is provided. + bool use_versioning = 14; } message RespondWorkflowTaskCompletedResponse { From 19c11bb0f93bb39d026dcb44035bf7d2a458dc3c Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Mon, 24 Apr 2023 11:02:01 -0700 Subject: [PATCH 02/17] Add flags for using the default build id set (#273) --- temporal/api/command/v1/message.proto | 18 ++++++++++++++++++ temporal/api/history/v1/message.proto | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/temporal/api/command/v1/message.proto b/temporal/api/command/v1/message.proto index 30d4b28d..34fe8fe1 100644 --- a/temporal/api/command/v1/message.proto +++ b/temporal/api/command/v1/message.proto @@ -83,6 +83,12 @@ message ScheduleActivityTaskCommandAttributes { // Request to start the activity directly bypassing matching service and worker polling // The slot for executing the activity should be reserved when setting this field to true. 
bool request_eager_execution = 12; + // If this is set and the workflow executing this command is on a task queue using build-id + // versioning, then the scheduled activity will not use the same compatible version set (which + // is the default behavior) and instead will use the current overall default for the queue. + // If this command's `task_queue` field differs from the executing workflow's task queue, then + // this flag has no effect. + bool use_latest_build_id = 13; } message RequestCancelActivityTaskCommandAttributes { @@ -191,6 +197,12 @@ message ContinueAsNewWorkflowExecutionCommandAttributes { temporal.api.common.v1.Header header = 12; temporal.api.common.v1.Memo memo = 13; temporal.api.common.v1.SearchAttributes search_attributes = 14; + // If this is set and the workflow executing this command is on a task queue using build-id + // versioning, then the continued workflow will not use the same compatible version set (which + // is the default behavior) and instead will use the current overall default for the queue. + // If this command's `task_queue` field differs from the executing workflow's task queue, then + // this flag has no effect. + bool use_latest_build_id = 15; // `workflow_execution_timeout` is omitted as it shouldn't be overridden from within a workflow. } @@ -218,6 +230,12 @@ message StartChildWorkflowExecutionCommandAttributes { temporal.api.common.v1.Header header = 14; temporal.api.common.v1.Memo memo = 15; temporal.api.common.v1.SearchAttributes search_attributes = 16; + // If this is set and the workflow executing this command is on a task queue using build-id + // versioning, then the child workflow will not use the same compatible version set (which + // is the default behavior) and instead will use the current overall default for the queue. + // If this command's `task_queue` field differs from the executing workflow's task queue, then + // this flag has no effect. 
+ bool use_latest_build_id = 17; } message ProtocolMessageCommandAttributes { diff --git a/temporal/api/history/v1/message.proto b/temporal/api/history/v1/message.proto index 41293022..2220c297 100644 --- a/temporal/api/history/v1/message.proto +++ b/temporal/api/history/v1/message.proto @@ -154,6 +154,12 @@ message WorkflowExecutionContinuedAsNewEventAttributes { temporal.api.common.v1.Header header = 12; temporal.api.common.v1.Memo memo = 13; temporal.api.common.v1.SearchAttributes search_attributes = 14; + // If this is set and the workflow executing this command is on a task queue using build-id + // versioning, then the scheduled activity will not use the same compatible version set (which + // is the default behavior) and instead will use the current overall default for the queue. + // If this command's `task_queue` field differs from the executing workflow's task queue, then + // this flag has no effect. + bool use_latest_build_id = 15; // workflow_execution_timeout is omitted as it shouldn't be overridden from within a workflow. } @@ -272,6 +278,12 @@ message ActivityTaskScheduledEventAttributes { // configuration. Retries will happen up to `schedule_to_close_timeout`. To disable retries set // retry_policy.maximum_attempts to 1. temporal.api.common.v1.RetryPolicy retry_policy = 12; + // If this is set and the workflow executing this command is on a task queue using build-id + // versioning, then the scheduled activity will not use the same compatible version set (which + // is the default behavior) and instead will use the current overall default for the queue. + // If this command's `task_queue` field differs from the executing workflow's task queue, then + // this flag has no effect. 
+ bool use_latest_build_id = 13; } message ActivityTaskStartedEventAttributes { @@ -558,6 +570,12 @@ message StartChildWorkflowExecutionInitiatedEventAttributes { temporal.api.common.v1.Header header = 15; temporal.api.common.v1.Memo memo = 16; temporal.api.common.v1.SearchAttributes search_attributes = 17; + // If this is set and the workflow executing this command is on a task queue using build-id + // versioning, then the child workflow will not use the same compatible version set (which + // is the default behavior) and instead will use the current overall default for the queue. + // If this command's `task_queue` field differs from the executing workflow's task queue, then + // this flag has no effect. + bool use_latest_build_id = 19; } message StartChildWorkflowExecutionFailedEventAttributes { From 27f319bdbd5500ae3a0f10011e72048687e98e44 Mon Sep 17 00:00:00 2001 From: David Reiss Date: Wed, 3 May 2023 18:22:19 -0700 Subject: [PATCH 03/17] Add NewerBuildExistsFailure (#275) --- temporal/api/errordetails/v1/message.proto | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/temporal/api/errordetails/v1/message.proto b/temporal/api/errordetails/v1/message.proto index b1c512a9..ff9ecf07 100644 --- a/temporal/api/errordetails/v1/message.proto +++ b/temporal/api/errordetails/v1/message.proto @@ -106,3 +106,8 @@ message SystemWorkflowFailure { message WorkflowNotReadyFailure { } + +message NewerBuildExistsFailure { + // Build ID of the newer compatible build that will receive tasks. 
+ string latest_build_id = 1; +} From 94ca2f0c2aff46a0808922203ba5fdf2db3ba8dc Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Thu, 4 May 2023 10:33:13 -0700 Subject: [PATCH 04/17] Store more build ID and versioning information in history and reset points (#277) * Record intent to use versioning in WorkerVersionStamp * Add build_id to ResetPointInfo --- temporal/api/common/v1/message.proto | 4 ++++ temporal/api/history/v1/message.proto | 1 + temporal/api/workflow/v1/message.proto | 6 ++++++ temporal/api/workflowservice/v1/request_response.proto | 5 ----- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/temporal/api/common/v1/message.proto b/temporal/api/common/v1/message.proto index b41a839b..e7766ff2 100644 --- a/temporal/api/common/v1/message.proto +++ b/temporal/api/common/v1/message.proto @@ -129,6 +129,10 @@ message WorkerVersionStamp { // Set if the worker used a dynamically loadable bundle to process // the task. The bundle could be a WASM blob, JS bundle, etc. string bundle_id = 2; + + // If set, the worker is opting in to worker versioning. Otherwise, this is used as a marker for workflow reset + // points and the BuildIDs search attribute. + bool use_versioning = 3; } // Identifies the version(s) that a worker is compatible with when polling or identifying itself diff --git a/temporal/api/history/v1/message.proto b/temporal/api/history/v1/message.proto index 2220c297..1776cc98 100644 --- a/temporal/api/history/v1/message.proto +++ b/temporal/api/history/v1/message.proto @@ -208,6 +208,7 @@ message WorkflowTaskCompletedEventAttributes { // Data the SDK wishes to record for itself, but server need not interpret, and does not // directly impact workflow state. 
temporal.api.sdk.v1.WorkflowTaskCompletedMetadata sdk_metadata = 6; + // Local usage data sent during workflow task completion and recorded here for posterity temporal.api.common.v1.MeteringMetadata metering_metadata = 13; } diff --git a/temporal/api/workflow/v1/message.proto b/temporal/api/workflow/v1/message.proto index fd882f10..04124655 100644 --- a/temporal/api/workflow/v1/message.proto +++ b/temporal/api/workflow/v1/message.proto @@ -109,8 +109,12 @@ message ResetPoints { } message ResetPointInfo { + // A worker binary version identifier, will be deprecated and superceeded by a newer concept of build_id (see + // below). string binary_checksum = 1; + // The first run ID in the execution chain that was touched by this worker build. string run_id = 2; + // Event ID of the first WorkflowTaskCompleted event processed by this worker build. int64 first_workflow_task_completed_id = 3; google.protobuf.Timestamp create_time = 4 [(gogoproto.stdtime) = true]; // (-- api-linter: core::0214::resource-expiry=disabled @@ -119,6 +123,8 @@ message ResetPointInfo { google.protobuf.Timestamp expire_time = 5 [(gogoproto.stdtime) = true]; // false if the reset point has pending childWFs/reqCancels/signalExternals. bool resettable = 6; + // A worker build identifier, may or may not be tied to task dispatching and the "worker versioning" feature. 
+ string build_id = 7; } // NewWorkflowExecutionInfo is a shared message that encapsulates all the diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 7b8d23cb..a4f0c5ad 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -327,11 +327,6 @@ message RespondWorkflowTaskCompletedRequest { temporal.api.sdk.v1.WorkflowTaskCompletedMetadata sdk_metadata = 12; // Local usage data collected for metering temporal.api.common.v1.MeteringMetadata metering_metadata = 13; - - // If set, the worker is opting in to worker versioning. Otherwise, worker_version_stamp is used as a marker for - // workflow reset points and the BuildId search attibute. - // This flag must only be set if worker_version_stamp is provided. - bool use_versioning = 14; } message RespondWorkflowTaskCompletedResponse { From cc02a43b996fcc8990df413b80d502e9865d8d31 Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Thu, 4 May 2023 21:13:50 -0700 Subject: [PATCH 05/17] Remove build_id from ResetPointInfo (still TBD if needed) --- temporal/api/workflow/v1/message.proto | 2 -- 1 file changed, 2 deletions(-) diff --git a/temporal/api/workflow/v1/message.proto b/temporal/api/workflow/v1/message.proto index 04124655..8d56880c 100644 --- a/temporal/api/workflow/v1/message.proto +++ b/temporal/api/workflow/v1/message.proto @@ -123,8 +123,6 @@ message ResetPointInfo { google.protobuf.Timestamp expire_time = 5 [(gogoproto.stdtime) = true]; // false if the reset point has pending childWFs/reqCancels/signalExternals. bool resettable = 6; - // A worker build identifier, may or may not be tied to task dispatching and the "worker versioning" feature. 
- string build_id = 7; } // NewWorkflowExecutionInfo is a shared message that encapsulates all the From f5d84940e76e770b3ff4e11cbd21eba8eb35404e Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Wed, 10 May 2023 19:45:35 -0700 Subject: [PATCH 06/17] Task reachability API (#278) --- temporal/api/enums/v1/task_queue.proto | 14 +++++++ temporal/api/taskqueue/v1/message.proto | 28 +++++++++++++ .../workflowservice/v1/request_response.proto | 41 ++++++------------- temporal/api/workflowservice/v1/service.proto | 6 ++- 4 files changed, 59 insertions(+), 30 deletions(-) diff --git a/temporal/api/enums/v1/task_queue.proto b/temporal/api/enums/v1/task_queue.proto index 7fd620cc..29e66b40 100644 --- a/temporal/api/enums/v1/task_queue.proto +++ b/temporal/api/enums/v1/task_queue.proto @@ -57,3 +57,17 @@ enum TaskQueueType { // Activity type of task queue. TASK_QUEUE_TYPE_ACTIVITY = 2; } + +// Specifies which category of tasks may reach a worker on a versioned task queue. +enum TaskReachability { + TASK_REACHABILITY_UNSPECIFIED = 0; + // There's a possiblity for a worker to receive new workflow tasks. Workers should *not* be retired. + TASK_REACHABILITY_NEW_WORKFLOWS = 1; + // There's a possiblity for a worker to receive existing workflow and activity tasks from open workflows. Workers + // should *not* be retired. + TASK_REACHABILITY_OPEN_WORKFLOWS = 2; + // There's a possiblity for a worker to receive existing workflow tasks from closed workflows. Workers may be + // retired dependending on application requirements. For example, if there's no need to query closed workflows. 
+ TASK_REACHABILITY_CLOSED_WORKFLOWS = 3; +} + diff --git a/temporal/api/taskqueue/v1/message.proto b/temporal/api/taskqueue/v1/message.proto index 353e7f7a..55f14acd 100644 --- a/temporal/api/taskqueue/v1/message.proto +++ b/temporal/api/taskqueue/v1/message.proto @@ -32,6 +32,7 @@ option ruby_package = "Temporalio::Api::TaskQueue::V1"; option csharp_namespace = "Temporalio.Api.TaskQueue.V1"; import "google/protobuf/duration.proto"; +import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; import "google/protobuf/wrappers.proto"; @@ -93,3 +94,30 @@ message CompatibleVersionSet { // All the compatible versions, unordered, except for the last element, which is considered the set "default". repeated string build_ids = 1; } + +// Reachability of tasks for a worker on a single task queue. +message TaskQueueReachability { + string task_queue = 1; + // Task reachability for a worker in a single task queue. + // See the TaskReachability docstring for information about each enum variant. + // If reachability is empty, this worker is considered unreachable in this task queue. + repeated temporal.api.enums.v1.TaskReachability reachability = 2; +} + +// Reachability of tasks for a worker by build id, in one or more task queues. +message BuildIdReachability { + // A build id or empty if unversioned. + string build_id = 1; + // Reachability per task queue. + repeated TaskQueueReachability task_queue_reachability = 2; +} + +// Scope of task reachability for a reachability query. +message TaskReachabilityScope { + oneof variant { + // Query task reachability globally in a namespace. + google.protobuf.Empty namespace = 1; + // Query task reachability for a specific task queue. 
+ string task_queue = 2; + } +} diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index a4f0c5ad..3d200f32 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -1114,12 +1114,6 @@ message GetWorkerBuildIdCompatibilityRequest { // Limits how many compatible sets will be returned. Specify 1 to only return the current // default major version set. 0 returns all sets. int32 max_sets = 3; - // If set, the response will include information about worker versions which are ready to be - // retired. - bool include_retirement_candidates = 4; - // If set, the response will include information about which versions have open workflows, and - // whether or not there are currently polling workers who are compatible with those versions. - bool include_poller_compatibility = 5; } message GetWorkerBuildIdCompatibilityResponse { // Major version sets, in order from oldest to newest. The last element of the list will always @@ -1128,32 +1122,21 @@ message GetWorkerBuildIdCompatibilityResponse { // // There may be fewer sets returned than exist, if the request chose to limit this response. repeated temporal.api.taskqueue.v1.CompatibleVersionSet major_version_sets = 1; +} - message RetirementCandidate { - // The worker build id which is ready for retirement - string build_id = 1; - // If true, there are no open *or* closed workflows, meaning there is no reason at all - // to keep the worker alive, not even to service queries on closed workflows. If not true, - // then there are no open workflows, but some closed ones. - bool all_workflows_are_archived = 2; - // Currently polling workers who match the build id ready for retirement - repeated temporal.api.taskqueue.v1.PollerInfo pollers = 3; - } - - // A list of workers who are still live and polling the task queue, but may no longer be needed - // to make progress on open workflows. 
- repeated RetirementCandidate retirement_candidates = 2; - - message VersionsWithCompatiblePollers { - // The latest build id which completed a workflow task on some open workflow - string most_recent_build_id = 1; - // Currently polling workers who are compatible with `most_recent_build_id`. - repeated temporal.api.taskqueue.v1.PollerInfo pollers = 2; +message GetWorkerTaskReachabilityRequest { + string namespace = 1; + // Specify whether reachability should be returned for a specific worker version or all workers polling on a specific task queue. + oneof subject { + string task_queue = 2; + string build_id = 3; } + temporal.api.taskqueue.v1.TaskReachabilityScope scope = 4; +} - // A list of versions and pollers who are capable of processing tasks at that version (if any) - // for which there are currently open workflows. - repeated VersionsWithCompatiblePollers active_versions_and_pollers = 3; +message GetWorkerTaskReachabilityResponse { + // Task reachability, broken down by build id. + repeated temporal.api.taskqueue.v1.BuildIdReachability build_id_reachability = 1; } // (-- api-linter: core::0134=disabled diff --git a/temporal/api/workflowservice/v1/service.proto b/temporal/api/workflowservice/v1/service.proto index dba0c2a6..57ebc831 100644 --- a/temporal/api/workflowservice/v1/service.proto +++ b/temporal/api/workflowservice/v1/service.proto @@ -390,9 +390,13 @@ service WorkflowService { // (-- api-linter: core::0134::method-signature=disabled // aip.dev/not-precedent: UpdateWorkerBuildIdCompatibility RPC doesn't follow Google API format. --) rpc UpdateWorkerBuildIdCompatibility (UpdateWorkerBuildIdCompatibilityRequest) returns (UpdateWorkerBuildIdCompatibilityResponse) {} - // Fetches the worker build id versioning sets for some task queue and related metadata. + // Fetches the worker build id versioning sets for a task queue. 
rpc GetWorkerBuildIdCompatibility (GetWorkerBuildIdCompatibilityRequest) returns (GetWorkerBuildIdCompatibilityResponse) {} + // Fetches task reachability to determine whether a single worker or all workers polling on a specific task queue may + // be retired. + rpc GetWorkerTaskReachability (GetWorkerTaskReachabilityRequest) returns (GetWorkerTaskReachabilityResponse) {} + // Invokes the specified update function on user workflow code. // (-- api-linter: core::0134=disabled // aip.dev/not-precedent: UpdateWorkflowExecution doesn't follow Google API format --) From c3d5a514fb87f8f7a8193a82e9a748ffc80f7a14 Mon Sep 17 00:00:00 2001 From: David Reiss Date: Tue, 16 May 2023 11:01:16 -0700 Subject: [PATCH 07/17] Add version stamp to WorkflowExecutionStartedEventAttributes (#284) --- temporal/api/history/v1/message.proto | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/temporal/api/history/v1/message.proto b/temporal/api/history/v1/message.proto index 1776cc98..0bd7974a 100644 --- a/temporal/api/history/v1/message.proto +++ b/temporal/api/history/v1/message.proto @@ -102,6 +102,12 @@ message WorkflowExecutionStartedEventAttributes { int64 parent_initiated_event_version = 26; // This field is new in 1.21. string workflow_id = 28; + // When using build-id versioning, we tie the version of a child workflow or + // continued-as-new workflow on the same task queue to the parent/previous workflow by + // default (unless requested with use_latest_build_id). To make that version + // information available in the new workflow, we include it here. If this field is not + // present, workflows on a versioned task queue will be assigned the latest version. 
+ temporal.api.common.v1.WorkerVersionStamp source_version_stamp = 29; } message WorkflowExecutionCompletedEventAttributes { From 821f988e26abfc86360afbb26edba87558edb1af Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Tue, 16 May 2023 16:20:01 -0700 Subject: [PATCH 08/17] Another take on the task reachability API (#286) Co-authored-by: Spencer Judge --- temporal/api/taskqueue/v1/message.proto | 8 ------- .../workflowservice/v1/request_response.proto | 22 ++++++++++++------- temporal/api/workflowservice/v1/service.proto | 10 +++++++-- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/temporal/api/taskqueue/v1/message.proto b/temporal/api/taskqueue/v1/message.proto index 55f14acd..cc6774d1 100644 --- a/temporal/api/taskqueue/v1/message.proto +++ b/temporal/api/taskqueue/v1/message.proto @@ -104,14 +104,6 @@ message TaskQueueReachability { repeated temporal.api.enums.v1.TaskReachability reachability = 2; } -// Reachability of tasks for a worker by build id, in one or more task queues. -message BuildIdReachability { - // A build id or empty if unversioned. - string build_id = 1; - // Reachability per task queue. - repeated TaskQueueReachability task_queue_reachability = 2; -} - // Scope of task reachability for a reachability query. message TaskReachabilityScope { oneof variant { diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 3d200f32..19ddf858 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -1126,17 +1126,23 @@ message GetWorkerBuildIdCompatibilityResponse { message GetWorkerTaskReachabilityRequest { string namespace = 1; - // Specify whether reachability should be returned for a specific worker version or all workers polling on a specific task queue. 
- oneof subject { - string task_queue = 2; - string build_id = 3; - } - temporal.api.taskqueue.v1.TaskReachabilityScope scope = 4; + // Build id to retrieve reachability for. Leave empty to query reachability of an unversioned worker. + string build_id = 2; + // Scope of the reachability query (namespace or task queue). + // Must specify a task queue if querying for an unversioned worker. + temporal.api.taskqueue.v1.TaskReachabilityScope scope = 3; } message GetWorkerTaskReachabilityResponse { - // Task reachability, broken down by build id. - repeated temporal.api.taskqueue.v1.BuildIdReachability build_id_reachability = 1; + // Task reachability, broken down by task queue. + // This response lists all task queues mapped to the requested build id when the requested query scope is for an + // entire namespace but the number of task queues that include reachability information is limited. + // When reaching the limit, task queues that reachability information could not be retrieved for will be marked with a single + // TASK_REACHABILITY_UNSPECIFIED entry. The caller may issue separate calls to get the reachability for those task + // queues. + // Open source users can adjust this limit by setting the server's dynamic config value for + // `limit.reachabilityTaskQueueScan` with the caveat that this call can strain the visibility store. + repeated temporal.api.taskqueue.v1.TaskQueueReachability task_queue_reachability = 1; } // (-- api-linter: core::0134=disabled diff --git a/temporal/api/workflowservice/v1/service.proto b/temporal/api/workflowservice/v1/service.proto index 57ebc831..c5bf969d 100644 --- a/temporal/api/workflowservice/v1/service.proto +++ b/temporal/api/workflowservice/v1/service.proto @@ -393,8 +393,14 @@ service WorkflowService { // Fetches the worker build id versioning sets for a task queue. 
rpc GetWorkerBuildIdCompatibility (GetWorkerBuildIdCompatibilityRequest) returns (GetWorkerBuildIdCompatibilityResponse) {} - // Fetches task reachability to determine whether a single worker or all workers polling on a specific task queue may - // be retired. + // Fetches task reachability to determine whether a worker may be retired. + // The response lists all task queues mapped to the requested build id when the requested query scope is for an + // entire namespace but the number of task queues that include reachability information is limited. + // When reaching the limit, task queues that reachability information could not be retrieved for will be marked with a single + // TASK_REACHABILITY_UNSPECIFIED entry. The caller may issue separate calls to get the reachability for those task + // queues. + // Open source users can adjust this limit by setting the server's dynamic config value for + // `limit.reachabilityTaskQueueScan` with the caveat that this call can strain the visibility store. rpc GetWorkerTaskReachability (GetWorkerTaskReachabilityRequest) returns (GetWorkerTaskReachabilityResponse) {} // Invokes the specified update function on user workflow code. 
From fcef3ba606197660dc9891dfa74b119f4e73c6c3 Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Thu, 18 May 2023 18:09:10 -0700 Subject: [PATCH 09/17] Apply more feedback to the reachability API (#288) --- temporal/api/enums/v1/task_queue.proto | 9 ++++++-- temporal/api/taskqueue/v1/message.proto | 8 +++++++ .../workflowservice/v1/request_response.proto | 23 ++++++++++++++----- temporal/api/workflowservice/v1/service.proto | 8 +++++++ 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/temporal/api/enums/v1/task_queue.proto b/temporal/api/enums/v1/task_queue.proto index 29e66b40..3ae43491 100644 --- a/temporal/api/enums/v1/task_queue.proto +++ b/temporal/api/enums/v1/task_queue.proto @@ -59,15 +59,20 @@ enum TaskQueueType { } // Specifies which category of tasks may reach a worker on a versioned task queue. +// Used both in a reachability query and its response. enum TaskReachability { TASK_REACHABILITY_UNSPECIFIED = 0; // There's a possiblity for a worker to receive new workflow tasks. Workers should *not* be retired. TASK_REACHABILITY_NEW_WORKFLOWS = 1; + // There's a possiblity for a worker to receive existing workflow and activity tasks from existing workflows. Workers + // should *not* be retired. + // This enum value does not distinguish between open and closed workflows. + TASK_REACHABILITY_EXISTING_WORKFLOWS = 2; // There's a possiblity for a worker to receive existing workflow and activity tasks from open workflows. Workers // should *not* be retired. - TASK_REACHABILITY_OPEN_WORKFLOWS = 2; + TASK_REACHABILITY_OPEN_WORKFLOWS = 3; // There's a possiblity for a worker to receive existing workflow tasks from closed workflows. Workers may be // retired dependending on application requirements. For example, if there's no need to query closed workflows. 
- TASK_REACHABILITY_CLOSED_WORKFLOWS = 3; + TASK_REACHABILITY_CLOSED_WORKFLOWS = 4; } diff --git a/temporal/api/taskqueue/v1/message.proto b/temporal/api/taskqueue/v1/message.proto index cc6774d1..55f14acd 100644 --- a/temporal/api/taskqueue/v1/message.proto +++ b/temporal/api/taskqueue/v1/message.proto @@ -104,6 +104,14 @@ message TaskQueueReachability { repeated temporal.api.enums.v1.TaskReachability reachability = 2; } +// Reachability of tasks for a worker by build id, in one or more task queues. +message BuildIdReachability { + // A build id or empty if unversioned. + string build_id = 1; + // Reachability per task queue. + repeated TaskQueueReachability task_queue_reachability = 2; +} + // Scope of task reachability for a reachability query. message TaskReachabilityScope { oneof variant { diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 19ddf858..6f47c516 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -1126,23 +1126,34 @@ message GetWorkerBuildIdCompatibilityResponse { message GetWorkerTaskReachabilityRequest { string namespace = 1; - // Build id to retrieve reachability for. Leave empty to query reachability of an unversioned worker. - string build_id = 2; + // Build ids to retrieve reachability for. An empty string will be interpreted as an unversioned worker. + // The number of build ids that can be queried in a single API call is limited. + // Open source users can adjust this limit by setting the server's dynamic config value for + // `limit.reachabilityQueryMaxBuildIds` with the caveat that this call can strain the visibility store. + repeated string build_ids = 2; // Scope of the reachability query (namespace or task queue). // Must specify a task queue if querying for an unversioned worker. 
temporal.api.taskqueue.v1.TaskReachabilityScope scope = 3; + + // Type of reachability to query for. + // TASK_REACHABILITY_NEW_WORKFLOWS is always returned in the response and is considered the default if reachability + // is unspecified here. + // Use TASK_REACHABILITY_OPEN_WORKFLOWS if your application needs to respond to queries on closed workflows. + // Otherwise, use TASK_REACHABILITY_EXISTING_WORKFLOWS. + // See the TaskReachability docstring for information about each enum variant. + temporal.api.enums.v1.TaskReachability reachability = 4; } message GetWorkerTaskReachabilityResponse { - // Task reachability, broken down by task queue. - // This response lists all task queues mapped to the requested build id when the requested query scope is for an + // Task reachability, broken down by build id and then task queue. + // This response lists all task queues mapped to the requested build ids when the requested query scope is for an // entire namespace but the number of task queues that include reachability information is limited. // When reaching the limit, task queues that reachability information could not be retrieved for will be marked with a single // TASK_REACHABILITY_UNSPECIFIED entry. The caller may issue separate calls to get the reachability for those task // queues. // Open source users can adjust this limit by setting the server's dynamic config value for - // `limit.reachabilityTaskQueueScan` with the caveat that this call can strain the visibility store. - repeated temporal.api.taskqueue.v1.TaskQueueReachability task_queue_reachability = 1; + // `limit.reachabilityMaxTaskQueueScan` with the caveat that this call can strain the visibility store. 
+ repeated temporal.api.taskqueue.v1.BuildIdReachability build_id_reachability = 1; } // (-- api-linter: core::0134=disabled diff --git a/temporal/api/workflowservice/v1/service.proto b/temporal/api/workflowservice/v1/service.proto index c5bf969d..061438bc 100644 --- a/temporal/api/workflowservice/v1/service.proto +++ b/temporal/api/workflowservice/v1/service.proto @@ -385,6 +385,14 @@ service WorkflowService { // version, forming sets of ids which are incompatible with each other, but whose contained // members are compatible with one another. // + // A single build id may be mapped to multiple task queues using this API for cases where a single process hosts + // multiple workers. + // + // To query which workers can be retired, use the `GetWorkerTaskReachability` API. + // + // NOTE: The number of task queues mapped to a single build id is limited by the `limit.taskQueuesPerBuildId` + // (default is 20), if this limit is exceeded this API will error with a FailedPrecondition. + // // (-- api-linter: core::0134::response-message-name=disabled // aip.dev/not-precedent: UpdateWorkerBuildIdCompatibility RPC doesn't follow Google API format. --) // (-- api-linter: core::0134::method-signature=disabled From 1ff1a1504cd52a0e2abbd5ed8742428af91675d8 Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Thu, 18 May 2023 18:19:54 -0700 Subject: [PATCH 10/17] Remove "Max" from dynamic config comments --- temporal/api/workflowservice/v1/request_response.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 6f47c516..2ab80111 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -1129,7 +1129,7 @@ message GetWorkerTaskReachabilityRequest { // Build ids to retrieve reachability for. An empty string will be interpreted as an unversioned worker. 
// The number of build ids that can be queried in a single API call is limited. // Open source users can adjust this limit by setting the server's dynamic config value for - // `limit.reachabilityQueryMaxBuildIds` with the caveat that this call can strain the visibility store. + // `limit.reachabilityQueryBuildIds` with the caveat that this call can strain the visibility store. repeated string build_ids = 2; // Scope of the reachability query (namespace or task queue). // Must specify a task queue if querying for an unversioned worker. @@ -1152,7 +1152,7 @@ message GetWorkerTaskReachabilityResponse { // TASK_REACHABILITY_UNSPECIFIED entry. The caller may issue separate calls to get the reachability for those task // queues. // Open source users can adjust this limit by setting the server's dynamic config value for - // `limit.reachabilityMaxTaskQueueScan` with the caveat that this call can strain the visibility store. + // `limit.reachabilityTaskQueueScan` with the caveat that this call can strain the visibility store. 
repeated temporal.api.taskqueue.v1.BuildIdReachability build_id_reachability = 1; } From eb85e5fbc5d53d150c7337164325fee78987acfc Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Thu, 18 May 2023 21:09:20 -0700 Subject: [PATCH 11/17] Allow querying reachability of multiple specific task queues (#289) --- temporal/api/taskqueue/v1/message.proto | 11 ---------- .../workflowservice/v1/request_response.proto | 21 ++++++++++++------- temporal/api/workflowservice/v1/service.proto | 14 ++++++++----- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/temporal/api/taskqueue/v1/message.proto b/temporal/api/taskqueue/v1/message.proto index 55f14acd..0539ce92 100644 --- a/temporal/api/taskqueue/v1/message.proto +++ b/temporal/api/taskqueue/v1/message.proto @@ -32,7 +32,6 @@ option ruby_package = "Temporalio::Api::TaskQueue::V1"; option csharp_namespace = "Temporalio.Api.TaskQueue.V1"; import "google/protobuf/duration.proto"; -import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; import "google/protobuf/wrappers.proto"; @@ -111,13 +110,3 @@ message BuildIdReachability { // Reachability per task queue. repeated TaskQueueReachability task_queue_reachability = 2; } - -// Scope of task reachability for a reachability query. -message TaskReachabilityScope { - oneof variant { - // Query task reachability globally in a namespace. - google.protobuf.Empty namespace = 1; - // Query task reachability for a specific task queue. 
- string task_queue = 2; - } -} diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index 2ab80111..c1ee4e47 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -1131,9 +1131,13 @@ message GetWorkerTaskReachabilityRequest { // Open source users can adjust this limit by setting the server's dynamic config value for // `limit.reachabilityQueryBuildIds` with the caveat that this call can strain the visibility store. repeated string build_ids = 2; - // Scope of the reachability query (namespace or task queue). - // Must specify a task queue if querying for an unversioned worker. - temporal.api.taskqueue.v1.TaskReachabilityScope scope = 3; + + // Task queues to retrieve reachability for. Leave this empty to query for all task queues associated with given + // build ids in the namespace. + // Must specify at least one task queue if querying for an unversioned worker. + // The number of task queues that the server will fetch reachability information for is limited. + // See the `GetWorkerTaskReachabilityResponse` documentation for more information. + repeated string task_queues = 3; // Type of reachability to query for. // TASK_REACHABILITY_NEW_WORKFLOWS is always returned in the response and is considered the default if reachability @@ -1146,11 +1150,12 @@ message GetWorkerTaskReachabilityRequest { message GetWorkerTaskReachabilityResponse { // Task reachability, broken down by build id and then task queue. - // This response lists all task queues mapped to the requested build ids when the requested query scope is for an - // entire namespace but the number of task queues that include reachability information is limited. - // When reaching the limit, task queues that reachability information could not be retrieved for will be marked with a single - // TASK_REACHABILITY_UNSPECIFIED entry. 
The caller may issue separate calls to get the reachability for those task - // queues. + // When requesting a large number of task queues or all task queues associated with the given build ids in a + // namespace, all task queues will be listed in the response but some of them may not contain reachability + // information due to a server enforced limit. When reaching the limit, task queues that reachability information + // could not be retrieved for will be marked with a single TASK_REACHABILITY_UNSPECIFIED entry. The caller may issue + // another call to get the reachability for those task queues. + // // Open source users can adjust this limit by setting the server's dynamic config value for // `limit.reachabilityTaskQueueScan` with the caveat that this call can strain the visibility store. repeated temporal.api.taskqueue.v1.BuildIdReachability build_id_reachability = 1; diff --git a/temporal/api/workflowservice/v1/service.proto b/temporal/api/workflowservice/v1/service.proto index 061438bc..b26b8f64 100644 --- a/temporal/api/workflowservice/v1/service.proto +++ b/temporal/api/workflowservice/v1/service.proto @@ -402,11 +402,15 @@ service WorkflowService { rpc GetWorkerBuildIdCompatibility (GetWorkerBuildIdCompatibilityRequest) returns (GetWorkerBuildIdCompatibilityResponse) {} // Fetches task reachability to determine whether a worker may be retired. - // The response lists all task queues mapped to the requested build id when the requested query scope is for an - // entire namespace but the number of task queues that include reachability information is limited. - // When reaching the limit, task queues that reachability information could not be retrieved for will be marked with a single - // TASK_REACHABILITY_UNSPECIFIED entry. The caller may issue separate calls to get the reachability for those task - // queues. + // The request may specify task queues to query for or let the server fetch all task queues mapped to the given + // build IDs. 
+ // + // When requesting a large number of task queues or all task queues associated with the given build ids in a + // namespace, all task queues will be listed in the response but some of them may not contain reachability + // information due to a server enforced limit. When reaching the limit, task queues that reachability information + // could not be retrieved for will be marked with a single TASK_REACHABILITY_UNSPECIFIED entry. The caller may issue + // another call to get the reachability for those task queues. + // // Open source users can adjust this limit by setting the server's dynamic config value for // `limit.reachabilityTaskQueueScan` with the caveat that this call can strain the visibility store. rpc GetWorkerTaskReachability (GetWorkerTaskReachabilityRequest) returns (GetWorkerTaskReachabilityResponse) {} From c3d063ba1ba16475291db863a534a52b0c4d3180 Mon Sep 17 00:00:00 2001 From: Roey Berman Date: Tue, 23 May 2023 14:34:18 -0700 Subject: [PATCH 12/17] Change the default reachability query and fix the docstring (#290) --- temporal/api/workflowservice/v1/request_response.proto | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index c1ee4e47..b086b0fd 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -1140,10 +1140,10 @@ message GetWorkerTaskReachabilityRequest { repeated string task_queues = 3; // Type of reachability to query for. - // TASK_REACHABILITY_NEW_WORKFLOWS is always returned in the response and is considered the default if reachability - // is unspecified here. - // Use TASK_REACHABILITY_OPEN_WORKFLOWS if your application needs to respond to queries on closed workflows. - // Otherwise, use TASK_REACHABILITY_EXISTING_WORKFLOWS. + // `TASK_REACHABILITY_NEW_WORKFLOWS` is always returned in the response. 
+ // Use `TASK_REACHABILITY_EXISTING_WORKFLOWS` if your application needs to respond to queries on closed workflows. + // Otherwise, use `TASK_REACHABILITY_OPEN_WORKFLOWS`. Default is `TASK_REACHABILITY_EXISTING_WORKFLOWS` if left + // unspecified. // See the TaskReachability docstring for information about each enum variant. temporal.api.enums.v1.TaskReachability reachability = 4; } From 572a6c5cc5b865ab1a2f894b74835b5213540d7d Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Tue, 23 May 2023 16:30:42 -0700 Subject: [PATCH 13/17] Final versioning API changes (#285) * Add versioning stamp to activity task completions * Add to failed / cancelled events * Change use latest flag to use compat * Add use_versioning flag to capabilities message & comment wording updates --- temporal/api/command/v1/message.proto | 29 +++++------- temporal/api/common/v1/message.proto | 15 +++++-- temporal/api/history/v1/message.proto | 45 +++++++++++-------- .../workflowservice/v1/request_response.proto | 39 ++++++++++------ 4 files changed, 74 insertions(+), 54 deletions(-) diff --git a/temporal/api/command/v1/message.proto b/temporal/api/command/v1/message.proto index 34fe8fe1..8a2b2b6a 100644 --- a/temporal/api/command/v1/message.proto +++ b/temporal/api/command/v1/message.proto @@ -83,12 +83,10 @@ message ScheduleActivityTaskCommandAttributes { // Request to start the activity directly bypassing matching service and worker polling // The slot for executing the activity should be reserved when setting this field to true. bool request_eager_execution = 12; - // If this is set and the workflow executing this command is on a task queue using build-id - // versioning, then the scheduled activity will not use the same compatible version set (which - // is the default behavior) and instead will use the current overall default for the queue. - // If this command's `task_queue` field differs from the executing workflow's task queue, then - // this flag has no effect. 
- bool use_latest_build_id = 13; + // If this is set, the workflow executing this command wishes to start the activity using + // a version compatible with the version that this workflow most recently ran on, if such + // behavior is possible. + bool use_compatible_version = 13; } message RequestCancelActivityTaskCommandAttributes { @@ -197,12 +195,9 @@ message ContinueAsNewWorkflowExecutionCommandAttributes { temporal.api.common.v1.Header header = 12; temporal.api.common.v1.Memo memo = 13; temporal.api.common.v1.SearchAttributes search_attributes = 14; - // If this is set and the workflow executing this command is on a task queue using build-id - // versioning, then the continued workflow will not use the same compatible version set (which - // is the default behavior) and instead will use the current overall default for the queue. - // If this command's `task_queue` field differs from the executing workflow's task queue, then - // this flag has no effect. - bool use_latest_build_id = 15; + // If this is set, the workflow executing this command wishes to continue as new using a version + // compatible with the version that this workflow most recently ran on. + bool use_compatible_version = 15; // `workflow_execution_timeout` is omitted as it shouldn't be overridden from within a workflow. } @@ -230,12 +225,10 @@ message StartChildWorkflowExecutionCommandAttributes { temporal.api.common.v1.Header header = 14; temporal.api.common.v1.Memo memo = 15; temporal.api.common.v1.SearchAttributes search_attributes = 16; - // If this is set and the workflow executing this command is on a task queue using build-id - // versioning, then the child workflow will not use the same compatible version set (which - // is the default behavior) and instead will use the current overall default for the queue. - // If this command's `task_queue` field differs from the executing workflow's task queue, then - // this flag has no effect. 
- bool use_latest_build_id = 17; + // If this is set, the workflow executing this command wishes to start the child workflow using + // a version compatible with the version that this workflow most recently ran on, if such + // behavior is possible. + bool use_compatible_version = 17; } message ProtocolMessageCommandAttributes { diff --git a/temporal/api/common/v1/message.proto b/temporal/api/common/v1/message.proto index e7766ff2..a66e4d6a 100644 --- a/temporal/api/common/v1/message.proto +++ b/temporal/api/common/v1/message.proto @@ -124,21 +124,28 @@ message MeteringMetadata { // Identifies the version(s) of a worker that processed a task message WorkerVersionStamp { - // An opaque whole-worker identifier + // An opaque whole-worker identifier. Replaces the deprecated `binary_checksum` field when this + // message is included in requests which previously used that. string build_id = 1; // Set if the worker used a dynamically loadable bundle to process // the task. The bundle could be a WASM blob, JS bundle, etc. string bundle_id = 2; - // If set, the worker is opting in to worker versioning. Otherwise, this is used as a marker for workflow reset - // points and the BuildIDs search attribute. + // If set, the worker is opting in to worker versioning. Otherwise, this is used only as a + // marker for workflow reset points and the BuildIDs search attribute. bool use_versioning = 3; } -// Identifies the version(s) that a worker is compatible with when polling or identifying itself +// Identifies the version(s) that a worker is compatible with when polling or identifying itself, +// and whether or not this worker is opting into the build-id based versioning feature. This is +// used by matching to determine which workers ought to receive what tasks. message WorkerVersionCapabilities { // An opaque whole-worker identifier string build_id = 1; + // If set, the worker is opting in to worker versioning, and wishes to only receive appropriate + // tasks. 
+ bool use_versioning = 2; + // Later, may include info like "I can process WASM and/or JS bundles" } diff --git a/temporal/api/history/v1/message.proto b/temporal/api/history/v1/message.proto index 0bd7974a..d515add7 100644 --- a/temporal/api/history/v1/message.proto +++ b/temporal/api/history/v1/message.proto @@ -160,12 +160,9 @@ message WorkflowExecutionContinuedAsNewEventAttributes { temporal.api.common.v1.Header header = 12; temporal.api.common.v1.Memo memo = 13; temporal.api.common.v1.SearchAttributes search_attributes = 14; - // If this is set and the workflow executing this command is on a task queue using build-id - // versioning, then the scheduled activity will not use the same compatible version set (which - // is the default behavior) and instead will use the current overall default for the queue. - // If this command's `task_queue` field differs from the executing workflow's task queue, then - // this flag has no effect. - bool use_latest_build_id = 15; + // If this is set, the workflow executing this command wishes to continue as new using a version + // compatible with the version that this workflow most recently ran on. + bool use_compatible_version = 15; // workflow_execution_timeout is omitted as it shouldn't be overridden from within a workflow. } @@ -243,8 +240,13 @@ message WorkflowTaskFailedEventAttributes { string new_run_id = 7; // TODO: ? int64 fork_event_version = 8; - // If a worker explicitly failed this task, it's binary id + // DEPRECATED since 1.21 - use `worker_version` instead. + // If a worker explicitly failed this task, its binary id string binary_checksum = 9; + // Version info of the worker who processed this workflow task, or missing if worker is not + // using versioning. If present, the `build_id` field within is also used as `binary_checksum`, + // which may be omitted in that case (it may also be populated to preserve compatibility). 
+ temporal.api.common.v1.WorkerVersionStamp worker_version = 10; } message ActivityTaskScheduledEventAttributes { @@ -285,12 +287,10 @@ message ActivityTaskScheduledEventAttributes { // configuration. Retries will happen up to `schedule_to_close_timeout`. To disable retries set // retry_policy.maximum_attempts to 1. temporal.api.common.v1.RetryPolicy retry_policy = 12; - // If this is set and the workflow executing this command is on a task queue using build-id - // versioning, then the scheduled activity will not use the same compatible version set (which - // is the default behavior) and instead will use the current overall default for the queue. - // If this command's `task_queue` field differs from the executing workflow's task queue, then - // this flag has no effect. - bool use_latest_build_id = 13; + // If this is set, the workflow executing this command wishes to start the activity using + // a version compatible with the version that this workflow most recently ran on, if such + // behavior is possible. + bool use_compatible_version = 13; } message ActivityTaskStartedEventAttributes { @@ -316,6 +316,9 @@ message ActivityTaskCompletedEventAttributes { int64 started_event_id = 3; // id of the worker that completed this task string identity = 4; + // Version info of the worker who processed this activity task, or missing if worker is not + // using versioning. + temporal.api.common.v1.WorkerVersionStamp worker_version = 5; } message ActivityTaskFailedEventAttributes { @@ -328,6 +331,9 @@ message ActivityTaskFailedEventAttributes { // id of the worker that failed this task string identity = 4; temporal.api.enums.v1.RetryState retry_state = 5; + // Version info of the worker who processed this activity task, or missing if worker is not + // using versioning. 
+ temporal.api.common.v1.WorkerVersionStamp worker_version = 6; } message ActivityTaskTimedOutEventAttributes { @@ -360,6 +366,9 @@ message ActivityTaskCanceledEventAttributes { int64 started_event_id = 4; // id of the worker who canceled this activity string identity = 5; + // Version info of the worker who processed this activity task, or missing if worker is not + // using versioning. + temporal.api.common.v1.WorkerVersionStamp worker_version = 6; } message TimerStartedEventAttributes { @@ -577,12 +586,10 @@ message StartChildWorkflowExecutionInitiatedEventAttributes { temporal.api.common.v1.Header header = 15; temporal.api.common.v1.Memo memo = 16; temporal.api.common.v1.SearchAttributes search_attributes = 17; - // If this is set and the workflow executing this command is on a task queue using build-id - // versioning, then the child workflow will not use the same compatible version set (which - // is the default behavior) and instead will use the current overall default for the queue. - // If this command's `task_queue` field differs from the executing workflow's task queue, then - // this flag has no effect. - bool use_latest_build_id = 19; + // If this is set, the workflow executing this command wishes to start the child workflow using + // a version compatible with the version that this workflow most recently ran on, if such + // behavior is possible. 
+ bool use_compatible_version = 19; } message StartChildWorkflowExecutionFailedEventAttributes { diff --git a/temporal/api/workflowservice/v1/request_response.proto b/temporal/api/workflowservice/v1/request_response.proto index b086b0fd..f148edb3 100644 --- a/temporal/api/workflowservice/v1/request_response.proto +++ b/temporal/api/workflowservice/v1/request_response.proto @@ -238,14 +238,12 @@ message PollWorkflowTaskQueueRequest { temporal.api.taskqueue.v1.TaskQueue task_queue = 2; // The identity of the worker/client who is polling this task queue string identity = 3; + // DEPRECATED since 1.21 - use `worker_version_capabilities` instead. // Each worker process should provide an ID unique to the specific set of code it is running // "checksum" in this field name isn't very accurate, it should be though of as an id. string binary_checksum = 4; - // If set, the worker is opting in to versioning and wishes to only - // receive tasks that are considered compatible with the version capabilities provided. - // Doing so only makes sense in conjunction with the `UpdateWorkerBuildIdCompatibility` API. - // When this field has a `worker_build_id`, and `binary_checksum` is not - // set, that value should also be considered as the `binary_checksum`. + // Information about this worker's build identifier and if it is choosing to use the versioning + // feature. See the `WorkerVersionCapabilities` docstring for more. temporal.api.common.v1.WorkerVersionCapabilities worker_version_capabilities = 5; } @@ -309,16 +307,15 @@ message RespondWorkflowTaskCompletedRequest { // something useful, but cannot complete it within the workflow task timeout. Local activities // which run for longer than the task timeout being the prime example. bool force_create_new_workflow_task = 6; + // DEPRECATED since 1.21 - use `worker_version_stamp` instead. 
// Worker process' unique binary id string binary_checksum = 7; // Responses to the `queries` field in the task being responded to map query_results = 8; string namespace = 9; - // If using versioning, the worker uses this field to indicate what version(s) it used to - // process the task. When this field has a `worker_build_id`, and `binary_checksum` is not set, - // that value should also be considered as the `binary_checksum`. Leaving this field empty when - // replying to a task has had this field previously populated in history in an error, and such - // a completion will be rejected. + // Version info of the worker who processed this task. This message's `build_id` field should + // always be set by SDKs. Workers opting into versioning will also set the `use_versioning` + // field to true. See message docstrings for more. temporal.api.common.v1.WorkerVersionStamp worker_version_stamp = 10; // Protocol messages piggybacking on a WFT as a transport repeated temporal.api.protocol.v1.Message messages = 11; @@ -348,11 +345,16 @@ message RespondWorkflowTaskFailedRequest { temporal.api.failure.v1.Failure failure = 3; // The identity of the worker/client string identity = 4; + // DEPRECATED since 1.21 - use `worker_version_stamp` instead. // Worker process' unique binary id string binary_checksum = 5; string namespace = 6; // Protocol messages piggybacking on a WFT as a transport repeated temporal.api.protocol.v1.Message messages = 7; + // Version info of the worker who processed this task. This message's `build_id` field should + // always be set by SDKs. Workers opting into versioning will also set the `use_versioning` + // field to true. See message docstrings for more. 
+ temporal.api.common.v1.WorkerVersionStamp worker_version = 8; } message RespondWorkflowTaskFailedResponse { @@ -364,9 +366,8 @@ message PollActivityTaskQueueRequest { // The identity of the worker/client string identity = 3; temporal.api.taskqueue.v1.TaskQueueMetadata task_queue_metadata = 4; - // If set, the worker is opting in to versioning and wishes to only - // receive tasks that are considered compatible with the capabilities provided. - // Doing so only makes sense in conjunction with the `UpdateWorkerBuildIdCompatibility` API. + // Information about this worker's build identifier and if it is choosing to use the versioning + // feature. See the `WorkerVersionCapabilities` docstring for more. temporal.api.common.v1.WorkerVersionCapabilities worker_version_capabilities = 5; } @@ -463,6 +464,10 @@ message RespondActivityTaskCompletedRequest { // The identity of the worker/client string identity = 3; string namespace = 4; + // Version info of the worker who processed this task. This message's `build_id` field should + // always be set by SDKs. Workers opting into versioning will also set the `use_versioning` + // field to true. See message docstrings for more. + temporal.api.common.v1.WorkerVersionStamp worker_version = 5; } message RespondActivityTaskCompletedResponse { @@ -496,6 +501,10 @@ message RespondActivityTaskFailedRequest { string namespace = 4; // Additional details to be stored as last activity heartbeat temporal.api.common.v1.Payloads last_heartbeat_details = 5; + // Version info of the worker who processed this task. This message's `build_id` field should + // always be set by SDKs. Workers opting into versioning will also set the `use_versioning` + // field to true. See message docstrings for more. 
+ temporal.api.common.v1.WorkerVersionStamp worker_version = 6; } message RespondActivityTaskFailedResponse { @@ -535,6 +544,10 @@ message RespondActivityTaskCanceledRequest { // The identity of the worker/client string identity = 3; string namespace = 4; + // Version info of the worker who processed this task. This message's `build_id` field should + // always be set by SDKs. Workers opting into versioning will also set the `use_versioning` + // field to true. See message docstrings for more. + temporal.api.common.v1.WorkerVersionStamp worker_version = 5; } message RespondActivityTaskCanceledResponse { From a9d2380ac706c06ea26ef1662ed5f67eee41eabd Mon Sep 17 00:00:00 2001 From: David Reiss Date: Wed, 24 May 2023 09:05:34 -0700 Subject: [PATCH 14/17] Add normal_name to TaskQueue (#291) Co-authored-by: Chad Retz --------- Co-authored-by: Spencer Judge Co-authored-by: Chad Retz --- temporal/api/taskqueue/v1/message.proto | 3 +++ 1 file changed, 3 insertions(+) diff --git a/temporal/api/taskqueue/v1/message.proto b/temporal/api/taskqueue/v1/message.proto index 0539ce92..7310f221 100644 --- a/temporal/api/taskqueue/v1/message.proto +++ b/temporal/api/taskqueue/v1/message.proto @@ -45,6 +45,9 @@ message TaskQueue { string name = 1; // Default: TASK_QUEUE_KIND_NORMAL. temporal.api.enums.v1.TaskQueueKind kind = 2; + // Iff kind == TASK_QUEUE_KIND_STICKY, then this field contains the name of + // the normal task that the sticky worker is running on. 
+ string normal_name = 3; } // Only applies to activity task queues From 948b780cf0d4c2b6a3c71fc3c75dc0d07259bd96 Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Thu, 25 May 2023 09:30:52 -0700 Subject: [PATCH 15/17] Disable breaking check --- buf.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/buf.yaml b/buf.yaml index 68d6bb9f..9509f202 100644 --- a/buf.yaml +++ b/buf.yaml @@ -1,5 +1,9 @@ version: v1 breaking: + ignore: + # TODO: Remove after PR 293 + - temporal/api/taskqueue/v1 + - temporal/api/workflowservice/v1 use: - WIRE_JSON lint: From 31bf8d1722833a6d406eb89b8c8f91a981c0ca5a Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Thu, 25 May 2023 10:00:38 -0700 Subject: [PATCH 16/17] Fix typo --- temporal/api/workflow/v1/message.proto | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temporal/api/workflow/v1/message.proto b/temporal/api/workflow/v1/message.proto index 8d56880c..bb83f40b 100644 --- a/temporal/api/workflow/v1/message.proto +++ b/temporal/api/workflow/v1/message.proto @@ -109,8 +109,8 @@ message ResetPoints { } message ResetPointInfo { - // A worker binary version identifier, will be deprecated and superceeded by a newer concept of build_id (see - // below). + // A worker binary version identifier, will be deprecated and superseded by a newer concept of + // build_id. string binary_checksum = 1; // The first run ID in the execution chain that was touched by this worker build. 
string run_id = 2; From 9525aecbdb2a5a535831a00334b7ef72c074cd2f Mon Sep 17 00:00:00 2001 From: Spencer Judge Date: Thu, 25 May 2023 13:26:21 -0700 Subject: [PATCH 17/17] Fix review comments --- temporal/api/errordetails/v1/message.proto | 4 ++-- temporal/api/history/v1/message.proto | 28 +++++++++------------- temporal/api/taskqueue/v1/message.proto | 2 +- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/temporal/api/errordetails/v1/message.proto b/temporal/api/errordetails/v1/message.proto index ff9ecf07..5c9dec6e 100644 --- a/temporal/api/errordetails/v1/message.proto +++ b/temporal/api/errordetails/v1/message.proto @@ -108,6 +108,6 @@ message WorkflowNotReadyFailure { } message NewerBuildExistsFailure { - // Build ID of the newer compatible build that will receive tasks. - string latest_build_id = 1; + // The current default compatible build ID which will receive tasks + string default_build_id = 1; } diff --git a/temporal/api/history/v1/message.proto b/temporal/api/history/v1/message.proto index d515add7..da238e9b 100644 --- a/temporal/api/history/v1/message.proto +++ b/temporal/api/history/v1/message.proto @@ -102,11 +102,8 @@ message WorkflowExecutionStartedEventAttributes { int64 parent_initiated_event_version = 26; // This field is new in 1.21. string workflow_id = 28; - // When using build-id versioning, we tie the version of a child workflow or - // continued-as-new workflow on the same task queue to the parent/previous workflow by - // default (unless requested with use_latest_build_id). To make that version - // information available in the new workflow, we include it here. If this field is not - // present, workflows on a versioned task queue will be assigned the latest version. + // If this workflow intends to use anything other than the current overall default version for + // the queue, then we include it here. 
temporal.api.common.v1.WorkerVersionStamp source_version_stamp = 29; } @@ -204,9 +201,9 @@ message WorkflowTaskCompletedEventAttributes { string identity = 3; // Binary ID of the worker who completed this task string binary_checksum = 4; - // Version info of the worker who processed this workflow task, or missing if worker is not - // using versioning. If present, the `build_id` field within is also used as `binary_checksum`, - // which may be omitted in that case (it may also be populated to preserve compatibility). + // Version info of the worker who processed this workflow task. If present, the `build_id` field + // within is also used as `binary_checksum`, which may be omitted in that case (it may also be + // populated to preserve compatibility). temporal.api.common.v1.WorkerVersionStamp worker_version = 5; // Data the SDK wishes to record for itself, but server need not interpret, and does not // directly impact workflow state. @@ -243,9 +240,9 @@ message WorkflowTaskFailedEventAttributes { // DEPRECATED since 1.21 - use `worker_version` instead. // If a worker explicitly failed this task, its binary id string binary_checksum = 9; - // Version info of the worker who processed this workflow task, or missing if worker is not - // using versioning. If present, the `build_id` field within is also used as `binary_checksum`, - // which may be omitted in that case (it may also be populated to preserve compatibility). + // Version info of the worker who processed this workflow task. If present, the `build_id` field + // within is also used as `binary_checksum`, which may be omitted in that case (it may also be + // populated to preserve compatibility). 
temporal.api.common.v1.WorkerVersionStamp worker_version = 10; } @@ -316,8 +313,7 @@ message ActivityTaskCompletedEventAttributes { int64 started_event_id = 3; // id of the worker that completed this task string identity = 4; - // Version info of the worker who processed this activity task, or missing if worker is not - // using versioning. + // Version info of the worker who processed this activity task. temporal.api.common.v1.WorkerVersionStamp worker_version = 5; } @@ -331,8 +327,7 @@ message ActivityTaskFailedEventAttributes { // id of the worker that failed this task string identity = 4; temporal.api.enums.v1.RetryState retry_state = 5; - // Version info of the worker who processed this activity task, or missing if worker is not - // using versioning. + // Version info of the worker who processed this activity task. temporal.api.common.v1.WorkerVersionStamp worker_version = 6; } @@ -366,8 +361,7 @@ message ActivityTaskCanceledEventAttributes { int64 started_event_id = 4; // id of the worker who canceled this activity string identity = 5; - // Version info of the worker who processed this activity task, or missing if worker is not - // using versioning. + // Version info of the worker who processed this activity task. temporal.api.common.v1.WorkerVersionStamp worker_version = 6; } diff --git a/temporal/api/taskqueue/v1/message.proto b/temporal/api/taskqueue/v1/message.proto index 7310f221..e8f027d1 100644 --- a/temporal/api/taskqueue/v1/message.proto +++ b/temporal/api/taskqueue/v1/message.proto @@ -46,7 +46,7 @@ message TaskQueue { // Default: TASK_QUEUE_KIND_NORMAL. temporal.api.enums.v1.TaskQueueKind kind = 2; // Iff kind == TASK_QUEUE_KIND_STICKY, then this field contains the name of - // the normal task that the sticky worker is running on. + // the normal task queue that the sticky worker is running on. string normal_name = 3; }