diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 3617d5d51edb..12367812521b 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -235,6 +235,10 @@ "tasks" ], "properties": { + "failFast": { + "description": "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", + "type": "boolean" + }, "target": { "description": "Target are one or more names of targets to execute in a DAG", "type": "string" diff --git a/pkg/apis/workflow/v1alpha1/openapi_generated.go b/pkg/apis/workflow/v1alpha1/openapi_generated.go index 392818bd5a32..85d9389a626b 100644 --- a/pkg/apis/workflow/v1alpha1/openapi_generated.go +++ b/pkg/apis/workflow/v1alpha1/openapi_generated.go @@ -478,6 +478,13 @@ func schema_pkg_apis_workflow_v1alpha1_DAGTemplate(ref common.ReferenceCallback) }, }, }, + "failFast": { + SchemaProps: spec.SchemaProps{ + Description: "This flag is for DAG logic. The DAG logic has a built-in \"fail fast\" feature to stop scheduling new steps, as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed before failing the DAG itself. The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to completion (either success or failure), regardless of the failed outcomes of branches in the DAG. More info and example about this feature at https://github.com/argoproj/argo/issues/1442", + Type: []string{"boolean"}, + Format: "", + }, + }, }, Required: []string{"tasks"}, }, diff --git a/pkg/apis/workflow/v1alpha1/types.go b/pkg/apis/workflow/v1alpha1/types.go index f5d35003437a..17f0d0760e76 100644 --- a/pkg/apis/workflow/v1alpha1/types.go +++ b/pkg/apis/workflow/v1alpha1/types.go @@ -891,6 +891,14 @@ type DAGTemplate struct { // Tasks are a list of DAG tasks Tasks []DAGTask `json:"tasks"` + + // This flag is for DAG logic. The DAG logic has a built-in "fail fast" feature to stop scheduling new steps, + // as soon as it detects that one of the DAG nodes is failed. Then it waits until all DAG nodes are completed + // before failing the DAG itself. + // The FailFast flag default is true, if set to false, it will allow a DAG to run all branches of the DAG to + // completion (either success or failure), regardless of the failed outcomes of branches in the DAG. + // More info and example about this feature at https://github.com/argoproj/argo/issues/1442 + FailFast *bool `json:"failFast,omitempty"` } // DAGTask represents a node in the graph during DAG execution diff --git a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go index 48f38e70c121..7977fd1ec5ed 100644 --- a/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/workflow/v1alpha1/zz_generated.deepcopy.go @@ -251,6 +251,11 @@ func (in *DAGTemplate) DeepCopyInto(out *DAGTemplate) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.FailFast != nil { + in, out := &in.FailFast, &out.FailFast + *out = new(bool) + **out = **in + } return } diff --git a/test/e2e/expectedfailures/dag-disable-failFast.yaml b/test/e2e/expectedfailures/dag-disable-failFast.yaml new file mode 100644 index 000000000000..683c990a1939 --- /dev/null +++ b/test/e2e/expectedfailures/dag-disable-failFast.yaml @@ -0,0 +1,49 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: dag-primay-branch- +spec: + entrypoint: statis + templates: + - name: a + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: b + retryStrategy: + limit: 2 + container: + image: alpine:latest + command: [sh, -c] + args: ["sleep 30; echo haha"] + - name: c + retryStrategy: + limit: 3 + container: + image: alpine:latest + command: [sh, -c] + args: ["echo intentional failure; exit 2"] + - name: d + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: statis + dag: + failFast: false + tasks: + - name: A + template: a + - name: B + dependencies: [A] + template: b + - name: C + dependencies: [A] + template: c + - name: D + dependencies: [B] + template: d + - name: E + dependencies: [D] + template: d \ No newline at end of file diff --git a/test/e2e/expectedfailures/dag-noroot-branch-failed.yaml b/test/e2e/expectedfailures/dag-noroot-branch-failed.yaml new file mode 100644 index 000000000000..867d4cc6f8b7 --- /dev/null +++ b/test/e2e/expectedfailures/dag-noroot-branch-failed.yaml @@ -0,0 +1,48 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: dag-primay-branch- +spec: + entrypoint: statis + templates: + - name: a + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: b + retryStrategy: + limit: 2 + container: + image: alpine:latest + command: [sh, -c] + args: ["sleep 30; echo haha"] + - name: c + retryStrategy: + limit: 3 + container: + image: alpine:latest + command: [sh, -c] + args: ["echo intentional failure; exit 2"] + - name: d + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + - name: statis + dag: + tasks: + - name: A + template: a + - name: B + dependencies: [A] + template: b + - name: C + dependencies: [A] + template: c + - name: D + dependencies: [B] + template: d + - name: E + dependencies: [D] + template: d \ No newline at end of file diff --git a/workflow/controller/dag.go b/workflow/controller/dag.go index ec3442bf6b7b..ed9e90a25e65 100644 --- a/workflow/controller/dag.go +++ b/workflow/controller/dag.go @@ -87,7 +87,29 @@ func (d *dagContext) assessDAGPhase(targetTasks []string, nodes map[string]wfv1. retriesExhausted = false } } + if unsuccessfulPhase != "" { + // If failFast set to false, we should return Running to continue this workflow for other DAG branch + if d.tmpl.DAG.FailFast != nil && !*d.tmpl.DAG.FailFast { + tmpOverAllFinished := true + // If all the nodes have finished, we should mark the failed node to finish overall workflow + // So we should check all the targetTasks have finished + for _, tmpDepName := range targetTasks { + tmpDepNode := d.getTaskNode(tmpDepName) + if tmpDepNode == nil { + tmpOverAllFinished = false + break + } + if tmpDepNode.Type == wfv1.NodeTypeRetry && hasMoreRetries(tmpDepNode, d.wf) { + tmpOverAllFinished = false + break + } + } + if !tmpOverAllFinished { + return wfv1.NodeRunning + } + } + // if we were unsuccessful, we can return *only* if all retry nodes have ben exhausted. if retriesExhausted { return unsuccessfulPhase diff --git a/workflow/controller/dag_test.go b/workflow/controller/dag_test.go index c9aa2b8ecc53..7f51de45d741 100644 --- a/workflow/controller/dag_test.go +++ b/workflow/controller/dag_test.go @@ -32,3 +32,11 @@ func TestDagRetryExhaustedXfail(t *testing.T) { woc.operate() assert.Equal(t, string(wfv1.NodeFailed), string(woc.wf.Status.Phase)) } + +// TestDagDisableFailFast test disable fail fast function +func TestDagDisableFailFast(t *testing.T) { + wf := test.LoadTestWorkflow("testdata/dag-disable-fail-fast.yaml") + woc := newWoc(*wf) + woc.operate() + assert.Equal(t, string(wfv1.NodeFailed), string(woc.wf.Status.Phase)) +} diff --git a/workflow/controller/testdata/dag-disable-fail-fast.yaml b/workflow/controller/testdata/dag-disable-fail-fast.yaml new file mode 100644 index 000000000000..45e9772bcb64 --- /dev/null +++ b/workflow/controller/testdata/dag-disable-fail-fast.yaml @@ -0,0 +1,211 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + creationTimestamp: 2019-06-26T09:11:58Z + generateName: dag-disable-fail-fast- + generation: 1 + labels: + workflows.argoproj.io/completed: "true" + workflows.argoproj.io/phase: Failed + name: dag-disable-fail-fast-r6xdc + namespace: default + resourceVersion: "15772210" + selfLink: /apis/argoproj.io/v1alpha1/namespaces/default/workflows/dag-disable-fail-fast-r6xdc + uid: 734516b5-97f2-11e9-9fea-00163e00cf4e +spec: + arguments: {} + entrypoint: statis + templates: + - container: + args: + - hello world + command: + - cowsay + image: docker/whalesay:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: a + outputs: {} + - container: + args: + - sleep 30; echo haha + command: + - sh + - -c + image: alpine:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: b + outputs: {} + retryStrategy: + limit: 2 + - container: + args: + - echo intentional failure; exit 2 + command: + - sh + - -c + image: alpine:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: c + outputs: {} + retryStrategy: + limit: 1 + - container: + args: + - hello world + command: + - cowsay + image: docker/whalesay:latest + name: "" + resources: {} + inputs: {} + metadata: {} + name: d + outputs: {} + - dag: + failFast: false + tasks: + - arguments: {} + name: A + template: a + - arguments: {} + dependencies: + - A + name: B + template: b + - arguments: {} + dependencies: + - A + name: C + template: c + - arguments: {} + dependencies: + - B + name: D + template: d + - arguments: {} + dependencies: + - D + name: E + template: d + inputs: {} + metadata: {} + name: statis + outputs: {} +status: + finishedAt: 2019-06-26T09:12:46Z + nodes: + dag-disable-fail-fast-r6xdc: + children: + - dag-disable-fail-fast-r6xdc-3928436299 + displayName: dag-disable-fail-fast-r6xdc + finishedAt: 2019-06-26T09:12:46Z + id: dag-disable-fail-fast-r6xdc + name: dag-disable-fail-fast-r6xdc + phase: Failed + startedAt: 2019-06-26T09:11:58Z + templateName: statis + type: DAG + dag-disable-fail-fast-r6xdc-3256495944: + boundaryID: dag-disable-fail-fast-r6xdc + displayName: C(0) + finishedAt: 2019-06-26T09:12:08Z + id: dag-disable-fail-fast-r6xdc-3256495944 + message: failed with exit code 2 + name: dag-disable-fail-fast-r6xdc.C(0) + phase: Failed + startedAt: 2019-06-26T09:12:03Z + templateName: c + type: Pod + dag-disable-fail-fast-r6xdc-3457680277: + boundaryID: dag-disable-fail-fast-r6xdc + displayName: C(1) + finishedAt: 2019-06-26T09:12:12Z + id: dag-disable-fail-fast-r6xdc-3457680277 + message: failed with exit code 2 + name: dag-disable-fail-fast-r6xdc.C(1) + phase: Failed + startedAt: 2019-06-26T09:12:09Z + templateName: c + type: Pod + dag-disable-fail-fast-r6xdc-3928436299: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3945213918 + - dag-disable-fail-fast-r6xdc-3961991537 + displayName: A + finishedAt: 2019-06-26T09:12:02Z + id: dag-disable-fail-fast-r6xdc-3928436299 + name: dag-disable-fail-fast-r6xdc.A + phase: Succeeded + startedAt: 2019-06-26T09:11:58Z + templateName: a + type: Pod + dag-disable-fail-fast-r6xdc-3945213918: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-4286504589 + displayName: B + finishedAt: 2019-06-26T09:12:36Z + id: dag-disable-fail-fast-r6xdc-3945213918 + name: dag-disable-fail-fast-r6xdc.B + phase: Succeeded + startedAt: 2019-06-26T09:12:03Z + type: Retry + dag-disable-fail-fast-r6xdc-3961991537: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3256495944 + - dag-disable-fail-fast-r6xdc-3457680277 + displayName: C + finishedAt: 2019-06-26T09:12:13Z + id: dag-disable-fail-fast-r6xdc-3961991537 + message: No more retries left + name: dag-disable-fail-fast-r6xdc.C + phase: Failed + startedAt: 2019-06-26T09:12:03Z + type: Retry + dag-disable-fail-fast-r6xdc-3978769156: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3995546775 + displayName: D + finishedAt: 2019-06-26T09:12:41Z + id: dag-disable-fail-fast-r6xdc-3978769156 + name: dag-disable-fail-fast-r6xdc.D + phase: Succeeded + startedAt: 2019-06-26T09:12:37Z + templateName: d + type: Pod + dag-disable-fail-fast-r6xdc-3995546775: + boundaryID: dag-disable-fail-fast-r6xdc + displayName: E + finishedAt: 2019-06-26T09:12:45Z + id: dag-disable-fail-fast-r6xdc-3995546775 + name: dag-disable-fail-fast-r6xdc.E + phase: Succeeded + startedAt: 2019-06-26T09:12:42Z + templateName: d + type: Pod + dag-disable-fail-fast-r6xdc-4286504589: + boundaryID: dag-disable-fail-fast-r6xdc + children: + - dag-disable-fail-fast-r6xdc-3978769156 + displayName: B(0) + finishedAt: 2019-06-26T09:12:36Z + id: dag-disable-fail-fast-r6xdc-4286504589 + name: dag-disable-fail-fast-r6xdc.B(0) + phase: Succeeded + startedAt: 2019-06-26T09:12:03Z + templateName: b + type: Pod + phase: Failed + startedAt: 2019-06-26T09:11:58Z \ No newline at end of file