@@ -18,6 +18,7 @@ package clusterstate
18
18
19
19
import (
20
20
"fmt"
21
+ "time"
21
22
22
23
"github.com/aws/aws-sdk-go/service/cloudformation"
23
24
"github.com/cortexlabs/cortex/pkg/lib/aws"
@@ -42,8 +43,16 @@ type ClusterState struct {
42
43
Status Status
43
44
}
44
45
45
- func any ( statuses [] string , allowedStatuses ... string ) bool {
46
// is reports whether status equals allowedStatus or any of allowedStatuses.
//
// The signature requires at least one allowed value, so a call site cannot
// accidentally test against an empty list. The comparison is done directly
// instead of building a set on every call: the candidate list is always a
// handful of values, so a linear scan is cheaper and allocation-free.
func is(status string, allowedStatus string, allowedStatuses ...string) bool {
	if status == allowedStatus {
		return true
	}

	for _, candidate := range allowedStatuses {
		if status == candidate {
			return true
		}
	}

	return false
}
52
+
53
+ func any (statuses []string , allowedStatus string , allowedStatuses ... string ) bool {
54
+ statusSet := strset .New (allowedStatuses ... )
55
+ statusSet .Add (allowedStatus )
47
56
for _ , stackStatus := range statuses {
48
57
if statusSet .Has (stackStatus ) {
49
58
return true
@@ -53,8 +62,9 @@ func any(statuses []string, allowedStatuses ...string) bool {
53
62
return false
54
63
}
55
64
56
- func all (statuses []string , allowedStatuses ... string ) bool {
65
+ func all (statuses []string , allowedStatus string , allowedStatuses ... string ) bool {
57
66
statusSet := strset .New (allowedStatuses ... )
67
+ statusSet .Add (allowedStatus )
58
68
for _ , stackStatus := range statuses {
59
69
if ! statusSet .Has (stackStatus ) {
60
70
return false
@@ -76,9 +86,8 @@ func (cs ClusterState) TableString() string {
76
86
77
87
func getStatus (statusMap map [string ]string , controlPlane string ) (Status , error ) {
78
88
// the order matters
79
-
80
89
allStatuses := []string {}
81
- controlPlaneStatus := [] string { statusMap [controlPlane ]}
90
+ controlPlaneStatus := statusMap [controlPlane ]
82
91
nodeGroupStatuses := []string {}
83
92
84
93
for stackName , status := range statusMap {
@@ -88,6 +97,19 @@ func getStatus(statusMap map[string]string, controlPlane string) (Status, error)
88
97
}
89
98
}
90
99
100
+ if any (allStatuses , string (StatusCreateFailedTimedOut )) {
101
+ return StatusNotFound , ErrorUnexpectedCloudFormationStatus (s .ObjFlat (statusMap ))
102
+ }
103
+
104
+ if len (nodeGroupStatuses ) == 0 && controlPlaneStatus == string (StatusNotFound ) {
105
+ return StatusNotFound , nil
106
+ }
107
+
108
+ // controlplane stack may be created while nodegroup stacks aren't listed in cloudformation stacks during cluster spin up
109
+ if len (nodeGroupStatuses ) == 0 && is (controlPlaneStatus , cloudformation .StackStatusCreateComplete , cloudformation .StackStatusCreateInProgress ) {
110
+ return StatusCreateInProgress , nil
111
+ }
112
+
91
113
if any (allStatuses , cloudformation .StackStatusCreateFailed ) {
92
114
return StatusCreateFailed , nil
93
115
}
@@ -96,8 +118,8 @@ func getStatus(statusMap map[string]string, controlPlane string) (Status, error)
96
118
return StatusDeleteFailed , nil
97
119
}
98
120
99
- if all (allStatuses , string ( StatusNotFound ) ) {
100
- return StatusCreateComplete , nil
121
+ if any (allStatuses , cloudformation . StackStatusDeleteInProgress ) {
122
+ return StatusDeleteInProgress , nil
101
123
}
102
124
103
125
if all (allStatuses , cloudformation .StackStatusCreateComplete ) {
@@ -108,45 +130,54 @@ func getStatus(statusMap map[string]string, controlPlane string) (Status, error)
108
130
return StatusDeleteComplete , nil
109
131
}
110
132
111
- if any (allStatuses , cloudformation .StackStatusDeleteInProgress ) {
133
+ // nodegroup stacks are deleted first while control plane stack is still in create complete state
134
+ if controlPlaneStatus == cloudformation .StackStatusCreateComplete &&
135
+ all (nodeGroupStatuses , cloudformation .StackStatusDeleteInProgress , cloudformation .StackStatusDeleteComplete ) {
112
136
return StatusDeleteInProgress , nil
113
137
}
114
138
115
- // controlplane stack may be in complete state while nodegroup stacks are still in status not found
116
- if all ( controlPlaneStatus , cloudformation . StackStatusCreateComplete , cloudformation .StackStatusCreateInProgress ) &&
117
- all (nodeGroupStatuses , cloudformation .StackStatusCreateInProgress , string ( StatusNotFound ), cloudformation .StackStatusCreateComplete ) {
139
+ // controlplane stack may be in complete state while nodegroup stacks are still being created, or one nodegroup may finish before the other
140
+ if controlPlaneStatus == cloudformation .StackStatusCreateComplete &&
141
+ all (nodeGroupStatuses , cloudformation .StackStatusCreateInProgress , cloudformation .StackStatusCreateComplete ) {
118
142
return StatusCreateInProgress , nil
119
143
}
120
144
121
145
return StatusNotFound , ErrorUnexpectedCloudFormationStatus (s .ObjFlat (statusMap ))
122
146
}
123
147
124
- func GetClusterState (awsClient * aws.Client , clusterConfig * clusterconfig.Config ) (* ClusterState , error ) {
125
- controlPlaneStackName := fmt .Sprintf (controlPlaneTemplate , clusterConfig .ClusterName )
126
- operatorStackName := fmt .Sprintf (operatorTemplate , clusterConfig .ClusterName )
127
- spotStackName := fmt .Sprintf (spotTemplate , clusterConfig .ClusterName )
128
- onDemandStackName := fmt .Sprintf (onDemandTemplate , clusterConfig .ClusterName )
129
-
130
- nodeGroupStackNames := []string {operatorStackName }
131
- if clusterConfig .Spot != nil && * clusterConfig .Spot {
132
- nodeGroupStackNames = append (nodeGroupStackNames , spotStackName )
133
- if clusterConfig .SpotConfig != nil && clusterConfig .SpotConfig .OnDemandBackup != nil && * clusterConfig .SpotConfig .OnDemandBackup {
134
- nodeGroupStackNames = append (nodeGroupStackNames , onDemandStackName )
135
- }
136
- } else {
137
- nodeGroupStackNames = append (nodeGroupStackNames , onDemandStackName )
138
- }
148
+ func GetClusterState (awsClient * aws.Client , accessConfig * clusterconfig.AccessConfig ) (* ClusterState , error ) {
149
+ controlPlaneStackName := fmt .Sprintf (controlPlaneTemplate , * accessConfig .ClusterName )
150
+ operatorStackName := fmt .Sprintf (operatorTemplate , * accessConfig .ClusterName )
151
+ spotStackName := fmt .Sprintf (spotTemplate , * accessConfig .ClusterName )
152
+ onDemandStackName := fmt .Sprintf (onDemandTemplate , * accessConfig .ClusterName )
139
153
140
- stackSummaries , err := awsClient .ListEKSStacks (controlPlaneStackName , nodeGroupStackNames ... )
154
+ nodeGroupStackNamesSet := strset .New (operatorStackName , spotStackName , onDemandStackName )
155
+
156
+ stackSummaries , err := awsClient .ListEKSStacks (controlPlaneStackName , nodeGroupStackNamesSet )
141
157
if err != nil {
142
158
return nil , errors .Wrap (err , "unable to get cluster state from cloudformation" )
143
159
}
144
160
145
161
statusMap := map [string ]string {}
146
- statusMap [controlPlaneStackName ] = getStatusFromSummaries (stackSummaries , controlPlaneStackName )
162
+ nodeGroupStackNames := []string {}
163
+ var controlPlaneCreationTime time.Time
164
+
165
+ for _ , stackSummary := range stackSummaries {
166
+ statusMap [* stackSummary .StackName ] = * stackSummary .StackStatus
167
+ if * stackSummary .StackName == controlPlaneStackName {
168
+ controlPlaneCreationTime = * stackSummary .CreationTime
169
+ } else {
170
+ nodeGroupStackNames = append (nodeGroupStackNames , * stackSummary .StackName )
171
+ }
172
+ }
147
173
148
- for _ , nodeGroupName := range nodeGroupStackNames {
149
- statusMap [nodeGroupName ] = getStatusFromSummaries (stackSummaries , nodeGroupName )
174
+ if _ , ok := statusMap [controlPlaneStackName ]; ! ok {
175
+ statusMap [controlPlaneStackName ] = string (StatusNotFound )
176
+ }
177
+
178
+ // add a timeout for situations where the control plane is listed in the cloudformation stacks but not the nodegroup stacks
179
+ if ! is (statusMap [controlPlaneStackName ], string (StatusNotFound ), cloudformation .StackStatusDeleteComplete ) && len (nodeGroupStackNames ) == 0 && time .Now ().After (controlPlaneCreationTime .Add (30 * time .Minute )) {
180
+ statusMap [operatorStackName ] = string (StatusCreateFailedTimedOut )
150
181
}
151
182
152
183
status , err := getStatus (statusMap , controlPlaneStackName )
@@ -161,13 +192,3 @@ func GetClusterState(awsClient *aws.Client, clusterConfig *clusterconfig.Config)
161
192
Status : status ,
162
193
}, nil
163
194
}
164
-
165
- func getStatusFromSummaries (stackSummaries []* cloudformation.StackSummary , stackName string ) string {
166
- for _ , stackSummary := range stackSummaries {
167
- if * stackSummary .StackName == stackName {
168
- return * stackSummary .StackStatus
169
- }
170
- }
171
-
172
- return string (StatusNotFound )
173
- }
0 commit comments