Skip to content

Commit 9ea4636

Browse files
authored
Add Requeue logic for Job Style resources till completion (#10)
* Add requeue logic for job-style resources until completion * Add custom describe hooks so that requeueing stops correctly on job completion; also add a Debugger check * Address review comments
1 parent 22d2882 commit 9ea4636

File tree

13 files changed

+514
-1
lines changed

13 files changed

+514
-1
lines changed

apis/v1alpha1/endpoint.go

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/v1alpha1/training_job.go

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/v1alpha1/zz_generated.deepcopy.go

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/sagemaker.services.k8s.aws_trainingjobs.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ spec:
1717
scope: Namespaced
1818
versions:
1919
- additionalPrinterColumns:
20+
- jsonPath: .status.failureReason
21+
name: FailureReason
22+
type: string
2023
- jsonPath: .status.secondaryStatus
2124
name: SecondaryStatus
2225
type: string
@@ -430,6 +433,24 @@ spec:
430433
- type
431434
type: object
432435
type: array
436+
debugRuleEvaluationStatuses:
437+
description: Evaluation status of Debugger rules for debugging on
438+
a training job.
439+
items:
440+
properties:
441+
lastModifiedTime:
442+
format: date-time
443+
type: string
444+
ruleConfigurationName:
445+
type: string
446+
ruleEvaluationJobARN:
447+
type: string
448+
ruleEvaluationStatus:
449+
type: string
450+
statusDetails:
451+
type: string
452+
type: object
453+
type: array
433454
failureReason:
434455
description: If the training job failed, the reason it failed.
435456
type: string

generator.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,22 @@
11
operations:
2+
CreateTrainingJob:
3+
set_output_custom_method_name: customCreateTrainingJobSetOutput
4+
DescribeTrainingJob:
5+
set_output_custom_method_name: customDescribeTrainingJobSetOutput
26
StopTrainingJob:
37
operation_type: Delete
48
resource_name: TrainingJob
9+
CreateProcessingJob:
10+
set_output_custom_method_name: customCreateProcessingJobSetOutput
11+
DescribeProcessingJob:
12+
set_output_custom_method_name: customDescribeProcessingJobSetOutput
513
StopProcessingJob:
614
operation_type: Delete
715
resource_name: ProcessingJob
16+
CreateTransformJob:
17+
set_output_custom_method_name: customCreateTransformJobSetOutput
18+
DescribeTransformJob:
19+
set_output_custom_method_name: customDescribeTransformJobSetOutput
820
StopTransformJob:
921
operation_type: Delete
1022
resource_name: TransformJob
@@ -19,6 +31,10 @@ operations:
1931
RetainAllVariantProperties: true
2032
DeleteEndpoint:
2133
custom_implementation: customDeleteEndpoint
34+
CreateHyperParameterTuningJob:
35+
set_output_custom_method_name: customCreateHyperParameterTuningJobSetOutput
36+
DescribeHyperParameterTuningJob:
37+
set_output_custom_method_name: customDescribeHyperParameterTuningJobSetOutput
2238
StopHyperParameterTuningJob:
2339
operation_type: Delete
2440
resource_name: HyperParameterTuningJob
@@ -155,8 +171,14 @@ resources:
155171
from:
156172
operation: DescribeTrainingJob
157173
path: SecondaryStatus
174+
DebugRuleEvaluationStatuses:
175+
is_read_only: true
176+
from:
177+
operation: DescribeTrainingJob
178+
path: DebugRuleEvaluationStatuses
158179
FailureReason:
159180
is_read_only: true
181+
is_printable: true
160182
from:
161183
operation: DescribeTrainingJob
162184
path: FailureReason
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
// not use this file except in compliance with the License. A copy of the
5+
// License is located at
6+
//
7+
// http://aws.amazon.com/apache2.0/
8+
//
9+
// or in the "license" file accompanying this file. This file is distributed
10+
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
// express or implied. See the License for the specific language governing
12+
// permissions and limitations under the License.
13+
14+
// Use this file if the Status/Spec of the CR needs to be modified after
15+
// create/describe/update operation
16+
17+
package hyper_parameter_tuning_job
18+
19+
import (
20+
"context"
21+
22+
ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1"
23+
svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1"
24+
"github.com/aws/aws-sdk-go/aws"
25+
svcsdk "github.com/aws/aws-sdk-go/service/sagemaker"
26+
corev1 "k8s.io/api/core/v1"
27+
)
28+
29+
// customCreateHyperParameterTuningJobSetOutput sets the resource in TempOutofSync if HyperParameterTuningJob is
30+
// in creating state. At this stage we know call to createHyperParameterTuningJob was successful.
31+
func (rm *resourceManager) customCreateHyperParameterTuningJobSetOutput(
32+
ctx context.Context,
33+
r *resource,
34+
resp *svcsdk.CreateHyperParameterTuningJobOutput,
35+
ko *svcapitypes.HyperParameterTuningJob,
36+
) (*svcapitypes.HyperParameterTuningJob, error) {
37+
rm.customSetOutput(r, aws.String(svcsdk.HyperParameterTuningJobStatusInProgress), ko)
38+
return ko, nil
39+
}
40+
41+
// customDescribeHyperParameterTuningJobSetOutput sets the resource in TempOutofSync if
42+
// HyperParameterTuningJob is being modified by AWS.
43+
func (rm *resourceManager) customDescribeHyperParameterTuningJobSetOutput(
44+
ctx context.Context,
45+
r *resource,
46+
resp *svcsdk.DescribeHyperParameterTuningJobOutput,
47+
ko *svcapitypes.HyperParameterTuningJob,
48+
) (*svcapitypes.HyperParameterTuningJob, error) {
49+
rm.customSetOutput(r, resp.HyperParameterTuningJobStatus, ko)
50+
return ko, nil
51+
}
52+
53+
// customSetOutput sets ConditionTypeResourceSynced condition to True or False
54+
// based on the hyperParameterTuningJobStatus on AWS so the reconciler can determine if a
55+
// requeue is needed
56+
func (rm *resourceManager) customSetOutput(
57+
r *resource,
58+
hyperParameterTuningJobStatus *string,
59+
ko *svcapitypes.HyperParameterTuningJob,
60+
) {
61+
if hyperParameterTuningJobStatus == nil {
62+
return
63+
}
64+
65+
syncConditionStatus := corev1.ConditionUnknown
66+
if *hyperParameterTuningJobStatus == svcsdk.HyperParameterTuningJobStatusCompleted || *hyperParameterTuningJobStatus == svcsdk.HyperParameterTuningJobStatusStopped || *hyperParameterTuningJobStatus == svcsdk.HyperParameterTuningJobStatusFailed {
67+
syncConditionStatus = corev1.ConditionTrue
68+
} else {
69+
syncConditionStatus = corev1.ConditionFalse
70+
}
71+
72+
var resourceSyncedCondition *ackv1alpha1.Condition = nil
73+
if ko.Status.Conditions == nil {
74+
ko.Status.Conditions = []*ackv1alpha1.Condition{}
75+
} else {
76+
for _, condition := range ko.Status.Conditions {
77+
if condition.Type == ackv1alpha1.ConditionTypeResourceSynced {
78+
resourceSyncedCondition = condition
79+
break
80+
}
81+
}
82+
}
83+
84+
if resourceSyncedCondition == nil {
85+
resourceSyncedCondition = &ackv1alpha1.Condition{
86+
Type: ackv1alpha1.ConditionTypeResourceSynced,
87+
}
88+
ko.Status.Conditions = append(ko.Status.Conditions, resourceSyncedCondition)
89+
}
90+
resourceSyncedCondition.Status = syncConditionStatus
91+
92+
}

pkg/resource/hyper_parameter_tuning_job/sdk.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License"). You may
4+
// not use this file except in compliance with the License. A copy of the
5+
// License is located at
6+
//
7+
// http://aws.amazon.com/apache2.0/
8+
//
9+
// or in the "license" file accompanying this file. This file is distributed
10+
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
11+
// express or implied. See the License for the specific language governing
12+
// permissions and limitations under the License.
13+
14+
// Use this file if the Status/Spec of the CR needs to be modified after
15+
// create/describe/update operation
16+
17+
package processing_job
18+
19+
import (
20+
"context"
21+
22+
ackv1alpha1 "github.com/aws-controllers-k8s/runtime/apis/core/v1alpha1"
23+
svcapitypes "github.com/aws-controllers-k8s/sagemaker-controller/apis/v1alpha1"
24+
"github.com/aws/aws-sdk-go/aws"
25+
svcsdk "github.com/aws/aws-sdk-go/service/sagemaker"
26+
corev1 "k8s.io/api/core/v1"
27+
)
28+
29+
// customCreateProcessingJobSetOutput sets the resource in TempOutofSync if ProcessingJob is
30+
// in creating state. At this stage we know call to createProcessingJob was successful.
31+
func (rm *resourceManager) customCreateProcessingJobSetOutput(
32+
ctx context.Context,
33+
r *resource,
34+
resp *svcsdk.CreateProcessingJobOutput,
35+
ko *svcapitypes.ProcessingJob,
36+
) (*svcapitypes.ProcessingJob, error) {
37+
rm.customSetOutput(r, aws.String(svcsdk.ProcessingJobStatusInProgress), ko)
38+
return ko, nil
39+
}
40+
41+
// customDescribeProcessingJobSetOutput sets the resource in TempOutofSync if
42+
// ProcessingJob is being modified by AWS.
43+
func (rm *resourceManager) customDescribeProcessingJobSetOutput(
44+
ctx context.Context,
45+
r *resource,
46+
resp *svcsdk.DescribeProcessingJobOutput,
47+
ko *svcapitypes.ProcessingJob,
48+
) (*svcapitypes.ProcessingJob, error) {
49+
rm.customSetOutput(r, resp.ProcessingJobStatus, ko)
50+
return ko, nil
51+
}
52+
53+
// customSetOutput sets ConditionTypeResourceSynced condition to True or False
54+
// based on the processingJobStatus on AWS so the reconciler can determine if a
55+
// requeue is needed
56+
func (rm *resourceManager) customSetOutput(
57+
r *resource,
58+
processingJobStatus *string,
59+
ko *svcapitypes.ProcessingJob,
60+
) {
61+
if processingJobStatus == nil {
62+
return
63+
}
64+
65+
syncConditionStatus := corev1.ConditionUnknown
66+
if *processingJobStatus == svcsdk.ProcessingJobStatusCompleted || *processingJobStatus == svcsdk.ProcessingJobStatusStopped || *processingJobStatus == svcsdk.ProcessingJobStatusFailed {
67+
syncConditionStatus = corev1.ConditionTrue
68+
} else {
69+
syncConditionStatus = corev1.ConditionFalse
70+
}
71+
72+
var resourceSyncedCondition *ackv1alpha1.Condition = nil
73+
if ko.Status.Conditions == nil {
74+
ko.Status.Conditions = []*ackv1alpha1.Condition{}
75+
} else {
76+
for _, condition := range ko.Status.Conditions {
77+
if condition.Type == ackv1alpha1.ConditionTypeResourceSynced {
78+
resourceSyncedCondition = condition
79+
break
80+
}
81+
}
82+
}
83+
84+
if resourceSyncedCondition == nil {
85+
resourceSyncedCondition = &ackv1alpha1.Condition{
86+
Type: ackv1alpha1.ConditionTypeResourceSynced,
87+
}
88+
ko.Status.Conditions = append(ko.Status.Conditions, resourceSyncedCondition)
89+
}
90+
resourceSyncedCondition.Status = syncConditionStatus
91+
92+
}

pkg/resource/processing_job/sdk.go

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)