From bebf6c023a40a4569fb25efc9fa748b3423b05f9 Mon Sep 17 00:00:00 2001 From: avelichk Date: Mon, 9 Dec 2019 18:06:12 +0000 Subject: [PATCH 1/4] Increase Suggestion memLimit --- pkg/controller.v1alpha3/suggestion/composer/composer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller.v1alpha3/suggestion/composer/composer.go b/pkg/controller.v1alpha3/suggestion/composer/composer.go index e4dac0c6246..2afc9800c6e 100644 --- a/pkg/controller.v1alpha3/suggestion/composer/composer.go +++ b/pkg/controller.v1alpha3/suggestion/composer/composer.go @@ -28,7 +28,7 @@ const ( cpuLimit = "500m" cpuRequest = "50m" - memLimit = "100Mi" + memLimit = "200Mi" memRequest = "10Mi" ) From f1bd3ca8a85a68a00c3ce20666fc8e56ef4de81f Mon Sep 17 00:00:00 2001 From: avelichk Date: Tue, 10 Dec 2019 16:18:16 +0000 Subject: [PATCH 2/4] Create getSuggestionConfigData function --- .../katib-controller/katib-config.yaml | 6 +- pkg/controller.v1alpha3/consts/const.go | 16 +++++ .../experiment/manifest/generator.go | 6 +- .../suggestion/composer/composer.go | 21 +++---- .../v1alpha3/experiment/manifest/generator.go | 14 ++--- pkg/util/v1alpha3/katibconfig/config.go | 59 +++++++++++++++---- .../experiment/validator/validator.go | 2 +- .../experiment/validator/validator_test.go | 6 +- 8 files changed, 95 insertions(+), 35 deletions(-) diff --git a/manifests/v1alpha3/katib-controller/katib-config.yaml b/manifests/v1alpha3/katib-controller/katib-config.yaml index 09b385b805f..53020bb0fdc 100644 --- a/manifests/v1alpha3/katib-controller/katib-config.yaml +++ b/manifests/v1alpha3/katib-controller/katib-config.yaml @@ -34,6 +34,10 @@ data: "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt" }, "nasrl": { - "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl" + "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl", + "cpuLimit": "130m", + "cpuRequest": "15m", + "memLimit": "150Mi", + "memRequest": "13Mi" } } diff --git a/pkg/controller.v1alpha3/consts/const.go b/pkg/controller.v1alpha3/consts/const.go index 9486ae8bb01..4c2f617d3bb 100644 --- a/pkg/controller.v1alpha3/consts/const.go +++ b/pkg/controller.v1alpha3/consts/const.go @@ -33,6 +33,22 @@ const ( LabelSuggestionTag = "suggestion" // LabelSuggestionImageTag is the name of suggestion image config in configmap. LabelSuggestionImageTag = "image" + // LabelSuggestionCPULimitTag is the name of suggestion CPU Limit config in configmap. + LabelSuggestionCPULimitTag = "cpuLimit" + // DefaultCPULimit is the default value for CPU Limit + DefaultCPULimit = "500m" + // LabelSuggestionCPURequestTag is the name of suggestion CPU Request config in configmap. + LabelSuggestionCPURequestTag = "cpuRequest" + // DefaultCPURequest is the default value for CPU Request + DefaultCPURequest = "50m" + // LabelSuggestionMemLimitTag is the name of suggestion Mem Limit config in configmap. + LabelSuggestionMemLimitTag = "memLimit" + // DefaultMemLimit is the default value for mem Limit + DefaultMemLimit = "100Mi" + // LabelSuggestionMemRequestTag is the name of suggestion Mem Request config in configmap. + LabelSuggestionMemRequestTag = "memRequest" + // DefaultMemRequest is the default value for mem Request + DefaultMemRequest = "10Mi" // LabelMetricsCollectorSidecar is the name of metrics collector config in configmap. LabelMetricsCollectorSidecar = "metrics-collector-sidecar" // LabelMetricsCollectorSidecarImage is the name of metrics collector image config in configmap. diff --git a/pkg/controller.v1alpha3/experiment/manifest/generator.go b/pkg/controller.v1alpha3/experiment/manifest/generator.go index 406ecb78a95..69765d2d84d 100644 --- a/pkg/controller.v1alpha3/experiment/manifest/generator.go +++ b/pkg/controller.v1alpha3/experiment/manifest/generator.go @@ -23,7 +23,7 @@ type Generator interface { InjectClient(c client.Client) GetRunSpec(e *experimentsv1alpha3.Experiment, experiment, trial, namespace string) (string, error) GetRunSpecWithHyperParameters(e *experimentsv1alpha3.Experiment, experiment, trial, namespace string, hps []commonapiv1alpha3.ParameterAssignment) (string, error) - GetSuggestionContainerImage(algorithmName string) (string, error) + GetSuggestionConfigData(algorithmName string) (map[string]string, error) GetMetricsCollectorImage(cKind commonapiv1alpha3.CollectorKind) (string, error) } @@ -48,8 +48,8 @@ func (g *DefaultGenerator) GetMetricsCollectorImage(cKind commonapiv1alpha3.Coll return katibconfig.GetMetricsCollectorImage(cKind, g.client.GetClient()) } -func (g *DefaultGenerator) GetSuggestionContainerImage(algorithmName string) (string, error) { - return katibconfig.GetSuggestionContainerImage(algorithmName, g.client.GetClient()) +func (g *DefaultGenerator) GetSuggestionConfigData(algorithmName string) (map[string]string, error) { + return katibconfig.GetSuggestionConfigData(algorithmName, g.client.GetClient()) } // GetRunSpec get the specification for trial. diff --git a/pkg/controller.v1alpha3/suggestion/composer/composer.go b/pkg/controller.v1alpha3/suggestion/composer/composer.go index 2afc9800c6e..0934c5b1b00 100644 --- a/pkg/controller.v1alpha3/suggestion/composer/composer.go +++ b/pkg/controller.v1alpha3/suggestion/composer/composer.go @@ -25,11 +25,6 @@ const ( defaultFailureThreshold = 12 // Ref https://github.com/grpc-ecosystem/grpc-health-probe/ defaultGRPCHealthCheckProbe = "/bin/grpc_health_probe" - - cpuLimit = "500m" - cpuRequest = "50m" - memLimit = "200Mi" - memRequest = "10Mi" ) var log = logf.Log.WithName("suggestion-composer") @@ -114,10 +109,16 @@ func (g *General) DesiredService(s *suggestionsv1alpha3.Suggestion) (*corev1.Ser } func (g *General) desiredContainer(s *suggestionsv1alpha3.Suggestion) (*corev1.Container, error) { - suggestionContainerImage, err := katibconfig.GetSuggestionContainerImage(s.Spec.AlgorithmName, g.Client) + suggestionConfigData, err := katibconfig.GetSuggestionConfigData(s.Spec.AlgorithmName, g.Client) if err != nil { return nil, err } + // Get Suggestion data from config + suggestionContainerImage := suggestionConfigData[consts.LabelSuggestionImageTag] + suggestionCPULimit := suggestionConfigData[consts.LabelSuggestionCPULimitTag] + suggestionCPURequest := suggestionConfigData[consts.LabelSuggestionCPURequestTag] + suggestionMemLimit := suggestionConfigData[consts.LabelSuggestionMemLimitTag] + suggestionMemRequest := suggestionConfigData[consts.LabelSuggestionMemRequestTag] c := &corev1.Container{ Name: consts.ContainerSuggestion, } @@ -130,19 +131,19 @@ func (g *General) desiredContainer(s *suggestionsv1alpha3.Suggestion) (*corev1.C }, } - cpuLimitQuantity, err := resource.ParseQuantity(cpuLimit) + cpuLimitQuantity, err := resource.ParseQuantity(suggestionCPULimit) if err != nil { return nil, err } - cpuRequestQuantity, err := resource.ParseQuantity(cpuRequest) + cpuRequestQuantity, err := resource.ParseQuantity(suggestionCPURequest) if err != nil { return nil, err } - memLimitQuantity, err := resource.ParseQuantity(memLimit) + memLimitQuantity, err := resource.ParseQuantity(suggestionMemLimit) if err != nil { return nil, err } - memRequestQuantity, err := resource.ParseQuantity(memRequest) + memRequestQuantity, err := resource.ParseQuantity(suggestionMemRequest) if err != nil { return nil, err } diff --git a/pkg/mock/v1alpha3/experiment/manifest/generator.go b/pkg/mock/v1alpha3/experiment/manifest/generator.go index deda4bd3729..0f03d5965fe 100644 --- a/pkg/mock/v1alpha3/experiment/manifest/generator.go +++ b/pkg/mock/v1alpha3/experiment/manifest/generator.go @@ -80,19 +80,19 @@ func (mr *MockGeneratorMockRecorder) GetRunSpecWithHyperParameters(arg0, arg1, a return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRunSpecWithHyperParameters", reflect.TypeOf((*MockGenerator)(nil).GetRunSpecWithHyperParameters), arg0, arg1, arg2, arg3, arg4) } -// GetSuggestionContainerImage mocks base method -func (m *MockGenerator) GetSuggestionContainerImage(arg0 string) (string, error) { +// GetSuggestionConfigData mocks base method +func (m *MockGenerator) GetSuggestionConfigData(arg0 string) (map[string]string, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetSuggestionContainerImage", arg0) - ret0, _ := ret[0].(string) + ret := m.ctrl.Call(m, "GetSuggestionConfigData", arg0) + ret0, _ := ret[0].(map[string]string) ret1, _ := ret[1].(error) return ret0, ret1 } -// GetSuggestionContainerImage indicates an expected call of GetSuggestionContainerImage -func (mr *MockGeneratorMockRecorder) GetSuggestionContainerImage(arg0 interface{}) *gomock.Call { +// GetSuggestionConfigData indicates an expected call of GetSuggestionConfigData +func (mr *MockGeneratorMockRecorder) GetSuggestionConfigData(arg0 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSuggestionContainerImage", reflect.TypeOf((*MockGenerator)(nil).GetSuggestionContainerImage), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetSuggestionConfigData", reflect.TypeOf((*MockGenerator)(nil).GetSuggestionConfigData), arg0) } // InjectClient mocks base method diff --git a/pkg/util/v1alpha3/katibconfig/config.go b/pkg/util/v1alpha3/katibconfig/config.go index 29df9f2855a..6651a8ad516 100644 --- a/pkg/util/v1alpha3/katibconfig/config.go +++ b/pkg/util/v1alpha3/katibconfig/config.go @@ -14,36 +14,71 @@ import ( "github.com/kubeflow/katib/pkg/controller.v1alpha3/consts" ) -func GetSuggestionContainerImage(algorithmName string, client client.Client) (string, error) { +func GetSuggestionConfigData(algorithmName string, client client.Client) (map[string]string, error) { configMap := &corev1.ConfigMap{} + suggestionConfigData := map[string]string{} err := client.Get( context.TODO(), apitypes.NamespacedName{Name: consts.KatibConfigMapName, Namespace: consts.DefaultKatibNamespace}, configMap) if err != nil { - return "", err + return map[string]string{}, err } if config, ok := configMap.Data[consts.LabelSuggestionTag]; ok { - suggestionConfig := map[string]map[string]string{} - if err := json.Unmarshal([]byte(config), &suggestionConfig); err != nil { - return "", err + suggestionsConfig := map[string]map[string]string{} + if err := json.Unmarshal([]byte(config), &suggestionsConfig); err != nil { + return map[string]string{}, err } - if imageConfig, ok := suggestionConfig[algorithmName]; ok { - if image, yes := imageConfig[consts.LabelSuggestionImageTag]; yes { + if suggestionConfig, ok := suggestionsConfig[algorithmName]; ok { + // Get image from config + if image, yes := suggestionConfig[consts.LabelSuggestionImageTag]; yes { if strings.TrimSpace(image) != "" { - return image, nil + suggestionConfigData[consts.LabelSuggestionImageTag] = image } else { - return "", errors.New("Required value for " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName) + return map[string]string{}, errors.New("Required value for " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName) } } else { - return "", errors.New("Failed to find " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName) + return map[string]string{}, errors.New("Failed to find " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName) + } + // Get CPU Limit from config + cpuLimit, yes := suggestionConfig[consts.LabelSuggestionCPULimitTag] + if yes && strings.TrimSpace(cpuLimit) != "" { + suggestionConfigData[consts.LabelSuggestionCPULimitTag] = cpuLimit + } else { + // Set default value + suggestionConfigData[consts.LabelSuggestionCPULimitTag] = consts.DefaultCPULimit + } + // Get CPU Request from config + cpuRequest, yes := suggestionConfig[consts.LabelSuggestionCPURequestTag] + if yes && strings.TrimSpace(cpuRequest) != "" { + suggestionConfigData[consts.LabelSuggestionCPURequestTag] = cpuRequest + } else { + // Set default value + suggestionConfigData[consts.LabelSuggestionCPURequestTag] = consts.DefaultCPURequest + } + // Get Mem Limit from config + memLimit, yes := suggestionConfig[consts.LabelSuggestionMemLimitTag] + if yes && strings.TrimSpace(memLimit) != "" { + suggestionConfigData[consts.LabelSuggestionMemLimitTag] = memLimit + } else { + // Set default value + suggestionConfigData[consts.LabelSuggestionMemLimitTag] = consts.DefaultMemLimit + } + // Get Mem Request from config + memRequest, yes := suggestionConfig[consts.LabelSuggestionMemRequestTag] + if yes && strings.TrimSpace(memRequest) != "" { + suggestionConfigData[consts.LabelSuggestionMemRequestTag] = memRequest + } else { + // Set default value + suggestionConfigData[consts.LabelSuggestionMemRequestTag] = consts.DefaultMemRequest } } else { - return "", errors.New("Failed to find algorithm image mapping " + algorithmName) + return map[string]string{}, errors.New("Failed to find algorithm " + algorithmName + " config in configmap " + consts.KatibConfigMapName) } } else { - return "", errors.New("Failed to find algorithm image mapping in configmap " + consts.KatibConfigMapName) + return map[string]string{}, errors.New("Failed to find suggestions config in configmap " + consts.KatibConfigMapName) } + return suggestionConfigData, nil } func GetMetricsCollectorImage(cKind common.CollectorKind, client client.Client) (string, error) { diff --git a/pkg/webhook/v1alpha3/experiment/validator/validator.go b/pkg/webhook/v1alpha3/experiment/validator/validator.go index fbff85a8944..140f9398dd2 100644 --- a/pkg/webhook/v1alpha3/experiment/validator/validator.go +++ b/pkg/webhook/v1alpha3/experiment/validator/validator.go @@ -89,7 +89,7 @@ func (g *DefaultValidator) validateAlgorithm(ag *commonapiv1alpha3.AlgorithmSpec return fmt.Errorf("No spec.algorithm.name specified.") } - if _, err := g.GetSuggestionContainerImage(ag.AlgorithmName); err != nil { + if _, err := g.GetSuggestionConfigData(ag.AlgorithmName); err != nil { return fmt.Errorf("Don't support algorithm %s: %v.", ag.AlgorithmName, err) } diff --git a/pkg/webhook/v1alpha3/experiment/validator/validator_test.go b/pkg/webhook/v1alpha3/experiment/validator/validator_test.go index f2526f5bfa3..171c588f02f 100644 --- a/pkg/webhook/v1alpha3/experiment/validator/validator_test.go +++ b/pkg/webhook/v1alpha3/experiment/validator/validator_test.go @@ -9,6 +9,7 @@ import ( commonv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/common/v1alpha3" experimentsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/experiments/v1alpha3" + "github.com/kubeflow/katib/pkg/controller.v1alpha3/consts" manifestmock "github.com/kubeflow/katib/pkg/mock/v1alpha3/experiment/manifest" ) @@ -84,8 +85,11 @@ metadata: name: "fake-trial" namespace: fakens` + suggestionConfigData := map[string]string{} + suggestionConfigData[consts.LabelSuggestionImageTag] = "algorithmImage" + p.EXPECT().GetRunSpec(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(trialJobTemplate, nil).AnyTimes() - p.EXPECT().GetSuggestionContainerImage(gomock.Any()).Return("algorithmImage", nil).AnyTimes() + p.EXPECT().GetSuggestionConfigData(gomock.Any()).Return(suggestionConfigData, nil).AnyTimes() p.EXPECT().GetMetricsCollectorImage(gomock.Any()).Return("metricsCollectorImage", nil).AnyTimes() tcs := []struct { From cafd115539e87d8cd06b5515d7fabef4191c6080 Mon Sep 17 00:00:00 2001 From: avelichk Date: Tue, 10 Dec 2019 16:30:10 +0000 Subject: [PATCH 3/4] Change memLimit for nasrl --- manifests/v1alpha3/katib-controller/katib-config.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/manifests/v1alpha3/katib-controller/katib-config.yaml b/manifests/v1alpha3/katib-controller/katib-config.yaml index 53020bb0fdc..33766a02afe 100644 --- a/manifests/v1alpha3/katib-controller/katib-config.yaml +++ b/manifests/v1alpha3/katib-controller/katib-config.yaml @@ -35,9 +35,6 @@ data: }, "nasrl": { "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl", - "cpuLimit": "130m", - "cpuRequest": "15m", - "memLimit": "150Mi", - "memRequest": "13Mi" + "memLimit": "200Mi" } } From f08fd42ca7da88633b80d600e88fa9671f5f6885 Mon Sep 17 00:00:00 2001 From: avelichk Date: Wed, 11 Dec 2019 13:18:27 +0000 Subject: [PATCH 4/4] Change resources format for katib-config --- .../katib-controller/katib-config.yaml | 6 +- pkg/util/v1alpha3/katibconfig/config.go | 68 +++++++++---------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/manifests/v1alpha3/katib-controller/katib-config.yaml b/manifests/v1alpha3/katib-controller/katib-config.yaml index 33766a02afe..ea8c6bfe0e5 100644 --- a/manifests/v1alpha3/katib-controller/katib-config.yaml +++ b/manifests/v1alpha3/katib-controller/katib-config.yaml @@ -35,6 +35,10 @@ data: }, "nasrl": { "image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl", - "memLimit": "200Mi" + "resources": { + "limits": { + "memory": "200Mi" + } + } } } diff --git a/pkg/util/v1alpha3/katibconfig/config.go b/pkg/util/v1alpha3/katibconfig/config.go index 6651a8ad516..a1e5f0b0829 100644 --- a/pkg/util/v1alpha3/katibconfig/config.go +++ b/pkg/util/v1alpha3/katibconfig/config.go @@ -24,54 +24,50 @@ func GetSuggestionConfigData(algorithmName string, client client.Client) (map[st if err != nil { return map[string]string{}, err } + type suggestionConfigJSON struct { + Image string `json:"image"` + Resource corev1.ResourceRequirements `json:"resources"` + } if config, ok := configMap.Data[consts.LabelSuggestionTag]; ok { - suggestionsConfig := map[string]map[string]string{} + suggestionsConfig := map[string]suggestionConfigJSON{} if err := json.Unmarshal([]byte(config), &suggestionsConfig); err != nil { return map[string]string{}, err } if suggestionConfig, ok := suggestionsConfig[algorithmName]; ok { // Get image from config - if image, yes := suggestionConfig[consts.LabelSuggestionImageTag]; yes { - if strings.TrimSpace(image) != "" { - suggestionConfigData[consts.LabelSuggestionImageTag] = image - } else { - return map[string]string{}, errors.New("Required value for " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName) - } + image := suggestionConfig.Image + if strings.TrimSpace(image) != "" { + suggestionConfigData[consts.LabelSuggestionImageTag] = image } else { - return map[string]string{}, errors.New("Failed to find " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName) + return map[string]string{}, errors.New("Required value for " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName) } - // Get CPU Limit from config - cpuLimit, yes := suggestionConfig[consts.LabelSuggestionCPULimitTag] - if yes && strings.TrimSpace(cpuLimit) != "" { - suggestionConfigData[consts.LabelSuggestionCPULimitTag] = cpuLimit - } else { - // Set default value - suggestionConfigData[consts.LabelSuggestionCPULimitTag] = consts.DefaultCPULimit + + // Set default values for CPU and Memory + suggestionConfigData[consts.LabelSuggestionCPURequestTag] = consts.DefaultCPURequest + suggestionConfigData[consts.LabelSuggestionMemRequestTag] = consts.DefaultMemRequest + suggestionConfigData[consts.LabelSuggestionCPULimitTag] = consts.DefaultCPULimit + suggestionConfigData[consts.LabelSuggestionMemLimitTag] = consts.DefaultMemLimit + + // Get CPU and Memory Requests from config + cpuRequest := suggestionConfig.Resource.Requests[corev1.ResourceCPU] + memRequest := suggestionConfig.Resource.Requests[corev1.ResourceMemory] + if !cpuRequest.IsZero() { + suggestionConfigData[consts.LabelSuggestionCPURequestTag] = cpuRequest.String() } - // Get CPU Request from config - cpuRequest, yes := suggestionConfig[consts.LabelSuggestionCPURequestTag] - if yes && strings.TrimSpace(cpuRequest) != "" { - suggestionConfigData[consts.LabelSuggestionCPURequestTag] = cpuRequest - } else { - // Set default value - suggestionConfigData[consts.LabelSuggestionCPURequestTag] = consts.DefaultCPURequest + if !memRequest.IsZero() { + suggestionConfigData[consts.LabelSuggestionMemRequestTag] = memRequest.String() } - // Get Mem Limit from config - memLimit, yes := suggestionConfig[consts.LabelSuggestionMemLimitTag] - if yes && strings.TrimSpace(memLimit) != "" { - suggestionConfigData[consts.LabelSuggestionMemLimitTag] = memLimit - } else { - // Set default value - suggestionConfigData[consts.LabelSuggestionMemLimitTag] = consts.DefaultMemLimit + + // Get CPU and Memory Limits from config + cpuLimit := suggestionConfig.Resource.Limits[corev1.ResourceCPU] + memLimit := suggestionConfig.Resource.Limits[corev1.ResourceMemory] + if !cpuLimit.IsZero() { + suggestionConfigData[consts.LabelSuggestionCPULimitTag] = cpuLimit.String() } - // Get Mem Request from config - memRequest, yes := suggestionConfig[consts.LabelSuggestionMemRequestTag] - if yes && strings.TrimSpace(memRequest) != "" { - suggestionConfigData[consts.LabelSuggestionMemRequestTag] = memRequest - } else { - // Set default value - suggestionConfigData[consts.LabelSuggestionMemRequestTag] = consts.DefaultMemRequest + if !memLimit.IsZero() { + suggestionConfigData[consts.LabelSuggestionMemLimitTag] = memLimit.String() } + } else { return map[string]string{}, errors.New("Failed to find algorithm " + algorithmName + " config in configmap " + consts.KatibConfigMapName) }