Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase Suggestion memory limit #958

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion manifests/v1alpha3/katib-controller/katib-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ data:
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt"
},
"nasrl": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl",
"resources": {
"limits": {
"memory": "200Mi"
}
}
}
}
16 changes: 16 additions & 0 deletions pkg/controller.v1alpha3/consts/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,22 @@ const (
LabelSuggestionTag = "suggestion"
// LabelSuggestionImageTag is the name of suggestion image config in configmap.
LabelSuggestionImageTag = "image"
// LabelSuggestionCPULimitTag is the key of the suggestion CPU limit in the map returned by GetSuggestionConfigData.
LabelSuggestionCPULimitTag = "cpuLimit"
// DefaultCPULimit is the default CPU limit for the suggestion container, used when the configmap does not specify one.
DefaultCPULimit = "500m"
// LabelSuggestionCPURequestTag is the key of the suggestion CPU request in the map returned by GetSuggestionConfigData.
LabelSuggestionCPURequestTag = "cpuRequest"
// DefaultCPURequest is the default CPU request for the suggestion container, used when the configmap does not specify one.
DefaultCPURequest = "50m"
// LabelSuggestionMemLimitTag is the key of the suggestion memory limit in the map returned by GetSuggestionConfigData.
LabelSuggestionMemLimitTag = "memLimit"
// DefaultMemLimit is the default memory limit for the suggestion container, used when the configmap does not specify one.
DefaultMemLimit = "100Mi"
// LabelSuggestionMemRequestTag is the key of the suggestion memory request in the map returned by GetSuggestionConfigData.
LabelSuggestionMemRequestTag = "memRequest"
// DefaultMemRequest is the default memory request for the suggestion container, used when the configmap does not specify one.
DefaultMemRequest = "10Mi"
// LabelMetricsCollectorSidecar is the name of metrics collector config in configmap.
LabelMetricsCollectorSidecar = "metrics-collector-sidecar"
// LabelMetricsCollectorSidecarImage is the name of metrics collector image config in configmap.
Expand Down
6 changes: 3 additions & 3 deletions pkg/controller.v1alpha3/experiment/manifest/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ type Generator interface {
InjectClient(c client.Client)
GetRunSpec(e *experimentsv1alpha3.Experiment, experiment, trial, namespace string) (string, error)
GetRunSpecWithHyperParameters(e *experimentsv1alpha3.Experiment, experiment, trial, namespace string, hps []commonapiv1alpha3.ParameterAssignment) (string, error)
GetSuggestionContainerImage(algorithmName string) (string, error)
GetSuggestionConfigData(algorithmName string) (map[string]string, error)
GetMetricsCollectorImage(cKind commonapiv1alpha3.CollectorKind) (string, error)
}

Expand All @@ -48,8 +48,8 @@ func (g *DefaultGenerator) GetMetricsCollectorImage(cKind commonapiv1alpha3.Coll
return katibconfig.GetMetricsCollectorImage(cKind, g.client.GetClient())
}

func (g *DefaultGenerator) GetSuggestionContainerImage(algorithmName string) (string, error) {
return katibconfig.GetSuggestionContainerImage(algorithmName, g.client.GetClient())
func (g *DefaultGenerator) GetSuggestionConfigData(algorithmName string) (map[string]string, error) {
return katibconfig.GetSuggestionConfigData(algorithmName, g.client.GetClient())
}

// GetRunSpec get the specification for trial.
Expand Down
21 changes: 11 additions & 10 deletions pkg/controller.v1alpha3/suggestion/composer/composer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,6 @@ const (
defaultFailureThreshold = 12
// Ref https://github.com/grpc-ecosystem/grpc-health-probe/
defaultGRPCHealthCheckProbe = "/bin/grpc_health_probe"

cpuLimit = "500m"
cpuRequest = "50m"
memLimit = "100Mi"
memRequest = "10Mi"
)

var log = logf.Log.WithName("suggestion-composer")
Expand Down Expand Up @@ -114,10 +109,16 @@ func (g *General) DesiredService(s *suggestionsv1alpha3.Suggestion) (*corev1.Ser
}

func (g *General) desiredContainer(s *suggestionsv1alpha3.Suggestion) (*corev1.Container, error) {
suggestionContainerImage, err := katibconfig.GetSuggestionContainerImage(s.Spec.AlgorithmName, g.Client)
suggestionConfigData, err := katibconfig.GetSuggestionConfigData(s.Spec.AlgorithmName, g.Client)
if err != nil {
return nil, err
}
// Get Suggestion data from config
suggestionContainerImage := suggestionConfigData[consts.LabelSuggestionImageTag]
suggestionCPULimit := suggestionConfigData[consts.LabelSuggestionCPULimitTag]
suggestionCPURequest := suggestionConfigData[consts.LabelSuggestionCPURequestTag]
suggestionMemLimit := suggestionConfigData[consts.LabelSuggestionMemLimitTag]
suggestionMemRequest := suggestionConfigData[consts.LabelSuggestionMemRequestTag]
c := &corev1.Container{
Name: consts.ContainerSuggestion,
}
Expand All @@ -130,19 +131,19 @@ func (g *General) desiredContainer(s *suggestionsv1alpha3.Suggestion) (*corev1.C
},
}

cpuLimitQuantity, err := resource.ParseQuantity(cpuLimit)
cpuLimitQuantity, err := resource.ParseQuantity(suggestionCPULimit)
if err != nil {
return nil, err
}
cpuRequestQuantity, err := resource.ParseQuantity(cpuRequest)
cpuRequestQuantity, err := resource.ParseQuantity(suggestionCPURequest)
if err != nil {
return nil, err
}
memLimitQuantity, err := resource.ParseQuantity(memLimit)
memLimitQuantity, err := resource.ParseQuantity(suggestionMemLimit)
if err != nil {
return nil, err
}
memRequestQuantity, err := resource.ParseQuantity(memRequest)
memRequestQuantity, err := resource.ParseQuantity(suggestionMemRequest)
if err != nil {
return nil, err
}
Expand Down
14 changes: 7 additions & 7 deletions pkg/mock/v1alpha3/experiment/manifest/generator.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

61 changes: 46 additions & 15 deletions pkg/util/v1alpha3/katibconfig/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,67 @@ import (
"github.com/kubeflow/katib/pkg/controller.v1alpha3/consts"
)

func GetSuggestionContainerImage(algorithmName string, client client.Client) (string, error) {
func GetSuggestionConfigData(algorithmName string, client client.Client) (map[string]string, error) {
configMap := &corev1.ConfigMap{}
suggestionConfigData := map[string]string{}
err := client.Get(
context.TODO(),
apitypes.NamespacedName{Name: consts.KatibConfigMapName, Namespace: consts.DefaultKatibNamespace},
configMap)
if err != nil {
return "", err
return map[string]string{}, err
}
type suggestionConfigJSON struct {
Image string `json:"image"`
Resource corev1.ResourceRequirements `json:"resources"`
}
if config, ok := configMap.Data[consts.LabelSuggestionTag]; ok {
suggestionConfig := map[string]map[string]string{}
if err := json.Unmarshal([]byte(config), &suggestionConfig); err != nil {
return "", err
suggestionsConfig := map[string]suggestionConfigJSON{}
if err := json.Unmarshal([]byte(config), &suggestionsConfig); err != nil {
return map[string]string{}, err
}
if imageConfig, ok := suggestionConfig[algorithmName]; ok {
if image, yes := imageConfig[consts.LabelSuggestionImageTag]; yes {
if strings.TrimSpace(image) != "" {
return image, nil
} else {
return "", errors.New("Required value for " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName)
}
if suggestionConfig, ok := suggestionsConfig[algorithmName]; ok {
// Get image from config
image := suggestionConfig.Image
if strings.TrimSpace(image) != "" {
suggestionConfigData[consts.LabelSuggestionImageTag] = image
} else {
return "", errors.New("Failed to find " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName)
return map[string]string{}, errors.New("Required value for " + consts.LabelSuggestionImageTag + " configuration of algorithm name " + algorithmName)
}

// Set default values for CPU and Memory
suggestionConfigData[consts.LabelSuggestionCPURequestTag] = consts.DefaultCPURequest
suggestionConfigData[consts.LabelSuggestionMemRequestTag] = consts.DefaultMemRequest
suggestionConfigData[consts.LabelSuggestionCPULimitTag] = consts.DefaultCPULimit
suggestionConfigData[consts.LabelSuggestionMemLimitTag] = consts.DefaultMemLimit

// Get CPU and Memory Requests from config
cpuRequest := suggestionConfig.Resource.Requests[corev1.ResourceCPU]
memRequest := suggestionConfig.Resource.Requests[corev1.ResourceMemory]
if !cpuRequest.IsZero() {
suggestionConfigData[consts.LabelSuggestionCPURequestTag] = cpuRequest.String()
}
if !memRequest.IsZero() {
suggestionConfigData[consts.LabelSuggestionMemRequestTag] = memRequest.String()
}

// Get CPU and Memory Limits from config
cpuLimit := suggestionConfig.Resource.Limits[corev1.ResourceCPU]
memLimit := suggestionConfig.Resource.Limits[corev1.ResourceMemory]
if !cpuLimit.IsZero() {
suggestionConfigData[consts.LabelSuggestionCPULimitTag] = cpuLimit.String()
}
if !memLimit.IsZero() {
suggestionConfigData[consts.LabelSuggestionMemLimitTag] = memLimit.String()
}

} else {
return "", errors.New("Failed to find algorithm image mapping " + algorithmName)
return map[string]string{}, errors.New("Failed to find algorithm " + algorithmName + " config in configmap " + consts.KatibConfigMapName)
}
} else {
return "", errors.New("Failed to find algorithm image mapping in configmap " + consts.KatibConfigMapName)
return map[string]string{}, errors.New("Failed to find suggestions config in configmap " + consts.KatibConfigMapName)
}
return suggestionConfigData, nil
}

func GetMetricsCollectorImage(cKind common.CollectorKind, client client.Client) (string, error) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/webhook/v1alpha3/experiment/validator/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func (g *DefaultValidator) validateAlgorithm(ag *commonapiv1alpha3.AlgorithmSpec
return fmt.Errorf("No spec.algorithm.name specified.")
}

if _, err := g.GetSuggestionContainerImage(ag.AlgorithmName); err != nil {
if _, err := g.GetSuggestionConfigData(ag.AlgorithmName); err != nil {
return fmt.Errorf("Don't support algorithm %s: %v.", ag.AlgorithmName, err)
}

Expand Down
6 changes: 5 additions & 1 deletion pkg/webhook/v1alpha3/experiment/validator/validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

commonv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/common/v1alpha3"
experimentsv1alpha3 "github.com/kubeflow/katib/pkg/apis/controller/experiments/v1alpha3"
"github.com/kubeflow/katib/pkg/controller.v1alpha3/consts"
manifestmock "github.com/kubeflow/katib/pkg/mock/v1alpha3/experiment/manifest"
)

Expand Down Expand Up @@ -84,8 +85,11 @@ metadata:
name: "fake-trial"
namespace: fakens`

suggestionConfigData := map[string]string{}
suggestionConfigData[consts.LabelSuggestionImageTag] = "algorithmImage"

p.EXPECT().GetRunSpec(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(trialJobTemplate, nil).AnyTimes()
p.EXPECT().GetSuggestionContainerImage(gomock.Any()).Return("algorithmImage", nil).AnyTimes()
p.EXPECT().GetSuggestionConfigData(gomock.Any()).Return(suggestionConfigData, nil).AnyTimes()
p.EXPECT().GetMetricsCollectorImage(gomock.Any()).Return("metricsCollectorImage", nil).AnyTimes()

tcs := []struct {
Expand Down