From 846d9eb01a001bc1d6c0094ce287c5ee035a24d9 Mon Sep 17 00:00:00 2001
From: deepakbshetty
Date: Thu, 25 Jan 2024 09:11:54 +0000
Subject: [PATCH] add managed_instance_scaling to sagemaker endpoint config
 production_variants block

---
 .../sagemaker/endpoint_configuration.go       | 110 ++++++++++++++++++
 ...maker_endpoint_configuration.html.markdown |   7 ++
 2 files changed, 117 insertions(+)

diff --git a/internal/service/sagemaker/endpoint_configuration.go b/internal/service/sagemaker/endpoint_configuration.go
index aaadb9512a2f..5398c861cfb8 100644
--- a/internal/service/sagemaker/endpoint_configuration.go
+++ b/internal/service/sagemaker/endpoint_configuration.go
@@ -353,6 +353,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
                         },
                     },
                 },
+                "managed_instance_scaling": {
+                    Type:     schema.TypeList,
+                    Optional: true,
+                    MaxItems: 1,
+                    ForceNew: true,
+                    Elem: &schema.Resource{
+                        Schema: map[string]*schema.Schema{
+                            "status": {
+                                Type:         schema.TypeString,
+                                Optional:     true,
+                                ForceNew:     true,
+                                ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
+                            },
+                            "min_instance_count": {
+                                Type:         schema.TypeInt,
+                                Optional:     true,
+                                ForceNew:     true,
+                                ValidateFunc: validation.IntAtLeast(1),
+                            },
+                            "max_instance_count": {
+                                Type:         schema.TypeInt,
+                                Optional:     true,
+                                ForceNew:     true,
+                                ValidateFunc: validation.IntAtLeast(1),
+                            },
+                        },
+                    },
+                },
                 "variant_name": {
                     Type:     schema.TypeString,
                     Optional: true,
@@ -476,6 +504,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
                         },
                     },
                 },
+                "managed_instance_scaling": {
+                    Type:     schema.TypeList,
+                    Optional: true,
+                    MaxItems: 1,
+                    ForceNew: true,
+                    Elem: &schema.Resource{
+                        Schema: map[string]*schema.Schema{
+                            "status": {
+                                Type:         schema.TypeString,
+                                Optional:     true,
+                                ForceNew:     true,
+                                ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
+                            },
+                            "min_instance_count": {
+                                Type:         schema.TypeInt,
+                                Optional:     true,
+                                ForceNew:     true,
+                                ValidateFunc: validation.IntAtLeast(1),
+                            },
+                            "max_instance_count": {
+                                Type:         schema.TypeInt,
+                                Optional:     true,
+                                ForceNew:     true,
+                                ValidateFunc: validation.IntAtLeast(1),
+                            },
+                        },
+                    },
+                },
                 "variant_name": {
                     Type:     schema.TypeString,
                     Optional: true,
@@ -663,6 +719,10 @@ func expandProductionVariants(configured []interface{}) []*sagemaker.ProductionV
             l.EnableSSMAccess = aws.Bool(v)
         }
 
+        if v, ok := data["managed_instance_scaling"].([]interface{}); ok && len(v) > 0 {
+            l.ManagedInstanceScaling = expandManagedInstanceScaling(v)
+        }
+
         containers = append(containers, l)
     }
@@ -712,6 +772,10 @@ func flattenProductionVariants(list []*sagemaker.ProductionVariant) []map[string
             l["enable_ssm_access"] = aws.BoolValue(i.EnableSSMAccess)
         }
 
+        if i.ManagedInstanceScaling != nil {
+            l["managed_instance_scaling"] = flattenManagedInstanceScaling(i.ManagedInstanceScaling)
+        }
+
         result = append(result, l)
     }
     return result
 }
@@ -960,6 +1024,30 @@ func expandCoreDumpConfig(configured []interface{}) *sagemaker.ProductionVariant
     return c
 }
 
+func expandManagedInstanceScaling(configured []interface{}) *sagemaker.ProductionVariantManagedInstanceScaling {
+    if len(configured) == 0 {
+        return nil
+    }
+
+    m := configured[0].(map[string]interface{})
+
+    c := &sagemaker.ProductionVariantManagedInstanceScaling{}
+
+    if v, ok := m["status"].(string); ok {
+        c.Status = aws.String(v)
+    }
+
+    if v, ok := m["min_instance_count"].(int); ok && v > 0 {
+        c.MinInstanceCount = aws.Int64(int64(v))
+    }
+
+    if v, ok := m["max_instance_count"].(int); ok && v > 0 {
+        c.MaxInstanceCount = aws.Int64(int64(v))
+    }
+
+    return c
+}
+
 func flattenEndpointConfigAsyncInferenceConfig(config *sagemaker.AsyncInferenceConfig) []map[string]interface{} {
     if config == nil {
         return []map[string]interface{}{}
     }
@@ -1077,3 +1165,25 @@ func flattenCoreDumpConfig(config *sagemaker.ProductionVariantCoreDumpConfig) []
 
     return []map[string]interface{}{cfg}
 }
+
+func flattenManagedInstanceScaling(config *sagemaker.ProductionVariantManagedInstanceScaling) []map[string]interface{} {
+    if config == nil {
+        return []map[string]interface{}{}
+    }
+
+    cfg := map[string]interface{}{}
+
+    if config.Status != nil {
+        cfg["status"] = aws.StringValue(config.Status)
+    }
+
+    if config.MinInstanceCount != nil {
+        cfg["min_instance_count"] = aws.Int64Value(config.MinInstanceCount)
+    }
+
+    if config.MaxInstanceCount != nil {
+        cfg["max_instance_count"] = aws.Int64Value(config.MaxInstanceCount)
+    }
+
+    return []map[string]interface{}{cfg}
+}
diff --git a/website/docs/r/sagemaker_endpoint_configuration.html.markdown b/website/docs/r/sagemaker_endpoint_configuration.html.markdown
index 34903dcc1797..bb370695a564 100644
--- a/website/docs/r/sagemaker_endpoint_configuration.html.markdown
+++ b/website/docs/r/sagemaker_endpoint_configuration.html.markdown
@@ -56,6 +56,7 @@ This resource supports the following arguments:
 * `model_data_download_timeout_in_seconds` - (Optional) The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
 * `model_name` - (Required) The name of the model to use.
 * `serverless_config` - (Optional) Specifies configuration for how an endpoint performs asynchronous inference.
+* `managed_instance_scaling` - (Optional) Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
 * `variant_name` - (Optional) The name of the variant. If omitted, Terraform will assign a random, unique name.
 * `volume_size_in_gb` - (Optional) The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.
 
@@ -70,6 +71,12 @@ This resource supports the following arguments:
 * `memory_size_in_mb` - (Required) The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
 * `provisioned_concurrency` - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.
 
+#### managed_instance_scaling
+
+* `status` - (Optional) Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
+* `min_instance_count` - (Optional) The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
+* `max_instance_count` - (Optional) The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
+
 ### data_capture_config
 
 * `initial_sampling_percentage` - (Required) Portion of data to capture. Should be between 0 and 100.
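
For reference, a minimal production variant exercising the new block could look like the sketch below. The resource names, model reference, and instance type are illustrative placeholders and are not part of this patch; the block simply mirrors the schema added above (`status`, `min_instance_count`, `max_instance_count`).

```terraform
resource "aws_sagemaker_endpoint_configuration" "example" {
  name = "example-endpoint-config" # placeholder name

  production_variants {
    variant_name           = "variant-1"
    model_name             = aws_sagemaker_model.example.name # assumes an aws_sagemaker_model defined elsewhere
    instance_type          = "ml.g5.xlarge"                   # placeholder instance type
    initial_instance_count = 1

    # Block added by this patch: bounds for managed instance scaling.
    managed_instance_scaling {
      status             = "ENABLED"
      min_instance_count = 1
      max_instance_count = 2
    }
  }
}
```

Since every attribute in the new block is declared with `ForceNew: true`, changing any of these values replaces the endpoint configuration rather than updating it in place.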