diff --git a/.changelog/35479.txt b/.changelog/35479.txt new file mode 100644 index 000000000000..aa082e3a38d7 --- /dev/null +++ b/.changelog/35479.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_sagemaker_endpoint_configuration: Add `production_variants.managed_instance_scaling` block and `shadow_production_variants.managed_instance_scaling` block +``` \ No newline at end of file diff --git a/internal/service/sagemaker/endpoint_configuration.go b/internal/service/sagemaker/endpoint_configuration.go index 5c226f55959a..f6431e9a83a4 100644 --- a/internal/service/sagemaker/endpoint_configuration.go +++ b/internal/service/sagemaker/endpoint_configuration.go @@ -374,6 +374,34 @@ func ResourceEndpointConfiguration() *schema.Resource { }, }, }, + "managed_instance_scaling": { + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + ForceNew: true, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "status": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false), + }, + "min_instance_count": { + Type: schema.TypeInt, + Optional: true, + ForceNew: true, + ValidateFunc: validation.IntAtLeast(1), + }, + "max_instance_count": { + Type: schema.TypeInt, + Optional: true, + ForceNew: true, + ValidateFunc: validation.IntAtLeast(1), + }, + }, + }, + }, "variant_name": { Type: schema.TypeString, Optional: true, @@ -518,6 +546,34 @@ func ResourceEndpointConfiguration() *schema.Resource { }, }, }, + "managed_instance_scaling": { + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + ForceNew: true, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "status": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false), + }, + "min_instance_count": { + Type: schema.TypeInt, + Optional: true, + ForceNew: true, + ValidateFunc: 
validation.IntAtLeast(1), + }, + "max_instance_count": { + Type: schema.TypeInt, + Optional: true, + ForceNew: true, + ValidateFunc: validation.IntAtLeast(1), + }, + }, + }, + }, "variant_name": { Type: schema.TypeString, Optional: true, @@ -709,6 +765,10 @@ func expandProductionVariants(configured []interface{}) []*sagemaker.ProductionV l.EnableSSMAccess = aws.Bool(v) } + if v, ok := data["managed_instance_scaling"].([]interface{}); ok && len(v) > 0 { + l.ManagedInstanceScaling = expandManagedInstanceScaling(v) + } + if v, ok := data["inference_ami_version"].(string); ok && v != "" { l.InferenceAmiVersion = aws.String(v) } @@ -766,6 +826,10 @@ func flattenProductionVariants(list []*sagemaker.ProductionVariant) []map[string l["enable_ssm_access"] = aws.BoolValue(i.EnableSSMAccess) } + if i.ManagedInstanceScaling != nil { + l["managed_instance_scaling"] = flattenManagedInstanceScaling(i.ManagedInstanceScaling) + } + if i.InferenceAmiVersion != nil { l["inference_ami_version"] = aws.StringValue(i.InferenceAmiVersion) } @@ -1034,6 +1098,30 @@ func expandCoreDumpConfig(configured []interface{}) *sagemaker.ProductionVariant return c } +func expandManagedInstanceScaling(configured []interface{}) *sagemaker.ProductionVariantManagedInstanceScaling { + if len(configured) == 0 { + return nil + } + + m := configured[0].(map[string]interface{}) + + c := &sagemaker.ProductionVariantManagedInstanceScaling{} + + if v, ok := m["status"].(string); ok && v != "" { + c.Status = aws.String(v) + } + + if v, ok := m["min_instance_count"].(int); ok && v > 0 { + c.MinInstanceCount = aws.Int64(int64(v)) + } + + if v, ok := m["max_instance_count"].(int); ok && v > 0 { + c.MaxInstanceCount = aws.Int64(int64(v)) + } + + return c +} + func flattenEndpointConfigAsyncInferenceConfig(config *sagemaker.AsyncInferenceConfig) []map[string]interface{} { if config == nil { return []map[string]interface{}{} @@ -1165,3 +1253,25 @@ func flattenCoreDumpConfig(config *sagemaker.ProductionVariantCoreDumpConfig) []
return []map[string]interface{}{cfg} } + +func flattenManagedInstanceScaling(config *sagemaker.ProductionVariantManagedInstanceScaling) []map[string]interface{} { + if config == nil { + return []map[string]interface{}{} + } + + cfg := map[string]interface{}{} + + if config.Status != nil { + cfg["status"] = aws.StringValue(config.Status) + } + + if config.MinInstanceCount != nil { + cfg["min_instance_count"] = aws.Int64Value(config.MinInstanceCount) + } + + if config.MaxInstanceCount != nil { + cfg["max_instance_count"] = aws.Int64Value(config.MaxInstanceCount) + } + + return []map[string]interface{}{cfg} +} diff --git a/website/docs/r/sagemaker_endpoint_configuration.html.markdown b/website/docs/r/sagemaker_endpoint_configuration.html.markdown index 0cf7337ccfab..981d460fdfda 100644 --- a/website/docs/r/sagemaker_endpoint_configuration.html.markdown +++ b/website/docs/r/sagemaker_endpoint_configuration.html.markdown @@ -58,6 +58,7 @@ This resource supports the following arguments: * `model_name` - (Required) The name of the model to use. * `routing_config` - (Optional) Sets how the endpoint routes incoming traffic. See [routing_config](#routing_config) below. * `serverless_config` - (Optional) Specifies configuration for how an endpoint performs asynchronous inference. +* `managed_instance_scaling` - (Optional) Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic. See [managed_instance_scaling](#managed_instance_scaling) below. * `variant_name` - (Optional) The name of the variant. If omitted, Terraform will assign a random, unique name. * `volume_size_in_gb` - (Optional) The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`. @@ -76,6 +77,12 @@ This resource supports the following arguments: * `memory_size_in_mb` - (Required) The memory size of your serverless endpoint.
Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB. * `provisioned_concurrency` - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`. +#### managed_instance_scaling + +* `status` - (Optional) Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`. +* `min_instance_count` - (Optional) The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic. +* `max_instance_count` - (Optional) The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic. + ### data_capture_config * `initial_sampling_percentage` - (Required) Portion of data to capture. Should be between 0 and 100.