Skip to content

Commit

Permalink
add managed_instance_scaling to sagemaker endpoint config production_variants and shadow_production_variants
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakbshetty committed Jul 20, 2024
1 parent cc6a0f5 commit 6fa6b4e
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .changelog/35479.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
resource/aws_sagemaker_endpoint_configuration: Add `production_variants.managed_instance_scaling` block and `shadow_production_variants.managed_instance_scaling` block
```
110 changes: 110 additions & 0 deletions internal/service/sagemaker/endpoint_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -518,6 +546,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -709,6 +765,10 @@ func expandProductionVariants(configured []interface{}) []*sagemaker.ProductionV
l.EnableSSMAccess = aws.Bool(v)
}

if v, ok := data["managed_instance_scaling"].([]interface{}); ok && len(v) > 0 {
l.ManagedInstanceScaling = expandManagedInstanceScaling(v)
}

if v, ok := data["inference_ami_version"].(string); ok && v != "" {
l.InferenceAmiVersion = aws.String(v)
}
Expand Down Expand Up @@ -766,6 +826,10 @@ func flattenProductionVariants(list []*sagemaker.ProductionVariant) []map[string
l["enable_ssm_access"] = aws.BoolValue(i.EnableSSMAccess)
}

if i.ManagedInstanceScaling != nil {
l["managed_instance_scaling"] = flattenManagedInstanceScaling(i.ManagedInstanceScaling)
}

if i.InferenceAmiVersion != nil {
l["inference_ami_version"] = aws.StringValue(i.InferenceAmiVersion)
}
Expand Down Expand Up @@ -1034,6 +1098,30 @@ func expandCoreDumpConfig(configured []interface{}) *sagemaker.ProductionVariant
return c
}

// expandManagedInstanceScaling converts the single-element
// managed_instance_scaling configuration block into the corresponding
// SageMaker ProductionVariantManagedInstanceScaling API object.
// It returns nil when the block is absent or empty.
func expandManagedInstanceScaling(configured []interface{}) *sagemaker.ProductionVariantManagedInstanceScaling {
	if len(configured) == 0 {
		return nil
	}

	// An empty nested block ("managed_instance_scaling {}") surfaces as a
	// nil element; guard the type assertion instead of panicking.
	m, ok := configured[0].(map[string]interface{})
	if !ok {
		return nil
	}

	c := &sagemaker.ProductionVariantManagedInstanceScaling{}

	// Only send Status when it was actually configured: the schema zero
	// value is "", which is not a valid ManagedInstanceScalingStatus and
	// would be rejected by the API.
	if v, ok := m["status"].(string); ok && v != "" {
		c.Status = aws.String(v)
	}

	// Zero means "unset" for both counts: the schema marks them Optional
	// with IntAtLeast(1) validation, so 0 can only be the zero value.
	if v, ok := m["min_instance_count"].(int); ok && v > 0 {
		c.MinInstanceCount = aws.Int64(int64(v))
	}

	if v, ok := m["max_instance_count"].(int); ok && v > 0 {
		c.MaxInstanceCount = aws.Int64(int64(v))
	}

	return c
}

func flattenEndpointConfigAsyncInferenceConfig(config *sagemaker.AsyncInferenceConfig) []map[string]interface{} {
if config == nil {
return []map[string]interface{}{}
Expand Down Expand Up @@ -1165,3 +1253,25 @@ func flattenCoreDumpConfig(config *sagemaker.ProductionVariantCoreDumpConfig) []

return []map[string]interface{}{cfg}
}

// flattenManagedInstanceScaling converts a SageMaker
// ProductionVariantManagedInstanceScaling API object into the
// single-element list form used by the Terraform schema. A nil input
// flattens to an empty list.
func flattenManagedInstanceScaling(config *sagemaker.ProductionVariantManagedInstanceScaling) []map[string]interface{} {
	if config == nil {
		return []map[string]interface{}{}
	}

	tfMap := make(map[string]interface{})

	// Each attribute is written only when the API populated it, so unset
	// fields stay absent from state rather than defaulting.
	if v := config.Status; v != nil {
		tfMap["status"] = aws.StringValue(v)
	}

	if v := config.MinInstanceCount; v != nil {
		tfMap["min_instance_count"] = aws.Int64Value(v)
	}

	if v := config.MaxInstanceCount; v != nil {
		tfMap["max_instance_count"] = aws.Int64Value(v)
	}

	return []map[string]interface{}{tfMap}
}
7 changes: 7 additions & 0 deletions website/docs/r/sagemaker_endpoint_configuration.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ This resource supports the following arguments:
* `model_name` - (Required) The name of the model to use.
* `routing_config` - (Optional) Sets how the endpoint routes incoming traffic. See [routing_config](#routing_config) below.
* `serverless_config` - (Optional) Specifies configuration for how an endpoint performs asynchronous inference.
* `managed_instance_scaling` - (Optional) Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
* `variant_name` - (Optional) The name of the variant. If omitted, Terraform will assign a random, unique name.
* `volume_size_in_gb` - (Optional) The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.

Expand All @@ -76,6 +77,12 @@ This resource supports the following arguments:
* `memory_size_in_mb` - (Required) The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
* `provisioned_concurrency` - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.

#### managed_instance_scaling

* `status` - (Optional) Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
* `min_instance_count` - (Optional) The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
* `max_instance_count` - (Optional) The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.

### data_capture_config

* `initial_sampling_percentage` - (Required) Portion of data to capture. Should be between 0 and 100.
Expand Down

0 comments on commit 6fa6b4e

Please sign in to comment.