Skip to content

Commit

Permalink
add managed_instance_scaling to sagemaker endpoint config production_variants block
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakbshetty committed Jan 25, 2024
1 parent 0067d5c commit 846d9eb
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 0 deletions.
110 changes: 110 additions & 0 deletions internal/service/sagemaker/endpoint_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -476,6 +504,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -663,6 +719,10 @@ func expandProductionVariants(configured []interface{}) []*sagemaker.ProductionV
l.EnableSSMAccess = aws.Bool(v)
}

if v, ok := data["managed_instance_scaling"].([]interface{}); ok && len(v) > 0 {
l.ManagedInstanceScaling = expandManagedInstanceScaling(v)
}

containers = append(containers, l)
}

Expand Down Expand Up @@ -712,6 +772,10 @@ func flattenProductionVariants(list []*sagemaker.ProductionVariant) []map[string
l["enable_ssm_access"] = aws.BoolValue(i.EnableSSMAccess)
}

if i.ManagedInstanceScaling != nil {
l["managed_instance_scaling"] = flattenManagedInstanceScaling(i.ManagedInstanceScaling)
}

result = append(result, l)
}
return result
Expand Down Expand Up @@ -960,6 +1024,30 @@ func expandCoreDumpConfig(configured []interface{}) *sagemaker.ProductionVariant
return c
}

// expandManagedInstanceScaling converts the single-element Terraform
// "managed_instance_scaling" configuration block into the corresponding
// SageMaker API struct. Returns nil when the block is absent.
func expandManagedInstanceScaling(configured []interface{}) *sagemaker.ProductionVariantManagedInstanceScaling {
	if len(configured) == 0 {
		return nil
	}

	m := configured[0].(map[string]interface{})

	c := &sagemaker.ProductionVariantManagedInstanceScaling{}

	// Guard against the zero value: a type assertion on an unset optional
	// string still succeeds with ok == true, so also require a non-empty
	// value before sending Status to the API.
	if v, ok := m["status"].(string); ok && v != "" {
		c.Status = aws.String(v)
	}

	// The schema validates both counts with IntAtLeast(1), so a zero value
	// means the attribute was not configured and must be omitted.
	if v, ok := m["min_instance_count"].(int); ok && v > 0 {
		c.MinInstanceCount = aws.Int64(int64(v))
	}

	if v, ok := m["max_instance_count"].(int); ok && v > 0 {
		c.MaxInstanceCount = aws.Int64(int64(v))
	}

	return c
}

func flattenEndpointConfigAsyncInferenceConfig(config *sagemaker.AsyncInferenceConfig) []map[string]interface{} {
if config == nil {
return []map[string]interface{}{}
Expand Down Expand Up @@ -1077,3 +1165,25 @@ func flattenCoreDumpConfig(config *sagemaker.ProductionVariantCoreDumpConfig) []

return []map[string]interface{}{cfg}
}

// flattenManagedInstanceScaling converts a SageMaker API
// ProductionVariantManagedInstanceScaling struct into the single-element
// list form used by the Terraform "managed_instance_scaling" schema block.
// A nil input yields an empty list (block absent in state).
func flattenManagedInstanceScaling(config *sagemaker.ProductionVariantManagedInstanceScaling) []map[string]interface{} {
	if config == nil {
		return []map[string]interface{}{}
	}

	cfg := make(map[string]interface{})

	// Only populate attributes the API actually returned.
	if v := config.Status; v != nil {
		cfg["status"] = aws.StringValue(v)
	}
	if v := config.MinInstanceCount; v != nil {
		cfg["min_instance_count"] = aws.Int64Value(v)
	}
	if v := config.MaxInstanceCount; v != nil {
		cfg["max_instance_count"] = aws.Int64Value(v)
	}

	return []map[string]interface{}{cfg}
}
7 changes: 7 additions & 0 deletions website/docs/r/sagemaker_endpoint_configuration.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ This resource supports the following arguments:
* `model_data_download_timeout_in_seconds` - (Optional) The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
* `model_name` - (Required) The name of the model to use.
* `serverless_config` - (Optional) Specifies configuration for how an endpoint performs serverless inference.
* `managed_instance_scaling` - (Optional) Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
* `variant_name` - (Optional) The name of the variant. If omitted, Terraform will assign a random, unique name.
* `volume_size_in_gb` - (Optional) The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.

Expand All @@ -70,6 +71,12 @@ This resource supports the following arguments:
* `memory_size_in_mb` - (Required) The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
* `provisioned_concurrency` - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.

#### managed_instance_scaling

* `status` - (Optional) Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
* `min_instance_count` - (Optional) The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
* `max_instance_count` - (Optional) The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.

### data_capture_config

* `initial_sampling_percentage` - (Required) Portion of data to capture. Should be between 0 and 100.
Expand Down

0 comments on commit 846d9eb

Please sign in to comment.