Skip to content

Commit

Permalink
add managed_instance_scaling to sagemaker endpoint config production_variants block
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakbshetty committed Jan 25, 2024
1 parent 0067d5c commit 846d9eb
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 0 deletions.
110 changes: 110 additions & 0 deletions internal/service/sagemaker/endpoint_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -476,6 +504,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -663,6 +719,10 @@ func expandProductionVariants(configured []interface{}) []*sagemaker.ProductionV
l.EnableSSMAccess = aws.Bool(v)
}

if v, ok := data["managed_instance_scaling"].([]interface{}); ok && len(v) > 0 {
l.ManagedInstanceScaling = expandManagedInstanceScaling(v)
}

containers = append(containers, l)
}

Expand Down Expand Up @@ -712,6 +772,10 @@ func flattenProductionVariants(list []*sagemaker.ProductionVariant) []map[string
l["enable_ssm_access"] = aws.BoolValue(i.EnableSSMAccess)
}

if i.ManagedInstanceScaling != nil {
l["managed_instance_scaling"] = flattenManagedInstanceScaling(i.ManagedInstanceScaling)
}

result = append(result, l)
}
return result
Expand Down Expand Up @@ -960,6 +1024,30 @@ func expandCoreDumpConfig(configured []interface{}) *sagemaker.ProductionVariant
return c
}

// expandManagedInstanceScaling converts the single-element Terraform
// "managed_instance_scaling" configuration block into the corresponding
// SageMaker API struct. Returns nil when the block is absent.
func expandManagedInstanceScaling(configured []interface{}) *sagemaker.ProductionVariantManagedInstanceScaling {
	if len(configured) == 0 {
		return nil
	}

	m := configured[0].(map[string]interface{})

	c := &sagemaker.ProductionVariantManagedInstanceScaling{}

	// Guard against the zero value: a type assertion on an unset optional
	// string still succeeds with ok == true, so also require a non-empty
	// value before sending Status to the API.
	if v, ok := m["status"].(string); ok && v != "" {
		c.Status = aws.String(v)
	}

	// The schema validates both counts with IntAtLeast(1), so a zero value
	// means the attribute was not configured and must be omitted.
	if v, ok := m["min_instance_count"].(int); ok && v > 0 {
		c.MinInstanceCount = aws.Int64(int64(v))
	}

	if v, ok := m["max_instance_count"].(int); ok && v > 0 {
		c.MaxInstanceCount = aws.Int64(int64(v))
	}

	return c
}

func flattenEndpointConfigAsyncInferenceConfig(config *sagemaker.AsyncInferenceConfig) []map[string]interface{} {
if config == nil {
return []map[string]interface{}{}
Expand Down Expand Up @@ -1077,3 +1165,25 @@ func flattenCoreDumpConfig(config *sagemaker.ProductionVariantCoreDumpConfig) []

return []map[string]interface{}{cfg}
}

// flattenManagedInstanceScaling converts a SageMaker API
// ProductionVariantManagedInstanceScaling struct into the single-element
// list form used by the Terraform "managed_instance_scaling" schema block.
// A nil input yields an empty list (block absent in state).
func flattenManagedInstanceScaling(config *sagemaker.ProductionVariantManagedInstanceScaling) []map[string]interface{} {
	if config == nil {
		return []map[string]interface{}{}
	}

	cfg := make(map[string]interface{})

	// Only populate attributes the API actually returned.
	if v := config.Status; v != nil {
		cfg["status"] = aws.StringValue(v)
	}
	if v := config.MinInstanceCount; v != nil {
		cfg["min_instance_count"] = aws.Int64Value(v)
	}
	if v := config.MaxInstanceCount; v != nil {
		cfg["max_instance_count"] = aws.Int64Value(v)
	}

	return []map[string]interface{}{cfg}
}
7 changes: 7 additions & 0 deletions website/docs/r/sagemaker_endpoint_configuration.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ This resource supports the following arguments:
* `model_data_download_timeout_in_seconds` - (Optional) The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between `60` and `3600`.
* `model_name` - (Required) The name of the model to use.
* `serverless_config` - (Optional) Specifies configuration for how an endpoint performs serverless inference.
* `managed_instance_scaling` - (Optional) Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
* `variant_name` - (Optional) The name of the variant. If omitted, Terraform will assign a random, unique name.
* `volume_size_in_gb` - (Optional) The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.

Expand All @@ -70,6 +71,12 @@ This resource supports the following arguments:
* `memory_size_in_mb` - (Required) The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
* `provisioned_concurrency` - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.

#### managed_instance_scaling

* `status` - (Optional) Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
* `min_instance_count` - (Optional) The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
* `max_instance_count` - (Optional) The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.

### data_capture_config

* `initial_sampling_percentage` - (Required) Portion of data to capture. Should be between 0 and 100.
Expand Down

0 comments on commit 846d9eb

Please sign in to comment.