Skip to content

Commit

Permalink
add managed_instance_scaling to sagemaker endpoint config production_variants and shadow_production_variants
Browse files Browse the repository at this point in the history
  • Loading branch information
deepakbshetty committed Jul 20, 2024
1 parent cc6a0f5 commit 6fa6b4e
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .changelog/35479.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
resource/aws_sagemaker_endpoint_configuration: Add `production_variants.managed_instance_scaling` block and `shadow_production_variants.managed_instance_scaling` block
```
110 changes: 110 additions & 0 deletions internal/service/sagemaker/endpoint_configuration.go
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -518,6 +546,34 @@ func ResourceEndpointConfiguration() *schema.Resource {
},
},
},
"managed_instance_scaling": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
ForceNew: true,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"status": {
Type: schema.TypeString,
Optional: true,
ForceNew: true,
ValidateFunc: validation.StringInSlice(sagemaker.ManagedInstanceScalingStatus_Values(), false),
},
"min_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
"max_instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
ValidateFunc: validation.IntAtLeast(1),
},
},
},
},
"variant_name": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -709,6 +765,10 @@ func expandProductionVariants(configured []interface{}) []*sagemaker.ProductionV
l.EnableSSMAccess = aws.Bool(v)
}

if v, ok := data["managed_instance_scaling"].([]interface{}); ok && len(v) > 0 {
l.ManagedInstanceScaling = expandManagedInstanceScaling(v)
}

if v, ok := data["inference_ami_version"].(string); ok && v != "" {
l.InferenceAmiVersion = aws.String(v)
}
Expand Down Expand Up @@ -766,6 +826,10 @@ func flattenProductionVariants(list []*sagemaker.ProductionVariant) []map[string
l["enable_ssm_access"] = aws.BoolValue(i.EnableSSMAccess)
}

if i.ManagedInstanceScaling != nil {
l["managed_instance_scaling"] = flattenManagedInstanceScaling(i.ManagedInstanceScaling)
}

if i.InferenceAmiVersion != nil {
l["inference_ami_version"] = aws.StringValue(i.InferenceAmiVersion)
}
Expand Down Expand Up @@ -1034,6 +1098,30 @@ func expandCoreDumpConfig(configured []interface{}) *sagemaker.ProductionVariant
return c
}

// expandManagedInstanceScaling converts the single-element
// managed_instance_scaling configuration block into the corresponding
// SageMaker ProductionVariantManagedInstanceScaling API object.
// It returns nil when the block is absent or empty.
func expandManagedInstanceScaling(configured []interface{}) *sagemaker.ProductionVariantManagedInstanceScaling {
	if len(configured) == 0 {
		return nil
	}

	// An empty nested block ("managed_instance_scaling {}") surfaces as a
	// nil element; guard the type assertion instead of panicking.
	m, ok := configured[0].(map[string]interface{})
	if !ok {
		return nil
	}

	c := &sagemaker.ProductionVariantManagedInstanceScaling{}

	// Only send Status when it was actually configured: the schema zero
	// value is "", which is not a valid ManagedInstanceScalingStatus and
	// would be rejected by the API.
	if v, ok := m["status"].(string); ok && v != "" {
		c.Status = aws.String(v)
	}

	// Zero means "unset" for both counts: the schema marks them Optional
	// with IntAtLeast(1) validation, so 0 can only be the zero value.
	if v, ok := m["min_instance_count"].(int); ok && v > 0 {
		c.MinInstanceCount = aws.Int64(int64(v))
	}

	if v, ok := m["max_instance_count"].(int); ok && v > 0 {
		c.MaxInstanceCount = aws.Int64(int64(v))
	}

	return c
}

func flattenEndpointConfigAsyncInferenceConfig(config *sagemaker.AsyncInferenceConfig) []map[string]interface{} {
if config == nil {
return []map[string]interface{}{}
Expand Down Expand Up @@ -1165,3 +1253,25 @@ func flattenCoreDumpConfig(config *sagemaker.ProductionVariantCoreDumpConfig) []

return []map[string]interface{}{cfg}
}

// flattenManagedInstanceScaling converts a SageMaker
// ProductionVariantManagedInstanceScaling API object into the
// single-element list form used by the Terraform schema. A nil input
// flattens to an empty list.
func flattenManagedInstanceScaling(config *sagemaker.ProductionVariantManagedInstanceScaling) []map[string]interface{} {
	if config == nil {
		return []map[string]interface{}{}
	}

	tfMap := make(map[string]interface{})

	// Each attribute is written only when the API populated it, so unset
	// fields stay absent from state rather than defaulting.
	if v := config.Status; v != nil {
		tfMap["status"] = aws.StringValue(v)
	}

	if v := config.MinInstanceCount; v != nil {
		tfMap["min_instance_count"] = aws.Int64Value(v)
	}

	if v := config.MaxInstanceCount; v != nil {
		tfMap["max_instance_count"] = aws.Int64Value(v)
	}

	return []map[string]interface{}{tfMap}
}
7 changes: 7 additions & 0 deletions website/docs/r/sagemaker_endpoint_configuration.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ This resource supports the following arguments:
* `model_name` - (Required) The name of the model to use.
* `routing_config` - (Optional) Sets how the endpoint routes incoming traffic. See [routing_config](#routing_config) below.
* `serverless_config` - (Optional) Specifies configuration for how an endpoint performs asynchronous inference.
* `managed_instance_scaling` - (Optional) Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
* `variant_name` - (Optional) The name of the variant. If omitted, Terraform will assign a random, unique name.
* `volume_size_in_gb` - (Optional) The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between `1` and `512`.

Expand All @@ -76,6 +77,12 @@ This resource supports the following arguments:
* `memory_size_in_mb` - (Required) The memory size of your serverless endpoint. Valid values are in 1 GB increments: `1024` MB, `2048` MB, `3072` MB, `4096` MB, `5120` MB, or `6144` MB.
* `provisioned_concurrency` - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to `max_concurrency`. Valid values are between `1` and `200`.

#### managed_instance_scaling

* `status` - (Optional) Indicates whether managed instance scaling is enabled. Valid values are `ENABLED` and `DISABLED`.
* `min_instance_count` - (Optional) The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
* `max_instance_count` - (Optional) The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.

### data_capture_config

* `initial_sampling_percentage` - (Required) Portion of data to capture. Should be between 0 and 100.
Expand Down

0 comments on commit 6fa6b4e

Please sign in to comment.