From 72b549a2e06e5c1b21e6c9888352e08f7e8eb046 Mon Sep 17 00:00:00 2001 From: deepakbshetty Date: Sat, 17 Feb 2024 21:18:43 +0000 Subject: [PATCH 1/4] Add model_access_config, multi_model_config and inference_specification_name to primary_container and container block --- .changelog/35873.txt | 6 + internal/service/sagemaker/model.go | 155 ++++++++++++ internal/service/sagemaker/model_test.go | 251 ++++++++++++++++++- website/docs/r/sagemaker_model.html.markdown | 11 + 4 files changed, 422 insertions(+), 1 deletion(-) create mode 100644 .changelog/35873.txt diff --git a/.changelog/35873.txt b/.changelog/35873.txt new file mode 100644 index 00000000000..b9d0f1b742e --- /dev/null +++ b/.changelog/35873.txt @@ -0,0 +1,6 @@ +@@ -0,0 +1,7 @@ +```release-note:enhancement +resource/aws_sagemaker_model: Add `primary_container.model_data_source.model_access_config` block and `container.model_data_source.model_access_config` block +resource/aws_sagemaker_model: Add `primary_container.multi_model_config` block and `container.multi_model_config` block +resource/aws_sagemaker_model: Add `primary_container.inference_specification_name` attribute and `container.inference_specification_name` attribute +``` \ No newline at end of file diff --git a/internal/service/sagemaker/model.go b/internal/service/sagemaker/model.go index d3a31d7bda5..b4b18acb0ef 100644 --- a/internal/service/sagemaker/model.go +++ b/internal/service/sagemaker/model.go @@ -147,12 +147,48 @@ func resourceModel() *schema.Resource { ForceNew: true, ValidateDiagFunc: enum.Validate[awstypes.ModelCompressionType](), }, + "model_access_config": { + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "accept_eula": { + Type: schema.TypeBool, + Required: true, + ForceNew: true, + }, + }, + }, + }, }, }, }, }, }, }, + "inference_specification_name": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validName, + }, + 
"multi_model_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "model_cache_setting": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateDiagFunc: enum.Validate[awstypes.ModelCacheSetting](), + }, + }, + }, + }, }, }, }, @@ -294,12 +330,49 @@ func resourceModel() *schema.Resource { ForceNew: true, ValidateDiagFunc: enum.Validate[awstypes.ModelCompressionType](), }, + "model_access_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "accept_eula": { + Type: schema.TypeBool, + Required: true, + ForceNew: true, + }, + }, + }, + }, }, }, }, }, }, }, + "inference_specification_name": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validName, + }, + "multi_model_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "model_cache_setting": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateDiagFunc: enum.Validate[awstypes.ModelCacheSetting](), + }, + }, + }, + }, }, }, }, @@ -551,6 +624,14 @@ func expandContainer(m map[string]interface{}) *awstypes.ContainerDefinition { container.ImageConfig = expandModelImageConfig(v.([]interface{})) } + if v, ok := m["inference_specification_name"]; ok && v.(string) != "" { + container.InferenceSpecificationName = aws.String(v.(string)) + } + + if v, ok := m["multi_model_config"].([]interface{}); ok && len(v) > 0 { + container.MultiModelConfig = expandMultiModelConfig(v) + } + return &container } @@ -589,6 +670,10 @@ func expandS3ModelDataSource(l []interface{}) *awstypes.S3ModelDataSource { s3ModelDataSource.CompressionType = awstypes.ModelCompressionType(v.(string)) } + if v, ok := m["model_access_config"].([]interface{}); ok && len(v) > 0 { + 
s3ModelDataSource.ModelAccessConfig = expandModelAccessConfig(v) + } + return &s3ModelDataSource } @@ -634,6 +719,38 @@ func expandContainers(a []interface{}) []awstypes.ContainerDefinition { return containers } +func expandModelAccessConfig(l []interface{}) *awstypes.ModelAccessConfig { + if len(l) == 0 { + return nil + } + + m := l[0].(map[string]interface{}) + + modelAccessConfig := &awstypes.ModelAccessConfig{} + + if v, ok := m["accept_eula"].(bool); ok { + modelAccessConfig.AcceptEula = aws.Bool(v) + } + + return modelAccessConfig +} + +func expandMultiModelConfig(l []interface{}) *awstypes.MultiModelConfig { + if len(l) == 0 { + return nil + } + + m := l[0].(map[string]interface{}) + + multiModelConfig := &awstypes.MultiModelConfig{} + + if v, ok := m["model_cache_setting"].(string); ok && v != "" { + multiModelConfig.ModelCacheSetting = awstypes.ModelCacheSetting(v) + } + + return multiModelConfig +} + func flattenContainer(container *awstypes.ContainerDefinition) []interface{} { if container == nil { return []interface{}{} @@ -667,6 +784,14 @@ func flattenContainer(container *awstypes.ContainerDefinition) []interface{} { cfg["image_config"] = flattenImageConfig(container.ImageConfig) } + if container.InferenceSpecificationName != nil { + cfg["inference_specification_name"] = aws.ToString(container.InferenceSpecificationName) + } + + if container.MultiModelConfig != nil { + cfg["multi_model_config"] = flattenMultiModelConfig(container.MultiModelConfig) + } + return []interface{}{cfg} } @@ -699,6 +824,10 @@ func flattenS3ModelDataSource(s3ModelDataSource *awstypes.S3ModelDataSource) []i cfg["compression_type"] = s3ModelDataSource.CompressionType + if s3ModelDataSource.ModelAccessConfig != nil { + cfg["model_access_config"] = flattenModelAccessConfig(s3ModelDataSource.ModelAccessConfig) + } + return []interface{}{cfg} } @@ -740,6 +869,32 @@ func flattenContainers(containers []awstypes.ContainerDefinition) []interface{} return fContainers } +func 
flattenModelAccessConfig(config *awstypes.ModelAccessConfig) []interface{} { + if config == nil { + return []interface{}{} + } + + cfg := make(map[string]interface{}) + + cfg["accept_eula"] = aws.ToBool(config.AcceptEula) + + return []interface{}{cfg} +} + +func flattenMultiModelConfig(config *awstypes.MultiModelConfig) []interface{} { + if config == nil { + return []interface{}{} + } + + cfg := make(map[string]interface{}) + + if config.ModelCacheSetting != "" { + cfg["model_cache_setting"] = config.ModelCacheSetting + } + + return []interface{}{cfg} +} + func expandModelInferenceExecutionConfig(l []interface{}) *awstypes.InferenceExecutionConfig { if len(l) == 0 { return nil diff --git a/internal/service/sagemaker/model_test.go b/internal/service/sagemaker/model_test.go index 6dbbbbdda8d..50563e562e6 100644 --- a/internal/service/sagemaker/model_test.go +++ b/internal/service/sagemaker/model_test.go @@ -501,8 +501,118 @@ func testAccCheckModelExists(ctx context.Context, n string) resource.TestCheckFu } } +func TestAccSageMakerModel_primaryContainerModelS3DataSourceAcceptEula(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerModelS3DataSourceAcceptEula(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.model_data_source.0.s3_data_source.0.model_access_config.0.accept_eula", acctest.CtTrue), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func 
TestAccSageMakerModel_primaryContainerInferenceSpecificationName(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerInferenceSpecificationName(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.inference_specification_name", "test"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func TestAccSageMakerModel_primaryContainerMultiModelConfigModelCacheSetting(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerMultiModelConfigModelCacheSetting(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.multi_model_config.0.model_cache_setting", "Disabled"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func TestAccSageMakerModel_containersMultiModelConfigModelCacheSetting(t *testing.T) { + ctx := acctest.Context(t) + rName := 
sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_containersMultiModelConfigModelCacheSetting(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "container.0.multi_model_config.0.model_cache_setting", "Disabled"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + func testAccModelConfig_base(rName string) string { return fmt.Sprintf(` +data "aws_region" "current" {} +data "aws_partition" "current" {} resource "aws_iam_role" "test" { name = %[1]q path = "/" @@ -515,11 +625,16 @@ data "aws_iam_policy_document" "test" { principals { type = "Service" - identifiers = ["sagemaker.amazonaws.com"] + identifiers = ["sagemaker.${data.aws_partition.current.dns_suffix}"] } } } +resource "aws_iam_role_policy_attachment" "test" { + role = aws_iam_role.test.name + policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSageMakerFullAccess" +} + data "aws_sagemaker_prebuilt_ecr_image" "test" { repository_name = "kmeans" } @@ -949,3 +1064,137 @@ resource "aws_security_group" "test" { } `, rName)) } + +// https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/huggingface-llm.json + +func testAccModelConfig_primaryContainerModelS3DataSourceAcceptEula(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_sagemaker_prebuilt_ecr_image" "accept_eula_test" { + repository_name = "huggingface-pytorch-tgi-inference" + image_tag = 
"2.3.0-tgi2.2.0-gpu-py310-cu121-ubuntu22.04-v2.0" +} + +resource "aws_sagemaker_model" "test" { + name = %[1]q + enable_network_isolation = true + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = data.aws_sagemaker_prebuilt_ecr_image.accept_eula_test.registry_path + mode = "SingleModel" + environment = { + ENDPOINT_SERVER_TIMEOUT = 3600 + HF_MODEL_ID = "/opt/ml/model" + MAX_INPUT_LENGTH = 4095 + MAX_TOTAL_TOKENS = 4096 + MODEL_CACHE_ROOT = "/opt/ml/model" + SAGEMAKER_ENV = 1 + SAGEMAKER_MODEL_SERVER_WORKERS = 1 + SAGEMAKER_PROGRAM = "inference.py" + SM_NUM_GPUS = 4 + } + + model_data_source { + s3_data_source { + compression_type = "None" + s3_data_type = "S3Prefix" + s3_uri = format("s3://jumpstart-private-cache-prod-%%s/meta-textgeneration/meta-textgeneration-llama-2-13b-f/artifacts/inference-prepack/v1.0.0/", data.aws_region.current.name) + model_access_config { + accept_eula = true + } + } + } + } +} +`, rName)) +} + +func testAccModelConfig_primaryContainerInferenceSpecificationName(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +resource "aws_sagemaker_model" "test" { + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = data.aws_sagemaker_prebuilt_ecr_image.test.registry_path + inference_specification_name = "test" + } +} +`, rName)) +} + +// https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/sagemaker-tritonserver.json + +func testAccModelConfig_primaryContainerMultiModelConfigModelCacheSetting(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_sagemaker_prebuilt_ecr_image" "model_cache_setting_test" { + repository_name = "sagemaker-tritonserver" + image_tag = "24.03-py3" +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_s3_object" "test" { + bucket = aws_s3_bucket.test.bucket + key = 
"resnet50-mme-gpu/model.tar.gz" + content = "some-data" +} + +resource "aws_sagemaker_model" "test" { + depends_on = [aws_s3_object.test] + + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = data.aws_sagemaker_prebuilt_ecr_image.model_cache_setting_test.registry_path + mode = "MultiModel" + model_data_url = "s3://${aws_s3_bucket.test.id}/resnet50-mme-gpu/" + multi_model_config { + model_cache_setting = "Disabled" + } + } +} +`, rName)) +} + +// https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/sagemaker-tritonserver.json + +func testAccModelConfig_containersMultiModelConfigModelCacheSetting(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_sagemaker_prebuilt_ecr_image" "model_cache_setting_test" { + repository_name = "sagemaker-tritonserver" + image_tag = "24.03-py3" +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_s3_object" "test" { + bucket = aws_s3_bucket.test.bucket + key = "resnet50-mme-gpu/model.tar.gz" + content = "some-data" +} + +resource "aws_sagemaker_model" "test" { + depends_on = [aws_s3_object.test] + + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + container { + image = data.aws_sagemaker_prebuilt_ecr_image.model_cache_setting_test.registry_path + mode = "MultiModel" + model_data_url = "s3://${aws_s3_bucket.test.id}/resnet50-mme-gpu/" + multi_model_config { + model_cache_setting = "Disabled" + } + } +} +`, rName)) +} diff --git a/website/docs/r/sagemaker_model.html.markdown b/website/docs/r/sagemaker_model.html.markdown index e324ffcbca3..9571f74254b 100644 --- a/website/docs/r/sagemaker_model.html.markdown +++ b/website/docs/r/sagemaker_model.html.markdown @@ -68,6 +68,8 @@ The `primary_container` and `container` block both support: * `environment` - (Optional) Environment variables for the Docker container. A list of key value pairs. 
* `image_config` - (Optional) Specifies whether the model container is in Amazon ECR or a private Docker registry accessible from your Amazon Virtual Private Cloud (VPC). For more information see [Using a Private Docker Registry for Real-Time Inference Containers](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-containers-inference-private.html). see [Image Config](#image-config). +* `inference_specification_name` - (Optional) The inference specification name in the model package version. +* `multi_model_config` - (Optional) Specifies additional configuration for multi-model endpoints. see [Multi Model Config](#multi-model-config). ### Image Config @@ -87,6 +89,15 @@ The `primary_container` and `container` block both support: * `compression_type` - (Required) How the model data is prepared. Allowed values are: `None` and `Gzip`. * `s3_data_type` - (Required) The type of model data to deploy. Allowed values are: `S3Object` and `S3Prefix`. * `s3_uri` - (Required) The S3 path of model data to deploy. +* `model_access_config` - (Optional) Specifies the access configuration file for the ML model. You can explicitly accept the model end-user license agreement (EULA) within the [`model_access_config` configuration block]. see [Model Access Config](#model-access-config). + +##### Model Access Config + +* `accept_eula` - (Required) Specifies agreement to the model end-user license agreement (EULA). The AcceptEula value must be explicitly defined as `true` in order to accept the EULA that this model requires. You are responsible for reviewing and complying with any applicable license terms and making sure they are acceptable for your use case before downloading or using a model. + +### Multi Model Config + +* `model_cache_setting` - (Optional) Whether to cache models for a multi-model endpoint. By default, multi-model endpoints cache models so that a model does not have to be loaded into memory each time it is invoked. 
Some use cases do not benefit from model caching. For example, if an endpoint hosts a large number of models that are each invoked infrequently, the endpoint might perform better if you disable model caching. To disable model caching, set the value of this parameter to `Disabled`. Allowed values are: `Enabled` and `Disabled`. ## Inference Execution Config From 9aff00799addaa56728b4a5eb4ac27a29c5384a4 Mon Sep 17 00:00:00 2001 From: Deepak <46580663+deepakbshetty@users.noreply.github.com> Date: Sun, 8 Sep 2024 06:27:36 +0100 Subject: [PATCH 2/4] changelog cleanup --- .changelog/35873.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.changelog/35873.txt b/.changelog/35873.txt index b9d0f1b742e..285b74f4b1b 100644 --- a/.changelog/35873.txt +++ b/.changelog/35873.txt @@ -1,6 +1,5 @@ -@@ -0,0 +1,7 @@ ```release-note:enhancement resource/aws_sagemaker_model: Add `primary_container.model_data_source.model_access_config` block and `container.model_data_source.model_access_config` block resource/aws_sagemaker_model: Add `primary_container.multi_model_config` block and `container.multi_model_config` block resource/aws_sagemaker_model: Add `primary_container.inference_specification_name` attribute and `container.inference_specification_name` attribute -``` \ No newline at end of file +``` From ca20e1958ac9592bb99c02fd168fa4e4a92276c5 Mon Sep 17 00:00:00 2001 From: Kit Ewbank Date: Thu, 12 Sep 2024 12:23:07 -0400 Subject: [PATCH 3/4] Tweak CHANGELOG entries. 
--- .changelog/35873.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.changelog/35873.txt b/.changelog/35873.txt index 285b74f4b1b..9fa07a48811 100644 --- a/.changelog/35873.txt +++ b/.changelog/35873.txt @@ -1,5 +1,7 @@ ```release-note:enhancement -resource/aws_sagemaker_model: Add `primary_container.model_data_source.model_access_config` block and `container.model_data_source.model_access_config` block -resource/aws_sagemaker_model: Add `primary_container.multi_model_config` block and `container.multi_model_config` block -resource/aws_sagemaker_model: Add `primary_container.inference_specification_name` attribute and `container.inference_specification_name` attribute +resource/aws_sagemaker_model: Add `primary_container.model_data_source.s3_data_source.model_access_config`, `primary_container.multi_model_config`, `container.model_data_source.s3_data_source.model_access_config`, and `container.multi_model_config` configuration blocks +``` + +```release-note:enhancement +resource/aws_sagemaker_model: Add `primary_container.inference_specification_name` and `container.inference_specification_name` arguments +``` From 68baca37436eb9d71d37bf9c7dbfae903038f378 Mon Sep 17 00:00:00 2001 From: Kit Ewbank Date: Thu, 12 Sep 2024 13:09:32 -0400 Subject: [PATCH 4/4] r/aws_sagemaker_model: Fix duplicate resources in acceptance test configurations. 
--- internal/service/sagemaker/model_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/internal/service/sagemaker/model_test.go b/internal/service/sagemaker/model_test.go index 50563e562e6..8bb57408fa9 100644 --- a/internal/service/sagemaker/model_test.go +++ b/internal/service/sagemaker/model_test.go @@ -613,6 +613,7 @@ func testAccModelConfig_base(rName string) string { return fmt.Sprintf(` data "aws_region" "current" {} data "aws_partition" "current" {} + resource "aws_iam_role" "test" { name = %[1]q path = "/" @@ -630,7 +631,7 @@ data "aws_iam_policy_document" "test" { } } -resource "aws_iam_role_policy_attachment" "test" { +resource "aws_iam_role_policy_attachment" "full_access" { role = aws_iam_role.test.name policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSageMakerFullAccess" } @@ -783,8 +784,6 @@ resource "aws_s3_object" "test" { // lintignore:AWSAT003,AWSAT005 func testAccModelConfig_primaryContainerPackageName(rName string) string { return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` -data "aws_region" "current" {} - locals { region_account_map = { us-east-1 = "865070037744"