diff --git a/.changelog/35873.txt b/.changelog/35873.txt new file mode 100644 index 00000000000..9fa07a48811 --- /dev/null +++ b/.changelog/35873.txt @@ -0,0 +1,7 @@ +```release-note:enhancement +resource/aws_sagemaker_model: Add `primary_container.model_data_source.s3_data_source.model_access_config`, `primary_container.multi_model_config`, `container.model_data_source.s3_data_source.model_access_config`, and `container.multi_model_config` configuration blocks +``` + +```release-note:enhancement +resource/aws_sagemaker_model: Add `primary_container.inference_specification_name` and `container.inference_specification_name` arguments +``` diff --git a/internal/service/sagemaker/model.go b/internal/service/sagemaker/model.go index d3a31d7bda5..b4b18acb0ef 100644 --- a/internal/service/sagemaker/model.go +++ b/internal/service/sagemaker/model.go @@ -147,12 +147,48 @@ func resourceModel() *schema.Resource { ForceNew: true, ValidateDiagFunc: enum.Validate[awstypes.ModelCompressionType](), }, + "model_access_config": { + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "accept_eula": { + Type: schema.TypeBool, + Required: true, + ForceNew: true, + }, + }, + }, + }, }, }, }, }, }, }, + "inference_specification_name": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validName, + }, + "multi_model_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "model_cache_setting": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateDiagFunc: enum.Validate[awstypes.ModelCacheSetting](), + }, + }, + }, + }, }, }, }, @@ -294,12 +330,49 @@ func resourceModel() *schema.Resource { ForceNew: true, ValidateDiagFunc: enum.Validate[awstypes.ModelCompressionType](), }, + "model_access_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1,
+ Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "accept_eula": { + Type: schema.TypeBool, + Required: true, + ForceNew: true, + }, + }, + }, + }, }, }, }, }, }, }, + "inference_specification_name": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateFunc: validName, + }, + "multi_model_config": { + Type: schema.TypeList, + Optional: true, + ForceNew: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "model_cache_setting": { + Type: schema.TypeString, + Optional: true, + ForceNew: true, + ValidateDiagFunc: enum.Validate[awstypes.ModelCacheSetting](), + }, + }, + }, + }, }, }, }, @@ -551,6 +624,14 @@ func expandContainer(m map[string]interface{}) *awstypes.ContainerDefinition { container.ImageConfig = expandModelImageConfig(v.([]interface{})) } + if v, ok := m["inference_specification_name"]; ok && v.(string) != "" { + container.InferenceSpecificationName = aws.String(v.(string)) + } + + if v, ok := m["multi_model_config"].([]interface{}); ok && len(v) > 0 { + container.MultiModelConfig = expandMultiModelConfig(v) + } + return &container } @@ -589,6 +670,10 @@ func expandS3ModelDataSource(l []interface{}) *awstypes.S3ModelDataSource { s3ModelDataSource.CompressionType = awstypes.ModelCompressionType(v.(string)) } + if v, ok := m["model_access_config"].([]interface{}); ok && len(v) > 0 { + s3ModelDataSource.ModelAccessConfig = expandModelAccessConfig(v) + } + return &s3ModelDataSource } @@ -634,6 +719,38 @@ func expandContainers(a []interface{}) []awstypes.ContainerDefinition { return containers } +func expandModelAccessConfig(l []interface{}) *awstypes.ModelAccessConfig { + if len(l) == 0 { + return nil + } + + m := l[0].(map[string]interface{}) + + modelAccessConfig := &awstypes.ModelAccessConfig{} + + if v, ok := m["accept_eula"].(bool); ok { + modelAccessConfig.AcceptEula = aws.Bool(v) + } + + return modelAccessConfig +} + +func expandMultiModelConfig(l []interface{}) 
*awstypes.MultiModelConfig { + if len(l) == 0 { + return nil + } + + m := l[0].(map[string]interface{}) + + multiModelConfig := &awstypes.MultiModelConfig{} + + if v, ok := m["model_cache_setting"].(string); ok && v != "" { + multiModelConfig.ModelCacheSetting = awstypes.ModelCacheSetting(v) + } + + return multiModelConfig +} + func flattenContainer(container *awstypes.ContainerDefinition) []interface{} { if container == nil { return []interface{}{} @@ -667,6 +784,14 @@ func flattenContainer(container *awstypes.ContainerDefinition) []interface{} { cfg["image_config"] = flattenImageConfig(container.ImageConfig) } + if container.InferenceSpecificationName != nil { + cfg["inference_specification_name"] = aws.ToString(container.InferenceSpecificationName) + } + + if container.MultiModelConfig != nil { + cfg["multi_model_config"] = flattenMultiModelConfig(container.MultiModelConfig) + } + return []interface{}{cfg} } @@ -699,6 +824,10 @@ func flattenS3ModelDataSource(s3ModelDataSource *awstypes.S3ModelDataSource) []i cfg["compression_type"] = s3ModelDataSource.CompressionType + if s3ModelDataSource.ModelAccessConfig != nil { + cfg["model_access_config"] = flattenModelAccessConfig(s3ModelDataSource.ModelAccessConfig) + } + return []interface{}{cfg} } @@ -740,6 +869,32 @@ func flattenContainers(containers []awstypes.ContainerDefinition) []interface{} return fContainers } +func flattenModelAccessConfig(config *awstypes.ModelAccessConfig) []interface{} { + if config == nil { + return []interface{}{} + } + + cfg := make(map[string]interface{}) + + cfg["accept_eula"] = aws.ToBool(config.AcceptEula) + + return []interface{}{cfg} +} + +func flattenMultiModelConfig(config *awstypes.MultiModelConfig) []interface{} { + if config == nil { + return []interface{}{} + } + + cfg := make(map[string]interface{}) + + if config.ModelCacheSetting != "" { + cfg["model_cache_setting"] = config.ModelCacheSetting + } + + return []interface{}{cfg} +} + func expandModelInferenceExecutionConfig(l 
[]interface{}) *awstypes.InferenceExecutionConfig { if len(l) == 0 { return nil diff --git a/internal/service/sagemaker/model_test.go b/internal/service/sagemaker/model_test.go index 6dbbbbdda8d..8bb57408fa9 100644 --- a/internal/service/sagemaker/model_test.go +++ b/internal/service/sagemaker/model_test.go @@ -501,8 +501,119 @@ func testAccCheckModelExists(ctx context.Context, n string) resource.TestCheckFu } } +func TestAccSageMakerModel_primaryContainerModelS3DataSourceAcceptEula(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerModelS3DataSourceAcceptEula(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.model_data_source.0.s3_data_source.0.model_access_config.0.accept_eula", acctest.CtTrue), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func TestAccSageMakerModel_primaryContainerInferenceSpecificationName(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerInferenceSpecificationName(rName), + Check: 
resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.inference_specification_name", "test"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func TestAccSageMakerModel_primaryContainerMultiModelConfigModelCacheSetting(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_primaryContainerMultiModelConfigModelCacheSetting(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, "primary_container.0.multi_model_config.0.model_cache_setting", "Disabled"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + +func TestAccSageMakerModel_containersMultiModelConfigModelCacheSetting(t *testing.T) { + ctx := acctest.Context(t) + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_sagemaker_model.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: acctest.ErrorCheck(t, names.SageMakerServiceID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckModelDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccModelConfig_containersMultiModelConfigModelCacheSetting(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckModelExists(ctx, resourceName), + resource.TestCheckResourceAttr(resourceName, 
"container.0.multi_model_config.0.model_cache_setting", "Disabled"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + func testAccModelConfig_base(rName string) string { return fmt.Sprintf(` +data "aws_region" "current" {} +data "aws_partition" "current" {} + resource "aws_iam_role" "test" { name = %[1]q path = "/" @@ -515,11 +626,16 @@ data "aws_iam_policy_document" "test" { principals { type = "Service" - identifiers = ["sagemaker.amazonaws.com"] + identifiers = ["sagemaker.${data.aws_partition.current.dns_suffix}"] } } } +resource "aws_iam_role_policy_attachment" "full_access" { + role = aws_iam_role.test.name + policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonSageMakerFullAccess" +} + data "aws_sagemaker_prebuilt_ecr_image" "test" { repository_name = "kmeans" } @@ -668,8 +784,6 @@ resource "aws_s3_object" "test" { // lintignore:AWSAT003,AWSAT005 func testAccModelConfig_primaryContainerPackageName(rName string) string { return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` -data "aws_region" "current" {} - locals { region_account_map = { us-east-1 = "865070037744" @@ -949,3 +1063,137 @@ resource "aws_security_group" "test" { } `, rName)) } + +// https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/huggingface-llm.json + +func testAccModelConfig_primaryContainerModelS3DataSourceAcceptEula(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_sagemaker_prebuilt_ecr_image" "accept_eula_test" { + repository_name = "huggingface-pytorch-tgi-inference" + image_tag = "2.3.0-tgi2.2.0-gpu-py310-cu121-ubuntu22.04-v2.0" +} + +resource "aws_sagemaker_model" "test" { + name = %[1]q + enable_network_isolation = true + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = data.aws_sagemaker_prebuilt_ecr_image.accept_eula_test.registry_path + mode = 
"SingleModel" + environment = { + ENDPOINT_SERVER_TIMEOUT = 3600 + HF_MODEL_ID = "/opt/ml/model" + MAX_INPUT_LENGTH = 4095 + MAX_TOTAL_TOKENS = 4096 + MODEL_CACHE_ROOT = "/opt/ml/model" + SAGEMAKER_ENV = 1 + SAGEMAKER_MODEL_SERVER_WORKERS = 1 + SAGEMAKER_PROGRAM = "inference.py" + SM_NUM_GPUS = 4 + } + + model_data_source { + s3_data_source { + compression_type = "None" + s3_data_type = "S3Prefix" + s3_uri = format("s3://jumpstart-private-cache-prod-%%s/meta-textgeneration/meta-textgeneration-llama-2-13b-f/artifacts/inference-prepack/v1.0.0/", data.aws_region.current.name) + model_access_config { + accept_eula = true + } + } + } + } +} +`, rName)) +} + +func testAccModelConfig_primaryContainerInferenceSpecificationName(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +resource "aws_sagemaker_model" "test" { + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = data.aws_sagemaker_prebuilt_ecr_image.test.registry_path + inference_specification_name = "test" + } +} +`, rName)) +} + +// https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/sagemaker-tritonserver.json + +func testAccModelConfig_primaryContainerMultiModelConfigModelCacheSetting(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_sagemaker_prebuilt_ecr_image" "model_cache_setting_test" { + repository_name = "sagemaker-tritonserver" + image_tag = "24.03-py3" +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_s3_object" "test" { + bucket = aws_s3_bucket.test.bucket + key = "resnet50-mme-gpu/model.tar.gz" + content = "some-data" +} + +resource "aws_sagemaker_model" "test" { + depends_on = [aws_s3_object.test] + + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + primary_container { + image = data.aws_sagemaker_prebuilt_ecr_image.model_cache_setting_test.registry_path + 
mode = "MultiModel" + model_data_url = "s3://${aws_s3_bucket.test.id}/resnet50-mme-gpu/" + multi_model_config { + model_cache_setting = "Disabled" + } + } +} +`, rName)) +} + +// https://github.com/aws/sagemaker-python-sdk/blob/master/src/sagemaker/image_uri_config/sagemaker-tritonserver.json + +func testAccModelConfig_containersMultiModelConfigModelCacheSetting(rName string) string { + return acctest.ConfigCompose(testAccModelConfig_base(rName), fmt.Sprintf(` +data "aws_sagemaker_prebuilt_ecr_image" "model_cache_setting_test" { + repository_name = "sagemaker-tritonserver" + image_tag = "24.03-py3" +} + +resource "aws_s3_bucket" "test" { + bucket = %[1]q + force_destroy = true +} + +resource "aws_s3_object" "test" { + bucket = aws_s3_bucket.test.bucket + key = "resnet50-mme-gpu/model.tar.gz" + content = "some-data" +} + +resource "aws_sagemaker_model" "test" { + depends_on = [aws_s3_object.test] + + name = %[1]q + execution_role_arn = aws_iam_role.test.arn + + container { + image = data.aws_sagemaker_prebuilt_ecr_image.model_cache_setting_test.registry_path + mode = "MultiModel" + model_data_url = "s3://${aws_s3_bucket.test.id}/resnet50-mme-gpu/" + multi_model_config { + model_cache_setting = "Disabled" + } + } +} +`, rName)) +} diff --git a/website/docs/r/sagemaker_model.html.markdown b/website/docs/r/sagemaker_model.html.markdown index e324ffcbca3..9571f74254b 100644 --- a/website/docs/r/sagemaker_model.html.markdown +++ b/website/docs/r/sagemaker_model.html.markdown @@ -68,6 +68,8 @@ The `primary_container` and `container` block both support: * `environment` - (Optional) Environment variables for the Docker container. A list of key value pairs. * `image_config` - (Optional) Specifies whether the model container is in Amazon ECR or a private Docker registry accessible from your Amazon Virtual Private Cloud (VPC). 
For more information see [Using a Private Docker Registry for Real-Time Inference Containers](https://docs.aws.amazon.com/sagemaker/latest/dg/your-algorithms-containers-inference-private.html). see [Image Config](#image-config). +* `inference_specification_name` - (Optional) The inference specification name in the model package version. +* `multi_model_config` - (Optional) Specifies additional configuration for multi-model endpoints. See [Multi Model Config](#multi-model-config). ### Image Config @@ -87,6 +89,15 @@ The `primary_container` and `container` block both support: * `compression_type` - (Required) How the model data is prepared. Allowed values are: `None` and `Gzip`. * `s3_data_type` - (Required) The type of model data to deploy. Allowed values are: `S3Object` and `S3Prefix`. * `s3_uri` - (Required) The S3 path of model data to deploy. +* `model_access_config` - (Optional) Specifies the access configuration file for the ML model. You can explicitly accept the model end-user license agreement (EULA) within the `model_access_config` configuration block. See [Model Access Config](#model-access-config). + +##### Model Access Config + +* `accept_eula` - (Required) Specifies agreement to the model end-user license agreement (EULA). The `accept_eula` value must be explicitly set to `true` in order to accept the EULA that this model requires. You are responsible for reviewing and complying with any applicable license terms and making sure they are acceptable for your use case before downloading or using a model. + +### Multi Model Config + +* `model_cache_setting` - (Optional) Whether to cache models for a multi-model endpoint. By default, multi-model endpoints cache models so that a model does not have to be loaded into memory each time it is invoked. Some use cases do not benefit from model caching. For example, if an endpoint hosts a large number of models that are each invoked infrequently, the endpoint might perform better if you disable model caching.
To disable model caching, set the value of this parameter to `Disabled`. Allowed values are: `Enabled` and `Disabled`. ## Inference Execution Config