Skip to content

Commit

Permalink
Merge branch 'main' into feat/sagemaker-model-cicd
Browse files Browse the repository at this point in the history
  • Loading branch information
kukushking authored Jul 8, 2024
2 parents 5ba382d + 27d11ae commit 12fb241
Show file tree
Hide file tree
Showing 38 changed files with 2,880 additions and 20 deletions.
22 changes: 11 additions & 11 deletions README.md

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions modules/sagemaker/sagemaker-model-monitoring/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ for each monitoring job:
* Model Bias: [ClarifyCheck step](https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-steps.html#step-type-clarify-check)
* Model Explainability: [ClarifyCheck step](https://docs.aws.amazon.com/sagemaker/latest/dg/build-and-manage-steps.html#step-type-clarify-check)

Note that updating parameters will require replacing resources. Deployments may be delayed until any
running monitoring jobs complete (and the resources can be destroyed).

### Architecture

![SageMaker Model Monitoring Module Architecture](docs/_static/sagemaker-model-monitoring-module-architecture.png "SageMaker Model Monitoring Module Architecture")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from aws_cdk import aws_sagemaker as sagemaker
from constructs import Construct

from sagemaker_model_monitoring.utils import generate_unique_id


class DataQualityConstruct(Construct):
"""
Expand Down Expand Up @@ -33,6 +35,26 @@ def __init__(
) -> None:
super().__init__(scope, construct_id, **kwargs)

# CloudFormation doesn't seem to properly wait for the job definition name to be properly populated if we allow
# it to autogenerate it. Generate one which will hopefully not conflict.
unique_id = generate_unique_id(
monitor_image_uri,
endpoint_name,
model_bucket_name,
data_quality_checkstep_output_prefix,
data_quality_output_prefix,
kms_key_id,
model_monitor_role_arn,
security_group_id,
subnet_ids,
instance_count,
instance_type,
instance_volume_size_in_gb,
max_runtime_in_seconds,
schedule_expression,
)
job_definition_name = f"{endpoint_name}-data-quality-{unique_id}"

data_quality_job_definition = sagemaker.CfnDataQualityJobDefinition(
self,
"DataQualityJobDefinition",
Expand Down Expand Up @@ -65,7 +87,7 @@ def __init__(
volume_kms_key_id=kms_key_id,
)
),
job_definition_name=f"{endpoint_name}-data-quality-def",
job_definition_name=job_definition_name,
role_arn=model_monitor_role_arn,
data_quality_baseline_config=sagemaker.CfnDataQualityJobDefinition.DataQualityBaselineConfigProperty(
constraints_resource=sagemaker.CfnDataQualityJobDefinition.ConstraintsResourceProperty(
Expand Down Expand Up @@ -97,6 +119,6 @@ def __init__(
schedule_expression=schedule_expression,
),
),
monitoring_schedule_name=f"{endpoint_name}-data-quality",
monitoring_schedule_name=f"{job_definition_name}-schedule",
)
data_quality_monitor_schedule.add_dependency(data_quality_job_definition)
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from aws_cdk import aws_sagemaker as sagemaker
from constructs import Construct

from sagemaker_model_monitoring.utils import generate_unique_id


class ModelBiasConstruct(Construct):
"""
Expand Down Expand Up @@ -39,6 +41,32 @@ def __init__(
) -> None:
super().__init__(scope, construct_id, **kwargs)

# CloudFormation doesn't seem to properly wait for the job definition name to be properly populated if we allow
# it to autogenerate it. Generate one which will hopefully not conflict.
unique_id = generate_unique_id(
clarify_image_uri,
endpoint_name,
model_bucket_name,
model_bias_checkstep_output_prefix,
model_bias_checkstep_analysis_config_prefix,
model_bias_output_prefix,
ground_truth_prefix,
kms_key_id,
model_monitor_role_arn,
security_group_id,
subnet_ids,
instance_count,
instance_type,
instance_volume_size_in_gb,
max_runtime_in_seconds,
features_attribute,
inference_attribute,
probability_attribute,
probability_threshold_attribute,
schedule_expression,
)
job_definition_name = f"{endpoint_name}-model-bias-{unique_id}"

# To match the defaults in SageMaker.
if model_bias_checkstep_analysis_config_prefix is None:
model_bias_checkstep_analysis_config_prefix = model_bias_checkstep_output_prefix
Expand Down Expand Up @@ -83,7 +111,7 @@ def __init__(
],
kms_key_id=kms_key_id,
),
job_definition_name=f"{endpoint_name}-model-bias-def",
job_definition_name=job_definition_name,
role_arn=model_monitor_role_arn,
model_bias_baseline_config=sagemaker.CfnModelBiasJobDefinition.ModelBiasBaselineConfigProperty(
constraints_resource=sagemaker.CfnModelBiasJobDefinition.ConstraintsResourceProperty(
Expand Down Expand Up @@ -112,6 +140,6 @@ def __init__(
schedule_expression=schedule_expression,
),
),
monitoring_schedule_name=f"{endpoint_name}-model-bias",
monitoring_schedule_name=f"{job_definition_name}-schedule",
)
model_bias_monitor_schedule.add_dependency(model_bias_job_definition)
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from aws_cdk import aws_sagemaker as sagemaker
from constructs import Construct

from sagemaker_model_monitoring.utils import generate_unique_id


class ModelExplainabilityConstruct(Construct):
"""
Expand Down Expand Up @@ -37,6 +39,30 @@ def __init__(
) -> None:
super().__init__(scope, construct_id, **kwargs)

# CloudFormation doesn't seem to properly wait for the job definition name to be properly populated if we allow
# it to autogenerate it. Generate one which will hopefully not conflict.
unique_id = generate_unique_id(
clarify_image_uri,
endpoint_name,
model_bucket_name,
model_explainability_checkstep_output_prefix,
model_explainability_checkstep_analysis_config_prefix,
model_explainability_output_prefix,
kms_key_id,
model_monitor_role_arn,
security_group_id,
subnet_ids,
instance_count,
instance_type,
instance_volume_size_in_gb,
max_runtime_in_seconds,
features_attribute,
inference_attribute,
probability_attribute,
schedule_expression,
)
job_definition_name = f"{endpoint_name}-model-explain-{unique_id}"

# To match the defaults in SageMaker.
if model_explainability_checkstep_analysis_config_prefix is None:
model_explainability_checkstep_analysis_config_prefix = model_explainability_checkstep_output_prefix
Expand Down Expand Up @@ -77,7 +103,7 @@ def __init__(
],
kms_key_id=kms_key_id,
),
job_definition_name=f"{endpoint_name}-model-explain-def",
job_definition_name=job_definition_name,
role_arn=model_monitor_role_arn,
model_explainability_baseline_config=sagemaker.CfnModelExplainabilityJobDefinition.ModelExplainabilityBaselineConfigProperty(
constraints_resource=sagemaker.CfnModelExplainabilityJobDefinition.ConstraintsResourceProperty(
Expand Down Expand Up @@ -106,6 +132,6 @@ def __init__(
schedule_expression=schedule_expression,
),
),
monitoring_schedule_name=f"{endpoint_name}-model-explainability",
monitoring_schedule_name=f"{job_definition_name}-schedule",
)
model_explainability_monitor_schedule.add_dependency(model_explainability_job_definition)
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from aws_cdk import aws_sagemaker as sagemaker
from constructs import Construct

from sagemaker_model_monitoring.utils import generate_unique_id


class ModelQualityConstruct(Construct):
"""
Expand Down Expand Up @@ -38,6 +40,31 @@ def __init__(
) -> None:
super().__init__(scope, construct_id, **kwargs)

# CloudFormation doesn't seem to properly wait for the job definition name to be properly populated if we allow
# it to autogenerate it. Generate one which will hopefully not conflict.
unique_id = generate_unique_id(
monitor_image_uri,
endpoint_name,
model_bucket_name,
model_quality_checkstep_output_prefix,
model_quality_output_prefix,
ground_truth_prefix,
kms_key_id,
model_monitor_role_arn,
security_group_id,
subnet_ids,
instance_count,
instance_type,
instance_volume_size_in_gb,
max_runtime_in_seconds,
problem_type,
inference_attribute,
probability_attribute,
probability_threshold_attribute,
schedule_expression,
)
job_definition_name = f"{endpoint_name}-model-quality-{unique_id}"

model_quality_job_definition = sagemaker.CfnModelQualityJobDefinition(
self,
"ModelQualityJobDefinition",
Expand Down Expand Up @@ -77,7 +104,7 @@ def __init__(
],
kms_key_id=kms_key_id,
),
job_definition_name=f"{endpoint_name}-model-quality-def",
job_definition_name=job_definition_name,
role_arn=model_monitor_role_arn,
model_quality_baseline_config=sagemaker.CfnModelQualityJobDefinition.ModelQualityBaselineConfigProperty(
constraints_resource=sagemaker.CfnModelQualityJobDefinition.ConstraintsResourceProperty(
Expand Down Expand Up @@ -106,6 +133,6 @@ def __init__(
schedule_expression=schedule_expression,
),
),
monitoring_schedule_name=f"{endpoint_name}-model-quality",
monitoring_schedule_name=f"{job_definition_name}-schedule",
)
model_quality_monitor_schedule.add_dependency(model_quality_job_definition)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from hashlib import md5
from typing import Any


def generate_unique_id(*args: Any) -> str:
    """
    Derive a short, deterministic identifier from the given arguments.

    Every argument is converted with ``str()`` and encoded to bytes; the
    encoded pieces are joined with a NUL byte separator and hashed with MD5.
    The hash is used only to build a stable name suffix, not for security.

    Returns:
        The first 10 hexadecimal characters of the MD5 digest.
    """
    encoded_parts = [str(value).encode() for value in args]
    digest = md5(b"\x00".join(encoded_parts)).hexdigest()
    return digest[:10]
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@ The template contains an example SageMaker Pipeline to train a model on Abalone

![Abalone with XGBoost](docs/_static/abalone-xgboost-template.png "Abalone with XGBoost Template Architecture")

#### LLM fine-tuning and evaluation

The template is based on the LLM fine-tuning template from the [AWS Enterprise MLOps Framework](https://github.com/aws-samples/aws-enterprise-mlops-framework/tree/main/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/finetune_deploy_llm_product).

![LLM fine-tuning and evaluation template](docs/_static/llm-evaluate.png "LLM Evaluate Template Architecture")
![SM pipeline graph](docs/_static/llm-evaluation-pipeline-graph.png "SM Pipeline graph")

The template is based on the basic multi-account template from the [AWS Enterprise MLOps Framework](https://github.com/aws-samples/aws-enterprise-mlops-framework/blob/main/mlops-multi-account-cdk/mlops-sm-project-template/README.md#sagemaker-project-stack).

#### Batch Inference Template
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<mxfile modified="2024-04-15T13:30:57.955Z" host="design-inspector.a2z.com" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36" etag="LSdO9LAzmekkD11aoc1V" version="10.1.8" type="device"><diagram id="8qc0D0XUhG9jV8B8jlGXA" name="Page-1">7V1rd5s4E/41+agcQOL20Y7bbk7bPXmb7WX3yx4BwqbBxgtyYvfXvyMDNtgYOw22wVaak1oD6K6ZZx6NxQ2+G88/xHQ6+hx5LLzRFG9+gwc3mmYbBP4KwSIVGHomGMaBl4rUteAx+MUyoZJJZ4HHktKNPIpCHkzLQjeaTJjLSzIax9FL+TY/CsulTumQbQkeXRpuS78HHh+lUksz1/I/WDAc5SWrhp1eGdP85qwlyYh60UtBhN/d4Ls4inj6aTy/Y6Hou7xfsucmbM7FlXvvGw1nWbWIscrhC0uiWeyyAUvcOJjyKIaH4kyY3v3vDe7ltWBxQMPgF+VBNEHPLE7g//Su5+wWmnVZXJFxVuYjG9MJD9wB5fQumnAaTFh8SO7p0zwOJsNPAWcxDdOx42zCS62extGUxTybNSPOxXj3brT38Au3R2E0XNwmzJ3FAV/c0jH9FU1uPfYMl/1oNvGWNYCEF9BhTMfoOUhmq5qBnGqartu6gVzs+Yi4moscYtmQVEzfsKjj2ukQv0/rfP/lfqtbX1UrmMXBcIKCSTKFeSr68r0bjafRBFqeQMIi1FIc3Ue6QTREqGoiW7d0xBzfcwzdJw5zm+2aZJFwNkZjsWRhPECiEB3bum0iolkYEd/QkaXYCqLMMH3bw8wmdrFT4EP1TMivVszN/FI23aun/mrVxeV58ZoJr3Zkwqtywl/dhI+cn8JYaUpIHbCXy3Zl0xUmhhuEfy2mWZ8PGeQfuGjVd+l9+Ur5qH0f9T/+p3z7rkW//oEBpuM//0DqauqtllbCF7lNi0WnMJGBcoP7LyOYl49TuhzkFzDiIBvxsaiVCh/9IAzvoIuhIYMJVABEHk1Gy8fF9bQ6MPM5m+9cvJko9vyvU5idaeEFQ50kjIsmfysuIFxoxAcWjRmPF3BzXoqerd5sWphZ8mVtpU0jk40KFhrrG6t+uMp6Pc4Hqpiazu+A4rH+c5WB8uPr6IP985vVt36GH370TqOMFN11day6yNOIiojtYlh3HmgkD5um6epEM3ypjPYoo6N3CvF11bZVH/nYNaBTCHSKoyhIUbFuKNSxNZ/s6pRVHV5eXm5f8G0UiyZCdrZoqWiEpiHQByhZwKSeowkUiLnQek22sNTjQ+GgoITHM5fPYibq4RDGqGshhTAdEQYDbhm2mIa64Ts2MR3TOWoDa9dnrXZpG1xoyjKK6qfGcaehfASf6zN9WqrBh2Aqlg94iNXW83nqHmIv8dqdqjKY0ygQKwI+6334Bf16p9zocOVOpG41fUOwmTbLAnU7JfIoCzbTZlmgbmavbpSvblawINhKlbJXNspXChWEX9yPZlz0+d3K5RYwQpj1AObuBlYoIIkqoOHDPMmcflXL01nHi1zBaV4O5Xi+XL239CXRlNshIJjpssx7d2n1Ki7/K4YeMuBx9MTySsES1IhlqaQa2AgUE4Dv3wtBL4KYR6IUmqVC5nORI7RALNplaoCVrNaFInq9vtm3ikBJOSNQIqpeAkoa3kZKxNC3kZJqHBsppctOQiUJlSRUklDpDVCpBhIcWh1NURRRHdC075d1cUegT6AiKfJ4G2jLFd0Vo7Z99MaIApoIDwJrtViNeUOWI4go5qNoGE1o+G4t7ZfpjwJA+ck4X2RohM54JKDOKodPkQADBcpDFLTPjhdXI9Wgj9Wn/6GHiERfEyX5d
b9o0tzHLIRp8Mw2ctg23dmjDwLUrmGCppT5lC3rz2k8ZDx7qjSFCtVoABN0BBLU1r5t61yCggpQIBHfYabf0FXdNDQFmcSBzjUVBlWxPeTYjq8bjuE65Limvx/SydOfMND3g0pFUqNZmxqr7V7xVN9VDIMhbBlixjMFWVghyHCZTwDtKTQjmltjaqmbqca1gd3ZczvNa+6Qz8Zhb5nf2mv+JGrwECUBT5WpE3EejeGGZdX61H0aLk1vwVP2lz87Pe+Cba7kHM7lUpsbprLCo8YVWw8N+tM1U771lvNqVeu2yaKWqyue5yFKbQsRy2WIqtRFOrV1H1xKwsyGEUULfOlOe5onsDVX7dxp+rG8OzYP+I/C57/FLbd6lhrMsyeWiUUh8QBNADUt+i2VTUBl/ygmCjmJ5DqrZWpRTG1mdlyXc8cOUha7ljp0tYNxFv+0EWpZ0ztiDeur3zadIh1J6UheriNZry/b40v+XjuajjSsGGDVdBhYKeT6sG4JVVRETd1Dlso8TImp6Vg7G0IxQrF76wXQDmPIlzY1FTm54A569C4ajwPx7Bc2Fa5sJGxSeh+U4mw+u1x9hRwrYZAL+bpZvvsRUDrdXheLUBmPUBWTUBmXsB2bULptGS1QUcKmsEpmbgvV7dvyAINtYZWsKppi82m14ml14+ndsQwb+/nw770Yj60YB7hGBiZcLFwbBDFklMKAicByG6EG8Az8kL5dFZyw4kNKkQN7CZb6uAUXaiXWVCmqc0/cBV2aZ0j4wVzUY0cgRq560jCMPiQrAzIKC+B8pM3WBoe2zdpYFayNdewoiNy+dBSqNm0eJVRtM3kjAyFaTU/thDBnCIXokOfTTaqMHIsqOwsrZV0DK0U6beq1napbmvrWmfpL0c2SlarVl5KVunBWKo9nhSd74Ob71BUqyaBj4eNPnGRapqP2UlT7S3wEW6n0Z+4T2OG3cl8JPojzsiTn1R3Oy1B6GJuv47w001RV44o4rwS3iOvSiN4SrsvqNABu2uh2BgAzRoiDbYpc4jiIKABrHNtQEcAAi7mqjVk+ddoDgK+bCaqCDSXDLhmhi2OE8NGCp87BCOX7MwcwQmZnGSHc7TglLOOUJCMkGaFWMEJNH513Nkbo6PFWHWeExCZffxaEXiORSU6a0wH7TWtLIUma1pM0MjDpwMCkdP6f8XyWjYPsWhOXtDq4tpvQVJ4k2yFoKuOSLpuNKkAWyTtdHO+k1R+f95Yv7XX9y3arQ+v3k1hKZ0ksrSvnuNVXXyKFDiCFS1H0ksQ6UzjQiUiso5NxF0BirfcoG+CxpqvMDgAs9YfcSCpLUlldpLJWS+B8bBb0e0vZrK4cj3QauygxqmSzJJv1BjbroQQ3JKF1UYQWrj+DoWuBVAcf+JSfU91BDgp3+1v0WH6Lvjv2/VJ0s+SgzhSAdBoOqum3DlwcB1X12oi3f+MNMh2nmR5ARMnDnrpERPX6mt17HRGlqsYABu96iKj1/D8fC6VvntDdGhaq2yi1acsoUapkoSQL1eBLryQb1Wk2yvqbq9+i79N/jI/qF0Obe38lX0NEjvbGK3kmeu4qHRymlTk07abI6uZRB8BHffXbBj5+0zm/kECXfD20h2S4eFZLot0L5mRbRpE9xAxBF7ksgYmhiBqdliDDMlJLEmSSIGuaICNk44AosyUEWQ6L2oBRJYHSeQJFQqULhkr1519VAJczcGOncf6u5JWBO3iBo52D3hQ9dhZGS9tmtHb0XxfOU68b+naghd9htFp6nnpbmaCmGa369SAZLcloSZh2cYzWXzGUDo99FiNzYjJLk2SWJLMkmdV0tJfeVjIr93JaAE8lmSXJLImS2ouSatuxgVnaw2M17PJdDo/1OlwoX38jcaHEhY1vcqp6GRfibVyIK3AhPjou7PZbcPC1vgWni/hI4v7O4/5LgbjnR2W/s7t4tHfqXGHwff2GywFblVqDKOHEW5VdedFPffXbZvPbusV3oq3Ko2+5yq1KS
cJdDwl3UoSyf6vy3TMNZ5SzY+9W1ilJyUpJVkqyUg2yUoZx9t3KelDUBoQqWYvOsxYSKF0wUKptxzZsOcOG5Wkcv05uWO4Hnl/YMEj4Ul+fA3hqEnhK4CmBZ8PA0zS1lgLPnK6XwFMCzxY0UALPjgLPbdjSHuDZMFPfcuApujCKeOHWD0JRpKOC3/0f</diagram></mxfile>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Adding a comment here - empty files create issues with zipping https://github.com/aws/aws-cdk/issues/19012
Loading

0 comments on commit 12fb241

Please sign in to comment.