Commit ee87eb0: Merge branch 'master' into patch-1

maaquib authored Jun 21, 2022
2 parents 35d0963 + 0513c7a commit ee87eb0
Showing 24 changed files with 738 additions and 180 deletions.
30 changes: 30 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,35 @@
 # Changelog

+## v2.0.10 (2022-04-07)
+
+### Bug Fixes and Other Changes
+
+* pass model directory as input to torchserve
+
+## v2.0.9 (2022-04-04)
+
+### Bug Fixes and Other Changes
+
+* Update CI to use PyTorch 1.10
+
+## v2.0.8 (2022-01-13)
+
+### Bug Fixes and Other Changes
+
+* log4j migration from 1 to 2. Replace properties file with xml.
+
+## v2.0.7 (2021-10-26)
+
+### Bug Fixes and Other Changes
+
+* Enable default model fn for cpu and gpu
+
+## v2.0.6 (2021-10-04)
+
+### Bug Fixes and Other Changes
+
+* Env variable support for batch inference
+
 ## v2.0.5 (2021-03-17)

 ### Bug Fixes and Other Changes
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-2.0.6.dev0
+2.0.11.dev0
14 changes: 7 additions & 7 deletions buildspec.yml
@@ -2,10 +2,10 @@ version: 0.2

 env:
   variables:
-    FRAMEWORK_VERSION: '1.6.0'
+    FRAMEWORK_VERSION: '1.10.2'
     EIA_FRAMEWORK_VERSION: '1.3.1'
     CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
-    GPU_INSTANCE_TYPE: 'ml.p2.8xlarge'
+    GPU_INSTANCE_TYPE: 'ml.p3.8xlarge'
     EIA_ACCELERATOR_TYPE: 'ml.eia2.medium'
     ECR_REPO: 'sagemaker-test'
     GITHUB_REPO: 'sagemaker-pytorch-serving-container'
@@ -39,12 +39,12 @@ phases:
       - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
       - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
      - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
-      - DLC_EIA_TAG="$FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
+      - DLC_EIA_TAG="$EIA_FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"

       # run local CPU integration tests (build and push the image to ECR repo)
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
+      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
+      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"

       # launch remote GPU instance
@@ -65,10 +65,10 @@ phases:
       # run GPU local integration tests
       - printf "$SETUP_CMDS" > $SETUP_FILE
       # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
-      - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
+      - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
       - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
+      - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
       - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
       - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"

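For readers who want to reproduce the local CPU run outside CodeBuild, here is a hedged sketch of driving the same tox entry point from Python. The region, account id, and tag are placeholder assumptions; the flags themselves are copied from the buildspec above:

```python
import os
import subprocess

# Placeholder values standing in for the CodeBuild environment; adjust to your setup.
region = "us-west-2"                 # assumption
account = "111122223333"             # hypothetical AWS account id
framework_version = "1.10.2"
ecr_repo = "sagemaker-test"
tag = f"{framework_version}-pytorch-local"  # stands in for $GENERIC_TAG

# Same flags as the buildspec's local CPU integration test command.
cmd = (
    "IGNORE_COVERAGE=- tox -e py36 -- test/integration/local -vv -rA -s "
    "--build-image --push-image --dockerfile-type pytorch "
    f"--region {region} --docker-base-name {ecr_repo} "
    f"--aws-id {account} --framework-version {framework_version} "
    f"--processor cpu --tag {tag}"
)
subprocess.check_call(cmd, shell=True, env=dict(os.environ, AWS_DEFAULT_REGION=region))
```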
src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py

@@ -13,7 +13,6 @@
 from __future__ import absolute_import

 import os
-import textwrap

 import torch
 from sagemaker_inference import (
@@ -29,9 +28,21 @@
 DEFAULT_MODEL_FILENAME = "model.pt"


+class ModelLoadError(Exception):
+    pass
+
+
 class DefaultPytorchInferenceHandler(default_inference_handler.DefaultInferenceHandler):
     VALID_CONTENT_TYPES = (content_types.JSON, content_types.NPY)

+    @staticmethod
+    def _is_model_file(filename):
+        is_model_file = False
+        if os.path.isfile(filename):
+            _, ext = os.path.splitext(filename)
+            is_model_file = ext in [".pt", ".pth"]
+        return is_model_file
+
     def default_model_fn(self, model_dir):
         """Loads a model. For PyTorch, a default function to load a model only if Elastic Inference is used.
         In other cases, users should provide customized model_fn() in script.
@@ -47,12 +58,30 @@ def default_model_fn(self, model_dir):
                 raise FileNotFoundError("Failed to load model with default model_fn: missing file {}."
                                         .format(DEFAULT_MODEL_FILENAME))
             # Client-framework is CPU only. But model will run in Elastic Inference server with CUDA.
-            return torch.jit.load(model_path, map_location=torch.device('cpu'))
+            try:
+                return torch.jit.load(model_path, map_location=torch.device('cpu'))
+            except RuntimeError as e:
+                raise ModelLoadError(
+                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
+                ) from e
         else:
-            raise NotImplementedError(textwrap.dedent("""
-            Please provide a model_fn implementation.
-            See documentation for model_fn at https://github.com/aws/sagemaker-python-sdk
-            """))
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+            model_path = os.path.join(model_dir, DEFAULT_MODEL_FILENAME)
+            if not os.path.exists(model_path):
+                model_files = [file for file in os.listdir(model_dir) if self._is_model_file(file)]
+                if len(model_files) != 1:
+                    raise ValueError(
+                        "Exactly one .pth or .pt file is required for PyTorch models: {}".format(model_files)
+                    )
+                model_path = os.path.join(model_dir, model_files[0])
+            try:
+                model = torch.jit.load(model_path, map_location=device)
+            except RuntimeError as e:
+                raise ModelLoadError(
+                    "Failed to load {}. Please ensure model is saved using torchscript.".format(model_path)
+                ) from e
+            model = model.to(device)
+            return model

     def default_input_fn(self, input_data, content_type):
         """A default input_fn that can handle JSON, CSV and NPZ formats.
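As context for the new default `model_fn`: both branches call `torch.jit.load`, so the model artifact must be a TorchScript archive, saved as `model.pt` (or the single `.pt`/`.pth` file) at the top level of `model_dir`. A minimal sketch of exporting such an artifact; the `MyModel` module is a hypothetical stand-in:

```python
import torch


class MyModel(torch.nn.Module):
    """Hypothetical example module; any scriptable torch.nn.Module works."""

    def forward(self, x):
        return x * 2


model = MyModel().eval()

# default_model_fn loads with torch.jit.load, so export via TorchScript
# (torch.jit.script here; torch.jit.trace also produces a loadable archive).
torch.jit.script(model).save("model.pt")

# Mirrors what the handler does at load time on a non-EI host.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loaded = torch.jit.load("model.pt", map_location=device)
```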
50 changes: 0 additions & 50 deletions src/sagemaker_pytorch_serving_container/etc/log4j.properties

This file was deleted.

87 changes: 87 additions & 0 deletions src/sagemaker_pytorch_serving_container/etc/log4j2.xml
@@ -0,0 +1,87 @@
<?xml version="1.0" encoding="UTF-8"?>
<Configuration>
<Appenders>
<Console name="STDOUT" target="SYSTEM_OUT">
<PatternLayout pattern="%d{ISO8601} [%-5p] %t %c - %m%n"/>
</Console>
<RollingFile
name="access_log"
fileName="${env:LOG_LOCATION:-logs}/access_log.log"
filePattern="${env:LOG_LOCATION:-logs}/access_log.%d{dd-MMM}.log.gz">
<PatternLayout pattern="%d{ISO8601} - %m%n"/>
<Policies>
<SizeBasedTriggeringPolicy size="10 MB"/>
<TimeBasedTriggeringPolicy/>
</Policies>
<DefaultRolloverStrategy max="5"/>
</RollingFile>
<RollingFile
name="model_log"
fileName="${env:LOG_LOCATION:-logs}/model_log.log"
filePattern="${env:LOG_LOCATION:-logs}/model_log.%d{dd-MMM}.log.gz">
<PatternLayout pattern="%d{ISO8601} [%-5p] %c - %m%n"/>
<Policies>
<SizeBasedTriggeringPolicy size="10 MB"/>
<TimeBasedTriggeringPolicy/>
</Policies>
<DefaultRolloverStrategy max="5"/>
</RollingFile>
<RollingFile
name="model_metrics"
fileName="${env:METRICS_LOCATION:-logs}/model_metrics.log"
filePattern="${env:METRICS_LOCATION:-logs}/model_metrics.%d{dd-MMM}.log.gz">
<PatternLayout pattern="%d{ISO8601} - %m%n"/>
<Policies>
<SizeBasedTriggeringPolicy size="10 MB"/>
<TimeBasedTriggeringPolicy/>
</Policies>
<DefaultRolloverStrategy max="5"/>
</RollingFile>
<RollingFile
name="ts_log"
fileName="${env:LOG_LOCATION:-logs}/ts_log.log"
filePattern="${env:LOG_LOCATION:-logs}/ts_log.%d{dd-MMM}.log.gz">
<PatternLayout pattern="%d{ISO8601} [%-5p] %t %c - %m%n"/>
<Policies>
<SizeBasedTriggeringPolicy size="10 MB"/>
<TimeBasedTriggeringPolicy/>
</Policies>
<DefaultRolloverStrategy max="5"/>
</RollingFile>
<RollingFile
name="ts_metrics"
fileName="${env:METRICS_LOCATION:-logs}/ts_metrics.log"
filePattern="${env:METRICS_LOCATION:-logs}/ts_metrics.%d{dd-MMM}.log.gz">
<PatternLayout pattern="%d{ISO8601} - %m%n"/>
<Policies>
<SizeBasedTriggeringPolicy size="10 MB"/>
<TimeBasedTriggeringPolicy/>
</Policies>
<DefaultRolloverStrategy max="5"/>
</RollingFile>
</Appenders>
<Loggers>
<Logger name="ACCESS_LOG" level="info">
<AppenderRef ref="access_log"/>
</Logger>
<Logger name="io.netty" level="error"/>
<Logger name="MODEL_LOG" level="info">
<AppenderRef ref="model_log"/>
</Logger>
<Logger name="MODEL_METRICS" level="info">
<AppenderRef ref="model_metrics"/>
</Logger>
<Logger name="org.apache" level="off"/>
<Logger name="org.pytorch.serve" level="info">
<AppenderRef ref="ts_log"/>
</Logger>
<Logger name="TS_METRICS" level="info">
<AppenderRef ref="ts_metrics"/>
</Logger>
<Root level="info">
<AppenderRef ref="STDOUT"/>
<AppenderRef ref="ts_log"/>
</Root>
</Loggers>
</Configuration>
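One practical consequence of this configuration: every appender resolves its output directory from `${env:LOG_LOCATION:-logs}` or `${env:METRICS_LOCATION:-logs}`, so log output can be redirected per container without editing the XML. A hedged sketch of launching TorchServe against this file; the directory paths are assumptions, and the real container wires this up itself:

```python
import os
import subprocess

# Assumed log directories; any writable paths work.
env = dict(
    os.environ,
    LOG_LOCATION="/opt/ml/logs",       # resolved by ${env:LOG_LOCATION:-logs}
    METRICS_LOCATION="/opt/ml/logs",   # resolved by ${env:METRICS_LOCATION:-logs}
)

# Standard TorchServe CLI flags; the log4j2.xml path matches this repo's layout.
subprocess.check_call(
    [
        "torchserve",
        "--start",
        "--model-store", "/opt/ml/model",
        "--log-config", "src/sagemaker_pytorch_serving_container/etc/log4j2.xml",
    ],
    env=env,
)
```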
