Merged
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -6,6 +6,7 @@ CHANGELOG
========

* bug-fix: Estimators: Fix serialization of single records
* bug-fix: Deprecate enable_cloudwatch_metrics in Framework Estimators

1.9.0
=====
10 changes: 7 additions & 3 deletions src/sagemaker/estimator.py
@@ -15,6 +15,7 @@
import json
import logging
import os
import warnings
from abc import ABCMeta
from abc import abstractmethod
from six import with_metaclass
@@ -550,8 +551,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl
The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker.
For convenience, this accepts other types for keys and values, but ``str()`` will be called
to convert them before training.
enable_cloudwatch_metrics (bool): Whether training and hosting containers will
generate CloudWatch metrics under the AWS/SageMakerContainer namespace (default: False).
enable_cloudwatch_metrics (bool): [DEPRECATED] CloudWatch metrics are now emitted by all SageMaker
training jobs. This argument is ignored and will be removed in a future release.
container_log_level (int): Log level to use within the container (default: logging.INFO).
Valid values are defined in the Python logging module.
code_location (str): Name of the S3 bucket where custom code is uploaded (default: None).
@@ -564,7 +565,10 @@
super(Framework, self).__init__(**kwargs)
self.source_dir = source_dir
self.entry_point = entry_point
self.enable_cloudwatch_metrics = enable_cloudwatch_metrics
if enable_cloudwatch_metrics:
warnings.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.',
DeprecationWarning)
self.enable_cloudwatch_metrics = False
self.container_log_level = container_log_level
self._hyperparameters = hyperparameters or {}
self.code_location = code_location
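For context, here is a minimal sketch (not part of this diff) of how the deprecation surfaces to callers. The entry point, role, and instance settings are illustrative placeholders, and it assumes AWS credentials and a default region are configured, since the constructor may create a boto3-backed session:

    import warnings

    from sagemaker.mxnet import MXNet  # any Framework subclass behaves the same

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')  # DeprecationWarning is hidden by default
        estimator = MXNet(entry_point='train.py',          # placeholder script
                          role='SageMakerRole',            # placeholder IAM role
                          train_instance_count=1,
                          train_instance_type='ml.m4.xlarge',
                          enable_cloudwatch_metrics=True)  # deprecated flag

    # The flag is ignored: a DeprecationWarning is emitted and the
    # attribute is forced to False regardless of what was passed in.
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)
    assert estimator.enable_cloudwatch_metrics is False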
3 changes: 0 additions & 3 deletions src/sagemaker/mxnet/README.rst
@@ -543,9 +543,6 @@ The MXNetModel constructor takes the following arguments:
directory with any other training source code dependencies including
the entry point file. Structure within this directory will be
preserved when training on SageMaker.
- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training
and hosting containers will generate Cloudwatch metrics under the
AWS/SageMakerContainer namespace.
- ``container_log_level (int):`` Log level to use within the container.
Valid values are defined in the Python logging module.
- ``code_location (str):`` Optional. Name of the S3 bucket where your
13 changes: 3 additions & 10 deletions tests/unit/test_chainer.py
@@ -66,15 +66,14 @@ def _get_full_gpu_image_uri(version):


def _chainer_estimator(sagemaker_session, framework_version=defaults.CHAINER_VERSION, train_instance_type=None,
enable_cloudwatch_metrics=False, base_job_name=None, use_mpi=None, num_processes=None,
base_job_name=None, use_mpi=None, num_processes=None,
process_slots_per_host=None, additional_mpi_options=None, **kwargs):
return Chainer(entry_point=SCRIPT_PATH,
framework_version=framework_version,
role=ROLE,
sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT,
train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
enable_cloudwatch_metrics=enable_cloudwatch_metrics,
base_job_name=base_job_name,
use_mpi=use_mpi,
num_processes=num_processes,
@@ -152,7 +151,6 @@ def _create_train_job_with_additional_hyperparameters(version):
},
'hyperparameters': {
'sagemaker_program': json.dumps('dummy_script.py'),
'sagemaker_enable_cloudwatch_metrics': 'false',
'sagemaker_container_log_level': str(logging.INFO),
'sagemaker_job_name': json.dumps(JOB_NAME),
'sagemaker_submit_directory':
@@ -225,12 +223,10 @@ def test_attach_with_additional_hyperparameters(sagemaker_session, chainer_versi
def test_create_model(sagemaker_session, chainer_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
framework_version=chainer_version, container_log_level=container_log_level,
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
enable_cloudwatch_metrics=enable_cloudwatch_metrics)
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
chainer.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -244,7 +240,6 @@ def test_create_model(sagemaker_session, chainer_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -269,13 +264,11 @@ def test_create_model_with_optional_params(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
custom_image = 'ubuntu:latest'
chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
image_name=custom_image, container_log_level=container_log_level,
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir,
enable_cloudwatch_metrics=enable_cloudwatch_metrics)
py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir)

chainer.fit(inputs='s3://mybucket/train', job_name='new_name')
model = chainer.create_model()
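The fixtures above drop the flag, but the PR adds no regression test for the warning itself. A hypothetical test in the same style (not part of this diff; it assumes pytest is already imported in this module) could exercise it, since **kwargs still forwards the argument to Chainer:

    def test_enable_cloudwatch_metrics_deprecated(sagemaker_session):
        # Deprecated flag is forwarded via **kwargs; it should warn
        # and the resulting attribute should be forced to False.
        with pytest.warns(DeprecationWarning):
            chainer = _chainer_estimator(sagemaker_session,
                                         enable_cloudwatch_metrics=True)
        assert chainer.enable_cloudwatch_metrics is False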
8 changes: 2 additions & 6 deletions tests/unit/test_mxnet.py
@@ -101,11 +101,10 @@ def _create_train_job(version):
def test_create_model(sagemaker_session, mxnet_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
framework_version=mxnet_version, container_log_level=container_log_level,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
mx.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -119,7 +118,6 @@ def test_create_model(sagemaker_session, mxnet_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -144,12 +142,11 @@ def test_create_model_with_optional_params(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
custom_image = 'mxnet:2.0'
mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
image_name=custom_image, container_log_level=container_log_level,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
mx.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -162,7 +159,6 @@ def test_create_model_with_custom_image(sagemaker_session):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


@patch('time.strftime', return_value=TIMESTAMP)
11 changes: 3 additions & 8 deletions tests/unit/test_pytorch.py
@@ -64,15 +64,14 @@ def _get_full_gpu_image_uri(version, py_version=PYTHON_VERSION):


def _pytorch_estimator(sagemaker_session, framework_version=defaults.PYTORCH_VERSION, train_instance_type=None,
enable_cloudwatch_metrics=False, base_job_name=None, **kwargs):
base_job_name=None, **kwargs):
return PyTorch(entry_point=SCRIPT_PATH,
framework_version=framework_version,
py_version=PYTHON_VERSION,
role=ROLE,
sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT,
train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
enable_cloudwatch_metrics=enable_cloudwatch_metrics,
base_job_name=base_job_name,
**kwargs)

@@ -119,11 +118,10 @@ def _create_train_job(version):
def test_create_model(sagemaker_session, pytorch_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
framework_version=pytorch_version, container_log_level=container_log_level,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
pytorch.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -137,7 +135,6 @@ def test_create_model(sagemaker_session, pytorch_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -162,12 +159,11 @@ def test_create_model_with_optional_params(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
image = 'pytorch:9000'
pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE,
container_log_level=container_log_level, image_name=image,
base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
base_job_name='job', source_dir=source_dir)

job_name = 'new_name'
pytorch.fit(inputs='s3://mybucket/train', job_name='new_name')
@@ -180,7 +176,6 @@ def test_create_model_with_custom_image(sagemaker_session):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


@patch('time.strftime', return_value=TIMESTAMP)
10 changes: 3 additions & 7 deletions tests/unit/test_tf_estimator.py
@@ -107,7 +107,7 @@ def _create_train_job(tf_version):


def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_instance_type=None,
checkpoint_path=None, enable_cloudwatch_metrics=False, base_job_name=None,
checkpoint_path=None, base_job_name=None,
training_steps=None, evaluation_steps=None, **kwargs):
return TensorFlow(entry_point=SCRIPT_PATH,
training_steps=training_steps,
@@ -118,7 +118,6 @@ def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_in
train_instance_count=INSTANCE_COUNT,
train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE,
checkpoint_path=checkpoint_path,
enable_cloudwatch_metrics=enable_cloudwatch_metrics,
base_job_name=base_job_name,
**kwargs)

@@ -183,12 +182,11 @@ def test_tf_nonexistent_requirements_path(sagemaker_session):
def test_create_model(sagemaker_session, tf_version):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT,
train_instance_type=INSTANCE_TYPE, framework_version=tf_version,
container_log_level=container_log_level, base_job_name='job',
source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
source_dir=source_dir)

job_name = 'doing something'
tf.fit(inputs='s3://mybucket/train', job_name=job_name)
@@ -202,7 +200,6 @@ def test_create_model(sagemaker_session, tf_version):
assert model.name == job_name
assert model.container_log_level == container_log_level
assert model.source_dir == source_dir
assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics


def test_create_model_with_optional_params(sagemaker_session):
@@ -228,13 +225,12 @@ def test_create_model_with_optional_params(sagemaker_session):
def test_create_model_with_custom_image(sagemaker_session):
container_log_level = '"logging.INFO"'
source_dir = 's3://mybucket/source'
enable_cloudwatch_metrics = 'true'
custom_image = 'tensorflow:1.0'
tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session,
training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT,
train_instance_type=INSTANCE_TYPE, image_name=custom_image,
container_log_level=container_log_level, base_job_name='job',
source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics)
source_dir=source_dir)

job_name = 'doing something'
tf.fit(inputs='s3://mybucket/train', job_name=job_name)