From b339a4e11ebc5e1601362387dc8a0f756e29c6c2 Mon Sep 17 00:00:00 2001 From: Ignacio Quintero Date: Fri, 13 Jul 2018 16:20:51 -0700 Subject: [PATCH 1/4] Deprecate enable_cloudwatch_metrics from Frameworks There is a warning message for now, but it will be removed later. --- CHANGELOG.rst | 4 ++++ src/sagemaker/estimator.py | 8 +++++--- src/sagemaker/mxnet/README.rst | 3 --- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 625381a942..b906419a16 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,10 @@ CHANGELOG ========= +1.7.1dev +======== +* deprecate enable_cloudwatch_metrics from Framework Estimators. + 1.7.0 ===== diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index 182c00796b..d9c91c9d4d 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -544,8 +544,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl The hyperparameters are made accessible as a dict[str, str] to the training code on SageMaker. For convenience, this accepts other types for keys and values, but ``str()`` will be called to convert them before training. - enable_cloudwatch_metrics (bool): Whether training and hosting containers will - generate CloudWatch metrics under the AWS/SageMakerContainer namespace (default: False). + enable_cloudwatch_metrics (bool): [DEPRECATED] Now there are CloudWatch metrics emitted by all SageMaker + training jobs. This will be ignored for now and removed in a future release. container_log_level (int): Log level to use within the container (default: logging.INFO). Valid values are defined in the Python logging module. code_location (str): Name of the S3 bucket where custom code is uploaded (default: None). 
@@ -558,7 +558,9 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl super(Framework, self).__init__(**kwargs) self.source_dir = source_dir self.entry_point = entry_point - self.enable_cloudwatch_metrics = enable_cloudwatch_metrics + if enable_cloudwatch_metrics: + logging.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.') + self.enable_cloudwatch_metrics = False self.container_log_level = container_log_level self._hyperparameters = hyperparameters or {} self.code_location = code_location diff --git a/src/sagemaker/mxnet/README.rst b/src/sagemaker/mxnet/README.rst index 5eae5246bb..39aac958c7 100644 --- a/src/sagemaker/mxnet/README.rst +++ b/src/sagemaker/mxnet/README.rst @@ -543,9 +543,6 @@ The MXNetModel constructor takes the following arguments: directory with any other training source code dependencies aside from tne entry point file. Structure within this directory will be preserved when training on SageMaker. -- ``enable_cloudwatch_metrics (boolean):`` Optional. If true, training - and hosting containers will generate Cloudwatch metrics under the - AWS/SageMakerContainer namespace. - ``container_log_level (int):`` Log level to use within the container. Valid values are defined in the Python logging module. - ``code_location (str):`` Optional. 
Name of the S3 bucket where your From c0869130290520ed703396699116c17aefcb35ca Mon Sep 17 00:00:00 2001 From: Ignacio Quintero Date: Fri, 13 Jul 2018 17:03:05 -0700 Subject: [PATCH 2/4] Fix unit tests --- tests/unit/test_chainer.py | 13 +++---------- tests/unit/test_mxnet.py | 8 ++------ tests/unit/test_pytorch.py | 11 +++-------- tests/unit/test_tf_estimator.py | 10 +++------- 4 files changed, 11 insertions(+), 31 deletions(-) diff --git a/tests/unit/test_chainer.py b/tests/unit/test_chainer.py index 532c254334..d6055be7c1 100644 --- a/tests/unit/test_chainer.py +++ b/tests/unit/test_chainer.py @@ -66,7 +66,7 @@ def _get_full_gpu_image_uri(version): def _chainer_estimator(sagemaker_session, framework_version=defaults.CHAINER_VERSION, train_instance_type=None, - enable_cloudwatch_metrics=False, base_job_name=None, use_mpi=None, num_processes=None, + base_job_name=None, use_mpi=None, num_processes=None, process_slots_per_host=None, additional_mpi_options=None, **kwargs): return Chainer(entry_point=SCRIPT_PATH, framework_version=framework_version, @@ -74,7 +74,6 @@ def _chainer_estimator(sagemaker_session, framework_version=defaults.CHAINER_VER sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE, - enable_cloudwatch_metrics=enable_cloudwatch_metrics, base_job_name=base_job_name, use_mpi=use_mpi, num_processes=num_processes, @@ -152,7 +151,6 @@ def _create_train_job_with_additional_hyperparameters(version): }, 'hyperparameters': { 'sagemaker_program': json.dumps('dummy_script.py'), - 'sagemaker_enable_cloudwatch_metrics': 'false', 'sagemaker_container_log_level': str(logging.INFO), 'sagemaker_job_name': json.dumps(JOB_NAME), 'sagemaker_submit_directory': @@ -225,12 +223,10 @@ def test_attach_with_additional_hyperparameters(sagemaker_session, chainer_versi def test_create_model(sagemaker_session, chainer_version): container_log_level = '"logging.INFO"' source_dir = 
's3://mybucket/source' - enable_cloudwatch_metrics = 'true' chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, framework_version=chainer_version, container_log_level=container_log_level, - py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir, - enable_cloudwatch_metrics=enable_cloudwatch_metrics) + py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir) job_name = 'new_name' chainer.fit(inputs='s3://mybucket/train', job_name='new_name') @@ -244,19 +240,16 @@ def test_create_model(sagemaker_session, chainer_version): assert model.name == job_name assert model.container_log_level == container_log_level assert model.source_dir == source_dir - assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics def test_create_model_with_custom_image(sagemaker_session): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' - enable_cloudwatch_metrics = 'true' custom_image = 'ubuntu:latest' chainer = Chainer(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, image_name=custom_image, container_log_level=container_log_level, - py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir, - enable_cloudwatch_metrics=enable_cloudwatch_metrics) + py_version=PYTHON_VERSION, base_job_name='job', source_dir=source_dir) chainer.fit(inputs='s3://mybucket/train', job_name='new_name') model = chainer.create_model() diff --git a/tests/unit/test_mxnet.py b/tests/unit/test_mxnet.py index b07057f62b..1345eafa18 100644 --- a/tests/unit/test_mxnet.py +++ b/tests/unit/test_mxnet.py @@ -101,11 +101,10 @@ def _create_train_job(version): def test_create_model(sagemaker_session, mxnet_version): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' - enable_cloudwatch_metrics = 'true' mx = 
MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, framework_version=mxnet_version, container_log_level=container_log_level, - base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + base_job_name='job', source_dir=source_dir) job_name = 'new_name' mx.fit(inputs='s3://mybucket/train', job_name='new_name') @@ -119,18 +118,16 @@ def test_create_model(sagemaker_session, mxnet_version): assert model.name == job_name assert model.container_log_level == container_log_level assert model.source_dir == source_dir - assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics def test_create_model_with_custom_image(sagemaker_session): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' - enable_cloudwatch_metrics = 'true' custom_image = 'mxnet:2.0' mx = MXNet(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, image_name=custom_image, container_log_level=container_log_level, - base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + base_job_name='job', source_dir=source_dir) job_name = 'new_name' mx.fit(inputs='s3://mybucket/train', job_name='new_name') @@ -143,7 +140,6 @@ def test_create_model_with_custom_image(sagemaker_session): assert model.name == job_name assert model.container_log_level == container_log_level assert model.source_dir == source_dir - assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics @patch('time.strftime', return_value=TIMESTAMP) diff --git a/tests/unit/test_pytorch.py b/tests/unit/test_pytorch.py index 92015ce103..b043a1439d 100644 --- a/tests/unit/test_pytorch.py +++ b/tests/unit/test_pytorch.py @@ -64,7 +64,7 @@ def _get_full_gpu_image_uri(version, py_version=PYTHON_VERSION): def _pytorch_estimator(sagemaker_session, 
framework_version=defaults.PYTORCH_VERSION, train_instance_type=None, - enable_cloudwatch_metrics=False, base_job_name=None, **kwargs): + base_job_name=None, **kwargs): return PyTorch(entry_point=SCRIPT_PATH, framework_version=framework_version, py_version=PYTHON_VERSION, @@ -72,7 +72,6 @@ def _pytorch_estimator(sagemaker_session, framework_version=defaults.PYTORCH_VER sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE, - enable_cloudwatch_metrics=enable_cloudwatch_metrics, base_job_name=base_job_name, **kwargs) @@ -119,11 +118,10 @@ def _create_train_job(version): def test_create_model(sagemaker_session, pytorch_version): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' - enable_cloudwatch_metrics = 'true' pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, framework_version=pytorch_version, container_log_level=container_log_level, - base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + base_job_name='job', source_dir=source_dir) job_name = 'new_name' pytorch.fit(inputs='s3://mybucket/train', job_name='new_name') @@ -137,18 +135,16 @@ def test_create_model(sagemaker_session, pytorch_version): assert model.name == job_name assert model.container_log_level == container_log_level assert model.source_dir == source_dir - assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics def test_create_model_with_custom_image(sagemaker_session): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' - enable_cloudwatch_metrics = 'true' image = 'pytorch:9000' pytorch = PyTorch(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, container_log_level=container_log_level, 
image_name=image, - base_job_name='job', source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + base_job_name='job', source_dir=source_dir) job_name = 'new_name' pytorch.fit(inputs='s3://mybucket/train', job_name='new_name') @@ -161,7 +157,6 @@ def test_create_model_with_custom_image(sagemaker_session): assert model.name == job_name assert model.container_log_level == container_log_level assert model.source_dir == source_dir - assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics @patch('time.strftime', return_value=TIMESTAMP) diff --git a/tests/unit/test_tf_estimator.py b/tests/unit/test_tf_estimator.py index a2334dc736..238474004f 100644 --- a/tests/unit/test_tf_estimator.py +++ b/tests/unit/test_tf_estimator.py @@ -107,7 +107,7 @@ def _create_train_job(tf_version): def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_instance_type=None, - checkpoint_path=None, enable_cloudwatch_metrics=False, base_job_name=None, + checkpoint_path=None, base_job_name=None, training_steps=None, evaluation_steps=None, **kwargs): return TensorFlow(entry_point=SCRIPT_PATH, training_steps=training_steps, @@ -118,7 +118,6 @@ def _build_tf(sagemaker_session, framework_version=defaults.TF_VERSION, train_in train_instance_count=INSTANCE_COUNT, train_instance_type=train_instance_type if train_instance_type else INSTANCE_TYPE, checkpoint_path=checkpoint_path, - enable_cloudwatch_metrics=enable_cloudwatch_metrics, base_job_name=base_job_name, **kwargs) @@ -183,12 +182,11 @@ def test_tf_nonexistent_requirements_path(sagemaker_session): def test_create_model(sagemaker_session, tf_version): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' - enable_cloudwatch_metrics = 'true' tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, framework_version=tf_version, 
container_log_level=container_log_level, base_job_name='job', - source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + source_dir=source_dir) job_name = 'doing something' tf.fit(inputs='s3://mybucket/train', job_name=job_name) @@ -202,19 +200,17 @@ def test_create_model(sagemaker_session, tf_version): assert model.name == job_name assert model.container_log_level == container_log_level assert model.source_dir == source_dir - assert model.enable_cloudwatch_metrics == enable_cloudwatch_metrics def test_create_model_with_custom_image(sagemaker_session): container_log_level = '"logging.INFO"' source_dir = 's3://mybucket/source' - enable_cloudwatch_metrics = 'true' custom_image = 'tensorflow:1.0' tf = TensorFlow(entry_point=SCRIPT_PATH, role=ROLE, sagemaker_session=sagemaker_session, training_steps=1000, evaluation_steps=10, train_instance_count=INSTANCE_COUNT, train_instance_type=INSTANCE_TYPE, image_name=custom_image, container_log_level=container_log_level, base_job_name='job', - source_dir=source_dir, enable_cloudwatch_metrics=enable_cloudwatch_metrics) + source_dir=source_dir) job_name = 'doing something' tf.fit(inputs='s3://mybucket/train', job_name=job_name) From 9967d522d1e891951a8ee5a0fde999f473293422 Mon Sep 17 00:00:00 2001 From: Ignacio Quintero Date: Thu, 19 Jul 2018 18:19:19 -0700 Subject: [PATCH 3/4] address PR comments --- src/sagemaker/estimator.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/sagemaker/estimator.py b/src/sagemaker/estimator.py index d9c91c9d4d..a604b764f6 100644 --- a/src/sagemaker/estimator.py +++ b/src/sagemaker/estimator.py @@ -15,6 +15,7 @@ import json import logging import os +import warnings from abc import ABCMeta from abc import abstractmethod from six import with_metaclass @@ -559,7 +560,8 @@ def __init__(self, entry_point, source_dir=None, hyperparameters=None, enable_cl self.source_dir = source_dir self.entry_point = entry_point if enable_cloudwatch_metrics: - 
logging.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.') + warnings.warn('enable_cloudwatch_metrics is now deprecated and will be removed in the future.', + DeprecationWarning) self.enable_cloudwatch_metrics = False self.container_log_level = container_log_level self._hyperparameters = hyperparameters or {} From e5012e17be7b4ce6c0a886d198d92154c894e2a2 Mon Sep 17 00:00:00 2001 From: Ignacio Quintero Date: Thu, 16 Aug 2018 14:34:09 -0700 Subject: [PATCH 4/4] properly update changelog --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 23828ab244..0d666397b1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,7 @@ CHANGELOG ======== * bug-fix: Estimators: Fix serialization of single records +* bug-fix: deprecate enable_cloudwatch_metrics from Framework Estimators. 1.9.0 ===== @@ -29,7 +30,6 @@ CHANGELOG * bug-fix: get_execution_role no longer fails if user can't call get_role * bug-fix: Session: use existing model instead of failing during ``create_model()`` -* bug-fix: deprecate enable_cloudwatch_metrics from Framework Estimators. * enhancement: Estimator: allow for different role from the Estimator's when creating a Model or Transformer 1.7.0