From 85a7a76530a89d2b4b3b37639138cb5cf85d3836 Mon Sep 17 00:00:00 2001 From: Nihal Harish Date: Sun, 2 Aug 2020 15:36:56 -0700 Subject: [PATCH 1/8] rename enum (#305) --- smdebug/tensorflow/keras.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smdebug/tensorflow/keras.py b/smdebug/tensorflow/keras.py index d34b645be..ccea3ba47 100644 --- a/smdebug/tensorflow/keras.py +++ b/smdebug/tensorflow/keras.py @@ -454,7 +454,7 @@ def save_smdebug_logs(self, logs): elif key == SMDEBUG_LAYER_OUTPUTS_KEY: layer_outputs = logs[key] self.save_layer_outputs(layer_outputs) - self.save_layer_inputs(logs[ModelInput.X], layer_outputs) + self.save_layer_inputs(logs[ModelInput.INPUTS], layer_outputs) # Save Model Inputs elif key in ModelInputs: export_name = get_model_input_export_name() From e4d08436ee3fc8817af0920df3053d4deb0cb17c Mon Sep 17 00:00:00 2001 From: Nihal Harish Date: Mon, 3 Aug 2020 12:00:07 -0700 Subject: [PATCH 2/8] version bump to 0.9.1 (#304) --- smdebug/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smdebug/_version.py b/smdebug/_version.py index 3e2f46a3a..d69d16e98 100644 --- a/smdebug/_version.py +++ b/smdebug/_version.py @@ -1 +1 @@ -__version__ = "0.9.0" +__version__ = "0.9.1" From 2929052bedb076d023ace70733d2a516ba280a84 Mon Sep 17 00:00:00 2001 From: Nihal Harish Date: Tue, 4 Aug 2020 10:29:12 -0700 Subject: [PATCH 3/8] modify asserts (#307) --- tests/tensorflow2/test_estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tensorflow2/test_estimator.py b/tests/tensorflow2/test_estimator.py index c0ee14283..92f8d732e 100644 --- a/tests/tensorflow2/test_estimator.py +++ b/tests/tensorflow2/test_estimator.py @@ -36,7 +36,7 @@ def test_estimator(out_dir, tf_eager_mode, saveall): # vanilla TF 2.2: all = 300, loss = 1, weights = 4, gradients = 0, biases = 18, optimizer variables = 0, metrics = 0, others = 277 # AWS-TF 2.2 : all = 300, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 269 # AWS-TF 2.1 : all = 309, loss = 1, weights = 4, gradients = 8, biases = 18, optimizer variables = 0, metrics = 0, others = 278 - assert len(tnames) >= 300 + assert len(tnames) >= 1 + 4 + 18 assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1 assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 4 assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 18 @@ -71,7 +71,7 @@ def test_linear_classifier(out_dir, tf_eager_mode, saveall): # vanilla TF 2.2: all = 214, loss = 2, weights = 1, gradients = 0, biases = 12, optimizer variables = 0, metrics = 0, others = 199 # AWS-TF 2.2: all = 219, loss = 2, weights = 1, gradients = 2, biases = 12, optimizer variables = 5, metrics = 0, others = 197 # AWS-TF 2.1: all = 226, loss = 2, weights = 1, gradients = 2, biases = 12, optimizer variables = 5, metrics = 0, others = 204 - assert len(tnames) >= 214 + assert len(tnames) >= 2 + 1 + 12 assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 2 assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 1 assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 12 From ef7f671e0803cb4f4902b50f12966e0edd26d886 Mon Sep 17 00:00:00 2001 From: Nihal Harish Date: Tue, 4 Aug 2020 10:29:33 -0700 Subject: [PATCH 4/8] version compare (#306) --- tests/tensorflow2/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/tensorflow2/utils.py b/tests/tensorflow2/utils.py index 7b450db75..124da16b8 100644 --- a/tests/tensorflow2/utils.py +++ b/tests/tensorflow2/utils.py @@ -1,8 +1,7 @@ # Standard Library -from re import search - # Third Party import tensorflow.compat.v2 as tf +from packaging import version def is_tf_2_2(): @@ -13,6 +12,6 @@ def is_tf_2_2(): number of tensor_names emitted by 1. :return: bool """ - if search("2.2..", tf.__version__): + if version.parse(tf.__version__) >= version.parse("2.2.0"): return True return False From 19a623eeb7ad26036af05ec32b55428234178c05 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Tue, 4 Aug 2020 22:11:22 -0700 Subject: [PATCH 5/8] support tf 2.3 tests --- tests/tensorflow2/utils.py | 6 ++++++ .../test_tensorflow2_integration.py | 19 ++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tests/tensorflow2/utils.py b/tests/tensorflow2/utils.py index 124da16b8..0cfec9217 100644 --- a/tests/tensorflow2/utils.py +++ b/tests/tensorflow2/utils.py @@ -15,3 +15,9 @@ def is_tf_2_2(): if version.parse(tf.__version__) >= version.parse("2.2.0"): return True return False + + +def is_tf_2_3(): + if version.parse(tf.__version__) == version.parse("2.3.0"): + return True + return False diff --git a/tests/zero_code_change/test_tensorflow2_integration.py b/tests/zero_code_change/test_tensorflow2_integration.py index 026fef36b..d7e1bc8a3 100644 --- a/tests/zero_code_change/test_tensorflow2_integration.py +++ b/tests/zero_code_change/test_tensorflow2_integration.py @@ -20,6 +20,7 @@ # Third Party import pytest import tensorflow.compat.v2 as tf +from tests.tensorflow2.utils import is_tf_2_3 from tests.utils import SagemakerSimulator # First Party @@ -51,7 +52,8 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True): smd.del_hook() tf.keras.backend.clear_session() if not eager_mode: - tf.compat.v1.disable_eager_execution() + # tf.compat.v1.disable_eager_execution() + pass enable_tb = False if tf.__version__ == "2.0.2" else True with SagemakerSimulator(enable_tb=enable_tb) as sim: model = get_keras_model_v2() @@ -63,7 +65,10 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True): hook = smd.KerasHook(out_dir=sim.out_dir, export_tensorboard=True) opt = hook.wrap_optimizer(opt) model.compile( - loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"] + loss="sparse_categorical_crossentropy", + optimizer=opt, + metrics=["accuracy"], + run_eagerly=eager_mode, ) history = model.fit( x_train, y_train, batch_size=64, epochs=2, validation_split=0.2, callbacks=[hook] @@ -102,7 +107,8 @@ def helper_test_keras_v2_json_config( smd.del_hook() tf.keras.backend.clear_session() if not eager_mode: - tf.compat.v1.disable_eager_execution() + # tf.compat.v1.disable_eager_execution() + pass enable_tb = False if tf.__version__ == "2.0.2" else True with SagemakerSimulator(json_file_contents=json_file_contents, enable_tb=enable_tb) as sim: model = get_keras_model_v2() @@ -114,7 +120,10 @@ def helper_test_keras_v2_json_config( hook = smd.KerasHook.create_from_json_file() opt = hook.wrap_optimizer(opt) model.compile( - loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"] + loss="sparse_categorical_crossentropy", + optimizer=opt, + metrics=["accuracy"], + run_eagerly=eager_mode, ) history = model.fit( x_train, y_train, batch_size=64, epochs=2, validation_split=0.2, callbacks=[hook] @@ -134,7 +143,7 @@ def helper_test_keras_v2_json_config( trial = smd.create_trial(path=sim.out_dir) assert len(trial.steps()) > 0, "Nothing saved at any step." assert len(trial.tensor_names()) > 0, "Tensors were not saved." - if not eager_mode: + if not eager_mode and is_tf_2_3() is False: assert len(trial.tensor_names(collection="gradients")) > 0 assert len(trial.tensor_names(collection="weights")) > 0 assert len(trial.tensor_names(collection="losses")) > 0 From a9e83c8b7349fba29b30f2d832635c7169c882cb Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Thu, 6 Aug 2020 04:21:21 -0700 Subject: [PATCH 6/8] comments --- .../test_tensorflow2_integration.py | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/zero_code_change/test_tensorflow2_integration.py b/tests/zero_code_change/test_tensorflow2_integration.py index d7e1bc8a3..27481c7b4 100644 --- a/tests/zero_code_change/test_tensorflow2_integration.py +++ b/tests/zero_code_change/test_tensorflow2_integration.py @@ -51,9 +51,10 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True): """ Test the default ZCC behavior of saving losses and metrics in eager and non-eager modes.""" smd.del_hook() tf.keras.backend.clear_session() - if not eager_mode: - # tf.compat.v1.disable_eager_execution() - pass + if not eager_mode and is_tf_2_3() is False: + # v1 training APIs are currently not supported + # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0 + tf.compat.v1.disable_eager_execution() enable_tb = False if tf.__version__ == "2.0.2" else True with SagemakerSimulator(enable_tb=enable_tb) as sim: model = get_keras_model_v2() @@ -64,11 +65,15 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True): if script_mode: hook = smd.KerasHook(out_dir=sim.out_dir, export_tensorboard=True) opt = hook.wrap_optimizer(opt) + run_eagerly = None + if is_tf_2_3(): + # Test eager and non eager mode for v2 + run_eagerly = eager_mode model.compile( loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"], - run_eagerly=eager_mode, + run_eagerly=run_eagerly, ) history = model.fit( x_train, y_train, batch_size=64, epochs=2, validation_split=0.2, callbacks=[hook] @@ -106,9 +111,10 @@ def helper_test_keras_v2_json_config( """ Tests ZCC with custom hook configs """ smd.del_hook() tf.keras.backend.clear_session() - if not eager_mode: - # tf.compat.v1.disable_eager_execution() - pass + if not eager_mode and is_tf_2_3() is False: + # v1 training APIs are currently not supported + # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0 + tf.compat.v1.disable_eager_execution() enable_tb = False if tf.__version__ == "2.0.2" else True with SagemakerSimulator(json_file_contents=json_file_contents, enable_tb=enable_tb) as sim: model = get_keras_model_v2() @@ -119,11 +125,15 @@ def helper_test_keras_v2_json_config( if script_mode: hook = smd.KerasHook.create_from_json_file() opt = hook.wrap_optimizer(opt) + run_eagerly = None + if is_tf_2_3(): + # Test eager and non eager mode for v2 + run_eagerly = eager_mode model.compile( loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"], - run_eagerly=eager_mode, + run_eagerly=run_eagerly, ) history = model.fit( x_train, y_train, batch_size=64, epochs=2, validation_split=0.2, callbacks=[hook] From ca945e930355d713f9c74877fd00321048f17d93 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Thu, 6 Aug 2020 04:28:56 -0700 Subject: [PATCH 7/8] comment --- tests/zero_code_change/test_tensorflow2_integration.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/zero_code_change/test_tensorflow2_integration.py b/tests/zero_code_change/test_tensorflow2_integration.py index 27481c7b4..f9514ae80 100644 --- a/tests/zero_code_change/test_tensorflow2_integration.py +++ b/tests/zero_code_change/test_tensorflow2_integration.py @@ -154,6 +154,8 @@ def helper_test_keras_v2_json_config( assert len(trial.steps()) > 0, "Nothing saved at any step." assert len(trial.tensor_names()) > 0, "Tensors were not saved." if not eager_mode and is_tf_2_3() is False: + # Gradients are currently not saved in ZCC mode with AWS TF 2.3.0 + # and smdebug 0.9 assert len(trial.tensor_names(collection="gradients")) > 0 assert len(trial.tensor_names(collection="weights")) > 0 assert len(trial.tensor_names(collection="losses")) > 0 From 87c18737f9dc47c147279055d9fbb5d4ad6b6734 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Thu, 6 Aug 2020 04:44:20 -0700 Subject: [PATCH 8/8] run eagerly --- .../test_tensorflow2_integration.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/tests/zero_code_change/test_tensorflow2_integration.py b/tests/zero_code_change/test_tensorflow2_integration.py index f9514ae80..cb3cc7ddb 100644 --- a/tests/zero_code_change/test_tensorflow2_integration.py +++ b/tests/zero_code_change/test_tensorflow2_integration.py @@ -60,15 +60,15 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True): model = get_keras_model_v2() (x_train, y_train), (x_test, y_test) = get_keras_data() x_train, x_test = x_train / 255, x_test / 255 + run_eagerly = None + if is_tf_2_3(): + # Test eager and non eager mode for v2 + run_eagerly = eager_mode opt = tf.keras.optimizers.RMSprop() if script_mode: hook = smd.KerasHook(out_dir=sim.out_dir, export_tensorboard=True) opt = hook.wrap_optimizer(opt) - run_eagerly = None - if is_tf_2_3(): - # Test eager and non eager mode for v2 - run_eagerly = eager_mode model.compile( loss="sparse_categorical_crossentropy", optimizer=opt, @@ -81,7 +81,10 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True): test_scores = model.evaluate(x_test, y_test, verbose=2, callbacks=[hook]) else: model.compile( - loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"] + loss="sparse_categorical_crossentropy", + optimizer=opt, + metrics=["accuracy"], + run_eagerly=run_eagerly, ) history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2) test_scores = model.evaluate(x_test, y_test, verbose=2) @@ -122,13 +125,13 @@ def helper_test_keras_v2_json_config( x_train, x_test = x_train / 255, x_test / 255 opt = tf.keras.optimizers.RMSprop() + run_eagerly = None + if is_tf_2_3(): + # Test eager and non eager mode for v2 + run_eagerly = eager_mode if script_mode: hook = smd.KerasHook.create_from_json_file() opt = hook.wrap_optimizer(opt) - run_eagerly = None - if is_tf_2_3(): - # Test eager and non eager mode for v2 - run_eagerly = eager_mode model.compile( loss="sparse_categorical_crossentropy", optimizer=opt, @@ -141,7 +144,10 @@ def helper_test_keras_v2_json_config( test_scores = model.evaluate(x_test, y_test, verbose=2, callbacks=[hook]) else: model.compile( - loss="sparse_categorical_crossentropy", optimizer=opt, metrics=["accuracy"] + loss="sparse_categorical_crossentropy", + optimizer=opt, + metrics=["accuracy"], + run_eagerly=run_eagerly, ) history = model.fit(x_train, y_train, epochs=2, batch_size=64, validation_split=0.2) test_scores = model.evaluate(x_test, y_test, verbose=2)