diff --git a/tests/tensorflow2/test_keras.py b/tests/tensorflow2/test_keras.py
index 8b0ef1879..4323b5284 100644
--- a/tests/tensorflow2/test_keras.py
+++ b/tests/tensorflow2/test_keras.py
@@ -14,7 +14,7 @@ import tensorflow.compat.v2 as tf
 import tensorflow_datasets as tfds
 from tests.constants import TEST_DATASET_S3_PATH
-from tests.tensorflow2.utils import is_tf_2_2
+from tests.tensorflow2.utils import is_tf_2_2, is_tf_2_3
 from tests.tensorflow.utils import create_trial_fast_refresh
 from tests.utils import use_s3_datasets

@@ -195,7 +195,7 @@ def test_keras_gradtape(out_dir, saveall):

     trial = smd.create_trial(path=out_dir)
     if saveall:  # save losses, metrics, weights, biases
-        assert len(trial.tensor_names()) == 15
+        assert len(trial.tensor_names()) == (25 if is_tf_2_2() else 15)
         assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
         assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
         assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
@@ -275,7 +275,7 @@ def test_gradtape_include_regex(out_dir):

     tr = create_trial_fast_refresh(out_dir)
     tnames = tr.tensor_names(collection="custom_coll")
-    assert len(tnames) == 8
+    assert len(tnames) == (12 if is_tf_2_2() else 8)
     for tname in tnames:
         assert tr.tensor(tname).value(0) is not None
@@ -343,7 +343,7 @@ def test_gradtape_include_collections(out_dir):

     trial = smd.create_trial(path=out_dir)
     # can't save gradients in TF 2.x
-    assert len(trial.tensor_names()) == 15
+    assert len(trial.tensor_names()) == (16 if is_tf_2_2() else 15)
     assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
     assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
     assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
@@ -388,7 +388,7 @@ def test_gradtape_persistent(out_dir, saveall):

     trial = smd.create_trial(path=out_dir)
     if saveall:  # save losses, metrics, weights, biases
-        assert len(trial.tensor_names()) == 15
+        assert len(trial.tensor_names()) == (25 if is_tf_2_2() else 15)
         assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
         assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
         assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
@@ -409,7 +409,7 @@ def test_keras_fit(out_dir, tf_eager_mode, saveall):
     helper_keras_fit(
         trial_dir=out_dir,
         hook=hook,
-        eager=tf_eager_mode,
+        run_eagerly=tf_eager_mode,
         steps=["train", "eval", "predict", "train"],
     )
@@ -417,9 +417,16 @@
     # can't save gradients in TF 2.x eager mode
     if saveall:  # save losses, metrics, weights, biases, scalar
         if tf_eager_mode:
-            assert len(trial.tensor_names()) == (13 if is_tf_2_2() else 14)
-            assert len(trial.tensor_names(collection=CollectionKeys.INPUTS)) == 0
-            assert len(trial.tensor_names(collection=CollectionKeys.OUTPUTS)) == 0
+            if is_tf_2_2():
+                assert len(trial.tensor_names()) == 28
+            else:
+                assert len(trial.tensor_names()) == (21 if is_tf_2_3() else 14)
+            assert len(trial.tensor_names(collection=CollectionKeys.INPUTS)) == (
+                1 if is_tf_2_2() else 0
+            )
+            assert len(trial.tensor_names(collection=CollectionKeys.OUTPUTS)) == (
+                2 if is_tf_2_2() else 0
+            )
         else:
             assert len(trial.tensor_names()) == 21
         assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
@@ -435,10 +442,12 @@
             "No Optimizer Variables Should be Saved in EVAL Mode",
         )
     else:  # save the default losses and metrics
-        assert len(trial.tensor_names()) == (4 if is_tf_2_2() and tf_eager_mode else 5)
+        assert len(trial.tensor_names()) == (
+            4 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 5
+        )
         assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
         assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
-            2 if is_tf_2_2() and tf_eager_mode else 3
+            2 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 3
         )
     for tname in trial.tensor_names():
         assert trial.tensor(tname).value(0) is not None
@@ -510,7 +519,7 @@ def test_include_regex(out_dir, tf_eager_mode):
     tnames = tr.tensor_names(collection="custom_coll")

     if tf_eager_mode:
-        assert len(tnames) == 8
+        assert len(tnames) == (12 if is_tf_2_2() else 8)
     else:
         assert len(tnames) == 8
     for tname in tnames:
@@ -534,7 +543,7 @@ def test_clash_with_tb_callback(out_dir):
         add_callbacks=["tensorboard"],
     )
     tr = create_trial_fast_refresh(out_dir)
-    assert len(tr.tensor_names()) == (7 if is_tf_2_2() else 8)
+    assert len(tr.tensor_names()) == (7 if (is_tf_2_2() or is_tf_2_3()) else 8)


 @pytest.mark.slow
@@ -560,12 +569,12 @@ def test_weights_collections(out_dir, tf_eager_mode):

     trial = smd.create_trial(path=out_dir)
     # can't save gradients in TF 2.x
-    assert len(trial.tensor_names()) == (5 if is_tf_2_2() and tf_eager_mode else 6)
+    assert len(trial.tensor_names()) == (5 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 6)
     assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 0
     assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
     assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
     assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
-        2 if is_tf_2_2() and tf_eager_mode else 3
+        2 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 3
     )


@@ -595,7 +604,10 @@ def test_include_collections(out_dir, tf_eager_mode):
     trial = smd.create_trial(path=out_dir)
     # can't save gradients in TF 2.x
     if tf_eager_mode:
-        assert len(trial.tensor_names()) == (12 if is_tf_2_2() else 13)
+        if is_tf_2_2():
+            assert len(trial.tensor_names()) == 16
+        else:
+            assert len(trial.tensor_names()) == (12 if is_tf_2_3() else 13)
     else:
         assert len(trial.tensor_names()) == 18
         assert len(trial.tensor_names(collection=CollectionKeys.GRADIENTS)) == 4
@@ -605,7 +617,7 @@ def test_include_collections(out_dir, tf_eager_mode):
     assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
     assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
     assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
-        2 if is_tf_2_2() and tf_eager_mode else 3
+        2 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 3
     )


@@ -625,7 +637,7 @@ def test_include_only_custom_collection(out_dir, tf_eager_mode):
     )

     trial = smd.create_trial(path=out_dir)
-    assert len(trial.tensor_names()) == (8 if is_tf_2_2() and tf_eager_mode else 9)
+    assert len(trial.tensor_names()) == (8 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 9)
     assert len(trial.tensor_names(collection="custom_optimizer_variables")) == 5


@@ -640,12 +652,12 @@ def test_hook_from_json(out_dir, tf_eager_mode, monkeypatch):

     trial = smd.create_trial(path=out_dir)
     # can't save gradients in TF 2.x
-    assert len(trial.tensor_names()) == (5 if is_tf_2_2() and tf_eager_mode else 6)
+    assert len(trial.tensor_names()) == (5 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 6)
     assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 0
     assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
     assert len(trial.tensor_names(collection=CollectionKeys.LOSSES)) == 1
     assert len(trial.tensor_names(collection=CollectionKeys.METRICS)) == (
-        2 if is_tf_2_2() and tf_eager_mode else 3
+        2 if (is_tf_2_2() or is_tf_2_3()) and tf_eager_mode else 3
     )


@@ -658,12 +670,15 @@ def test_keras_fit_pure_eager(out_dir, tf_eager_mode):
     helper_keras_fit(trial_dir=out_dir, hook=hook, eager=tf_eager_mode, run_eagerly=True)

     trial = smd.create_trial(path=out_dir)
-    assert len(trial.tensor_names()) == (20 if is_tf_2_2() else 21)
+    if is_tf_2_2():
+        assert len(trial.tensor_names()) == 27
+    else:
+        assert len(trial.tensor_names()) == (20 if is_tf_2_3() else 21)
     assert len(trial.tensor_names(collection=CollectionKeys.BIASES)) == 2
     assert len(trial.tensor_names(collection=CollectionKeys.WEIGHTS)) == 2
     assert len(trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES)) == 5
-    assert len(trial.tensor_names(collection=CollectionKeys.INPUTS)) == 0
-    assert len(trial.tensor_names(collection=CollectionKeys.OUTPUTS)) == 0
+    assert len(trial.tensor_names(collection=CollectionKeys.INPUTS)) == (1 if is_tf_2_2() else 0)
+    assert len(trial.tensor_names(collection=CollectionKeys.OUTPUTS)) == (2 if is_tf_2_2() else 0)


 @pytest.mark.skip  # skip until aws tf update
diff --git a/tests/tensorflow2/test_keras_mirrored.py b/tests/tensorflow2/test_keras_mirrored.py
index 195d2b3f3..d857218ab 100644
--- a/tests/tensorflow2/test_keras_mirrored.py
+++ b/tests/tensorflow2/test_keras_mirrored.py
@@ -11,7 +11,7 @@ import tensorflow_datasets as tfds
 from tensorflow.python.client import device_lib
 from tests.core.utils import verify_files
-from tests.tensorflow2.utils import is_tf_2_2
+from tests.tensorflow2.utils import is_tf_2_2, is_tf_2_3
 from tests.tensorflow.utils import create_trial_fast_refresh

 # First Party
@@ -164,11 +164,16 @@ def exhaustive_check(trial_dir, include_workers="one", eager=True):
     if include_workers == "all":
         assert len(tr.workers()) == strategy.num_replicas_in_sync
         if eager:
-            assert len(tr.tensor_names()) == (
-                6 + 1 + 2 + 5 + 1 if is_tf_2_2() else 6 + 1 + 3 + 5 + 1
-            )
-            # 6 weights, 1 loss, 3 metrics, 5 optimizer variables for Tf 2.1, 1 scalar
-            # 6 weights, 1 loss, 2 metrics, 5 optimizer variables for Tf 2.2, 1 scalar
+            if is_tf_2_2():
+                assert len(tr.tensor_names()) == (6 + 1 + 2 + 5 + 1 + 6 + 2)
+                # 6 weights, 1 loss, 2 metrics, 5 optimizer variables, 6 gradients, 2 outputs for Tf 2.2, 1 scalar
+            else:
+                assert len(tr.tensor_names()) == (
+                    6 + 1 + 2 + 5 + 1 if (is_tf_2_2() or is_tf_2_3()) else 6 + 1 + 3 + 5 + 1
+                )
+                # 6 weights, 1 loss, 2 metrics, 5 optimizer variables for Tf 2.3, 1 scalar
+                # 6 weights, 1 loss, 3 metrics, 5 optimizer variables for Tf 2.1, 1 scalar
+
         else:
             assert len(tr.tensor_names()) == (6 + 6 + 1 + 3 + strategy.num_replicas_in_sync * 3 + 5)
     else:
@@ -232,7 +237,7 @@
     assert len(tr.tensor(loss_name).steps()) == 12

     metricnames = tr.tensor_names(collection=CollectionKeys.METRICS)
-    assert len(metricnames) == (2 if is_tf_2_2() else 3)
+    assert len(metricnames) == (2 if (is_tf_2_2() or is_tf_2_3()) else 3)


 @pytest.mark.slow
@@ -256,8 +261,15 @@ def test_save_all(out_dir, tf_eager_mode, workers):
     tr = create_trial_fast_refresh(out_dir)
     print(tr.tensor_names())
     if tf_eager_mode:
-        assert len(tr.tensor_names()) == (6 + 2 + 1 + 5 + 1 if is_tf_2_2() else 6 + 3 + 1 + 5 + 1)
-        # weights, metrics, losses, optimizer variables, scalar
+        if is_tf_2_2():
+            assert len(tr.tensor_names()) == (
+                6 + 2 + 1 + 5 + 1 + 1 + 2 + 8 + 8 if is_tf_2_2() else 6 + 3 + 1 + 5 + 1
+            )
+            # weights, metrics, losses, optimizer variables, scalar, inputs, outputs, gradients, layers
+        else:
+            assert len(tr.tensor_names()) == (
+                6 + 2 + 1 + 5 + 1 if is_tf_2_3() else 6 + 3 + 1 + 5 + 1
+            )
     else:
         assert (
             len(tr.tensor_names())
@@ -366,7 +378,7 @@ def test_include_regex(out_dir, tf_eager_mode, workers):
     tnames = tr.tensor_names(collection="custom_coll")

     if tf_eager_mode:
-        assert len(tnames) == 4
+        assert len(tnames) == (12 if is_tf_2_2() else 4)
     else:
         assert len(tnames) == 4 + 3 * strategy.num_replicas_in_sync
     for tname in tnames:
@@ -421,7 +433,10 @@ def test_clash_with_tb_callback(out_dir):
         add_callbacks=["tensorboard"],
     )
     tr = create_trial_fast_refresh(out_dir)
-    assert len(tr.tensor_names()) == (10 if is_tf_2_2() else 11)
+    if is_tf_2_2():
+        assert len(tr.tensor_names()) == 16
+    else:
+        assert len(tr.tensor_names()) == (10 if is_tf_2_3() else 11)


 @pytest.mark.skip
diff --git a/tests/tensorflow2/utils.py b/tests/tensorflow2/utils.py
index 0cfec9217..591504d34 100644
--- a/tests/tensorflow2/utils.py
+++ b/tests/tensorflow2/utils.py
@@ -12,7 +12,7 @@ def is_tf_2_2():
     number of tensor_names emitted by 1.
     :return: bool
     """
-    if version.parse(tf.__version__) >= version.parse("2.2.0"):
+    if version.parse(tf.__version__) == version.parse("2.2.0"):
         return True
     return False
diff --git a/tests/zero_code_change/test_tensorflow2_gradtape_integration.py b/tests/zero_code_change/test_tensorflow2_gradtape_integration.py
index b4a5d85c8..d44b851a0 100644
--- a/tests/zero_code_change/test_tensorflow2_gradtape_integration.py
+++ b/tests/zero_code_change/test_tensorflow2_gradtape_integration.py
@@ -12,7 +12,7 @@
 # Third Party
 import pytest
 import tensorflow.compat.v2 as tf
-from tests.tensorflow2.utils import is_tf_2_2
+from tests.tensorflow2.utils import is_tf_2_2, is_tf_2_3

 # First Party
 import smdebug.tensorflow as smd
@@ -26,7 +26,7 @@ def get_keras_data():
     return (x_train, y_train), (x_test, y_test)


-def helper_test_keras_v2_gradienttape(script_mode: bool = False, json_file_contents="{}"):
+def helper_test_keras_v2_gradienttape(
+    script_mode: bool = False, json_file_contents="{}", default=False
+):
     """ Test the default ZCC behavior of saving losses and metrics in eager and non-eager modes."""
     smd.del_hook()
     tf.keras.backend.clear_session()
@@ -49,7 +51,7 @@ def helper_test_keras_v2_gradienttape(script_mode: bool = False, json_file_conte
         opt = tf.keras.optimizers.RMSprop()
         cce = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
         train_acc_metric = tf.keras.metrics.SparseCategoricalAccuracy()
-        n_epochs = 2
+        n_epochs = 1
         if script_mode:
             if json_file_contents == "{}":
                 hook = smd.KerasHook(out_dir=sim.out_dir, export_tensorboard=True)
@@ -100,7 +102,7 @@ def helper_test_keras_v2_gradienttape(script_mode: bool = False, json_file_conte
                 print(log)
             train_acc_metric.reset_states()
         hook = smd.get_hook()
-        if not is_tf_2_2():
+        if not (is_tf_2_2() or is_tf_2_3()):
             assert not hook  # only supported on TF 2.2 and greater
             return
         assert hook
@@ -110,12 +112,23 @@
         assert len(trial.steps()) > 0, "Nothing saved at any step."
         assert len(trial.tensor_names()) > 0, "Tensors were not saved."
         assert len(trial.tensor_names(collection="losses")) > 0
+        if is_tf_2_2() and default is False:
+            # Inputs and Outputs are not saved with the default collection configurations.
+            assert len(trial.tensor_names(collection="inputs")) > 0
+            assert len(trial.tensor_names(collection="outputs")) > 0
+            assert trial.tensor_names(collection="outputs") == ["predictions"]
+            if "dense_layers" in json_file_contents:
+                # Only assert for test_keras_v2_multi_collections
+                # which defines this custom collection
+                assert len(trial.tensor_names(collection="dense_layers")) > 0
+            else:
+                assert len(trial.tensor_names(collection="dense_layers")) == 0


 @pytest.mark.parametrize("script_mode", [False])
 def test_keras_v2_default(script_mode):
     # Test default ZCC behavior
-    helper_test_keras_v2_gradienttape(script_mode=script_mode)
+    helper_test_keras_v2_gradienttape(script_mode=script_mode, default=True)


 @pytest.mark.parametrize("script_mode", [False])
@@ -144,6 +157,18 @@ def test_keras_v2_multi_collections(script_mode):
             },
             {
                 "CollectionName": "optimizer_variables"
+            },
+            {
+                "CollectionName": "outputs"
+            },
+            {
+                "CollectionName": "inputs"
+            },
+            {
+                "CollectionName": "dense_layers",
+                "CollectionParameters": {
+                    "include_regex": ".*dense.*"
+                }
             }
         ]
     }
@@ -161,7 +186,7 @@ def test_keras_v2_save_all(script_mode):
         "S3OutputPath": "s3://sagemaker-test",
         "LocalPath": "/opt/ml/output/tensors",
         "HookParameters" : {
-            "save_steps": "0,1,2,3",
+            "save_steps": "0",
             "save_all": true
         }
     }
diff --git a/tests/zero_code_change/test_tensorflow2_integration.py b/tests/zero_code_change/test_tensorflow2_integration.py
index 703a9abcf..cfe179a4d 100644
--- a/tests/zero_code_change/test_tensorflow2_integration.py
+++ b/tests/zero_code_change/test_tensorflow2_integration.py
@@ -20,13 +20,40 @@
 # Third Party
 import pytest
 import tensorflow.compat.v2 as tf
-from tests.tensorflow2.utils import is_tf_2_3
+from tensorflow.python.keras.engine import data_adapter
+from tests.tensorflow2.utils import is_tf_2_2, is_tf_2_3
 from tests.utils import SagemakerSimulator

 # First Party
 import smdebug.tensorflow as smd
 from smdebug.core.collection import CollectionKeys

+SMDEBUG_PREFIX = "smdebug_"
+
+
+class CustomClassifierModel(tf.keras.models.Sequential):
+    def train_step(self, data):
+        data = data_adapter.expand_1d(data)
+        x, y, sample_weight = data_adapter.unpack_x_y_sample_weight(data)
+
+        with tf.GradientTape() as tape:
+            y_pred = self(x, training=True)
+            loss = self.compiled_loss(y, y_pred, sample_weight, regularization_losses=self.losses)
+        trainable_variables = self.trainable_variables
+        gradients = tape.gradient(loss, trainable_variables)
+        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
+
+        self.compiled_metrics.update_state(y, y_pred, sample_weight)
+        result_dict = {m.name: m.result() for m in self.metrics}
+        result_dict.update({f"{SMDEBUG_PREFIX}y": y})
+        result_dict.update({f"{SMDEBUG_PREFIX}gradients": gradients})
+
+        # to pass gradients and labels to the hook, add logs with the prefix SMDEBUG_
+        # For example:
+        # To save labels: the key will be smdebug_y
+        # To save gradients: the key will be smdebug_gradients
+        return result_dict
+

 def get_keras_model_v2():
     model = tf.keras.models.Sequential(
@@ -51,21 +78,18 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True):
     """ Test the default ZCC behavior of saving losses and metrics in eager and non-eager modes."""
     smd.del_hook()
     tf.keras.backend.clear_session()
-    if not eager_mode and is_tf_2_3() is False:
+    if not eager_mode and is_tf_2_3() is False and is_tf_2_2() is False:
         # v1 training APIs are currently not supported
         # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
         tf.compat.v1.disable_eager_execution()
-
-    # Performance regression in the _make_histogram fn
-    enable_tb = False if tf.__version__ == "2.0.2" or is_tf_2_3() else True
+    enable_tb = False if (tf.__version__ == "2.0.2" or is_tf_2_3()) else True
+    run_eagerly = None
+    if is_tf_2_2() or is_tf_2_3():
+        run_eagerly = eager_mode
     with SagemakerSimulator(enable_tb=enable_tb) as sim:
         model = get_keras_model_v2()
         (x_train, y_train), (x_test, y_test) = get_keras_data()
         x_train, x_test = x_train / 255, x_test / 255
-        run_eagerly = None
-        if is_tf_2_3():
-            # Test eager and non eager mode for v2
-            run_eagerly = eager_mode
         opt = tf.keras.optimizers.RMSprop()

         if script_mode:
@@ -78,7 +102,7 @@ def helper_test_keras_v2(script_mode: bool = False, eager_mode: bool = True):
                 run_eagerly=run_eagerly,
             )
             history = model.fit(
-                x_train, y_train, batch_size=64, epochs=2, validation_split=0.2, callbacks=[hook]
+                x_train, y_train, batch_size=64, epochs=1, validation_split=0.2, callbacks=[hook]
             )
             test_scores = model.evaluate(x_test, y_test, verbose=2, callbacks=[hook])
         else:
@@ -88,7 +112,7 @@
                 metrics=["accuracy"],
                 run_eagerly=run_eagerly,
             )
-            history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2)
+            history = model.fit(x_train, y_train, batch_size=64, epochs=1, validation_split=0.2)
             test_scores = model.evaluate(x_test, y_test, verbose=2)

         hook = smd.get_hook()
@@ -111,29 +135,35 @@


 def helper_test_keras_v2_json_config(
-    json_file_contents, script_mode: bool = False, eager_mode: bool = True
+    json_file_contents, script_mode: bool = False, eager_mode: bool = True, custom_classifier=False
 ):
     """ Tests ZCC with custom hook configs """
     smd.del_hook()
     tf.keras.backend.clear_session()
-    if not eager_mode and is_tf_2_3() is False:
+    if not eager_mode and is_tf_2_3() is False and is_tf_2_2() is False:
         # v1 training APIs are currently not supported
         # in ZCC mode with smdebug 0.9 and AWS TF 2.3.0
         tf.compat.v1.disable_eager_execution()
-
-    # Performance regression in the _make_histogram fn
-    enable_tb = False if tf.__version__ == "2.0.2" or is_tf_2_3() else True
-
+    run_eagerly = None
+    if is_tf_2_2() or is_tf_2_3():
+        run_eagerly = eager_mode
+    enable_tb = False if (tf.__version__ == "2.0.2" or is_tf_2_3()) else True
     with SagemakerSimulator(json_file_contents=json_file_contents, enable_tb=enable_tb) as sim:
-        model = get_keras_model_v2()
+        if custom_classifier:
+            model = CustomClassifierModel(
+                [
+                    tf.keras.layers.Flatten(input_shape=(28, 28)),
+                    tf.keras.layers.Dense(128, activation="relu"),
+                    tf.keras.layers.Dropout(0.2),
+                    tf.keras.layers.Dense(10, activation="softmax"),
+                ]
+            )
+        else:
+            model = get_keras_model_v2()
         (x_train, y_train), (x_test, y_test) = get_keras_data()
         x_train, x_test = x_train / 255, x_test / 255
         opt = tf.keras.optimizers.RMSprop()
-        run_eagerly = None
-        if is_tf_2_3():
-            # Test eager and non eager mode for v2
-            run_eagerly = eager_mode
         if script_mode:
             hook = smd.KerasHook.create_from_json_file()
             opt = hook.wrap_optimizer(opt)
@@ -164,12 +194,19 @@ helper_test_keras_v2_json_config(
         trial = smd.create_trial(path=sim.out_dir)
         assert len(trial.steps()) > 0, "Nothing saved at any step."
         assert len(trial.tensor_names()) > 0, "Tensors were not saved."
-        if not eager_mode and is_tf_2_3() is False:
-            # Gradients are currently not saved in ZCC mode with AWS TF 2.3.0
-            # and smdebug 0.9
+        if not eager_mode and is_tf_2_2():
             assert len(trial.tensor_names(collection="gradients")) > 0
         assert len(trial.tensor_names(collection="weights")) > 0
         assert len(trial.tensor_names(collection="losses")) > 0
+        if is_tf_2_2():
+            assert len(trial.tensor_names(collection="inputs")) > 0
+            assert len(trial.tensor_names(collection="outputs")) > 0
+            if "dense_layers" in json_file_contents:
+                # Only assert for test_keras_v2_multi_collections
+                # which defines this custom collection
+                assert len(trial.tensor_names(collection="dense_layers")) > 0
+            else:
+                assert len(trial.tensor_names(collection="dense_layers")) == 0


 @pytest.mark.parametrize("script_mode", [False])
@@ -188,8 +225,8 @@ def test_keras_v2_multi_collections(script_mode, eager_mode):
         "S3OutputPath": "s3://sagemaker-test",
         "LocalPath": "/opt/ml/output/tensors",
         "HookParameters" : {
-            "save_interval": "2",
-            "include_workers": "all"
+            "save_steps": "0,1,2",
+            "include_workers": "one"
         },
         "CollectionConfigurations": [
             {
@@ -206,6 +243,18 @@
             },
             {
                 "CollectionName": "optimizer_variables"
+            },
+            {
+                "CollectionName": "outputs"
+            },
+            {
+                "CollectionName": "inputs"
+            },
+            {
+                "CollectionName": "dense_layers",
+                "CollectionParameters": {
+                    "include_regex": ".*dense.*"
+                }
             }
         ]
     }
@@ -215,8 +264,61 @@


+@pytest.mark.parametrize("script_mode", [False])
+@pytest.mark.parametrize("eager_mode", [True])
+@pytest.mark.skip
+def test_keras_v2_custom_train_step(script_mode, eager_mode):
+    # Test multiple collections included in hook json
+    json_file_contents = """
+    {
+        "S3OutputPath": "s3://sagemaker-test",
+        "LocalPath": "/opt/ml/output/tensors",
+        "HookParameters" : {
+            "save_steps": "0,1,2",
+            "include_workers": "one"
+        },
+        "CollectionConfigurations": [
+            {
+                "CollectionName": "gradients"
+            },
+            {
+                "CollectionName": "weights"
+            },
+            {
+                "CollectionName": "losses"
+            },
+            {
+                "CollectionName": "biases"
+            },
+            {
+                "CollectionName": "optimizer_variables"
+            },
+            {
+                "CollectionName": "outputs"
+            },
+            {
+                "CollectionName": "inputs"
+            },
+            {
+                "CollectionName": "dense_layers",
+                "CollectionParameters": {
+                    "include_regex": ".*dense.*"
+                }
+            }
+        ]
+    }
+    """
+    helper_test_keras_v2_json_config(
+        script_mode=script_mode,
+        eager_mode=eager_mode,
+        json_file_contents=json_file_contents,
+        custom_classifier=True,
+    )
+
+
 @pytest.mark.parametrize("script_mode", [False])
 @pytest.mark.parametrize("eager_mode", [True, False])
+@pytest.mark.skip(reason="Takes too long. Time it and optimize the test")
 def test_keras_v2_save_all(script_mode, eager_mode):
     # Test save all through hook config
     json_file_contents = """
@@ -224,7 +326,7 @@
         "S3OutputPath": "s3://sagemaker-test",
         "LocalPath": "/opt/ml/output/tensors",
         "HookParameters" : {
-            "save_steps": "0,1,2,3",
+            "save_steps": "0,1,2",
             "save_all": true
         }
     }