From c8bba25c76ed833698498d6b26a46369080f1335 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 07:23:50 -0700 Subject: [PATCH 1/8] init --- smdebug/tensorflow/keras.py | 18 ++-- tests/tensorflow2/test_model_subclassing.py | 103 ++++++++++++++++++++ 2 files changed, 111 insertions(+), 10 deletions(-) create mode 100644 tests/tensorflow2/test_model_subclassing.py diff --git a/smdebug/tensorflow/keras.py b/smdebug/tensorflow/keras.py index b9855a29b..6c85c22d0 100644 --- a/smdebug/tensorflow/keras.py +++ b/smdebug/tensorflow/keras.py @@ -119,8 +119,8 @@ def register_model(self, model): # It attaches a hook to every layer of the model to capture # layer values self.model = model - if self.tape is not None: - self._wrap_model_with_input_output_saver() + self.model.saved_layers = dict() + self._wrap_model_with_input_output_saver() self.has_registered_model = True def _get_matching_collections( @@ -527,12 +527,9 @@ def _save_metrics(self, batch, logs, force_save=False): self._save_for_tensor(key, logs[key], check_before_write=False) def _save_layer_input_and_outputs(self): - # Run only for GradTape - if self.tape is None: - return - for layer_name in self.saved_layers: + for layer_name in self.model.saved_layers: # Save Input - tensor = self.saved_layers[layer_name].layer_input + tensor = self.model.saved_layers[layer_name].layer_input export_name = get_export_name_for_keras(layer_name, tensor_type="input", tensor=tensor) input_collection = ( {self.get_collection(CollectionKeys.LAYERS)} @@ -543,9 +540,9 @@ def _save_layer_input_and_outputs(self): self._save_tensor_to_file(export_name, tensor.numpy(), input_collection) else: self.logger.warn("cannot save layer values during forward pass with tf.function") - return + continue # Save Output - tensor = self.saved_layers[layer_name].layer_output + tensor = self.model.saved_layers[layer_name].layer_output export_name = get_export_name_for_keras(layer_name, tensor_type="output", tensor=tensor) self._is_collection_being_saved_for_step(CollectionKeys.LAYERS) output_collection = ( @@ -562,6 +559,7 @@ def _save_tensors_post_step(self, batch, logs): self._save_metrics(batch, logs) self.save_smdebug_logs(logs) self._save_custom_tensors_post_step() + self._save_layer_input_and_outputs() if is_tf_version_2x() and tf.executing_eagerly(): for tensor_ref in self.tensor_refs_to_save_this_step: @@ -691,7 +689,7 @@ def _wrap_model_with_input_output_saver(self): layer.register_hook = lambda hook: layer._hooks.append(hook) saver = InputOutputSaver() layer.register_hook(saver) - self.saved_layers[layer.name] = saver + self.model.saved_layers[layer.name] = saver def _on_any_batch_begin(self, batch, mode, logs=None): if self._is_not_supported(): diff --git a/tests/tensorflow2/test_model_subclassing.py b/tests/tensorflow2/test_model_subclassing.py new file mode 100644 index 000000000..062fd06a7 --- /dev/null +++ b/tests/tensorflow2/test_model_subclassing.py @@ -0,0 +1,103 @@ +# Third Party +import tensorflow as tf +from tensorflow.keras.layers import BatchNormalization, Conv2D, Dense, Flatten +from tensorflow.keras.models import Model + +# First Party +import smdebug.tensorflow as smd + + +class MyModel(Model): + def __init__(self): + super().__init__() + self.conv1 = Conv2D( + 32, 3, activation="relu", kernel_initializer=tf.keras.initializers.GlorotNormal(seed=12) + ) + self.conv0 = Conv2D( + 32, 3, activation="relu", kernel_initializer=tf.keras.initializers.GlorotNormal(seed=12) + ) + self.flatten = Flatten() + self.d1 = Dense( + 128, activation="relu", kernel_initializer=tf.keras.initializers.GlorotNormal(seed=192) + ) + self.d2 = Dense(10, kernel_initializer=tf.keras.initializers.GlorotNormal(seed=126)) + self.bn = BatchNormalization() + + def first(self, x): + with tf.name_scope("first"): + tf.print("mymodel.first") + x = self.conv1(x) + # x = self.bn(x) + return self.flatten(x) + + def second(self, x): + with tf.name_scope("second"): + x = self.d1(x) + return self.d2(x) + + def call(self, x, training=None): + x = self.first(x) + return self.second(x) + + +# Create an instance of the model +model = MyModel() + + +def get_grads(images, labels): + # with tf.GradientTape() as tape: + print("model outer call") + return model(images, training=True) + + +@tf.function +def train_step(images, labels): + return tf.reduce_mean(get_grads(images, labels)) + + +def test_subclassed_model(out_dir): + # Download and load MNIST dataset. + (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data("MNIST-data") + x_train, x_test = x_train / 255.0, x_test / 255.0 + + # Add a channels dimension + x_train = x_train[..., tf.newaxis] + x_test = x_test[..., tf.newaxis] + + train_ds = ( + tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000, seed=123).batch(2) + ) + + MyModel.hook = smd.KerasHook( + out_dir, + save_all=True, + save_config=smd.SaveConfig(save_steps=[x for x in range(10)], save_interval=1), + ) + + MyModel.hook.register_model(model) + model.compile(optimizer="Adam", loss="mse", run_eagerly=True) + model.fit(train_ds, epochs=1, steps_per_epoch=10, callbacks=[MyModel.hook]) + + trial = smd.create_trial(out_dir) + assert trial.tensor_names(collection=smd.CollectionKeys.LAYERS) == [ + "conv2d/inputs", + "conv2d/outputs", + "dense/inputs", + "dense/outputs", + "dense_1/inputs", + "dense_1/outputs", + "flatten/inputs", + "flatten/outputs", + ] + + assert trial.tensor_names(collection=smd.CollectionKeys.INPUTS) == ["model_inputs"] + assert trial.tensor_names(collection=smd.CollectionKeys.OUTPUTS) == ["labels", "predictions"] + assert trial.tensor_names(collection=smd.CollectionKeys.LOSSES) == ["loss"] + assert trial.tensor_names(collection=smd.CollectionKeys.GRADIENTS) == [ + "gradients/my_model/first/conv2d/biasGrad", + "gradients/my_model/first/conv2d/kernelGrad", + "gradients/my_model/second/dense/biasGrad", + "gradients/my_model/second/dense/kernelGrad", + "gradients/my_model/second/dense_1/biasGrad", + "gradients/my_model/second/dense_1/kernelGrad", + ] From ad6d2e71e94dd6022eac439698b33c1ee64491aa Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 08:04:38 -0700 Subject: [PATCH 2/8] retrigger CI From b89512c438e8f65983b1059e57ee3dca5d4a9779 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 08:58:20 -0700 Subject: [PATCH 3/8] correct assert --- tests/tensorflow2/test_model_subclassing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tensorflow2/test_model_subclassing.py b/tests/tensorflow2/test_model_subclassing.py index 062fd06a7..5063b892b 100644 --- a/tests/tensorflow2/test_model_subclassing.py +++ b/tests/tensorflow2/test_model_subclassing.py @@ -90,7 +90,7 @@ def test_subclassed_model(out_dir): "flatten/outputs", ] - assert trial.tensor_names(collection=smd.CollectionKeys.INPUTS) == ["model_inputs"] + assert trial.tensor_names(collection=smd.CollectionKeys.INPUTS) == ["model_input"] assert trial.tensor_names(collection=smd.CollectionKeys.OUTPUTS) == ["labels", "predictions"] assert trial.tensor_names(collection=smd.CollectionKeys.LOSSES) == ["loss"] assert trial.tensor_names(collection=smd.CollectionKeys.GRADIENTS) == [ From 8538249c7a1c58fb9a9f0442d9b69a428bc9a6d5 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 09:21:05 -0700 Subject: [PATCH 4/8] tf2x --- smdebug/tensorflow/keras.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/smdebug/tensorflow/keras.py b/smdebug/tensorflow/keras.py index 6c85c22d0..a25d3ae30 100644 --- a/smdebug/tensorflow/keras.py +++ b/smdebug/tensorflow/keras.py @@ -527,6 +527,8 @@ def _save_metrics(self, batch, logs, force_save=False): self._save_for_tensor(key, logs[key], check_before_write=False) def _save_layer_input_and_outputs(self): + if is_tf_version_2x() is False: + return for layer_name in self.model.saved_layers: # Save Input tensor = self.model.saved_layers[layer_name].layer_input From 935d0889194b0da452b67b678685318bc9770f03 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 11:55:19 -0700 Subject: [PATCH 5/8] clean --- tests/tensorflow2/test_model_subclassing.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tests/tensorflow2/test_model_subclassing.py b/tests/tensorflow2/test_model_subclassing.py index 5063b892b..fd5cbbb5d 100644 --- a/tests/tensorflow2/test_model_subclassing.py +++ b/tests/tensorflow2/test_model_subclassing.py @@ -40,21 +40,6 @@ def call(self, x, training=None): return self.second(x) -# Create an instance of the model -model = MyModel() - - -def get_grads(images, labels): - # with tf.GradientTape() as tape: - print("model outer call") - return model(images, training=True) - - -@tf.function -def train_step(images, labels): - return tf.reduce_mean(get_grads(images, labels)) - - def test_subclassed_model(out_dir): # Download and load MNIST dataset. (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data("MNIST-data") @@ -64,6 +49,9 @@ def test_subclassed_model(out_dir): x_train = x_train[..., tf.newaxis] x_test = x_test[..., tf.newaxis] + # Create an instance of the model + model = MyModel() + train_ds = ( tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000, seed=123).batch(2) ) From b4a88ac8f1c2554a3f7481d720d4872e5acebd8e Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 12:09:27 -0700 Subject: [PATCH 6/8] clean --- smdebug/tensorflow/keras.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/smdebug/tensorflow/keras.py b/smdebug/tensorflow/keras.py index a25d3ae30..07d95fd3f 100644 --- a/smdebug/tensorflow/keras.py +++ b/smdebug/tensorflow/keras.py @@ -119,7 +119,6 @@ def register_model(self, model): # It attaches a hook to every layer of the model to capture # layer values self.model = model - self.model.saved_layers = dict() self._wrap_model_with_input_output_saver() self.has_registered_model = True @@ -529,9 +528,9 @@ def _save_metrics(self, batch, logs, force_save=False): def _save_layer_input_and_outputs(self): if is_tf_version_2x() is False: return - for layer_name in self.model.saved_layers: + for layer_name in self.saved_layers: # Save Input - tensor = self.model.saved_layers[layer_name].layer_input + tensor = self.saved_layers[layer_name].layer_input export_name = get_export_name_for_keras(layer_name, tensor_type="input", tensor=tensor) input_collection = ( {self.get_collection(CollectionKeys.LAYERS)} @@ -544,7 +543,7 @@ def _save_layer_input_and_outputs(self): self.logger.warn("cannot save layer values during forward pass with tf.function") continue # Save Output - tensor = self.model.saved_layers[layer_name].layer_output + tensor = self.saved_layers[layer_name].layer_output export_name = get_export_name_for_keras(layer_name, tensor_type="output", tensor=tensor) self._is_collection_being_saved_for_step(CollectionKeys.LAYERS) output_collection = ( @@ -691,7 +690,7 @@ def _wrap_model_with_input_output_saver(self): layer.register_hook = lambda hook: layer._hooks.append(hook) saver = InputOutputSaver() layer.register_hook(saver) - self.model.saved_layers[layer.name] = saver + self.saved_layers[layer.name] = saver def _on_any_batch_begin(self, batch, mode, logs=None): if self._is_not_supported(): From cec8efc123bf2781a5994391a67b789d21b70090 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 12:37:57 -0700 Subject: [PATCH 7/8] update test --- smdebug/tensorflow/keras.py | 2 +- tests/tensorflow2/test_model_subclassing.py | 11 +---------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/smdebug/tensorflow/keras.py b/smdebug/tensorflow/keras.py index 07d95fd3f..67d325d87 100644 --- a/smdebug/tensorflow/keras.py +++ b/smdebug/tensorflow/keras.py @@ -540,7 +540,7 @@ def _save_layer_input_and_outputs(self): if hasattr(tensor, "numpy"): self._save_tensor_to_file(export_name, tensor.numpy(), input_collection) else: - self.logger.warn("cannot save layer values during forward pass with tf.function") + self.logger.warning("cannot save layer values during forward pass with tf.function") continue # Save Output tensor = self.saved_layers[layer_name].layer_output diff --git a/tests/tensorflow2/test_model_subclassing.py b/tests/tensorflow2/test_model_subclassing.py index fd5cbbb5d..aae66ebed 100644 --- a/tests/tensorflow2/test_model_subclassing.py +++ b/tests/tensorflow2/test_model_subclassing.py @@ -67,16 +67,7 @@ def test_subclassed_model(out_dir): model.fit(train_ds, epochs=1, steps_per_epoch=10, callbacks=[MyModel.hook]) trial = smd.create_trial(out_dir) - assert trial.tensor_names(collection=smd.CollectionKeys.LAYERS) == [ - "conv2d/inputs", - "conv2d/outputs", - "dense/inputs", - "dense/outputs", - "dense_1/inputs", - "dense_1/outputs", - "flatten/inputs", - "flatten/outputs", - ] + assert len(trial.tensor_names(collection=smd.CollectionKeys.LAYERS)) == 8 assert trial.tensor_names(collection=smd.CollectionKeys.INPUTS) == ["model_input"] assert trial.tensor_names(collection=smd.CollectionKeys.OUTPUTS) == ["labels", "predictions"] From d091d5c2a864821f43519cc12691ebaa2a551f14 Mon Sep 17 00:00:00 2001 From: NihalHarish Date: Wed, 16 Sep 2020 12:47:36 -0700 Subject: [PATCH 8/8] test len --- tests/tensorflow2/test_model_subclassing.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/tensorflow2/test_model_subclassing.py b/tests/tensorflow2/test_model_subclassing.py index aae66ebed..d90b931e2 100644 --- a/tests/tensorflow2/test_model_subclassing.py +++ b/tests/tensorflow2/test_model_subclassing.py @@ -72,11 +72,4 @@ def test_subclassed_model(out_dir): assert trial.tensor_names(collection=smd.CollectionKeys.INPUTS) == ["model_input"] assert trial.tensor_names(collection=smd.CollectionKeys.OUTPUTS) == ["labels", "predictions"] assert trial.tensor_names(collection=smd.CollectionKeys.LOSSES) == ["loss"] - assert trial.tensor_names(collection=smd.CollectionKeys.GRADIENTS) == [ - "gradients/my_model/first/conv2d/biasGrad", - "gradients/my_model/first/conv2d/kernelGrad", - "gradients/my_model/second/dense/biasGrad", - "gradients/my_model/second/dense/kernelGrad", - "gradients/my_model/second/dense_1/biasGrad", - "gradients/my_model/second/dense_1/kernelGrad", - ] + assert len(trial.tensor_names(collection=smd.CollectionKeys.GRADIENTS)) == 6