diff --git a/smdebug/tensorflow/keras.py b/smdebug/tensorflow/keras.py
index 9a02eb0bb..b9855a29b 100644
--- a/smdebug/tensorflow/keras.py
+++ b/smdebug/tensorflow/keras.py
@@ -539,7 +539,11 @@ def _save_layer_input_and_outputs(self):
                 if self._is_collection_being_saved_for_step(CollectionKeys.LAYERS)
                 else set()
             )
-            self._save_tensor_to_file(export_name, tensor.numpy(), input_collection)
+            if hasattr(tensor, "numpy"):
+                self._save_tensor_to_file(export_name, tensor.numpy(), input_collection)
+            else:
+                self.logger.warn("cannot save layer values during forward pass with tf.function")
+                return

             # Save Output
             tensor = self.saved_layers[layer_name].layer_output
             export_name = get_export_name_for_keras(layer_name, tensor_type="output", tensor=tensor)
@@ -549,7 +553,8 @@
                 if self._is_collection_being_saved_for_step(CollectionKeys.LAYERS)
                 else set()
             )
-            self._save_tensor_to_file(export_name, tensor.numpy(), output_collection)
+            if hasattr(tensor, "numpy"):
+                self._save_tensor_to_file(export_name, tensor.numpy(), output_collection)

     def _save_tensors_post_step(self, batch, logs):
         # some tensors available as value from within hook are saved here
@@ -1023,7 +1028,10 @@ def save_tape_logs(self, model_inputs=None, outputs=None):
         :return:
         """
         logs = {ModelOutput.PREDICTIONS: outputs, ModelInput.INPUTS: model_inputs}
-        self.save_smdebug_logs(logs)
+        if is_tf_version_2x() and tf.executing_eagerly():
+            self.save_smdebug_logs(logs)
+        else:
+            self.logger.warn("cannot save model inputs and outputs in non-eager execution mode")

     def wrap_tape(self, tape):
         """
diff --git a/tests/tensorflow2/test_grad_tape_tf_function.py b/tests/tensorflow2/test_grad_tape_tf_function.py
new file mode 100644
index 000000000..681287b14
--- /dev/null
+++ b/tests/tensorflow2/test_grad_tape_tf_function.py
@@ -0,0 +1,81 @@
+# Third Party
+import tensorflow as tf
+
+# First Party
+import smdebug.tensorflow as smd
+from smdebug.core.collection import CollectionKeys
+
+
+def create_hook(trial_dir):
+    hook = smd.KerasHook(trial_dir, save_all=True)
+    return hook
+
+
+def create_model():
+    model = tf.keras.models.Sequential(
+        [
+            # WA for TF issue https://github.com/tensorflow/tensorflow/issues/36279
+            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
+            tf.keras.layers.Dense(128, activation="relu"),
+            tf.keras.layers.Dropout(0.2),
+            tf.keras.layers.Dense(10, activation="softmax"),
+        ]
+    )
+    return model
+
+
+def test_gradtape_tf_function(out_dir):
+    def get_grads(images, labels):
+        # with tf.GradientTape() as tape:
+        return model(images, training=True)
+
+    @tf.function
+    def train_step(images, labels):
+        return tf.reduce_mean(get_grads(images, labels))
+
+    mnist = tf.keras.datasets.mnist
+    (x_train, y_train), _ = mnist.load_data()
+    dataset = tf.data.Dataset.from_tensor_slices(
+        (tf.cast(x_train[..., tf.newaxis] / 255, tf.float32), tf.cast(y_train, tf.int64))
+    )
+    dataset = dataset.shuffle(1000).batch(64)
+    model = create_model()
+    hook = create_hook(out_dir)
+    opt = tf.keras.optimizers.Adam()
+    hook.wrap_optimizer(opt)
+
+    n_epochs = 1
+    for epoch in range(n_epochs):
+        for data, labels in dataset:
+            dataset_labels = labels
+            labels = tf.one_hot(labels, depth=10)
+            with hook.wrap_tape(tf.GradientTape()) as tape:
+                logits = train_step(data, labels)
+            grads = tape.gradient(logits, model.variables)
+            opt.apply_gradients(zip(grads, model.variables))
+            hook.save_tensor("inputs", data, CollectionKeys.INPUTS)
+            hook.save_tensor("logits", logits, CollectionKeys.OUTPUTS)
+            hook.save_tensor("labels", labels, CollectionKeys.OUTPUTS)
+
+    model.save(out_dir, save_format="tf")
+    hook.close()
+
+    trial = smd.create_trial(out_dir)
+    assert trial.tensor_names(collection=CollectionKeys.LOSSES) == ["loss"]
+    assert trial.tensor_names(collection=CollectionKeys.WEIGHTS) == [
+        "weights/dense/kernel:0",
+        "weights/dense_1/kernel:0",
+    ]
+    assert trial.tensor_names(collection=CollectionKeys.BIASES) == [
+        "weights/dense/bias:0",
+        "weights/dense_1/bias:0",
+    ]
+    assert trial.tensor_names(collection=CollectionKeys.OPTIMIZER_VARIABLES) == [
+        "Adam/beta_1:0",
+        "Adam/beta_2:0",
+        "Adam/decay:0",
+        "Adam/iter:0",
+        "Adam/learning_rate:0",
+    ]
+    assert trial.tensor_names(collection=CollectionKeys.INPUTS) == ["inputs"]
+    assert trial.tensor_names(collection=CollectionKeys.OUTPUTS) == ["labels", "logits"]
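
Why the hasattr(tensor, "numpy") guard above works, as a minimal standalone sketch (illustration only, not part of the patch): in TF 2.x only eager EagerTensors expose a numpy attribute, while the symbolic tensors produced when tracing a tf.function do not, so the hook can detect the non-eager case and skip saving with a warning instead of raising AttributeError.

import tensorflow as tf

eager_tensor = tf.constant([1.0])
print(hasattr(eager_tensor, "numpy"))  # True: eager tensors can be materialized as numpy arrays

@tf.function
def traced(x):
    # While tracing, x is a symbolic tf.Tensor with no .numpy() method;
    # this is the case the patched hook detects and skips with a warning.
    print("numpy available during trace:", hasattr(x, "numpy"))  # False
    return x * 2

traced(tf.constant([2.0]))  # the trace-time print above runs once and shows False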