From 46322c6558ee11a734c938b8fd27d7188bc6bbcb Mon Sep 17 00:00:00 2001 From: Jared Nielsen Date: Mon, 25 Nov 2019 15:48:01 -0800 Subject: [PATCH 1/5] Get rid of test artifacts --- .../horovod_mnist_estimator.py | 2 +- examples/tensorflow/scripts/keras.py | 4 +-- smdebug/core/json_config.py | 2 +- tests/analysis/trials/test_modes.py | 4 +-- tests/analysis/trials/test_tensors_api.py | 2 +- tests/core/test_collections.py | 4 +-- tests/core/test_modes.py | 10 +++---- tests/mxnet/test_hook.py | 6 ++-- tests/mxnet/test_hook_all_zero.py | 4 +-- tests/mxnet/test_hook_custom_collection.py | 2 +- tests/mxnet/test_hook_loss_collection.py | 4 +-- tests/mxnet/test_hook_reduce_config.py | 4 +-- tests/mxnet/test_hook_save_all.py | 4 +-- tests/mxnet/test_hook_save_config.py | 4 +-- tests/mxnet/test_modes.py | 4 +-- tests/mxnet/test_spot_training.py | 6 ++-- tests/mxnet/test_training_end.py | 4 +-- tests/pytorch/test_collection.py | 2 +- tests/pytorch/test_modes.py | 4 +-- tests/pytorch/test_reduce_config.py | 4 +-- tests/pytorch/test_simple_write.py | 28 +++++++++++-------- tests/tensorflow/keras/test_keras.py | 2 +- tests/tensorflow/keras/test_keras_mirrored.py | 2 +- .../tensorflow/keras/test_keras_mirrored.py | 2 +- 24 files changed, 60 insertions(+), 54 deletions(-) diff --git a/examples/tensorflow/scripts/distributed_training/horovod_mnist_estimator.py b/examples/tensorflow/scripts/distributed_training/horovod_mnist_estimator.py index 96c200e5e..a9ac4488f 100644 --- a/examples/tensorflow/scripts/distributed_training/horovod_mnist_estimator.py +++ b/examples/tensorflow/scripts/distributed_training/horovod_mnist_estimator.py @@ -186,7 +186,7 @@ def main(unused_argv): # Download and load MNIST dataset. (train_data, train_labels), (eval_data, eval_labels) = keras.datasets.mnist.load_data( - "MNIST-data-%d" % hvd.rank() + "/tmp/MNIST-data-%d" % hvd.rank() ) # The shape of downloaded data is (-1, 28, 28), hence we need to reshape it diff --git a/examples/tensorflow/scripts/keras.py b/examples/tensorflow/scripts/keras.py index bbb510cde..0fe13d919 100644 --- a/examples/tensorflow/scripts/keras.py +++ b/examples/tensorflow/scripts/keras.py @@ -42,7 +42,7 @@ def scale(image, label): eval_dataset = mnist_test.map(scale).batch(BATCH_SIZE) hook = KerasHook( - out_dir="~/ts_outputs/", + out_dir="/tmp/ts_outputs/", include_collections=[ # CollectionKeys.WEIGHTS, # CollectionKeys.GRADIENTS, @@ -76,7 +76,7 @@ def scale(image, label): callbacks = [ hook - # tf.keras.callbacks.TensorBoard(log_dir='./logs'), + # tf.keras.callbacks.TensorBoard(log_dir='/tmp/logs'), ] model.fit(train_dataset, epochs=1, callbacks=callbacks) diff --git a/smdebug/core/json_config.py b/smdebug/core/json_config.py index 36273f69f..fe0a8d326 100644 --- a/smdebug/core/json_config.py +++ b/smdebug/core/json_config.py @@ -3,7 +3,7 @@ { "S3Path": "s3://bucket/prefix", - "LocalPath": "newlogsRunTest/test_hook_from_json_config_full", + "LocalPath": "/tmp/test_hook_from_json_config_full", "HookParameters": { "export_tensorboard": true, "tensorboard_dir": "/tmp/tensorboard", diff --git a/tests/analysis/trials/test_modes.py b/tests/analysis/trials/test_modes.py index eec061d57..fff9c8614 100644 --- a/tests/analysis/trials/test_modes.py +++ b/tests/analysis/trials/test_modes.py @@ -21,7 +21,7 @@ def test_modes_on_global_data(): def test_mode_data(): run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - trial_dir = "ts_outputs/" + run_id + trial_dir = "/tmp/ts_outputs/" + run_id c = CollectionManager() c.add("default") @@ -84,4 +84,4 @@ def test_mode_data(): assert tr.tensor("arr").value(i, mode=modes.TRAIN) is not None assert tr.tensor("arr").value(i, mode=modes.EVAL) is not None - shutil.rmtree("ts_outputs/" + run_id) + shutil.rmtree("/tmp/ts_outputs/" + run_id) diff --git a/tests/analysis/trials/test_tensors_api.py b/tests/analysis/trials/test_tensors_api.py index 49e3f1ed7..fbd291f25 100644 --- a/tests/analysis/trials/test_tensors_api.py +++ b/tests/analysis/trials/test_tensors_api.py @@ -51,7 +51,7 @@ def test_tensors(out_dir): def test_mode_data(): run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - trial_dir = "ts_outputs/" + run_id + trial_dir = "/tmp/ts_outputs/" + run_id c = CollectionManager() c.add("default") diff --git a/tests/core/test_collections.py b/tests/core/test_collections.py index 96bd5eaa9..cd840ad25 100644 --- a/tests/core/test_collections.py +++ b/tests/core/test_collections.py @@ -56,9 +56,9 @@ def test_manager_export_load(): cm.add(Collection("trial1")) cm.add("trial2") cm.get("trial2").include("total_loss") - cm.export("dummy_trial", DEFAULT_COLLECTIONS_FILE_NAME) + cm.export("/tmp/dummy_trial", DEFAULT_COLLECTIONS_FILE_NAME) cm2 = CollectionManager.load( - os.path.join(get_path_to_collections("dummy_trial"), DEFAULT_COLLECTIONS_FILE_NAME) + os.path.join(get_path_to_collections("/tmp/dummy_trial"), DEFAULT_COLLECTIONS_FILE_NAME) ) assert cm == cm2 diff --git a/tests/core/test_modes.py b/tests/core/test_modes.py index c95447ff8..fe7fe11a5 100644 --- a/tests/core/test_modes.py +++ b/tests/core/test_modes.py @@ -21,7 +21,7 @@ def test_mode_writing(): run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") worker = socket.gethostname() for s in range(0, 10): - fw = FileWriter(trial_dir="ts_outputs/" + run_id, step=s, worker=worker) + fw = FileWriter(trial_dir="/tmp/ts_outputs/" + run_id, step=s, worker=worker) if s % 2 == 0: fw.write_tensor( tdata=np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32), @@ -37,8 +37,8 @@ def test_mode_writing(): mode_step=s // 2, ) fw.close() - write_dummy_collection_file("ts_outputs/" + run_id) - files = glob.glob("ts_outputs/" + run_id + "/**/*.tfevents", recursive=True) + write_dummy_collection_file("/tmp/ts_outputs/" + run_id) + files = glob.glob("/tmp/ts_outputs/" + run_id + "/**/*.tfevents", recursive=True) global_steps = [] train_steps = [] @@ -56,8 +56,8 @@ def test_mode_writing(): assert mode_step == step // 2 global_steps.append(step) - trial = create_trial("ts_outputs/" + run_id) + trial = create_trial("/tmp/ts_outputs/" + run_id) assert trial.steps() == sorted(global_steps) assert trial.steps(ModeKeys.TRAIN) == sorted(train_steps) assert trial.steps(ModeKeys.EVAL) == sorted(eval_steps) - shutil.rmtree("ts_outputs/" + run_id) + shutil.rmtree("/tmp/ts_outputs/" + run_id) diff --git a/tests/mxnet/test_hook.py b/tests/mxnet/test_hook.py index d670fe6d5..75f769487 100644 --- a/tests/mxnet/test_hook.py +++ b/tests/mxnet/test_hook.py @@ -16,7 +16,7 @@ def test_hook(): save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id hook = t_hook(out_dir=out_dir, save_config=save_config) assert has_training_ended(out_dir) == False run_mnist_gluon_model( @@ -26,7 +26,7 @@ def test_hook(): def test_hook_from_json_config(): - out_dir = "newlogsRunTest1/test_hook_from_json_config" + out_dir = "/tmp/test_hook_from_json_config" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR @@ -40,7 +40,7 @@ def test_hook_from_json_config(): def test_hook_from_json_config_full(): - out_dir = "newlogsRunTest2/test_hook_from_json_config_full" + out_dir = "/tmp/test_hook_from_json_config_full" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_hook_all_zero.py b/tests/mxnet/test_hook_all_zero.py index 8d280ca0e..1a3bfdcfb 100644 --- a/tests/mxnet/test_hook_all_zero.py +++ b/tests/mxnet/test_hook_all_zero.py @@ -20,7 +20,7 @@ def test_hook_all_zero(hook=None, out_dir=None): hook_created = True save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id print("Registering the hook with out_dir {0}".format(out_dir)) hook = t_hook( out_dir=out_dir, @@ -52,7 +52,7 @@ def test_hook_all_zero_hook_from_json(): import shutil import os - out_dir = "newlogsRunTest2/test_hook_all_zero_hook_from_json" + out_dir = "/tmp/test_hook_all_zero_hook_from_json" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_hook_custom_collection.py b/tests/mxnet/test_hook_custom_collection.py index 0cf004bbf..97ef42ba2 100644 --- a/tests/mxnet/test_hook_custom_collection.py +++ b/tests/mxnet/test_hook_custom_collection.py @@ -13,7 +13,7 @@ def test_hook_custom_collection(): save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id hook = t_hook(out_dir=out_dir, save_config=save_config, include_collections=["ReluActivation"]) hook.get_collection("ReluActivation").include(["relu*", "input_*"]) run_mnist_gluon_model(hook=hook, num_steps_train=10, num_steps_eval=10) diff --git a/tests/mxnet/test_hook_loss_collection.py b/tests/mxnet/test_hook_loss_collection.py index b4a73f05f..f4606fbe7 100644 --- a/tests/mxnet/test_hook_loss_collection.py +++ b/tests/mxnet/test_hook_loss_collection.py @@ -15,7 +15,7 @@ def test_loss_collection_default(): save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id hook = t_hook(out_dir=out_dir, save_config=save_config) assert has_training_ended(out_dir) == False run_mnist_gluon_model( @@ -39,7 +39,7 @@ def test_loss_collection_default(): def test_loss_collection_with_no_other_collections(): save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id hook = t_hook(out_dir=out_dir, save_config=save_config, include_collections=[]) assert has_training_ended(out_dir) == False run_mnist_gluon_model( diff --git a/tests/mxnet/test_hook_reduce_config.py b/tests/mxnet/test_hook_reduce_config.py index f6097ddfc..559f2e2e6 100644 --- a/tests/mxnet/test_hook_reduce_config.py +++ b/tests/mxnet/test_hook_reduce_config.py @@ -19,7 +19,7 @@ def test_save_config(hook=None, out_dir=None): global_save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id print("Registering the hook with out_dir {0}".format(out_dir)) hook = t_hook( out_dir=out_dir, @@ -86,7 +86,7 @@ def test_save_config_hook_from_json(): from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR import os - out_dir = "newlogsRunTest2/test_hook_reduce_config_hook_from_json" + out_dir = "/tmp/test_hook_reduce_config_hook_from_json" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_hook_save_all.py b/tests/mxnet/test_hook_save_all.py index 83c88c988..8847cd621 100644 --- a/tests/mxnet/test_hook_save_all.py +++ b/tests/mxnet/test_hook_save_all.py @@ -17,7 +17,7 @@ def test_save_all(hook=None, out_dir=None): hook_created = True save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id print("Registering the hook with out_dir {}".format(out_dir)) hook = t_hook(out_dir=out_dir, save_config=save_config, save_all=True) run_mnist_gluon_model(hook=hook, num_steps_train=7, num_steps_eval=5) @@ -39,7 +39,7 @@ def test_save_all_hook_from_json(): from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR import os - out_dir = "newlogsRunTest2/test_hook_save_all_hook_from_json" + out_dir = "/tmp/test_hook_save_all_hook_from_json" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_hook_save_config.py b/tests/mxnet/test_hook_save_config.py index 120abcf70..0faede257 100644 --- a/tests/mxnet/test_hook_save_config.py +++ b/tests/mxnet/test_hook_save_config.py @@ -15,7 +15,7 @@ def test_save_config(hook=None): if hook is None: save_config_collection = SaveConfig(save_steps=[4, 5, 6]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id save_config = SaveConfig(save_steps=[0, 1, 2, 3]) hook = t_hook( out_dir=out_dir, @@ -34,7 +34,7 @@ def test_save_config(hook=None): def test_save_config_hookjson_config(): from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR - out_dir = "newlogsRunTest2/test_hook_from_json_config_full" + out_dir = "/tmp/test_hook_from_json_config_full" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_modes.py b/tests/mxnet/test_modes.py index 94d9b99d4..bfb16505c 100644 --- a/tests/mxnet/test_modes.py +++ b/tests/mxnet/test_modes.py @@ -14,7 +14,7 @@ def test_modes(hook=None, path=None): if hook is None: run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - path = "./newlogsRunTest/" + run_id + path = "/tmp/" + run_id hook = t_hook( out_dir=path, save_config=SaveConfig( @@ -57,7 +57,7 @@ def test_modes_hook_from_json_config(): import shutil import os - out_dir = "newlogsRunTest2/test_modes_hookjson" + out_dir = "/tmp/test_modes_hookjson" shutil.rmtree(out_dir, True) os.environ[CONFIG_FILE_PATH_ENV_STR] = "tests/mxnet/test_json_configs/test_modes_hook.json" hook = t_hook.hook_from_config() diff --git a/tests/mxnet/test_spot_training.py b/tests/mxnet/test_spot_training.py index 0f428e01e..7e29debeb 100644 --- a/tests/mxnet/test_spot_training.py +++ b/tests/mxnet/test_spot_training.py @@ -143,7 +143,7 @@ def test_spot_hook(): os.environ[ CHECKPOINT_CONFIG_FILE_PATH_ENV_VAR ] = "./tests/mxnet/test_json_configs/checkpointconfig.json" - checkpoint_path = "./savedParams" + checkpoint_path = "/tmp/savedParams" if not os.path.exists(checkpoint_path): os.mkdir(checkpoint_path) save_config = SaveConfig(save_steps=[10, 11, 12, 13, 14, 40, 50, 60, 70, 80]) @@ -154,7 +154,7 @@ def test_spot_hook(): """ run_id_1 = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir_1 = "newlogsRunTest/" + run_id_1 + out_dir_1 = "/tmp/" + run_id_1 hook = t_hook( out_dir=out_dir_1, save_config=save_config, include_collections=["weights", "gradients"] ) @@ -174,7 +174,7 @@ def test_spot_hook(): We expect to read steps 40, 50, 60, 70 and 80 """ run_id_2 = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir_2 = "newlogsRunTest/" + run_id_2 + out_dir_2 = "/tmp/" + run_id_2 hook = t_hook( out_dir=out_dir_2, save_config=save_config, include_collections=["weights", "gradients"] ) diff --git a/tests/mxnet/test_training_end.py b/tests/mxnet/test_training_end.py index df4b75d18..6856d9756 100644 --- a/tests/mxnet/test_training_end.py +++ b/tests/mxnet/test_training_end.py @@ -15,7 +15,7 @@ @pytest.mark.slow # 0:03 to run def test_end_local_training(): run_id = str(uuid.uuid4()) - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id assert has_training_ended(out_dir) == False subprocess.check_call( [ @@ -35,7 +35,7 @@ def test_end_local_training(): def test_end_s3_training(): run_id = str(uuid.uuid4()) bucket = "tornasolecodebuildtest" - key = "newlogsRunTest/" + run_id + key = "/tmp/" + run_id out_dir = bucket + "/" + key assert has_training_ended(out_dir) == False subprocess.check_call( diff --git a/tests/pytorch/test_collection.py b/tests/pytorch/test_collection.py index d5a8dd4ea..2e5caffcd 100644 --- a/tests/pytorch/test_collection.py +++ b/tests/pytorch/test_collection.py @@ -19,7 +19,7 @@ def test_collection_add(hook=None, out_dir=None): hook_created = False if hook is None: run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id hook = t_hook( out_dir=out_dir, save_config=SaveConfig(save_steps=[0, 1, 2, 3]), diff --git a/tests/pytorch/test_modes.py b/tests/pytorch/test_modes.py index 52e6bd782..b93974147 100644 --- a/tests/pytorch/test_modes.py +++ b/tests/pytorch/test_modes.py @@ -85,7 +85,7 @@ def delete_local_trials(local_trials): shutil.rmtree(trial) -def helper_test_modes(hook=None, out_dir="./test_output/test_hook_modes/"): +def helper_test_modes(hook=None, out_dir="/tmp/test_output/test_hook_modes/"): prefix = str(uuid.uuid4()) device = torch.device("cpu") save_steps = [i for i in range(5)] @@ -127,7 +127,7 @@ def test_training_mode(): # Test creating hook with multiple collections and save configs. def test_training_mode_json(): - out_dir = "test_output/test_hook_modes/jsonloading" + out_dir = "/tmp/test_output/test_hook_modes/jsonloading" shutil.rmtree(out_dir, True) os.environ[CONFIG_FILE_PATH_ENV_STR] = "tests/pytorch/test_json_configs/test_modes.json" hook = Hook.hook_from_config() diff --git a/tests/pytorch/test_reduce_config.py b/tests/pytorch/test_reduce_config.py index 051a7f1cf..d19a1ca18 100644 --- a/tests/pytorch/test_reduce_config.py +++ b/tests/pytorch/test_reduce_config.py @@ -23,7 +23,7 @@ def test_reduce_config(hook=None, out_dir=None): global_save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "./newlogsRunTest/" + run_id + out_dir = "/tmp/" + run_id hook = t_hook( out_dir=out_dir, save_config=global_save_config, @@ -90,7 +90,7 @@ def test_reduce_config(hook=None, out_dir=None): def test_reduce_config_with_json(): from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR - out_dir = "test_output/test_hook_reduction_config/jsonloading" + out_dir = "/tmp/test_output/test_hook_reduction_config/jsonloading" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/pytorch/test_simple_write.py b/tests/pytorch/test_simple_write.py index 2cdf002c6..fdc411855 100644 --- a/tests/pytorch/test_simple_write.py +++ b/tests/pytorch/test_simple_write.py @@ -168,7 +168,7 @@ def helper_test_weights_bias_gradients(hook=None): json = hook is not None if not json: hook = create_hook( - "test_output/test_hook_save_weightsbiasgradients/" + prefix, + "/tmp/test_output/test_hook_save_weightsbiasgradients/" + prefix, model, hook_type, save_steps=save_steps, @@ -179,11 +179,13 @@ def helper_test_weights_bias_gradients(hook=None): train(model, device, optimizer, num_steps=101, save_steps=save_steps) if not json: trial = create_trial( - path="test_output/test_hook_save_weightsbiasgradients/" + prefix, name="test output" + path="/tmp/test_output/test_hook_save_weightsbiasgradients/" + prefix, + name="test output", ) else: trial = create_trial( - path="test_output/test_hook_save_weightsbiasgradients/jsonloading", name="test output" + path="/tmp/test_output/test_hook_save_weightsbiasgradients/jsonloading", + name="test output", ) grads = [ "gradient/Net_fc1.weight", @@ -211,7 +213,7 @@ def helper_test_weights_bias_gradients(hook=None): else: addendum = "jsonloading" hook._cleanup() - delete_local_trials(["test_output/test_hook_save_weightsbiasgradients/" + addendum]) + delete_local_trials(["/tmp/test_output/test_hook_save_weightsbiasgradients/" + addendum]) def saveall_test_helper(hook=None): @@ -223,15 +225,19 @@ def saveall_test_helper(hook=None): json = hook is not None if not json: hook = create_hook( - "test_output/test_hook_saveall/" + prefix, model, hook_type, save_steps=save_steps + "/tmp/test_output/test_hook_saveall/" + prefix, model, hook_type, save_steps=save_steps ) hook.register_hook(model) optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) train(model, device, optimizer, num_steps=101, save_steps=save_steps) if not json: - trial = create_trial(path="test_output/test_hook_saveall/" + prefix, name="test output") + trial = create_trial( + path="/tmp/test_output/test_hook_saveall/" + prefix, name="test output" + ) else: - trial = create_trial(path="test_output/test_hook_saveall/jsonloading", name="test output") + trial = create_trial( + path="/tmp/test_output/test_hook_saveall/jsonloading", name="test output" + ) grads = [ "gradient/Net_fc1.weight", "gradient/Net_fc2.weight", @@ -260,7 +266,7 @@ def saveall_test_helper(hook=None): else: addendum = "jsonloading" hook._cleanup() - delete_local_trials(["test_output/test_hook_saveall/" + addendum]) + delete_local_trials(["/tmp/test_output/test_hook_saveall/" + addendum]) def helper_test_multi_collections(hook, out_dir): @@ -293,7 +299,7 @@ def helper_test_multi_collections(hook, out_dir): def test_weightsbiasgradients_json(): - out_dir = "test_output/test_hook_save_weightsbiasgradients/jsonloading" + out_dir = "/tmp/test_output/test_hook_save_weightsbiasgradients/jsonloading" shutil.rmtree(out_dir, ignore_errors=True) os.environ[ CONFIG_FILE_PATH_ENV_STR @@ -307,7 +313,7 @@ def test_weightsbiasgradients_call(): def test_saveall_json(): - out_dir = "test_output/test_hook_saveall/jsonloading" + out_dir = "/tmp/test_output/test_hook_saveall/jsonloading" shutil.rmtree(out_dir, ignore_errors=True) os.environ[CONFIG_FILE_PATH_ENV_STR] = "tests/pytorch/test_json_configs/test_hook_saveall.json" hook = Hook.hook_from_config() @@ -320,7 +326,7 @@ def test_saveall_params(): # Test creating hook with multiple collections and save configs. def test_multi_collection_json(): - out_dir = "test_output/test_hook_multi_collection/jsonloading" + out_dir = "/tmp/test_output/test_hook_multi_collection/jsonloading" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/tensorflow/keras/test_keras.py b/tests/tensorflow/keras/test_keras.py index a100feb0b..128d17685 100644 --- a/tests/tensorflow/keras/test_keras.py +++ b/tests/tensorflow/keras/test_keras.py @@ -114,7 +114,7 @@ def train_model( if "tensorboard" in add_callbacks: hooks.append( tf.keras.callbacks.TensorBoard( - log_dir="./logs", histogram_freq=1, write_grads=True, write_images=True + log_dir="/tmp/logs", histogram_freq=1, write_grads=True, write_images=True ) ) if "fetch_tensor" in add_callbacks: diff --git a/tests/tensorflow/keras/test_keras_mirrored.py b/tests/tensorflow/keras/test_keras_mirrored.py index 71b5e6923..dd2a3b221 100644 --- a/tests/tensorflow/keras/test_keras_mirrored.py +++ b/tests/tensorflow/keras/test_keras_mirrored.py @@ -165,7 +165,7 @@ def scale(image, label): # erorr like this https://stackoverflow.com/questions/56836895/custom-training-loop-using-tensorflow-gpu-1-14-and-tf-distribute-mirroredstrateg # this crash is even if tornasole callback is off tf.keras.callbacks.TensorBoard( - log_dir="./logs", histogram_freq=4, write_images=True + log_dir="/tmp/logs", histogram_freq=4, write_images=True ) ) if "fetch_tensor" in add_callbacks: diff --git a/tests/zero_code_change/tests/tensorflow/keras/test_keras_mirrored.py b/tests/zero_code_change/tests/tensorflow/keras/test_keras_mirrored.py index 71b5e6923..dd2a3b221 100644 --- a/tests/zero_code_change/tests/tensorflow/keras/test_keras_mirrored.py +++ b/tests/zero_code_change/tests/tensorflow/keras/test_keras_mirrored.py @@ -165,7 +165,7 @@ def scale(image, label): # erorr like this https://stackoverflow.com/questions/56836895/custom-training-loop-using-tensorflow-gpu-1-14-and-tf-distribute-mirroredstrateg # this crash is even if tornasole callback is off tf.keras.callbacks.TensorBoard( - log_dir="./logs", histogram_freq=4, write_images=True + log_dir="/tmp/logs", histogram_freq=4, write_images=True ) ) if "fetch_tensor" in add_callbacks: From edb7abccbdf116d171de1db59b0b028e321aea8e Mon Sep 17 00:00:00 2001 From: Jared Nielsen Date: Mon, 25 Nov 2019 16:46:42 -0800 Subject: [PATCH 2/5] Update json files --- tests/mxnet/test_json_configs/test_hook_all_zero_hook.json | 2 +- tests/mxnet/test_json_configs/test_hook_from_json_config.json | 2 +- .../test_json_configs/test_hook_from_json_config_full.json | 2 +- tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json | 2 +- tests/mxnet/test_json_configs/test_hook_save_all_hook.json | 2 +- tests/mxnet/test_json_configs/test_modes_hook.json | 2 +- .../test_json_configs/test_save_config_hookjson_config.json | 2 +- .../pytorch/test_json_configs/test_hook_multi_collections.json | 2 +- tests/pytorch/test_json_configs/test_hook_reduction_config.json | 2 +- tests/pytorch/test_json_configs/test_hook_saveall.json | 2 +- .../test_json_configs/test_hook_weightsbiasgradients.json | 2 +- tests/pytorch/test_json_configs/test_modes.json | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json b/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json index 773091cc0..598d06d84 100644 --- a/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json +++ b/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "newlogsRunTest2/test_hook_all_zero_hook_from_json", + "LocalPath": "/tmp/test_hook_all_zero_hook_from_json", "HookParameters": { "save_all": false, "save_steps": "0,1,2,3" diff --git a/tests/mxnet/test_json_configs/test_hook_from_json_config.json b/tests/mxnet/test_json_configs/test_hook_from_json_config.json index 42abb8e19..d821cd53b 100644 --- a/tests/mxnet/test_json_configs/test_hook_from_json_config.json +++ b/tests/mxnet/test_json_configs/test_hook_from_json_config.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "newlogsRunTest1/test_hook_from_json_config", + "LocalPath": "/tmp/test_hook_from_json_config", "HookParameters": { "save_steps": "0,1,2,3" } diff --git a/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json b/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json index 18217a4c4..59ac78209 100644 --- a/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json +++ b/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "newlogsRunTest2/test_hook_from_json_config_full", + "LocalPath": "/tmp/test_hook_from_json_config_full", "HookParameters": { "save_all": false, "include_regex": "regexe1,regex2", diff --git a/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json b/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json index 9b936a3b2..35f1fcff6 100644 --- a/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json +++ b/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "newlogsRunTest2/test_hook_reduce_config_hook_from_json", + "LocalPath": "/tmp/test_hook_reduce_config_hook_from_json", "HookParameters": { "save_all": false, "reductions": "max,mean", diff --git a/tests/mxnet/test_json_configs/test_hook_save_all_hook.json b/tests/mxnet/test_json_configs/test_hook_save_all_hook.json index 5c18ae7c4..a2b6e4851 100644 --- a/tests/mxnet/test_json_configs/test_hook_save_all_hook.json +++ b/tests/mxnet/test_json_configs/test_hook_save_all_hook.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "newlogsRunTest2/test_hook_save_all_hook_from_json", + "LocalPath": "/tmp/test_hook_save_all_hook_from_json", "HookParameters": { "save_all": true, "save_steps": "0,1,2,3" diff --git a/tests/mxnet/test_json_configs/test_modes_hook.json b/tests/mxnet/test_json_configs/test_modes_hook.json index ede462d22..d18f68f93 100644 --- a/tests/mxnet/test_json_configs/test_modes_hook.json +++ b/tests/mxnet/test_json_configs/test_modes_hook.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "newlogsRunTest2/test_modes_hookjson", + "LocalPath": "/tmp/test_modes_hookjson", "HookParameters": { "train.save_interval": 2, "eval.save_interval": 3 diff --git a/tests/mxnet/test_json_configs/test_save_config_hookjson_config.json b/tests/mxnet/test_json_configs/test_save_config_hookjson_config.json index 1a1655232..65382b026 100644 --- a/tests/mxnet/test_json_configs/test_save_config_hookjson_config.json +++ b/tests/mxnet/test_json_configs/test_save_config_hookjson_config.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "newlogsRunTest2/test_hook_from_json_config_full", + "LocalPath": "/tmp/test_hook_from_json_config_full", "HookParameters": { "save_all": false, "save_steps": "0,1,2,3" diff --git a/tests/pytorch/test_json_configs/test_hook_multi_collections.json b/tests/pytorch/test_json_configs/test_hook_multi_collections.json index a02a83813..f39570818 100644 --- a/tests/pytorch/test_json_configs/test_hook_multi_collections.json +++ b/tests/pytorch/test_json_configs/test_hook_multi_collections.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "test_output/test_hook_multi_collection/jsonloading", + "LocalPath": "/tmp/test_output/test_hook_multi_collection/jsonloading", "HookParameters": { "save_all": false, "save_steps": "0, 1, 2, 3" diff --git a/tests/pytorch/test_json_configs/test_hook_reduction_config.json b/tests/pytorch/test_json_configs/test_hook_reduction_config.json index d0829a697..aaea0d2c8 100644 --- a/tests/pytorch/test_json_configs/test_hook_reduction_config.json +++ b/tests/pytorch/test_json_configs/test_hook_reduction_config.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "test_output/test_hook_reduction_config/jsonloading", + "LocalPath": "/tmp/test_output/test_hook_reduction_config/jsonloading", "HookParameters": { "save_all": false, "reductions": "max, mean, variance", diff --git a/tests/pytorch/test_json_configs/test_hook_saveall.json b/tests/pytorch/test_json_configs/test_hook_saveall.json index a45de49fd..c343dd7bb 100644 --- a/tests/pytorch/test_json_configs/test_hook_saveall.json +++ b/tests/pytorch/test_json_configs/test_hook_saveall.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "test_output/test_hook_saveall/jsonloading", + "LocalPath": "/tmp/test_output/test_hook_saveall/jsonloading", "HookParameters": { "save_all": true, "save_steps": "0,20,40,60,80" diff --git a/tests/pytorch/test_json_configs/test_hook_weightsbiasgradients.json b/tests/pytorch/test_json_configs/test_hook_weightsbiasgradients.json index b2626fab9..4097b9257 100644 --- a/tests/pytorch/test_json_configs/test_hook_weightsbiasgradients.json +++ b/tests/pytorch/test_json_configs/test_hook_weightsbiasgradients.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "test_output/test_hook_save_weightsbiasgradients/jsonloading", + "LocalPath": "/tmp/test_output/test_hook_save_weightsbiasgradients/jsonloading", "HookParameters": { "save_all": false, "save_steps": "0,20,40,60,80" diff --git a/tests/pytorch/test_json_configs/test_modes.json b/tests/pytorch/test_json_configs/test_modes.json index bdd7d6a05..92cb49778 100644 --- a/tests/pytorch/test_json_configs/test_modes.json +++ b/tests/pytorch/test_json_configs/test_modes.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "test_output/test_hook_modes/jsonloading", + "LocalPath": "/tmp/test_output/test_hook_modes/jsonloading", "HookParameters": { "save_all": true, "train.save_steps": "0,1,2,3,4" From 1b27b9c5488665d3e01d3354aa8262ab7356d8c7 Mon Sep 17 00:00:00 2001 From: Jared Nielsen Date: Tue, 26 Nov 2019 12:22:35 -0800 Subject: [PATCH 3/5] Match dirs --- tests/mxnet/test_hook.py | 6 +++--- tests/mxnet/test_hook_all_zero.py | 4 ++-- tests/mxnet/test_hook_reduce_config.py | 4 ++-- tests/mxnet/test_hook_save_all.py | 2 +- tests/mxnet/test_json_configs/test_hook_all_zero_hook.json | 2 +- .../mxnet/test_json_configs/test_hook_from_json_config.json | 2 +- .../test_json_configs/test_hook_from_json_config_full.json | 2 +- .../test_json_configs/test_hook_reduce_config_hook.json | 2 +- tests/mxnet/test_json_configs/test_hook_save_all_hook.json | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/mxnet/test_hook.py b/tests/mxnet/test_hook.py index 75f769487..478177ce1 100644 --- a/tests/mxnet/test_hook.py +++ b/tests/mxnet/test_hook.py @@ -16,7 +16,7 @@ def test_hook(): save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "/tmp/" + run_id + out_dir = "/tmp/newlogsRunTest/" + run_id hook = t_hook(out_dir=out_dir, save_config=save_config) assert has_training_ended(out_dir) == False run_mnist_gluon_model( @@ -26,7 +26,7 @@ def test_hook(): def test_hook_from_json_config(): - out_dir = "/tmp/test_hook_from_json_config" + out_dir = "/tmp/newlogsRunTest1/test_hook_from_json_config" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR @@ -40,7 +40,7 @@ def test_hook_from_json_config(): def test_hook_from_json_config_full(): - out_dir = "/tmp/test_hook_from_json_config_full" + out_dir = "/tmp/newlogsRunTest2/test_hook_from_json_config_full" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_hook_all_zero.py b/tests/mxnet/test_hook_all_zero.py index 1a3bfdcfb..84a4487d1 100644 --- a/tests/mxnet/test_hook_all_zero.py +++ b/tests/mxnet/test_hook_all_zero.py @@ -20,7 +20,7 @@ def test_hook_all_zero(hook=None, out_dir=None): hook_created = True save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "/tmp/" + run_id + out_dir = "/tmp/newlogsRunTest/" + run_id print("Registering the hook with out_dir {0}".format(out_dir)) hook = t_hook( out_dir=out_dir, @@ -52,7 +52,7 @@ def test_hook_all_zero_hook_from_json(): import shutil import os - out_dir = "/tmp/test_hook_all_zero_hook_from_json" + out_dir = "/tmp/newlogsRunTest2/test_hook_all_zero_hook_from_json" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_hook_reduce_config.py b/tests/mxnet/test_hook_reduce_config.py index 559f2e2e6..26e9cacdb 100644 --- a/tests/mxnet/test_hook_reduce_config.py +++ b/tests/mxnet/test_hook_reduce_config.py @@ -19,7 +19,7 @@ def test_save_config(hook=None, out_dir=None): global_save_config = SaveConfig(save_steps=[0, 1, 2, 3]) run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir = "/tmp/" + run_id + out_dir = "/tmp/newlogsRunTest/" + run_id print("Registering the hook with out_dir {0}".format(out_dir)) hook = t_hook( out_dir=out_dir, @@ -86,7 +86,7 @@ def test_save_config_hook_from_json(): from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR import os - out_dir = "/tmp/test_hook_reduce_config_hook_from_json" + out_dir = "/tmp/newlogsRunTest2/test_hook_reduce_config_hook_from_json" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_hook_save_all.py b/tests/mxnet/test_hook_save_all.py index 8847cd621..80110e35a 100644 --- a/tests/mxnet/test_hook_save_all.py +++ b/tests/mxnet/test_hook_save_all.py @@ -39,7 +39,7 @@ def test_save_all_hook_from_json(): from smdebug.core.json_config import CONFIG_FILE_PATH_ENV_STR import os - out_dir = "/tmp/test_hook_save_all_hook_from_json" + out_dir = "/tmp/newlogsRunTest2/test_hook_save_all_hook_from_json" shutil.rmtree(out_dir, True) os.environ[ CONFIG_FILE_PATH_ENV_STR diff --git a/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json b/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json index 598d06d84..88085bf3c 100644 --- a/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json +++ b/tests/mxnet/test_json_configs/test_hook_all_zero_hook.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "/tmp/test_hook_all_zero_hook_from_json", + "LocalPath": "/tmp/newlogsRunTest2/test_hook_all_zero_hook_from_json", "HookParameters": { "save_all": false, "save_steps": "0,1,2,3" diff --git a/tests/mxnet/test_json_configs/test_hook_from_json_config.json b/tests/mxnet/test_json_configs/test_hook_from_json_config.json index d821cd53b..bba58057c 100644 --- a/tests/mxnet/test_json_configs/test_hook_from_json_config.json +++ b/tests/mxnet/test_json_configs/test_hook_from_json_config.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "/tmp/test_hook_from_json_config", + "LocalPath": "/tmp/newlogsRunTest1/test_hook_from_json_config", "HookParameters": { "save_steps": "0,1,2,3" } diff --git a/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json b/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json index 59ac78209..f7b6a5dca 100644 --- a/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json +++ b/tests/mxnet/test_json_configs/test_hook_from_json_config_full.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "/tmp/test_hook_from_json_config_full", + "LocalPath": "/tmp/newlogsRunTest2/test_hook_from_json_config_full", "HookParameters": { "save_all": false, "include_regex": "regexe1,regex2", diff --git a/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json b/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json index 35f1fcff6..b9ef39345 100644 --- a/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json +++ b/tests/mxnet/test_json_configs/test_hook_reduce_config_hook.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "/tmp/test_hook_reduce_config_hook_from_json", + "LocalPath": "/tmp/newlogsRunTest2/test_hook_reduce_config_hook_from_json", "HookParameters": { "save_all": false, "reductions": "max,mean", diff --git a/tests/mxnet/test_json_configs/test_hook_save_all_hook.json b/tests/mxnet/test_json_configs/test_hook_save_all_hook.json index a2b6e4851..d1ccce2a3 100644 --- a/tests/mxnet/test_json_configs/test_hook_save_all_hook.json +++ b/tests/mxnet/test_json_configs/test_hook_save_all_hook.json @@ -1,6 +1,6 @@ { "S3Path": "s3://kjndjknd_bucket/prefix", - "LocalPath": "/tmp/test_hook_save_all_hook_from_json", + "LocalPath": "/tmp/newlogsRunTest2/test_hook_save_all_hook_from_json", "HookParameters": { "save_all": true, "save_steps": "0,1,2,3" From 3f134585e44c987e5eba9d08a1b49d1c551141d6 Mon Sep 17 00:00:00 2001 From: Jared Nielsen Date: Tue, 26 Nov 2019 13:16:36 -0800 Subject: [PATCH 4/5] Fix tests --- tests/mxnet/test_json_configs/checkpointconfig.json | 2 +- tests/mxnet/test_training_end.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/mxnet/test_json_configs/checkpointconfig.json b/tests/mxnet/test_json_configs/checkpointconfig.json index 5d5d7ac86..fc3fc8444 100644 --- a/tests/mxnet/test_json_configs/checkpointconfig.json +++ b/tests/mxnet/test_json_configs/checkpointconfig.json @@ -1,3 +1,3 @@ { - "LocalPath" : "./savedParams" + "LocalPath" : "/tmp/savedParams" } diff --git a/tests/mxnet/test_training_end.py b/tests/mxnet/test_training_end.py index 6856d9756..0483c3e31 100644 --- a/tests/mxnet/test_training_end.py +++ b/tests/mxnet/test_training_end.py @@ -15,7 +15,7 @@ @pytest.mark.slow # 0:03 to run def test_end_local_training(): run_id = str(uuid.uuid4()) - out_dir = "/tmp/" + run_id + out_dir = "/tmp/newlogsRunTest/" + run_id assert has_training_ended(out_dir) == False subprocess.check_call( [ @@ -35,7 +35,7 @@ def test_end_local_training(): def test_end_s3_training(): run_id = str(uuid.uuid4()) bucket = "tornasolecodebuildtest" - key = "/tmp/" + run_id + key = "newlogsRunTest/" + run_id out_dir = bucket + "/" + key assert has_training_ended(out_dir) == False subprocess.check_call( From cb94e8d463a3660b3db9f717f434702ea3986bbb Mon Sep 17 00:00:00 2001 From: Jared Nielsen Date: Tue, 26 Nov 2019 14:32:08 -0800 Subject: [PATCH 5/5] ignore errors on directory remove --- tests/mxnet/test_spot_training.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/mxnet/test_spot_training.py b/tests/mxnet/test_spot_training.py index 7e29debeb..6a01fede2 100644 --- a/tests/mxnet/test_spot_training.py +++ b/tests/mxnet/test_spot_training.py @@ -154,7 +154,7 @@ def test_spot_hook(): """ run_id_1 = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir_1 = "/tmp/" + run_id_1 + out_dir_1 = "/tmp/newlogsRunTest/" + run_id_1 hook = t_hook( out_dir=out_dir_1, save_config=save_config, include_collections=["weights", "gradients"] ) @@ -174,7 +174,7 @@ def test_spot_hook(): We expect to read steps 40, 50, 60, 70 and 80 """ run_id_2 = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f") - out_dir_2 = "/tmp/" + run_id_2 + out_dir_2 = "/tmp/newlogsRunTest/" + run_id_2 hook = t_hook( out_dir=out_dir_2, save_config=save_config, include_collections=["weights", "gradients"] ) @@ -216,4 +216,4 @@ def test_spot_hook(): print("Cleaning up.") shutil.rmtree(os.path.dirname(out_dir_1)) - shutil.rmtree(checkpoint_path) + shutil.rmtree(checkpoint_path, ignore_errors=True)