Skip to content
This repository has been archived by the owner on Jun 22, 2022. It is now read-only.

[whales, task 5] error after 149 epochs: FileNotFoundError(2, 'No such file or directory') #84

Open
rafajak opened this issue Apr 16, 2018 · 0 comments

Comments

@rafajak
Copy link
Contributor

rafajak commented Apr 16, 2018

neptune run -- submit --problem whales --task_nr 5

raises a FileNotFoundError when the model checkpoint is saved at the end of epoch 149:

2018-04-15 11-05-41 minerva >>> epoch 149 batch 112 accuracy: 0.19444

262538.163211 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/deepsense/neptune/job_wrapper.py", line 103, in execute
-- | --
262538.163383 | execfile(job_filepath, job_globals)
262538.163556 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/past/builtins/misc.py", line 82, in execfile
262538.163723 | exec_(code, myglobals, mylocals)
262538.163892 | File "main.py", line 66, in <module>
262538.164061 | action()
262538.16423 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/click/core.py", line 722, in __call__
262538.164399 | return self.main(*args, **kwargs)
262538.164568 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/click/core.py", line 697, in main
262538.164776 | rv = self.invoke(ctx)
262538.165005 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/click/core.py", line 1066, in invoke
262538.165232 | return _process_result(sub_ctx.command.invoke(sub_ctx))
262538.16546 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/click/core.py", line 895, in invoke
262538.165716 | return ctx.invoke(self.callback, **ctx.params)
262538.165954 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/click/core.py", line 535, in invoke
262538.166181 | return callback(*args, **kwargs)
262538.166413 | File "main.py", line 61, in submit
262538.166645 | pm.submit_task(task_sub_problem, task_nr, file_path, dev_mode)
262538.166854 | File "/mnt/ml-team/homes/usr/minerva/minerva/whales/problem_manager.py", line 44, in submit_task
262538.167046 | new_trainer.train()
262538.167237 | File "/mnt/ml-team/homes/usr/minerva/minerva/whales/trainer.py", line 48, in train
262538.167427 | 'train_mode': True,
262538.167617 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/base.py", line 75, in fit_transform
262538.167806 | step_inputs[input_step.name] = input_step.fit_transform(data)
262538.167994 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/base.py", line 75, in fit_transform
262538.168179 | step_inputs[input_step.name] = input_step.fit_transform(data)
262538.168367 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/base.py", line 81, in fit_transform
262538.168556 | step_output_data = self._cached_fit_transform(step_inputs)
262538.168743 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/base.py", line 91, in _cached_fit_transform
262538.168971 | step_output_data = self.transformer.fit_transform(**step_inputs)
262538.169201 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/base.py", line 218, in fit_transform
262538.169429 | self.fit(*args, **kwargs)
262538.169659 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/models/pytorch/models.py", line 61, in fit
262538.169913 | self.callbacks.on_epoch_end()
262538.170143 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/models/pytorch/callbacks.py", line 87, in on_epoch_end
262538.17037 | callback.on_epoch_end(*args, **kwargs)
262538.170591 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/models/pytorch/callbacks.py", line 320, in on_epoch_end
262538.170778 | save_model(self.model, full_path)
262538.170972 | File "/mnt/ml-team/homes/usr/minerva/minerva/backend/models/pytorch/utils.py", line 68, in save_model
262538.171159 | torch.save(model.state_dict(), path)
262538.17134 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/torch/serialization.py", line 135, in save
262538.384228 | return _with_file_like(f, "wb", lambda f: _save(obj, f, pickle_module, pickle_protocol))
262538.384434 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/torch/serialization.py", line 115, in _with_file_like
262538.384553 | f = open(f, mode)
262538.384664 | FileNotFoundError: [Errno 2] No such file or directory: 'resources/whales/solution/localization/submit_solution/checkpoints/localizer_network/model_epoch149.torch'
262538.384773 |  
262538.384882 | During handling of the above exception, another exception occurred:
262538.384988 |  
262538.385094 | Traceback (most recent call last):
262538.385199 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/deepsense/neptune/job_wrapper.py", line 109, in <module>
262538.385304 | execute()
262538.385408 | File "/mnt/ml-team/homes/usr/envs/minerva_venv/lib/python3.5/site-packages/deepsense/neptune/job_wrapper.py", line 105, in execute
262538.385511 | raise ExperimentExecutionException("Exception during experiment execution", ex)
262538.385615 | deepsense.neptune.exceptions.ExperimentExecutionException: ('Exception during experiment execution', FileNotFoundError(2, 'No such file or directory'))


Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant