From 8dc8a8bfd31a94478c322b19c3e64f870503ff32 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Tue, 14 Jan 2020 04:20:01 +0100
Subject: [PATCH] Neptune integration (#648)

* added neptune integration
* added tests for NeptuneLogger, added neptune to docs
* updated link to neptune support
* fixed docstrings, fixed try/except in tests, changed append_tags input
* fixed docstrings line length
* bumped epoch nr in model restore tests
* added tags support for single strings
* fixed passing neptune token to backend
* fixed project name in offline mode
* added save_top_k=-1 to checkpoint callback
* reformatted initialization of neptune in online mode
* bumped epoch nr to 4 in test_load_model_from_checkpoint
* bumped epoch nr to 5

Co-authored-by: William Falcon
---
 README.md                             |   1 +
 docs/source/conf.py                   |  10 +-
 pytorch_lightning/logging/__init__.py |   5 +
 pytorch_lightning/logging/neptune.py  | 242 ++++++++++++++++++++++++++
 tests/requirements.txt                |   1 +
 tests/test_logging.py                 |  46 +++++
 tests/test_restore_models.py          |   7 +-
 7 files changed, 299 insertions(+), 13 deletions(-)
 create mode 100644 pytorch_lightning/logging/neptune.py

diff --git a/README.md b/README.md
index 4be0bfad5634b..010a1de8b4554 100644
--- a/README.md
+++ b/README.md
@@ -306,6 +306,7 @@ Lightning also adds a text column with all the hyperparameters for this experime
 - [Save a snapshot of all hyperparameters](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#save-a-snapshot-of-all-hyperparameters)
 - [Snapshot code for a training run](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#snapshot-code-for-a-training-run)
 - [Write logs file to csv every k batches](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#write-logs-file-to-csv-every-k-batches)
+- [Logging experiment data to Neptune](https://williamfalcon.github.io/pytorch-lightning/Trainer/Logging/#neptune-support)

 #### Training loop

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5f6df6447a4ea..0c465853a1554 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -62,7 +62,6 @@
 # The full version, including alpha/beta/rc tags
 release = pytorch_lightning.__version__

-
 # -- General configuration ---------------------------------------------------

 # If your documentation needs a minimal Sphinx version, state it here.
@@ -128,7 +127,6 @@
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = None

-
 # -- Options for HTML output -------------------------------------------------

 # The theme to use for HTML and HTML Help pages. See the documentation for
@@ -174,7 +172,6 @@
 # Output file base name for HTML help builder.
 htmlhelp_basename = project + '-doc'

-
 # -- Options for LaTeX output ------------------------------------------------

 latex_elements = {
@@ -198,7 +195,6 @@
     (master_doc, project + '.tex', project + ' Documentation', author, 'manual'),
 ]

-
 # -- Options for manual page output ------------------------------------------

 # One entry per manual page. List of tuples
@@ -207,7 +203,6 @@
     (master_doc, project, project + ' Documentation', [author], 1)
 ]

-
 # -- Options for Texinfo output ----------------------------------------------

 # Grouping the document tree into Texinfo files. List of tuples
@@ -218,7 +213,6 @@
      'One line description of project.', 'Miscellaneous'),
 ]

-
 # -- Options for Epub output -------------------------------------------------

 # Bibliographic Dublin Core info.
@@ -236,7 +230,6 @@
 # A list of files that should not be packed into the epub file.
 epub_exclude_files = ['search.html']

-
 # -- Extension configuration -------------------------------------------------

 # -- Options for intersphinx extension ---------------------------------------
@@ -249,7 +242,6 @@
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True

-
 # https://github.com/rtfd/readthedocs.org/issues/1139
 # I use sphinx-apidoc to auto-generate API documentation for my project.
 # Right now I have to commit these auto-generated files to my repository
@@ -302,7 +294,7 @@ def setup(app):
         MOCK_REQUIRE_PACKAGES.append(pkg.rstrip())

 # TODO: better parse from package since the import name and package name may differ
-MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'sklearn', 'test_tube', 'mlflow', 'comet_ml']
+MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'sklearn', 'test_tube', 'mlflow', 'comet_ml', 'neptune']
 autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES
 # for mod_name in MOCK_REQUIRE_PACKAGES:
 #     sys.modules[mod_name] = mock.Mock()
diff --git a/pytorch_lightning/logging/__init__.py b/pytorch_lightning/logging/__init__.py
index fca3be61df62f..350968487f370 100644
--- a/pytorch_lightning/logging/__init__.py
+++ b/pytorch_lightning/logging/__init__.py
@@ -187,3 +187,8 @@ def __init__(self, hparams):
         from .comet import CometLogger
     except ImportError:
         del environ["COMET_DISABLE_AUTO_LOGGING"]
+
+try:
+    from .neptune import NeptuneLogger
+except ImportError:
+    pass
diff --git a/pytorch_lightning/logging/neptune.py b/pytorch_lightning/logging/neptune.py
new file mode 100644
index 0000000000000..7ce9f1f1f1e5a
--- /dev/null
+++ b/pytorch_lightning/logging/neptune.py
@@ -0,0 +1,242 @@
+"""
+Log using `neptune <https://neptune.ml>`_
+
+Neptune logger can be used in the online mode or offline (silent) mode.
+To log experiment data in online mode, NeptuneLogger requires an API key:
+
+.. code-block:: python
+
+    from pytorch_lightning.logging import NeptuneLogger
+    # arguments made to NeptuneLogger are passed on to the neptune.experiments.Experiment class
+
+    neptune_logger = NeptuneLogger(
+        api_key=os.environ["NEPTUNE_API_TOKEN"],
+        project_name="USER_NAME/PROJECT_NAME",
+        experiment_name="default",  # Optional,
+        params={"max_epochs": 10},  # Optional,
+        tags=["pytorch-lightning", "mlp"]  # Optional,
+    )
+    trainer = Trainer(max_epochs=10, logger=neptune_logger)
+
+Use the logger anywhere in your LightningModule as follows:
+
+.. code-block:: python
+
+    def training_step(...):
+        # example
+        self.logger.experiment.log_metric("acc_train", acc_train)  # log metrics
+        self.logger.experiment.log_image("worse_predictions", prediction_image)  # log images
+        self.logger.experiment.log_artifact("model_checkpoint.pt")  # log model checkpoint
+        self.logger.experiment.whatever_neptune_supports(...)
+
+    def any_lightning_module_function_or_hook(...):
+        self.logger.experiment.log_metric("acc_train", acc_train)  # log metrics
+        self.logger.experiment.log_image("worse_predictions", prediction_image)  # log images
+        self.logger.experiment.log_artifact("model_checkpoint.pt")  # log model checkpoint
+        self.logger.experiment.whatever_neptune_supports(...)
+
+
+"""
+
+from logging import getLogger
+
+try:
+    import neptune
+except ImportError:
+    raise ImportError('Missing neptune package. Run `pip install neptune-client`')
+
+from torch import is_tensor
+
+# from .base import LightningLoggerBase, rank_zero_only
+from pytorch_lightning.logging.base import LightningLoggerBase, rank_zero_only
+
+logger = getLogger(__name__)
+
+
+class NeptuneLogger(LightningLoggerBase):
+    def __init__(self, api_key=None, project_name=None, offline_mode=False,
+                 experiment_name=None, upload_source_files=None,
+                 params=None, properties=None, tags=None, **kwargs):
+        """Initialize a neptune.ml logger.
+        Requires either an API key (online mode) or a local directory path (offline mode).
+
+        :param str|None api_key: Required in online mode. Neptune API token, found on https://neptune.ml.
+            Read how to get your API key https://docs.neptune.ml/python-api/tutorials/get-started.html#copy-api-token.
+        :param str project_name: Required in online mode. Qualified name of a project in the form
+            "namespace/project_name", for example "tom/mnist-classification".
+            If None, the value of the NEPTUNE_PROJECT environment variable will be used.
+            You need to create the project on https://neptune.ml first.
+        :param bool offline_mode: Optional, default False. If offline_mode=True no logs will be sent to Neptune.
+            Usually used for debugging purposes.
+        :param str|None experiment_name: Optional. Editable name of the experiment.
+            The name is displayed in the experiment's Details (Metadata section) and in the experiments view as a column.
+        :param list|None upload_source_files: Optional. List of source files to be uploaded.
+            Must be a list of str or a single str. Uploaded sources are displayed in the experiment's Source code tab.
+            If None is passed, the Python file from which the experiment was created will be uploaded.
+            Pass an empty list ([]) to upload no files. Unix style pathname pattern expansion is supported.
+            For example, you can pass '*.py' to upload all Python source files from the current directory.
+            For recursive lookup use '**/*.py' (for Python 3.5 and later). For more information see the glob library.
+        :param dict|None params: Optional. Parameters of the experiment. After experiment creation params are read-only.
+            Parameters are displayed in the experiment's Parameters section and each key-value pair can be
+            viewed in the experiments view as a column.
+        :param dict|None properties: Optional, default is {}. Properties of the experiment.
+            They are editable after the experiment is created. Properties are displayed in the experiment's Details and
+            each key-value pair can be viewed in the experiments view as a column.
+        :param list|None tags: Optional, default []. Must be a list of str. Tags of the experiment.
+            They are editable after the experiment is created (see: append_tag() and remove_tag()).
+            Tags are displayed in the experiment's Details and can be viewed in the experiments view as a column.
+        """
+        super().__init__()
+        self.api_key = api_key
+        self.project_name = project_name
+        self.offline_mode = offline_mode
+        self.experiment_name = experiment_name
+        self.upload_source_files = upload_source_files
+        self.params = params
+        self.properties = properties
+        self.tags = tags
+        self._experiment = None
+        self._kwargs = kwargs
+
+        if offline_mode:
+            self.mode = "offline"
+            neptune.init(project_qualified_name='dry-run/project',
+                         backend=neptune.OfflineBackend())
+        else:
+            self.mode = "online"
+            neptune.init(api_token=self.api_key,
+                         project_qualified_name=self.project_name)
+
+        logger.info(f"NeptuneLogger was initialized in {self.mode} mode")
+
+    @property
+    def experiment(self):
+        if self._experiment is not None:
+            return self._experiment
+        else:
+            self._experiment = neptune.create_experiment(name=self.experiment_name,
+                                                         params=self.params,
+                                                         properties=self.properties,
+                                                         tags=self.tags,
+                                                         upload_source_files=self.upload_source_files,
+                                                         **self._kwargs)
+        return self._experiment
+
+    @rank_zero_only
+    def log_hyperparams(self, params):
+        for key, val in vars(params).items():
+            self.experiment.set_property(f"param__{key}", val)
+
+    @rank_zero_only
+    def log_metrics(self, metrics, step=None):
+        """Log metrics (numeric values) in Neptune experiments
+
+        :param dict metrics: Dictionary with metric names as keys and measured quantities as values
+        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing
+
+        """
+
+        for key, val in metrics.items():
+            if is_tensor(val):
+                val = val.cpu().detach()
+
+            if step is None:
+                self.experiment.log_metric(key, val)
+            else:
+                self.experiment.log_metric(key, x=step, y=val)
+
+    @rank_zero_only
+    def finalize(self, status):
+        self.experiment.stop()
+
+    @property
+    def name(self):
+        if self.mode == "offline":
+            return "offline-name"
+        else:
+            return self.experiment.name
+
+    @property
+    def version(self):
+        if self.mode == "offline":
+            return "offline-id-1234"
+        else:
+            return self.experiment.id
+
+    @rank_zero_only
+    def log_metric(self, metric_name, metric_value, step=None):
+        """Log metrics (numeric values) in Neptune experiments
+
+        :param str metric_name: The name of the log, i.e. mse, loss, accuracy.
+        :param float metric_value: The value of the log (data-point).
+        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing
+
+        """
+        if step is None:
+            self.experiment.log_metric(metric_name, metric_value)
+        else:
+            self.experiment.log_metric(metric_name, x=step, y=metric_value)
+
+    @rank_zero_only
+    def log_text(self, log_name, text, step=None):
+        """Log text data in Neptune experiment
+
+        :param str log_name: The name of the log, i.e. mse, my_text_data, timing_info.
+        :param str text: The value of the log (data-point).
+        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing
+
+        """
+        if step is None:
+            self.experiment.log_text(log_name, text)
+        else:
+            self.experiment.log_text(log_name, x=step, y=text)
+
+    @rank_zero_only
+    def log_image(self, log_name, image, step=None):
+        """Log image data in Neptune experiment
+
+        :param str log_name: The name of the log, i.e. bboxes, visualisations, sample_images.
+        :param str|PIL.Image|matplotlib.figure.Figure image: The value of the log (data-point).
+            Can be one of the following types: PIL image, matplotlib.figure.Figure, path to image file (str).
+        :param int|None step: Step number at which the metrics should be recorded, must be strictly increasing
+
+        """
+        if step is None:
+            self.experiment.log_image(log_name, image)
+        else:
+            self.experiment.log_image(log_name, x=step, y=image)
+
+    @rank_zero_only
+    def log_artifact(self, artifact, destination=None):
+        """Save an artifact (file) in Neptune experiment storage.
+
+        :param str artifact: A path to the file in the local filesystem.
+        :param str|None destination: Optional, default None.
+            A destination path. If None is passed, the artifact file name will be used.
+
+        """
+        self.experiment.log_artifact(artifact, destination)
+
+    @rank_zero_only
+    def set_property(self, key, value):
+        """Set a key-value pair as a Neptune experiment property.
+
+        :param str key: Property key.
+        :param obj value: New value of a property.
+
+        """
+        self.experiment.set_property(key, value)
+
+    @rank_zero_only
+    def append_tags(self, tags):
+        """Appends tags to the Neptune experiment.
+
+        :param str|tuple|list(str) tags: Tags to add to the current experiment.
+            If str is passed, a single tag is added.
+            If multiple - comma separated - str are passed, all of them are added as tags.
+            If a list of str is passed, all elements of the list are added as tags.
+
+        """
+        if not isinstance(tags, (list, set, tuple)):
+            tags = [tags]  # make it an iterable if it is not yet
+        self.experiment.append_tags(*tags)
diff --git a/tests/requirements.txt b/tests/requirements.txt
index bc3fcc8f142f7..39a27441fe306 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -8,5 +8,6 @@ check-manifest
 # test_tube # already installed in main req.
 mlflow
 comet_ml
+neptune-client
 twine==1.13.0
 pillow<7.0.0
\ No newline at end of file
diff --git a/tests/test_logging.py b/tests/test_logging.py
index f5215824ce24b..133e9a16daac9 100644
--- a/tests/test_logging.py
+++ b/tests/test_logging.py
@@ -193,6 +193,52 @@ def test_comet_pickle(tmpdir, monkeypatch):
     trainer2.logger.log_metrics({"acc": 1.0})


+def test_neptune_logger(tmpdir):
+    """Verify that basic functionality of neptune logger works."""
+    tutils.reset_seed()
+
+    from pytorch_lightning.logging import NeptuneLogger
+
+    hparams = tutils.get_hparams()
+    model = LightningTestModel(hparams)
+
+    logger = NeptuneLogger(offline_mode=True)
+
+    trainer_options = dict(
+        default_save_path=tmpdir,
+        max_epochs=1,
+        train_percent_check=0.01,
+        logger=logger
+    )
+    trainer = Trainer(**trainer_options)
+    result = trainer.fit(model)
+
+    print('result finished')
+    assert result == 1, "Training failed"
+
+
+def test_neptune_pickle(tmpdir):
+    """Verify that pickling trainer with neptune logger works."""
+    tutils.reset_seed()
+
+    from pytorch_lightning.logging import NeptuneLogger
+
+    # hparams = tutils.get_hparams()
+    # model = LightningTestModel(hparams)
+
+    logger = NeptuneLogger(offline_mode=True)
+    trainer_options = dict(
+        default_save_path=tmpdir,
+        max_epochs=1,
+        logger=logger
+    )
+
+    trainer = Trainer(**trainer_options)
+    pkl_bytes = pickle.dumps(trainer)
+    trainer2 = pickle.loads(pkl_bytes)
+    trainer2.logger.log_metrics({"acc": 1.0})
+
+
 def test_tensorboard_logger(tmpdir):
     """Verify that basic functionality of Tensorboard logger works."""
diff --git a/tests/test_restore_models.py b/tests/test_restore_models.py
index fe6f3adf77503..48cf94c98924d 100644
--- a/tests/test_restore_models.py
+++ b/tests/test_restore_models.py
@@ -106,10 +106,10 @@ def test_load_model_from_checkpoint(tmpdir):
     trainer_options = dict(
         show_progress_bar=False,
-        max_epochs=1,
+        max_epochs=5,
         train_percent_check=0.4,
         val_percent_check=0.2,
-        checkpoint_callback=True,
+        checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
         logger=False,
         default_save_path=tmpdir,
     )

@@ -121,7 +121,7 @@ def test_load_model_from_checkpoint(tmpdir):
     # correct result and ok accuracy
     assert result == 1, 'training failed to complete'
     pretrained_model = LightningTestModel.load_from_checkpoint(
-        os.path.join(trainer.checkpoint_callback.filepath, "_ckpt_epoch_0.ckpt")
+        os.path.join(trainer.checkpoint_callback.filepath, "_ckpt_epoch_4.ckpt")
     )

     # test that hparams loaded correctly
@@ -369,6 +369,5 @@ def test_model_saving_loading(tmpdir):
     new_pred = model_2(x)
     assert torch.all(torch.eq(pred_before_saving, new_pred)).item() == 1

-
 # if __name__ == '__main__':
 #     pytest.main([__file__])