Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…Galilei#30 Galileo-Galilei#31 Galileo-Galilei#72 Galileo-Galilei#29 Galileo-Galilei#62 - context access - kedro 16.5 - hook auto registration
  • Loading branch information
takikadiri committed Oct 11, 2020
1 parent 4c0b6a7 commit d691b96
Show file tree
Hide file tree
Showing 17 changed files with 303 additions and 281 deletions.
2 changes: 1 addition & 1 deletion .isort.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ line_length=88
ensure_newline_before_comments=True
sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
known_first_party=kedro_mlflow
known_third_party=black,click,cookiecutter,flake8,isort,jinja2,kedro,mlflow,pandas,pytest,pytest_lazyfixture,setuptools,yaml
known_third_party=click,cookiecutter,jinja2,kedro,mlflow,pandas,pytest,pytest_lazyfixture,setuptools,yaml
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@

### Added

- kedro-mlflow hooks are now auto-registered (#29)
- Add dataset ``MlflowMetricsDataSet`` for metrics logging ([#9](https://github.com/Galileo-Galilei/kedro-mlflow/issues/9)) and update documentation for metrics.
- Add CI workflow `create-release` to ensure release consistency and up-to-date CHANGELOG. ([#57](https://github.com/Galileo-Galilei/kedro-mlflow/issues/57), [#68](https://github.com/Galileo-Galilei/kedro-mlflow/pull/68))
- Add templates for issues and pull requests ([#57](https://github.com/Galileo-Galilei/kedro-mlflow/issues/57), [#68](https://github.com/Galileo-Galilei/kedro-mlflow/pull/68))

### Fixed

- `get_mlflow_config` now uses the kedro project config_loader to get configs (#66 #64 #54 #30 #31 #72)
- Fix the tests that were broken by the 0.16.5 update (#62)
- Versioned datasets artifacts logging are handled correctly ([#41](https://github.com/Galileo-Galilei/kedro-mlflow/issues/41))
- MlflowDataSet handles correctly datasets which are inherited from AbstractDataSet ([#45](https://github.com/Galileo-Galilei/kedro-mlflow/issues/45))
- Change the test in `_generate_kedro_command` to accept both empty `Iterable`s(default in CLI mode) and `None` values (default in interactive mode) ([#50](https://github.com/Galileo-Galilei/kedro-mlflow/issues/50))
Expand All @@ -21,6 +24,9 @@

### Changed

- Bump kedro requirement to <=0.16.5
- `kedro mlflow init` is no longer touching the run.py file
- `MlflowNodeHook` now has a `before_pipeline_run` hook so it can access the project run parameters
- Remove `conda_env` and `model_name` arguments from `MlflowPipelineHook` and add them to `PipelineML` and `pipeline_ml`. This is necessary for incoming hook auto-discovery in future release and it enables having multiple `PipelineML` in the same project ([#58](https://github.com/Galileo-Galilei/kedro-mlflow/pull/58)). This mechanically fixes [#54](https://github.com/Galileo-Galilei/kedro-mlflow/issues/54) by making `conda_env` path absolute for airflow support.
- `flatten_dict_params`, `recursive` and `sep` arguments of the `MlflowNodeHook` are moved to the `mlflow.yml` config file to prepare plugin auto registration. This also modifies the `run.py` template (to remove the args) and the `mlflow.yml` keys to add a `hooks` entry. ([#59](https://github.com/Galileo-Galilei/kedro-mlflow/pull/59))
- Rename CI workflow to `test` ([#57](https://github.com/Galileo-Galilei/kedro-mlflow/issues/57), [#68](https://github.com/Galileo-Galilei/kedro-mlflow/pull/68))
Expand Down
142 changes: 74 additions & 68 deletions kedro_mlflow/framework/cli/cli.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
import os
import subprocess
from pathlib import Path

import click
from kedro import __file__ as KEDRO_PATH
from kedro.framework.context import get_static_project_data, load_context

from kedro_mlflow.framework.cli.cli_utils import (
render_jinja_template,
write_jinja_template,
)
from kedro_mlflow.framework.cli.cli_utils import write_jinja_template
from kedro_mlflow.framework.context import get_mlflow_config
from kedro_mlflow.utils import _already_updated, _get_project_globals, _is_kedro_project
from kedro_mlflow.utils import _already_updated, _is_kedro_project

TEMPLATE_FOLDER_PATH = Path(__file__).parent.parent.parent / "template" / "project"

Expand Down Expand Up @@ -87,80 +83,86 @@ def init(force, silent):

# get constants
project_path = Path().cwd()
project_globals = _get_project_globals()
context = load_context(project_path)
# project_globals = _get_project_globals()
project_globals = get_static_project_data(project_path)

# mlflow.yml is just a static file,
# but the name of the experiment is set to be the same as the project
conf_root = context.CONF_ROOT
mlflow_yml = "mlflow.yml"
write_jinja_template(
src=TEMPLATE_FOLDER_PATH / mlflow_yml,
is_cookiecutter=False,
dst=project_path / "conf" / "base" / mlflow_yml,
python_package=project_globals["python_package"],
dst=project_path / conf_root / "base" / mlflow_yml,
python_package=project_globals["package_name"],
)
if not silent:
click.secho(
click.style("'conf/base/mlflow.yml' successfully updated.", fg="green")
click.style(
f"'{conf_root}/base/mlflow.yml' successfully updated.", fg="green"
)
)
# make a check whether the project run.py is strictly identical to the template
# if yes, replace the script by the template silently
# if no, raise a warning and send a message to INSERT_DOC_URL
flag_erase_runpy = force
runpy_project_path = (
project_path
/ "src"
/ (Path(project_globals["context_path"]).parent.as_posix() + ".py")
)
if not force:
kedro_path = Path(KEDRO_PATH).parent
runpy_template_path = (
kedro_path
/ "templates"
/ "project"
/ "{{ cookiecutter.repo_name }}"
/ "src"
/ "{{ cookiecutter.python_package }}"
/ "run.py"
)
kedro_runpy_template = render_jinja_template(
src=runpy_template_path,
is_cookiecutter=True,
python_package=project_globals["python_package"],
project_name=project_globals["project_name"],
kedro_version=project_globals["kedro_version"],
)

with open(runpy_project_path, mode="r") as file_handler:
kedro_runpy_project = file_handler.read()

# beware : black formatting could change slightly this test which is very strict
if kedro_runpy_project == kedro_runpy_template:
flag_erase_runpy = True

if flag_erase_runpy:
os.remove(runpy_project_path)
write_jinja_template(
src=TEMPLATE_FOLDER_PATH / "run.py",
dst=runpy_project_path,
is_cookiecutter=True,
python_package=project_globals["python_package"],
project_name=project_globals["project_name"],
kedro_version=project_globals["kedro_version"],
)
if not silent:
click.secho(click.style("'run.py' successfully updated", fg="green"))
else:
click.secho(
click.style(
"You have modified your 'run.py' since project creation.\n"
+ "In order to use kedro-mlflow, you must either:\n"
+ " - set up your run.py with the following instructions :\n"
+ "INSERT_DOC_URL\n"
+ " - call the following command:\n"
+ "$ kedro mlflow init --force",
fg="yellow",
)
)
# flag_erase_runpy = force
# runpy_project_path = (
# project_path
# / "src"
# / project_globals["package_name"]
# / "run.py"
# )
# if not force:
# kedro_path = Path(KEDRO_PATH).parent
# runpy_template_path = (
# kedro_path
# / "templates"
# / "project"
# / "{{ cookiecutter.repo_name }}"
# / "src"
# / "{{ cookiecutter.python_package }}"
# / "run.py"
# )
# kedro_runpy_template = render_jinja_template(
# src=runpy_template_path,
# is_cookiecutter=True,
# python_package=project_globals["package_name"],
# project_name=project_globals["project_name"],
# kedro_version=project_globals["project_version"],
# )

# with open(runpy_project_path, mode="r") as file_handler:
# kedro_runpy_project = file_handler.read()

# # beware : black formatting could change slightly this test which is very strict
# if kedro_runpy_project == kedro_runpy_template:
# flag_erase_runpy = True

# if flag_erase_runpy:
# os.remove(runpy_project_path)
# write_jinja_template(
# src=TEMPLATE_FOLDER_PATH / "run.py",
# dst=runpy_project_path,
# is_cookiecutter=True,
# python_package=project_globals["package_name"],
# project_name=project_globals["project_name"],
# kedro_version=project_globals["project_version"],
# )
# if not silent:
# click.secho(click.style("'run.py' successfully updated", fg="green"))
# else:
# click.secho(
# click.style(
# "You have modified your 'run.py' since project creation.\n"
# + "In order to use kedro-mlflow, you must either:\n"
# + " - set up your run.py with the following instructions :\n"
# + "INSERT_DOC_URL\n"
# + " - call the following command:\n"
# + "$ kedro mlflow init --force",
# fg="yellow",
# )
# )


@mlflow_commands.command()
Expand All @@ -185,7 +187,11 @@ def ui(project_path, env):
"""

    # the context must contain the self.mlflow attributes with the mlflow configuration
mlflow_conf = get_mlflow_config(project_path=project_path, env=env)
if not project_path:
project_path = Path().cwd()

context = load_context(project_path=project_path, env=env)
mlflow_conf = get_mlflow_config(context)

# call mlflow ui with specific options
# TODO : add more options for ui
Expand Down
19 changes: 5 additions & 14 deletions kedro_mlflow/framework/context/mlflow_context.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,14 @@
from pathlib import Path

from kedro.config import ConfigLoader
from kedro.framework.context import KedroContext

from kedro_mlflow.framework.context.config import KedroMlflowConfig


# this could be a read-only property in the context
# with a @property decorator
# but for consistency with hook system, it is an external function
def get_mlflow_config(project_path=None, env="local"):
if project_path is None:
project_path = Path.cwd()
project_path = Path(project_path)
conf_paths = [
str(project_path / "conf" / "base"),
str(project_path / "conf" / env),
]
config_loader = ConfigLoader(conf_paths=conf_paths)
conf_mlflow_yml = config_loader.get("mlflow*", "mlflow*/**")
conf_mlflow = KedroMlflowConfig(project_path=project_path)
def get_mlflow_config(context: KedroContext):
    """Load the plugin configuration from the project's ``mlflow.yml``.

    Uses the context's ``config_loader`` to read every configuration file
    matching ``mlflow*`` / ``mlflow*/**`` and folds the resulting dict into
    a ``KedroMlflowConfig`` rooted at ``context.project_path``.

    Args:
        context: The active ``KedroContext``; only its ``config_loader``
            and ``project_path`` attributes are used here.

    Returns:
        A ``KedroMlflowConfig`` instance populated from the loaded
        ``mlflow*`` configuration files.
    """
    conf_mlflow_yml = context.config_loader.get("mlflow*", "mlflow*/**")
    conf_mlflow = KedroMlflowConfig(context.project_path)
    conf_mlflow.from_dict(conf_mlflow_yml)
    return conf_mlflow
44 changes: 43 additions & 1 deletion kedro_mlflow/framework/hooks/node_hook.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,55 @@
from typing import Any, Dict

import mlflow
from kedro.framework.context import load_context
from kedro.framework.hooks import hook_impl
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline
from kedro.pipeline.node import Node

from kedro_mlflow.framework.context import get_mlflow_config


class MlflowNodeHook:
def __init__(self):
config = get_mlflow_config()
self.context = None
self.flatten = False
self.recursive = True
self.sep = "."

@hook_impl
def before_pipeline_run(
self, run_params: Dict[str, Any], pipeline: Pipeline, catalog: DataCatalog
) -> None:
"""Hook to be invoked before a pipeline runs.
Args:
run_params: The params needed for the given run.
Should be identical to the data logged by Journal.
# @fixme: this needs to be modelled explicitly as code, instead of comment
Schema: {
"run_id": str,
"project_path": str,
"env": str,
"kedro_version": str,
"tags": Optional[List[str]],
"from_nodes": Optional[List[str]],
"to_nodes": Optional[List[str]],
"node_names": Optional[List[str]],
"from_inputs": Optional[List[str]],
"load_versions": Optional[List[str]],
"pipeline_name": str,
"extra_params": Optional[Dict[str, Any]],
}
pipeline: The ``Pipeline`` that will be run.
catalog: The ``DataCatalog`` to be used during the run.
"""

self.context = load_context(
project_path=run_params["project_path"],
env=run_params["env"],
extra_params=run_params["extra_params"],
)
config = get_mlflow_config(self.context)
self.flatten = config.node_hook_opts["flatten_dict_params"]
self.recursive = config.node_hook_opts["recursive"]
self.sep = config.node_hook_opts["sep"]
Expand Down Expand Up @@ -51,6 +90,9 @@ def before_node_run(
mlflow.log_params(params_inputs)


mlflow_node_hooks = MlflowNodeHook()


def flatten_dict(d, recursive: bool = True, sep="."):
def expand(key, value):
if isinstance(value, dict):
Expand Down
15 changes: 13 additions & 2 deletions kedro_mlflow/framework/hooks/pipeline_hook.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import mlflow
import yaml
from kedro.framework.context import load_context
from kedro.framework.hooks import hook_impl
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline
Expand All @@ -17,6 +18,9 @@


class MlflowPipelineHook:
def __init__(self):
self.context = None

@hook_impl
def after_catalog_created(
self,
Expand Down Expand Up @@ -62,9 +66,13 @@ def before_pipeline_run(
pipeline: The ``Pipeline`` that will be run.
catalog: The ``DataCatalog`` to be used during the run.
"""
mlflow_conf = get_mlflow_config(
project_path=run_params["project_path"], env=run_params["env"]
context = load_context(
project_path=run_params["project_path"],
env=run_params["env"],
extra_params=run_params["extra_params"],
)

mlflow_conf = get_mlflow_config(context)
mlflow.set_tracking_uri(mlflow_conf.mlflow_tracking_uri)
# TODO : if the pipeline fails, we need to be able to end stop the mlflow run
# cannot figure out how to do this within hooks
Expand Down Expand Up @@ -177,6 +185,9 @@ def on_pipeline_error(
mlflow.end_run()


mlflow_pipeline_hooks = MlflowPipelineHook()


def _generate_kedro_command(
tags, node_names, from_nodes, to_nodes, from_inputs, load_versions, pipeline_name
):
Expand Down
16 changes: 1 addition & 15 deletions kedro_mlflow/template/project/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,34 +28,20 @@

"""Application entry point."""
from pathlib import Path
from typing import Dict

from kedro.framework.context import KedroContext, load_package_context
from kedro.pipeline import Pipeline

from {{ cookiecutter.python_package }}.pipeline import create_pipelines

from kedro_mlflow.framework.hooks import MlflowNodeHook, MlflowPipelineHook


class ProjectContext(KedroContext):
"""Users can override the remaining methods from the parent class here,
or create new ones (e.g. as required by plugins)
"""

project_name = "{{ cookiecutter.project_name }}"
# `project_version` is the version of kedro used to generate the project
project_version = "{{ cookiecutter.kedro_version }}"
package_name = "{{ cookiecutter.python_package }}"
hooks = (
MlflowNodeHook(),
MlflowPipelineHook(),
)

def _get_pipelines(self) -> Dict[str, Pipeline]:
return create_pipelines()


def run_package():
# Entry point for running a Kedro project packaged with `kedro package`
# using `python -m <project_package>.run` command.
Expand All @@ -66,4 +52,4 @@ def run_package():


if __name__ == "__main__":
run_package()
run_package()
2 changes: 1 addition & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
mlflow>=1.0.0, <2.0.0
kedro>=0.16.0, <=0.16.4 # 0.16.5 breaks pipeline_ml, template and hooks test
kedro>=0.16.0, <=0.16.5
Loading

0 comments on commit d691b96

Please sign in to comment.