Skip to content

Commit

Permalink
[KED-1954] Extract all static data to .kedro.yml (#754)
Browse files Browse the repository at this point in the history
  • Loading branch information
DmitriiDeriabinQB authored Aug 18, 2020
1 parent cb2692f commit dc39a96
Show file tree
Hide file tree
Showing 18 changed files with 409 additions and 1,462 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
## Major features and improvements

## Bug fixes and other changes
* `project_name`, `project_version` and `package_name` must now be defined in `.kedro.yml` for projects generated with Kedro 0.16.5+.

## Breaking changes to the API

Expand Down
2 changes: 0 additions & 2 deletions features/package.feature
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ Feature: Package target in new project
When I execute the kedro command "package"
Then I should get a successful exit code
When I install the project's python package
And I delete assets not needed for running installed packages
And I execute the installed project package
Then I should get a successful exit code

Expand All @@ -49,6 +48,5 @@ Feature: Package target in new project
When I execute the kedro command "package"
Then I should get a successful exit code
When I install the project's python package
And I delete assets not needed for running installed packages
And I execute the installed project package
Then I should get a successful exit code
17 changes: 5 additions & 12 deletions features/steps/cli_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,14 +331,6 @@ def exec_kedro_target(context, command):
context.result = run(cmd, env=context.env, cwd=str(context.root_project_dir))


@when("I delete assets not needed for running installed packages")
def delete_unnecessary_assets(context):
    """Remove the ``.kedro.yml`` file from the root project directory.

    The file is not needed when executing the installed project package.
    """
    (context.root_project_dir / ".kedro.yml").unlink()


@when("I execute the installed project package")
def exec_project(context):
"""Execute installed Kedro project target."""
Expand Down Expand Up @@ -443,10 +435,11 @@ def udpate_kedro_yml(context: behave.runner.Context, new_source_dir):
"""

kedro_yml_path = context.root_project_dir / ".kedro.yml"
kedro_yml_path.write_text(
f"context_path: {context.package_name}.run.ProjectContext\n"
f"source_dir: {new_source_dir}\n"
)

with kedro_yml_path.open("r+") as _f:
    content = yaml.safe_load(_f)
    content["source_dir"] = new_source_dir
    # ``safe_load`` leaves the file position at EOF, so dumping right away
    # would append a second copy of the mapping after the original text
    # (duplicate keys). Rewind and truncate so that the file ends up holding
    # only the updated mapping.
    _f.seek(0)
    _f.truncate()
    yaml.safe_dump(content, _f)


@given("I have updated kedro requirements")
Expand Down
51 changes: 28 additions & 23 deletions kedro/framework/cli/jupyter.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,10 @@
_check_module_importable,
env_option,
forward_command,
get_source_dir,
ipython_message,
python_call,
)
from kedro.framework.context import load_context
from kedro.framework.context import get_static_project_data, load_context

JUPYTER_IP_HELP = "IP address of the Jupyter server."
JUPYTER_ALL_KERNELS_HELP = "Display all available Python kernels."
Expand All @@ -70,6 +69,18 @@
overwrite its contents."""


def _load_project_context(**kwargs):
    """Load the Kedro context for the current working directory.

    All keyword arguments are forwarded unchanged to ``load_context``.
    If loading raises any exception, a message naming the requested ``env``
    and containing the original error text is passed to ``_handle_exception``.
    """
    try:
        project_context = load_context(Path.cwd(), **kwargs)
    except Exception as err:  # pylint: disable=broad-except
        env = kwargs.get("env")
        message = (
            f"Unable to load Kedro context with environment `{env}`. "
            f"Make sure it exists in the project configuration.\nError: {err}"
        )
        _handle_exception(message)
    else:
        return project_context


def collect_line_magic():
"""Interface function for collecting line magic functions from plugin entry points.
"""
Expand Down Expand Up @@ -127,8 +138,9 @@ def jupyter():
@env_option
def jupyter_notebook(ip_address, all_kernels, env, idle_timeout, args):
"""Open Jupyter Notebook with project specific variables loaded."""
_check_module_importable("jupyter_core")
context = _load_project_context(env=env)
_check_module_importable("jupyter_core")

if "-h" not in args and "--help" not in args:
ipython_message(all_kernels)

Expand All @@ -155,8 +167,9 @@ def jupyter_notebook(ip_address, all_kernels, env, idle_timeout, args):
@env_option
def jupyter_lab(ip_address, all_kernels, env, idle_timeout, args):
"""Open Jupyter Lab with project specific variables loaded."""
_check_module_importable("jupyter_core")
context = _load_project_context(env=env)
_check_module_importable("jupyter_core")

if "-h" not in args and "--help" not in args:
ipython_message(all_kernels)

Expand Down Expand Up @@ -184,7 +197,9 @@ def jupyter_lab(ip_address, all_kernels, env, idle_timeout, args):
nargs=-1,
)
@env_option
def convert_notebook(all_flag, overwrite_flag, filepath, env):
def convert_notebook( # pylint: disable=unused-argument,too-many-locals
all_flag, overwrite_flag, filepath, env
):
"""Convert selected or all notebooks found in a Kedro project
to Kedro code, by exporting code from the appropriately-tagged cells:
Cells tagged as `node` will be copied over to a Python file matching
Expand All @@ -194,10 +209,12 @@ def convert_notebook(all_flag, overwrite_flag, filepath, env):
relative and absolute paths are accepted.
Should not be provided if --all flag is already present.
"""
context = _load_project_context(env=env)
_update_ipython_dir(context.project_path)
project_path = Path.cwd()
static_data = get_static_project_data(project_path)
source_path = static_data["source_dir"]
package_name = static_data["package_name"]

source_path = get_source_dir(context.project_path)
_update_ipython_dir(project_path)

if not filepath and not all_flag:
secho(
Expand All @@ -210,7 +227,7 @@ def convert_notebook(all_flag, overwrite_flag, filepath, env):
# pathlib glob does not ignore hidden directories,
# whereas Python glob does, which is more useful in
# ensuring checkpoints will not be included
pattern = context.project_path / "**" / "*.ipynb"
pattern = project_path / "**" / "*.ipynb"
notebooks = sorted(Path(p) for p in iglob(str(pattern), recursive=True))
else:
notebooks = [Path(f) for f in filepath]
Expand All @@ -223,7 +240,7 @@ def convert_notebook(all_flag, overwrite_flag, filepath, env):
f"Found non-unique notebook names! Please rename the following: {names}"
)

output_dir = source_path / context.package_name / "nodes"
output_dir = source_path / package_name / "nodes"
if not output_dir.is_dir():
output_dir.mkdir()
(output_dir / "__init__.py").touch()
Expand All @@ -234,7 +251,7 @@ def convert_notebook(all_flag, overwrite_flag, filepath, env):

if output_path.is_file():
overwrite = overwrite_flag or click.confirm(
f"Output file {output_path} already exists. Overwrite?", default=False,
f"Output file {output_path} already exists. Overwrite?", default=False
)
if overwrite:
_export_nodes(notebook, output_path)
Expand Down Expand Up @@ -284,18 +301,6 @@ def _build_jupyter_env(kedro_env: str) -> Dict[str, Any]:
return {"env": jupyter_env}


def _load_project_context(**kwargs):
    """Load the Kedro context for the current working directory.

    All keyword arguments are forwarded unchanged to ``load_context``.
    If loading raises any exception, a message naming the requested ``env``
    and containing the original error text is passed to ``_handle_exception``.
    """
    try:
        project_context = load_context(Path.cwd(), **kwargs)
    except Exception as err:  # pylint: disable=broad-except
        env = kwargs.get("env")
        message = (
            f"Unable to load Kedro context with environment `{env}`. "
            f"Make sure it exists in the project configuration.\nError: {err}"
        )
        _handle_exception(message)
    else:
        return project_context


def _export_nodes(filepath: Path, output_path: Path) -> None:
"""Copy code from Jupyter cells into nodes in src/<package_name>/nodes/,
under filename with same name as notebook.
Expand Down
95 changes: 41 additions & 54 deletions kedro/framework/cli/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,22 @@
import sys
import webbrowser
from pathlib import Path
from typing import Any, Sequence
from typing import Sequence

import click
from click import secho

from kedro.framework.cli.cli import _handle_exception
from kedro.framework.cli.jupyter import _load_project_context
from kedro.framework.cli.utils import (
KedroCliError,
_check_module_importable,
call,
env_option,
forward_command,
get_source_dir,
ipython_message,
python_call,
)
from kedro.framework.context import load_context
from kedro.framework.context import get_static_project_data

NO_DEPENDENCY_MESSAGE = """{module} is not installed. Please make sure {module} is in
{src}/requirements.txt and run `kedro install`."""
Expand All @@ -59,18 +58,6 @@
OPEN_ARG_HELP = """Open the documentation in your default browser after building."""


def _load_project_context(**kwargs: Any):
    """Load the Kedro context for the current working directory.

    All keyword arguments are forwarded unchanged to ``load_context``.
    If loading raises any exception, a message naming the requested ``env``
    and containing the original error text is passed to ``_handle_exception``.
    """
    try:
        project_context = load_context(Path.cwd(), **kwargs)
    except Exception as err:  # pylint: disable=broad-except
        env = kwargs.get("env")
        message = (
            f"Unable to load Kedro context with environment `{env}`. "
            f"Make sure it exists in the project configuration.\nError: {err}"
        )
        _handle_exception(message)
    else:
        return project_context


def _build_reqs(source_path: Path, args: Sequence[str] = ()):
"""Run `pip-compile requirements.in` command.
Expand All @@ -89,6 +76,10 @@ def _build_reqs(source_path: Path, args: Sequence[str] = ()):
python_call("piptools", ["compile", "-q", *args, str(requirements_in)])


def _get_source_path():
    """Return the ``source_dir`` entry of the static project data.

    The static data is looked up for the current working directory via
    ``get_static_project_data``.
    """
    static_data = get_static_project_data(Path.cwd())
    return static_data["source_dir"]


@click.group()
def project_group():
"""Collection of project commands."""
Expand All @@ -98,11 +89,9 @@ def project_group():
def test(args):
"""Run the test suite."""
try:
# pylint: disable=import-outside-toplevel,unused-import
import pytest # noqa
except ImportError:
context = _load_project_context()
source_path = get_source_dir(context.project_path)
_check_module_importable("pytest")
except KedroCliError:
source_path = _get_source_path()
raise KedroCliError(
NO_DEPENDENCY_MESSAGE.format(module="pytest", src=str(source_path))
)
Expand All @@ -114,23 +103,24 @@ def test(args):
@click.option("-c", "--check-only", is_flag=True, help=LINT_CHECK_ONLY_HELP)
@click.argument("files", type=click.Path(exists=True), nargs=-1)
def lint(files, check_only):
"""Run flake8, isort and (on Python >=3.6) black."""
context = _load_project_context()
source_path = get_source_dir(context.project_path)
files = files or (
str(source_path / "tests"),
str(source_path / context.package_name),
)

try:
# pylint: disable=import-outside-toplevel, unused-import
import flake8 # noqa
import isort # noqa
import black # noqa
except ImportError as exc:
raise KedroCliError(
NO_DEPENDENCY_MESSAGE.format(module=exc.name, src=str(source_path))
)
"""Run flake8, isort and black."""
static_data = get_static_project_data(Path.cwd())
source_path = static_data["source_dir"]
package_name = static_data["package_name"]
files = files or (str(source_path / "tests"), str(source_path / package_name))

if "PYTHONPATH" not in os.environ:
# isort needs the source path to be in the 'PYTHONPATH' environment
# variable to treat it as a first-party import location
os.environ["PYTHONPATH"] = str(source_path)

for module_name in ("flake8", "isort", "black"):
try:
_check_module_importable(module_name)
except KedroCliError:
raise KedroCliError(
NO_DEPENDENCY_MESSAGE.format(module=module_name, src=str(source_path))
)

python_call("black", ("--check",) + files if check_only else files)
python_call("flake8", ("--max-line-length=88",) + files)
Expand All @@ -155,7 +145,7 @@ def install(compile_flag):
# we cannot use `context.project_path` as in other commands since
# context instantiation might break due to missing dependencies
# we attempt to install here
source_path = get_source_dir(Path.cwd())
source_path = _get_source_path()
environment_yml = source_path / "environment.yml"
requirements_in = source_path / "requirements.in"
requirements_txt = source_path / "requirements.txt"
Expand Down Expand Up @@ -184,6 +174,7 @@ def ipython(env, args):
"""Open IPython with project specific variables loaded."""
context = _load_project_context(env=env)
_check_module_importable("IPython")

os.environ["IPYTHONDIR"] = str(context.project_path / ".ipython")
if env:
os.environ["KEDRO_ENV"] = env
Expand All @@ -195,8 +186,7 @@ def ipython(env, args):
@project_group.command()
def package():
"""Package the project as a Python egg and wheel."""
context = _load_project_context()
source_path = get_source_dir(context.project_path)
source_path = _get_source_path()
call(
[sys.executable, "setup.py", "clean", "--all", "bdist_egg"],
cwd=str(source_path),
Expand All @@ -219,19 +209,21 @@ def package():
)
def build_docs(open_docs):
"""Build the project documentation."""
context = _load_project_context()
source_path = get_source_dir(context.project_path)
static_data = get_static_project_data(Path.cwd())
source_path = static_data["source_dir"]
package_name = static_data["package_name"]

python_call("pip", ["install", str(source_path / "[docs]")])
python_call("pip", ["install", "-r", str(source_path / "requirements.txt")])
python_call("ipykernel", ["install", "--user", f"--name={context.package_name}"])
python_call("ipykernel", ["install", "--user", f"--name={package_name}"])
shutil.rmtree("docs/build", ignore_errors=True)
call(
[
"sphinx-apidoc",
"--module-first",
"-o",
"docs/source",
str(source_path / context.package_name),
str(source_path / package_name),
]
)
call(["sphinx-build", "-M", "html", "docs/source", "docs/build", "-a"])
Expand All @@ -244,10 +236,7 @@ def build_docs(open_docs):
@forward_command(project_group, name="build-reqs")
def build_reqs(args):
"""Build the project dependency requirements."""
# we cannot use `context.project_path` as in other commands since
# context instantiation might break due to missing dependencies
# we attempt to install here
source_path = get_source_dir(Path.cwd())
source_path = _get_source_path()
_build_reqs(source_path, args)
secho(
"Requirements built! Please update requirements.in "
Expand All @@ -260,8 +249,7 @@ def build_reqs(args):
@project_group.command("activate-nbstripout")
def activate_nbstripout():
"""Install the nbstripout git hook to automatically clean notebooks."""
context = _load_project_context()
source_path = get_source_dir(context.project_path)
source_path = _get_source_path()
secho(
(
"Notebook output cells will be automatically cleared before committing"
Expand All @@ -271,9 +259,8 @@ def activate_nbstripout():
)

try:
# pylint: disable=import-outside-toplevel, unused-import
import nbstripout # noqa
except ImportError:
_check_module_importable("nbstripout")
except KedroCliError:
raise KedroCliError(
NO_DEPENDENCY_MESSAGE.format(module="nbstripout", src=str(source_path))
)
Expand Down
5 changes: 5 additions & 0 deletions kedro/framework/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,11 @@ def get_source_dir(project_path: Path) -> Path:
Returns:
The absolute path to the project source directory.
"""
warnings.warn(
"This function is now deprecated and will be removed in Kedro 0.17.0.",
DeprecationWarning,
)

with (project_path / ".kedro.yml").open("r") as kedro_yml:
kedro_yaml = yaml.safe_load(kedro_yml)

Expand Down
Loading

0 comments on commit dc39a96

Please sign in to comment.