Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plural flags --nodes, --tags, and --load-versions to kedro run #2301

Merged
merged 31 commits into from
Feb 16, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
b59ec24
Add plural flags nodes, tags, load-versions
Feb 8, 2023
452f4e6
Lint
Feb 8, 2023
e400ea8
Lint
Feb 8, 2023
e157502
Trim whitespaces from nodes
Feb 9, 2023
68568bc
Trim whitespaces from load versions
Feb 9, 2023
c48fa1c
Write test for nodes
Feb 9, 2023
3cc615c
Write test for tags
Feb 9, 2023
73ff831
Write test for load versions
Feb 9, 2023
0b8195f
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 9, 2023
156530c
Remove shorthand flag from plural
Feb 9, 2023
958baa7
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 10, 2023
7afac7b
Fix tests after merge
Feb 10, 2023
99f74f7
Lint
Feb 10, 2023
b45f9e7
Undo merge side-effects
Feb 10, 2023
65fe683
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 13, 2023
bd6b097
Add tests to check behaviour when both singular and plural flags used
Feb 13, 2023
9e2bc78
Lint
Feb 13, 2023
414b183
Lint again
Feb 13, 2023
5b79585
Change deprecation condition
AhdraMeraliQB Feb 13, 2023
78be95a
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 13, 2023
0156e93
Remove unnecessary ifs
Feb 13, 2023
897558c
Lint changes from main
Feb 13, 2023
94d4c3c
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 13, 2023
cbcfef6
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 14, 2023
f146a21
Add changes to RELEASE.md
Feb 14, 2023
917841d
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 14, 2023
c7a43b7
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 15, 2023
dd83248
Try fix CI
Feb 15, 2023
f3ee8f0
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 16, 2023
40c14d1
Merge branch 'main' into feat/add-plural-run-flags
AhdraMeraliQB Feb 16, 2023
f4c2200
Revert CLI change
Feb 16, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion kedro/framework/cli/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
_deprecate_options,
_get_values_as_tuple,
_reformat_load_versions,
_split_load_versions,
_split_params,
call,
command_with_verbosity,
Expand Down Expand Up @@ -349,6 +350,14 @@ def activate_nbstripout(
help=NODE_ARG_HELP,
callback=_deprecate_options,
)
@click.option(
"--nodes",
"nodes_names",
type=str,
default="",
help=NODE_ARG_HELP,
callback=split_node_names,
)
@click.option("--runner", "-r", type=str, default=None, help=RUNNER_ARG_HELP)
@click.option("--async", "is_async", is_flag=True, help=ASYNC_ARG_HELP)
@env_option
Expand All @@ -360,6 +369,13 @@ def activate_nbstripout(
help=TAG_ARG_HELP,
callback=_deprecate_options,
)
@click.option(
"--tags",
type=str,
default="",
antonymilne marked this conversation as resolved.
Show resolved Hide resolved
help=TAG_ARG_HELP,
callback=split_string,
)
@click.option(
"--load-version",
"-lv",
Expand All @@ -368,6 +384,13 @@ def activate_nbstripout(
help=LOAD_VERSION_HELP,
callback=_reformat_load_versions,
)
@click.option(
"--load-versions",
type=str,
default="",
help=LOAD_VERSION_HELP,
callback=_split_load_versions,
)
@click.option("--pipeline", "-p", type=str, default=None, help=PIPELINE_ARG_HELP)
@click.option(
"--config",
Expand All @@ -388,18 +411,21 @@ def activate_nbstripout(
help=PARAMS_ARG_HELP,
callback=_split_params,
)
# pylint: disable=too-many-arguments,unused-argument
# pylint: disable=too-many-arguments,unused-argument, too-many-locals
def run(
tag,
tags,
env,
runner,
is_async,
node_names,
nodes_names,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is named this way for differentiation from the already existing node_names - in #2245 I'd suggest keeping node_names as the parameter name

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine to keep node_names for now in #2245 but as part of #2247 I think we should consider just calling this nodes. (Just while I remember, Ivan's argument for why we might not want to do this: it's deliberately written as node_names to deter people from trying to pass node objects themselves to the underlying session.run. My argument for it is consistency and simplicity, particularly when we eventually return to #1423)

to_nodes,
from_nodes,
from_inputs,
to_outputs,
load_version,
load_versions,
pipeline,
config,
conf_source,
Expand All @@ -412,6 +438,16 @@ def run(
tag = _get_values_as_tuple(tag) if tag else tag
node_names = _get_values_as_tuple(node_names) if node_names else node_names

# temporary duplicates for the plural flags
tags = _get_values_as_tuple(tags) if tags else tuple(tags)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor, but do we need these if/else clauses? Since _get_values_as_tuple("") == tuple("") == tuple() already?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch - was following convention from the earlier implementation but looks like they weren't needed there either. My suspicion is that multiple=True implies a default value of an empty tuple 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was unable to find any documentation on the matter but manual testing does expose that values on options where multiple=True will be tuples if the default is unspecified, even when a value is not provided. I'll remove the unnecessary ifs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, that's what I suspected about multiple=True 👍 Just to double check though, it's ok to remove the if/else for both the multiple=True and the multiple=False versions of these arguments?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep! For the other flags their input is immediately handled by the callback function which goes on to return a list, and it's this list that get's passed to _get_values_as_tuple, which splits them safely

nodes_names = (
_get_values_as_tuple(nodes_names) if nodes_names else tuple(nodes_names)
)

tag = tag + tags
node_names = node_names + nodes_names
load_version = {**load_version, **load_versions}

with KedroSession.create(
env=env, conf_source=conf_source, extra_params=params
) as session:
Expand Down
11 changes: 10 additions & 1 deletion kedro/framework/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ def split_node_names(ctx, param, to_split: str) -> List[str]:
elif char == "]":
match_state -= 1
if char == "," and match_state == 0 and argument:
argument = argument.strip()
result.append(argument)
argument = ""
else:
Expand Down Expand Up @@ -422,9 +423,12 @@ def _reformat_load_versions( # pylint: disable=unused-argument
"""Reformat data structure from tuple to dictionary for `load-version`, e.g.:
('dataset1:time1', 'dataset2:time2') -> {"dataset1": "time1", "dataset2": "time2"}.
"""
_deprecate_options(ctx, param, value)
if param.name != "load_versions":
AhdraMeraliQB marked this conversation as resolved.
Show resolved Hide resolved
_deprecate_options(ctx, param, value)

load_versions_dict = {}
for load_version in value:
load_version = load_version.strip()
load_version_list = load_version.split(":", 1)
if len(load_version_list) != 2:
raise KedroCliError(
Expand Down Expand Up @@ -468,6 +472,11 @@ def _split_params(ctx, param, value):
return conf


def _split_load_versions(ctx, param, value):
lv_tuple = _get_values_as_tuple([value])
Copy link
Contributor

@antonymilne antonymilne Feb 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the [value] here rather than value? Elsewhere we call _get_values_as_tuple just with a single string. (Edit: is that true? Maybe it gets tuple-ified due to multiple=True? But then that wouldn't make sense for the instances you've added 🤔 )

Also do we need the if/else here? Wouldn't _reformat_load_versions work the same just passing the empty value into it directly?

After we've removed the old load_version we should definitely just move _reformat_load_versions code into this instead of having two separate functions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AntonyMilneQB

Do we need the [value] here rather than value? Elsewhere we call _get_values_as_tuple just with a single string.

Without wrapping value in a list _get_values_as_tuples will treat each character in the string as a value. The function is never called on a single string as multiple=True tuplifies the values of --node and --tag, and the callback functions for --nodes and --tags both return lists.

Also do we need the if/else here? Wouldn't _reformat_load_versions work the same just passing the empty value into it directly?

The if/else isn't needed as _reformat_load_versions would still work - the idea was to prevent calling another method if unnecessary. Which approach would be considered more pythonic?

After we've removed the old load_version we should definitely just move _reformat_load_versions code into this instead of having two separate functions.

Agreed

Copy link
Contributor

@antonymilne antonymilne Feb 14, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The if/else isn't needed as _reformat_load_versions would still work - the idea was to prevent calling another method if unnecessary. Which approach would be considered more pythonic?

Overall I think without the if/else. The performance overhead of calling another function here is very small compared to the (also small) additional complexity of branching in the code. It's a small thing, but I'd only skip calling the function here if it was particularly expensive.

return _reformat_load_versions(ctx, param, lv_tuple) if value else {}


def _get_values_as_tuple(values: Iterable[str]) -> Tuple[str, ...]:
return tuple(chain.from_iterable(value.split(",") for value in values))

Expand Down
135 changes: 134 additions & 1 deletion tests/framework/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,12 +478,90 @@ def test_run_successfully(
assert isinstance(runner, SequentialRunner)
assert not runner._is_async

@mark.parametrize(
"nodes_input, nodes_expected",
[
["splitting_data", ("splitting_data",)],
["splitting_data,training_model", ("splitting_data", "training_model")],
["splitting_data, training_model", ("splitting_data", "training_model")],
],
)
def test_run_specific_nodes(
self,
fake_project_cli,
fake_metadata,
fake_session,
mocker,
nodes_input,
nodes_expected,
):
nodes_command = "--nodes=" + nodes_input
result = CliRunner().invoke(
fake_project_cli, ["run", nodes_command], obj=fake_metadata
)
assert not result.exit_code

fake_session.run.assert_called_once_with(
tags=(),
runner=mocker.ANY,
node_names=nodes_expected,
from_nodes=[],
to_nodes=[],
from_inputs=[],
to_outputs=[],
load_versions={},
pipeline_name=None,
)

runner = fake_session.run.call_args_list[0][1]["runner"]
assert isinstance(runner, SequentialRunner)
assert not runner._is_async

@mark.parametrize(
"tags_input, tags_expected",
[
["tag1", ("tag1",)],
["tag1,tag2", ("tag1", "tag2")],
["tag1, tag2", ("tag1", "tag2")],
],
)
def test_run_with_tags(
self,
fake_project_cli,
fake_metadata,
fake_session,
mocker,
tags_input,
tags_expected,
):
tags_command = "--tags=" + tags_input
result = CliRunner().invoke(
fake_project_cli, ["run", tags_command], obj=fake_metadata
)
assert not result.exit_code

fake_session.run.assert_called_once_with(
tags=tags_expected,
runner=mocker.ANY,
node_names=(),
from_nodes=[],
to_nodes=[],
from_inputs=[],
to_outputs=[],
load_versions={},
pipeline_name=None,
)

runner = fake_session.run.call_args_list[0][1]["runner"]
assert isinstance(runner, SequentialRunner)
assert not runner._is_async

def test_run_with_pipeline_filters(
self, fake_project_cli, fake_metadata, fake_session, mocker
):
from_nodes = ["--from-nodes", "splitting_data"]
to_nodes = ["--to-nodes", "training_model"]
tags = ["--tag", "de"]
tags = ["--tags", "de"]
result = CliRunner().invoke(
fake_project_cli, ["run", *from_nodes, *to_nodes, *tags], obj=fake_metadata
)
Expand Down Expand Up @@ -705,6 +783,45 @@ def test_reformat_load_versions(
pipeline_name=None,
)

@mark.parametrize(
"lv_input, lv_dict",
[
[
"dataset1:time1",
{
"dataset1": "time1",
},
],
[
"dataset1:time1,dataset2:time2",
{"dataset1": "time1", "dataset2": "time2"},
],
[
"dataset1:time1, dataset2:time2",
{"dataset1": "time1", "dataset2": "time2"},
],
],
)
def test_split_load_versions(
self, fake_project_cli, fake_metadata, fake_session, lv_input, lv_dict, mocker
):
result = CliRunner().invoke(
fake_project_cli, ["run", "--load-versions", lv_input], obj=fake_metadata
)
assert not result.exit_code, result.output

fake_session.run.assert_called_once_with(
tags=(),
runner=mocker.ANY,
node_names=(),
from_nodes=[],
to_nodes=[],
from_inputs=[],
to_outputs=[],
load_versions=lv_dict,
pipeline_name=None,
)

def test_fail_reformat_load_versions(self, fake_project_cli, fake_metadata):
load_version = "2020-05-12T12.00.00"
result = CliRunner().invoke(
Expand All @@ -719,6 +836,22 @@ def test_fail_reformat_load_versions(self, fake_project_cli, fake_metadata):
)
assert expected_output in result.output

def test_fail_split_load_versions(self, fake_project_cli, fake_metadata):
load_version = "2020-05-12T12.00.00"
result = CliRunner().invoke(
fake_project_cli,
["run", "--load-versions", load_version],
obj=fake_metadata,
)
assert result.exit_code, result.output

expected_output = (
f"Error: Expected the form of 'load_version' to be "
f"'dataset_name:YYYY-MM-DDThh.mm.ss.sssZ',"
f"found {load_version} instead\n"
)
assert expected_output in result.output

@mark.parametrize(
"from_nodes, expected",
[
Expand Down