Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/3723 greedy flag test selection #3833

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- Added default field in the `selectors.yml` to allow user to define default selector ([#3448](https://github.com/dbt-labs/dbt/issues/3448), [#3875](https://github.com/dbt-labs/dbt/issues/3875), [#3892](https://github.com/dbt-labs/dbt/issues/3892))
- Added timing and thread information to sources.json artifact ([#3804](https://github.com/dbt-labs/dbt/issues/3804), [#3894](https://github.com/dbt-labs/dbt/pull/3894))
- Update cli and rpc flags for the `build` task to align with other commands (`--resource-type`, `--store-failures`) ([#3596](https://github.com/dbt-labs/dbt/issues/3596), [#3884](https://github.com/dbt-labs/dbt/pull/3884))
- Log tests that are not indirectly selected. Add `--greedy` flag to `test`, `list`, `build` and `greedy` property in yaml selectors ([#3723](https://github.com/dbt-labs/dbt/pull/3723), [#3833](https://github.com/dbt-labs/dbt/pull/3833))

### Fixes

Expand Down Expand Up @@ -44,7 +45,7 @@ Contributors:
- [@dbrtly](https://github.com/dbrtly) ([#3834](https://github.com/dbt-labs/dbt/pull/3834))
- [@swanderz](https://github.com/swanderz) [#3623](https://github.com/dbt-labs/dbt/pull/3623)
- [@JasonGluck](https://github.com/JasonGluck) ([#3582](https://github.com/dbt-labs/dbt/pull/3582))
- [@joellabes](https://github.com/joellabes) ([#3669](https://github.com/dbt-labs/dbt/pull/3669))
- [@joellabes](https://github.com/joellabes) ([#3669](https://github.com/dbt-labs/dbt/pull/3669), [#3833](https://github.com/dbt-labs/dbt/pull/3833))
- [@juma-adoreme](https://github.com/juma-adoreme) ([#3838](https://github.com/dbt-labs/dbt/pull/3838))
- [@annafil](https://github.com/annafil) ([#3825](https://github.com/dbt-labs/dbt/pull/3825))
- [@AndreasTA-AW](https://github.com/AndreasTA-AW) ([#3691](https://github.com/dbt-labs/dbt/pull/3691))
Expand Down
7 changes: 5 additions & 2 deletions core/dbt/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
PARTIAL_PARSE = None
USE_COLORS = None
STORE_FAILURES = None
GREEDY = None


def env_set_truthy(key: str) -> Optional[str]:
Expand Down Expand Up @@ -56,7 +57,7 @@ def _get_context():
def reset():
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER, \
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS, \
STORE_FAILURES
STORE_FAILURES, GREEDY

STRICT_MODE = False
FULL_REFRESH = False
Expand All @@ -69,12 +70,13 @@ def reset():
MP_CONTEXT = _get_context()
USE_COLORS = True
STORE_FAILURES = False
GREEDY = False


def set_from_args(args):
global STRICT_MODE, FULL_REFRESH, USE_CACHE, WARN_ERROR, TEST_NEW_PARSER, \
USE_EXPERIMENTAL_PARSER, WRITE_JSON, PARTIAL_PARSE, MP_CONTEXT, USE_COLORS, \
STORE_FAILURES
STORE_FAILURES, GREEDY

USE_CACHE = getattr(args, 'use_cache', USE_CACHE)

Expand All @@ -99,6 +101,7 @@ def set_from_args(args):
USE_COLORS = use_colors_override

STORE_FAILURES = getattr(args, 'store_failures', STORE_FAILURES)
GREEDY = getattr(args, 'greedy', GREEDY)


# initialize everything to the defaults on module load
Expand Down
15 changes: 9 additions & 6 deletions core/dbt/graph/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# special support for CLI argument parsing.
from dbt import flags
import itertools
from dbt.clients.yaml_helper import yaml, Loader, Dumper # noqa: F401

Expand Down Expand Up @@ -66,7 +67,7 @@ def parse_union_from_default(
def parse_difference(
include: Optional[List[str]], exclude: Optional[List[str]]
) -> SelectionDifference:
included = parse_union_from_default(include, DEFAULT_INCLUDES)
included = parse_union_from_default(include, DEFAULT_INCLUDES, greedy=bool(flags.GREEDY))
excluded = parse_union_from_default(exclude, DEFAULT_EXCLUDES, greedy=True)
return SelectionDifference(components=[included, excluded])

Expand Down Expand Up @@ -180,15 +181,16 @@ def parse_union_definition(definition: Dict[str, Any]) -> SelectionSpec:
union_def_parts = _get_list_dicts(definition, 'union')
include, exclude = _parse_include_exclude_subdefs(union_def_parts)

union = SelectionUnion(components=include)
union = SelectionUnion(components=include, greedy_warning=False)

if exclude is None:
union.raw = definition
return union
else:
return SelectionDifference(
components=[union, exclude],
raw=definition
raw=definition,
greedy_warning=False
)


Expand All @@ -197,15 +199,16 @@ def parse_intersection_definition(
) -> SelectionSpec:
intersection_def_parts = _get_list_dicts(definition, 'intersection')
include, exclude = _parse_include_exclude_subdefs(intersection_def_parts)
intersection = SelectionIntersection(components=include)
intersection = SelectionIntersection(components=include, greedy_warning=False)

if exclude is None:
intersection.raw = definition
return intersection
else:
return SelectionDifference(
components=[intersection, exclude],
raw=definition
raw=definition,
greedy_warning=False
)


Expand Down Expand Up @@ -239,7 +242,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
if diff_arg is None:
return base
else:
return SelectionDifference(components=[base, diff_arg])
return SelectionDifference(components=[base, diff_arg], greedy_warning=False)


def parse_from_definition(
Expand Down
44 changes: 36 additions & 8 deletions core/dbt/graph/selector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

from typing import Set, List, Optional, Tuple

from .graph import Graph, UniqueId
Expand Down Expand Up @@ -30,6 +29,24 @@ def alert_non_existence(raw_spec, nodes):
)


def alert_unused_nodes(raw_spec, node_names):
summary_nodes_str = ("\n - ").join(node_names[:3])
debug_nodes_str = ("\n - ").join(node_names)
and_more_str = f"\n - and {len(node_names) - 3} more" if len(node_names) > 4 else ""
summary_msg = (
f"\nSome tests were excluded because at least one parent is not selected. "
f"Use the --greedy flag to include them."
f"\n - {summary_nodes_str}{and_more_str}"
)
logger.info(summary_msg)
if len(node_names) > 4:
debug_msg = (
f"Full list of tests that were excluded:"
f"\n - {debug_nodes_str}"
)
logger.debug(debug_msg)


def can_select_indirectly(node):
"""If a node is not selected itself, but its parent(s) are, it may qualify
for indirect selection.
Expand Down Expand Up @@ -151,16 +168,16 @@ def select_nodes_recursively(self, spec: SelectionSpec) -> Tuple[Set[UniqueId],

return direct_nodes, indirect_nodes

def select_nodes(self, spec: SelectionSpec) -> Set[UniqueId]:
def select_nodes(self, spec: SelectionSpec) -> Tuple[Set[UniqueId], Set[UniqueId]]:
"""Select the nodes in the graph according to the spec.

This is the main point of entry for turning a spec into a set of nodes:
- Recurse through spec, select by criteria, combine by set operation
- Return final (unfiltered) selection set
"""

direct_nodes, indirect_nodes = self.select_nodes_recursively(spec)
return direct_nodes
indirect_only = indirect_nodes.difference(direct_nodes)
return direct_nodes, indirect_only

def _is_graph_member(self, unique_id: UniqueId) -> bool:
if unique_id in self.manifest.sources:
Expand Down Expand Up @@ -213,6 +230,8 @@ def expand_selection(
# - If ANY parent is missing, return it separately. We'll keep it around
# for later and see if its other parents show up.
# We use this for INCLUSION.
# Users can also opt in to inclusive GREEDY mode by passing --greedy flag,
# or by specifying `greedy: true` in a yaml selector

direct_nodes = set(selected)
indirect_nodes = set()
Expand Down Expand Up @@ -251,15 +270,24 @@ def get_selected(self, spec: SelectionSpec) -> Set[UniqueId]:

- node selection. Based on the include/exclude sets, the set
of matched unique IDs is returned
- expand the graph at each leaf node, before combination
- selectors might override this. for example, this is where
tests are added
- includes direct + indirect selection (for tests)
- filtering:
- selectors can filter the nodes after all of them have been
selected
"""
selected_nodes = self.select_nodes(spec)
selected_nodes, indirect_only = self.select_nodes(spec)
filtered_nodes = self.filter_selection(selected_nodes)

if indirect_only:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be a neat move to wrap this logging logic in another method, defined higher up, and then call it down here, similar to alert_non_existence.

I also bring that up because I think the expect_exists property (attached to BaseSelectionGroup) could be a neat example for what we want to do around a raise_greedy_warning, which would raise a warning for CLI-provided selection criteria, but not for yaml selector-provided selection criteria that define greedy: true|false explicitly. The alert_non_existence method is called if spec.expect_exists here.

filtered_unused_nodes = self.filter_selection(indirect_only)
if filtered_unused_nodes and spec.greedy_warning:
# log anything that didn't make the cut
unused_node_names = []
for unique_id in filtered_unused_nodes:
name = self.manifest.nodes[unique_id].name
unused_node_names.append(name)
alert_unused_nodes(spec, unused_node_names)

return filtered_nodes

def get_graph_queue(self, spec: SelectionSpec) -> GraphQueue:
Expand Down
9 changes: 7 additions & 2 deletions core/dbt/graph/selector_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class SelectionCriteria:
children: bool
children_depth: Optional[int]
greedy: bool = False
greedy_warning: bool = False # do not raise warning for yaml selectors

def __post_init__(self):
if self.children and self.childrens_parents:
Expand Down Expand Up @@ -124,11 +125,11 @@ def selection_criteria_from_dict(
parents_depth=parents_depth,
children=bool(dct.get('children')),
children_depth=children_depth,
greedy=greedy
greedy=(greedy or bool(dct.get('greedy'))),
)

@classmethod
def dict_from_single_spec(cls, raw: str, greedy: bool = False):
def dict_from_single_spec(cls, raw: str):
result = RAW_SELECTOR_PATTERN.match(raw)
if result is None:
return {'error': 'Invalid selector spec'}
Expand All @@ -145,6 +146,8 @@ def dict_from_single_spec(cls, raw: str, greedy: bool = False):
dct['parents'] = bool(dct.get('parents'))
if 'children' in dct:
dct['children'] = bool(dct.get('children'))
if 'greedy' in dct:
dct['greedy'] = bool(dct.get('greedy'))
return dct

@classmethod
Expand All @@ -162,10 +165,12 @@ def __init__(
self,
components: Iterable[SelectionSpec],
expect_exists: bool = False,
greedy_warning: bool = True,
raw: Any = None,
):
self.components: List[SelectionSpec] = list(components)
self.expect_exists = expect_exists
self.greedy_warning = greedy_warning
self.raw = raw

def __iter__(self) -> Iterator[SelectionSpec]:
Expand Down
26 changes: 25 additions & 1 deletion core/dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,14 @@ def _build_build_subparser(subparsers, base_subparser):
Store test results (failing rows) in the database
'''
)
sub.add_argument(
'--greedy',
action='store_true',
help='''
Select all tests that touch the selected resources,
even if they also depend on unselected resources
'''
)
resource_values: List[str] = [
str(s) for s in build_task.BuildTask.ALL_RESOURCE_VALUES
] + ['all']
Expand Down Expand Up @@ -637,7 +645,7 @@ def _add_table_mutability_arguments(*subparsers):
'--full-refresh',
action='store_true',
help='''
If specified, DBT will drop incremental models and
If specified, dbt will drop incremental models and
fully-recalculate the incremental table from the model definition.
'''
)
Expand Down Expand Up @@ -753,6 +761,14 @@ def _build_test_subparser(subparsers, base_subparser):
Store test results (failing rows) in the database
'''
)
sub.add_argument(
'--greedy',
action='store_true',
help='''
Select all tests that touch the selected resources,
even if they also depend on unselected resources
'''
)

sub.set_defaults(cls=test_task.TestTask, which='test', rpc_method='test')
return sub
Expand Down Expand Up @@ -878,6 +894,14 @@ def _build_list_subparser(subparsers, base_subparser):
metavar='SELECTOR',
required=False,
)
sub.add_argument(
'--greedy',
action='store_true',
help='''
Select all tests that touch the selected resources,
even if they also depend on unselected resources
'''
)
_add_common_selector_arguments(sub)

return sub
Expand Down
2 changes: 1 addition & 1 deletion core/dbt/task/runnable.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def run(self):
)

if len(self._flattened_nodes) == 0:
logger.warning("WARNING: Nothing to do. Try checking your model "
logger.warning("\nWARNING: Nothing to do. Try checking your model "
"configs and model specification args")
result = self.get_result(
results=[],
Expand Down
Loading