Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add better error messages for yaml selectors #2781

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- Added state and defer arguments to the RPC client, matching the CLI ([#2678](https://github.com/fishtown-analytics/dbt/issues/2678), [#2736](https://github.com/fishtown-analytics/dbt/pull/2736))
- Added schema and dbt versions to JSON artifacts ([#2670](https://github.com/fishtown-analytics/dbt/issues/2670), [#2767](https://github.com/fishtown-analytics/dbt/pull/2767))
- Added ability to snapshot hard-deleted records (opt-in with `invalidate_hard_deletes` config option). ([#249](https://github.com/fishtown-analytics/dbt/issues/249), [#2749](https://github.com/fishtown-analytics/dbt/pull/2749))
- Improved error messages for YAML selectors ([#2700](https://github.com/fishtown-analytics/dbt/issues/2700), [#2781](https://github.com/fishtown-analytics/dbt/pull/2781))

Contributors:
- [@joelluijmes](https://github.com/joelluijmes) ([#2749](https://github.com/fishtown-analytics/dbt/pull/2749))
Expand Down
13 changes: 12 additions & 1 deletion core/dbt/config/selectors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from pathlib import Path
from typing import Dict, Any
import yaml

from hologram import ValidationError

Expand Down Expand Up @@ -33,7 +34,17 @@ def from_dict(cls, data: Dict[str, Any]) -> 'SelectorConfig':
try:
selector_file = SelectorFile.from_dict(data)
selectors = parse_from_selectors_definition(selector_file)
except (ValidationError, RuntimeException) as exc:
except ValidationError as exc:
yaml_sel_cfg = yaml.dump(exc.instance)
raise DbtSelectorsError(
f"Could not parse selector file data: \n{yaml_sel_cfg}\n"
f"Valid root-level selector definitions: "
f"union, intersection, string, dictionary. No lists. "
f"\nhttps://docs.getdbt.com/reference/node-selection/"
f"yaml-selectors",
result_type='invalid_selector'
) from exc
except RuntimeException as exc:
raise DbtSelectorsError(
f'Could not read selector file data: {exc}',
result_type='invalid_selector',
Expand Down
33 changes: 23 additions & 10 deletions core/dbt/graph/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# special support for CLI argument parsing.
import itertools
import yaml

from typing import (
Dict, List, Optional, Tuple, Any, Union
Expand Down Expand Up @@ -116,8 +117,7 @@ def _get_list_dicts(
values = dct[key]
if not isinstance(values, list):
raise ValidationException(
f'Invalid value type {type(values)} in key "{key}" '
f'(value "{values}")'
f'Invalid value for key "{key}". Expected a list.'
)
for value in values:
if isinstance(value, dict):
Expand Down Expand Up @@ -165,9 +165,10 @@ def _parse_include_exclude_subdefs(
if isinstance(definition, dict) and 'exclude' in definition:
# do not allow multiple exclude: defs at the same level
if diff_arg is not None:
yaml_sel_cfg = yaml.dump(definition)
raise ValidationException(
f'Got multiple exclusion definitions in definition list '
f'{definitions}'
f"You cannot provide multiple exclude arguments to the "
f"same selector set operator:\n{yaml_sel_cfg}"
)
diff_arg = _parse_exclusions(definition)
else:
Expand Down Expand Up @@ -198,6 +199,7 @@ def parse_intersection_definition(
intersection_def_parts = _get_list_dicts(definition, 'intersection')
include, exclude = _parse_include_exclude_subdefs(intersection_def_parts)
intersection = SelectionIntersection(components=include)

if exclude is None:
intersection.raw = definition
return intersection
Expand All @@ -210,7 +212,6 @@ def parse_intersection_definition(

def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
diff_arg: Optional[SelectionSpec] = None

if len(definition) == 1:
key = list(definition)[0]
value = definition[key]
Expand All @@ -230,7 +231,7 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
dct = {k: v for k, v in dct.items() if k != 'exclude'}
else:
raise ValidationException(
f'Expected exactly 1 key in the selection definition or "method" '
f'Expected either 1 key or else "method" '
f'and "value" keys, but got {list(definition)}'
)

Expand All @@ -242,7 +243,18 @@ def parse_dict_definition(definition: Dict[str, Any]) -> SelectionSpec:
return SelectionDifference(components=[base, diff_arg])


def parse_from_definition(definition: RawDefinition) -> SelectionSpec:
def parse_from_definition(
definition: RawDefinition, rootlevel=False
) -> SelectionSpec:

if (isinstance(definition, dict) and
('union' in definition or 'intersection' in definition) and
rootlevel and len(definition) > 1):
keys = ",".join(definition.keys())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incredibly nit-picky and totally up to you:

Suggested change
keys = ",".join(definition.keys())
keys = ", ".join(definition.keys())

raise ValidationException(
f"Only a single 'union' or 'intersection' key is allowed "
f"in a root level selector definition; found {keys}."
)
if isinstance(definition, str):
return SelectionCriteria.from_single_spec(definition)
elif 'union' in definition:
Expand All @@ -253,8 +265,8 @@ def parse_from_definition(definition: RawDefinition) -> SelectionSpec:
return parse_dict_definition(definition)
else:
raise ValidationException(
f'Expected to find str or dict, instead found '
f'{type(definition)}: {definition}'
f'Expected to find union, intersection, str or dict, instead '
f'found {type(definition)}: {definition}'
)


Expand All @@ -264,5 +276,6 @@ def parse_from_selectors_definition(
result: Dict[str, SelectionSpec] = {}
selector: SelectorDefinition
for selector in source.selectors:
result[selector.name] = parse_from_definition(selector.definition)
result[selector.name] = parse_from_definition(selector.definition,
rootlevel=True)
return result
4 changes: 3 additions & 1 deletion core/dbt/graph/selector_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ def parse_method(
try:
method_name = MethodName(method_parts[0])
except ValueError as exc:
raise InvalidSelectorException(method_parts[0]) from exc
raise InvalidSelectorException(
f"'{method_parts[0]}' is not a valid method name"
) from exc

method_arguments: List[str] = method_parts[1:]

Expand Down
179 changes: 179 additions & 0 deletions test/unit/test_selector_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import dbt.exceptions
import textwrap
import yaml
import unittest
from dbt.config.selectors import (
selector_config_from_data
)

from dbt.config.selectors import SelectorConfig


def get_selector_dict(txt: str) -> dict:
txt = textwrap.dedent(txt)
dct = yaml.safe_load(txt)
return dct


class SelectorUnitTest(unittest.TestCase):

def test_parse_multiple_excludes(self):
dct = get_selector_dict('''\
selectors:
- name: mult_excl
definition:
union:
- method: tag
value: nightly
- exclude:
- method: tag
value: hourly
- exclude:
- method: tag
value: daily
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
'cannot provide multiple exclude arguments'
):
selector_config_from_data(dct)

def test_parse_set_op_plus(self):
dct = get_selector_dict('''\
selectors:
- name: union_plus
definition:
- union:
- method: tag
value: nightly
- exclude:
- method: tag
value: hourly
- method: tag
value: foo
Comment on lines +44 to +53
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also add an exception + test for quite similar case, which I've seen a few folks run into:

selectors:
  - name: mixed_syntaxes
    definition:
      key: value
      method: tag
      value: foo
      union:
        - method: tag
          value: nightly
        - exclude:
          - method: tag
            value: hourly

The issue here is that contradictory dict arguments are being passed to definition, and it's not at all clear which ones it's using.

Copy link
Contributor Author

@gshank gshank Sep 23, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we talking about the "key: value" piece? or having a union in a dictionary without a 'union' above it? I was under the impression that the above should be:

selectors:
    - name: mixed_syntaxes
      definition:
         union:
           - method: tag
              value: foo
            - method: tag
               value: nightly
            - exclude:
                 method: tag
                 value: hourly

So having a 'union' in a single dictionary should be an error, and having an invalid keyword 'key' should be an error?

Copy link
Contributor

@jtcohen6 jtcohen6 Sep 23, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right on—the way you've defined it is how it should be. I've seen users write the version I put above, which neither throws an error nor works the way they'd expect. I think we should check to see that definition is one of:

  • string
  • dict w/ 1 argument (union, intersection, or arbitrary key: value)
  • dict w/ 2 arguments (must be method + value)

and throw an exception if it doesn't meet one of those three criteria. Does that sound reasonable?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are quite a few other arguments that you can put in a dict, such as 'children', 'parents', etc. What I did and pushed was to check that if there is a union or intersection operator in the root level, there's not also a 'method' key. The way the code worked was that it would just. ignore additional dict keys if there was a union or intersection, so that takes the "way it works" and creates an error.

Let me know if you see any other situations that are problematic.

Copy link
Contributor

@jtcohen6 jtcohen6 Sep 23, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, that's a really good point about children, parents, child_depth, etc. Silly of me to forget, sorry.

That sounds about right to me. Would it be possible to generalize this:

check that if there is a union or intersection operator in the root level, there's not also a 'method' key.

To be: "if there is a union or intersection operator in the root level, there should be no other dict arguments."

OR: "There should be only one dict argument, unless method is one of the arguments."

I think this would also avoid the weirdness of something like:

selectors:
    - name: multiple_sets
      definition:
         intersection:
           - method: tag
             value: foo
           - method: tag
             value: bar
         union:
           - method: tag
             value: nightly
           - exclude:
              - method: tag
                value: hourly

If I understand correctly, the current behavior would be to take the first set and ignore the second set entirely. Really we should throw an error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that would cover more situations. Will change.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this now raises a much better error message:

selectors:
    - name: intersection_and_method
      definition:
         intersection:
           - method: tag
             value: foo
           - method: tag
             value: bar
         method: tag
         value: bar
 You cannot have both method and intersection keys in a root level selector definition:

Nice!

The multiple_sets definition above, however, still doesn't raise an error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strange. For the multiple_set example I get: Only a single 'union' or 'intersection' key is allowed in a root level selector definition; found intersection,union.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This now works wonderfully!

Could not read selector file data: Runtime Error
    Only a single 'union' or 'intersection' key is allowed in a root level selector definition; found intersection,method,value.

I'm sure this was on me for not pulling the latest remote changes.

''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
'Valid root-level selector definitions'
):
selector_config_from_data(dct)

def test_parse_multiple_methods(self):
dct = get_selector_dict('''\
selectors:
- name: mult_methods
definition:
- tag:hourly
- tag:nightly
- fqn:start
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
'Valid root-level selector definitions'
):
selector_config_from_data(dct)

def test_parse_set_with_method(self):
dct = get_selector_dict('''\
selectors:
- name: mixed_syntaxes
definition:
key: value
method: tag
value: foo
union:
- method: tag
value: m1234
- exclude:
- method: tag
value: m5678
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
"Only a single 'union' or 'intersection' key is allowed"
):
selector_config_from_data(dct)

def test_complex_sector(self):
dct = get_selector_dict('''\
selectors:
- name: nightly_diet_snowplow
definition:
union:
- intersection:
- method: source
value: snowplow
childrens_parents: true
- method: tag
value: nightly
- method: path
value: models/export
- exclude:
- intersection:
- method: package
value: snowplow
- method: config.materialized
value: incremental
- method: fqn
value: export_performance_timing
''')
selectors = selector_config_from_data(dct)
assert(isinstance(selectors, SelectorConfig))

def test_exclude_not_list(self):
dct = get_selector_dict('''\
selectors:
- name: summa_exclude
definition:
union:
- method: tag
value: nightly
- exclude:
method: tag
value: daily
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
"Expected a list"
):
selector_config_from_data(dct)

def test_invalid_key(self):
dct = get_selector_dict('''\
selectors:
- name: summa_nothing
definition:
method: tag
key: nightly
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
"Expected either 1 key"
):
selector_config_from_data(dct)

def test_invalid_single_def(self):
dct = get_selector_dict('''\
selectors:
- name: summa_nothing
definition:
fubar: tag
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
"not a valid method name"
):
selector_config_from_data(dct)

def test_method_no_value(self):
dct = get_selector_dict('''\
selectors:
- name: summa_nothing
definition:
method: tag
''')
with self.assertRaisesRegex(
dbt.exceptions.DbtSelectorsError,
"not a valid method name"
):
selector_config_from_data(dct)