Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: add warning on duplicated yaml keys #5146

Merged
merged 4 commits into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20220428-100157.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Add warning if yaml contains duplicate keys
time: 2022-04-28T10:01:57.893956+12:00
custom:
Author: jeremyyeo
Issue: "5114"
PR: "5146"
29 changes: 27 additions & 2 deletions core/dbt/clients/yaml_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
except ImportError:
from yaml import Loader, SafeLoader, Dumper # type: ignore # noqa: F401

from dbt.ui import warning_tag

YAML_ERROR_MESSAGE = """
Syntax error near line {line_number}
Expand All @@ -20,6 +21,26 @@
""".strip()


class UniqueKeyLoader(SafeLoader):
jeremyyeo marked this conversation as resolved.
Show resolved Hide resolved
"""A subclass that checks for unique yaml mapping nodes.

This class extends `SafeLoader` from the `yaml` library to check for
unique top level keys (mapping nodes). See issue (https://github.com/yaml/pyyaml/issues/165)
and solution (https://gist.github.com/pypt/94d747fe5180851196eb?permalink_comment_id=4015118).
"""

def construct_mapping(self, node, deep=False):
mapping = set()
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
if key in mapping:
raise dbt.exceptions.DuplicateYamlKeyException(
f"Duplicate {key!r} key found in yaml file"
)
mapping.add(key)
return super().construct_mapping(node, deep)


def line_no(i, line, width=3):
line_number = str(i).ljust(width)
return "{}| {}".format(line_number, line)
Expand Down Expand Up @@ -48,10 +69,10 @@ def contextualized_yaml_error(raw_contents, error):


def safe_load(contents) -> Optional[Dict[str, Any]]:
return yaml.load(contents, Loader=SafeLoader)
return yaml.load(contents, Loader=UniqueKeyLoader)


def load_yaml_text(contents):
def load_yaml_text(contents, path=None):
try:
return safe_load(contents)
except (yaml.scanner.ScannerError, yaml.YAMLError) as e:
Expand All @@ -61,3 +82,7 @@ def load_yaml_text(contents):
error = str(e)

raise dbt.exceptions.ValidationException(error)
except dbt.exceptions.DuplicateYamlKeyException as e:
# TODO: We may want to raise an exception instead of a warning in the future.
e.msg = f"{e} {path.searched_path}/{path.relative_path}."
dbt.exceptions.warn_or_raise(e, log_fmt=warning_tag("{}"))
7 changes: 6 additions & 1 deletion core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,10 @@ def __init__(self, name: str):
super().__init__(name)


class DuplicateYamlKeyException(CompilationException):
pass


def raise_compiler_error(msg, node=None) -> NoReturn:
raise CompilationException(msg, node)

Expand Down Expand Up @@ -903,7 +907,8 @@ def raise_ambiguous_alias(node_1, node_2, duped_name=None):
def raise_ambiguous_catalog_match(unique_id, match_1, match_2):
def get_match_string(match):
return "{}.{}".format(
match.get("metadata", {}).get("schema"), match.get("metadata", {}).get("name")
match.get("metadata", {}).get("schema"),
match.get("metadata", {}).get("name"),
)

raise_compiler_error(
Expand Down
8 changes: 5 additions & 3 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def yaml_from_file(source_file: SchemaSourceFile) -> Dict[str, Any]:
"""If loading the yaml fails, raise an exception."""
path = source_file.path.relative_path
try:
return load_yaml_text(source_file.contents)
return load_yaml_text(source_file.contents, source_file.path)
except ValidationException as e:
reason = validator_error_message(e)
raise ParsingException(
Expand Down Expand Up @@ -548,7 +548,8 @@ def parse_file(self, block: FileBlock, dct: Dict = None) -> None:
def check_format_version(file_path, yaml_dct) -> None:
if "version" not in yaml_dct:
raise_invalid_property_yml_version(
file_path, "the yml property file {} is missing a version tag".format(file_path)
file_path,
"the yml property file {} is missing a version tag".format(file_path),
)

version = yaml_dct["version"]
Expand All @@ -562,7 +563,8 @@ def check_format_version(file_path, yaml_dct) -> None:
)
if version != 2:
raise_invalid_property_yml_version(
file_path, "its 'version:' tag is set to {}. Only 2 is supported".format(version)
file_path,
"its 'version:' tag is set to {}. Only 2 is supported".format(version),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ sources:
schema: invalid_schema
tables:
- name: my_table
sources:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@emmyoop surprisingly fixing the duplicate top level keys in this yaml file in the legacy tests causes it to pass so now really curious what this was doing previously... makes me wonder if we had left 2 sources keys in this file by mistake the last time around but test was passing due to pyaml default of only reading the last source key of this file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not seeing anything in the tests that indicate we did it on purpose so you may be right. These tests are going to be rewritten and we'll work through some of the weirdness in them as we go.

- name: seed_source
schema: "{{ var('schema_override', target.schema) }}"
tables:
Expand Down
33 changes: 33 additions & 0 deletions tests/functional/duplications/test_basic_duplications.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pytest

from dbt.tests.util import run_dbt_and_capture, run_dbt
from dbt.exceptions import DuplicateYamlKeyException

duplicate_key_schema__schema_yml = """
version: 2
models:
- name: my_model
models:
- name: my_model
"""

my_model_sql = """
select 1 as fun
"""


class TestBasicDuplications:
@pytest.fixture(scope="class")
def models(self):
return {
"my_model.sql": my_model_sql,
"schema.yml": duplicate_key_schema__schema_yml,
}

def test_warning_in_stdout(self, project):
results, stdout = run_dbt_and_capture(["run"])
assert "Duplicate 'models' key found in yaml file models/schema.yml" in stdout

def test_exception_is_raised_with_warn_error_flag(self, project):
with pytest.raises(DuplicateYamlKeyException):
run_dbt(["--warn-error", "run"])