Skip to content

Commit

Permalink
Fix: add warning on duplicated yaml keys (dbt-labs#5146)
Browse files Browse the repository at this point in the history
* add warning on duplicated yaml keys

* update structure and tests

* fix old test schema file

* add changelog
  • Loading branch information
jeremyyeo authored and Axel Goblet committed May 20, 2022
1 parent 9d21c3f commit 3107df1
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 7 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Fixes-20220428-100157.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Fixes
body: Add warning if yaml contains duplicate keys
time: 2022-04-28T10:01:57.893956+12:00
custom:
Author: jeremyyeo
Issue: "5114"
PR: "5146"
29 changes: 27 additions & 2 deletions core/dbt/clients/yaml_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
except ImportError:
from yaml import Loader, SafeLoader, Dumper # type: ignore # noqa: F401

from dbt.ui import warning_tag

YAML_ERROR_MESSAGE = """
Syntax error near line {line_number}
Expand All @@ -20,6 +21,26 @@
""".strip()


class UniqueKeyLoader(SafeLoader):
"""A subclass that checks for unique yaml mapping nodes.
This class extends `SafeLoader` from the `yaml` library to check for
unique top level keys (mapping nodes). See issue (https://github.com/yaml/pyyaml/issues/165)
and solution (https://gist.github.com/pypt/94d747fe5180851196eb?permalink_comment_id=4015118).
"""

def construct_mapping(self, node, deep=False):
mapping = set()
for key_node, value_node in node.value:
key = self.construct_object(key_node, deep=deep)
if key in mapping:
raise dbt.exceptions.DuplicateYamlKeyException(
f"Duplicate {key!r} key found in yaml file"
)
mapping.add(key)
return super().construct_mapping(node, deep)


def line_no(i, line, width=3):
line_number = str(i).ljust(width)
return "{}| {}".format(line_number, line)
Expand Down Expand Up @@ -48,10 +69,10 @@ def contextualized_yaml_error(raw_contents, error):


def safe_load(contents) -> Optional[Dict[str, Any]]:
return yaml.load(contents, Loader=SafeLoader)
return yaml.load(contents, Loader=UniqueKeyLoader)


def load_yaml_text(contents):
def load_yaml_text(contents, path=None):
try:
return safe_load(contents)
except (yaml.scanner.ScannerError, yaml.YAMLError) as e:
Expand All @@ -61,3 +82,7 @@ def load_yaml_text(contents):
error = str(e)

raise dbt.exceptions.ValidationException(error)
except dbt.exceptions.DuplicateYamlKeyException as e:
# TODO: We may want to raise an exception instead of a warning in the future.
e.msg = f"{e} {path.searched_path}/{path.relative_path}."
dbt.exceptions.warn_or_raise(e, log_fmt=warning_tag("{}"))
7 changes: 6 additions & 1 deletion core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,10 @@ def __init__(self, name: str):
super().__init__(name)


class DuplicateYamlKeyException(CompilationException):
pass


def raise_compiler_error(msg, node=None) -> NoReturn:
raise CompilationException(msg, node)

Expand Down Expand Up @@ -903,7 +907,8 @@ def raise_ambiguous_alias(node_1, node_2, duped_name=None):
def raise_ambiguous_catalog_match(unique_id, match_1, match_2):
def get_match_string(match):
return "{}.{}".format(
match.get("metadata", {}).get("schema"), match.get("metadata", {}).get("name")
match.get("metadata", {}).get("schema"),
match.get("metadata", {}).get("name"),
)

raise_compiler_error(
Expand Down
8 changes: 5 additions & 3 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def yaml_from_file(source_file: SchemaSourceFile) -> Dict[str, Any]:
"""If loading the yaml fails, raise an exception."""
path = source_file.path.relative_path
try:
return load_yaml_text(source_file.contents)
return load_yaml_text(source_file.contents, source_file.path)
except ValidationException as e:
reason = validator_error_message(e)
raise ParsingException(
Expand Down Expand Up @@ -548,7 +548,8 @@ def parse_file(self, block: FileBlock, dct: Dict = None) -> None:
def check_format_version(file_path, yaml_dct) -> None:
if "version" not in yaml_dct:
raise_invalid_property_yml_version(
file_path, "the yml property file {} is missing a version tag".format(file_path)
file_path,
"the yml property file {} is missing a version tag".format(file_path),
)

version = yaml_dct["version"]
Expand All @@ -562,7 +563,8 @@ def check_format_version(file_path, yaml_dct) -> None:
)
if version != 2:
raise_invalid_property_yml_version(
file_path, "its 'version:' tag is set to {}. Only 2 is supported".format(version)
file_path,
"its 'version:' tag is set to {}. Only 2 is supported".format(version),
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ sources:
schema: invalid_schema
tables:
- name: my_table
sources:
- name: seed_source
schema: "{{ var('schema_override', target.schema) }}"
tables:
Expand Down
33 changes: 33 additions & 0 deletions tests/functional/duplications/test_basic_duplications.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pytest

from dbt.tests.util import run_dbt_and_capture, run_dbt
from dbt.exceptions import DuplicateYamlKeyException

duplicate_key_schema__schema_yml = """
version: 2
models:
- name: my_model
models:
- name: my_model
"""

my_model_sql = """
select 1 as fun
"""


class TestBasicDuplications:
@pytest.fixture(scope="class")
def models(self):
return {
"my_model.sql": my_model_sql,
"schema.yml": duplicate_key_schema__schema_yml,
}

def test_warning_in_stdout(self, project):
results, stdout = run_dbt_and_capture(["run"])
assert "Duplicate 'models' key found in yaml file models/schema.yml" in stdout

def test_exception_is_raised_with_warn_error_flag(self, project):
with pytest.raises(DuplicateYamlKeyException):
run_dbt(["--warn-error", "run"])

0 comments on commit 3107df1

Please sign in to comment.