diff --git a/.changes/unreleased/Fixes-20220428-100157.yaml b/.changes/unreleased/Fixes-20220428-100157.yaml new file mode 100644 index 00000000000..03d6b4a52ea --- /dev/null +++ b/.changes/unreleased/Fixes-20220428-100157.yaml @@ -0,0 +1,7 @@ +kind: Fixes +body: Add warning if yaml contains duplicate keys +time: 2022-04-28T10:01:57.893956+12:00 +custom: + Author: jeremyyeo + Issue: "5114" + PR: "5146" diff --git a/core/dbt/clients/yaml_helper.py b/core/dbt/clients/yaml_helper.py index a2f6c54f75d..1afd8388509 100644 --- a/core/dbt/clients/yaml_helper.py +++ b/core/dbt/clients/yaml_helper.py @@ -8,6 +8,7 @@ except ImportError: from yaml import Loader, SafeLoader, Dumper # type: ignore # noqa: F401 +from dbt.ui import warning_tag YAML_ERROR_MESSAGE = """ Syntax error near line {line_number} @@ -20,6 +21,26 @@ """.strip() +class UniqueKeyLoader(SafeLoader): + """A subclass that checks for unique yaml mapping nodes. + + This class extends `SafeLoader` from the `yaml` library to check for + unique top level keys (mapping nodes). See issue (https://github.com/yaml/pyyaml/issues/165) + and solution (https://gist.github.com/pypt/94d747fe5180851196eb?permalink_comment_id=4015118). + """ + + def construct_mapping(self, node, deep=False): + mapping = set() + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + if key in mapping: + raise dbt.exceptions.DuplicateYamlKeyException( + f"Duplicate {key!r} key found in yaml file" + ) + mapping.add(key) + return super().construct_mapping(node, deep) + + def line_no(i, line, width=3): line_number = str(i).ljust(width) return "{}| {}".format(line_number, line) @@ -48,10 +69,10 @@ def contextualized_yaml_error(raw_contents, error): def safe_load(contents) -> Optional[Dict[str, Any]]: - return yaml.load(contents, Loader=SafeLoader) + return yaml.load(contents, Loader=UniqueKeyLoader) -def load_yaml_text(contents): +def load_yaml_text(contents, path=None): try: return safe_load(contents) except (yaml.scanner.ScannerError, yaml.YAMLError) as e: @@ -61,3 +82,7 @@ def load_yaml_text(contents): error = str(e) raise dbt.exceptions.ValidationException(error) + except dbt.exceptions.DuplicateYamlKeyException as e: + # TODO: We may want to raise an exception instead of a warning in the future. + e.msg = f"{e} {path.searched_path}/{path.relative_path}." + dbt.exceptions.warn_or_raise(e, log_fmt=warning_tag("{}")) diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 1293dce1090..88d92919933 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -437,6 +437,10 @@ def __init__(self, name: str): super().__init__(name) +class DuplicateYamlKeyException(CompilationException): + pass + + def raise_compiler_error(msg, node=None) -> NoReturn: raise CompilationException(msg, node) @@ -903,7 +907,8 @@ def raise_ambiguous_alias(node_1, node_2, duped_name=None): def raise_ambiguous_catalog_match(unique_id, match_1, match_2): def get_match_string(match): return "{}.{}".format( - match.get("metadata", {}).get("schema"), match.get("metadata", {}).get("name") + match.get("metadata", {}).get("schema"), + match.get("metadata", {}).get("name"), ) raise_compiler_error( diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 18d2eb96b46..a527181ec86 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -115,7 +115,7 @@ def yaml_from_file(source_file: SchemaSourceFile) -> Dict[str, Any]: """If loading the yaml fails, raise an exception.""" path = source_file.path.relative_path try: - return load_yaml_text(source_file.contents) + return load_yaml_text(source_file.contents, source_file.path) except ValidationException as e: reason = validator_error_message(e) raise ParsingException( @@ -548,7 +548,8 @@ def parse_file(self, block: FileBlock, dct: Dict = None) -> None: def check_format_version(file_path, yaml_dct) -> None: if "version" not in yaml_dct: raise_invalid_property_yml_version( - file_path, "the yml property file {} is missing a version tag".format(file_path) + file_path, + "the yml property file {} is missing a version tag".format(file_path), ) version = yaml_dct["version"] @@ -562,7 +563,8 @@ def check_format_version(file_path, yaml_dct) -> None: ) if version != 2: raise_invalid_property_yml_version( - file_path, "its 'version:' tag is set to {}. Only 2 is supported".format(version) + file_path, + "its 'version:' tag is set to {}. Only 2 is supported".format(version), ) diff --git a/test/integration/006_simple_dependency_tests/local_dependency/models/schema.yml b/test/integration/006_simple_dependency_tests/local_dependency/models/schema.yml index 0f942e08325..32655f6067e 100644 --- a/test/integration/006_simple_dependency_tests/local_dependency/models/schema.yml +++ b/test/integration/006_simple_dependency_tests/local_dependency/models/schema.yml @@ -4,7 +4,6 @@ sources: schema: invalid_schema tables: - name: my_table -sources: - name: seed_source schema: "{{ var('schema_override', target.schema) }}" tables: diff --git a/tests/functional/duplications/test_basic_duplications.py b/tests/functional/duplications/test_basic_duplications.py new file mode 100644 index 00000000000..61e3968096f --- /dev/null +++ b/tests/functional/duplications/test_basic_duplications.py @@ -0,0 +1,33 @@ +import pytest + +from dbt.tests.util import run_dbt_and_capture, run_dbt +from dbt.exceptions import DuplicateYamlKeyException + +duplicate_key_schema__schema_yml = """ +version: 2 +models: + - name: my_model +models: + - name: my_model +""" + +my_model_sql = """ + select 1 as fun +""" + + +class TestBasicDuplications: + @pytest.fixture(scope="class") + def models(self): + return { + "my_model.sql": my_model_sql, + "schema.yml": duplicate_key_schema__schema_yml, + } + + def test_warning_in_stdout(self, project): + results, stdout = run_dbt_and_capture(["run"]) + assert "Duplicate 'models' key found in yaml file models/schema.yml" in stdout + + def test_exception_is_raised_with_warn_error_flag(self, project): + with pytest.raises(DuplicateYamlKeyException): + run_dbt(["--warn-error", "run"])