Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: add warning on duplicated yaml keys #5146

Merged
merged 4 commits into from
Apr 28, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions core/dbt/clients/yaml_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
except ImportError:
from yaml import Loader, SafeLoader, Dumper # type: ignore # noqa: F401

from dbt.ui import warning_tag

YAML_ERROR_MESSAGE = """
Syntax error near line {line_number}
Expand All @@ -20,6 +21,19 @@
""".strip()


class UniqueKeyLoader(SafeLoader):
jeremyyeo marked this conversation as resolved.
Show resolved Hide resolved
def construct_mapping(self, node, deep=False):
    """Build the mapping for ``node``, rejecting duplicated keys.

    Every key of ``node`` is constructed and compared against the keys
    already seen in the same mapping; the actual mapping is then built by
    the parent loader's ``construct_mapping``.

    Args:
        node: the yaml mapping node being constructed.
        deep: forwarded to ``construct_object`` and to the parent
            implementation.

    Raises:
        dbt.exceptions.DuplicateYamlKeyException: if the same key occurs
            more than once in this mapping.
    """
    seen_keys = set()
    for key_node, _value_node in node.value:
        # Merge keys (``<<``) are expanded by the parent implementation; the
        # safe constructor has no constructor registered for the merge tag,
        # so building one here would fail on otherwise valid yaml.
        if key_node.tag == "tag:yaml.org,2002:merge":
            continue
        key = self.construct_object(key_node, deep=deep)
        try:
            duplicated = key in seen_keys
        except TypeError:
            # Unhashable key (e.g. a sequence or mapping used as a key):
            # skip the duplicate check and let the parent implementation
            # report it as a yaml error instead of leaking a TypeError.
            continue
        if duplicated:
            raise dbt.exceptions.DuplicateYamlKeyException(
                f"Duplicate {key!r} key found in yaml file"
            )
        seen_keys.add(key)
    return super().construct_mapping(node, deep)


def line_no(i, line, width=3):
    """Prefix ``line`` with its number ``i``, left-justified to ``width`` columns."""
    return f"{str(i).ljust(width)}| {line}"
Expand Down Expand Up @@ -48,10 +62,10 @@ def contextualized_yaml_error(raw_contents, error):


def safe_load(contents) -> Optional[Dict[str, Any]]:
return yaml.load(contents, Loader=SafeLoader)
return yaml.load(contents, Loader=UniqueKeyLoader)


def load_yaml_text(contents):
def load_yaml_text(contents, path=None):
try:
return safe_load(contents)
except (yaml.scanner.ScannerError, yaml.YAMLError) as e:
Expand All @@ -61,3 +75,7 @@ def load_yaml_text(contents):
error = str(e)

raise dbt.exceptions.ValidationException(error)
except dbt.exceptions.DuplicateYamlKeyException as e:
# TODO: We may want to raise an exception instead of a warning in the future.
msg = f"{e} {path.searched_path}/{path.relative_path}."
dbt.exceptions.warn_or_error(msg, log_fmt=warning_tag("{}"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should use warn_or_raise here instead of warn_or_error. That way the exception you raise above bubbles up as-is, instead of being replaced by the CompilationException that warn_or_error raises. To have the full message carried by the DuplicateYamlKeyException, you can modify its message before passing it on:

e.msg = f"{e} {path.searched_path}/{path.relative_path}."
dbt.exceptions.warn_or_raise(e, log_fmt=warning_tag("{}"))

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the new changes I see:

$ dbt run                                 
10:59:42  Running with dbt=1.2.0-a1
10:59:42  [WARNING]: Compilation Error
  Compilation Error
    Duplicate 'models' key found in yaml file models/schema.yml.
10:59:43  Found 2 models, 0 tests, 0 snapshots, 0 analyses, 167 macros, 0 operations, 0 seed files, 0 sources, 0 exposures, 0 metrics
10:59:43  
10:59:43  Concurrency: 1 threads (target='dev')
...

Wondering if that was what you expected here?

7 changes: 6 additions & 1 deletion core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,10 @@ def __init__(self, name: str):
super().__init__(name)


class DuplicateYamlKeyException(CompilationException):
    """Compilation error signalling that a yaml mapping repeats a key."""


def raise_compiler_error(msg, node=None) -> NoReturn:
    """Raise a ``CompilationException`` built from ``msg`` and ``node``."""
    error = CompilationException(msg, node)
    raise error

Expand Down Expand Up @@ -903,7 +907,8 @@ def raise_ambiguous_alias(node_1, node_2, duped_name=None):
def raise_ambiguous_catalog_match(unique_id, match_1, match_2):
def get_match_string(match):
return "{}.{}".format(
match.get("metadata", {}).get("schema"), match.get("metadata", {}).get("name")
match.get("metadata", {}).get("schema"),
match.get("metadata", {}).get("name"),
)

raise_compiler_error(
Expand Down
8 changes: 5 additions & 3 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def yaml_from_file(source_file: SchemaSourceFile) -> Dict[str, Any]:
"""If loading the yaml fails, raise an exception."""
path = source_file.path.relative_path
try:
return load_yaml_text(source_file.contents)
return load_yaml_text(source_file.contents, source_file.path)
except ValidationException as e:
reason = validator_error_message(e)
raise ParsingException(
Expand Down Expand Up @@ -548,7 +548,8 @@ def parse_file(self, block: FileBlock, dct: Dict = None) -> None:
def check_format_version(file_path, yaml_dct) -> None:
if "version" not in yaml_dct:
raise_invalid_property_yml_version(
file_path, "the yml property file {} is missing a version tag".format(file_path)
file_path,
"the yml property file {} is missing a version tag".format(file_path),
)

version = yaml_dct["version"]
Expand All @@ -562,7 +563,8 @@ def check_format_version(file_path, yaml_dct) -> None:
)
if version != 2:
raise_invalid_property_yml_version(
file_path, "its 'version:' tag is set to {}. Only 2 is supported".format(version)
file_path,
"its 'version:' tag is set to {}. Only 2 is supported".format(version),
)


Expand Down
27 changes: 27 additions & 0 deletions tests/functional/duplications/test_basic_duplications.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pytest
from dbt.tests.util import run_dbt_and_capture

# Schema file that declares the top-level ``models`` key twice; the test
# below expects dbt to report this duplication.
duplicate_key_schema__schema_yml = """
version: 2
models:
- name: my_model
models:
- name: my_model
"""

# Minimal model referenced by the schema file above.
my_model_sql = """
select 1 as fun
"""


@pytest.fixture(scope="class")
def models():
    """Return the model files for the project: one model and its schema file."""
    project_files = {}
    project_files["my_model.sql"] = my_model_sql
    project_files["schema.yml"] = duplicate_key_schema__schema_yml
    return project_files


def test_duplicate_key_in_yaml(project):
    """Run dbt and check that the duplicated ``models`` key is reported."""
    expected_warning = "Duplicate 'models' key found in yaml file models/schema.yml"
    _results, captured_output = run_dbt_and_capture(["run"])
    assert expected_warning in captured_output
emmyoop marked this conversation as resolved.
Show resolved Hide resolved