Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add name validation for metrics #5841

Merged
merged 4 commits into from
Sep 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changes/unreleased/Under the Hood-20220914-132632.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Under the Hood
body: Add name validation for metrics
time: 2022-09-14T13:26:32.387524-05:00
custom:
Author: emmyoop
Issue: "5456"
PR: "5841"
20 changes: 18 additions & 2 deletions core/dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from dbt.node_types import NodeType
from dbt.contracts.util import AdditionalPropertiesMixin, Mergeable, Replaceable

Expand Down Expand Up @@ -484,8 +485,23 @@ class UnparsedMetric(dbtClassMixin, Replaceable):
@classmethod
def validate(cls, data):
super(UnparsedMetric, cls).validate(data)
if "name" in data and " " in data["name"]:
raise ParsingException(f"Metrics name '{data['name']}' cannot contain spaces")
if "name" in data:
errors = []
if " " in data["name"]:
errors.append("cannot contain spaces")
# This handles failing queries due to too long metric names.
# It only occurs in BigQuery and Snowflake (Postgres/Redshift truncate)
if len(data["name"]) > 250:
errors.append("cannot contain more than 250 characters")
if not (re.match(r"^[A-Za-z]", data["name"])):
errors.append("must begin with a letter")
if not (re.match(r"[\w-]+$", data["name"])):
errors.append("must contain only letters, numbers and underscores")

if errors:
raise ParsingException(
f"The metric name '{data['name']}' is invalid. It {', '.join(e for e in errors)}"
)

if data.get("calculation_method") == "expression":
raise ValidationError(
Expand Down
8 changes: 8 additions & 0 deletions tests/functional/metrics/fixture_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@
2021-02-19 04:22:41,jcb,2834.96
2021-02-19 13:28:50,china-unionpay,2440.98
""".strip()

models__people_sql = """
select 1 as id, 'Drew' as first_name, 'Banin' as last_name, 'yellow' as favorite_color, true as loves_dbt, 5 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Jeremy' as first_name, 'Cohen' as last_name, 'indigo' as favorite_color, true as loves_dbt, 4 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Callum' as first_name, 'McCann' as last_name, 'emerald' as favorite_color, true as loves_dbt, 0 as tenure, current_timestamp as created_at
"""
11 changes: 3 additions & 8 deletions tests/functional/metrics/test_metric_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from dbt.tests.util import run_dbt, update_config_file, get_manifest


from tests.functional.metrics.fixture_metrics import models__people_sql


class MetricConfigTests:
@pytest.fixture(scope="class", autouse=True)
def setUp(self):
Expand Down Expand Up @@ -46,14 +49,6 @@ def setUp(self):

"""

models__people_sql = """
select 1 as id, 'Drew' as first_name, 'Banin' as last_name, 'yellow' as favorite_color, true as loves_dbt, 5 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Jeremy' as first_name, 'Cohen' as last_name, 'indigo' as favorite_color, true as loves_dbt, 4 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Callum' as first_name, 'McCann' as last_name, 'emerald' as favorite_color, true as loves_dbt, 0 as tenure, current_timestamp as created_at
"""


# Test enabled config in dbt_project.yml
class TestMetricEnabledConfigProjectLevel(MetricConfigTests):
Expand Down
10 changes: 2 additions & 8 deletions tests/functional/metrics/test_metric_helper_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from dbt.tests.util import run_dbt, get_manifest
from dbt.contracts.graph.metrics import ResolvedMetricReference

from tests.functional.metrics.fixture_metrics import models__people_sql

metrics__yml = """
version: 2

Expand Down Expand Up @@ -52,14 +54,6 @@
time_grains: [day, week, month]
"""

models__people_sql = """
select 1 as id, 'Drew' as first_name, 'Banin' as last_name, 'yellow' as favorite_color, true as loves_dbt, 5 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Jeremy' as first_name, 'Cohen' as last_name, 'indigo' as favorite_color, true as loves_dbt, 4 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Callum' as first_name, 'McCann' as last_name, 'emerald' as favorite_color, true as loves_dbt, 0 as tenure, current_timestamp as created_at
"""


class TestMetricHelperFunctions:
@pytest.fixture(scope="class")
Expand Down
134 changes: 111 additions & 23 deletions tests/functional/metrics/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dbt.tests.util import run_dbt, get_manifest
from dbt.exceptions import ParsingException

from tests.functional.metrics.fixture_metrics import mock_purchase_data_csv
from tests.functional.metrics.fixture_metrics import mock_purchase_data_csv, models__people_sql


models__people_metrics_yml = """
Expand Down Expand Up @@ -56,14 +56,6 @@

"""

models__people_sql = """
select 1 as id, 'Drew' as first_name, 'Banin' as last_name, 'yellow' as favorite_color, true as loves_dbt, 5 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Jeremy' as first_name, 'Cohen' as last_name, 'indigo' as favorite_color, true as loves_dbt, 4 as tenure, current_timestamp as created_at
union all
select 1 as id, 'Callum' as first_name, 'McCann' as last_name, 'emerald' as favorite_color, true as loves_dbt, 0 as tenure, current_timestamp as created_at
"""


class TestSimpleMetrics:
@pytest.fixture(scope="class")
Expand Down Expand Up @@ -215,19 +207,6 @@ def test_simple_metric(
meta:
my_meta: 'testing'

- name: collective tenure
label: "Collective tenure"
description: Total number of years of team experience
model: "ref('people')"
calculation_method: sum
expression: tenure
timestamp: created_at
time_grains: [day]
filters:
- field: loves_dbt
operator: 'is'
value: 'true'

"""


Expand All @@ -240,8 +219,117 @@ def models(self):
}

def test_names_with_spaces(self, project):
with pytest.raises(ParsingException):
with pytest.raises(ParsingException) as exc:
run_dbt(["run"])
assert "cannot contain spaces" in str(exc.value)


names_with_special_chars_metrics_yml = """
version: 2

metrics:

- name: number_of_people!
label: "Number of people"
description: Total count of people
model: "ref('people')"
calculation_method: count
expression: "*"
timestamp: created_at
time_grains: [day, week, month]
dimensions:
- favorite_color
- loves_dbt
meta:
my_meta: 'testing'

"""


class TestNamesWithSpecialChar:
@pytest.fixture(scope="class")
def models(self):
return {
"people_metrics.yml": names_with_special_chars_metrics_yml,
"people.sql": models__people_sql,
}

def test_names_with_special_char(self, project):
with pytest.raises(ParsingException) as exc:
run_dbt(["run"])
assert "must contain only letters, numbers and underscores" in str(exc.value)


names_with_leading_numeric_metrics_yml = """
version: 2

metrics:

- name: 1_number_of_people
label: "Number of people"
description: Total count of people
model: "ref('people')"
calculation_method: count
expression: "*"
timestamp: created_at
time_grains: [day, week, month]
dimensions:
- favorite_color
- loves_dbt
meta:
my_meta: 'testing'

"""


class TestNamesWithLeandingNumber:
@pytest.fixture(scope="class")
def models(self):
return {
"people_metrics.yml": names_with_leading_numeric_metrics_yml,
"people.sql": models__people_sql,
}

def test_names_with_leading_number(self, project):
with pytest.raises(ParsingException) as exc:
run_dbt(["run"])
assert "must begin with a letter" in str(exc.value)


long_name_metrics_yml = """
version: 2

metrics:

- name: this_name_is_going_to_contain_more_than_250_characters_but_be_otherwise_acceptable_and_then_will_throw_an_error_which_I_expect_to_happen_and_repeat_this_name_is_going_to_contain_more_than_250_characters_but_be_otherwise_acceptable_and_then_will_throw_an_error_which_I_expect_to_happen
label: "Number of people"
description: Total count of people
model: "ref('people')"
calculation_method: count
expression: "*"
timestamp: created_at
time_grains: [day, week, month]
dimensions:
- favorite_color
- loves_dbt
meta:
my_meta: 'testing'

"""


class TestLongName:
@pytest.fixture(scope="class")
def models(self):
return {
"people_metrics.yml": long_name_metrics_yml,
"people.sql": models__people_sql,
}

def test_long_name(self, project):
with pytest.raises(ParsingException) as exc:
run_dbt(["run"])
assert "cannot contain more than 250 characters" in str(exc.value)


downstream_model_sql = """
Expand Down