fix: Consistent model selection in parsing #141

Merged · 5 commits · Aug 28, 2022
3 changes: 1 addition & 2 deletions dbtmetabase/__init__.py
@@ -724,8 +724,7 @@ def exposures(
)

# Load models
dbt_models, aliases = dbt.read_models()
del aliases # Unused in this particular function
dbt_models, _ = dbt.read_models()

# Instantiate Metabase interface
metabase = MetabaseInterface(
33 changes: 20 additions & 13 deletions dbtmetabase/metabase.py
@@ -1,25 +1,30 @@
import re
import json
import requests
import time
import yaml
import os

import re
import time
from typing import (
Sequence,
Optional,
Tuple,
Iterable,
MutableMapping,
Union,
List,
Mapping,
MutableMapping,
Optional,
Sequence,
Tuple,
Union,
)

from dbtmetabase.models import exceptions
import requests
import yaml

from .logger.logging import logger
from .models.metabase import MetabaseModel, MetabaseColumn, ModelType, NullValue
from .models import exceptions
from .models.metabase import (
METABASE_MODEL_DEFAULT_SCHEMA,
MetabaseColumn,
MetabaseModel,
ModelType,
NullValue,
)


class MetabaseClient:
@@ -458,7 +463,9 @@ def build_metadata_lookups(
table_schema = table.get("schema")
# table["schema"] is null for bigquery datasets
bigquery_schema = metadata.get("details", {}).get("dataset-id")
table_schema = (table_schema or bigquery_schema or "public").upper()
table_schema = (
table_schema or bigquery_schema or METABASE_MODEL_DEFAULT_SCHEMA
).upper()
table_name = table["name"].upper()

if schemas_to_exclude:
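This hunk replaces the hard-coded `"public"` fallback in `build_metadata_lookups` with the new `METABASE_MODEL_DEFAULT_SCHEMA` constant, so BigQuery tables (which report a null schema) resolve to the dataset id first and only then to the default. A minimal sketch of that resolution order, using hypothetical stand-ins for the Metabase API payloads:

```python
# Hypothetical table/metadata dicts standing in for Metabase API responses.
METABASE_MODEL_DEFAULT_SCHEMA = "PUBLIC"


def resolve_schema(table: dict, metadata: dict) -> str:
    table_schema = table.get("schema")  # null for BigQuery datasets
    bigquery_schema = metadata.get("details", {}).get("dataset-id")
    return (table_schema or bigquery_schema or METABASE_MODEL_DEFAULT_SCHEMA).upper()


print(resolve_schema({"schema": "analytics"}, {}))                                   # ANALYTICS
print(resolve_schema({"schema": None}, {"details": {"dataset-id": "my_dataset"}}))   # MY_DATASET
print(resolve_schema({"schema": None}, {}))                                          # PUBLIC
```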
6 changes: 4 additions & 2 deletions dbtmetabase/models/metabase.py
@@ -1,7 +1,6 @@
from dataclasses import dataclass, field
from enum import Enum

from typing import Sequence, Optional, MutableMapping
from typing import MutableMapping, Optional, Sequence

# Allowed metabase.* fields
# Must be covered by MetabaseModel attributes
@@ -16,6 +15,9 @@
"semantic_type",
]

# Default model schema (only schema in BigQuery)
METABASE_MODEL_DEFAULT_SCHEMA = "PUBLIC"


class ModelType(str, Enum):
nodes = "nodes"
26 changes: 19 additions & 7 deletions dbtmetabase/parsers/dbt.py
@@ -1,9 +1,9 @@
from abc import ABCMeta, abstractmethod
from os.path import expanduser
from typing import Optional, Mapping, MutableMapping, Iterable, Tuple, List
from typing import Iterable, List, Mapping, MutableMapping, Optional, Tuple

from ..logger.logging import logger
from ..models.metabase import MetabaseModel, MetabaseColumn, NullValue
from ..models.metabase import MetabaseColumn, MetabaseModel, NullValue


class DbtReader(metaclass=ABCMeta):
@@ -31,11 +31,11 @@ def __init__(
"""

self.path = expanduser(path)
self.database = database
self.schema = schema
self.schema_excludes = schema_excludes
self.includes = includes
self.excludes = excludes
self.database = database.upper() if database else None
self.schema = schema.upper() if schema else None
self.schema_excludes = [x.upper() for x in schema_excludes or []]
self.includes = [x.upper() for x in includes or []]
self.excludes = [x.upper() for x in excludes or []]
self.alias_mapping: MutableMapping = {}

@abstractmethod
@@ -46,6 +46,18 @@ def read_models(
) -> Tuple[List[MetabaseModel], MutableMapping]:
pass

def model_selected(self, name: str) -> bool:
"""Checks whether model passes inclusion/exclusion criteria.

Args:
name (str): Model name.

Returns:
bool: True if included, false otherwise.
"""
n = name.upper()
return n not in self.excludes and (not self.includes or n in self.includes)

def set_column_foreign_key(
self,
column: Mapping,
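The `DbtReader` constructor now upper-cases the database, schema, and filter lists once, and the new `model_selected` helper centralizes the include/exclude check that was previously duplicated in the readers. A stand-alone sketch of that selection logic under those assumptions (the class below is a simplified stand-in, not the real reader):

```python
from typing import List, Optional


class ReaderSketch:
    """Simplified stand-in for DbtReader, showing only the selection logic."""

    def __init__(self, includes: Optional[List[str]] = None, excludes: Optional[List[str]] = None):
        # Normalize once so later comparisons are case-insensitive.
        self.includes = [x.upper() for x in includes or []]
        self.excludes = [x.upper() for x in excludes or []]

    def model_selected(self, name: str) -> bool:
        n = name.upper()
        return n not in self.excludes and (not self.includes or n in self.includes)


reader = ReaderSketch(includes=["orders"], excludes=["stg_orders"])
assert reader.model_selected("Orders")         # selected despite case difference
assert not reader.model_selected("customers")  # not in includes
assert not reader.model_selected("STG_ORDERS") # explicitly excluded
```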
116 changes: 63 additions & 53 deletions dbtmetabase/parsers/dbt_folder.py
@@ -1,15 +1,17 @@
import re
import yaml
from pathlib import Path
from typing import List, Mapping, MutableMapping, Optional, Tuple

import yaml

from ..logger.logging import logger
from ..models.metabase import (
MetabaseModel,
METABASE_COLUMN_META_FIELDS,
METABASE_MODEL_DEFAULT_SCHEMA,
METABASE_MODEL_META_FIELDS,
MetabaseColumn,
MetabaseModel,
ModelType,
METABASE_MODEL_META_FIELDS,
METABASE_COLUMN_META_FIELDS,
)
from .dbt import DbtReader

@@ -34,78 +36,86 @@ def read_models(
list -- List of dbt models in Metabase-friendly format.
"""

database = self.database
schema = self.schema
schema_excludes = self.schema_excludes
includes = self.includes
excludes = self.excludes

if schema_excludes is None:
schema_excludes = []
if includes is None:
includes = []
if excludes is None:
excludes = []
if schema is None:
schema = "public"

# Args that allow API interface for both readers to be interchangeable while passing CI
del database, docs_url

mb_models: List[MetabaseModel] = []

schema = self.schema or METABASE_MODEL_DEFAULT_SCHEMA

for path in (Path(self.path) / "models").rglob("*.yml"):
with open(path, "r", encoding="utf-8") as stream:
schema_file = yaml.safe_load(stream)
if schema_file is None:
if not schema_file:
logger().warning("Skipping empty or invalid YAML: %s", path)
continue

for model in schema_file.get("models", []):
name = model.get("alias", model["name"])
model_name = model.get("alias", model["name"]).upper()

# Refs will still use file name -- this alias mapping is good for getting the right name in the database
if "alias" in model:
self.alias_mapping[name] = model["name"]
self.alias_mapping[model_name] = model["name"].upper()

logger().info("\nProcessing model: %s", path)
if (not includes or name in includes) and (name not in excludes):
mb_models.append(
self._read_model(
model=model,
schema=schema.upper(),
model_type=ModelType.nodes,
include_tags=include_tags,
)

if not self.model_selected(model_name):
logger().debug(
"Skipping %s not included in includes or excluded by excludes",
model_name,
)
logger().debug(mb_models[-1].ref)
continue

mb_models.append(
self._read_model(
model=model,
schema=schema,
model_type=ModelType.nodes,
include_tags=include_tags,
)
)

for source in schema_file.get("sources", []):
source_schema_name = source.get("schema", source["name"])
source_schema_name = source.get("schema", source["name"]).upper()

if "{{" in source_schema_name and "}}" in source_schema_name:
logger().warning(
"dbt Folder Reader cannot resolve jinja expressions- use the Manifest Reader instead."
"dbt folder reader cannot resolve Jinja expressions, defaulting to current schema"
)
source_schema_name = schema
if source_schema_name.upper() != schema.upper():

elif source_schema_name != schema:
logger().debug(
"Skipping schema %s not in target schema %s",
source_schema_name,
schema,
)
continue

for model in source.get("tables", []):
name = model.get("identifier", model["name"])
model_name = model.get("identifier", model["name"]).upper()

# These will be used to resolve our regex parsed source() references
if "identifier" in model:
self.alias_mapping[name] = model["name"]
self.alias_mapping[model_name] = model["name"].upper()

logger().info(
"\nProcessing source: %s -- table: %s", path, name
"\nProcessing source: %s -- table: %s", path, model_name
)
if (not includes or name in includes) and (
name not in excludes
):
mb_models.append(
self._read_model(
model=model,
source=source["name"],
model_type=ModelType.sources,
schema=source_schema_name.upper(),
include_tags=include_tags,
)

if not self.model_selected(model_name):
logger().debug(
"Skipping %s not included in includes or excluded by excludes",
model_name,
)
continue

mb_models.append(
self._read_model(
model=model,
source=source["name"],
model_type=ModelType.sources,
schema=source_schema_name,
include_tags=include_tags,
)
logger().debug(mb_models[-1].ref)
)

return mb_models, self.alias_mapping

@@ -168,7 +178,7 @@ def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn:

Arguments:
column {dict} -- One dbt column to read.
schema {str} -- Schema as passed doen from CLI args or parsed from `source`
schema {str} -- Schema as passed down from CLI args or parsed from `source`

Returns:
dict -- One dbt column in Metabase-friendly format.
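In the folder reader, model and source names are now upper-cased before they reach `model_selected` and the alias mapping, so YAML casing no longer affects filtering. A small sketch of the new name handling, using hypothetical `schema.yml` entries:

```python
# Hypothetical dbt schema.yml entries (as parsed dicts) illustrating the
# upper-cased name/alias handling introduced in this diff.
model = {"name": "stg_orders", "alias": "orders"}
source_table = {"name": "orders_raw", "identifier": "raw_orders"}

model_name = model.get("alias", model["name"]).upper()                        # "ORDERS"
source_name = source_table.get("identifier", source_table["name"]).upper()    # "RAW_ORDERS"

alias_mapping = {}
if "alias" in model:
    alias_mapping[model_name] = model["name"].upper()
if "identifier" in source_table:
    alias_mapping[source_name] = source_table["name"].upper()

print(alias_mapping)  # {'ORDERS': 'STG_ORDERS', 'RAW_ORDERS': 'ORDERS_RAW'}
```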