From 9e11a00dae486f7ba8f937b2cb8f917e51f1a055 Mon Sep 17 00:00:00 2001 From: Mike Gouline <1960272+gouline@users.noreply.github.com> Date: Tue, 23 Aug 2022 21:48:12 +1000 Subject: [PATCH] Folder parser meta fields and docs --- README.rst | 33 +++++-------- dbtmetabase/models/metabase.py | 16 ++++--- dbtmetabase/parsers/dbt.py | 54 +++++++++++++++++++-- dbtmetabase/parsers/dbt_folder.py | 73 +++++++++++------------------ dbtmetabase/parsers/dbt_manifest.py | 36 +++++++------- 5 files changed, 118 insertions(+), 94 deletions(-) diff --git a/README.rst b/README.rst index c914418..24433a4 100644 --- a/README.rst +++ b/README.rst @@ -185,10 +185,9 @@ There are two approaches provided by this library to read your dbt project: 1. Artifacts ^^^^^^^^^^^^ -The recommended approach is to instruct dbt-metabase to read your ``manifest.json``, a -`dbt artifact`_ containing the full representation of your dbt project's resources. If -your dbt project uses multiple schemas, multiple databases or model aliases, you must use -this approach. +You can instruct dbt-metabase to read your ``manifest.json``, a `dbt artifact`_ containing +the full representation of your dbt project's resources. If your dbt project uses multiple schemas, +multiple databases or model aliases, you must use this approach. Note that you you have to run ``dbt compile --target prod`` or any of the other dbt commands listed in the dbt documentation above to get a fresh copy of your ``manifest.json``. Remember @@ -203,10 +202,9 @@ project). 2. Direct parsing ^^^^^^^^^^^^^^^^^ -The second alternative is to provide the path to your dbt project root folder -using the argument ``--dbt_path``. dbt-metabase will then look for all .yml files -and parse your documentation and tests directly from there. It will not support -dbt projects with custom schemas. +Alternatively, you can provide the path to your dbt project root folder using the argument +``--dbt_path``. dbt-metabase will then look for all .yml files and parse your documentation +and tests directly from there. It does not support dbt projects with custom schemas. Semantic Types -------------- @@ -254,15 +252,10 @@ See `documentation`_ for a more complete list. Foreign Keys ------------ -By default, dbt-metabase parses the relationship tests to figure out PK-FK -relationships between two tables. Alternatively, you can also use the meta -fields ``fk_target_table`` and ``fk_target_field`` to set the relationships -just like semantic types. You can set the ``semantic_type`` as ``type/FK`` -without setting those two fields, but you cannot set those two fields -without the ``semantic_type`` set to ``type/FK``. If both, meta fields -and relationship test, are set for a field, meta fields take precedence. - -Here is an example of how you could to this: +Built-in relationship tests are the recommended way of defining foreign keys, +however you can alternatively use ``fk_target_table`` and ``fk_target_field`` +meta fields (``semantic_type`` is optional and will be inferred). If both are +set for a column, meta fields take precedence. .. code-block:: yaml @@ -273,9 +266,9 @@ Here is an example of how you could to this: metabase.fk_target_table: analytics_dims.dim_countries metabase.fk_target_field: id -Importantly, the ``fk_target_table`` needs to be in the format -``schema_name.table_name``. If the model has an alias, use the alias, not -the original model name here. +You can provide ``fk_target_table`` in the format ``schema_name.table_name`` or +just ``table_name`` to use the current schema. If your model has an alias, provide +that alias (rather than the original name). Visibility Types ---------------- diff --git a/dbtmetabase/models/metabase.py b/dbtmetabase/models/metabase.py index d05d424..06c08ed 100644 --- a/dbtmetabase/models/metabase.py +++ b/dbtmetabase/models/metabase.py @@ -4,14 +4,16 @@ from typing import Sequence, Optional, MutableMapping # Allowed metabase.* fields -# Should be covered by attributes in the MetabaseColumn class -METABASE_META_FIELDS = [ - "special_type", - "semantic_type", - "visibility_type", +# Must be covered by MetabaseModel attributes +METABASE_MODEL_META_FIELDS = [ "display_name", - "fk_target_table", - "fk_target_field", + "visibility_type", + "points_of_interest", + "caveats", +] +# Must be covered by MetabaseColumn attributes +METABASE_COLUMN_META_FIELDS = METABASE_MODEL_META_FIELDS + [ + "semantic_type", ] diff --git a/dbtmetabase/parsers/dbt.py b/dbtmetabase/parsers/dbt.py index 42cd902..2530e55 100644 --- a/dbtmetabase/parsers/dbt.py +++ b/dbtmetabase/parsers/dbt.py @@ -2,7 +2,8 @@ from os.path import expanduser from typing import Optional, Mapping, MutableMapping, Iterable, Tuple, List -from ..models.metabase import METABASE_META_FIELDS, MetabaseModel, NullValue +from ..logger.logging import logger +from ..models.metabase import MetabaseModel, MetabaseColumn, NullValue class DbtReader(metaclass=ABCMeta): @@ -45,12 +46,59 @@ def read_models( ) -> Tuple[List[MetabaseModel], MutableMapping]: pass + def set_column_foreign_key( + self, + column: Mapping, + metabase_column: MetabaseColumn, + table: Optional[str], + field: Optional[str], + schema: Optional[str], + ): + """Sets foreign key target on a column. + + Args: + column (Mapping): Schema column definition. + metabase_column (MetabaseColumn): Metabase column definition. + table (str): Foreign key target table. + field (str): Foreign key target field. + schema (str): Current schema name. + """ + # Meta fields take precedence + meta = column.get("meta", {}) + table = meta.get("metabase.fk_target_table", table) + field = meta.get("metabase.fk_target_field", field) + + if not table or not field: + if table or field: + logger().warning( + "Foreign key requires table and field for column %s", + metabase_column.name, + ) + return + + table_path = table.split(".") + if len(table_path) == 1 and schema: + table_path.insert(0, schema) + + metabase_column.semantic_type = "type/FK" + metabase_column.fk_target_table = ".".join( + [x.strip('"').upper() for x in table_path] + ) + metabase_column.fk_target_field = field.strip('"').upper() + logger().debug( + "Relation from %s to %s.%s", + metabase_column.name, + metabase_column.fk_target_table, + metabase_column.fk_target_field, + ) + @staticmethod - def read_meta_fields(obj: Mapping) -> Mapping: + def read_meta_fields(obj: Mapping, fields: List) -> Mapping: """Reads meta fields from a schem object. Args: obj (Mapping): Schema object. + fields (List): List of fields to read. Returns: Mapping: Field values. @@ -58,7 +106,7 @@ def read_meta_fields(obj: Mapping) -> Mapping: vals = {} meta = obj.get("meta", []) - for field in METABASE_META_FIELDS: + for field in fields: if f"metabase.{field}" in meta: value = meta[f"metabase.{field}"] vals[field] = value if value is not None else NullValue diff --git a/dbtmetabase/parsers/dbt_folder.py b/dbtmetabase/parsers/dbt_folder.py index 1e9d3b1..310acc2 100644 --- a/dbtmetabase/parsers/dbt_folder.py +++ b/dbtmetabase/parsers/dbt_folder.py @@ -3,8 +3,14 @@ from pathlib import Path from typing import List, Mapping, MutableMapping, Optional, Tuple -from ..models.metabase import MetabaseModel, MetabaseColumn, ModelType from ..logger.logging import logger +from ..models.metabase import ( + MetabaseModel, + MetabaseColumn, + ModelType, + METABASE_MODEL_META_FIELDS, + METABASE_COLUMN_META_FIELDS, +) from .dbt import DbtReader @@ -130,10 +136,6 @@ def _read_model( metabase_columns.append(self._read_column(column, schema)) description = model.get("description", "") - meta = model.get("meta", {}) - points_of_interest = meta.get("metabase.points_of_interest") - caveats = meta.get("metabase.caveats") - if include_tags: tags = model.get("tags", []) if tags: @@ -144,7 +146,6 @@ def _read_model( # Resolved name is what the name will be in the database resolved_name = model.get("alias", model.get("identifier")) - display_name = meta.get("metabase.display_name") dbt_name = None if not resolved_name: resolved_name = model["name"] @@ -153,15 +154,13 @@ def _read_model( return MetabaseModel( name=resolved_name, - display_name=display_name, schema=schema, description=description, - points_of_interest=points_of_interest, - caveats=caveats, columns=metabase_columns, model_type=model_type, source=source, dbt_name=dbt_name, + **self.read_meta_fields(model, METABASE_MODEL_META_FIELDS), ) def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn: @@ -178,51 +177,35 @@ def _read_column(self, column: Mapping, schema: str) -> MetabaseColumn: column_name = column.get("name", "").upper().strip('"') column_description = column.get("description") - meta = column.get("meta", {}) - display_name = meta.get("metabase.display_name") - - # Set explicitly (relationships override this) - fk_to = meta.get("metabase.foreign_key_to") - fk_field = meta.get("metabase.foreign_key_field") - metabase_column = MetabaseColumn( name=column_name, description=column_description, - display_name=display_name, + **self.read_meta_fields(column, METABASE_COLUMN_META_FIELDS), ) + fk_target_table = None + fk_target_field = None + for test in column.get("tests") or []: if isinstance(test, dict): if "relationships" in test: relationships = test["relationships"] - fk_to = relationships["to"] - fk_field = relationships["field"] - - if fk_to and fk_field: - fk_table = self.parse_ref(fk_to) - if fk_table: - metabase_column.semantic_type = "type/FK" - metabase_column.fk_target_table = f"{schema}.{fk_table}".upper() - metabase_column.fk_target_field = str(fk_field).upper().strip('"') - logger().debug( - "Relation from %s to %s.%s", - column.get("name", "").upper().strip('"'), - metabase_column.fk_target_table, - metabase_column.fk_target_field, - ) - else: - logger().warning( - "Could not resolve foreign key target table for column %s", - metabase_column.name, - ) - elif fk_to or fk_field: - logger().warning( - "Foreign key 'to' and 'field' must be provided for column %s", - metabase_column.name, - ) - - for field, value in DbtReader.read_meta_fields(column).items(): - setattr(metabase_column, field, value) + fk_target_table = self.parse_ref(relationships["to"]) + if not fk_target_table: + logger().warning( + "Could not resolve foreign key target table for column %s", + metabase_column.name, + ) + continue + fk_target_field = relationships["field"] + + self.set_column_foreign_key( + column=column, + metabase_column=metabase_column, + table=fk_target_table, + field=fk_target_field, + schema=schema, + ) return metabase_column diff --git a/dbtmetabase/parsers/dbt_manifest.py b/dbtmetabase/parsers/dbt_manifest.py index 7136dc6..f114dc2 100644 --- a/dbtmetabase/parsers/dbt_manifest.py +++ b/dbtmetabase/parsers/dbt_manifest.py @@ -1,8 +1,14 @@ import json from typing import List, Tuple, Mapping, Optional, MutableMapping -from ..models.metabase import MetabaseModel, MetabaseColumn, ModelType from ..logger.logging import logger +from ..models.metabase import ( + MetabaseModel, + MetabaseColumn, + ModelType, + METABASE_MODEL_META_FIELDS, + METABASE_COLUMN_META_FIELDS, +) from .dbt import DbtReader @@ -260,9 +266,6 @@ def _read_model( ) description = model.get("description", "") - meta = model.get("meta", {}) - points_of_interest = meta.get("metabase.points_of_interest") - caveats = meta.get("metabase.caveats") if include_tags: tags = model.get("tags", []) @@ -289,18 +292,16 @@ def _read_model( name=resolved_name, schema=model["schema"].upper(), description=description, - points_of_interest=points_of_interest, - caveats=caveats, columns=metabase_column, model_type=model_type, unique_id=unique_id, source=source, dbt_name=dbt_name, - **DbtReader.read_meta_fields(model), + **self.read_meta_fields(model, METABASE_MODEL_META_FIELDS), ) - @staticmethod def _read_column( + self, column: Mapping, relationship: Optional[Mapping], ) -> MetabaseColumn: @@ -319,18 +320,15 @@ def _read_column( metabase_column = MetabaseColumn( name=column_name, description=column_description, - **DbtReader.read_meta_fields(column), + **self.read_meta_fields(column, METABASE_COLUMN_META_FIELDS), ) - if relationship: - metabase_column.semantic_type = "type/FK" - metabase_column.fk_target_table = relationship["fk_target_table"].upper() - metabase_column.fk_target_field = relationship["fk_target_field"].upper() - logger().debug( - "Relation from %s to %s.%s", - column.get("name", "").upper().strip('"'), - metabase_column.fk_target_table, - metabase_column.fk_target_field, - ) + self.set_column_foreign_key( + column=column, + metabase_column=metabase_column, + table=relationship["fk_target_table"] if relationship else None, + field=relationship["fk_target_field"] if relationship else None, + schema=self.schema, + ) return metabase_column