diff --git a/metadata-ingestion/docs/sources/metabase/metabase.md b/metadata-ingestion/docs/sources/metabase/metabase.md index 68422b8decce9..cbabf65c2a574 100644 --- a/metadata-ingestion/docs/sources/metabase/metabase.md +++ b/metadata-ingestion/docs/sources/metabase/metabase.md @@ -17,6 +17,11 @@ the mapping between database id in Metabase and platform instance in DataHub may ``` The key in this map must be string, not integer although Metabase API provides `id` as number. If `database_id_to_instance_map` is not specified, `platform_instance_map` is used for platform instance mapping. If none of the above are specified, platform instance is not used when constructing `urn` when searching for dataset relations. + +If needed, collections belonging to other users can be excluded by setting the following configuration: +```yaml +exclude_other_user_collections: true +``` ## Compatibility Metabase version [v0.48.3](https://www.metabase.com/start/oss/) diff --git a/metadata-ingestion/docs/sources/metabase/metabase.yml b/metadata-ingestion/docs/sources/metabase/metabase.yml index cc2aed9f8bce0..dd3168ff3ff51 100644 --- a/metadata-ingestion/docs/sources/metabase/metabase.yml +++ b/metadata-ingestion/docs/sources/metabase/metabase.yml @@ -12,6 +12,7 @@ source: default_schema: public database_alias_map: h2: sample-dataset.db + exclude_other_user_collections: true # Optional mapping of platform types to instance ids platform_instance_map: # optional postgres: test_postgres # optional diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index 7ba159a86da0d..fc60dc6406730 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -1,3 +1,4 @@ +import json import logging from datetime import datetime, timezone from functools import lru_cache @@ -79,6 +80,10 @@ class MetabaseConfig(DatasetLineageProviderConfigBase): 
default="public", description="Default schema name to use when schema is not provided in an SQL query", ) + exclude_other_user_collections: bool = Field( + default=False, + description="Flag that if true, exclude other user collections", + ) @validator("connect_uri", "display_uri") def remove_trailing_slash(cls, v): @@ -209,6 +214,7 @@ def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]: try: collections_response = self.session.get( f"{self.config.connect_uri}/api/collection/" + f"?exclude-other-user-collections={json.dumps(self.config.exclude_other_user_collections)}" ) collections_response.raise_for_status() collections = collections_response.json() diff --git a/metadata-ingestion/tests/integration/metabase/test_metabase.py b/metadata-ingestion/tests/integration/metabase/test_metabase.py index e16394440e1e0..872e57c59b859 100644 --- a/metadata-ingestion/tests/integration/metabase/test_metabase.py +++ b/metadata-ingestion/tests/integration/metabase/test_metabase.py @@ -14,7 +14,7 @@ JSON_RESPONSE_MAP = { "http://localhost:3000/api/session": "session.json", "http://localhost:3000/api/user/current": "user.json", - "http://localhost:3000/api/collection/": "collections.json", + "http://localhost:3000/api/collection/?exclude-other-user-collections=false": "collections.json", "http://localhost:3000/api/collection/root/items?models=dashboard": "collection_dashboards.json", "http://localhost:3000/api/collection/150/items?models=dashboard": "collection_dashboards.json", "http://localhost:3000/api/dashboard/10": "dashboard_1.json",