diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index afc5000220de76..a74d0adb8480e8 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -313,6 +313,7 @@ Extracts: - List of databases, schema, and tables - Column types associated with each table - Also supports PostGIS extensions +- database_identifier (optional) can be used to change the name of database to be ingested ```yml source: @@ -322,6 +323,7 @@ source: password: pass host_port: localhost:5432 database: DemoDatabase + database_identifier: DatabaseNameToBeIngested include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above # options is same as above diff --git a/metadata-ingestion/src/datahub/ingestion/source/postgres.py b/metadata-ingestion/src/datahub/ingestion/source/postgres.py index c8654c23a078e6..b0e83cebdccad8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/postgres.py +++ b/metadata-ingestion/src/datahub/ingestion/source/postgres.py @@ -29,6 +29,8 @@ class PostgresConfig(BasicSQLAlchemyConfig): def get_identifier(self, schema: str, table: str) -> str: regular = f"{schema}.{table}" + if self.database_identifier: + return f"{self.database_identifier}.{regular}" if self.database: return f"{self.database}.{regular}" return regular diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py index 9f4af53cc1d17c..87fe92a9a08493 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py @@ -120,6 +120,7 @@ class BasicSQLAlchemyConfig(SQLAlchemyConfig): password: Optional[pydantic.SecretStr] = None host_port: str database: Optional[str] = None + database_identifier: Optional[str] = None scheme: str def get_sql_alchemy_url(self, uri_opts=None): diff --git a/metadata-ingestion/tests/unit/test_postgres_source.py 
b/metadata-ingestion/tests/unit/test_postgres_source.py new file mode 100644 index 00000000000000..1cc1ea581c92fe --- /dev/null +++ b/metadata-ingestion/tests/unit/test_postgres_source.py @@ -0,0 +1,21 @@ +from datahub.ingestion.source.postgres import PostgresConfig + + +def _base_config(): + return {"username": "user", "password": "password", "host_port": "host:1521"} + + +def test_database_identifier_takes_precendence(): + config = PostgresConfig.parse_obj( + { + **_base_config(), + "database_identifier": "ops_database", + "database": "postgres", + } + ) + assert config.get_identifier("superset", "logs") == "ops_database.superset.logs" + + +def test_database_in_identifier(): + config = PostgresConfig.parse_obj({**_base_config(), "database": "postgres"}) + assert config.get_identifier("superset", "logs") == "postgres.superset.logs"