diff --git a/metadata-ingestion/README.md b/metadata-ingestion/README.md index 286696cc27a25..0ba88136867c5 100644 --- a/metadata-ingestion/README.md +++ b/metadata-ingestion/README.md @@ -206,7 +206,7 @@ source: password: pass host_port: localhost:1433 database: DemoDatabase - include_views: True + include_views: True # whether to include views, defaults to True table_pattern: deny: - "^.*\\.sys_.*" # deny all tables that start with sys_ @@ -243,6 +243,7 @@ source: password: pass host_port: localhost:1433 database: DemoDatabase + include_views: True # whether to include views, defaults to True uri_args: # See https://docs.microsoft.com/en-us/sql/connect/odbc/dsn-connection-string-attribute?view=sql-server-ver15 driver: "ODBC Driver 17 for SQL Server" @@ -317,6 +318,7 @@ source: password: pass host_port: localhost:5432 database: DemoDatabase + include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above # options is same as above ``` @@ -337,6 +339,7 @@ source: password: pass host_port: example.something.us-west-2.redshift.amazonaws.com:5439 database: DemoDatabase + include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above # options is same as above ``` @@ -358,6 +361,7 @@ source: database: db_name warehouse: "COMPUTE_WH" # optional role: "sysadmin" # optional + include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above # options is same as above ``` @@ -402,6 +406,7 @@ source: host_port: localhost:5432 database: dbname service_name: svc # omit database if using this option + include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above # options is same as above ``` @@ -442,6 +447,7 @@ source: options: # options is same as above # See https://github.com/mxmzdlv/pybigquery#authentication for details. credentials_path: "/path/to/keyfile.json" # optional + include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above ``` @@ -466,6 +472,7 @@ source: # See https://docs.aws.amazon.com/athena/latest/ug/querying.html # However, the athena driver will transparently fetch these results as you would expect from any other sql client. work_group: athena_workgroup # "primary" + include_views: True # whether to include views, defaults to True # table_pattern/schema_pattern is same as above ``` @@ -485,11 +492,11 @@ source: config: aws_region: # aws_region_name, i.e. "eu-west-1" env: # environment for the DatasetSnapshot URN, one of "DEV", "EI", "PROD" or "CORP". Defaults to "PROD". - + # Filtering patterns for databases and tables to scan database_pattern: # Optional, to filter databases scanned, same as schema_pattern above. table_pattern: # Optional, to filter tables scanned, same as table_pattern above. - + # Credentials. If not specified here, these are picked up according to boto3 rules. # (see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html) aws_access_key_id: # Optional. @@ -541,6 +548,7 @@ source: options: {} # same as above schema_pattern: {} # same as above table_pattern: {} # same as above + include_views: True # whether to include views, defaults to True ``` ### MongoDB `mongodb` diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py index 02b6e097fb977..565244ccfbe87 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_common.py @@ -73,7 +73,7 @@ class SQLAlchemyConfig(ConfigModel): table_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() view_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() - include_views: Optional[bool] = False + include_views: Optional[bool] = True include_tables: Optional[bool] = True @abstractmethod diff --git a/metadata-ingestion/tests/integration/mysql/mysql_mce_golden.json b/metadata-ingestion/tests/integration/mysql/mysql_mce_golden.json index 0b2c5391d4fce..2029764760b12 100644 --- a/metadata-ingestion/tests/integration/mysql/mysql_mce_golden.json +++ b/metadata-ingestion/tests/integration/mysql/mysql_mce_golden.json @@ -299,5 +299,118 @@ } }, "proposedDelta": null +}, +{ + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,metagalaxy.metadata_index_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `metadata_index_view` AS select `metadata_index`.`id` AS `id`,`metadata_index`.`urn` AS `urn`,`metadata_index`.`path` AS `path`,`metadata_index`.`doubleVal` AS `doubleVal` from `metadata_index`", + "is_view": "True" + }, + "externalUrl": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "metagalaxy.metadata_index_view", + "platform": "urn:li:dataPlatform:mysql", + "version": 0, + "created": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "lastModified": { + "time": 1615443388097, + "actor": "urn:li:corpuser:etl", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "BIGINT()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + }, + { + "fieldPath": "urn", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=200)", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + }, + { + "fieldPath": "path", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=150)", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + }, + { + "fieldPath": "doubleVal", + "jsonPath": null, + "nullable": true, + "description": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "DOUBLE(asdecimal=True)", + "recursive": false, + "globalTags": null, + "glossaryTerms": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null + } + } + ] + } + }, + "proposedDelta": null } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mysql/setup/setup.sql b/metadata-ingestion/tests/integration/mysql/setup/setup.sql index 5775556838b41..fdcccc6cc9c5b 100644 --- a/metadata-ingestion/tests/integration/mysql/setup/setup.sql +++ b/metadata-ingestion/tests/integration/mysql/setup/setup.sql @@ -43,3 +43,6 @@ CREATE TABLE metadata_index ( INDEX stringIndex (`urn`,`aspect`,`path`,`stringVal`), INDEX doubleIndex (`urn`,`aspect`,`path`,`doubleVal`) ) COMMENT="This is a table comment"; + +-- create view for testing +CREATE VIEW metadata_index_view AS SELECT id, urn, path, doubleVal FROM metadata_index; \ No newline at end of file