Skip to content

Commit

Permalink
fix(profiles): prevent NoneType exception when profiling empty datasets (datahub-project#3144)
Browse files Browse the repository at this point in the history


Co-authored-by: Sergio Gómez <sergio.gomez.villamor@adevinta.com>
  • Loading branch information
2 people authored and gabe-lyons committed Aug 31, 2021
1 parent 214509b commit 8cfdd80
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,10 @@ def _handle_convert_column_evrs( # noqa: C901 (complexity)
column_profile.uniqueProportion = res["observed_value"]
elif exp == "expect_column_values_to_not_be_null":
column_profile.nullCount = res["unexpected_count"]
if "unexpected_percent" in res:
if (
"unexpected_percent" in res
and res["unexpected_percent"] is not None
):
column_profile.nullProportion = res["unexpected_percent"] / 100
elif exp == "expect_column_values_to_not_match_regex":
# ignore; generally used for whitespace checks using regex r"^\s+|\s+$"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -584,5 +584,80 @@
"contentType": "application/json"
},
"systemMetadata": null
},
{
"auditHeader": null,
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "test_cases.test_empty",
"platform": "urn:li:dataPlatform:mysql",
"version": 0,
"created": {
"time": 0,
"actor": "urn:li:corpuser:unknown",
"impersonator": null
},
"lastModified": {
"time": 0,
"actor": "urn:li:corpuser:unknown",
"impersonator": null
},
"deleted": null,
"dataset": null,
"cluster": null,
"hash": "",
"platformSchema": {
"com.linkedin.pegasus2avro.schema.MySqlDDL": {
"tableSchema": ""
}
},
"fields": [
{
"fieldPath": "dummy",
"jsonPath": null,
"nullable": true,
"description": null,
"type": {
"type": {
"com.linkedin.pegasus2avro.schema.StringType": {}
}
},
"nativeDataType": "VARCHAR(length=50)",
"recursive": false,
"globalTags": null,
"glossaryTerms": null,
"isPartOfKey": false
}
],
"primaryKeys": null,
"foreignKeysSpecs": null
}
}
]
}
},
"proposedDelta": null,
"systemMetadata": {
"lastObserved": 1586847600000,
"runId": "mysql-test",
"properties": null
}
},
{
"auditHeader": null,
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,test_cases.test_empty,PROD)",
"entityKeyAspect": null,
"changeType": "UPSERT",
"aspectName": "datasetProfile",
"aspect": {
"value": "{\"timestampMillis\": 1586847600000, \"rowCount\": 0, \"columnCount\": 1, \"fieldProfiles\": [{\"fieldPath\": \"dummy\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}",
"contentType": "application/json"
},
"systemMetadata": null
}
]
]
2 changes: 2 additions & 0 deletions metadata-ingestion/tests/integration/mysql/mysql_to_file.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ source:
allow:
- "^metagalaxy"
- "^northwind"
- "^test_cases"
profile_pattern:
allow:
- "^northwind.*\$"
- "^test_cases.*\$"
profiling:
enabled: True

Expand Down
16 changes: 16 additions & 0 deletions metadata-ingestion/tests/integration/mysql/setup/setup.sql
Original file line number Diff line number Diff line change
Expand Up @@ -89,5 +89,21 @@ INSERT INTO `customers` (`id`, `company`, `last_name`, `first_name`, `email_addr
INSERT INTO `customers` (`id`, `company`, `last_name`, `first_name`, `email_address`) VALUES (5, 'Company E', 'Donnell', 'Martin', NULL);
# 5 records

-- -----------------------------------------------------
-- Schema for testing different scenarios
-- -----------------------------------------------------

-- Drop any existing `test_cases` schema, recreate it, and make it current.
DROP SCHEMA IF EXISTS `test_cases`;
CREATE SCHEMA IF NOT EXISTS `test_cases`
    DEFAULT CHARACTER SET latin1;
USE `test_cases`;

-- `test_cases`.`test_empty` deliberately holds no rows: only the table
-- definition is created here and no INSERT follows it.
CREATE TABLE IF NOT EXISTS `test_cases`.`test_empty` (
    `dummy` VARCHAR(50) NULL DEFAULT NULL
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8;


-- Set the session's FOREIGN_KEY_CHECKS and UNIQUE_CHECKS back to the values
-- held in the @OLD_* user variables.
-- NOTE(review): presumably those variables are saved near the top of this
-- script, outside this excerpt — confirm.
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;

0 comments on commit 8cfdd80

Please sign in to comment.