Skip to content

Commit

Permalink
fix: Issues with inconsistency in case conversion (#388)
Browse files (browse the repository at this point in the history)
* Updated README.md to include DremioExtractor

Signed-off-by: Josh Howard <josh.t.howard@ey.com>

* Removed case conversion from databuilder models

Signed-off-by: Josh Howard <josh.t.howard@ey.com>

* Fixed test deprecation warnings due to assertEquals

Signed-off-by: Josh Howard <josh.t.howard@ey.com>

Co-authored-by: Josh Howard <josh.t.howard@ey.com>
  • Loading branch information
joshthoward and Josh Howard authored Oct 19, 2020
1 parent 46f50f8 commit 9595866
Show file tree
Hide file tree
Showing 22 changed files with 195 additions and 196 deletions.
12 changes: 6 additions & 6 deletions databuilder/models/table_column_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ def __init__(self,
user_email: str,
read_count: int = 1
) -> None:
self.database = database.lower()
self.cluster = cluster.lower()
self.schema = schema.lower()
self.table = table.lower()
self.column = column.lower()
self.user_email = user_email.lower()
self.database = database
self.cluster = cluster
self.schema = schema
self.table = table
self.column = column
self.user_email = user_email
self.read_count = read_count

def __repr__(self) -> str:
Expand Down
9 changes: 4 additions & 5 deletions databuilder/models/table_lineage.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@ def __init__(self,
cluster: str,
downstream_deps: List=None,
) -> None:
self.db = db_name.lower()
self.schema = schema.lower()
self.table = table_name.lower()

self.cluster = cluster.lower() if cluster else 'gold'
self.db = db_name
self.schema = schema
self.table = table_name
self.cluster = cluster if cluster else 'gold'
# a list of downstream dependencies, each of which will follow
# the same key
self.downstream_deps = downstream_deps or []
Expand Down
10 changes: 5 additions & 5 deletions databuilder/models/table_owner.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ def __init__(self,
owners: Union[List, str],
cluster: str = 'gold',
) -> None:
self.db = db_name.lower()
self.schema = schema.lower()
self.table = table_name.lower()
self.db = db_name
self.schema = schema
self.table = table_name
if isinstance(owners, str):
owners = owners.split(',')
self.owners = [owner.lower().strip() for owner in owners]
self.owners = [owner.strip() for owner in owners]

self.cluster = cluster.lower()
self.cluster = cluster
self._node_iter = iter(self.create_nodes())
self._relation_iter = iter(self.create_relation())

Expand Down
8 changes: 4 additions & 4 deletions databuilder/models/table_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ def __init__(self,
source: str,
source_type: str='github',
) -> None:
self.db = db_name.lower()
self.schema = schema.lower()
self.table = table_name.lower()
self.db = db_name
self.schema = schema
self.table = table_name

self.cluster = cluster.lower() if cluster else 'gold'
self.cluster = cluster if cluster else 'gold'
# source is the source file location
self.source = source
self.source_type = source_type
Expand Down
6 changes: 3 additions & 3 deletions databuilder/models/table_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ def __init__(self,
if schema is None:
self.schema, self.table = table_name.split('.')
else:
self.table = table_name.lower()
self.schema = schema.lower()
self.table = table_name
self.schema = schema
self.db = db
self.col_name = col_name.lower()
self.col_name = col_name
self.start_epoch = start_epoch
self.end_epoch = end_epoch
self.cluster = cluster
Expand Down
16 changes: 8 additions & 8 deletions databuilder/models/watermark.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,24 @@ def __init__(self,
schema: str,
table_name: str,
part_name: str,
part_type: str='high_watermark',
cluster: str='gold',
part_type: str = 'high_watermark',
cluster: str = 'gold',
) -> None:
self.create_time = create_time
self.database = database.lower()
self.schema = schema.lower()
self.table = table_name.lower()
self.database = database
self.schema = schema
self.table = table_name
self.parts: List[Tuple[str, str]] = []

if '=' not in part_name:
raise Exception('Only partition table has high watermark')

# currently we don't consider nested partitions
idx = part_name.find('=')
name, value = part_name.lower()[:idx], part_name.lower()[idx + 1:]
name, value = part_name[:idx], part_name[idx + 1:]
self.parts = [(name, value)]
self.part_type = part_type.lower()
self.cluster = cluster.lower()
self.part_type = part_type
self.cluster = cluster
self._node_iter = iter(self.create_nodes())
self._relation_iter = iter(self.create_relation())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ def test_dashboard_chart_extractor_actual_record(self) -> None:
}

record = extractor.extract()
self.assertEquals(record._dashboard_group_id, 'ggg')
self.assertEquals(record._dashboard_id, 'ddd')
self.assertEquals(record._chart_name, 'some chart')
self.assertEquals(record._product, 'mode')
self.assertEqual(record._dashboard_group_id, 'ggg')
self.assertEqual(record._dashboard_id, 'ddd')
self.assertEqual(record._chart_name, 'some chart')
self.assertEqual(record._product, 'mode')


if __name__ == '__main__':
Expand Down
64 changes: 32 additions & 32 deletions tests/unit/extractor/test_bigquery_metadata_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,13 +194,13 @@ def test_table_without_schema(self, mock_build: Any) -> None:
scope=extractor.get_scope()))
result = extractor.extract()

self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.name, 'nested_recs')
self.assertEquals(result.description._text, '')
self.assertEquals(result.columns, [])
self.assertEquals(result.is_view, False)
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.name, 'nested_recs')
self.assertEqual(result.description._text, '')
self.assertEqual(result.columns, [])
self.assertEqual(result.is_view, False)

@patch('databuilder.extractor.base_bigquery_extractor.build')
def test_table_without_columns(self, mock_build: Any) -> None:
Expand All @@ -210,13 +210,13 @@ def test_table_without_columns(self, mock_build: Any) -> None:
scope=extractor.get_scope()))
result = extractor.extract()

self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.name, 'nested_recs')
self.assertEquals(result.description._text, "")
self.assertEquals(result.columns, [])
self.assertEquals(result.is_view, False)
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.name, 'nested_recs')
self.assertEqual(result.description._text, "")
self.assertEqual(result.columns, [])
self.assertEqual(result.is_view, False)

@patch('databuilder.extractor.base_bigquery_extractor.build')
def test_view(self, mock_build: Any) -> None:
Expand All @@ -226,7 +226,7 @@ def test_view(self, mock_build: Any) -> None:
scope=extractor.get_scope()))
result = extractor.extract()
self.assertIsInstance(result, TableMetadata)
self.assertEquals(result.is_view, True)
self.assertEqual(result.is_view, True)

@patch('databuilder.extractor.base_bigquery_extractor.build')
def test_normal_table(self, mock_build: Any) -> None:
Expand All @@ -236,17 +236,17 @@ def test_normal_table(self, mock_build: Any) -> None:
scope=extractor.get_scope()))
result = extractor.extract()

self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.name, 'nested_recs')
self.assertEquals(result.description._text, "")
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.name, 'nested_recs')
self.assertEqual(result.description._text, "")

first_col = result.columns[0]
self.assertEquals(first_col.name, 'test')
self.assertEquals(first_col.type, 'STRING')
self.assertEquals(first_col.description._text, 'some_description')
self.assertEquals(result.is_view, False)
self.assertEqual(first_col.name, 'test')
self.assertEqual(first_col.type, 'STRING')
self.assertEqual(first_col.description._text, 'some_description')
self.assertEqual(result.is_view, False)

@patch('databuilder.extractor.base_bigquery_extractor.build')
def test_table_with_nested_records(self, mock_build: Any) -> None:
Expand All @@ -257,14 +257,14 @@ def test_table_with_nested_records(self, mock_build: Any) -> None:
result = extractor.extract()

first_col = result.columns[0]
self.assertEquals(first_col.name, 'nested')
self.assertEquals(first_col.type, 'RECORD')
self.assertEqual(first_col.name, 'nested')
self.assertEqual(first_col.type, 'RECORD')
second_col = result.columns[1]
self.assertEquals(second_col.name, 'nested.nested2')
self.assertEquals(second_col.type, 'RECORD')
self.assertEqual(second_col.name, 'nested.nested2')
self.assertEqual(second_col.type, 'RECORD')
third_col = result.columns[2]
self.assertEquals(third_col.name, 'nested.nested2.ahah')
self.assertEquals(third_col.type, 'STRING')
self.assertEqual(third_col.name, 'nested.nested2.ahah')
self.assertEqual(third_col.type, 'STRING')

@patch('databuilder.extractor.base_bigquery_extractor.build')
def test_keypath_and_pagesize_can_be_set(self, mock_build: Any) -> None:
Expand Down Expand Up @@ -299,5 +299,5 @@ def test_table_part_of_table_date_range(self, mock_build: Any) -> None:
count += 1
result = extractor.extract()

self.assertEquals(count, 1)
self.assertEquals(table_name, 'date_range_')
self.assertEqual(count, 1)
self.assertEqual(table_name, 'date_range_')
84 changes: 42 additions & 42 deletions tests/unit/extractor/test_bigquery_watermark_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,22 +131,22 @@ def test_table_with_default_partitions(self, mock_build: Any) -> None:
extractor.init(Scoped.get_scoped_conf(conf=self.conf,
scope=extractor.get_scope()))
result = extractor.extract()
self.assertEquals(result.part_type, 'low_watermark')
self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.table, 'other')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEquals(result.parts, [('_partitiontime', '20180802')])
self.assertEqual(result.part_type, 'low_watermark')
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.table, 'other')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEqual(result.parts, [('_PARTITIONTIME', '20180802')])

result = extractor.extract()
self.assertEquals(result.part_type, 'high_watermark')
self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.table, 'other')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEquals(result.parts, [('_partitiontime', '20180804')])
self.assertEqual(result.part_type, 'high_watermark')
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.table, 'other')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEqual(result.parts, [('_PARTITIONTIME', '20180804')])

@patch('databuilder.extractor.base_bigquery_extractor.build')
def test_table_with_field_partitions(self, mock_build: Any) -> None:
Expand All @@ -156,23 +156,23 @@ def test_table_with_field_partitions(self, mock_build: Any) -> None:
scope=extractor.get_scope()))
result = extractor.extract()
assert result is not None
self.assertEquals(result.part_type, 'low_watermark')
self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.table, 'other')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEquals(result.parts, [('processed_date', '20180802')])
self.assertEqual(result.part_type, 'low_watermark')
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.table, 'other')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEqual(result.parts, [('processed_date', '20180802')])

result = extractor.extract()
assert result is not None
self.assertEquals(result.part_type, 'high_watermark')
self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.table, 'other')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEquals(result.parts, [('processed_date', '20180804')])
self.assertEqual(result.part_type, 'high_watermark')
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.table, 'other')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.create_time, datetime.fromtimestamp(1547512241).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEqual(result.parts, [('processed_date', '20180804')])

@patch('databuilder.extractor.base_bigquery_extractor.build')
def test_keypath_can_be_set(self, mock_build: Any) -> None:
Expand Down Expand Up @@ -200,20 +200,20 @@ def test_table_part_of_table_date_range(self, mock_build: Any) -> None:

result = extractor.extract()
assert result is not None
self.assertEquals(result.part_type, 'low_watermark')
self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.table, 'date_range_')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.create_time, datetime.fromtimestamp(1557577779).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEquals(result.parts, [('__table__', '20190101')])
self.assertEqual(result.part_type, 'low_watermark')
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.table, 'date_range_')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.create_time, datetime.fromtimestamp(1557577779).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEqual(result.parts, [('__table__', '20190101')])

result = extractor.extract()
assert result is not None
self.assertEquals(result.part_type, 'high_watermark')
self.assertEquals(result.database, 'bigquery')
self.assertEquals(result.schema, 'fdgdfgh')
self.assertEquals(result.table, 'date_range_')
self.assertEquals(result.cluster, 'your-project-here')
self.assertEquals(result.create_time, datetime.fromtimestamp(1557577779).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEquals(result.parts, [('__table__', '20190102')])
self.assertEqual(result.part_type, 'high_watermark')
self.assertEqual(result.database, 'bigquery')
self.assertEqual(result.schema, 'fdgdfgh')
self.assertEqual(result.table, 'date_range_')
self.assertEqual(result.cluster, 'your-project-here')
self.assertEqual(result.create_time, datetime.fromtimestamp(1557577779).strftime('%Y-%m-%d %H:%M:%S'))
self.assertEqual(result.parts, [('__table__', '20190102')])
10 changes: 5 additions & 5 deletions tests/unit/extractor/test_csv_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def test_extraction_with_model_class(self) -> None:
scope=extractor.get_scope()))

result = extractor.extract()
self.assertEquals(result.name, 'test_table1')
self.assertEquals(result.description._text, '1st test table')
self.assertEquals(result.database, 'hive')
self.assertEquals(result.cluster, 'gold')
self.assertEquals(result.schema, 'test_schema')
self.assertEqual(result.name, 'test_table1')
self.assertEqual(result.description._text, '1st test table')
self.assertEqual(result.database, 'hive')
self.assertEqual(result.cluster, 'gold')
self.assertEqual(result.schema, 'test_schema')
6 changes: 3 additions & 3 deletions tests/unit/extractor/test_generic_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_extraction_with_model_class(self) -> None:
scope=extractor.get_scope()))

result = extractor.extract()
self.assertEquals(result.timestamp, 10000000)
self.assertEqual(result.timestamp, 10000000)

def test_extraction_without_model_class(self) -> None:
"""
Expand All @@ -44,5 +44,5 @@ def test_extraction_without_model_class(self) -> None:
extractor.init(Scoped.get_scoped_conf(conf=conf,
scope=extractor.get_scope()))

self.assertEquals(extractor.extract(), {'foo': 1})
self.assertEquals(extractor.extract(), {'bar': 2})
self.assertEqual(extractor.extract(), {'foo': 1})
self.assertEqual(extractor.extract(), {'bar': 2})
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ def test_extraction_with_model_class(self, mock_time: Any) -> None:
scope=extractor.get_scope()))

result = extractor.extract()
self.assertEquals(result.timestamp, 10000000)
self.assertEqual(result.timestamp, 10000000)
Loading

0 comments on commit 9595866

Please sign in to comment.