Skip to content

Commit

Permalink
Closes #934, #935: Remove type from schema browser and don't show empty example column in schema drawer (#936)
Browse files Browse the repository at this point in the history

* Closes #934, #935: Remove type from schema browser and don't show empty example column

* Speed up schema fetch requests with fewer postgres queries.

* Add column metadata to Athena glue processing.

* Fix bug assuming 'metadata' exists for every table.
  • Loading branch information
Marina Samuel committed Apr 11, 2019
1 parent 15c9e7a commit 4e61701
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 24 deletions.
17 changes: 12 additions & 5 deletions client/app/components/queries/SchemaData.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,20 @@ class SchemaData extends React.PureComponent {
dataIndex: 'type',
width: 400,
key: 'type',
}, {
title: 'Example',
dataIndex: 'example',
width: 400,
key: 'example',
}];

const hasExample =
this.props.tableMetadata.some(columnMetadata => columnMetadata.example);

if (hasExample) {
columns.push({
title: 'Example',
dataIndex: 'example',
width: 400,
key: 'example',
});
}

return (
<Drawer
title={this.props.tableName}
Expand Down
1 change: 0 additions & 1 deletion client/app/components/queries/schema-browser.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
<div uib-collapse="table.collapsed">
<div ng-repeat="column in table.columns | filter:$ctrl.schemaFilterColumn track by column.key" class="table-open">
{{column.name}}
<span ng-if="column.type !== undefined">({{column.type}})</span>
<i class="fa fa-angle-double-right copy-to-editor" aria-hidden="true"
ng-click="$ctrl.itemSelected($event, [column.name])"></i>
</div>
Expand Down
37 changes: 25 additions & 12 deletions redash/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,24 +206,37 @@ def delete(self):

def get_schema(self):
schema = []
tables = TableMetadata.query.filter(TableMetadata.data_source_id == self.id).all()
for table in tables:
if not table.exists:
continue
columns_by_table_id = {}

table_info = {
'name': table.name,
'exists': table.exists,
'hasColumnMetadata': table.column_metadata,
'columns': []}
columns = ColumnMetadata.query.filter(ColumnMetadata.table_id == table.id)
table_info['columns'] = sorted([{
tables = TableMetadata.query.filter(
TableMetadata.data_source_id == self.id,
TableMetadata.exists.is_(True),
).all()
table_ids = [table.id for table in tables]

columns = ColumnMetadata.query.filter(
ColumnMetadata.exists.is_(True),
ColumnMetadata.table_id.in_(table_ids),
).all()

for column in columns:
columns_by_table_id.setdefault(column.table_id, []).append({
'key': column.id,
'name': column.name,
'type': column.type,
'exists': column.exists,
'example': column.example
} for column in columns if column.exists == True], key=itemgetter('name'))
})

for table in tables:
table_info = {
'name': table.name,
'exists': table.exists,
'hasColumnMetadata': table.column_metadata,
'columns': []}

table_info['columns'] = sorted(
columns_by_table_id.get(table.id, []), key=itemgetter('name'))
schema.append(table_info)

return sorted(schema, key=itemgetter('name'))
Expand Down
11 changes: 10 additions & 1 deletion redash/query_runner/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,18 @@ def __get_schema_from_glue(self):
table_name = '%s.%s' % (database['Name'], table['Name'])
if table_name not in schema:
column = [columns['Name'] for columns in table['StorageDescriptor']['Columns']]
schema[table_name] = {'name': table_name, 'columns': column}
metadata = [{
"name": column_data['Name'],
"type": column_data['Type']
} for column_data in table['StorageDescriptor']['Columns']]
schema[table_name] = {'name': table_name, 'columns': column, 'metadata': metadata}
for partition in table.get('PartitionKeys', []):
schema[table_name]['columns'].append(partition['Name'])
schema[table_name]['metadata'].append({
"name": partition['Name'],
"type": partition['Type']
})

return schema.values()

def get_schema(self, get_stats=False):
Expand Down
2 changes: 1 addition & 1 deletion redash/tasks/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ def refresh_schema(data_source_id):
"column_metadata": "metadata" in table
}
new_column_names[table_name] = table['columns']
new_column_metadata[table_name] = table['metadata']
new_column_metadata[table_name] = table.get('metadata', None)

insert_or_update_table_metadata(ds, existing_tables_set, table_data)
models.db.session.flush()
Expand Down
24 changes: 20 additions & 4 deletions tests/query_runner/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,11 @@ def test_external_table(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['row_id'], 'name': 'test1.jdbc_table'}]
assert query_runner.get_schema() == [{
'columns': ['row_id'],
'name': 'test1.jdbc_table',
'metadata': [{'type': 'int', 'name': 'row_id'}]
}]

def test_partitioned_table(self):
"""
Expand Down Expand Up @@ -118,7 +122,11 @@ def test_partitioned_table(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['sk', 'category'], 'name': 'test1.partitioned_table'}]
assert query_runner.get_schema() == [{
'columns': ['sk', 'category'],
'name': 'test1.partitioned_table',
'metadata': [{'type': 'int', 'name': 'sk'}, {'type': 'int', 'name': 'category'}]
}]

def test_view(self):
query_runner = Athena({'glue': True, 'region': 'mars-east-1'})
Expand Down Expand Up @@ -150,7 +158,11 @@ def test_view(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['sk'], 'name': 'test1.view'}]
assert query_runner.get_schema() == [{
'columns': ['sk'],
'name': 'test1.view',
'metadata': [{'type': 'int', 'name': 'sk'}]
}]

def test_dodgy_table_does_not_break_schema_listing(self):
"""
Expand Down Expand Up @@ -187,4 +199,8 @@ def test_dodgy_table_does_not_break_schema_listing(self):
{'DatabaseName': 'test1'},
)
with self.stubber:
assert query_runner.get_schema() == [{'columns': ['region'], 'name': 'test1.csv'}]
assert query_runner.get_schema() == [{
'columns': ['region'],
'name': 'test1.csv',
'metadata': [{'type': 'string', 'name': 'region'}]
}]

0 comments on commit 4e61701

Please sign in to comment.