From d89be3ac4a77fb29c08afa0a700012781a083b7b Mon Sep 17 00:00:00 2001 From: Alison Date: Sun, 28 Oct 2018 14:19:26 -0500 Subject: [PATCH 1/2] athena&presto support column type and partition key --- client/app/components/QueryEditor.jsx | 14 +++++++-- .../components/queries/schema-browser.html | 4 +-- redash/query_runner/athena.py | 30 +++++++++++------- redash/query_runner/presto.py | 31 +++++++++++++------ tests/query_runner/test_athena.py | 8 ++--- 5 files changed, 57 insertions(+), 30 deletions(-) diff --git a/client/app/components/QueryEditor.jsx b/client/app/components/QueryEditor.jsx index 008ede9d49..fcac701a9f 100644 --- a/client/app/components/QueryEditor.jsx +++ b/client/app/components/QueryEditor.jsx @@ -38,8 +38,18 @@ function buildKeywordsFromSchema(schema) { schema.forEach((table) => { keywords[table.name] = 'Table'; table.columns.forEach((c) => { - keywords[c] = 'Column'; - keywords[`${table.name}.${c}`] = 'Column'; + if (typeof c === 'string') { + keywords[c] = 'Column'; + keywords[`${table.name}.${c}`] = 'Column'; + } else if (typeof c === 'object') { + c.forEach((a, b) => { + if (a === 'column_name') { + const columnName = b; + keywords[columnName] = 'Column'; + keywords[`${table.name}.${columnName}`] = 'Column'; + } + }); + } }); }); diff --git a/client/app/components/queries/schema-browser.html b/client/app/components/queries/schema-browser.html index 6e3f518059..457037ce64 100644 --- a/client/app/components/queries/schema-browser.html +++ b/client/app/components/queries/schema-browser.html @@ -20,9 +20,9 @@ ng-click="$ctrl.itemSelected($event, [table.name])">
-
{{column}} +
{{column.column_name}} ({{column.column_type}}) + ng-click="$ctrl.itemSelected($event, [column.column_name])">
diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index e7f1bb4ad5..2e484dfb9a 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -4,6 +4,7 @@ from redash.query_runner import * from redash.settings import parse_boolean from redash.utils import json_dumps, json_loads +from .presto import format_schema logger = logging.getLogger(__name__) ANNOTATE_QUERY = parse_boolean(os.environ.get('ATHENA_ANNOTATE_QUERY', 'true')) @@ -127,11 +128,22 @@ def __get_schema_from_glue(self): iterator = paginator.paginate(DatabaseName=database['Name']) for table in iterator.search('TableList[]'): table_name = '%s.%s' % (database['Name'], table['Name']) + columns = [] if table_name not in schema: - column = [columns['Name'] for columns in table['StorageDescriptor']['Columns']] - schema[table_name] = {'name': table_name, 'columns': column} - for partition in table.get('PartitionKeys', []): - schema[table_name]['columns'].append(partition['Name']) + schema[table_name] = {'name': table_name, 'columns': []} + + for partition in table.get('PartitionKeys', []): + columns.append({ + 'column_name':partition['Name'], + 'extra_info': 'partition key' + }) + for column in table['StorageDescriptor']['Columns']: + columns.append({ + 'column_name': column['Name'], + 'column_type': column['Type'], + }) + schema[table_name]['columns'].append(columns) + print schema.values() return schema.values() @@ -139,9 +151,8 @@ def get_schema(self, get_stats=False): if self.configuration.get('glue', False): return self.__get_schema_from_glue() - schema = {} query = """ - SELECT table_schema, table_name, column_name + SELECT table_schema, table_name, column_name, data_type as column_type, extra_info FROM information_schema.columns WHERE table_schema NOT IN ('information_schema') """ @@ -150,12 +161,7 @@ def get_schema(self, get_stats=False): if error is not None: raise Exception("Failed getting schema.") - results = json_loads(results) - for row in 
def format_schema(results):
    """
    Group ``information_schema.columns`` rows by table for the UI schema browser.

    Shared by the Athena and Presto query runners.

    ``results`` is the parsed query result: a dict whose ``'rows'`` entry is a
    list of row dicts. Every row must carry ``table_schema``, ``table_name``
    and ``column_name``; ``column_type`` and ``extra_info`` are optional and
    default to ``None`` when absent.

    Returns a dict keyed by ``'<schema>.<table>'``. Each value has the form
    ``{'name': '<schema>.<table>', 'columns': [...]}`` where ``columns`` is a
    flat list with one ``{'column_name', 'column_type', 'extra_info'}`` dict
    per row, in the order the rows were received. An empty or missing
    ``'rows'`` list yields an empty dict.

    Raises ``KeyError`` if a row lacks one of the three required keys.
    """
    schema = {}
    for row in results.get('rows', []):
        table_name = '{}.{}'.format(row['table_schema'], row['table_name'])
        # Create the table entry on first sight, then keep appending to the
        # same list so every column of the table is retained (not just the
        # last row seen).
        table = schema.setdefault(table_name, {'name': table_name, 'columns': []})
        table['columns'].append({
            'column_name': row['column_name'],
            'column_type': row.get('column_type'),
            'extra_info': row.get('extra_info'),
        })
    return schema
a/tests/query_runner/test_athena.py b/tests/query_runner/test_athena.py index fe444de64f..f490a76937 100644 --- a/tests/query_runner/test_athena.py +++ b/tests/query_runner/test_athena.py @@ -72,7 +72,7 @@ def test_external_table(self): {'DatabaseName': 'test1'}, ) with self.stubber: - assert query_runner.get_schema() == [{'columns': ['row_id'], 'name': 'test1.jdbc_table'}] + assert query_runner.get_schema() == [{'columns': [[{'column_name':'row_id','column_type':'int'}]], 'name': 'test1.jdbc_table'}] def test_partitioned_table(self): """ @@ -118,7 +118,7 @@ def test_partitioned_table(self): {'DatabaseName': 'test1'}, ) with self.stubber: - assert query_runner.get_schema() == [{'columns': ['sk', 'category'], 'name': 'test1.partitioned_table'}] + assert query_runner.get_schema() == [{'columns': [[{'extra_info':'partition key','column_name':'category'},{'column_type':'int','column_name':'sk'}]], 'name': 'test1.partitioned_table'}] def test_view(self): query_runner = Athena({'glue': True, 'region': 'mars-east-1'}) @@ -150,7 +150,7 @@ def test_view(self): {'DatabaseName': 'test1'}, ) with self.stubber: - assert query_runner.get_schema() == [{'columns': ['sk'], 'name': 'test1.view'}] + assert query_runner.get_schema() == [{'columns': [[{'column_name':'sk', 'column_type':'int'}]], 'name': 'test1.view'}] def test_dodgy_table_does_not_break_schema_listing(self): """ @@ -187,4 +187,4 @@ def test_dodgy_table_does_not_break_schema_listing(self): {'DatabaseName': 'test1'}, ) with self.stubber: - assert query_runner.get_schema() == [{'columns': ['region'], 'name': 'test1.csv'}] + assert query_runner.get_schema() == [{'columns': [[{'column_name':'region','column_type':'string'}]], 'name': 'test1.csv'}] From b0bf0051c80cb5358fdccf53952d0b2b9b439ca1 Mon Sep 17 00:00:00 2001 From: Alison Date: Sun, 28 Oct 2018 19:20:16 -0500 Subject: [PATCH 2/2] fix codeclimate issues for athena&presto partition keys --- redash/query_runner/athena.py | 4 ++-- redash/query_runner/presto.py | 1 + 2 
files changed, 3 insertions(+), 2 deletions(-) diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 2e484dfb9a..a6f6dc8e4c 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -131,10 +131,10 @@ def __get_schema_from_glue(self): columns = [] if table_name not in schema: schema[table_name] = {'name': table_name, 'columns': []} - + for partition in table.get('PartitionKeys', []): columns.append({ - 'column_name':partition['Name'], + 'column_name': partition['Name'], 'extra_info': 'partition key' }) for column in table['StorageDescriptor']['Columns']: diff --git a/redash/query_runner/presto.py b/redash/query_runner/presto.py index e5a6aa7ba9..0d62e9da55 100644 --- a/redash/query_runner/presto.py +++ b/redash/query_runner/presto.py @@ -28,6 +28,7 @@ "date": TYPE_DATE, } + def format_schema(results): """ This function formats the schema, table, and columns of Athena and Presto