|
1 | 1 | import json
|
2 | 2 | import logging
|
3 | 3 | import os
|
| 4 | +import boto3 |
4 | 5 |
|
5 | 6 | from redash.query_runner import *
|
6 | 7 | from redash.settings import parse_boolean
|
@@ -74,6 +75,10 @@ def configuration_schema(cls):
|
74 | 75 | 'title': 'Schema Name',
|
75 | 76 | 'default': 'default'
|
76 | 77 | },
|
| 78 | + 'glue': { |
| 79 | + 'type': 'boolean', |
| 80 | + 'title': 'Use Glue Data Catalog', |
| 81 | + }, |
77 | 82 | },
|
78 | 83 | 'required': ['region', 's3_staging_dir'],
|
79 | 84 | 'order': ['region', 'aws_access_key', 'aws_secret_key', 's3_staging_dir', 'schema'],
|
@@ -112,7 +117,30 @@ def type(cls):
|
# Constructor: hands the supplied configuration straight to the parent class initializer.
112 | 117 | def __init__(self, configuration):
|
113 | 118 | super(Athena, self).__init__(configuration)
|
114 | 119 |
|
def __get_schema_from_glue(self):
    """Build the schema listing from the AWS Glue Data Catalog.

    Creates a boto3 Glue client from the configured region and the
    optional access/secret key pair, then walks every database and every
    table in the catalog.

    Returns:
        A list of ``{'name': '<database>.<table>', 'columns': [...]}``
        dicts. Each column list holds the table's regular column names
        followed by its partition key names.
    """
    client = boto3.client(
        'glue',
        aws_access_key_id=self.configuration.get('aws_access_key', None),
        aws_secret_access_key=self.configuration.get('aws_secret_key', None),
        region_name=self.configuration['region']
    )
    schema = {}

    # A single get_databases / get_tables call returns only one page of
    # results; paginators follow NextToken so large catalogs are not
    # silently truncated.
    database_pages = client.get_paginator('get_databases').paginate()
    table_paginator = client.get_paginator('get_tables')

    for database_page in database_pages:
        for database in database_page['DatabaseList']:
            database_name = database['Name']
            table_pages = table_paginator.paginate(DatabaseName=database_name)
            for table_page in table_pages:
                for table in table_page['TableList']:
                    table_name = '%s.%s' % (database_name, table['Name'])
                    if table_name in schema:
                        continue

                    # StorageDescriptor and PartitionKeys are optional in
                    # the Glue Table structure; a table lacking either must
                    # not abort the whole schema refresh with a KeyError.
                    storage = table.get('StorageDescriptor') or {}
                    column_names = [
                        column['Name']
                        for column in storage.get('Columns') or []
                    ]
                    column_names.extend(
                        partition['Name']
                        for partition in table.get('PartitionKeys') or []
                    )
                    schema[table_name] = {
                        'name': table_name,
                        'columns': column_names,
                    }

    # Materialise the result so callers get a real list on Python 3 as well.
    return list(schema.values())
| 139 | + |
115 | 140 | def get_schema(self, get_stats=False):
|
| 141 | + if self.configuration.get('glue', False): |
| 142 | + return self.__get_schema_from_glue() |
| 143 | + |
116 | 144 | schema = {}
|
117 | 145 | query = """
|
118 | 146 | SELECT table_schema, table_name, column_name
|
|
0 commit comments