From c5318471b910d9339b7482456bb6c9e586a57880 Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Wed, 19 Feb 2020 17:30:29 +0530 Subject: [PATCH 01/11] Remove embed option. Remove public sharing of dashboards. Allow DML queries in Athena. Add updates on redash home page. --- client/app/pages/home/home.html | 28 ++++++++++++++++++++++++++++ client/app/pages/queries/query.html | 2 -- docker-compose.yml | 3 +++ package-lock.json | 2 +- redash/handlers/dashboards.py | 3 ++- redash/handlers/embed.py | 1 + redash/permissions.py | 4 ++++ redash/query_runner/__init__.py | 8 ++++++++ redash/query_runner/athena.py | 6 +++++- 9 files changed, 52 insertions(+), 5 deletions(-) diff --git a/client/app/pages/home/home.html b/client/app/pages/home/home.html index 29d8eace91..75340ac569 100644 --- a/client/app/pages/home/home.html +++ b/client/app/pages/home/home.html @@ -34,6 +34,34 @@
+
+
+

Dunzo Updates

+
  • Do not name your saved with theses patterns. Any such saved query older than 7 days + will be removed from the system. +
      +
    • + New Query +
    • +
    • + Test Query +
    • +
    • + test_query +
    • +
    • + Copy of {any text} +
    • +
    +
  • +
    +
  • + Do add multiple tags to your saved queries. A tag with your name is helpful in filtering queries by user. +
  • + +
    +
    +

    Favorite Dashboards

    diff --git a/client/app/pages/queries/query.html b/client/app/pages/queries/query.html index 97148729b9..5232d00e67 100644 --- a/client/app/pages/queries/query.html +++ b/client/app/pages/queries/query.html @@ -263,8 +263,6 @@

    query="query" query-result="queryResult" query-executing="queryExecuting" - show-embed-dialog="showEmbedDialog" - embed="embed" apiKey="apiKey" selected-tab="selectedTab" open-add-to-dashboard-form="openAddToDashboardForm"> diff --git a/docker-compose.yml b/docker-compose.yml index ce0fcd257e..d2fe7fd80e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -43,6 +43,9 @@ services: # The following turns the DB into less durable, but gains significant performance improvements for the tests run (x3 # improvement on my personal machine). We should consider moving this into a dedicated Docker Compose configuration for # tests. + environment: + POSTGRES_USER: 'postgres' + POSTGRES_HOST_AUTH_METHOD: trust ports: - "15432:5432" command: "postgres -c fsync=off -c full_page_writes=off -c synchronous_commit=OFF" diff --git a/package-lock.json b/package-lock.json index b224be6f7e..974b270862 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "redash-client", - "version": "8.0.0", + "version": "8.0.1", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/redash/handlers/dashboards.py b/redash/handlers/dashboards.py index 954e9da4f7..a5da024bbc 100644 --- a/redash/handlers/dashboards.py +++ b/redash/handlers/dashboards.py @@ -8,7 +8,7 @@ order_results as _order_results) from redash.permissions import (can_modify, require_admin_or_owner, require_object_modify_permission, - require_permission) + require_permission, is_public_access_allowed) from redash.security import csp_allows_embeding from redash.serializers import serialize_dashboard from sqlalchemy.orm.exc import StaleDataError @@ -265,6 +265,7 @@ def post(self, dashboard_id): """ dashboard = models.Dashboard.get_by_id_and_org(dashboard_id, self.current_org) require_admin_or_owner(dashboard.user_id) + is_public_access_allowed() api_key = models.ApiKey.create_for_object(dashboard, self.current_user) models.db.session.flush() models.db.session.commit() diff --git a/redash/handlers/embed.py b/redash/handlers/embed.py index 62805f3a5d..342f176603 100644 --- a/redash/handlers/embed.py +++ b/redash/handlers/embed.py @@ -8,6 +8,7 @@ from redash.handlers import routes from redash.handlers.base import (get_object_or_404, org_scoped_rule, record_event) +from redash.permissions import is_public_access_allowed from redash.handlers.static import render_index from redash.security import csp_allows_embeding diff --git a/redash/permissions.py b/redash/permissions.py index d928d918c9..185d90f6c6 100644 --- a/redash/permissions.py +++ b/redash/permissions.py @@ -101,6 +101,10 @@ def require_admin_or_owner(object_owner_id): abort(403, message="You don't have permission to edit this resource.") +def is_public_access_allowed(): + abort(403, message="Creating public dashboards is not allowed.") + + def can_modify(obj, user): return is_admin_or_owner(obj.user_id) or user.has_access(obj, ACCESS_TYPE_MODIFY) diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py index 52175a93b7..cdd0943639 100644 --- a/redash/query_runner/__init__.py +++ b/redash/query_runner/__init__.py @@ -87,6 +87,14 @@ def annotate_query(self, query, metadata): annotated_query = u"/* {} */ {}".format(annotation, query) return annotated_query + def annotate_query_with_single_line_comment(self, query, metadata): + if not self.should_annotate_query: + return query + + annotation = u", ".join([u"{}: {}".format(k, v) for k, v in metadata.iteritems()]) + annotated_query = u"-- {} -- \n {}".format(annotation, query) + return 
annotated_query + def test_connection(self): if self.noop_query is None: raise NotImplementedError() diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 7735c4182d..7cecbc676e 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -7,6 +7,7 @@ logger = logging.getLogger(__name__) ANNOTATE_QUERY = parse_boolean(os.environ.get('ATHENA_ANNOTATE_QUERY', 'true')) +ANNOTATE_QUERY_FOR_DML = parse_boolean(os.environ.get('ATHENA_ANNOTATE_QUERY_FOR_DML', 'true')) SHOW_EXTRA_SETTINGS = parse_boolean(os.environ.get('ATHENA_SHOW_EXTRA_SETTINGS', 'true')) ASSUME_ROLE = parse_boolean(os.environ.get('ATHENA_ASSUME_ROLE', 'false')) OPTIONAL_CREDENTIALS = parse_boolean(os.environ.get('ATHENA_OPTIONAL_CREDENTIALS', 'true')) @@ -134,7 +135,10 @@ def enabled(cls): def annotate_query(self, query, metadata): if ANNOTATE_QUERY: - return super(Athena, self).annotate_query(query, metadata) + if ANNOTATE_QUERY_FOR_DML: + return super(Athena, self).annotate_query_with_single_line_comment(query, metadata) + else: + return super(Athena, self).annotate_query(query, metadata) return query @classmethod From fcda834085ad68ae6eea27a2d21aa8745590cde3 Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Wed, 26 Feb 2020 13:09:16 +0530 Subject: [PATCH 02/11] Change athena implementation to make it faster. Add new dunzo update to homepage. Change default table size rows to 50 --- client/app/config/dashboard-grid-options.js | 2 +- client/app/pages/home/home.html | 14 ++-- client/app/visualizations/table/index.js | 2 +- redash/query_runner/athena.py | 77 +++++++++++++++++++-- redash/settings/__init__.py | 4 +- redash/tasks/queries.py | 2 +- 6 files changed, 88 insertions(+), 13 deletions(-) diff --git a/client/app/config/dashboard-grid-options.js b/client/app/config/dashboard-grid-options.js index a07c5691bb..ba2a308cc3 100644 --- a/client/app/config/dashboard-grid-options.js +++ b/client/app/config/dashboard-grid-options.js @@ -1,6 +1,6 @@ export default { columns: 6, // grid columns count - rowHeight: 50, // grid row height (incl. bottom padding) + rowHeight: 40, // grid row height (incl. bottom padding) margins: 15, // widget margins mobileBreakPoint: 800, // defaults for widgets diff --git a/client/app/pages/home/home.html b/client/app/pages/home/home.html index 75340ac569..6d00d8a776 100644 --- a/client/app/pages/home/home.html +++ b/client/app/pages/home/home.html @@ -35,9 +35,16 @@
    -
    -

    Dunzo Updates

    -
  • Do not name your saved with theses patterns. Any such saved query older than 7 days +
    +

    Dunzo Updates

    +
      +
    1. +
 Redash is primarily meant as a reporting and exploration tool. It is NOT meant as a way to query
 raw data and download it as CSV for further processing. Please aim to do all aggregations and slicing
 and dicing in the query itself. Redash queries will FAIL if you try to download too many rows
 (roughly 10 lakh, i.e. about a million). Use a LIMIT filter in your queries if you want to explore.
 +
    2. +
    3. Do not name your saved queries with theses patterns. Any such saved query older than 7 days will be removed from the system.
      • @@ -54,7 +61,6 @@
    4. -
    5. Do add multiple tags to your saved queries. A tag with your name is helpful in filtering queries by user.
    6. diff --git a/client/app/visualizations/table/index.js b/client/app/visualizations/table/index.js index 6a8b5dfc99..e5ff07b0f2 100644 --- a/client/app/visualizations/table/index.js +++ b/client/app/visualizations/table/index.js @@ -12,7 +12,7 @@ import { ColumnTypes } from './utils'; const ALLOWED_ITEM_PER_PAGE = [5, 10, 15, 20, 25, 50, 100, 150, 200, 250]; const DEFAULT_OPTIONS = { - itemsPerPage: 25, + itemsPerPage: 50, }; function getColumnContentAlignment(type) { diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 7cecbc676e..103f6628ab 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -1,5 +1,8 @@ import logging import os +import csv +import random +from pyathena.util import parse_output_location from redash.query_runner import * from redash.settings import parse_boolean @@ -15,11 +18,11 @@ try: import pyathena import boto3 + enabled = True except ImportError: enabled = False - _TYPE_MAPPINGS = { 'boolean': TYPE_BOOLEAN, 'tinyint': TYPE_INTEGER, @@ -125,7 +128,7 @@ def configuration_schema(cls): schema['order'].insert(2, 'aws_secret_key') if not OPTIONAL_CREDENTIALS and not ASSUME_ROLE: - schema['required'] += ['aws_access_key', 'aws_secret_key'] + schema['required'] += ['aws_access_key', 'aws_secret_key'] return schema @@ -153,7 +156,7 @@ def _get_iam_credentials(self, user=None): RoleArn=self.configuration.get('iam_role'), RoleSessionName=role_session_name, ExternalId=self.configuration.get('external_id') - ) + ) return { 'aws_access_key_id': creds['Credentials']['AccessKeyId'], 'aws_secret_access_key': creds['Credentials']['SecretAccessKey'], @@ -219,9 +222,13 @@ def run_query(self, query, user): work_group=self.configuration.get('work_group', 'primary'), formatter=SimpleFormatter(), **self._get_iam_credentials(user=user)).cursor() + cursor.execute(query) + + return self.get_query_result_from_file(cursor, user) + # return self.get_query_results_from_cursor(cursor) + def get_query_results_from_cursor(self, cursor): try: - cursor.execute(query) column_tuples = [(i[0], _TYPE_MAPPINGS.get(i[1], None)) for i in cursor.description] columns = self.fetch_columns(column_tuples) rows = [dict(zip(([c['name'] for c in columns]), r)) for i, r in enumerate(cursor.fetchall())] @@ -255,8 +262,70 @@ def run_query(self, query, user): cursor.cancel() error = ex.message json_data = None + return json_data, error + + def get_query_result_from_file(self, cursor, user): + try: + qbytes = None + athena_query_results_file = None + error = None + json_data = None + try: + athena_query_id = cursor.query_id + except AttributeError as e: + athena_query_id = "temp_"+str(random.getrandbits(128)) + logger.debug("Athena Upstream can't get query_id: %s", e) + try: + athena_output_location = cursor.output_location + except Exception as e: + error = e.message + logger.debug("Output location not found: %s", e) + return json_data, error + + bucket, key = parse_output_location(athena_output_location) + s3 = boto3.client('s3', **self._get_iam_credentials(user=user)) + athena_query_results_file = athena_query_id + with open(athena_query_results_file, 'wb') as w: + s3.download_fileobj(bucket, key, w) + with open(athena_query_results_file, 'r+') as f: + rows = list(csv.DictReader(f)) + column_tuples = [(i[0], _TYPE_MAPPINGS.get(i[1], None)) for i in cursor.description] + columns = self.fetch_columns(column_tuples) + try: + qbytes = cursor.data_scanned_in_bytes + except AttributeError as e: + logger.debug("Athena Upstream can't get data_scanned_in_bytes: %s", 
e) + data = { + 'columns': columns, + 'rows': rows, + 'metadata': { + 'data_scanned': qbytes, + 'athena_query_id': athena_query_id + } + } + json_data = json_dumps(data, ignore_nan=True) + except (KeyboardInterrupt, InterruptException) as e: + if cursor.query_id: + cursor.cancel() + error = "Query cancelled by user. %s", e + json_data = None + except Exception as ex: + if cursor.query_id: + cursor.cancel() + logger.debug(ex.message) + error = ex + json_data = None + finally: + self.remove_file(athena_query_results_file) + self.remove_file(athena_query_results_file) return json_data, error + def remove_file(self, athena_query_results_file): + try: + os.remove(athena_query_results_file) + except OSError: + logger.debug("No such file with %s exists", athena_query_results_file) + register(Athena) diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py index 1a7154dfe6..1c1e289485 100644 --- a/redash/settings/__init__.py +++ b/redash/settings/__init__.py @@ -330,8 +330,8 @@ def email_server_is_configured(): ALLOW_SCRIPTS_IN_USER_INPUT = parse_boolean(os.environ.get("REDASH_ALLOW_SCRIPTS_IN_USER_INPUT", "false")) DASHBOARD_REFRESH_INTERVALS = map(int, array_from_string(os.environ.get("REDASH_DASHBOARD_REFRESH_INTERVALS", "60,300,600,1800,3600,43200,86400"))) QUERY_REFRESH_INTERVALS = map(int, array_from_string(os.environ.get("REDASH_QUERY_REFRESH_INTERVALS", "60, 300, 600, 900, 1800, 3600, 7200, 10800, 14400, 18000, 21600, 25200, 28800, 32400, 36000, 39600, 43200, 86400, 604800, 1209600, 2592000"))) -PAGE_SIZE = int(os.environ.get('REDASH_PAGE_SIZE', 20)) -PAGE_SIZE_OPTIONS = map(int, array_from_string(os.environ.get("REDASH_PAGE_SIZE_OPTIONS", "5,10,20,50,100"))) +PAGE_SIZE = int(os.environ.get('REDASH_PAGE_SIZE', 50)) +PAGE_SIZE_OPTIONS = map(int, array_from_string(os.environ.get("REDASH_PAGE_SIZE_OPTIONS", "5,10,15,20,50,100"))) TABLE_CELL_MAX_JSON_SIZE = int(os.environ.get('REDASH_TABLE_CELL_MAX_JSON_SIZE', 50000)) # Features: diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py index 0ee9cd0ab0..1972a67461 100644 --- a/redash/tasks/queries.py +++ b/redash/tasks/queries.py @@ -72,7 +72,6 @@ def to_dict(self): query_result_id = result else: query_result_id = None - return { 'id': self._async_result.id, 'updated_at': updated_at, @@ -366,6 +365,7 @@ def run(self): error = text_type(e) data = None + logging.debug(e) logging.warning('Unexpected error while running query:', exc_info=1) run_time = time.time() - started_at From 3f096c6ea553211dab41694906003c11267a5c6f Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Wed, 4 Mar 2020 20:07:28 +0530 Subject: [PATCH 03/11] Place timeout in postgres query runner --- redash/query_runner/athena.py | 2 +- redash/query_runner/pg.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 103f6628ab..b1fd035a91 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -307,7 +307,7 @@ def get_query_result_from_file(self, cursor, user): except (KeyboardInterrupt, InterruptException) as e: if cursor.query_id: cursor.cancel() - error = "Query cancelled by user. %s", e + error = "Query cancelled by user." 
json_data = None except Exception as ex: if cursor.query_id: diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index e9e4cc5431..00125ead1d 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -10,6 +10,8 @@ logger = logging.getLogger(__name__) +QUERY_TIMEOUT = os.environ.get('POSTGRES_QUERY_TIMEOUT', '900') + types_map = { 20: TYPE_INTEGER, 21: TYPE_INTEGER, @@ -167,6 +169,7 @@ def _get_tables(self, schema): return schema.values() def _get_connection(self): + statement_timeout = '-c statement_timeout={postgres_query_timeout}'.format(postgres_query_timeout=QUERY_TIMEOUT) connection = psycopg2.connect( user=self.configuration.get('user'), password=self.configuration.get('password'), @@ -174,7 +177,8 @@ def _get_connection(self): port=self.configuration.get('port'), dbname=self.configuration.get('dbname'), sslmode=self.configuration.get('sslmode'), - async_=True) + async_=True, + options=statement_timeout) return connection From d03fa1a734951ae55f50d0a2cb87297d97b4e2fc Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Wed, 4 Mar 2020 21:01:12 +0530 Subject: [PATCH 04/11] Change logic for filtering by schema name --- client/app/components/queries/schema-browser.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/app/components/queries/schema-browser.js b/client/app/components/queries/schema-browser.js index 34615aa590..c8d815a232 100644 --- a/client/app/components/queries/schema-browser.js +++ b/client/app/components/queries/schema-browser.js @@ -29,13 +29,14 @@ function SchemaBrowserCtrl($rootScope, $scope) { }; this.splitFilter = (filter) => { + this.schemaFilterObject = {}; filter = filter.replace(/ {2}/g, ' '); if (filter.includes(' ')) { const splitTheFilter = filter.split(' '); this.schemaFilterObject = { name: splitTheFilter[0], columns: splitTheFilter[1] }; this.schemaFilterColumn = splitTheFilter[1]; } else { - this.schemaFilterObject = filter; + this.schemaFilterObject['name' || '$'] = filter; this.schemaFilterColumn = ''; } }; From bbf8017676db34d57b5a961b319ed298118a701d Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Wed, 4 Mar 2020 21:34:28 +0530 Subject: [PATCH 05/11] Change number formatter to not use commas --- client/app/visualizations/table/index.js | 4 ++-- docker-compose.yml | 2 ++ redash/settings/organization.py | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/client/app/visualizations/table/index.js b/client/app/visualizations/table/index.js index e5ff07b0f2..04874cd48d 100644 --- a/client/app/visualizations/table/index.js +++ b/client/app/visualizations/table/index.js @@ -49,8 +49,8 @@ function getDefaultFormatOptions(column) { datetime: clientConfig.dateTimeFormat || 'DD/MM/YYYY HH:mm', }; const numberFormat = { - integer: clientConfig.integerFormat || '0,0', - float: clientConfig.floatFormat || '0,0.00', + integer: clientConfig.integerFormat || '00', + float: clientConfig.floatFormat || '00.00', }; return { dateTimeFormat: dateTimeFormat[column.type], diff --git a/docker-compose.yml b/docker-compose.yml index d2fe7fd80e..48b8d09426 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,6 +19,7 @@ services: REDASH_REDIS_URL: "redis://redis:6379/0" REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" REDASH_RATELIMIT_ENABLED: "false" + REDASH_INTEGER_FORMAT: "00" worker: build: . 
command: dev_worker @@ -35,6 +36,7 @@ services: REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" QUEUES: "queries,scheduled_queries,celery,schemas" WORKERS_COUNT: 2 + REDASH_INTEGER_FORMAT: "00" redis: image: redis:3-alpine restart: unless-stopped diff --git a/redash/settings/organization.py b/redash/settings/organization.py index 853a6cd4ec..a65b2af94f 100644 --- a/redash/settings/organization.py +++ b/redash/settings/organization.py @@ -18,8 +18,8 @@ DATE_FORMAT = os.environ.get("REDASH_DATE_FORMAT", "DD/MM/YY") TIME_FORMAT = os.environ.get("REDASH_TIME_FORMAT", "HH:mm") -INTEGER_FORMAT = os.environ.get("REDASH_INTEGER_FORMAT", "0,0") -FLOAT_FORMAT = os.environ.get("REDASH_FLOAT_FORMAT", "0,0.00") +INTEGER_FORMAT = os.environ.get("REDASH_INTEGER_FORMAT", "00") +FLOAT_FORMAT = os.environ.get("REDASH_FLOAT_FORMAT", "00.00") MULTI_BYTE_SEARCH_ENABLED = parse_boolean(os.environ.get("MULTI_BYTE_SEARCH_ENABLED", "false")) JWT_LOGIN_ENABLED = parse_boolean(os.environ.get("REDASH_JWT_LOGIN_ENABLED", "false")) From 4e235c77e2879f1cc3f816a5fd36e3fa02d6e3da Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Thu, 5 Mar 2020 12:16:56 +0530 Subject: [PATCH 06/11] Change default query timeout to 10 minutes --- redash/query_runner/pg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/redash/query_runner/pg.py b/redash/query_runner/pg.py index 00125ead1d..c509471355 100644 --- a/redash/query_runner/pg.py +++ b/redash/query_runner/pg.py @@ -10,7 +10,7 @@ logger = logging.getLogger(__name__) -QUERY_TIMEOUT = os.environ.get('POSTGRES_QUERY_TIMEOUT', '900') +QUERY_TIMEOUT = os.environ.get('POSTGRES_QUERY_TIMEOUT', '600000') types_map = { 20: TYPE_INTEGER, From 3ceae7be82991f9bea931fb276c41f2d5addf985 Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Mon, 9 Mar 2020 13:52:22 +0530 Subject: [PATCH 07/11] Use context manager in xlsx serializer --- client/app/pages/home/home.html | 11 +- redash/serializers/query_result.py | 218 ++++++++++++++--------------- 2 files changed, 117 insertions(+), 112 deletions(-) diff --git a/client/app/pages/home/home.html b/client/app/pages/home/home.html index 6d00d8a776..23c2038ec6 100644 --- a/client/app/pages/home/home.html +++ b/client/app/pages/home/home.html @@ -44,7 +44,7 @@ and dicing in the query itself. Redash queries will FAIL if you try to download too many rows ~10 lac. Use a LIMIT filter in your queries if you want to explore. -
    7. Do not name your saved queries with theses patterns. Any such saved query older than 7 days +
    8. Do not name your saved queries with these patterns. Any such saved query older than 7 days will be removed from the system.
      • @@ -62,8 +62,15 @@
    9. - Do add multiple tags to your saved queries. A tag with your name is helpful in filtering queries by user. + Please add multiple tags to your saved queries. A tag with your name is helpful in filtering queries by user.
    10. +
    11. + There is a query timeout of 10 minutes on all Postgres data sources. Please write performant queries accordingly.
    12. +
    13. + As with any tool, Redash also has some limitations but we have tried to make it as smooth as possible for users. + Please provide us with feedback on how we can improve it further. +
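Editor's note: the 10-minute limit mentioned above is applied through a statement_timeout connection option in the Postgres query runner (see the pg.py changes in patches 03 and 06). A minimal standalone sketch of the same mechanism, using placeholder connection details rather than Redash's actual configuration:

import psycopg2

# Placeholder host and credentials; the timeout value is in milliseconds, so 600000 equals 10 minutes.
conn = psycopg2.connect(
    host="localhost",
    dbname="postgres",
    user="postgres",
    options="-c statement_timeout=600000",
)
# The server cancels any statement on this connection that runs longer than the configured timeout.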
  • diff --git a/redash/serializers/query_result.py b/redash/serializers/query_result.py index ff737e3a28..4a2c20a93b 100644 --- a/redash/serializers/query_result.py +++ b/redash/serializers/query_result.py @@ -1,110 +1,108 @@ -import cStringIO -import csv -import xlsxwriter -from funcy import rpartial, project -from dateutil.parser import isoparse as parse_date -from redash.utils import json_loads, UnicodeWriter -from redash.query_runner import (TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME) -from redash.authentication.org_resolving import current_org - - -def _convert_format(fmt): - return fmt.replace('DD', '%d').replace('MM', '%m').replace('YYYY', '%Y').replace('YY', '%y').replace('HH', '%H').replace('mm', '%M').replace('ss', '%s') - - -def _convert_bool(value): - if value is True: - return "true" - elif value is False: - return "false" - - return value - - -def _convert_datetime(value, fmt): - if not value: - return value - - try: - parsed = parse_date(value) - ret = parsed.strftime(fmt) - except Exception: - return value - - return ret - - -def _get_column_lists(columns): - date_format = _convert_format(current_org.get_setting('date_format')) - datetime_format = _convert_format('{} {}'.format(current_org.get_setting('date_format'), current_org.get_setting('time_format'))) - - special_types = { - TYPE_BOOLEAN: _convert_bool, - TYPE_DATE: rpartial(_convert_datetime, date_format), - TYPE_DATETIME: rpartial(_convert_datetime, datetime_format) - } - - fieldnames = [] - special_columns = dict() - - for col in columns: - fieldnames.append(col['name']) - - for col_type in special_types.keys(): - if col['type'] == col_type: - special_columns[col['name']] = special_types[col_type] - - return fieldnames, special_columns - - -def serialize_query_result(query_result, is_api_user): - if is_api_user: - publicly_needed_keys = ['data', 'retrieved_at'] - return project(query_result.to_dict(), publicly_needed_keys) - else: - return query_result.to_dict() - - -def serialize_query_result_to_csv(query_result): - s = cStringIO.StringIO() - - query_data = json_loads(query_result.data) - - fieldnames, special_columns = _get_column_lists(query_data['columns'] or []) - - writer = csv.DictWriter(s, extrasaction="ignore", fieldnames=fieldnames) - writer.writer = UnicodeWriter(s) - writer.writeheader() - - for row in query_data['rows']: - for col_name, converter in special_columns.iteritems(): - if col_name in row: - row[col_name] = converter(row[col_name]) - - writer.writerow(row) - - return s.getvalue() - - -def serialize_query_result_to_xlsx(query_result): - s = cStringIO.StringIO() - - query_data = json_loads(query_result.data) - book = xlsxwriter.Workbook(s, {'constant_memory': True}) - sheet = book.add_worksheet("result") - - column_names = [] - for (c, col) in enumerate(query_data['columns']): - sheet.write(0, c, col['name']) - column_names.append(col['name']) - - for (r, row) in enumerate(query_data['rows']): - for (c, name) in enumerate(column_names): - v = row.get(name) - if isinstance(v, list) or isinstance(v, dict): - v = str(v).encode('utf-8') - sheet.write(r + 1, c, v) - - book.close() - - return s.getvalue() +import cStringIO +import csv +import xlsxwriter +from funcy import rpartial, project +from dateutil.parser import isoparse as parse_date +from redash.utils import json_loads, UnicodeWriter +from redash.query_runner import (TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME) +from redash.authentication.org_resolving import current_org + + +def _convert_format(fmt): + return fmt.replace('DD', '%d').replace('MM', 
'%m').replace('YYYY', '%Y').replace('YY', '%y').replace('HH', '%H').replace('mm', '%M').replace('ss', '%s') + + +def _convert_bool(value): + if value is True: + return "true" + elif value is False: + return "false" + + return value + + +def _convert_datetime(value, fmt): + if not value: + return value + + try: + parsed = parse_date(value) + ret = parsed.strftime(fmt) + except Exception: + return value + + return ret + + +def _get_column_lists(columns): + date_format = _convert_format(current_org.get_setting('date_format')) + datetime_format = _convert_format('{} {}'.format(current_org.get_setting('date_format'), current_org.get_setting('time_format'))) + + special_types = { + TYPE_BOOLEAN: _convert_bool, + TYPE_DATE: rpartial(_convert_datetime, date_format), + TYPE_DATETIME: rpartial(_convert_datetime, datetime_format) + } + + fieldnames = [] + special_columns = dict() + + for col in columns: + fieldnames.append(col['name']) + + for col_type in special_types.keys(): + if col['type'] == col_type: + special_columns[col['name']] = special_types[col_type] + + return fieldnames, special_columns + + +def serialize_query_result(query_result, is_api_user): + if is_api_user: + publicly_needed_keys = ['data', 'retrieved_at'] + return project(query_result.to_dict(), publicly_needed_keys) + else: + return query_result.to_dict() + + +def serialize_query_result_to_csv(query_result): + s = cStringIO.StringIO() + + query_data = json_loads(query_result.data) + + fieldnames, special_columns = _get_column_lists(query_data['columns'] or []) + + writer = csv.DictWriter(s, extrasaction="ignore", fieldnames=fieldnames) + writer.writer = UnicodeWriter(s) + writer.writeheader() + + for row in query_data['rows']: + for col_name, converter in special_columns.iteritems(): + if col_name in row: + row[col_name] = converter(row[col_name]) + + writer.writerow(row) + + return s.getvalue() + + +def serialize_query_result_to_xlsx(query_result): + s = cStringIO.StringIO() + + query_data = json_loads(query_result.data) + with xlsxwriter.Workbook(s, {'constant_memory': True}) as book: + sheet = book.add_worksheet("result") + + column_names = [] + for (c, col) in enumerate(query_data['columns']): + sheet.write(0, c, col['name']) + column_names.append(col['name']) + + for (r, row) in enumerate(query_data['rows']): + for (c, name) in enumerate(column_names): + v = row.get(name) + if isinstance(v, list) or isinstance(v, dict): + v = str(v).encode('utf-8') + sheet.write(r + 1, c, v) + + return s.getvalue() From 2293036b1a46dfe7744b339df82dd78888d9b2c7 Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Mon, 9 Mar 2020 14:16:37 +0530 Subject: [PATCH 08/11] Increase gunicorn timeout --- bin/docker-entrypoint | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/docker-entrypoint b/bin/docker-entrypoint index e6a122c469..ba4dd26492 100755 --- a/bin/docker-entrypoint +++ b/bin/docker-entrypoint @@ -34,7 +34,7 @@ server() { # Recycle gunicorn workers every n-th request. See http://docs.gunicorn.org/en/stable/settings.html#max-requests for more details. 
MAX_REQUESTS=${MAX_REQUESTS:-1000} MAX_REQUESTS_JITTER=${MAX_REQUESTS_JITTER:-100} - exec /usr/local/bin/gunicorn -b 0.0.0.0:5000 --name redash -w${REDASH_WEB_WORKERS:-4} redash.wsgi:app --max-requests $MAX_REQUESTS --max-requests-jitter $MAX_REQUESTS_JITTER + exec /usr/local/bin/gunicorn -b 0.0.0.0:5000 --name redash -w${REDASH_WEB_WORKERS:-4} redash.wsgi:app --max-requests $MAX_REQUESTS --max-requests-jitter $MAX_REQUESTS_JITTER --timeout 65 } create_db() { From 0bb12d1429108edf961b8db141e9c3a4ca71c13d Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Wed, 11 Mar 2020 16:35:14 +0530 Subject: [PATCH 09/11] Fix celery concurrency issues. Fix athena query issue for create statement --- .gitignore | 2 +- bin/docker-entrypoint | 8 ++++---- client/app/pages/home/home.html | 5 +++++ docker-compose.yml | 11 ++++++++++- redash/query_runner/athena.py | 18 ++++++++++++++++-- redash/tasks/queries.py | 2 +- redash/worker.py | 5 ++--- 7 files changed, 39 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 350e1f610d..7ae2c0345d 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ celerybeat-schedule* _build .vscode .env - +.log dump.rdb node_modules diff --git a/bin/docker-entrypoint b/bin/docker-entrypoint index ba4dd26492..46d0ab9169 100755 --- a/bin/docker-entrypoint +++ b/bin/docker-entrypoint @@ -2,12 +2,12 @@ set -e worker() { - WORKERS_COUNT=${WORKERS_COUNT:-2} + WORKERS_COUNT=${WORKERS_COUNT:-1} QUEUES=${QUEUES:-queries,scheduled_queries,celery,schemas} WORKER_EXTRA_OPTIONS=${WORKER_EXTRA_OPTIONS:-} echo "Starting $WORKERS_COUNT workers for queues: $QUEUES..." - exec /usr/local/bin/celery worker --app=redash.worker -c$WORKERS_COUNT -Q$QUEUES -linfo --max-tasks-per-child=10 -Ofair $WORKER_EXTRA_OPTIONS + exec /usr/local/bin/celery worker --app=redash.worker -Q$QUEUES -ldebug --max-tasks-per-child=10 -c4 -Ofair $WORKER_EXTRA_OPTIONS } scheduler() { @@ -21,13 +21,13 @@ scheduler() { } dev_worker() { - WORKERS_COUNT=${WORKERS_COUNT:-2} + WORKERS_COUNT=${WORKERS_COUNT:-1} QUEUES=${QUEUES:-queries,scheduled_queries,celery,schemas} SCHEDULE_DB=${SCHEDULE_DB:-celerybeat-schedule} echo "Starting dev scheduler and $WORKERS_COUNT workers for queues: $QUEUES..." - exec watchmedo auto-restart --directory=./redash/ --pattern=*.py --recursive -- /usr/local/bin/celery worker --app=redash.worker --beat -s$SCHEDULE_DB -c$WORKERS_COUNT -Q$QUEUES -linfo --max-tasks-per-child=10 -Ofair + exec watchmedo auto-restart --directory=./redash/ --pattern=*.py --recursive -- /usr/local/bin/celery worker --app=redash.worker -s$SCHEDULE_DB -c2 -Q$QUEUES -linfo --max-tasks-per-child=10 -Ofair } server() { diff --git a/client/app/pages/home/home.html b/client/app/pages/home/home.html index 23c2038ec6..c1786f8b18 100644 --- a/client/app/pages/home/home.html +++ b/client/app/pages/home/home.html @@ -67,6 +67,11 @@
  • There is a query timeout of 10 minutes on all Postgres data sources. Please write performant queries accordingly.
  • +
  • + While downloading data as csv or excel files, you may see a popup. Please click 'Leave'. You will not be + redirected anywhere. Also if you have a large number of rows, the data download may fail. You will have to + reduce the number of rows in that case. +
  • As with any tool, Redash also has some limitations but we have tried to make it as smooth as possible for users. Please provide us with feedback on how we can improve it further. diff --git a/docker-compose.yml b/docker-compose.yml index 48b8d09426..7a88b051c1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -23,6 +23,7 @@ services: worker: build: . command: dev_worker + restart: unless-stopped volumes: - type: bind source: . @@ -35,11 +36,19 @@ services: REDASH_REDIS_URL: "redis://redis:6379/0" REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" QUEUES: "queries,scheduled_queries,celery,schemas" - WORKERS_COUNT: 2 + WORKERS_COUNT: 1 REDASH_INTEGER_FORMAT: "00" redis: image: redis:3-alpine restart: unless-stopped + flower: + image: mher/flower + container_name: redash_flower + environment: + CELERY_BROKER_URL: "redis://redis:6379/0" + FLOWER_PORT: 8888 + ports: + - 8889:8888 postgres: image: postgres:9.5-alpine # The following turns the DB into less durable, but gains significant performance improvements for the tests run (x3 diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index b1fd035a91..aadc0633f9 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -2,6 +2,8 @@ import os import csv import random + +from botocore.exceptions import ClientError from pyathena.util import parse_output_location from redash.query_runner import * @@ -224,7 +226,7 @@ def run_query(self, query, user): **self._get_iam_credentials(user=user)).cursor() cursor.execute(query) - return self.get_query_result_from_file(cursor, user) + return self.get_query_result_from_file(cursor, user, query) # return self.get_query_results_from_cursor(cursor) def get_query_results_from_cursor(self, cursor): @@ -264,7 +266,7 @@ def get_query_results_from_cursor(self, cursor): json_data = None return json_data, error - def get_query_result_from_file(self, cursor, user): + def get_query_result_from_file(self, cursor, user, query): try: qbytes = None athena_query_results_file = None @@ -277,11 +279,15 @@ def get_query_result_from_file(self, cursor, user): logger.debug("Athena Upstream can't get query_id: %s", e) try: athena_output_location = cursor.output_location + logger.info(athena_output_location) except Exception as e: error = e.message logger.debug("Output location not found: %s", e) return json_data, error + if not athena_output_location or athena_output_location == '': + return json_data, error + bucket, key = parse_output_location(athena_output_location) s3 = boto3.client('s3', **self._get_iam_credentials(user=user)) athena_query_results_file = athena_query_id @@ -309,6 +315,14 @@ def get_query_result_from_file(self, cursor, user): cursor.cancel() error = "Query cancelled by user." 
json_data = None + except ClientError as e: + logger.exception(e) + if '404' in e.message and 'HeadObject' in e.message: + error = None + json_data = json_dumps({}, ignore_nan=True) + else: + error = e + json_data = None except Exception as ex: if cursor.query_id: cursor.cancel() diff --git a/redash/tasks/queries.py b/redash/tasks/queries.py index 1972a67461..d86df19a55 100644 --- a/redash/tasks/queries.py +++ b/redash/tasks/queries.py @@ -92,7 +92,7 @@ def ready(self): return self._async_result.ready() def cancel(self): - return self._async_result.revoke(terminate=True, signal='SIGINT') + return self._async_result.revoke(terminate=True, signal='SIGKILL') def enqueue_query(query, data_source, user_id, is_api_key=False, scheduled_query=None, metadata={}): diff --git a/redash/worker.py b/redash/worker.py index e960c34fd7..b0e0592828 100644 --- a/redash/worker.py +++ b/redash/worker.py @@ -1,16 +1,15 @@ from __future__ import absolute_import + from datetime import timedelta from random import randint -from flask import current_app - from celery import Celery from celery.schedules import crontab from celery.signals import worker_process_init from celery.utils.log import get_logger +from flask import current_app from redash import create_app, extensions, settings -from redash.metrics import celery as celery_metrics # noqa logger = get_logger(__name__) From 85f2b6dd08ac99538044fbd879474fbfece15e2d Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Fri, 13 Mar 2020 15:43:36 +0530 Subject: [PATCH 10/11] Add new relic monitoring agent --- bin/docker-entrypoint | 8 +- docker-compose.yml | 2 + newrelic.ini | 207 ++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 5 +- 4 files changed, 216 insertions(+), 6 deletions(-) create mode 100644 newrelic.ini diff --git a/bin/docker-entrypoint b/bin/docker-entrypoint index 46d0ab9169..533c0963d3 100755 --- a/bin/docker-entrypoint +++ b/bin/docker-entrypoint @@ -7,7 +7,7 @@ worker() { WORKER_EXTRA_OPTIONS=${WORKER_EXTRA_OPTIONS:-} echo "Starting $WORKERS_COUNT workers for queues: $QUEUES..." - exec /usr/local/bin/celery worker --app=redash.worker -Q$QUEUES -ldebug --max-tasks-per-child=10 -c4 -Ofair $WORKER_EXTRA_OPTIONS + exec NEW_RELIC_CONFIG_FILE=newrelic.ini newrelic-admin run-program /usr/local/bin/celery worker --app=redash.worker -Q$QUEUES -linfo --max-tasks-per-child=10 -c4 -Ofair $WORKER_EXTRA_OPTIONS } scheduler() { @@ -17,7 +17,7 @@ scheduler() { echo "Starting scheduler and $WORKERS_COUNT workers for queues: $QUEUES..." - exec /usr/local/bin/celery worker --app=redash.worker --beat -s$SCHEDULE_DB -c$WORKERS_COUNT -Q$QUEUES -linfo --max-tasks-per-child=10 -Ofair + exec newrelic-admin run-program /usr/local/bin/celery worker --app=redash.worker --beat -s$SCHEDULE_DB -c$WORKERS_COUNT -Q$QUEUES -linfo --max-tasks-per-child=10 -Ofair } dev_worker() { @@ -27,14 +27,14 @@ dev_worker() { echo "Starting dev scheduler and $WORKERS_COUNT workers for queues: $QUEUES..." - exec watchmedo auto-restart --directory=./redash/ --pattern=*.py --recursive -- /usr/local/bin/celery worker --app=redash.worker -s$SCHEDULE_DB -c2 -Q$QUEUES -linfo --max-tasks-per-child=10 -Ofair + exec newrelic-admin run-program /usr/local/bin/celery worker --app=redash.worker -s$SCHEDULE_DB -c2 -Q$QUEUES -linfo --max-tasks-per-child=10 -Ofair } server() { # Recycle gunicorn workers every n-th request. See http://docs.gunicorn.org/en/stable/settings.html#max-requests for more details. 
MAX_REQUESTS=${MAX_REQUESTS:-1000} MAX_REQUESTS_JITTER=${MAX_REQUESTS_JITTER:-100} - exec /usr/local/bin/gunicorn -b 0.0.0.0:5000 --name redash -w${REDASH_WEB_WORKERS:-4} redash.wsgi:app --max-requests $MAX_REQUESTS --max-requests-jitter $MAX_REQUESTS_JITTER --timeout 65 + exec newrelic-admin run-program /usr/local/bin/gunicorn -b 0.0.0.0:5000 --name redash -w${REDASH_WEB_WORKERS:-4} redash.wsgi:app --max-requests $MAX_REQUESTS --max-requests-jitter $MAX_REQUESTS_JITTER --timeout 65 } create_db() { diff --git a/docker-compose.yml b/docker-compose.yml index 7a88b051c1..8b598befcc 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -20,6 +20,7 @@ services: REDASH_DATABASE_URL: "postgresql://postgres@postgres/postgres" REDASH_RATELIMIT_ENABLED: "false" REDASH_INTEGER_FORMAT: "00" + NEW_RELIC_CONFIG_FILE: newrelic.ini worker: build: . command: dev_worker @@ -38,6 +39,7 @@ services: QUEUES: "queries,scheduled_queries,celery,schemas" WORKERS_COUNT: 1 REDASH_INTEGER_FORMAT: "00" + NEW_RELIC_CONFIG_FILE: newrelic.ini redis: image: redis:3-alpine restart: unless-stopped diff --git a/newrelic.ini b/newrelic.ini new file mode 100644 index 0000000000..1b7f8d77a3 --- /dev/null +++ b/newrelic.ini @@ -0,0 +1,207 @@ +# --------------------------------------------------------------------------- + +# +# This file configures the New Relic Python Agent. +# +# The path to the configuration file should be supplied to the function +# newrelic.agent.initialize() when the agent is being initialized. +# +# The configuration file follows a structure similar to what you would +# find for Microsoft Windows INI files. For further information on the +# configuration file format see the Python ConfigParser documentation at: +# +# http://docs.python.org/library/configparser.html +# +# For further discussion on the behaviour of the Python agent that can +# be configured via this configuration file see: +# +# http://newrelic.com/docs/python/python-agent-configuration +# + +# --------------------------------------------------------------------------- + +# Here are the settings that are common to all environments. + +[newrelic] + +# You must specify the license key associated with your New +# Relic account. This key binds the Python Agent's data to your +# account in the New Relic service. +license_key = 85bded9530484bd96cfbe58674f2b83dc5249022 + +# The application name. Set this to be the name of your +# application as you would like it to show up in New Relic UI. +# The UI will then auto-map instances of your application into a +# entry on your home dashboard page. +app_name = redash + +# When "true", the agent collects performance data about your +# application and reports this data to the New Relic UI at +# newrelic.com. This global switch is normally overridden for +# each environment below. +monitor_mode = true + +# Sets the name of a file to log agent messages to. Useful for +# debugging any issues with the agent. This is not set by +# default as it is not known in advance what user your web +# application processes will run as and where they have +# permission to write to. Whatever you set this to you must +# ensure that the permissions for the containing directory and +# the file itself are correct, and that the user that your web +# application runs as can write to the file. If not able to +# write out a log file, it is also possible to say "stderr" and +# output to standard error output. This would normally result in +# output appearing in your web server log. 
+#log_file = /tmp/newrelic-python-agent.log + +# Sets the level of detail of messages sent to the log file, if +# a log file location has been provided. Possible values, in +# increasing order of detail, are: "critical", "error", "warning", +# "info" and "debug". When reporting any agent issues to New +# Relic technical support, the most useful setting for the +# support engineers is "debug". However, this can generate a lot +# of information very quickly, so it is best not to keep the +# agent at this level for longer than it takes to reproduce the +# problem you are experiencing. +log_level = info + +# High Security Mode enforces certain security settings, and prevents +# them from being overridden, so that no sensitive data is sent to New +# Relic. Enabling High Security Mode means that request parameters are +# not collected and SQL can not be sent to New Relic in its raw form. +# To activate High Security Mode, it must be set to 'true' in this +# local .ini configuration file AND be set to 'true' in the +# server-side configuration in the New Relic user interface. For +# details, see +# https://docs.newrelic.com/docs/subscriptions/high-security +high_security = false + +# The Python Agent will attempt to connect directly to the New +# Relic service. If there is an intermediate firewall between +# your host and the New Relic service that requires you to use a +# HTTP proxy, then you should set both the "proxy_host" and +# "proxy_port" settings to the required values for the HTTP +# proxy. The "proxy_user" and "proxy_pass" settings should +# additionally be set if proxy authentication is implemented by +# the HTTP proxy. The "proxy_scheme" setting dictates what +# protocol scheme is used in talking to the HTTP proxy. This +# would normally always be set as "http" which will result in the +# agent then using a SSL tunnel through the HTTP proxy for end to +# end encryption. +# proxy_scheme = http +# proxy_host = hostname +# proxy_port = 8080 +# proxy_user = +# proxy_pass = + +# Capturing request parameters is off by default. To enable the +# capturing of request parameters, first ensure that the setting +# "attributes.enabled" is set to "true" (the default value), and +# then add "request.parameters.*" to the "attributes.include" +# setting. For details about attributes configuration, please +# consult the documentation. +# attributes.include = request.parameters.* + +# The transaction tracer captures deep information about slow +# transactions and sends this to the UI on a periodic basis. The +# transaction tracer is enabled by default. Set this to "false" +# to turn it off. +transaction_tracer.enabled = true + +# Threshold in seconds for when to collect a transaction trace. +# When the response time of a controller action exceeds this +# threshold, a transaction trace will be recorded and sent to +# the UI. Valid values are any positive float value, or (default) +# "apdex_f", which will use the threshold for a dissatisfying +# Apdex controller action - four times the Apdex T value. +transaction_tracer.transaction_threshold = apdex_f + +# When the transaction tracer is on, SQL statements can +# optionally be recorded. The recorder has three modes, "off" +# which sends no SQL, "raw" which sends the SQL statement in its +# original form, and "obfuscated", which strips out numeric and +# string literals. +transaction_tracer.record_sql = obfuscated + +# Threshold in seconds for when to collect stack trace for a SQL +# call. 
In other words, when SQL statements exceed this +# threshold, then capture and send to the UI the current stack +# trace. This is helpful for pinpointing where long SQL calls +# originate from in an application. +transaction_tracer.stack_trace_threshold = 0.5 + +# Determines whether the agent will capture query plans for slow +# SQL queries. Only supported in MySQL and PostgreSQL. Set this +# to "false" to turn it off. +transaction_tracer.explain_enabled = true + +# Threshold for query execution time below which query plans +# will not not be captured. Relevant only when "explain_enabled" +# is true. +transaction_tracer.explain_threshold = 0.5 + +# Space separated list of function or method names in form +# 'module:function' or 'module:class.function' for which +# additional function timing instrumentation will be added. +transaction_tracer.function_trace = + +# The error collector captures information about uncaught +# exceptions or logged exceptions and sends them to UI for +# viewing. The error collector is enabled by default. Set this +# to "false" to turn it off. +error_collector.enabled = true + +# To stop specific errors from reporting to the UI, set this to +# a space separated list of the Python exception type names to +# ignore. The exception name should be of the form 'module:class'. +error_collector.ignore_errors = + +# Browser monitoring is the Real User Monitoring feature of the UI. +# For those Python web frameworks that are supported, this +# setting enables the auto-insertion of the browser monitoring +# JavaScript fragments. +browser_monitoring.auto_instrument = true + +# A thread profiling session can be scheduled via the UI when +# this option is enabled. The thread profiler will periodically +# capture a snapshot of the call stack for each active thread in +# the application to construct a statistically representative +# call tree. +thread_profiler.enabled = true + +# Your application deployments can be recorded through the +# New Relic REST API. To use this feature provide your API key +# below then use the `newrelic-admin record-deploy` command. +# api_key = + +# Distributed tracing lets you see the path that a request takes +# through your distributed system. Enabling distributed tracing +# changes the behavior of some New Relic features, so carefully +# consult the transition guide before you enable this feature: +# https://docs.newrelic.com/docs/transition-guide-distributed-tracing +distributed_tracing.enabled = false + +# --------------------------------------------------------------------------- + +# +# The application environments. These are specific settings which +# override the common environment settings. The settings related to a +# specific environment will be used when the environment argument to the +# newrelic.agent.initialize() function has been defined to be either +# "development", "test", "staging" or "production". 
+# + +[newrelic:development] +monitor_mode = false + +[newrelic:test] +monitor_mode = false + +[newrelic:staging] +app_name = redash (Staging) +monitor_mode = true + +[newrelic:production] +monitor_mode = true + +# --------------------------------------------------------------------------- diff --git a/requirements.txt b/requirements.txt index 432d7cfd01..fa977dad51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,8 +36,8 @@ SQLAlchemy-Utils==0.33.11 sqlparse==0.2.4 statsd==2.1.2 gunicorn==19.7.1 -celery==4.3.0 -kombu==4.6.3 +celery==4.4.0 +kombu==4.6.6 jsonschema==2.4.0 RestrictedPython==3.6.0 pysaml2==4.5.0 @@ -58,6 +58,7 @@ chromelogger==0.4.3 pypd==1.1.0 disposable-email-domains>=0.0.52 gevent==1.4.0 +newrelic==5.10.0.138 # Install the dependencies of the bin/bundle-extensions script here. # It has its own requirements file to simplify the frontend client build process -r requirements_bundles.txt From 92c9291d85dd19e7617ac3fad76021a517d4fcc8 Mon Sep 17 00:00:00 2001 From: Shitij Goyal Date: Tue, 16 Jun 2020 19:14:18 +0530 Subject: [PATCH 11/11] Changes --- bin/docker-entrypoint | 2 +- redash/query_runner/athena.py | 15 +++++++++++++-- redash/settings/__init__.py | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/bin/docker-entrypoint b/bin/docker-entrypoint index 533c0963d3..238a1d17b2 100755 --- a/bin/docker-entrypoint +++ b/bin/docker-entrypoint @@ -7,7 +7,7 @@ worker() { WORKER_EXTRA_OPTIONS=${WORKER_EXTRA_OPTIONS:-} echo "Starting $WORKERS_COUNT workers for queues: $QUEUES..." - exec NEW_RELIC_CONFIG_FILE=newrelic.ini newrelic-admin run-program /usr/local/bin/celery worker --app=redash.worker -Q$QUEUES -linfo --max-tasks-per-child=10 -c4 -Ofair $WORKER_EXTRA_OPTIONS + exec newrelic-admin run-program /usr/local/bin/celery worker --app=redash.worker -Q$QUEUES -linfo --max-tasks-per-child=10 -c4 -Ofair $WORKER_EXTRA_OPTIONS } scheduler() { diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index aadc0633f9..fe25acce09 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -3,8 +3,9 @@ import csv import random +from botocore.config import Config from botocore.exceptions import ClientError -from pyathena.util import parse_output_location +from pyathena.util import parse_output_location, RetryConfig from redash.query_runner import * from redash.settings import parse_boolean @@ -55,6 +56,14 @@ class Athena(BaseQueryRunner): def name(cls): return "Amazon Athena" + @classmethod + def get_retry_config(cls): + config = RetryConfig( + attempt=2, + multiplier=1, max_delay=50 + ) + return config + @classmethod def configuration_schema(cls): schema = { @@ -223,6 +232,7 @@ def run_query(self, query, user): kms_key=self.configuration.get('kms_key', None), work_group=self.configuration.get('work_group', 'primary'), formatter=SimpleFormatter(), + retry_config=self.get_retry_config(), **self._get_iam_credentials(user=user)).cursor() cursor.execute(query) @@ -289,7 +299,8 @@ def get_query_result_from_file(self, cursor, user, query): return json_data, error bucket, key = parse_output_location(athena_output_location) - s3 = boto3.client('s3', **self._get_iam_credentials(user=user)) + s3 = boto3.client('s3', + **self._get_iam_credentials(user=user)) athena_query_results_file = athena_query_id with open(athena_query_results_file, 'wb') as w: s3.download_fileobj(bucket, key, w) diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py index 1c1e289485..c2f49898b8 100644 --- a/redash/settings/__init__.py +++ 
b/redash/settings/__init__.py @@ -41,7 +41,7 @@ 'ssl_keyfile': os.environ.get("REDASH_CELERY_BROKER_SSL_KEYFILE"), } if CELERY_BROKER_USE_SSL else None -CELERY_WORKER_PREFETCH_MULTIPLIER = int(os.environ.get("REDASH_CELERY_WORKER_PREFETCH_MULTIPLIER", 1)) +CELERY_WORKER_PREFETCH_MULTIPLIER: int = int(os.environ.get("REDASH_CELERY_WORKER_PREFETCH_MULTIPLIER", 1)) CELERY_ACCEPT_CONTENT = os.environ.get("REDASH_CELERY_ACCEPT_CONTENT", "json").split(",") CELERY_TASK_SERIALIZER = os.environ.get("REDASH_CELERY_TASK_SERIALIZER", "json") CELERY_RESULT_SERIALIZER = os.environ.get("REDASH_CELERY_RESULT_SERIALIZER", "json")
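Editor's note on the query annotation change in the first commit: the two helpers differ only in the comment style they prepend to the query text. A small illustrative snippet, with made-up metadata values and the format strings taken from the methods added in that commit:

metadata = {"Username": "analyst@example.com", "Query ID": 42}  # illustrative values only
annotation = ", ".join("{}: {}".format(k, v) for k, v in metadata.items())

# Block-comment prefix produced by the existing annotate_query():
print("/* {} */ {}".format(annotation, "SELECT 1"))

# Single-line prefix produced by the new annotate_query_with_single_line_comment(), which the Athena
# runner uses when ATHENA_ANNOTATE_QUERY and ATHENA_ANNOTATE_QUERY_FOR_DML are both enabled:
print("-- {} -- \n {}".format(annotation, "SELECT 1"))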