Skip to content

Commit

Permalink
CSV: correctly serialize booleans and dates. (getredash#3841)
Browse files Browse the repository at this point in the history
* CSV: correctly serialize booleans and dates.

Closes getredash#3736, closes getredash#2751.

* pep8 fixes

* Move column iteration to a helper function.

* Use elif, as types are mutually exclusive.

* Refactor parsing implementation.

* Move the csv generation fucntion
  • Loading branch information
arikfr authored and harveyrendell committed Nov 14, 2019
1 parent ffe6857 commit fbaa3c2
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 43 deletions.
5 changes: 3 additions & 2 deletions redash/handlers/query_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from redash.tasks.queries import enqueue_query
from redash.utils import (collect_parameters_from_request, gen_query_hash, json_dumps, utcnow, to_filename)
from redash.models.parameterized_query import ParameterizedQuery, InvalidParameterError, dropdown_values
from redash.serializers import serialize_query_result_to_csv, serialize_query_result_to_xlsx


def error_response(message):
Expand Down Expand Up @@ -279,12 +280,12 @@ def make_json_response(self, query_result):
@staticmethod
def make_csv_response(query_result):
headers = {'Content-Type': "text/csv; charset=UTF-8"}
return make_response(query_result.make_csv_content(), 200, headers)
return make_response(serialize_query_result_to_csv(query_result), 200, headers)

@staticmethod
def make_excel_response(query_result):
headers = {'Content-Type': "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"}
return make_response(query_result.make_excel_content(), 200, headers)
return make_response(serialize_query_result_to_xlsx(query_result), 200, headers)


class JobResource(BaseResource):
Expand Down
40 changes: 1 addition & 39 deletions redash/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import cStringIO
import csv
import datetime
import calendar
import logging
import time
import pytz

import xlsxwriter
from six import python_2_unicode_compatible, text_type
from sqlalchemy import distinct, or_, and_, UniqueConstraint
from sqlalchemy.dialects import postgresql
Expand All @@ -25,7 +22,7 @@
get_destination)
from redash.metrics import database # noqa: F401
from redash.query_runner import (get_configuration_schema_for_query_runner_type,
get_query_runner)
get_query_runner, TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME)
from redash.utils import generate_token, json_dumps, json_loads
from redash.utils.configuration import ConfigurationContainer
from redash.models.parameterized_query import ParameterizedQuery
Expand Down Expand Up @@ -322,41 +319,6 @@ def store_result(cls, org, data_source, query_hash, query, data, run_time, retri
def groups(self):
return self.data_source.groups

def make_csv_content(self):
s = cStringIO.StringIO()

query_data = json_loads(self.data)
writer = csv.DictWriter(s, extrasaction="ignore", fieldnames=[col['name'] for col in query_data['columns']])
writer.writer = utils.UnicodeWriter(s)
writer.writeheader()
for row in query_data['rows']:
writer.writerow(row)

return s.getvalue()

def make_excel_content(self):
s = cStringIO.StringIO()

query_data = json_loads(self.data)
book = xlsxwriter.Workbook(s, {'constant_memory': True})
sheet = book.add_worksheet("result")

column_names = []
for (c, col) in enumerate(query_data['columns']):
sheet.write(0, c, col['name'])
column_names.append(col['name'])

for (r, row) in enumerate(query_data['rows']):
for (c, name) in enumerate(column_names):
v = row.get(name)
if isinstance(v, list) or isinstance(v, dict):
v = str(v).encode('utf-8')
sheet.write(r + 1, c, v)

book.close()

return s.getvalue()


def should_schedule_next(previous_iteration, now, interval, time=None, day_of_week=None, failures=0):
# if time exists then interval > 23 hours (82800s)
Expand Down
2 changes: 2 additions & 0 deletions redash/serializers.py → redash/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from redash.utils import json_loads
from redash.models.parameterized_query import ParameterizedQuery

from .query_result import serialize_query_result_to_csv, serialize_query_result_to_xlsx


def public_widget(widget):
res = {
Expand Down
104 changes: 104 additions & 0 deletions redash/serializers/query_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import cStringIO
import csv
import xlsxwriter
from dateutil.parser import parse as parse_date
from redash.utils import json_loads, UnicodeWriter
from redash.query_runner import (TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME)
from redash.authentication.org_resolving import current_org


def _convert_format(fmt):
return fmt.replace('DD', '%d').replace('MM', '%m').replace('YYYY', '%Y').replace('YY', '%y').replace('HH', '%H').replace('mm', '%M').replace('ss', '%s')


def _convert_bool(value):
if value is True:
return "true"
elif value is False:
return "false"

return value

def _convert_date(value):
if not value:
return value

parsed = parse_date(value)

return parsed.strftime(_convert_format(current_org.get_setting('date_format')))


def _convert_datetime(value):
if not value:
return value

parsed = parse_date(value)

fmt = _convert_format('{} {}'.format(current_org.get_setting('date_format'), current_org.get_setting('time_format')))
return parsed.strftime(fmt)


SPECIAL_TYPES = {
TYPE_BOOLEAN: _convert_bool,
TYPE_DATE: _convert_date,
TYPE_DATETIME: _convert_datetime
}


def _get_column_lists(columns):
fieldnames = []
special_columns = dict()

for col in columns:
fieldnames.append(col['name'])

for col_type in SPECIAL_TYPES.keys():
if col['type'] == col_type:
special_columns[col['name']] = SPECIAL_TYPES[col_type]

return fieldnames, special_columns


def serialize_query_result_to_csv(query_result):
s = cStringIO.StringIO()

query_data = json_loads(query_result.data)

fieldnames, special_columns = _get_column_lists(query_data['columns'])

writer = csv.DictWriter(s, extrasaction="ignore", fieldnames=fieldnames)
writer.writer = UnicodeWriter(s)
writer.writeheader()

for row in query_data['rows']:
for col_name, converter in special_columns.iteritems():
if col_name in row:
row[col_name] = converter(row[col_name])

writer.writerow(row)

return s.getvalue()


def serialize_query_result_to_xlsx(query_result):
s = cStringIO.StringIO()

query_data = json_loads(query_result.data)
book = xlsxwriter.Workbook(s, {'constant_memory': True})
sheet = book.add_worksheet("result")

column_names = []
for (c, col) in enumerate(query_data['columns']):
sheet.write(0, c, col['name'])
column_names.append(col['name'])

for (r, row) in enumerate(query_data['rows']):
for (c, name) in enumerate(column_names):
v = row.get(name)
if isinstance(v, list) or isinstance(v, dict):
v = str(v).encode('utf-8')
sheet.write(r + 1, c, v)

book.close()

return s.getvalue()
4 changes: 2 additions & 2 deletions tests/models/test_query_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from tests import BaseTestCase

from redash import models
from redash.utils import utcnow
from redash.utils import utcnow, json_dumps


class QueryResultTest(BaseTestCase):
Expand Down Expand Up @@ -66,4 +66,4 @@ def test_store_result_does_not_modify_query_update_at(self):

models.QueryResult.store_result(query.org_id, query.data_source, query.query_hash, query.query_text, "", 0, utcnow())

self.assertEqual(original_updated_at, query.updated_at)
self.assertEqual(original_updated_at, query.updated_at)
Empty file added tests/serializers/__init__.py
Empty file.
49 changes: 49 additions & 0 deletions tests/serializers/test_query_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import datetime
import csv
import cStringIO

from tests import BaseTestCase

from redash import models
from redash.utils import utcnow, json_dumps
from redash.serializers import serialize_query_result_to_csv


data = {
"rows": [
{"datetime": "2019-05-26T12:39:23.026Z", "bool": True, "date": "2019-05-26"},
{"datetime": "", "bool": False, "date": ""},
{"datetime": None, "bool": None, "date": None},
],
"columns": [
{"friendly_name": "bool", "type": "boolean", "name": "bool"},
{"friendly_name": "date", "type": "datetime", "name": "datetime"},
{"friendly_name": "date", "type": "date", "name": "date"}
]
}

class CsvSerializationTest(BaseTestCase):
def get_csv_content(self):
query_result = self.factory.create_query_result(data=json_dumps(data))
return serialize_query_result_to_csv(query_result)

def test_serializes_booleans_correctly(self):
with self.app.test_request_context('/'):
parsed = csv.DictReader(cStringIO.StringIO(self.get_csv_content()))
rows = list(parsed)

self.assertEqual(rows[0]['bool'], 'true')
self.assertEqual(rows[1]['bool'], 'false')
self.assertEqual(rows[2]['bool'], '')

def test_serializes_datatime_with_correct_format(self):
with self.app.test_request_context('/'):
parsed = csv.DictReader(cStringIO.StringIO(self.get_csv_content()))
rows = list(parsed)

self.assertEqual(rows[0]['datetime'], '26/05/19 12:39')
self.assertEqual(rows[1]['datetime'], '')
self.assertEqual(rows[2]['datetime'], '')
self.assertEqual(rows[0]['date'], '26/05/19')
self.assertEqual(rows[1]['date'], '')
self.assertEqual(rows[2]['date'], '')

0 comments on commit fbaa3c2

Please sign in to comment.