-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CSV: correctly serialize booleans and dates. #3841
Changes from 1 commit
9ae2ada
51fce50
5e0ccb9
f26aded
feda3c9
c6ff138
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import cStringIO | ||
import csv | ||
import xlsxwriter | ||
from dateutil.parser import parse as parse_date | ||
from redash.utils import json_loads, UnicodeWriter | ||
from redash.query_runner import (TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME) | ||
from redash.authentication.org_resolving import current_org | ||
|
||
|
||
def convert_format(fmt):
    """Translate a token-based date/time format into a ``strftime`` format.

    The following tokens are rewritten; any other text is left untouched:

    ``DD`` -> ``%d``, ``MM`` -> ``%m``, ``YYYY`` -> ``%Y``, ``YY`` -> ``%y``,
    ``HH`` -> ``%H``, ``mm`` -> ``%M``, ``ss`` -> ``%S``.

    :param fmt: format string, e.g. ``'DD/MM/YY HH:mm'``.
    :returns: the equivalent ``strftime`` format string.
    """
    return (
        fmt.replace('DD', '%d')
        .replace('MM', '%m')
        # 'YYYY' must be handled before 'YY', otherwise a four digit year
        # token would be turned into two two-digit ones.
        .replace('YYYY', '%Y')
        .replace('YY', '%y')
        .replace('HH', '%H')
        .replace('mm', '%M')
        # Seconds are '%S'; lowercase '%s' is a non-portable extension that
        # yields the Unix timestamp on some platforms.
        .replace('ss', '%S')
    )
|
||
def serialize_query_result_to_csv(query_result):
    """Render a query result as CSV text.

    ``query_result.data`` is decoded with ``json_loads`` and must contain
    ``columns`` (dicts with ``name`` and ``type``) and ``rows`` (dicts keyed
    by column name). Before a row is written:

    * values in boolean columns are written as ``true`` / ``false``;
    * values in date columns are parsed and re-formatted with the current
      organization's ``date_format`` setting;
    * values in datetime columns are parsed and re-formatted with the
      ``date_format`` and ``time_format`` settings joined by a space.

    Missing or empty (falsy) date/datetime values are left as they are.

    :param query_result: object whose ``data`` attribute holds the JSON
        encoded result.
    :returns: the CSV document accumulated in the in-memory buffer.
    """
    s = cStringIO.StringIO()
    query_data = json_loads(query_result.data)

    fieldnames = []
    bool_columns = []
    date_columns = []
    datetime_columns = []

    for col in query_data['columns']:
        name = col['name']
        fieldnames.append(name)
        # A column has a single type, so the branches are mutually exclusive.
        if col['type'] == TYPE_BOOLEAN:
            bool_columns.append(name)
        elif col['type'] == TYPE_DATE:
            date_columns.append(name)
        elif col['type'] == TYPE_DATETIME:
            datetime_columns.append(name)

    writer = csv.DictWriter(s, extrasaction="ignore", fieldnames=fieldnames)
    writer.writer = UnicodeWriter(s)
    writer.writeheader()

    # Resolved on first use so the organization settings are only consulted
    # when a row really carries a date/datetime value, and only once.
    date_format = None
    datetime_format = None

    for row in query_data['rows']:
        for col in bool_columns:
            value = row.get(col)
            # Membership uses equality on purpose: 1/0 stored in a boolean
            # column are written as true/false too, while None and any other
            # value pass through unchanged.
            if value in (True, False):
                row[col] = "true" if value else "false"

        for col in date_columns:
            # .get() instead of indexing: a row that lacks the column must
            # not raise KeyError.
            value = row.get(col)
            if not value:
                continue
            if date_format is None:
                date_format = convert_format(current_org.get_setting('date_format'))
            row[col] = parse_date(value).strftime(date_format)

        for col in datetime_columns:
            value = row.get(col)
            if not value:
                continue
            if datetime_format is None:
                datetime_format = convert_format('{} {}'.format(
                    current_org.get_setting('date_format'),
                    current_org.get_setting('time_format')))
            row[col] = parse_date(value).strftime(datetime_format)

        writer.writerow(row)

    return s.getvalue()
|
||
|
||
def serialize_query_result_to_xlsx(query_result):
    """Render a query result as an XLSX workbook.

    ``query_result.data`` is decoded with ``json_loads``. The workbook is
    created in ``constant_memory`` mode with one worksheet named ``result``:
    row 0 holds the column names and every following row holds the values of
    one result row, looked up by column name (absent values are written as
    ``None``). ``list`` and ``dict`` values are written as their ``str()``
    representation encoded to UTF-8.

    :param query_result: object whose ``data`` attribute holds the JSON
        encoded result.
    :returns: the content of the in-memory buffer after the workbook is closed.
    """
    output = cStringIO.StringIO()
    payload = json_loads(query_result.data)

    workbook = xlsxwriter.Workbook(output, {'constant_memory': True})
    worksheet = workbook.add_worksheet("result")

    # Header row.
    headers = [column['name'] for column in payload['columns']]
    for col_idx, header in enumerate(headers):
        worksheet.write(0, col_idx, header)

    # Data rows start right below the header, hence the offset of 1.
    for row_idx, record in enumerate(payload['rows'], 1):
        for col_idx, header in enumerate(headers):
            cell = record.get(header)
            if isinstance(cell, (list, dict)):
                cell = str(cell).encode('utf-8')
            worksheet.write(row_idx, col_idx, cell)

    workbook.close()

    return output.getvalue()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import datetime | ||
import csv | ||
import cStringIO | ||
|
||
from tests import BaseTestCase | ||
|
||
from redash import models | ||
from redash.utils import utcnow, json_dumps | ||
from redash.serializers import serialize_query_result_to_csv | ||
|
||
|
||
# Fixture shared by the CSV serialization tests: one row with real values,
# one with empty strings / False, and one made of None values only.
data = {
    "columns": [
        {"name": "bool", "type": "boolean", "friendly_name": "bool"},
        {"name": "datetime", "type": "datetime", "friendly_name": "date"},
        {"name": "date", "type": "date", "friendly_name": "date"},
    ],
    "rows": [
        {"bool": True, "date": "2019-05-26", "datetime": "2019-05-26T12:39:23.026Z"},
        {"bool": False, "date": "", "datetime": ""},
        {"bool": None, "date": None, "datetime": None},
    ],
}
|
||
def get_csv_content(factory):
    """Serialize the module-level ``data`` fixture to CSV.

    A query result holding the JSON-encoded fixture is created through
    ``factory.create_query_result`` and handed to
    ``serialize_query_result_to_csv``; whatever the serializer returns is
    returned unchanged.
    """
    payload = json_dumps(data)
    stored_result = factory.create_query_result(data=payload)
    csv_text = serialize_query_result_to_csv(stored_result)
    return csv_text
|
||
|
||
class CsvSerializationTest(BaseTestCase):
    """Checks how booleans, dates and datetimes end up in the CSV export."""

    def _read_csv_rows(self):
        """Serialize the fixture inside a request context and parse it back."""
        with self.app.test_request_context('/'):
            csv_text = get_csv_content(self.factory)
            reader = csv.DictReader(cStringIO.StringIO(csv_text))
            return list(reader)

    def test_serializes_booleans_correctly(self):
        rows = self._read_csv_rows()

        # True / False / None, in fixture order.
        expected_values = ['true', 'false', '']
        for index, expected in enumerate(expected_values):
            self.assertEqual(rows[index]['bool'], expected)

    def test_serializes_datatime_with_correct_format(self):
        rows = self._read_csv_rows()

        # Only the first fixture row carries values; the empty string and
        # None rows must stay empty.
        expected_by_column = [
            ('datetime', ['26/05/19 12:39', '', '']),
            ('date', ['26/05/19', '', '']),
        ]
        for column, expected_values in expected_by_column:
            for index, expected in enumerate(expected_values):
                self.assertEqual(rows[index][column], expected)
|
||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Used the opportunity to move this serialization logic from `redash.models` to `redash.serializers`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great! and +1 for tests!