Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: parse date values #150

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion vega/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import os
import copy
import datetime
from dateutil import parser

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -28,20 +30,29 @@


def test_sanitize_dataframe():

def parse_pydate(dt):
    """Turn a serialized date string back into a ``datetime.date``.

    ``None`` is passed through unchanged so null entries in the column
    survive the round trip; any other value is handed to
    ``parser.parse`` and reduced to its date component.
    """
    if dt is None:
        return None
    return parser.parse(dt).date()

# create a dataframe with various types
df = pd.DataFrame({'s': list('abcde'),
'f': np.arange(5, dtype=float),
'i': np.arange(5, dtype=int),
'b': np.array([True, False, True, True, False]),
'd': pd.date_range('2012-01-01', periods=5, freq='H'),
'c': pd.Series(list('ababc'), dtype='category'),
'o': pd.Series([np.array(i) for i in range(5)])})
'o': pd.Series([np.array(i) for i in range(5)]),
'od': pd.Series([datetime.date(2019, m, 1) for m in range(1, 6)])})

# add some nulls
df.iloc[0, df.columns.get_loc('s')] = None
df.iloc[0, df.columns.get_loc('f')] = np.nan
df.iloc[0, df.columns.get_loc('d')] = pd.NaT
df.iloc[0, df.columns.get_loc('o')] = np.array(np.nan)
df.iloc[0, df.columns.get_loc('od')] = None

# JSON serialize. This will fail on non-sanitized dataframes
df_clean = sanitize_dataframe(df)
Expand All @@ -59,6 +70,8 @@ def test_sanitize_dataframe():
# astype(datetime) introduces time-zone issues:
# to_datetime() does not.
df2[col] = pd.to_datetime(df2[col])
elif col == 'od':
df2[col] = df2[col].apply(parse_pydate)
else:
df2[col] = df2[col].astype(df[col].dtype)

Expand Down
10 changes: 7 additions & 3 deletions vega/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import cgi
import codecs
import collections
import datetime
import os.path


Expand Down Expand Up @@ -43,9 +44,11 @@ def sanitize_dataframe(df):
if isinstance(df.columns, pd.core.index.MultiIndex):
raise ValueError('Hierarchical indices not supported')

def to_list_if_array(val):
def parse_object_column_type(val):
    """Convert one object-column value into a JSON-serializable form.

    Parameters
    ----------
    val : object
        A single element taken from an ``object``-dtype column.

    Returns
    -------
    object
        * ``numpy.ndarray``    -> nested Python list (``ndarray.tolist()``)
        * ``datetime.datetime`` -> ISO 8601 string with the time of day kept
        * ``datetime.date``     -> ``"YYYY-MM-DDT00:00:00"`` string
        * anything else (including ``None`` and ``pd.NaT``) -> unchanged
    """
    if isinstance(val, np.ndarray):
        return val.tolist()
    if val is pd.NaT:
        # NaT passes the datetime isinstance checks below but is a null
        # marker; leave it alone so the caller's notnull() mask can
        # replace it with None.
        return val
    if isinstance(val, datetime.datetime):
        # datetime.datetime is a subclass of datetime.date, so it must be
        # tested first: the date-only format below would otherwise discard
        # the time of day.
        return val.isoformat()
    if isinstance(val, datetime.date):
        return "{dt:%Y-%m-%dT00:00:00}".format(dt=val)
    return val

Expand All @@ -72,8 +75,9 @@ def to_list_if_array(val):
df[col_name] = df[col_name].astype(str).replace('NaT', '')
elif dtype == object:
# Convert numpy arrays saved as objects to lists
# Arrays are not JSON serializable
col = df[col_name].apply(to_list_if_array, convert_dtype=False)
# Handle datetime.date typed objects
# Arrays and datetime.date are not JSON serializable
col = df[col_name].apply(parse_object_column_type, convert_dtype=False)
df[col_name] = col.where(col.notnull(), None)
return df

Expand Down