vega · johnmdonich · Dec 26, 2019 · Dec 26, 2019 · Jan 9, 2020 · Jan 9, 2020
diff --git a/vega/tests/test_utils.py b/vega/tests/test_utils.py
@@ -1,6 +1,8 @@
 import json
 import os
 import copy
+import datetime
+from dateutil import parser
 
 import numpy as np
 import pandas as pd
@@ -28,20 +30,29 @@
 
 
 def test_sanitize_dataframe():
+
+    def parse_pydate(dt):
+        # Parse a date string into a datetime.date; None is passed through.
+        if dt is None:
+            return None
+        return parser.parse(dt).date()
+
     # create a dataframe with various types
     df = pd.DataFrame({'s': list('abcde'),
                        'f': np.arange(5, dtype=float),
                        'i': np.arange(5, dtype=int),
                        'b': np.array([True, False, True, True, False]),
                        'd': pd.date_range('2012-01-01', periods=5, freq='H'),
                        'c': pd.Series(list('ababc'), dtype='category'),
-                       'o': pd.Series([np.array(i) for i in range(5)])})
+                       'o': pd.Series([np.array(i) for i in range(5)]),
+                       'od': pd.Series([datetime.date(2019, m, 1) for m in range(1, 6)])})
 
     # add some nulls
     df.iloc[0, df.columns.get_loc('s')] = None
     df.iloc[0, df.columns.get_loc('f')] = np.nan
     df.iloc[0, df.columns.get_loc('d')] = pd.NaT
     df.iloc[0, df.columns.get_loc('o')] = np.array(np.nan)
+    df.iloc[0, df.columns.get_loc('od')] = None
 
     # JSON serialize. This will fail on non-sanitized dataframes
     df_clean = sanitize_dataframe(df)
@@ -59,6 +70,8 @@ def test_sanitize_dataframe():
             # astype(datetime) introduces time-zone issues:
             # to_datetime() does not.
             df2[col] = pd.to_datetime(df2[col])
+        elif col == 'od':
+            df2[col] = df2[col].apply(parse_pydate)
         else:
             df2[col] = df2[col].astype(df[col].dtype)
 

diff --git a/vega/utils.py b/vega/utils.py
@@ -1,6 +1,7 @@
 import cgi
 import codecs
 import collections
+import datetime
 import os.path
 
 
@@ -43,9 +44,11 @@ def sanitize_dataframe(df):
     if isinstance(df.columns, pd.core.index.MultiIndex):
         raise ValueError('Hierarchical indices not supported')
 
-    def to_list_if_array(val):
+    def parse_object_column_type(val):  # ndarray -> list, plain date -> ISO string
         if isinstance(val, np.ndarray):
             return val.tolist()
+        elif isinstance(val, datetime.date) and not isinstance(val, datetime.datetime):
+            return "{dt:%Y-%m-%dT00:00:00}".format(dt=val)  # datetimes skipped: would lose time
         else:
             return val
 
@@ -72,8 +75,9 @@ def to_list_if_array(val):
             df[col_name] = df[col_name].astype(str).replace('NaT', '')
         elif dtype == object:
             # Convert numpy arrays saved as objects to lists
-            # Arrays are not JSON serializable
-            col = df[col_name].apply(to_list_if_array, convert_dtype=False)
+            # Handle datetime.date typed objects
+            # Arrays and datetime.date are not JSON serializable
+            col = df[col_name].apply(parse_object_column_type, convert_dtype=False)
             df[col_name] = col.where(col.notnull(), None)
     return df