From 36db2b31456fd9086f10084c1b384ce1054e2c5a Mon Sep 17 00:00:00 2001 From: KBolashev Date: Thu, 1 Aug 2024 15:03:39 +0300 Subject: [PATCH 1/2] Handle datetimes correctly for dataframe + multivalue --- dagshub/data_engine/model/datasource.py | 9 +++++++++ tests/data_engine/test_datasource.py | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/dagshub/data_engine/model/datasource.py b/dagshub/data_engine/model/datasource.py index d5bc3bfa..4fcfea74 100644 --- a/dagshub/data_engine/model/datasource.py +++ b/dagshub/data_engine/model/datasource.py @@ -598,6 +598,9 @@ def _df_to_metadata( sub_val = sub_val.encode("utf-8") if isinstance(sub_val, bytes): sub_val = wrap_bytes(sub_val) + if isinstance(sub_val, datetime.datetime): + time_zone = _get_datetime_utc_offset(sub_val) + sub_val = int(sub_val.timestamp() * 1000) res.append( DatapointMetadataUpdateEntry( url=datapoint, key=key, value=str(sub_val), valueType=value_type, allowMultiple=True @@ -619,6 +622,9 @@ def _df_to_metadata( val = val.encode("utf-8") if isinstance(val, bytes): val = wrap_bytes(val) + if isinstance(val, datetime.datetime): + time_zone = _get_datetime_utc_offset(val) + val = int(val.timestamp() * 1000) res.append( DatapointMetadataUpdateEntry( url=datapoint, @@ -1449,6 +1455,9 @@ def update_metadata(self, datapoints: Union[List[str], str], metadata: Dict[str, continue if isinstance(v, str) and k in document_fields: v = v.encode("utf-8") + if isinstance(v, datetime.datetime): + time_zone = _get_datetime_utc_offset(v) + v = int(v.timestamp() * 1000) if isinstance(v, bytes): sub_val = wrap_bytes(sub_val) self._metadata_entries.append( diff --git a/tests/data_engine/test_datasource.py b/tests/data_engine/test_datasource.py index 9d968489..8e98e702 100644 --- a/tests/data_engine/test_datasource.py +++ b/tests/data_engine/test_datasource.py @@ -149,8 +149,12 @@ def test_pandas_timestamp(ds): actual = Datasource._df_to_metadata(ds, df) expected = [ - DatapointMetadataUpdateEntry("test1", "key1", "2020-10-10 10:10:00", MetadataFieldType.DATETIME), - DatapointMetadataUpdateEntry("test2", "key1", "2030-10-10 10:20:20", MetadataFieldType.DATETIME), + DatapointMetadataUpdateEntry( + "test1", "key1", f"{int(data_dict['key1'][0].timestamp()) * 1000}", MetadataFieldType.DATETIME + ), + DatapointMetadataUpdateEntry( + "test2", "key1", f"{int(data_dict['key1'][1].timestamp()) * 1000}", MetadataFieldType.DATETIME + ), ] assert expected == actual From c05cceeab574a03ec935c5bc25135b4ef0c6f89b Mon Sep 17 00:00:00 2001 From: KBolashev Date: Thu, 1 Aug 2024 15:17:21 +0300 Subject: [PATCH 2/2] Send timezone too --- dagshub/data_engine/model/datasource.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/dagshub/data_engine/model/datasource.py b/dagshub/data_engine/model/datasource.py index 4fcfea74..5839d596 100644 --- a/dagshub/data_engine/model/datasource.py +++ b/dagshub/data_engine/model/datasource.py @@ -584,6 +584,7 @@ def _df_to_metadata( update_entry.allowMultiple = True for sub_val in val: value_type = field_value_types.get(key) + time_zone = None if value_type is None: value_type = metadataTypeLookup[type(sub_val)] field_value_types[key] = value_type @@ -603,7 +604,12 @@ def _df_to_metadata( sub_val = int(sub_val.timestamp() * 1000) res.append( DatapointMetadataUpdateEntry( - url=datapoint, key=key, value=str(sub_val), valueType=value_type, allowMultiple=True + url=datapoint, + key=key, + value=str(sub_val), + valueType=value_type, + allowMultiple=True, + timeZone=time_zone, ) ) else: @@ -615,6 +621,7 @@ def _df_to_metadata( if value_type == MetadataFieldType.BLOB and not isinstance(val, bytes): if key not in document_fields: continue + time_zone = None # Pandas quirk - integers are floats on the backend if value_type == MetadataFieldType.INTEGER: val = int(val) @@ -632,6 +639,7 @@ def _df_to_metadata( value=str(val), valueType=value_type, allowMultiple=key in multivalue_fields, + timeZone=time_zone, ) ) return res @@ -1444,6 +1452,7 @@ def update_metadata(self, datapoints: Union[List[str], str], metadata: Dict[str, if e.key == k: e.allowMultiple = True for sub_val in v: + time_zone = None value_type = field_value_types.get(k) if value_type is None: value_type = metadataTypeLookup[type(sub_val)] @@ -1468,6 +1477,7 @@ def update_metadata(self, datapoints: Union[List[str], str], metadata: Dict[str, # todo: preliminary type check valueType=value_type, allowMultiple=k in self._multivalue_fields, + timeZone=time_zone, ) )