Skip to content

Commit

Permalink
fix: Added Redshift and Spark typecheck to data_source event_timestam…
Browse files Browse the repository at this point in the history
…p_col inference (#2389)

* added Redshift typecheck to data_source event_timestamp_col inference

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>

* address comments

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>

* moved non file data sources into their own test

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>

* addressed comments

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>

* fixed texts

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>

* remove previously defined event_timestamp_column from data_source to allow for inference

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>

* made a deepcopy of data_sources to not affect other tests

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>

* linter

Signed-off-by: Shai Bruhis <shaibruhis@gmail.com>
  • Loading branch information
shaibruhis authored Mar 17, 2022
1 parent e17028d commit 04dea73
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
2 changes: 2 additions & 0 deletions sdk/python/feast/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ def update_data_sources_with_inferred_event_timestamp_col(
assert (
isinstance(data_source, FileSource)
or isinstance(data_source, BigQuerySource)
or isinstance(data_source, RedshiftSource)
or isinstance(data_source, SnowflakeSource)
or "SparkSource" == data_source.__class__.__name__
)

# loop through table columns to find singular match
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,9 @@ class UniversalDataSources:
global_ds: DataSource
field_mapping: DataSource

def values(self):
    """Return a view over this dataclass's field values, in declaration order.

    The objects in the view are the very objects stored on the instance, so
    a caller that mutates one of them mutates the source held here.

    ``dataclasses.asdict`` is deliberately not used: it converts nested
    dataclasses to dicts and deep-copies every other field value, so callers
    would receive a fresh set of copies on every call and any mutation made
    through the result would be silently discarded.
    """
    # Build a shallow name -> value mapping so the return type stays a dict
    # values view, as it was when this was implemented with asdict().
    return {
        field.name: getattr(self, field.name)
        for field in dataclasses.fields(self)
    }.values()


def construct_universal_data_sources(
datasets: UniversalDatasets, data_source_creator: DataSourceCreator
Expand Down
26 changes: 25 additions & 1 deletion sdk/python/tests/integration/registration/test_inference.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from copy import deepcopy

import pandas as pd
import pytest

Expand Down Expand Up @@ -111,7 +113,7 @@ def test_infer_datasource_names_dwh():


@pytest.mark.integration
def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1):
def test_update_file_data_source_with_inferred_event_timestamp_col(simple_dataset_1):
df_with_two_viable_timestamp_cols = simple_dataset_1.copy(deep=True)
df_with_two_viable_timestamp_cols["ts_2"] = simple_dataset_1["ts_1"]

Expand All @@ -138,6 +140,28 @@ def test_update_data_sources_with_inferred_event_timestamp_col(simple_dataset_1)
)


@pytest.mark.integration
@pytest.mark.universal
def test_update_data_sources_with_inferred_event_timestamp_col(universal_data_sources):
    """Check that event_timestamp_column is inferred for each universal data source.

    The explicitly configured event_timestamp_column is cleared on a private
    copy of the sources, inference is run, and every source is then expected
    to report "event_timestamp".
    """
    (_, _, data_sources) = universal_data_sources
    # Deep copy so that clearing the column cannot affect other tests that
    # receive the same fixture value.
    data_sources_copy = deepcopy(data_sources)

    # Materialize the sources exactly once and reuse this list below, so the
    # objects that are cleared, passed to inference, and asserted on are the
    # same objects. Do not rely on values() returning identical objects on
    # repeated calls.
    sources = list(data_sources_copy.values())

    # remove defined event_timestamp_column to allow for inference
    for data_source in sources:
        data_source.event_timestamp_column = None

    update_data_sources_with_inferred_event_timestamp_col(
        sources, RepoConfig(provider="local", project="test"),
    )
    actual_event_timestamp_cols = [
        source.event_timestamp_column for source in sources
    ]

    assert actual_event_timestamp_cols == ["event_timestamp"] * len(sources)


def test_on_demand_features_type_inference():
# Create Feature Views
date_request = RequestDataSource(
Expand Down

0 comments on commit 04dea73

Please sign in to comment.