From 227f487650dba73948bf53d9b330f4379b4b1395 Mon Sep 17 00:00:00 2001 From: Danny Chiao Date: Thu, 11 Aug 2022 19:15:26 -0400 Subject: [PATCH] fix: Fix incorrect on demand feature view diffing and improve Java tests (#3074) * fix: Fix ODFV bug Signed-off-by: Danny Chiao Signed-off-by: Francisco Javier Arceo --- .github/workflows/java_master_only.yml | 88 +++++++++++++++++- .github/workflows/java_pr.yml | 86 +++++++++++++++++ java/CONTRIBUTING.md | 1 + java/serving/README.md | 2 + java/serving/pom.xml | 22 +++++ .../docker-compose/feast10/definitions.py | 8 +- .../docker-compose/feast10/registry.db | Bin 14203 -> 0 bytes .../docker-compose/feast10/setup_it.py | 86 +++++++++++++++++ sdk/python/feast/diff/registry_diff.py | 4 +- .../tests/unit/diff/test_registry_diff.py | 56 +++++++++++ 10 files changed, 344 insertions(+), 9 deletions(-) delete mode 100644 java/serving/src/test/resources/docker-compose/feast10/registry.db create mode 100644 java/serving/src/test/resources/docker-compose/feast10/setup_it.py diff --git a/.github/workflows/java_master_only.yml b/.github/workflows/java_master_only.yml index f5297615f6..194024a168 100644 --- a/.github/workflows/java_master_only.yml +++ b/.github/workflows/java_master_only.yml @@ -69,6 +69,52 @@ jobs: java-version: '11' java-package: jdk architecture: x64 + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- - uses: actions/cache@v2 with: path: ~/.m2/repository @@ -95,10 +141,46 @@ jobs: java-version: '11' java-package: jdk architecture: x64 - - uses: actions/setup-python@v2 + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 with: - python-version: '3.8' - architecture: 'x64' + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies - uses: actions/cache@v2 with: path: ~/.m2/repository diff --git a/.github/workflows/java_pr.yml b/.github/workflows/java_pr.yml index 328a8e7c7b..c552428664 100644 --- a/.github/workflows/java_pr.yml +++ b/.github/workflows/java_pr.yml @@ -40,6 +40,52 @@ jobs: java-version: '11' java-package: jdk architecture: x64 + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies + - uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-it-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-it-maven- - uses: actions/cache@v2 with: path: ~/.m2/repository @@ -129,6 +175,46 @@ jobs: aws-region: us-west-2 - name: Use AWS CLI run: aws sts get-caller-identity + - name: Setup Python (to call feast apply) + uses: actions/setup-python@v2 + id: setup-python + with: + python-version: 3.8 + architecture: x64 + - name: Setup Go + id: setup-go + uses: actions/setup-go@v2 + with: + go-version: 1.18.0 + - name: Upgrade pip version + run: | + pip install --upgrade "pip>=21.3.1,<22.1" + - name: Get pip cache dir + id: pip-cache + run: | + echo "::set-output name=dir::$(pip cache dir)" + - name: pip cache + uses: actions/cache@v2 + with: + path: | + ${{ steps.pip-cache.outputs.dir }} + /opt/hostedtoolcache/Python + /Users/runner/hostedtoolcache/Python + key: ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip-${{ hashFiles(format('**/py{0}-ci-requirements.txt', env.PYTHON)) }} + restore-keys: | + ${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-pip- + - name: Install pip-tools + run: pip install pip-tools + - name: Install apache-arrow on ubuntu + run: | + sudo apt update + sudo apt install -y -V ca-certificates lsb-release wget + wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt install -y -V ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb + sudo apt update + sudo apt install -y -V libarrow-dev + - name: Install Python dependencies + run: make install-python-ci-dependencies - name: Run integration tests run: make test-java-integration - name: Save report diff --git a/java/CONTRIBUTING.md b/java/CONTRIBUTING.md index 74549034b9..7ccfe108c0 100644 --- a/java/CONTRIBUTING.md +++ b/java/CONTRIBUTING.md @@ -59,6 +59,7 @@ mvn spotless:apply ### Project Makefile The Project Makefile provides useful shorthands for common development tasks: +> Note: These commands rely on a local version of `feast` (Python) to be installed Run all Unit tests: ``` diff --git a/java/serving/README.md b/java/serving/README.md index a0d87563a9..0a153ceab8 100644 --- a/java/serving/README.md +++ b/java/serving/README.md @@ -136,4 +136,6 @@ Unit & Integration Tests can be used to verify functionality: mvn test -pl serving --also-make # run integration tests mvn verify -pl serving --also-make +# run integration tests with debugger +mvn -Dmaven.failsafe.debug verify -pl serving --also-make ``` \ No newline at end of file diff --git a/java/serving/pom.xml b/java/serving/pom.xml index e597775f9b..9eea11ef96 100644 --- a/java/serving/pom.xml +++ b/java/serving/pom.xml @@ -82,6 +82,28 @@ + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + python + src/test/resources/docker-compose/feast10/ + + setup_it.py + + + feast_test_apply + process-test-resources + + exec + + + + diff --git a/java/serving/src/test/resources/docker-compose/feast10/definitions.py b/java/serving/src/test/resources/docker-compose/feast10/definitions.py index 908f65ce8b..4ac7b807c6 100644 --- a/java/serving/src/test/resources/docker-compose/feast10/definitions.py +++ b/java/serving/src/test/resources/docker-compose/feast10/definitions.py @@ -75,8 +75,9 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: entity = Entity(name="entity") -benchmark_feature_views = [ - FeatureView( +benchmark_feature_views = [] +for i in range(25): + fv = FeatureView( name=f"feature_view_{i}", entities=[entity], ttl=timedelta(seconds=86400), @@ -84,8 +85,7 @@ def transformed_conv_rate(features_df: pd.DataFrame) -> pd.DataFrame: online=True, source=generated_data_source, ) - for i in range(25) -] + benchmark_feature_views.append(fv) benchmark_feature_service = FeatureService( name=f"benchmark_feature_service", features=benchmark_feature_views, diff --git a/java/serving/src/test/resources/docker-compose/feast10/registry.db b/java/serving/src/test/resources/docker-compose/feast10/registry.db deleted file mode 100644 index 746934e3d0a09c348f8046087fd6144d0d237d61..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 14203 zcmc(mUx-|18OCQeyUFI{-!iGK*(+V5w1Mo*`R0G{LX1{4kPC}9y*ZwpbEZ4(?#w!K zc6FhFdZ8{dpx6l%1xYE^_Cg4SLX8vyRxGs8N-0uGAyq5sl@U=YTJU?{-A%sd$vH3= z!bxD6_xqkR^UQB&=R41PzT-7K;~kEMy{&i{^rD5?#l7NFy=dvq_*aj0<8YJ&8^gh) zaVIGiyw7-__72AVq?c?j%r5Ocx?6U}`>b~;2%^pP_3ee3C2#MceW|;&i$#Cwbl3OW z?Pho7Y^xbH&el5h#@Urneyp@we%!8yCTg8II{)rZfAzzI3s3#_z2}SOgQE9V(L1^K z9##jN!?o>Tl!VEsgs%7Bntk!5W8UG;puZIi!z5mqTRQ5^hn>!Db#UpVcOu++B#6SE zeWqmC+ZZj(n%Vg${(1V`>U?qWM7(9+c#!ngWBW4e8)pu6hOvE~=;3=y_w3ow*I(WU zhu_?clOGg)#kbt+cZcEfpxa&R^<(RCFpNjb-QJpgY;AYqLI7SHJ&{ zbd0qrFxCiTjWE^-V~sG@2xE;f)(B&bFxCiTEjNreKmIr!V|@yYb;4LDjCI0TCyaH% zSSO5i!dNGab;4NB4da`C+3BP%F^wrOHV9*bFg6HdgD^G-V}md@2xEgVHV9)QH;g;) zKADcOIR(ZhVQdn{CShz6#wKBG62>NBY!b#MVQl7x@kj6fEFEKO3XCnn*dmNA!q_5= zEyCC$j4i_0B8)A<*vbv#&3`?gjH)gz-XS@^pXVscr{LJuj!aK6B^eX}q281LG2uc_bmM|bNVL)iYfZ&7y;Ryo* z6b5o5y7PzY8ALM^jcZ&Z4Gb_*pNaZR)MuhT6ZM&>&qRGD>gPuE&C9Q65X}rXE;UU~1&8ALN9k84~r4-7C-O+BDsqMCa^!9+FrfP#r?_5nq1L~p(Nw+y10 z>Blv$@dpN&s0JWVFj0*_pkSgJfKOl zeE9neqSIt1!&P+xx z6@h|@>ct{ZFj2i+1PUga-vy(Wets)D_q)^H_0!&|JJ;*?yLF>@xYg^#+#h|mc=xsi z*q>mxI^5o_V82-1(k@hPX_qOtv~bUi2!Mdegp4`$xbxXT6Ena1#e#yP|Zd;L1 zi&b}Cyt=iRb!&0!)?(MK#jjh7VYe2?Zk=k`xDxr6g_C3?d=S(e}p^JH0KH_WG6 zZn+Z4w@{O1Nw+XhmL=Q5JXw}x3-e@IaxKiKT5h`%36M~eWr>e4PnIP-!aP})=m_&< zS%M?Xi{)Pz@4npStm%4Ps{{k$8V1BR42W+S5aTc)&S5~T!+>~)0Wl8)>F#|HNgl!h zb}xAd73^N}5GvTc|U}* z$|t7^N9h9Tx|j2ORIqzF&qoEjm-Bp7uzNYrM+Li=^ZXR|H8@JwO4q%dQ=@|2%Q-bF z*u9)nqk`SbIW;QSy_{30xUa)ex@@}cWzQ2T*uCs}LIt~*eNU)h_p) zT|TH__p;9i73^Mi`lPsT!BM)dy6$CT4Jz2ZY^^~ByO+&1s9^W9y#^KRUN+dIxNpNz zy2QHfWj_ll*uCs%K?S>)JuRqU_p+-673^O2wcPFAwn-hc+7HD6alWLBZ}doq~ehYd!^qbzdrc;lFdVXF-qZ6X?}(L*f@BbcQB-P_YV(JF zTrA_LZKqh#p2nJ5@Q&W81N(Qi^o{gei@vh><@5=>v#11j7M0TOEIRYSfh-2E9e85) z^7Udt|Bbz@tAf}4UB$R$zbF!IkpYZEJB< z-q;S;;%}Gt?m`=tZQpT}lphVZ!m_p=`;U}I!%jJ|_hpsstyOv>)uHS={e)7&QivI(E%Qx2m diff --git a/java/serving/src/test/resources/docker-compose/feast10/setup_it.py b/java/serving/src/test/resources/docker-compose/feast10/setup_it.py new file mode 100644 index 0000000000..733ebdfb49 --- /dev/null +++ b/java/serving/src/test/resources/docker-compose/feast10/setup_it.py @@ -0,0 +1,86 @@ +from pathlib import Path +from feast.repo_config import load_repo_config +from datetime import datetime, timedelta + +import numpy as np +import pandas as pd + +from definitions import ( + benchmark_feature_service, + benchmark_feature_views, + driver, + driver_hourly_stats_view, + entity, + transformed_conv_rate, +) + +from feast import FeatureStore + + +def setup_data(): + start = datetime.now() - timedelta(days=10) + + df = pd.DataFrame() + df["driver_id"] = np.arange(1000, 1010) + df["created"] = datetime.now() + df["conv_rate"] = np.arange(0, 1, 0.1) + df["acc_rate"] = np.arange(0.5, 1, 0.05) + df["avg_daily_trips"] = np.arange(0, 1000, 100) + + # some of rows are beyond 7 days to test OUTSIDE_MAX_AGE status + df["event_timestamp"] = start + pd.Series(np.arange(0, 10)).map( + lambda days: timedelta(days=days) + ) + + # Store data in parquet files. Parquet is convenient for local development mode. For + # production, you can use your favorite DWH, such as BigQuery. See Feast documentation + # for more info. + df.to_parquet("driver_stats.parquet") + + # For Benchmarks + # Please read more in Feast RFC-031 + # (link https://docs.google.com/document/d/12UuvTQnTTCJhdRgy6h10zSbInNGSyEJkIxpOcgOen1I/edit) + # about this benchmark setup + def generate_data( + num_rows: int, num_features: int, destination: str + ) -> pd.DataFrame: + features = [f"feature_{i}" for i in range(num_features)] + columns = ["entity", "event_timestamp"] + features + df = pd.DataFrame(0, index=np.arange(num_rows), columns=columns) + df["event_timestamp"] = datetime.utcnow() + for column in features: + df[column] = np.random.randint(1, num_rows, num_rows) + + df["entity"] = "key-" + pd.Series(np.arange(1, num_rows + 1)).astype( + pd.StringDtype() + ) + + df.to_parquet(destination) + + generate_data(10**3, 250, "benchmark_data.parquet") + + +def main(): + print("Running setup_it.py") + + setup_data() + existing_repo_config = load_repo_config(Path(".")) + + # Update to default online store since otherwise, relies on Dockerized Redis service + fs = FeatureStore(config=existing_repo_config.copy(update={"online_store": {}})) + fs.apply( + [ + driver_hourly_stats_view, + transformed_conv_rate, + driver, + entity, + benchmark_feature_service, + *benchmark_feature_views, + ] + ) + + print("setup_it finished") + + +if __name__ == "__main__": + main() diff --git a/sdk/python/feast/diff/registry_diff.py b/sdk/python/feast/diff/registry_diff.py index 56d5b84c71..37c8af9155 100644 --- a/sdk/python/feast/diff/registry_diff.py +++ b/sdk/python/feast/diff/registry_diff.py @@ -144,8 +144,8 @@ def diff_registry_objects( continue elif getattr(current_spec, _field.name) != getattr(new_spec, _field.name): if _field.name == "user_defined_function": - current_spec = cast(OnDemandFeatureViewSpec, current_proto) - new_spec = cast(OnDemandFeatureViewSpec, new_proto) + current_spec = cast(OnDemandFeatureViewSpec, current_spec) + new_spec = cast(OnDemandFeatureViewSpec, new_spec) current_udf = current_spec.user_defined_function new_udf = new_spec.user_defined_function for _udf_field in current_udf.DESCRIPTOR.fields: diff --git a/sdk/python/tests/unit/diff/test_registry_diff.py b/sdk/python/tests/unit/diff/test_registry_diff.py index 61f4f77702..8af6c50a13 100644 --- a/sdk/python/tests/unit/diff/test_registry_diff.py +++ b/sdk/python/tests/unit/diff/test_registry_diff.py @@ -1,9 +1,14 @@ +import pandas as pd + +from feast import Field from feast.diff.registry_diff import ( diff_registry_objects, tag_objects_for_keep_delete_update_add, ) from feast.entity import Entity from feast.feature_view import FeatureView +from feast.on_demand_feature_view import on_demand_feature_view +from feast.types import String from tests.utils.data_source_test_creator import prep_file_source @@ -89,3 +94,54 @@ def test_diff_registry_objects_feature_views(simple_dataset_1): assert feast_object_diffs.feast_object_property_diffs[0].val_declared == { "when": "after" } + + +def test_diff_odfv(simple_dataset_1): + with prep_file_source(df=simple_dataset_1, timestamp_field="ts_1") as file_source: + entity = Entity(name="id", join_keys=["id"]) + fv = FeatureView( + name="fv2", + entities=[entity], + source=file_source, + tags={"when": "before"}, + ) + + @on_demand_feature_view( + sources=[fv], + schema=[Field(name="first_char", dtype=String)], + ) + def pre_changed(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["first_char"] = inputs["string_col"].str[:1].astype("string") + return df + + @on_demand_feature_view( + sources=[fv], + schema=[Field(name="first_char", dtype=String)], + ) + def post_changed(inputs: pd.DataFrame) -> pd.DataFrame: + df = pd.DataFrame() + df["first_char"] = inputs["string_col"].str[:1].astype("string") + "hi" + return df + + feast_object_diffs = diff_registry_objects( + pre_changed, pre_changed, "on demand feature view" + ) + assert len(feast_object_diffs.feast_object_property_diffs) == 0 + + feast_object_diffs = diff_registry_objects( + pre_changed, post_changed, "on demand feature view" + ) + + # Note that user_defined_function.body is excluded because it always changes (dill is non-deterministic), even + # if no code is changed + assert len(feast_object_diffs.feast_object_property_diffs) == 3 + assert feast_object_diffs.feast_object_property_diffs[0].property_name == "name" + assert ( + feast_object_diffs.feast_object_property_diffs[1].property_name + == "user_defined_function.name" + ) + assert ( + feast_object_diffs.feast_object_property_diffs[2].property_name + == "user_defined_function.body_text" + )