diff --git a/python/tests/conftest.py b/python/tests/conftest.py
new file mode 100644
index 0000000000..145f346918
--- /dev/null
+++ b/python/tests/conftest.py
@@ -0,0 +1,22 @@
+#
+# Copyright 2022 Hopsworks AB
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Fixture modules are registered through pytest_plugins below; tests receive
+# fixtures by argument name, so no direct fixture import is needed here.
+
+pytest_plugins = [
+    "tests.fixtures.backend_fixtures",
+]
\ No newline at end of file
diff --git a/python/tests/fixtures/__init__.py b/python/tests/fixtures/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/python/tests/fixtures/backend_fixtures.json b/python/tests/fixtures/backend_fixtures.json
new file mode 100644
index 0000000000..6d121c0fe8
--- /dev/null
+++ b/python/tests/fixtures/backend_fixtures.json
@@ -0,0 +1,76 @@
+{
+    "get_feature_group": {
+        "response": [
+            {
+                "type": "cachedFeaturegroupDTO",
+                "created": "2022-08-01T11:07:55Z",
+                "creator": {
+                    "email": "admin@hopsworks.ai",
+                    "firstName": "Admin",
+                    "lastName": "Admin",
+                    "maxNumProjects": 0,
+                    "numActiveProjects": 0,
+                    "numCreatedProjects": 0,
+                    "numRemainingProjects": 0,
+                    "status": 0,
+                    "testUser": false,
+                    "tos": false,
+                    "toursState": 0,
+                    "twoFactor": false
+                },
+                "description": "",
+                "featurestoreId": 67,
+                "featurestoreName": "test_featurestore",
+                "id": 15,
+                "location": "hopsfs://10.0.2.15:8020/apps/hive/warehouse/test_featurestore.db/fg_test_1",
+                "name": "fg_test",
+                "statisticsConfig": {
+                    "columns": [],
+                    "correlations": false,
+ "enabled": true, + "exactUniqueness": false, + "histograms": false + }, + "version": 1, + "features": [ + { + "defaultValue": null, + "featureGroupId": 15, + "hudiPrecombineKey": true, + "name": "intt", + "onlineType": "int", + "partition": false, + "primary": true, + "type": "int" + }, + { + "defaultValue": null, + "featureGroupId": 15, + "hudiPrecombineKey": false, + "name": "stringt", + "onlineType": "varchar(1000)", + "partition": false, + "primary": false, + "type": "string" + } + ], + "onlineTopicName": "119_15_fg_test_1_onlinefs", + "onlineEnabled": true, + "timeTravelFormat": "HUDI" + } + ], + "method": "GET", + "path_params": [ + "project", + "119", + "featurestores", + 67, + "featuregroups", + "fg_test" + ], + "query_params": { + "version": 1 + }, + "headers": null + } +} \ No newline at end of file diff --git a/python/tests/fixtures/backend_fixtures.py b/python/tests/fixtures/backend_fixtures.py new file mode 100644 index 0000000000..ba8bece348 --- /dev/null +++ b/python/tests/fixtures/backend_fixtures.py @@ -0,0 +1,25 @@ +# +# Copyright 2022 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pytest +import json + +with open('python/tests/fixtures/backend_fixtures.json', 'r') as json_file: + backend_fixtures_json = json.load(json_file) + +@pytest.fixture +def backend_fixtures(): + return backend_fixtures_json diff --git a/python/tests/fixtures/generate_backend_fixtures.ipynb b/python/tests/fixtures/generate_backend_fixtures.ipynb new file mode 100644 index 0000000000..19053a0f35 --- /dev/null +++ b/python/tests/fixtures/generate_backend_fixtures.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "08e91fdc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting Spark application\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
IDApplication IDKindStateSpark UIDriver log
11application_1659447865675_0004pysparkidleLinkLink
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SparkSession available as 'spark'.\n", + "Connected. Call `.close()` to terminate connection gracefully." + ] + } + ], + "source": [ + "import hsfs\n", + "# Create a connection\n", + "connection = hsfs.connection()\n", + "# Get the feature store handle for the project's feature store\n", + "fs = connection.get_feature_store()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6b53d31", + "metadata": {}, + "outputs": [], + "source": [ + "# Preserve original send_request method\n", + "from hsfs.client import base\n", + "send_request_original = base.Client._send_request" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "1a316673", + "metadata": {}, + "outputs": [], + "source": [ + "def wrap_send_request(response_instance):\n", + "\n", + " def _send_request_wrap(\n", + " self,\n", + " method,\n", + " path_params,\n", + " query_params=None,\n", + " headers=None,\n", + " data=None,\n", + " stream=False,\n", + " files=None,\n", + " ):\n", + " global send_request_original\n", + " response = send_request_original(self, method, path_params, query_params, headers, data, stream, files)\n", + " response_instance.response = response\n", + " response_instance.method = method\n", + " response_instance.path_params = path_params\n", + " response_instance.query_params = query_params\n", + " response_instance.headers = headers\n", + " response_instance.data = data\n", + " response_instance.stream = stream\n", + " response_instance.files = files\n", + " return response\n", + "\n", + " hsfs.client.base.Client._send_request = _send_request_wrap\n", + " \n", + "def unwrap_send_request():\n", + " global send_request_original\n", + " base.Client._send_request = send_request_original" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "a6141889", + "metadata": {}, + 
"outputs": [], + "source": [ + "class RequestResponseInstance:\n", + " def __init__(self):\n", + " self.response = None\n", + " self.method = None\n", + " self.path_params = None\n", + " self.query_params = None\n", + " self.headers = None\n", + " self.data = None\n", + " self.stream = None\n", + " self.files = None\n", + " \n", + " def to_dict(self):\n", + " d = {}\n", + " d[\"response\"] = self.response\n", + " d[\"method\"] = self.method\n", + " d[\"path_params\"] = self.path_params\n", + " d[\"query_params\"] = self.query_params\n", + " d[\"headers\"] = self.headers\n", + " return d\n", + "\n", + "class ResponseGenerator:\n", + " \n", + " def __init__(self, name):\n", + " self.name = name\n", + "\n", + " def prepare(self):\n", + " pass\n", + "\n", + " def call(self):\n", + " pass\n", + "\n", + " def cleanup(self):\n", + " pass\n", + " \n", + " def run(self):\n", + " global responses_dict\n", + " if self.name in responses_dict:\n", + " raise Exception(\"fixture was already determined. 
remove instance from responses_dict or rename generator to continue.\")\n", + " \n", + " response_instance = RequestResponseInstance()\n", + "\n", + " self.prepare()\n", + " \n", + " wrap_send_request(response_instance)\n", + " self.call()\n", + " unwrap_send_request()\n", + " \n", + " self.cleanup()\n", + " \n", + " responses_dict[self.name] = response_instance.to_dict()\n", + " \n", + " return response_instance" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "7decf4e1", + "metadata": {}, + "outputs": [], + "source": [ + "# specify generators...\n", + "\n", + "class FeatureGroupResponseGenerator(ResponseGenerator):\n", + "\n", + " def prepare(self):\n", + "\n", + " from pyspark.sql.types import StructType, StructField, StringType, IntegerType\n", + " data2 = [(1, \"asd\"),(2, \"asssd\"),(23, \"adssd\"),(1, \"adsasd\"),(7, \"asds\")]\n", + "\n", + " schema = StructType([\n", + " StructField(\"intt\",IntegerType(),True),\n", + " StructField(\"stringt\",StringType(),True)\n", + " ])\n", + "\n", + " df = spark.createDataFrame(data=data2,schema=schema)\n", + "\n", + " from hsfs.feature import Feature\n", + " features = [\n", + " Feature(name=\"intt\",type=\"int\",online_type=\"int\"),\n", + " Feature(name=\"arrt\",type=\"array\",online_type=\"varchar(1000)\")\n", + " ]\n", + "\n", + " features = [\n", + " Feature(name=\"intt\",type=\"int\",online_type=\"int\"),\n", + " Feature(name=\"stringt\",type=\"string\",online_type=\"varchar(1000)\")\n", + " ]\n", + " self.fg = fs.create_feature_group(name=\"fg_test\",\n", + " features=features,\n", + " primary_key=[\"intt\"], # key can not contain null values\n", + " online_enabled=True,\n", + " time_travel_format=\"HUDI\")\n", + "\n", + " \n", + " self.fg.save(df)\n", + "\n", + " def call(self):\n", + " fs.get_feature_group(\"fg_test\", version=1)\n", + " \n", + " def cleanup(self):\n", + " self.fg.delete()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "d9c36062", + "metadata": {}, 
+ "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Feature Group created successfully, explore it at \n", + "https://hopsworks0.logicalclocks.com/p/119/fs/67/fg/1050\n", + "\n", + "VersionWarning: No version provided for creating feature group `fg_test`, incremented version to `9`." + ] + } + ], + "source": [ + "responses_dict = {}\n", + "\n", + "# run generators...\n", + "FeatureGroupResponseGenerator(\"get_feature_group\").run() " + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "bc33a022", + "metadata": {}, + "outputs": [], + "source": [ + "# write responses captured with the generators to 'backend_fixtures.json'\n", + "import json\n", + "import pydoop.hdfs as hdfs\n", + "\n", + "filename = f'hdfs:///Projects/{fs.project_name}/Resources/backend_fixtures.json'\n", + "with hdfs.open(filename, 'wt') as json_file:\n", + " json.dump(responses_dict, json_file, \n", + " indent=4, \n", + " separators=(',',': '))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "PySpark", + "language": "python", + "name": "pysparkkernel" + }, + "language_info": { + "codemirror_mode": { + "name": "python", + "version": 3 + }, + "mimetype": "text/x-python", + "name": "pyspark", + "pygments_lexer": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/python/tests/test_feature_group.py b/python/tests/test_feature_group.py new file mode 100644 index 0000000000..bd2a4cffca --- /dev/null +++ b/python/tests/test_feature_group.py @@ -0,0 +1,28 @@ +# +# Copyright 2022 Hopsworks AB +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# "backend_fixtures" is injected by pytest (see conftest pytest_plugins).
+from hsfs.feature_group import FeatureGroup
+
+class TestFeatureGroup:
+
+    def test_from_response_json(self, backend_fixtures):
+        fg = FeatureGroup.from_response_json(backend_fixtures["get_feature_group"]["response"])[0]
+
+        assert fg.id == 15
+        assert len(fg._features) == 2
+
+