From 49d5a5dc641f1f6823d45bade6f6abc837243deb Mon Sep 17 00:00:00 2001 From: Blair Chen Date: Mon, 26 Sep 2022 23:14:31 +0800 Subject: [PATCH] Add e2e test for purview registry and rbac registry (#689) * Add e2e test for purview registry and rbac registry * Add purview and rbac env e2e to registry tests * Fix merge issue --- feathr_project/test/test_feature_registry.py | 382 ++++++++---------- feathr_project/test/test_fixture.py | 8 +- .../feathr_config_purview.yaml | 113 ------ .../feathr_config_registry_purview.yaml | 49 +++ .../feathr_config_registry_purview_rbac.yaml | 49 +++ .../feathr_config_registry_sql.yaml | 49 +++ .../feathr_config_registry_sql_rbac.yaml | 49 +++ 7 files changed, 373 insertions(+), 326 deletions(-) delete mode 100644 feathr_project/test/test_user_workspace/feathr_config_purview.yaml create mode 100644 feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml create mode 100644 feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml create mode 100644 feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml create mode 100644 feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml diff --git a/feathr_project/test/test_feature_registry.py b/feathr_project/test/test_feature_registry.py index e68f2a949..5f2fea7d4 100644 --- a/feathr_project/test/test_feature_registry.py +++ b/feathr_project/test/test_feature_registry.py @@ -1,10 +1,11 @@ import glob import os import time +import unittest +import pytest from datetime import datetime from pathlib import Path -import pytest from click.testing import CliRunner from feathr import (FeatureQuery, ObservationSettings, TypedKey, @@ -16,214 +17,177 @@ from test_fixture import registry_test_setup_append, registry_test_setup_partially from test_utils.constants import Constants - -def test_feathr_register_features_e2e(): - """ - This test will register features, get all the registered features, then query a set of already registered features. 
- """ - - test_workspace_dir = Path( - __file__).parent.resolve() / "test_user_workspace" - client: FeathrClient = registry_test_setup(os.path.join(test_workspace_dir, "feathr_config.yaml")) - - # set output folder based on different runtime - now = datetime.now() - if client.spark_runtime == 'databricks': - output_path = ''.join(['dbfs:/feathrazure_cijob','_', str(now.minute), '_', str(now.second), ".parquet"]) - else: - output_path = ''.join(['abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/demo_data/output','_', str(now.minute), '_', str(now.second), ".parquet"]) - - - client.register_features() - # Allow purview to process a bit - time.sleep(5) - # in CI test, the project name is set by the CI pipeline so we read it here - all_features = client.list_registered_features(project_name=client.project_name) - all_feature_names = [x['name'] for x in all_features] - - assert 'f_is_long_trip_distance' in all_feature_names # test regular ones - assert 'f_trip_time_rounded' in all_feature_names # make sure derived features are there - assert 'f_location_avg_fare' in all_feature_names # make sure aggregated features are there - assert 'f_trip_time_rounded_plus' in all_feature_names # make sure derived features are there - assert 'f_trip_time_distance' in all_feature_names # make sure derived features are there - - # Sync workspace from registry, will get all conf files back - client.get_features_from_registry(client.project_name) - - feature_query = FeatureQuery( - feature_list=["f_location_avg_fare", "f_trip_time_rounded", "f_is_long_trip_distance"], - key=TypedKey(key_column="DOLocationID",key_column_type=ValueType.INT32)) - settings = ObservationSettings( - observation_path="wasbs://public@azurefeathrstorage.blob.core.windows.net/sample_data/green_tripdata_2020-04_with_index.csv", - event_timestamp_column="lpep_dropoff_datetime", - timestamp_format="yyyy-MM-dd HH:mm:ss") - client.get_offline_features(observation_settings=settings, - feature_query=feature_query, - output_path=output_path) - client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) - -def test_feathr_register_features_purview_e2e(): - """ - This test will register features, get all the registered features, then query a set of already registered features. 
- """ - - test_workspace_dir = Path( - __file__).parent.resolve() / "test_user_workspace" - client: FeathrClient = registry_test_setup(os.path.join(test_workspace_dir, "feathr_config_purview.yaml")) - - # set output folder based on different runtime - now = datetime.now() - if client.spark_runtime == 'databricks': - output_path = ''.join(['dbfs:/feathrazure_cijob','_', str(now.minute), '_', str(now.second), ".parquet"]) - else: - output_path = ''.join(['abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/demo_data/output','_', str(now.minute), '_', str(now.second), ".parquet"]) - - - client.register_features() - # Allow purview to process a bit - time.sleep(5) - # in CI test, the project name is set by the CI pipeline so we read it here - all_features = client.list_registered_features(project_name=client.project_name) - all_feature_names = [x['name'] for x in all_features] - - assert 'f_is_long_trip_distance' in all_feature_names # test regular ones - assert 'f_trip_time_rounded' in all_feature_names # make sure derived features are there - assert 'f_location_avg_fare' in all_feature_names # make sure aggregated features are there - assert 'f_trip_time_rounded_plus' in all_feature_names # make sure derived features are there - assert 'f_trip_time_distance' in all_feature_names # make sure derived features are there - - # Sync workspace from registry, will get all conf files back - client.get_features_from_registry(client.project_name) - - feature_query = FeatureQuery( - feature_list=["f_location_avg_fare", "f_trip_time_rounded", "f_is_long_trip_distance"], - key=TypedKey(key_column="DOLocationID",key_column_type=ValueType.INT32)) - settings = ObservationSettings( - observation_path="wasbs://public@azurefeathrstorage.blob.core.windows.net/sample_data/green_tripdata_2020-04_with_index.csv", - event_timestamp_column="lpep_dropoff_datetime", - timestamp_format="yyyy-MM-dd HH:mm:ss") - client.get_offline_features(observation_settings=settings, - feature_query=feature_query, - output_path=output_path) - client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) - -def test_feathr_register_features_partially(): - """ - This test will register full set of features into one project, then register another project in two partial registrations. - The length of the return value of get_features_from_registry should be identical. - """ - test_workspace_dir = Path( - __file__).parent.resolve() / "test_user_workspace" - client: FeathrClient = registry_test_setup(os.path.join(test_workspace_dir, "feathr_config.yaml")) - client.register_features() - time.sleep(30) - full_registration = client.get_features_from_registry(client.project_name) - - now = datetime.now() - os.environ["project_config__project_name"] = ''.join(['feathr_ci_registry','_', str(now.minute), '_', str(now.second), '_', str(now.microsecond)]) - - client: FeathrClient = registry_test_setup_partially(os.path.join(test_workspace_dir, "feathr_config.yaml")) - new_project_name = client.project_name - client.register_features() - time.sleep(30) - - - client: FeathrClient = registry_test_setup_append(os.path.join(test_workspace_dir, "feathr_config.yaml")) - client.project_name = new_project_name - client.register_features() - time.sleep(30) - - appended_registration = client.get_features_from_registry(client.project_name) - - # after a full registration, another registration should not affect the registered anchor features. 
- assert len(full_registration.items())==len(appended_registration.items()) - -@pytest.mark.skip(reason="Underlying implementation changed, not applicable") -def test_get_feature_from_registry(): - registry = _FeatureRegistry("mock_project","mock_purview","mock_delimeter") - derived_feature_with_multiple_inputs = { - "guid": "derived_feature_with_multiple_input_anchors", - "typeName": "feathr_derived_feature_v1", - "attributes": { - "input_derived_features": [], - "input_anchor_features": [ - { - "guid": "input_anchorA", - "typeName": "feathr_anchor_feature_v1", - }, - { - "guid": "input_anchorB", - "typeName": "feathr_anchor_feature_v1", - } - ] - }, - } - hierarchical_derived_feature = { - "guid": "hierarchical_derived_feature", - "typeName": "feathr_derived_feature_v1", - "attributes": { - "input_derived_features": [ - { - "guid": "derived_feature_with_multiple_input_anchors", - "typeName": "feathr_derived_feature_v1", - } - ], - "input_anchor_features": [ - { - "guid": "input_anchorC", - "typeName": "feathr_anchor_feature_v1", - } - ], +class FeatureRegistryTests(unittest.TestCase): + def test_feathr_register_features_e2e(self): + """ + This test will register features, get all the registered features, then query a set of already registered features. + """ + + config_paths = [ + "feathr_config_registry_purview.yaml", + "feathr_config_registry_purview_rbac.yaml", + "feathr_config_registry_sql.yaml", + "feathr_config_registry_sql_rbac.yaml", + ] + + for config_path in config_paths: + with self.subTest(config_path=config_path): + test_workspace_dir = Path(__file__).parent.resolve() / "test_user_workspace" + client: FeathrClient = registry_test_setup(os.path.join(test_workspace_dir, config_path)) + + # set output folder based on different runtime + now = datetime.now() + if client.spark_runtime == 'databricks': + output_path = ''.join(['dbfs:/feathrazure_cijob','_', str(now.minute), '_', str(now.second), ".parquet"]) + else: + output_path = ''.join(['abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/demo_data/output','_', str(now.minute), '_', str(now.second), ".parquet"]) + + + client.register_features() + # Allow purview to process a bit + time.sleep(5) + # in CI test, the project name is set by the CI pipeline so we read it here + all_features = client.list_registered_features(project_name=client.project_name) + all_feature_names = [x['name'] for x in all_features] + + assert 'f_is_long_trip_distance' in all_feature_names # test regular ones + assert 'f_trip_time_rounded' in all_feature_names # make sure derived features are there + assert 'f_location_avg_fare' in all_feature_names # make sure aggregated features are there + assert 'f_trip_time_rounded_plus' in all_feature_names # make sure derived features are there + assert 'f_trip_time_distance' in all_feature_names # make sure derived features are there + + # Sync workspace from registry, will get all conf files back + client.get_features_from_registry(client.project_name) + + feature_query = FeatureQuery( + feature_list=["f_location_avg_fare", "f_trip_time_rounded", "f_is_long_trip_distance"], + key=TypedKey(key_column="DOLocationID",key_column_type=ValueType.INT32)) + settings = ObservationSettings( + observation_path="wasbs://public@azurefeathrstorage.blob.core.windows.net/sample_data/green_tripdata_2020-04_with_index.csv", + event_timestamp_column="lpep_dropoff_datetime", + timestamp_format="yyyy-MM-dd HH:mm:ss") + client.get_offline_features(observation_settings=settings, + feature_query=feature_query, + 
output_path=output_path) + client.wait_job_to_finish(timeout_sec=Constants.SPARK_JOB_TIMEOUT_SECONDS) + + def test_feathr_register_features_partially(self): + """ + This test will register full set of features into one project, then register another project in two partial registrations. + The length of the return value of get_features_from_registry should be identical. + """ + test_workspace_dir = Path( + __file__).parent.resolve() / "test_user_workspace" + client: FeathrClient = registry_test_setup(os.path.join(test_workspace_dir, "feathr_config.yaml")) + client.register_features() + time.sleep(30) + full_registration = client.get_features_from_registry(client.project_name) + + now = datetime.now() + os.environ["project_config__project_name"] = ''.join(['feathr_ci_registry','_', str(now.minute), '_', str(now.second), '_', str(now.microsecond)]) + + client: FeathrClient = registry_test_setup_partially(os.path.join(test_workspace_dir, "feathr_config.yaml")) + new_project_name = client.project_name + client.register_features() + time.sleep(30) + + + client: FeathrClient = registry_test_setup_append(os.path.join(test_workspace_dir, "feathr_config.yaml")) + client.project_name = new_project_name + client.register_features() + time.sleep(30) + + appended_registration = client.get_features_from_registry(client.project_name) + + # after a full registration, another registration should not affect the registered anchor features. + assert len(full_registration.items())==len(appended_registration.items()) + + @pytest.mark.skip(reason="Underlying implementation changed, not applicable") + def test_get_feature_from_registry(self): + registry = _FeatureRegistry("mock_project","mock_purview","mock_delimeter") + derived_feature_with_multiple_inputs = { + "guid": "derived_feature_with_multiple_input_anchors", + "typeName": "feathr_derived_feature_v1", + "attributes": { + "input_derived_features": [], + "input_anchor_features": [ + { + "guid": "input_anchorA", + "typeName": "feathr_anchor_feature_v1", + }, + { + "guid": "input_anchorB", + "typeName": "feathr_anchor_feature_v1", + } + ] + }, } - } - anchors = [ - { - "guid": "input_anchorA", - "typeName": "feathr_anchor_feature_v1", - }, - { - "guid": "input_anchorC", - "typeName": "feathr_anchor_feature_v1", - }, - { - "guid": "input_anchorB", - "typeName": "feathr_anchor_feature_v1", - }] - - def entity_array_to_dict(arr): - return {x['guid']:x for x in arr} - - inputs = registry.search_input_anchor_features(['derived_feature_with_multiple_input_anchors'],entity_array_to_dict(anchors+[derived_feature_with_multiple_inputs])) - assert len(inputs)==2 - assert "input_anchorA" in inputs and "input_anchorB" in inputs - - inputs = registry.search_input_anchor_features(['hierarchical_derived_feature'],entity_array_to_dict(anchors+[derived_feature_with_multiple_inputs,hierarchical_derived_feature])) - assert len(inputs)==3 - assert "input_anchorA" in inputs and "input_anchorB" in inputs and "input_anchorC" in inputs - -@pytest.mark.skip(reason="Add back get_features is not supported in feature registry for now and needs further discussion") -def test_feathr_get_features_from_registry(): - """ - Test FeathrClient() sync features and get all the conf files from registry - """ - runner = CliRunner() - with runner.isolated_filesystem(): - result = runner.invoke(init, []) - - assert result.exit_code == 0 - assert os.path.isdir("./feathr_user_workspace") - os.chdir('feathr_user_workspace') - - # Look for conf files, we shouldn't have any - total_conf_files = 
glob.glob('*/*.conf', recursive=True) - assert len(total_conf_files) == 0 - - client = FeathrClient() - # Sync workspace from registry, will get all conf files back - client.get_features_from_registry("frame_getting_started") - - total_conf_files = glob.glob('*/*.conf', recursive=True) - # we should have at least 3 conf files - assert len(total_conf_files) == 3 + hierarchical_derived_feature = { + "guid": "hierarchical_derived_feature", + "typeName": "feathr_derived_feature_v1", + "attributes": { + "input_derived_features": [ + { + "guid": "derived_feature_with_multiple_input_anchors", + "typeName": "feathr_derived_feature_v1", + } + ], + "input_anchor_features": [ + { + "guid": "input_anchorC", + "typeName": "feathr_anchor_feature_v1", + } + ], + } + } + anchors = [ + { + "guid": "input_anchorA", + "typeName": "feathr_anchor_feature_v1", + }, + { + "guid": "input_anchorC", + "typeName": "feathr_anchor_feature_v1", + }, + { + "guid": "input_anchorB", + "typeName": "feathr_anchor_feature_v1", + }] + + def entity_array_to_dict(arr): + return {x['guid']:x for x in arr} + + inputs = registry.search_input_anchor_features(['derived_feature_with_multiple_input_anchors'],entity_array_to_dict(anchors+[derived_feature_with_multiple_inputs])) + assert len(inputs)==2 + assert "input_anchorA" in inputs and "input_anchorB" in inputs + + inputs = registry.search_input_anchor_features(['hierarchical_derived_feature'],entity_array_to_dict(anchors+[derived_feature_with_multiple_inputs,hierarchical_derived_feature])) + assert len(inputs)==3 + assert "input_anchorA" in inputs and "input_anchorB" in inputs and "input_anchorC" in inputs + + @pytest.mark.skip(reason="Add back get_features is not supported in feature registry for now and needs further discussion") + def test_feathr_get_features_from_registry(self): + """ + Test FeathrClient() sync features and get all the conf files from registry + """ + runner = CliRunner() + with runner.isolated_filesystem(): + result = runner.invoke(init, []) + + assert result.exit_code == 0 + assert os.path.isdir("./feathr_user_workspace") + os.chdir('feathr_user_workspace') + + # Look for conf files, we shouldn't have any + total_conf_files = glob.glob('*/*.conf', recursive=True) + assert len(total_conf_files) == 0 + + client = FeathrClient() + # Sync workspace from registry, will get all conf files back + client.get_features_from_registry("frame_getting_started") + + total_conf_files = glob.glob('*/*.conf', recursive=True) + # we should have at least 3 conf files + assert len(total_conf_files) == 3 diff --git a/feathr_project/test/test_fixture.py b/feathr_project/test/test_fixture.py index d13a261ef..c048eff7c 100644 --- a/feathr_project/test/test_fixture.py +++ b/feathr_project/test/test_fixture.py @@ -161,17 +161,17 @@ def kafka_test_setup(config_path: str): return client def registry_test_setup(config_path: str): - - - # use a new project name every time to make sure all features are registered correctly + # Use a new project name every time to make sure all features are registered correctly + # Project name example: feathr_ci_registry_2022_09_24_01_02_30 now = datetime.now() - os.environ["project_config__project_name"] = ''.join(['feathr_ci_registry','_', str(now.minute), '_', str(now.second), '_', str(now.microsecond)]) + os.environ["project_config__project_name"] = f'feathr_ci_registry_{str(now)[:19].replace(" ", "_").replace(":", "_").replace("-", "_")}' client = FeathrClient(config_path=config_path, project_registry_tag={"for_test_purpose":"true"}) request_anchor, 
agg_anchor, derived_feature_list = generate_entities() client.build_features(anchor_list=[agg_anchor, request_anchor], derived_feature_list=derived_feature_list) return client + def registry_test_setup_partially(config_path: str): """Register a partial of a project. Will call `generate_entities()` and register only the first anchor feature. """ diff --git a/feathr_project/test/test_user_workspace/feathr_config_purview.yaml b/feathr_project/test/test_user_workspace/feathr_config_purview.yaml deleted file mode 100644 index 8785dfdec..000000000 --- a/feathr_project/test/test_user_workspace/feathr_config_purview.yaml +++ /dev/null @@ -1,113 +0,0 @@ -# DO NOT MOVE OR DELETE THIS FILE - -# This file contains the configurations that are used by Feathr -# All the configurations can be overwritten by environment variables with concatenation of `__` for different layers of this config file. -# For example, `feathr_runtime_location` for databricks can be overwritten by setting this environment variable: -# SPARK_CONFIG__DATABRICKS__FEATHR_RUNTIME_LOCATION -# Another example would be overwriting Redis host with this config: `ONLINE_STORE__REDIS__HOST` -# For example if you want to override this setting in a shell environment: -# export ONLINE_STORE__REDIS__HOST=feathrazure.redis.cache.windows.net - -# version of API settings -api_version: 1 -project_config: - project_name: 'project_feathr_integration_test' - # Information that are required to be set via environment variables. - required_environment_variables: - # the environemnt variables are required to run Feathr - # Redis password for your online store - - 'REDIS_PASSWORD' - # client IDs and client Secret for the service principal. Read the getting started docs on how to get those information. - - 'AZURE_CLIENT_ID' - - 'AZURE_TENANT_ID' - - 'AZURE_CLIENT_SECRET' - optional_environment_variables: - # the environemnt variables are optional, however you will need them if you want to use some of the services: - - ADLS_ACCOUNT - - ADLS_KEY - - WASB_ACCOUNT - - WASB_KEY - - S3_ACCESS_KEY - - S3_SECRET_KEY - - JDBC_TABLE - - JDBC_USER - - JDBC_PASSWORD - - KAFKA_SASL_JAAS_CONFIG - -offline_store: - # paths starts with abfss:// or abfs:// - # ADLS_ACCOUNT and ADLS_KEY should be set in environment variable if this is set to true - adls: - adls_enabled: true - - # paths starts with wasb:// or wasbs:// - # WASB_ACCOUNT and WASB_KEY should be set in environment variable - wasb: - wasb_enabled: true - - # paths starts with s3a:// - # S3_ACCESS_KEY and S3_SECRET_KEY should be set in environment variable - s3: - s3_enabled: true - # S3 endpoint. If you use S3 endpoint, then you need to provide access key and secret key in the environment variable as well. - s3_endpoint: 's3.amazonaws.com' - - # jdbc endpoint - jdbc: - jdbc_enabled: true - jdbc_database: 'feathrtestdb' - jdbc_table: 'feathrtesttable' - - # snowflake endpoint - snowflake: - snowflake_enabled: true - url: "dqllago-ol19457.snowflakecomputing.com" - user: "feathrintegration" - role: "ACCOUNTADMIN" - -spark_config: - # choice for spark runtime. Currently support: azure_synapse, databricks - # The `databricks` configs will be ignored if `azure_synapse` is set and vice versa. 
- spark_cluster: 'databricks' - # configure number of parts for the spark output for feature generation job - spark_result_output_parts: '1' - - azure_synapse: - dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net' - pool_name: 'spark3' - # workspace dir for storing all the required configuration files and the jar resources - workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' - executor_size: 'Small' - executor_num: 1 - # Feathr Job configuration. Support local paths, path start with http(s)://, and paths start with abfs(s):// - # this is the default location so end users don't have to compile the runtime again. - # feathr_runtime_location: wasbs://public@azurefeathrstorage.blob.core.windows.net/feathr-assembly-LATEST.jar - feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" - databricks: - # workspace instance - workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' - workspace_token_value: 'dapid8ddd83000dc2863763b7d47f0e8f3db' - # config string including run time information, spark version, machine size, etc. - # the config follows the format in the databricks documentation: https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/2.0/jobs - config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} - # Feathr Job location. Support local paths, path start with http(s)://, and paths start with dbfs:/ - work_dir: 'dbfs:/feathr_getting_started' - # this is the default location so end users don't have to compile the runtime again. 
- feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" - -online_store: - redis: - # Redis configs to access Redis cluster - host: 'feathrazuretest3redis.redis.cache.windows.net' - port: 6380 - ssl_enabled: True - -feature_registry: - # The API endpoint of the registry service - api_endpoint: "https://feathr-sql-registry.azurewebsites.net/api/v1" - -monitoring: - database: - sql: - url: 'jdbc:postgresql://featuremonitoring.postgres.database.azure.com:5432/postgres' - user: "demo" diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml new file mode 100644 index 000000000..afe923163 --- /dev/null +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_purview.yaml @@ -0,0 +1,49 @@ +api_version: 1 +project_config: + project_name: 'project_feathr_integration_test' + required_environment_variables: + optional_environment_variables: + +offline_store: + adls: + adls_enabled: false + wasb: + wasb_enabled: false + s3: + s3_enabled: false + jdbc: + jdbc_enabled: false + snowflake: + snowflake_enabled: false + +spark_config: + spark_cluster: 'databricks' + spark_result_output_parts: '1' + azure_synapse: + dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net' + pool_name: 'spark3' + workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' + executor_size: 'Small' + executor_num: 1 + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + databricks: + workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' + workspace_token_value: 'dapid8ddd83000dc2863763b7d47f0e8f3db' + config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} + work_dir: 'dbfs:/feathr_getting_started' + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + +online_store: + redis: + host: 'feathrazuretest3redis.redis.cache.windows.net' + port: 6380 + ssl_enabled: True + +feature_registry: + api_endpoint: "https://feathr-registry-purview.azurewebsites.net/api/v1" + +monitoring: + database: + sql: + url: 'jdbc:postgresql://featuremonitoring.postgres.database.azure.com:5432/postgres' + user: "demo" diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml new file mode 100644 index 000000000..fb88972f7 --- /dev/null +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_purview_rbac.yaml @@ -0,0 +1,49 @@ +api_version: 1 +project_config: + project_name: 'project_feathr_integration_test' + required_environment_variables: + optional_environment_variables: + +offline_store: + adls: + adls_enabled: false + wasb: + wasb_enabled: false + s3: + s3_enabled: false + jdbc: + jdbc_enabled: false + snowflake: + snowflake_enabled: false + +spark_config: + spark_cluster: 'databricks' + spark_result_output_parts: '1' + azure_synapse: + dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net' + pool_name: 'spark3' + workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' + executor_size: 'Small' + 
executor_num: 1 + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + databricks: + workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' + workspace_token_value: 'dapid8ddd83000dc2863763b7d47f0e8f3db' + config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} + work_dir: 'dbfs:/feathr_getting_started' + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + +online_store: + redis: + host: 'feathrazuretest3redis.redis.cache.windows.net' + port: 6380 + ssl_enabled: True + +feature_registry: + api_endpoint: "https://feathr-registry-purview-rbac.azurewebsites.net/api/v1" + +monitoring: + database: + sql: + url: 'jdbc:postgresql://featuremonitoring.postgres.database.azure.com:5432/postgres' + user: "demo" diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml new file mode 100644 index 000000000..486eed1e4 --- /dev/null +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_sql.yaml @@ -0,0 +1,49 @@ +api_version: 1 +project_config: + project_name: 'project_feathr_integration_test' + required_environment_variables: + optional_environment_variables: + +offline_store: + adls: + adls_enabled: false + wasb: + wasb_enabled: false + s3: + s3_enabled: false + jdbc: + jdbc_enabled: false + snowflake: + snowflake_enabled: false + +spark_config: + spark_cluster: 'databricks' + spark_result_output_parts: '1' + azure_synapse: + dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net' + pool_name: 'spark3' + workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' + executor_size: 'Small' + executor_num: 1 + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + databricks: + workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' + workspace_token_value: 'dapid8ddd83000dc2863763b7d47f0e8f3db' + config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} + work_dir: 'dbfs:/feathr_getting_started' + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + +online_store: + redis: + host: 'feathrazuretest3redis.redis.cache.windows.net' + port: 6380 + ssl_enabled: True + +feature_registry: + api_endpoint: "https://feathr-registry-sql.azurewebsites.net/api/v1" + +monitoring: + database: + sql: + url: 'jdbc:postgresql://featuremonitoring.postgres.database.azure.com:5432/postgres' + user: "demo" diff --git a/feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml b/feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml new file mode 100644 index 000000000..4ad7d35db --- /dev/null +++ b/feathr_project/test/test_user_workspace/feathr_config_registry_sql_rbac.yaml @@ -0,0 +1,49 @@ +api_version: 1 +project_config: + project_name: 'project_feathr_integration_test' + required_environment_variables: + 
optional_environment_variables: + +offline_store: + adls: + adls_enabled: false + wasb: + wasb_enabled: false + s3: + s3_enabled: false + jdbc: + jdbc_enabled: false + snowflake: + snowflake_enabled: false + +spark_config: + spark_cluster: 'databricks' + spark_result_output_parts: '1' + azure_synapse: + dev_url: 'https://feathrazuretest3synapse.dev.azuresynapse.net' + pool_name: 'spark3' + workspace_dir: 'abfss://feathrazuretest3fs@feathrazuretest3storage.dfs.core.windows.net/feathr_test_workspace' + executor_size: 'Small' + executor_num: 1 + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + databricks: + workspace_instance_url: 'https://adb-2474129336842816.16.azuredatabricks.net/' + workspace_token_value: 'dapid8ddd83000dc2863763b7d47f0e8f3db' + config_template: {"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"0403-214809-inlet434-pool-l9dj3kwz"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}} + work_dir: 'dbfs:/feathr_getting_started' + feathr_runtime_location: "../../target/scala-2.12/feathr-assembly-0.8.0.jar" + +online_store: + redis: + host: 'feathrazuretest3redis.redis.cache.windows.net' + port: 6380 + ssl_enabled: True + +feature_registry: + api_endpoint: "https://feathr-registry-sql-rbac.azurewebsites.net/api/v1" + +monitoring: + database: + sql: + url: 'jdbc:postgresql://featuremonitoring.postgres.database.azure.com:5432/postgres' + user: "demo"
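
For reference, the new registry e2e coverage loops over the four config files above (Purview, Purview RBAC, SQL, SQL RBAC) inside a single unittest subTest. A minimal sketch of an alternative layout, assuming the same registry_test_setup helper from test_fixture.py and the config files added in this patch, would run each endpoint as its own pytest case via parametrize; the client calls below mirror the checks in test_feathr_register_features_e2e and are illustrative only, not part of the patch:

    import os
    from pathlib import Path

    import pytest

    from test_fixture import registry_test_setup

    # The four registry endpoint configs added by this patch.
    REGISTRY_CONFIGS = [
        "feathr_config_registry_purview.yaml",
        "feathr_config_registry_purview_rbac.yaml",
        "feathr_config_registry_sql.yaml",
        "feathr_config_registry_sql_rbac.yaml",
    ]

    @pytest.mark.parametrize("config_file", REGISTRY_CONFIGS)
    def test_register_and_list_features(config_file):
        # Build and register features against the registry endpoint named in the config.
        workspace_dir = Path(__file__).parent.resolve() / "test_user_workspace"
        client = registry_test_setup(os.path.join(workspace_dir, config_file))
        client.register_features()

        # Each endpoint should report the registered features back.
        registered = client.list_registered_features(project_name=client.project_name)
        names = [feature["name"] for feature in registered]
        assert "f_is_long_trip_distance" in names

Parametrizing reports each endpoint as a separate test case, whereas the subTest loop in this patch keeps all four under one test method; either way, the same set of registry configs is exercised.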