diff --git a/CHANGELOG.md b/CHANGELOG.md
index 10d1fbdcdc..ffad1fac4a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,9 +19,12 @@ The types of changes are:
 ## [Unreleased](https://github.com/ethyca/fides/compare/2.1.0...main)
 
 ### Added
+
 * Add authenticated privacy request route. [#1819](https://github.com/ethyca/fides/pull/1819)
+* Access and erasure support for Fullstory API [#1821](https://github.com/ethyca/fides/pull/1821)
 
 ### Fixed
+
 * Fix error in parent user creation seeding. [#1832](https://github.com/ethyca/fides/issues/1832)
 
 ### Docs
diff --git a/data/saas/config/fullstory_config.yml b/data/saas/config/fullstory_config.yml
new file mode 100644
index 0000000000..ffaa0bcb49
--- /dev/null
+++ b/data/saas/config/fullstory_config.yml
@@ -0,0 +1,53 @@
+saas_config:
+  fides_key: <instance_fides_key>
+  name: Fullstory SaaS Config
+  type: fullstory
+  description: A sample schema representing the Fullstory connector for Fides
+  version: 0.0.1
+
+  connector_params:
+    - name: domain
+    - name: api_key
+
+  external_references:
+    - name: fullstory_user_id
+      label: Fullstory User ID
+      description: Dataset reference to the location of Fullstory user IDs
+
+  client_config:
+    protocol: https
+    host: <domain>
+    authentication:
+      strategy: api_key
+      configuration:
+        headers:
+          - name: Authorization
+            value: Basic <api_key>
+
+  test_request:
+    method: GET
+    path: /operations/v1
+
+  endpoints:
+    - name: user
+      requests:
+        read:
+          method: GET
+          path: /users/v1/individual/<user_id>
+          param_values:
+            - name: user_id
+              references:
+                - fullstory_user_id
+        update:
+          method: POST
+          path: /users/v1/individual/<user_id>/customvars
+          body: |
+            {
+              <masked_object_fields>
+            }
+          param_values:
+            - name: user_id
+              references:
+                - dataset: <instance_fides_key>
+                  field: user.uid
+                  direction: from
diff --git a/data/saas/dataset/fullstory_dataset.yml b/data/saas/dataset/fullstory_dataset.yml
new file mode 100644
index 0000000000..c088559a5b
--- /dev/null
+++ b/data/saas/dataset/fullstory_dataset.yml
@@ -0,0 +1,36 @@
+dataset:
+  - fides_key: <instance_fides_key>
+    name: Fullstory Dataset
+    description: A sample dataset representing the Fullstory connector for Fides
+    collections:
+      - name: user
+        fields:
+          - name: uid
+            data_categories: [user.unique_id]
+            fidesops_meta:
+              data_type: string
+              primary_key: True
+          - name: displayName
+            data_categories: [user.name]
+            fidesops_meta:
+              data_type: string
+          - name: email
+            data_categories: [user.contact.email]
+            fidesops_meta:
+              data_type: string
+          - name: numSessions
+            data_categories: [system.operations]
+            fidesops_meta:
+              data_type: integer
+          - name: firstSeen
+            data_categories: [system.operations]
+            fidesops_meta:
+              data_type: string
+          - name: lastSeen
+            data_categories: [system.operations]
+            fidesops_meta:
+              data_type: string
+          - name: existingOperation
+            data_categories: [system.operations]
+            fidesops_meta:
+              data_type: string
diff --git a/docs/fides/docs/getting-started/database_connectors.md b/docs/fides/docs/getting-started/database_connectors.md
index d3cf272150..608a50fac6 100644
--- a/docs/fides/docs/getting-started/database_connectors.md
+++ b/docs/fides/docs/getting-started/database_connectors.md
@@ -424,6 +424,10 @@ To view a list of all available connection types, visit `GET /api/v1/connection_
             "identifier": "datadog",
             "type": "saas"
         },
+        {
+            "identifier": "fullstory",
+            "type": "saas"
+        },
         {
             "identifier": "hubspot",
             "type": "saas"
diff --git a/docs/fides/docs/guides/connection_types.md b/docs/fides/docs/guides/connection_types.md
index 31c9e32296..2b7b0bf16d 100644
--- a/docs/fides/docs/guides/connection_types.md
+++ b/docs/fides/docs/guides/connection_types.md
@@ -66,6 +66,11 @@ which fidesops can communicate.
             "type": "saas",
             "human_readable": "Datadog"
         },
+        {
+            "identifier": "fullstory",
+            "type": "saas",
+            "human_readable": "Fullstory"
+        },
         {
             "identifier": "hubspot",
             "type": "saas",
diff --git a/pyproject.toml b/pyproject.toml
index d382337465..b957254e1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -190,6 +190,7 @@ markers = [
     "integration_datadog",
     "integration_domo",
     "integration_doordash",
+    "integration_fullstory",
     "integration_segment",
     "integration_sendgrid",
     "integration_auth0",
diff --git a/tests/ops/conftest.py b/tests/ops/conftest.py
index ead3d4b971..0e0dc08cb3 100644
--- a/tests/ops/conftest.py
+++ b/tests/ops/conftest.py
@@ -46,6 +46,7 @@
 from .fixtures.saas.datadog_fixtures import *
 from .fixtures.saas.domo_fixtures import *
 from .fixtures.saas.doordash_fixtures import *
+from .fixtures.saas.fullstory_fixtures import *
 from .fixtures.saas.hubspot_fixtures import *
 from .fixtures.saas.mailchimp_fixtures import *
 from .fixtures.saas.outreach_fixtures import *
diff --git a/tests/ops/fixtures/saas/external_datasets/fullstory.sql b/tests/ops/fixtures/saas/external_datasets/fullstory.sql
new file mode 100644
index 0000000000..db99f78eac
--- /dev/null
+++ b/tests/ops/fixtures/saas/external_datasets/fullstory.sql
@@ -0,0 +1,7 @@
+CREATE TABLE public.fullstory_users (
+    email CHARACTER VARYING(100) PRIMARY KEY,
+    fullstory_user_id CHARACTER VARYING(100)
+);
+
+INSERT INTO public.fullstory_users VALUES
+('test@test.com', '1234')
diff --git a/tests/ops/fixtures/saas/fullstory_fixtures.py b/tests/ops/fixtures/saas/fullstory_fixtures.py
new file mode 100644
index 0000000000..2f8a1960b0
--- /dev/null
+++ b/tests/ops/fixtures/saas/fullstory_fixtures.py
@@ -0,0 +1,307 @@
+from typing import Any, Dict, Generator
+
+import pydash
+import pytest
+import requests
+from fideslib.db import session
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+from sqlalchemy_utils.functions import create_database, database_exists, drop_database
+
+from fides.api.ops.models.connectionconfig import (
+    AccessLevel,
+    ConnectionConfig,
+    ConnectionType,
+)
+from fides.api.ops.models.datasetconfig import DatasetConfig
+from fides.api.ops.util.saas_util import (
+    load_config_with_replacement,
+    load_dataset_with_replacement,
+)
+from tests.ops.test_helpers.db_utils import seed_postgres_data
+from tests.ops.test_helpers.vault_client import get_secrets
+
+secrets = get_secrets("fullstory")
+
+
+@pytest.fixture(scope="function")
+def fullstory_secrets(saas_config):
+    """Connection secrets, read from saas_config with get_secrets as fallback."""
+    return {
+        "domain": pydash.get(saas_config, "fullstory.domain") or secrets["domain"],
+        "api_key": pydash.get(saas_config, "fullstory.api_key") or secrets["api_key"],
+        "fullstory_user_id": {
+            "dataset": "fullstory_postgres",
+            "field": "fullstory_users.fullstory_user_id",
+            "direction": "from",
+        },
+    }
+
+
+@pytest.fixture(scope="function")
+def fullstory_identity_email(saas_config):
+    """Email identity used by the access-request test."""
+    return (
+        pydash.get(saas_config, "fullstory.identity_email") or secrets["identity_email"]
+    )
+
+
+@pytest.fixture(scope="session")
+def fullstory_erasure_identity_email(saas_config):
+    """Email identity used by the erasure-request test."""
+    return (
+        pydash.get(saas_config, "fullstory.erasure_identity_email")
+        or secrets["erasure_identity_email"]
+    )
+
+
+@pytest.fixture(scope="session")
+def fullstory_erasure_identity_id(saas_config):
+    """Fullstory user ID that belongs to the erasure email identity."""
+    return (
+        pydash.get(saas_config, "fullstory.erasure_identity_id")
+        or secrets["erasure_identity_id"]
+    )
+
+
+@pytest.fixture
+def fullstory_config() -> Dict[str, Any]:
+    """Fullstory SaaS config with the fides_key placeholder replaced."""
+    return load_config_with_replacement(
+        "data/saas/config/fullstory_config.yml",
+        "<instance_fides_key>",
+        "fullstory_instance",
+    )
+
+
+@pytest.fixture
+def fullstory_dataset() -> Dict[str, Any]:
+    """First dataset of the Fullstory YAML with the fides_key placeholder replaced."""
+    return load_dataset_with_replacement(
+        "data/saas/dataset/fullstory_dataset.yml",
+        "<instance_fides_key>",
+        "fullstory_instance",
+    )[0]
+
+
+@pytest.fixture(scope="function")
+def fullstory_connection_config(
+    db: Session, fullstory_config, fullstory_secrets
+) -> Generator:
+    """Create a write-access SaaS ConnectionConfig; delete it on teardown."""
+    fides_key = fullstory_config["fides_key"]
+    connection_config = ConnectionConfig.create(
+        db=db,
+        data={
+            "key": fides_key,
+            "name": fides_key,
+            "connection_type": ConnectionType.saas,
+            "access": AccessLevel.write,
+            "secrets": fullstory_secrets,
+            "saas_config": fullstory_config,
+        },
+    )
+    yield connection_config
+    connection_config.delete(db)
+
+
+@pytest.fixture
+def fullstory_dataset_config(
+    db: Session,
+    fullstory_connection_config: ConnectionConfig,
+    fullstory_dataset: Dict[str, Any],
+) -> Generator:
+    """Create the Fullstory DatasetConfig on the SaaS connection; delete it after."""
+    fides_key = fullstory_dataset["fides_key"]
+    fullstory_connection_config.name = fides_key
+    fullstory_connection_config.key = fides_key
+    fullstory_connection_config.save(db=db)
+    dataset = DatasetConfig.create(
+        db=db,
+        data={
+            "connection_config_id": fullstory_connection_config.id,
+            "fides_key": fides_key,
+            "dataset": fullstory_dataset,
+        },
+    )
+    yield dataset
+    dataset.delete(db=db)
+
+
+@pytest.fixture()
+def fullstory_postgres_dataset() -> Dict[str, Any]:
+    """Dataset describing the Postgres table that maps emails to Fullstory IDs."""
+    return {
+        "fides_key": "fullstory_postgres",
+        "name": "Fullstory Postgres",
+        "description": "Lookup for Fullstory User IDs",
+        "collections": [
+            {
+                "name": "fullstory_users",
+                "fields": [
+                    {
+                        "name": "email",
+                        "data_categories": ["user.contact.email"],
+                        "fidesops_meta": {"data_type": "string", "identity": "email"},
+                    },
+                    {
+                        "name": "fullstory_user_id",
+                        "fidesops_meta": {"data_type": "string"},
+                    },
+                ],
+            }
+        ],
+    }
+
+
+@pytest.fixture
+def fullstory_postgres_dataset_config(
+    connection_config: ConnectionConfig,
+    fullstory_postgres_dataset: Dict[str, Any],
+    db: Session,
+) -> Generator:
+    """Rename connection_config to the lookup key and attach the lookup dataset."""
+    fides_key = fullstory_postgres_dataset["fides_key"]
+    connection_config.name = fides_key
+    connection_config.key = fides_key
+    connection_config.save(db=db)
+    dataset = DatasetConfig.create(
+        db=db,
+        data={
+            "connection_config_id": connection_config.id,
+            "fides_key": fides_key,
+            "dataset": fullstory_postgres_dataset,
+        },
+    )
+    yield dataset
+    dataset.delete(db=db)
+
+
+@pytest.fixture(scope="function")
+def fullstory_postgres_db(postgres_integration_session):
+    """Seed the integration database from fullstory.sql; drop it on teardown."""
+    postgres_integration_session = seed_postgres_data(
+        postgres_integration_session,
+        "./tests/ops/fixtures/saas/external_datasets/fullstory.sql",
+    )
+    yield postgres_integration_session
+    drop_database(postgres_integration_session.bind.url)
+
+
+class FullstoryTestClient:
+    """Small HTTP client the tests use to read and reset Fullstory users."""
+
+    headers: Dict[str, str]
+    base_url: str
+
+    def __init__(self, connection_config_fullstory: ConnectionConfig):
+        fullstory_secrets = connection_config_fullstory.secrets
+        self.headers = {
+            "Authorization": f"Basic {fullstory_secrets['api_key']}",
+        }
+        self.base_url = f"https://{fullstory_secrets['domain']}"
+
+    def get_user(self, user_id: str) -> requests.Response:
+        """GET the user with the given ID and return the raw response."""
+        return requests.get(
+            url=f"{self.base_url}/users/v1/individual/{user_id}", headers=self.headers
+        )
+
+    def update_user(self, user_id: str, user_data) -> requests.Response:
+        """POST user_data to the user's customvars endpoint; return the response."""
+        return requests.post(
+            url=f"{self.base_url}/users/v1/individual/{user_id}/customvars",
+            json=user_data,
+            headers=self.headers,
+        )
+
+
+@pytest.fixture(scope="function")
+def fullstory_postgres_erasure_db(
+    postgres_integration_session,
+    fullstory_erasure_identity_email,
+    fullstory_erasure_identity_id,
+):
+    """Recreate the integration database with one row for the erasure identity."""
+    if database_exists(postgres_integration_session.bind.url):
+        # Postgres cannot drop databases from within a transaction block, so
+        # we should drop the DB this way instead
+        drop_database(postgres_integration_session.bind.url)
+    create_database(postgres_integration_session.bind.url)
+
+    # Bound parameters instead of string concatenation, so a quote in either
+    # identity value cannot break (or inject into) the INSERT statement.
+    postgres_integration_session.execute(
+        text(
+            "CREATE TABLE public.fullstory_users ("
+            "email CHARACTER VARYING(100) PRIMARY KEY,"
+            "fullstory_user_id CHARACTER VARYING(100));"
+        )
+    )
+    postgres_integration_session.execute(
+        text("INSERT INTO public.fullstory_users VALUES (:email, :user_id)"),
+        {
+            "email": fullstory_erasure_identity_email,
+            "user_id": fullstory_erasure_identity_id,
+        },
+    )
+
+    yield postgres_integration_session
+    drop_database(postgres_integration_session.bind.url)
+
+
+@pytest.fixture(scope="function")
+def fullstory_test_client(
+    fullstory_connection_config: ConnectionConfig,
+) -> Generator:
+    """FullstoryTestClient built from the SaaS connection's secrets."""
+    test_client = FullstoryTestClient(
+        connection_config_fullstory=fullstory_connection_config
+    )
+    yield test_client
+
+
+def user_updated(
+    user_id: str, email: str, fullstory_test_client: FullstoryTestClient
+) -> Any:
+    """
+    Return the user record once its displayName is "MASKED", otherwise None.
+
+    The erasure test passes this to poll_for_existence because the masked
+    value is not visible immediately. Returning None instead of asserting
+    is intended to let the poller retry. `email` is unused; kept for callers.
+    """
+    user_response = fullstory_test_client.get_user(user_id=user_id)
+    if not user_response.ok:
+        return None
+
+    user = user_response.json()
+    if user.get("displayName") != "MASKED":
+        return None
+    return user
+
+
+@pytest.fixture(scope="function")
+def fullstory_erasure_data(
+    fullstory_secrets,
+    fullstory_test_client: FullstoryTestClient,
+    fullstory_erasure_identity_id,
+) -> Generator:
+    """
+    Yields the existing user record for the erasure identity, then restores
+    its original displayName and email once the test has finished.
+    """
+    # Get user
+    user_id = fullstory_erasure_identity_id
+    user_response = fullstory_test_client.get_user(user_id)
+    # Check the status before parsing, so a failed lookup fails on this assert
+    assert user_response.ok
+
+    user = user_response.json()
+    user_data = {"displayName": user["displayName"], "email": user["email"]}
+
+    yield user
+
+    # Update user to default values after test here
+    user_response = fullstory_test_client.update_user(user_id, user_data)
+    assert user_response.ok
diff --git a/tests/ops/integration_tests/saas/test_fullstory_task.py b/tests/ops/integration_tests/saas/test_fullstory_task.py
new file mode 100644
index 0000000000..acbbb452ba
--- /dev/null
+++ b/tests/ops/integration_tests/saas/test_fullstory_task.py
@@ -0,0 +1,172 @@
+import logging
+import random
+
+import pytest
+
+from fides.api.ops.graph.graph import DatasetGraph
+from fides.api.ops.models.privacy_request import PrivacyRequest
+from fides.api.ops.schemas.redis_cache import Identity
+from fides.api.ops.service.connectors import get_connector
+from fides.api.ops.task import graph_task
+from fides.api.ops.task.graph_task import get_cached_data_for_erasures
+from fides.ctl.core.config import get_config
+from tests.ops.fixtures.saas.fullstory_fixtures import FullstoryTestClient, user_updated
+from tests.ops.graph.graph_test_util import assert_rows_match
+from tests.ops.test_helpers.saas_test_utils import poll_for_existence
+
+CONFIG = get_config()
+logger = logging.getLogger(__name__)
+
+
+@pytest.mark.skip("API keys are temporary for free accounts")
+@pytest.mark.integration_saas
+@pytest.mark.integration_fullstory
+def test_fullstory_connection_test(
+    fullstory_connection_config,
+) -> None:
+    """The connector's test_connection call completes without raising."""
+    get_connector(fullstory_connection_config).test_connection()
+
+
+@pytest.mark.skip("API keys are temporary for free accounts")
+@pytest.mark.integration_saas
+@pytest.mark.integration_fullstory
+@pytest.mark.asyncio
+async def test_fullstory_access_request_task(
+    db,
+    policy,
+    fullstory_connection_config,
+    fullstory_dataset_config,
+    fullstory_identity_email,
+    connection_config,
+    fullstory_postgres_dataset_config,
+    fullstory_postgres_db,
+) -> None:
+    """Full access request based on the Fullstory SaaS config"""
+    privacy_request = PrivacyRequest(
+        id=f"test_fullstory_access_request_task_{random.randint(0, 1000)}"
+    )
+    identity_attribute = "email"
+    identity_value = fullstory_identity_email
+    identity_kwargs = {identity_attribute: identity_value}
+    identity = Identity(**identity_kwargs)
+
+    privacy_request.cache_identity(identity)
+
+    dataset_name = fullstory_connection_config.get_saas_config().fides_key
+    merged_graph = fullstory_dataset_config.get_graph()
+    graph = DatasetGraph(*[merged_graph, fullstory_postgres_dataset_config.get_graph()])
+
+    v = await graph_task.run_access_request(
+        privacy_request,
+        policy,
+        graph,
+        [fullstory_connection_config, connection_config],
+        {"email": fullstory_identity_email},
+        db,
+    )
+
+    assert_rows_match(
+        v[f"{dataset_name}:user"],
+        min_size=1,
+        keys=[
+            "uid",
+            "displayName",
+            "email",
+            "numSessions",
+            "firstSeen",
+            "lastSeen",
+            "existingOperation",
+        ],
+    )
+
+
+@pytest.mark.skip("API keys are temporary for free accounts")
+@pytest.mark.integration_saas
+@pytest.mark.integration_fullstory
+@pytest.mark.asyncio
+async def test_fullstory_erasure_request_task(
+    db,
+    policy,
+    fullstory_connection_config,
+    fullstory_dataset_config,
+    connection_config,
+    fullstory_postgres_erasure_db,
+    fullstory_postgres_dataset_config,
+    erasure_policy_string_rewrite,
+    fullstory_erasure_identity_email,
+    fullstory_erasure_identity_id,
+    fullstory_erasure_data,
+    fullstory_test_client: FullstoryTestClient,
+) -> None:
+    """Full erasure request based on the Fullstory SaaS config"""
+
+    privacy_request = PrivacyRequest(
+        id=f"test_fullstory_erasure_request_task_{random.randint(0, 1000)}"
+    )
+    identity_attribute = "email"
+    identity_value = fullstory_erasure_identity_email
+    identity_kwargs = {identity_attribute: identity_value}
+    identity = Identity(**identity_kwargs)
+    privacy_request.cache_identity(identity)
+
+    dataset_name = fullstory_connection_config.get_saas_config().fides_key
+    merged_graph = fullstory_dataset_config.get_graph()
+    graph = DatasetGraph(*[merged_graph, fullstory_postgres_dataset_config.get_graph()])
+
+    v = await graph_task.run_access_request(
+        privacy_request,
+        policy,
+        graph,
+        [fullstory_connection_config, connection_config],
+        {"email": fullstory_erasure_identity_email},
+        db,
+    )
+
+    assert_rows_match(
+        v[f"{dataset_name}:user"],
+        min_size=1,
+        keys=[
+            "uid",
+            "displayName",
+            "email",
+            "numSessions",
+            "firstSeen",
+            "lastSeen",
+            "existingOperation",
+        ],
+    )
+
+    # Force strict masking for the erasure run. The finally block restores the
+    # previous value even when an assertion below fails, so the module-level
+    # CONFIG object is not left modified for whatever runs next.
+    temp_masking = CONFIG.execution.masking_strict
+    CONFIG.execution.masking_strict = True
+    try:
+        x = await graph_task.run_erasure(
+            privacy_request,
+            erasure_policy_string_rewrite,
+            graph,
+            [fullstory_connection_config, connection_config],
+            identity_kwargs,
+            get_cached_data_for_erasures(privacy_request.id),
+            db,
+        )
+        # Verify masking request was issued for endpoints with update actions
+        assert x == {
+            "fullstory_instance:user": 1,
+            "fullstory_postgres:fullstory_users": 0,
+        }
+        user_id = v[f"{dataset_name}:user"][0]["uid"]
+        # Verifying field is masked but it is delayed for 60 seconds
+
+        error_message = (
+            f"User with email {fullstory_erasure_identity_email} was not updated to default"
+        )
+        poll_for_existence(
+            user_updated,
+            (user_id, fullstory_erasure_identity_email, fullstory_test_client),
+            error_message=error_message,
+        )
+    finally:
+        CONFIG.execution.masking_strict = temp_masking