diff --git a/CHANGELOG.md b/CHANGELOG.md index 65605e31e2..5c9aac8d02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ The types of changes are: ### Fixed - Fixed typo in the BigQuery integration description [#5120](https://github.com/ethyca/fides/pull/5120) - Fixed default values of Experience config toggles [#5123](https://github.com/ethyca/fides/pull/5123) +- Skip indexing Custom Privacy Request Field array values [#5127](https://github.com/ethyca/fides/pull/5127) ### Fixed - Fixed not being able to edit a monitor from scheduled to not scheduled [#5114](https://github.com/ethyca/fides/pull/5114) diff --git a/src/fides/api/api/v1/endpoints/privacy_request_endpoints.py b/src/fides/api/api/v1/endpoints/privacy_request_endpoints.py index a6146ef652..2565bf01cc 100644 --- a/src/fides/api/api/v1/endpoints/privacy_request_endpoints.py +++ b/src/fides/api/api/v1/endpoints/privacy_request_endpoints.py @@ -466,6 +466,8 @@ def _filter_privacy_request_queryset( query = query.filter(PrivacyRequest.id.in_(identities_query)) if custom_privacy_request_fields: + # Note that if Custom Privacy Request Field values were arrays, they are not + # indexed for searching and not discoverable here custom_field_conditions = [ (CustomPrivacyRequestField.field_name == field_name) & ( @@ -473,6 +475,7 @@ def _filter_privacy_request_queryset( == CustomPrivacyRequestField.hash_value(value) ) for field_name, value in custom_privacy_request_fields.items() + if not isinstance(value, list) ] custom_fields_query = select( diff --git a/src/fides/api/models/privacy_request.py b/src/fides/api/models/privacy_request.py index 5c307d958d..e1a878a727 100644 --- a/src/fides/api/models/privacy_request.py +++ b/src/fides/api/models/privacy_request.py @@ -1446,7 +1446,7 @@ def hash_value( cls, value: MultiValue, encoding: str = "UTF-8", - ) -> Union[str, List[str]]: + ) -> Optional[Union[str, List[str]]]: """Utility function to hash the value(s) with a generated salt""" def hash_single_value(value: Union[str, int]) -> str: @@ -1459,7 +1459,9 @@ def hash_single_value(value: Union[str, int]) -> str: return hashed_value if isinstance(value, list): - return [hash_single_value(item) for item in value] + # Skip hashing lists: this avoids us hashing and later indexing potentially large values and our index + # is not useful for array search anyway + return None return hash_single_value(value) diff --git a/tests/fixtures/application_fixtures.py b/tests/fixtures/application_fixtures.py index 00d2aea76f..23d5a1a364 100644 --- a/tests/fixtures/application_fixtures.py +++ b/tests/fixtures/application_fixtures.py @@ -1752,6 +1752,36 @@ def privacy_request_with_custom_fields(db: Session, policy: Policy) -> PrivacyRe privacy_request.delete(db) +@pytest.fixture(scope="function") +def privacy_request_with_custom_array_fields( + db: Session, policy: Policy +) -> PrivacyRequest: + privacy_request = PrivacyRequest.create( + db=db, + data={ + "external_id": f"ext-{str(uuid4())}", + "started_processing_at": datetime(2021, 10, 1), + "finished_processing_at": datetime(2021, 10, 3), + "requested_at": datetime(2021, 10, 1), + "status": PrivacyRequestStatus.complete, + "origin": f"https://example.com/", + "policy_id": policy.id, + "client_id": policy.client_id, + }, + ) + privacy_request.persist_custom_privacy_request_fields( + db=db, + custom_privacy_request_fields={ + "device_ids": CustomPrivacyRequestField( + label="Device Ids", value=["device_1", "device_2", "device_3"] + ), + }, + ) + privacy_request.save(db) + yield privacy_request + privacy_request.delete(db) + + @pytest.fixture(scope="function") def privacy_request_with_email_identity(db: Session, policy: Policy) -> PrivacyRequest: privacy_request = PrivacyRequest.create( diff --git a/tests/ops/api/v1/endpoints/test_consent_request_endpoints.py b/tests/ops/api/v1/endpoints/test_consent_request_endpoints.py index d8a9b639c4..befbb7c370 100644 --- a/tests/ops/api/v1/endpoints/test_consent_request_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_consent_request_endpoints.py @@ -384,7 +384,8 @@ def test_consent_request_with_custom_privacy_request_fields( data = { "identity": {"email": "test@example.com"}, "custom_privacy_request_fields": { - "first_name": {"label": "First name", "value": "John"} + "first_name": {"label": "First name", "value": "John"}, + "pets": {"label": "My Pets", "value": ["Dog", "Cat", "Snake"]}, }, } response = api_client.post(url, json=data) @@ -397,8 +398,17 @@ def test_consent_request_with_custom_privacy_request_fields( conditions=( CustomPrivacyRequestField.consent_request_id == consent_request_id ), - ).first() - assert custom_privacy_request_field + ).all() + for field in custom_privacy_request_field: + assert ( + field.encrypted_value["value"] + == data["custom_privacy_request_fields"][field.field_name]["value"] + ) + if isinstance(field.encrypted_value["value"], list): + # We don't hash list fields + assert field.hashed_value is None + else: + assert field.hashed_value is not None @pytest.mark.usefixtures("disable_consent_identity_verification") def test_consent_request_with_external_id(self, db, api_client, url): diff --git a/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py b/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py index 9439eb1f89..741c51e3e1 100644 --- a/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py @@ -2,9 +2,8 @@ import csv import io import json -import string from datetime import datetime, timedelta -from random import choice, randint +from random import randint from typing import List from unittest import mock from uuid import uuid4 @@ -292,6 +291,13 @@ def test_create_privacy_request_stores_multivalue_custom_fields( pr.get_persisted_custom_privacy_request_fields() ) assert persisted_custom_privacy_request_fields == TEST_CUSTOM_FIELDS + + for field in pr.custom_fields: + if isinstance(field.encrypted_value["value"], list): + # We don't hash list fields + assert field.hashed_value is None + else: + assert field.hashed_value is not None pr.delete(db=db) assert run_access_request_mock.called @@ -2118,6 +2124,65 @@ def test_privacy_request_search_with_custom_fields( assert resp["items"][0]["id"] == privacy_request.id assert resp["items"][0].get("custom_privacy_request_fields") is None + def test_privacy_request_search_by_custom_fields_and_array_fields( + self, + api_client: TestClient, + url, + generate_auth_header, + privacy_request_with_custom_fields, + privacy_request_with_custom_array_fields, + ): + privacy_request = privacy_request_with_custom_fields + auth_header = generate_auth_header(scopes=[PRIVACY_REQUEST_READ]) + response = api_client.post( + url, + headers=auth_header, + json={ + "custom_privacy_request_fields": {"first_name": "John"}, + "include_custom_privacy_request_fields": True, + }, + ) + assert 200 == response.status_code + + resp = response.json() + assert len(resp["items"]) == 1 + assert resp["items"][0]["id"] == privacy_request.id + assert ( + resp["items"][0]["custom_privacy_request_fields"] + == privacy_request.get_persisted_custom_privacy_request_fields() + ) + assert resp["items"][0]["policy"]["key"] == privacy_request.policy.key + assert resp["items"][0]["policy"]["name"] == privacy_request.policy.name + + # List fields are not indexed for search. privacy_request_with_custom_array_fields has a list of custom + # privacy request fields but it will not be returned here + response = api_client.post( + url, + headers=auth_header, + json={ + "custom_privacy_request_fields": {"device_id": "device_1"}, + "include_custom_privacy_request_fields": True, + }, + ) + assert 200 == response.status_code + + resp = response.json() + assert len(resp["items"]) == 0 + + # If a list field is sent in as the query param, we ignore the list field in search, it does not become a filter - + response = api_client.post( + url, + headers=auth_header, + json={ + "custom_privacy_request_fields": {"device_id": ["device_1"]}, + "include_custom_privacy_request_fields": True, + }, + ) + assert 200 == response.status_code + + resp = response.json() + assert len(resp["items"]) == 2 + def test_privacy_request_search_by_action( self, api_client: TestClient,