Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip Indexing Custom Privacy Request Field Arrays #5127

Merged
merged 6 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ The types of changes are:
### Fixed
- Fixed typo in the BigQuery integration description [#5120](https://github.com/ethyca/fides/pull/5120)
- Fixed default values of Experience config toggles [#5123](https://github.com/ethyca/fides/pull/5123)
- Skip indexing Custom Privacy Request Field array values [#5127](https://github.com/ethyca/fides/pull/5127)

### Fixed
- Fixed not being able to edit a monitor from scheduled to not scheduled [#5114](https://github.com/ethyca/fides/pull/5114)
Expand Down
3 changes: 3 additions & 0 deletions src/fides/api/api/v1/endpoints/privacy_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,13 +466,16 @@ def _filter_privacy_request_queryset(
query = query.filter(PrivacyRequest.id.in_(identities_query))

if custom_privacy_request_fields:
# Note that if Custom Privacy Request Field values were arrays, they are not
# indexed for searching and not discoverable here
custom_field_conditions = [
(CustomPrivacyRequestField.field_name == field_name)
& (
CustomPrivacyRequestField.hashed_value
== CustomPrivacyRequestField.hash_value(value)
)
for field_name, value in custom_privacy_request_fields.items()
if not isinstance(value, list)
pattisdr marked this conversation as resolved.
Show resolved Hide resolved
]

custom_fields_query = select(
Expand Down
6 changes: 4 additions & 2 deletions src/fides/api/models/privacy_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -1446,7 +1446,7 @@ def hash_value(
cls,
value: MultiValue,
encoding: str = "UTF-8",
) -> Union[str, List[str]]:
) -> Optional[Union[str, List[str]]]:
"""Utility function to hash the value(s) with a generated salt"""

def hash_single_value(value: Union[str, int]) -> str:
Expand All @@ -1459,7 +1459,9 @@ def hash_single_value(value: Union[str, int]) -> str:
return hashed_value

if isinstance(value, list):
return [hash_single_value(item) for item in value]
# Skip hashing lists: this avoids us hashing and later indexing potentially large values and our index
# is not useful for array search anyway
return None
pattisdr marked this conversation as resolved.
Show resolved Hide resolved
return hash_single_value(value)


Expand Down
30 changes: 30 additions & 0 deletions tests/fixtures/application_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -1752,6 +1752,36 @@ def privacy_request_with_custom_fields(db: Session, policy: Policy) -> PrivacyRe
privacy_request.delete(db)


@pytest.fixture(scope="function")
def privacy_request_with_custom_array_fields(
db: Session, policy: Policy
) -> PrivacyRequest:
privacy_request = PrivacyRequest.create(
db=db,
data={
"external_id": f"ext-{str(uuid4())}",
"started_processing_at": datetime(2021, 10, 1),
"finished_processing_at": datetime(2021, 10, 3),
"requested_at": datetime(2021, 10, 1),
"status": PrivacyRequestStatus.complete,
"origin": f"https://example.com/",
"policy_id": policy.id,
"client_id": policy.client_id,
},
)
privacy_request.persist_custom_privacy_request_fields(
db=db,
custom_privacy_request_fields={
"device_ids": CustomPrivacyRequestField(
label="Device Ids", value=["device_1", "device_2", "device_3"]
),
},
)
privacy_request.save(db)
yield privacy_request
privacy_request.delete(db)


@pytest.fixture(scope="function")
def privacy_request_with_email_identity(db: Session, policy: Policy) -> PrivacyRequest:
privacy_request = PrivacyRequest.create(
Expand Down
16 changes: 13 additions & 3 deletions tests/ops/api/v1/endpoints/test_consent_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,8 @@ def test_consent_request_with_custom_privacy_request_fields(
data = {
"identity": {"email": "test@example.com"},
"custom_privacy_request_fields": {
"first_name": {"label": "First name", "value": "John"}
"first_name": {"label": "First name", "value": "John"},
"pets": {"label": "My Pets", "value": ["Dog", "Cat", "Snake"]},
},
}
response = api_client.post(url, json=data)
Expand All @@ -397,8 +398,17 @@ def test_consent_request_with_custom_privacy_request_fields(
conditions=(
CustomPrivacyRequestField.consent_request_id == consent_request_id
),
).first()
assert custom_privacy_request_field
).all()
for field in custom_privacy_request_field:
assert (
field.encrypted_value["value"]
== data["custom_privacy_request_fields"][field.field_name]["value"]
)
if isinstance(field.encrypted_value["value"], list):
# We don't hash list fields
assert field.hashed_value is None
else:
assert field.hashed_value is not None

@pytest.mark.usefixtures("disable_consent_identity_verification")
def test_consent_request_with_external_id(self, db, api_client, url):
Expand Down
69 changes: 67 additions & 2 deletions tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
import csv
import io
import json
import string
from datetime import datetime, timedelta
from random import choice, randint
from random import randint
from typing import List
from unittest import mock
from uuid import uuid4
Expand Down Expand Up @@ -292,6 +291,13 @@ def test_create_privacy_request_stores_multivalue_custom_fields(
pr.get_persisted_custom_privacy_request_fields()
)
assert persisted_custom_privacy_request_fields == TEST_CUSTOM_FIELDS

for field in pr.custom_fields:
if isinstance(field.encrypted_value["value"], list):
# We don't hash list fields
assert field.hashed_value is None
else:
assert field.hashed_value is not None
pattisdr marked this conversation as resolved.
Show resolved Hide resolved
pr.delete(db=db)
assert run_access_request_mock.called

Expand Down Expand Up @@ -2118,6 +2124,65 @@ def test_privacy_request_search_with_custom_fields(
assert resp["items"][0]["id"] == privacy_request.id
assert resp["items"][0].get("custom_privacy_request_fields") is None

def test_privacy_request_search_by_custom_fields_and_array_fields(
self,
api_client: TestClient,
url,
generate_auth_header,
privacy_request_with_custom_fields,
privacy_request_with_custom_array_fields,
):
privacy_request = privacy_request_with_custom_fields
auth_header = generate_auth_header(scopes=[PRIVACY_REQUEST_READ])
response = api_client.post(
url,
headers=auth_header,
json={
"custom_privacy_request_fields": {"first_name": "John"},
"include_custom_privacy_request_fields": True,
},
)
assert 200 == response.status_code

resp = response.json()
assert len(resp["items"]) == 1
assert resp["items"][0]["id"] == privacy_request.id
assert (
resp["items"][0]["custom_privacy_request_fields"]
== privacy_request.get_persisted_custom_privacy_request_fields()
)
assert resp["items"][0]["policy"]["key"] == privacy_request.policy.key
assert resp["items"][0]["policy"]["name"] == privacy_request.policy.name

# List fields are not indexed for search. privacy_request_with_custom_array_fields has a list of custom
# privacy request fields but it will not be returned here
response = api_client.post(
url,
headers=auth_header,
json={
"custom_privacy_request_fields": {"device_id": "device_1"},
"include_custom_privacy_request_fields": True,
},
)
assert 200 == response.status_code

resp = response.json()
assert len(resp["items"]) == 0

# If a list field is sent in as the query param, we ignore the list field in search, it does not become a filter -
response = api_client.post(
url,
headers=auth_header,
json={
"custom_privacy_request_fields": {"device_id": ["device_1"]},
"include_custom_privacy_request_fields": True,
},
)
assert 200 == response.status_code

resp = response.json()
assert len(resp["items"]) == 2

def test_privacy_request_search_by_action(
self,
api_client: TestClient,
Expand Down