diff --git a/CHANGELOG.md b/CHANGELOG.md index 4164964cde..e99953a31d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ The types of changes are: - Fixed "add" icons on some buttons being wrong size [#4975](https://github.com/ethyca/fides/pull/4975) - Masked "Keyfile credentials" input on integration config form [#4971](https://github.com/ethyca/fides/pull/4971) - Fixed ability to update consent preferences after they've previously been set [#4984](https://github.com/ethyca/fides/pull/4984) +- Fixed validations for privacy declaration taxonomy labels when creating/updating a System [#4982](https://github.com/ethyca/fides/pull/4982) ## [2.38.0](https://github.com/ethyca/fides/compare/2.37.0...2.38.0) diff --git a/src/fides/api/alembic/migrations/versions/a3c173391603_add_property_id_to_privacy_request_model.py b/src/fides/api/alembic/migrations/versions/a3c173391603_add_property_id_to_privacy_request_model.py index 40ae8fe64c..c8058f9086 100644 --- a/src/fides/api/alembic/migrations/versions/a3c173391603_add_property_id_to_privacy_request_model.py +++ b/src/fides/api/alembic/migrations/versions/a3c173391603_add_property_id_to_privacy_request_model.py @@ -6,9 +6,8 @@ """ -from alembic import op import sqlalchemy as sa - +from alembic import op # revision identifiers, used by Alembic. revision = "a3c173391603" diff --git a/src/fides/api/api/v1/endpoints/consent_request_endpoints.py b/src/fides/api/api/v1/endpoints/consent_request_endpoints.py index 11566f3db9..82a27d737d 100644 --- a/src/fides/api/api/v1/endpoints/consent_request_endpoints.py +++ b/src/fides/api/api/v1/endpoints/consent_request_endpoints.py @@ -41,7 +41,7 @@ ) from fides.api.models.property import Property from fides.api.oauth.utils import verify_oauth_client -from fides.api.schemas.messaging.messaging import MessagingMethod, MessagingActionType +from fides.api.schemas.messaging.messaging import MessagingActionType, MessagingMethod from fides.api.schemas.privacy_request import BulkPostPrivacyRequests from fides.api.schemas.privacy_request import Consent as ConsentSchema from fides.api.schemas.privacy_request import ( @@ -56,9 +56,7 @@ ) from fides.api.schemas.redis_cache import Identity from fides.api.service._verification import send_verification_code_to_user -from fides.api.service.messaging.message_dispatch_service import ( - message_send_enabled, -) +from fides.api.service.messaging.message_dispatch_service import message_send_enabled from fides.api.util.api_router import APIRouter from fides.api.util.consent_util import ( get_or_create_fides_user_device_id_provided_identity, diff --git a/src/fides/api/db/system.py b/src/fides/api/db/system.py index a31528460b..4d662fdfb6 100644 --- a/src/fides/api/db/system.py +++ b/src/fides/api/db/system.py @@ -4,12 +4,13 @@ import copy from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Type, Union from deepdiff import DeepDiff from fastapi import HTTPException from fideslang.models import Cookies as CookieSchema from fideslang.models import System as SystemSchema +from fideslang.validation import FidesKey from loguru import logger as log from sqlalchemy import and_, delete, insert, select, update from sqlalchemy.ext.asyncio import AsyncSession @@ -20,6 +21,8 @@ from fides.api.db.crud import create_resource, get_resource, update_resource from fides.api.models.sql_models import ( # type: ignore[attr-defined] Cookies, + DataCategory, + DataSubject, DataUse, PrivacyDeclaration, System, @@ -48,27 +51,54 @@ def get_system(db: Session, fides_key: str) -> System: return system +async def validate_data_labels( + db: AsyncSession, + sql_model: Union[Type[DataUse], Type[DataSubject], Type[DataCategory]], + labels: List[FidesKey], +) -> None: + """ + Given a model and a list of FidesKeys, check that for each Fides Key + there is a model instance with that key and the active attribute set to True. + If any of the keys don't exist or exist but are not active, raise a 400 error + """ + for label in labels: + try: + resource = await get_resource( + sql_model=sql_model, + fides_key=label, + async_session=db, + ) + except NotFoundError: + raise HTTPException( + status_code=HTTP_400_BAD_REQUEST, + detail=f"Invalid privacy declaration referencing unknown {sql_model.__name__} {label}", + ) + + if not resource.active: + raise HTTPException( + status_code=HTTP_400_BAD_REQUEST, + detail=f"Invalid privacy declaration referencing inactive {sql_model.__name__} {label}", + ) + + async def validate_privacy_declarations(db: AsyncSession, system: SystemSchema) -> None: """ Ensure that the `PrivacyDeclaration`s on the provided `System` resource are valid: - that they reference valid `DataUse` records + - that they reference valid `DataCategory` records + - that they reference valid `DataSubject` records - that there are not "duplicate" `PrivacyDeclaration`s as defined by their "logical ID" If not, a `400` is raised """ logical_ids = set() for privacy_declaration in system.privacy_declarations: - try: - await get_resource( - sql_model=DataUse, - fides_key=privacy_declaration.data_use, - async_session=db, - ) - except NotFoundError: - raise HTTPException( - status_code=HTTP_400_BAD_REQUEST, - detail=f"Invalid privacy declaration referencing unknown DataUse {privacy_declaration.data_use}", - ) + await validate_data_labels(db, DataUse, [privacy_declaration.data_use]) + await validate_data_labels( + db, DataCategory, privacy_declaration.data_categories + ) + await validate_data_labels(db, DataSubject, privacy_declaration.data_subjects) + logical_id = privacy_declaration_logical_id(privacy_declaration) if logical_id in logical_ids: raise HTTPException( diff --git a/src/fides/api/schemas/connection_configuration/__init__.py b/src/fides/api/schemas/connection_configuration/__init__.py index ea95fe5f67..c3fa117f66 100644 --- a/src/fides/api/schemas/connection_configuration/__init__.py +++ b/src/fides/api/schemas/connection_configuration/__init__.py @@ -87,7 +87,8 @@ SaaSSchemaFactory as SaaSSchemaFactory, ) from fides.api.schemas.connection_configuration.connection_secrets_scylla import ( - ScyllaSchema, ScyllaDocsSchema, + ScyllaDocsSchema, + ScyllaSchema, ) from fides.api.schemas.connection_configuration.connection_secrets_snowflake import ( SnowflakeDocsSchema as SnowflakeDocsSchema, @@ -175,5 +176,5 @@ def get_connection_secrets_schema( FidesDocsSchema, SovrnDocsSchema, DynamoDBDocsSchema, - ScyllaDocsSchema + ScyllaDocsSchema, ] diff --git a/tests/ctl/core/test_api.py b/tests/ctl/core/test_api.py index ed517f2ec3..31bc741ce8 100644 --- a/tests/ctl/core/test_api.py +++ b/tests/ctl/core/test_api.py @@ -13,6 +13,7 @@ from fideslang.models import PrivacyDeclaration as PrivacyDeclarationSchema from fideslang.models import System as SystemSchema from pytest import MonkeyPatch +from sqlalchemy.orm import Session from starlette.status import ( HTTP_200_OK, HTTP_201_CREATED, @@ -30,7 +31,11 @@ from fides.api.db.system import create_system from fides.api.models.connectionconfig import ConnectionConfig from fides.api.models.datasetconfig import DatasetConfig -from fides.api.models.sql_models import Dataset, PrivacyDeclaration, System +from fides.api.models.sql_models import DataCategory as DataCategoryModel +from fides.api.models.sql_models import Dataset +from fides.api.models.sql_models import DataSubject as DataSubjectModel +from fides.api.models.sql_models import DataUse as DataUseModel +from fides.api.models.sql_models import PrivacyDeclaration, System from fides.api.models.system_history import SystemHistory from fides.api.models.tcf_purpose_overrides import TCFPurposeOverride from fides.api.oauth.roles import OWNER, VIEWER @@ -73,6 +78,66 @@ def get_existing_key(test_config: FidesConfig, resource_type: str) -> int: ).json()[-1]["fides_key"] +@pytest.fixture(scope="function", name="inactive_data_category") +def fixture_inactive_data_category(db: Session) -> typing.Generator: + """ + Fixture that yields an inactive data category and then deletes it for each test run. + """ + fides_key = "inactive_data_category" + data_category = DataCategoryModel.create( + db=db, + data={ + "fides_key": fides_key, + "name": "Inactive Category", + "active": False, + }, + ) + + yield data_category + + data_category.delete(db) + + +@pytest.fixture(scope="function", name="inactive_data_use") +def fixture_inactive_data_use(db: Session) -> typing.Generator: + """ + Fixture that yields an inactive data use and then deletes it for each test run. + """ + fides_key = "inactive_data_use" + data_use = DataUseModel.create( + db=db, + data={ + "fides_key": fides_key, + "name": "Inactive Use", + "active": False, + }, + ) + + yield data_use + + data_use.delete(db) + + +@pytest.fixture(scope="function", name="inactive_data_subject") +def fixture_inactive_data_subject(db: Session) -> typing.Generator: + """ + Fixture that yields an inactive data subject and then deletes it for each test run. + """ + fides_key = "inactive_data_subject" + data_subject = DataSubjectModel.create( + db=db, + data={ + "fides_key": fides_key, + "name": "Inactive Subject", + "active": False, + }, + ) + + yield data_subject + + data_subject.delete(db) + + # Unit Tests @pytest.mark.unit def test_generate_resource_urls_no_id(test_config: FidesConfig) -> None: @@ -589,6 +654,211 @@ def test_system_create_system_already_exists( len(System.all(db)) == 1 ) # ensure our system wasn't created, still only one system + def test_system_create_invalid_data_category( + self, test_config, db, generate_auth_header + ): + auth_header = generate_auth_header(scopes=[SYSTEM_CREATE]) + + result = _api.create( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=json.dumps( + { + "fides_key": "system_key", + "system_type": "system", + "privacy_declarations": [ + { + "fides_key": "test", + "data_categories": [ + "user.name.first", + "user.name.nickname", + "user.name.last", + ], # Nickname does not exist + "data_use": "marketing", + } + ], + } + ), + ) + + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": "Invalid privacy declaration referencing unknown DataCategory user.name.nickname" + } + + assert not System.all(db) # ensure our system wasn't created + + def test_system_create_invalid_data_subject( + self, test_config, db, generate_auth_header + ): + auth_header = generate_auth_header(scopes=[SYSTEM_CREATE]) + + result = _api.create( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=json.dumps( + { + "fides_key": "system_key", + "system_type": "system", + "privacy_declarations": [ + { + "fides_key": "test", + "data_categories": ["user.name.first", "user.name.last"], + "data_use": "marketing", + "data_subjects": [ + "employee", + "intern", + ], # Intern does not exist + } + ], + } + ), + ) + + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": "Invalid privacy declaration referencing unknown DataSubject intern" + } + + assert not System.all(db) # ensure our system wasn't created + + def test_system_create_invalid_data_use( + self, test_config, db, generate_auth_header + ): + auth_header = generate_auth_header(scopes=[SYSTEM_CREATE]) + + result = _api.create( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=json.dumps( + { + "fides_key": "system_key", + "system_type": "system", + "privacy_declarations": [ + { + "fides_key": "test", + "data_categories": ["user.name.first", "user.name.last"], + "data_use": "marketing.measure", # Invalid data use + } + ], + } + ), + ) + + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": "Invalid privacy declaration referencing unknown DataUse marketing.measure" + } + + assert not System.all(db) # ensure our system wasn't created + + def test_system_create_inactive_data_category( + self, test_config, inactive_data_category, db, generate_auth_header + ): + auth_header = generate_auth_header(scopes=[SYSTEM_CREATE]) + + result = _api.create( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=json.dumps( + { + "fides_key": "system_key", + "system_type": "system", + "privacy_declarations": [ + { + "fides_key": "test", + "data_categories": [ + "user.name.first", + "user.name.last", + inactive_data_category.fides_key, + ], + "data_use": "marketing", + } + ], + } + ), + ) + + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": f"Invalid privacy declaration referencing inactive DataCategory {inactive_data_category.fides_key}" + } + + assert not System.all(db) # ensure our system wasn't created + + def test_system_create_inactive_data_use( + self, test_config, inactive_data_use, db, generate_auth_header + ): + auth_header = generate_auth_header(scopes=[SYSTEM_CREATE]) + + result = _api.create( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=json.dumps( + { + "fides_key": "system_key", + "system_type": "system", + "privacy_declarations": [ + { + "fides_key": "test", + "data_categories": [ + "user.name.first", + "user.name.last", + ], + "data_use": inactive_data_use.fides_key, + } + ], + } + ), + ) + + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": f"Invalid privacy declaration referencing inactive DataUse {inactive_data_use.fides_key}" + } + + assert not System.all(db) # ensure our system wasn't created + + def test_system_create_inactive_data_subject( + self, test_config, inactive_data_subject, db, generate_auth_header + ): + auth_header = generate_auth_header(scopes=[SYSTEM_CREATE]) + + result = _api.create( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=json.dumps( + { + "fides_key": "system_key", + "system_type": "system", + "privacy_declarations": [ + { + "fides_key": "test", + "data_categories": [ + "user.name.first", + "user.name.last", + ], + "data_use": "marketing", + "data_subjects": [inactive_data_subject.fides_key], + } + ], + } + ), + ) + + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": f"Invalid privacy declaration referencing inactive DataSubject {inactive_data_subject.fides_key}" + } + + assert not System.all(db) # ensure our system wasn't created + async def test_system_create( self, generate_auth_header, db, test_config, system_create_request_body ): @@ -1312,10 +1582,151 @@ def test_system_update_privacy_declaration_invalid_data_use( json_resource=system_update_request_body.json(exclude_none=True), ) assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": "Invalid privacy declaration referencing unknown DataUse invalid_data_use" + } + + # assert the system's privacy declaration has not been updated + db.refresh(system) + assert system.privacy_declarations[0].data_use == "marketing.advertising" + + def test_system_update_privacy_declaration_invalid_data_category( + self, + system, + test_config, + system_update_request_body, + generate_role_header, + db, + ): + auth_header = generate_role_header(roles=[OWNER]) + system_update_request_body.privacy_declarations[0].data_categories = [ + "invalid_data_category" + ] + result = _api.update( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=system_update_request_body.json(exclude_none=True), + ) + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": "Invalid privacy declaration referencing unknown DataCategory invalid_data_category" + } + # assert the system's privacy declaration has not been updated + db.refresh(system) + assert system.privacy_declarations[0].data_categories == [ + "user.device.cookie_id" + ] + + def test_system_update_privacy_declaration_invalid_data_subject( + self, + system, + test_config, + system_update_request_body, + generate_role_header, + db, + ): + auth_header = generate_role_header(roles=[OWNER]) + system_update_request_body.privacy_declarations[0].data_subjects = [ + "invalid_data_subject" + ] + result = _api.update( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=system_update_request_body.json(exclude_none=True), + ) + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": "Invalid privacy declaration referencing unknown DataSubject invalid_data_subject" + } + # assert the system's privacy declaration has not been updated + db.refresh(system) + assert system.privacy_declarations[0].data_subjects == ["customer"] + + def test_system_update_privacy_declaration_inactive_data_use( + self, + system, + test_config, + system_update_request_body, + inactive_data_use, + generate_role_header, + db, + ): + auth_header = generate_role_header(roles=[OWNER]) + system_update_request_body.privacy_declarations[0].data_use = ( + inactive_data_use.fides_key + ) + result = _api.update( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=system_update_request_body.json(exclude_none=True), + ) + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": f"Invalid privacy declaration referencing inactive DataUse {inactive_data_use.fides_key}" + } # assert the system's privacy declaration has not been updated db.refresh(system) assert system.privacy_declarations[0].data_use == "marketing.advertising" + def test_system_update_privacy_declaration_inactive_data_category( + self, + system, + test_config, + system_update_request_body, + inactive_data_category, + generate_role_header, + db, + ): + auth_header = generate_role_header(roles=[OWNER]) + system_update_request_body.privacy_declarations[0].data_categories = [ + inactive_data_category.fides_key + ] + result = _api.update( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=system_update_request_body.json(exclude_none=True), + ) + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": f"Invalid privacy declaration referencing inactive DataCategory {inactive_data_category.fides_key}" + } + # assert the system's privacy declaration has not been updated + db.refresh(system) + assert system.privacy_declarations[0].data_categories == [ + "user.device.cookie_id" + ] + + def test_system_update_privacy_declaration_inactive_data_subject( + self, + system, + test_config, + system_update_request_body, + inactive_data_subject, + generate_role_header, + db, + ): + auth_header = generate_role_header(roles=[OWNER]) + system_update_request_body.privacy_declarations[0].data_subjects = [ + inactive_data_subject.fides_key + ] + result = _api.update( + url=test_config.cli.server_url, + headers=auth_header, + resource_type="system", + json_resource=system_update_request_body.json(exclude_none=True), + ) + assert result.status_code == HTTP_400_BAD_REQUEST + assert result.json() == { + "detail": f"Invalid privacy declaration referencing inactive DataSubject {inactive_data_subject.fides_key}" + } + # assert the system's privacy declaration has not been updated + db.refresh(system) + assert system.privacy_declarations[0].data_subjects == ["customer"] + def test_system_update_privacy_declaration_invalid_duplicate( self, system,