Combine Execution and Audit Logs in Request Status Endpoint [#1024] (#1068)

* Update the request status endpoint, so when the verbose query param is used and execution logs are embedded, also return audit logs. Execution Logs are created at the collection level while audit logs are for the overall privacy request level, so most fields returned for audit logs are None. Logs are also grouped at the dataset level here, so give the audit logs a fake dataset name for display purposes, for example, "Request approved". * Update CHANGELOG and update docs to reflect that audit logs are included in a verbose request status response.
ethyca · Aug 15, 2022 · f848ecb · f848ecb
1 parent 3174d64
commit f848ecb
Show file tree

Hide file tree

Showing 6 changed files with 154 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,10 +23,16 @@ The types of changes are:
 
 * Access support for Datadog Logs [#1060](https://github.com/ethyca/fidesops/pull/1060)
 ""
+
 ### Fixed
 
 * HTTP headers are now preserved in requests generated from SaaS connector pagination [#1069](https://github.com/ethyca/fidesops/pull/1069)
 
+### Changed
+
+* Update request status endpoint to return both audit and execution logs [#1068](https://github.com/ethyca/fidesops/pull/1068)
+
+
 ## [1.7.0](https://github.com/ethyca/fidesops/compare/1.6.3...1.7.0)
 
 ### Added

diff --git a/docs/fidesops/docs/guides/reporting.md b/docs/fidesops/docs/guides/reporting.md
@@ -91,13 +91,18 @@ Use the `verbose` query param to see more details about individual collections v
 with individual statuses. Individual collection statuses include `in_processing`, `retrying`, `complete` or `error`.
 You may see multiple logs for each collection as they reach different steps in the lifecycle.  
 
-`verbose` will embed a “results” key in the response, with execution logs grouped by dataset name.  In the example below,
+`verbose` will embed a “results” key in the response, containing both audit logs, which describe the overall request,
+and execution logs, which are grouped by dataset name.  
+
+In the example below,
 we have two datasets: `my-mongo-db` and `my-postgres-db`. There are two execution logs for `my-mongo-db` (when the `flights` 
 collection is starting execution and when the `flights` collection has finished) and two execution
 logs for `my-postgres-db` (when the `order` collection is starting and finishing execution).  `fields_affected` are the fields
 that were potentially returned or masked based on the Rules you've specified on the Policy. The embedded execution logs 
 are automatically truncated at 50 logs, so to view the entire list of logs, visit the execution logs endpoint separately.
 
+There are also "Request approved" and "Request finished" audit logs included in the response.
+
 ```json title="<code>GET api/v1/privacy-request?request_id={privacy_request_id}&verbose=True</code>"
 {
     "items": [
@@ -109,6 +114,17 @@ are automatically truncated at 50 logs, so to view the entire list of logs, visi
             "status": "complete",
             "external_id": null,
             "results": {
+                 "Request approved": [
+                    {
+                        "collection_name": null,
+                        "fields_affected": null,
+                        "message": "",
+                        "action_type": null,
+                        "status": "approved",
+                        "updated_at": "2022-08-11T14:03:37.679732+00:00",
+                        "user_id": "system"
+                    }
+                ],
                 "my-mongo-db": [
                     {
                         "collection_name": "flights",
@@ -132,7 +148,8 @@ are automatically truncated at 50 logs, so to view the entire list of logs, visi
                         "message": "success",
                         "action_type": "access",
                         "status": "complete",
-                        "updated_at": "2022-02-28T16:38:04.727094+00:00"
+                        "updated_at": "2022-02-28T16:38:04.727094+00:00",
+                        "user_id": null
                     }
                 ],
                 "my-postgres-db": [
@@ -158,10 +175,22 @@ are automatically truncated at 50 logs, so to view the entire list of logs, visi
                         "message": "success",
                         "action_type": "access",
                         "status": "complete",
-                        "updated_at": "2022-02-28T16:39:04.668513+00:00"
+                        "updated_at": "2022-02-28T16:39:04.668513+00:00",
+                        "user_id": null
                     }
-                ]
-            }
+                ],
+                "Request finished": [
+                    {
+                        "collection_name": null,
+                        "fields_affected": null,
+                        "message": "",
+                        "action_type": null,
+                        "status": "finished",
+                        "updated_at": "2022-08-11T14:04:29.611878+00:00",
+                        "user_id": "system"
+                    }
+                ]
+            }
         }
     ],
     "total": 1,

diff --git a/src/fidesops/ops/api/v1/endpoints/privacy_request_endpoints.py b/src/fidesops/ops/api/v1/endpoints/privacy_request_endpoints.py
@@ -7,6 +7,7 @@
 from datetime import datetime
 from typing import Any, Callable, DefaultDict, Dict, List, Optional, Set, Union
 
+import sqlalchemy
 from fastapi import Body, Depends, HTTPException, Security
 from fastapi.params import Query as FastAPIQuery
 from fastapi_pagination import Page, Params
@@ -15,7 +16,7 @@
 from fideslib.models.audit_log import AuditLog, AuditLogAction
 from fideslib.models.client import ClientDetail
 from pydantic import conlist
-from sqlalchemy import column
+from sqlalchemy import cast, column, null
 from sqlalchemy.orm import Query, Session
 from starlette.responses import StreamingResponse
 from starlette.status import (
@@ -283,27 +284,64 @@ def privacy_request_csv_download(
     return response
 
 
-def execution_logs_by_dataset_name(
+def execution_and_audit_logs_by_dataset_name(
     self: PrivacyRequest,
 ) -> DefaultDict[str, List["ExecutionLog"]]:
     """
-    Returns a truncated list of ExecutionLogs for each dataset name associated with
-    a PrivacyRequest. Added as a conditional property to the PrivacyRequest class at runtime to
-    show optionally embedded execution logs.
+    Returns a combined mapping of execution and audit logs for the given privacy request.
+
+    Audit Logs are for the entire privacy request as a whole, while execution logs are created for specific collections.
+    Logs here are grouped by dataset, but if it is an audit log, it is just given a fake dataset name, here "Request + status"
+    ExecutionLogs for each dataset are truncated.
+
+    Added as a conditional property to the PrivacyRequest class at runtime to
+    show optionally embedded execution and audit logs.
 
     An example response might include your execution logs from your mongo db in one group, and execution logs from
-    your postgres db in a different group.
+    your postgres db in a different group, plus audit logs for when the request was approved and denied.
     """
+    db: Session = Session.object_session(self)
+    all_logs: DefaultDict[str, List[Union["AuditLog", "ExecutionLog"]]] = defaultdict(
+        list
+    )
 
-    execution_logs: DefaultDict[str, List["ExecutionLog"]] = defaultdict(list)
-
-    for log in self.execution_logs.order_by(
-        ExecutionLog.dataset_name, ExecutionLog.updated_at.asc()
-    ):
-        if len(execution_logs[log.dataset_name]) > EMBEDDED_EXECUTION_LOG_LIMIT - 1:
+    execution_log_query: Query = db.query(
+        ExecutionLog.id,
+        ExecutionLog.created_at,
+        ExecutionLog.updated_at,
+        ExecutionLog.message,
+        cast(ExecutionLog.status, sqlalchemy.String).label("status"),
+        ExecutionLog.privacy_request_id,
+        ExecutionLog.dataset_name,
+        ExecutionLog.collection_name,
+        ExecutionLog.fields_affected,
+        ExecutionLog.action_type,
+        null().label("user_id"),
+    ).filter(ExecutionLog.privacy_request_id == self.id)
+
+    audit_log_query: Query = db.query(
+        AuditLog.id,
+        AuditLog.created_at,
+        AuditLog.updated_at,
+        AuditLog.message,
+        cast(AuditLog.action.label("status"), sqlalchemy.String).label("status"),
+        AuditLog.privacy_request_id,
+        null().label("dataset_name"),
+        null().label("collection_name"),
+        null().label("fields_affected"),
+        null().label("action_type"),
+        AuditLog.user_id,
+    ).filter(AuditLog.privacy_request_id == self.id)
+
+    combined: Query = execution_log_query.union_all(audit_log_query)
+
+    for log in combined.order_by(ExecutionLog.updated_at.asc()):
+        dataset_name: str = log.dataset_name or f"Request {log.status}"
+
+        if len(all_logs[dataset_name]) > EMBEDDED_EXECUTION_LOG_LIMIT - 1:
             continue
-        execution_logs[log.dataset_name].append(log)
-    return execution_logs
+        all_logs[dataset_name].append(log)
+    return all_logs
 
 
 def _filter_privacy_request_queryset(
@@ -509,12 +547,12 @@ def get_request_status(
 
     # Conditionally embed execution log details in the response.
     if verbose:
-        logger.info("Finding execution log details")
-        PrivacyRequest.execution_logs_by_dataset = property(
-            execution_logs_by_dataset_name
+        logger.info("Finding execution and audit log details")
+        PrivacyRequest.execution_and_audit_logs_by_dataset = property(
+            execution_and_audit_logs_by_dataset_name
         )
     else:
-        PrivacyRequest.execution_logs_by_dataset = property(lambda self: None)
+        PrivacyRequest.execution_and_audit_logs_by_dataset = property(lambda self: None)
 
     paginated = paginate(query, params)
     if include_identities:

diff --git a/src/fidesops/ops/schemas/privacy_request.py b/src/fidesops/ops/schemas/privacy_request.py
@@ -1,7 +1,8 @@
 from datetime import datetime
 from enum import Enum as EnumType
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
 
+from fideslib.models.audit_log import AuditLogAction
 from fideslib.oauth.schemas.user import PrivacyRequestReviewer
 from pydantic import Field, validator
 
@@ -107,6 +108,25 @@ class ExecutionLogDetailResponse(ExecutionLogResponse):
     dataset_name: Optional[str]
 
 
+class ExecutionAndAuditLogResponse(BaseSchema):
+    """Schema for the combined ExecutionLogs and Audit Logs
+    associated with a PrivacyRequest"""
+
+    collection_name: Optional[str]
+    fields_affected: Optional[List[FieldsAffectedResponse]]
+    message: Optional[str]
+    action_type: Optional[ActionType]
+    status: Optional[Union[ExecutionLogStatus, AuditLogAction]]
+    updated_at: Optional[datetime]
+    user_id: Optional[str]
+
+    class Config:
+        """Set orm_mode and allow population by field name"""
+
+        use_enum_values = True
+        allow_population_by_field_name = True
+
+
 class RowCountRequest(BaseSchema):
     """Schema for a user to manually confirm data erased for a collection"""
 
@@ -148,11 +168,12 @@ class Config:
 
 
 class PrivacyRequestVerboseResponse(PrivacyRequestResponse):
-    """The schema for the more detailed PrivacyRequest response containing detailed execution logs."""
+    """The schema for the more detailed PrivacyRequest response containing both
+    detailed execution logs and audit logs."""
 
-    execution_logs_by_dataset: Dict[str, List[ExecutionLogResponse]] = Field(
-        alias="results"
-    )
+    execution_and_audit_logs_by_dataset: Dict[
+        str, List[ExecutionAndAuditLogResponse]
+    ] = Field(alias="results")
 
     class Config:
         """Allow the results field to be populated by the 'PrivacyRequest.execution_logs_by_dataset' property"""

diff --git a/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py b/tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py
@@ -22,6 +22,7 @@
 
 from fidesops.ops.api.v1.endpoints.privacy_request_endpoints import (
     EMBEDDED_EXECUTION_LOG_LIMIT,
+    execution_and_audit_logs_by_dataset_name,
     validate_manual_input,
 )
 from fidesops.ops.api.v1.scope_registry import (
@@ -58,6 +59,7 @@
 from fidesops.ops.schemas.dataset import DryRunDatasetResponse
 from fidesops.ops.schemas.masking.masking_secrets import SecretType
 from fidesops.ops.schemas.policy import PolicyResponse
+from fidesops.ops.schemas.privacy_request import ExecutionAndAuditLogResponse
 from fidesops.ops.schemas.redis_cache import PrivacyRequestIdentity
 from fidesops.ops.util.cache import (
     get_encryption_cache_key,
@@ -955,6 +957,7 @@ def test_verbose_privacy_requests(
         api_client: TestClient,
         generate_auth_header,
         privacy_request: PrivacyRequest,
+        audit_log,
         postgres_execution_log,
         second_postgres_execution_log,
         mongo_execution_log,
@@ -1000,6 +1003,17 @@ def test_verbose_privacy_requests(
                     "stopped_collection_details": None,
                     "resume_endpoint": None,
                     "results": {
+                        "Request approved": [
+                            {
+                                "collection_name": None,
+                                "fields_affected": None,
+                                "message": "",
+                                "action_type": None,
+                                "status": "approved",
+                                "updated_at": stringify_date(audit_log.updated_at),
+                                "user_id": "system",
+                            }
+                        ],
                         "my-mongo-db": [
                             {
                                 "collection_name": "orders",
@@ -1016,6 +1030,7 @@ def test_verbose_privacy_requests(
                                 "updated_at": stringify_date(
                                     mongo_execution_log.updated_at
                                 ),
+                                "user_id": None,
                             }
                         ],
                         "my-postgres-db": [
@@ -1034,6 +1049,7 @@ def test_verbose_privacy_requests(
                                 "updated_at": stringify_date(
                                     postgres_execution_log.updated_at
                                 ),
+                                "user_id": None,
                             },
                             {
                                 "collection_name": "address",
@@ -1059,6 +1075,7 @@ def test_verbose_privacy_requests(
                                 "updated_at": stringify_date(
                                     second_postgres_execution_log.updated_at
                                 ),
+                                "user_id": None,
                             },
                         ],
                     },

diff --git a/tests/ops/fixtures/application_fixtures.py b/tests/ops/fixtures/application_fixtures.py
@@ -9,6 +9,7 @@
 import yaml
 from faker import Faker
 from fideslib.core.config import load_file, load_toml
+from fideslib.models.audit_log import AuditLog, AuditLogAction
 from fideslib.models.client import ClientDetail
 from fideslib.models.fides_user import FidesUser
 from fideslib.models.fides_user_permissions import FidesUserPermissions
@@ -797,6 +798,21 @@ def privacy_request(db: Session, policy: Policy) -> PrivacyRequest:
     privacy_request.delete(db)
 
 
+@pytest.fixture(scope="function")
+def audit_log(db: Session, privacy_request) -> PrivacyRequest:
+    audit_log = AuditLog.create(
+        db=db,
+        data={
+            "user_id": "system",
+            "privacy_request_id": privacy_request.id,
+            "action": AuditLogAction.approved,
+            "message": "",
+        },
+    )
+    yield audit_log
+    audit_log.delete(db)
+
+
 @pytest.fixture(scope="function")
 def privacy_request_status_pending(db: Session, policy: Policy) -> PrivacyRequest:
     privacy_request = _create_privacy_request_for_policy(