Skip to content

Commit

Permalink
Combine Execution and Audit Logs in Request Status Endpoint [#1024] (#…
Browse files Browse the repository at this point in the history
…1068)

* Update the request status endpoint, so when the verbose query param is used and execution logs are embedded, also return audit logs.

Execution Logs are created at the collection level while audit logs are for the overall privacy request level, so most fields returned for audit logs are None.
Logs are also grouped at the dataset level here, so give the audit logs a fake dataset name for display purposes, for example, "Request approved".

* Update CHANGELOG and update docs to reflect that audit logs are included in a verbose request status response.
  • Loading branch information
pattisdr authored Aug 15, 2022
1 parent 3174d64 commit f848ecb
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 27 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,16 @@ The types of changes are:

* Access support for Datadog Logs [#1060](https://github.com/ethyca/fidesops/pull/1060)
""

### Fixed

* HTTP headers are now preserved in requests generated from SaaS connector pagination [#1069](https://github.com/ethyca/fidesops/pull/1069)

### Changed

* Update request status endpoint to return both audit and execution logs [#1068](https://github.com/ethyca/fidesops/pull/1068)


## [1.7.0](https://github.com/ethyca/fidesops/compare/1.6.3...1.7.0)

### Added
Expand Down
37 changes: 33 additions & 4 deletions docs/fidesops/docs/guides/reporting.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,18 @@ Use the `verbose` query param to see more details about individual collections v
with individual statuses. Individual collection statuses include `in_processing`, `retrying`, `complete` or `error`.
You may see multiple logs for each collection as they reach different steps in the lifecycle.

`verbose` will embed a “results” key in the response, with execution logs grouped by dataset name. In the example below,
`verbose` will embed a “results” key in the response, containing both audit logs, which describe the overall request,
and execution logs, which are grouped by dataset name.

In the example below,
we have two datasets: `my-mongo-db` and `my-postgres-db`. There are two execution logs for `my-mongo-db` (when the `flights`
collection is starting execution and when the `flights` collection has finished) and two execution
logs for `my-postgres-db` (when the `order` collection is starting and finishing execution). `fields_affected` are the fields
that were potentially returned or masked based on the Rules you've specified on the Policy. The embedded execution logs
are automatically truncated at 50 logs, so to view the entire list of logs, visit the execution logs endpoint separately.

There are also "Request approved" and "Request finished" audit logs included in the response.

```json title="<code>GET api/v1/privacy-request?request_id={privacy_request_id}&verbose=True</code>"
{
"items": [
Expand All @@ -109,6 +114,17 @@ are automatically truncated at 50 logs, so to view the entire list of logs, visi
"status": "complete",
"external_id": null,
"results": {
"Request approved": [
{
"collection_name": null,
"fields_affected": null,
"message": "",
"action_type": null,
"status": "approved",
"updated_at": "2022-08-11T14:03:37.679732+00:00",
"user_id": "system"
}
],
"my-mongo-db": [
{
"collection_name": "flights",
Expand All @@ -132,7 +148,8 @@ are automatically truncated at 50 logs, so to view the entire list of logs, visi
"message": "success",
"action_type": "access",
"status": "complete",
"updated_at": "2022-02-28T16:38:04.727094+00:00"
"updated_at": "2022-02-28T16:38:04.727094+00:00",
"user_id": null
}
],
"my-postgres-db": [
Expand All @@ -158,10 +175,22 @@ are automatically truncated at 50 logs, so to view the entire list of logs, visi
"message": "success",
"action_type": "access",
"status": "complete",
"updated_at": "2022-02-28T16:39:04.668513+00:00"
"updated_at": "2022-02-28T16:39:04.668513+00:00",
"user_id": null
}
],
"Request finished": [
{
"collection_name": null,
"fields_affected": null,
"message": "",
"action_type": null,
"status": "finished",
"updated_at": "2022-08-11T14:04:29.611878+00:00",
"user_id": "system"
}
]
}
}
],
"total": 1,
Expand Down
74 changes: 56 additions & 18 deletions src/fidesops/ops/api/v1/endpoints/privacy_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from datetime import datetime
from typing import Any, Callable, DefaultDict, Dict, List, Optional, Set, Union

import sqlalchemy
from fastapi import Body, Depends, HTTPException, Security
from fastapi.params import Query as FastAPIQuery
from fastapi_pagination import Page, Params
Expand All @@ -15,7 +16,7 @@
from fideslib.models.audit_log import AuditLog, AuditLogAction
from fideslib.models.client import ClientDetail
from pydantic import conlist
from sqlalchemy import column
from sqlalchemy import cast, column, null
from sqlalchemy.orm import Query, Session
from starlette.responses import StreamingResponse
from starlette.status import (
Expand Down Expand Up @@ -283,27 +284,64 @@ def privacy_request_csv_download(
return response


def execution_and_audit_logs_by_dataset_name(
    self: PrivacyRequest,
) -> DefaultDict[str, List[Union["AuditLog", "ExecutionLog"]]]:
    """
    Returns a combined mapping of execution and audit logs for the given privacy request.

    Audit logs describe the privacy request as a whole, while execution logs are
    created for specific collections. Logs are grouped by dataset name; an audit
    log has no dataset, so it is filed under a display-only group name built from
    its status, for example "Request approved".

    Logs are read in ascending `updated_at` order, and every group (dataset groups
    and audit groups alike) is truncated at EMBEDDED_EXECUTION_LOG_LIMIT entries.

    Added as a conditional property to the PrivacyRequest class at runtime to
    show optionally embedded execution and audit logs.

    An example response might include your execution logs from your mongo db in one group, and execution logs from
    your postgres db in a different group, plus audit logs for when the request was approved and denied.

    Note: the values in the mapping are the rows returned by the combined column
    query (one attribute per selected column), not full model instances.
    """
    db: Session = Session.object_session(self)
    all_logs: DefaultDict[str, List[Union["AuditLog", "ExecutionLog"]]] = defaultdict(
        list
    )

    # Both queries select the same columns, in the same order, so that they can
    # be combined with UNION ALL. Columns that one log type does not have are
    # filled with NULL under the shared label.
    execution_log_query: Query = db.query(
        ExecutionLog.id,
        ExecutionLog.created_at,
        ExecutionLog.updated_at,
        ExecutionLog.message,
        # Cast to a plain string so the column lines up with the audit log's
        # "status" column in the union (presumably the two use different enum
        # types -- confirm against the models).
        cast(ExecutionLog.status, sqlalchemy.String).label("status"),
        ExecutionLog.privacy_request_id,
        ExecutionLog.dataset_name,
        ExecutionLog.collection_name,
        ExecutionLog.fields_affected,
        ExecutionLog.action_type,
        null().label("user_id"),
    ).filter(ExecutionLog.privacy_request_id == self.id)

    audit_log_query: Query = db.query(
        AuditLog.id,
        AuditLog.created_at,
        AuditLog.updated_at,
        AuditLog.message,
        # The audit log's action is surfaced as the shared "status" column.
        cast(AuditLog.action, sqlalchemy.String).label("status"),
        AuditLog.privacy_request_id,
        null().label("dataset_name"),
        null().label("collection_name"),
        null().label("fields_affected"),
        null().label("action_type"),
        AuditLog.user_id,
    ).filter(AuditLog.privacy_request_id == self.id)

    combined: Query = execution_log_query.union_all(audit_log_query)

    for log in combined.order_by(ExecutionLog.updated_at.asc()):
        # Audit logs have a NULL dataset_name; give them a display-only group.
        dataset_name: str = log.dataset_name or f"Request {log.status}"

        # Once a group is full, later logs for it are dropped.
        if len(all_logs[dataset_name]) >= EMBEDDED_EXECUTION_LOG_LIMIT:
            continue
        all_logs[dataset_name].append(log)
    return all_logs


def _filter_privacy_request_queryset(
Expand Down Expand Up @@ -509,12 +547,12 @@ def get_request_status(

# Conditionally embed execution log details in the response.
if verbose:
logger.info("Finding execution log details")
PrivacyRequest.execution_logs_by_dataset = property(
execution_logs_by_dataset_name
logger.info("Finding execution and audit log details")
PrivacyRequest.execution_and_audit_logs_by_dataset = property(
execution_and_audit_logs_by_dataset_name
)
else:
PrivacyRequest.execution_logs_by_dataset = property(lambda self: None)
PrivacyRequest.execution_and_audit_logs_by_dataset = property(lambda self: None)

paginated = paginate(query, params)
if include_identities:
Expand Down
31 changes: 26 additions & 5 deletions src/fidesops/ops/schemas/privacy_request.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from datetime import datetime
from enum import Enum as EnumType
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union

from fideslib.models.audit_log import AuditLogAction
from fideslib.oauth.schemas.user import PrivacyRequestReviewer
from pydantic import Field, validator

Expand Down Expand Up @@ -107,6 +108,25 @@ class ExecutionLogDetailResponse(ExecutionLogResponse):
dataset_name: Optional[str]


class ExecutionAndAuditLogResponse(BaseSchema):
    """Schema for the combined ExecutionLogs and Audit Logs
    associated with a PrivacyRequest.

    One entry represents either an execution log or an audit log. Every field
    is optional because each log type only supplies some of them: the request
    status endpoint fills the collection-level fields with null for audit logs
    and fills `user_id` with null for execution logs.
    """

    # Collection-level details; null for audit logs.
    collection_name: Optional[str]
    fields_affected: Optional[List[FieldsAffectedResponse]]
    message: Optional[str]
    # Null for audit logs.
    action_type: Optional[ActionType]
    # An ExecutionLogStatus for execution logs, an AuditLogAction for audit logs.
    status: Optional[Union[ExecutionLogStatus, AuditLogAction]]
    updated_at: Optional[datetime]
    # Null for execution logs.
    user_id: Optional[str]

    class Config:
        """Store enum fields as their values and allow population by field name"""

        use_enum_values = True
        allow_population_by_field_name = True


class RowCountRequest(BaseSchema):
"""Schema for a user to manually confirm data erased for a collection"""

Expand Down Expand Up @@ -148,11 +168,12 @@ class Config:


class PrivacyRequestVerboseResponse(PrivacyRequestResponse):
"""The schema for the more detailed PrivacyRequest response containing detailed execution logs."""
"""The schema for the more detailed PrivacyRequest response containing both
detailed execution logs and audit logs."""

execution_logs_by_dataset: Dict[str, List[ExecutionLogResponse]] = Field(
alias="results"
)
execution_and_audit_logs_by_dataset: Dict[
str, List[ExecutionAndAuditLogResponse]
] = Field(alias="results")

class Config:
"""Allow the results field to be populated by the 'PrivacyRequest.execution_logs_by_dataset' property"""
Expand Down
17 changes: 17 additions & 0 deletions tests/ops/api/v1/endpoints/test_privacy_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from fidesops.ops.api.v1.endpoints.privacy_request_endpoints import (
EMBEDDED_EXECUTION_LOG_LIMIT,
execution_and_audit_logs_by_dataset_name,
validate_manual_input,
)
from fidesops.ops.api.v1.scope_registry import (
Expand Down Expand Up @@ -58,6 +59,7 @@
from fidesops.ops.schemas.dataset import DryRunDatasetResponse
from fidesops.ops.schemas.masking.masking_secrets import SecretType
from fidesops.ops.schemas.policy import PolicyResponse
from fidesops.ops.schemas.privacy_request import ExecutionAndAuditLogResponse
from fidesops.ops.schemas.redis_cache import PrivacyRequestIdentity
from fidesops.ops.util.cache import (
get_encryption_cache_key,
Expand Down Expand Up @@ -955,6 +957,7 @@ def test_verbose_privacy_requests(
api_client: TestClient,
generate_auth_header,
privacy_request: PrivacyRequest,
audit_log,
postgres_execution_log,
second_postgres_execution_log,
mongo_execution_log,
Expand Down Expand Up @@ -1000,6 +1003,17 @@ def test_verbose_privacy_requests(
"stopped_collection_details": None,
"resume_endpoint": None,
"results": {
"Request approved": [
{
"collection_name": None,
"fields_affected": None,
"message": "",
"action_type": None,
"status": "approved",
"updated_at": stringify_date(audit_log.updated_at),
"user_id": "system",
}
],
"my-mongo-db": [
{
"collection_name": "orders",
Expand All @@ -1016,6 +1030,7 @@ def test_verbose_privacy_requests(
"updated_at": stringify_date(
mongo_execution_log.updated_at
),
"user_id": None,
}
],
"my-postgres-db": [
Expand All @@ -1034,6 +1049,7 @@ def test_verbose_privacy_requests(
"updated_at": stringify_date(
postgres_execution_log.updated_at
),
"user_id": None,
},
{
"collection_name": "address",
Expand All @@ -1059,6 +1075,7 @@ def test_verbose_privacy_requests(
"updated_at": stringify_date(
second_postgres_execution_log.updated_at
),
"user_id": None,
},
],
},
Expand Down
16 changes: 16 additions & 0 deletions tests/ops/fixtures/application_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import yaml
from faker import Faker
from fideslib.core.config import load_file, load_toml
from fideslib.models.audit_log import AuditLog, AuditLogAction
from fideslib.models.client import ClientDetail
from fideslib.models.fides_user import FidesUser
from fideslib.models.fides_user_permissions import FidesUserPermissions
Expand Down Expand Up @@ -797,6 +798,21 @@ def privacy_request(db: Session, policy: Policy) -> PrivacyRequest:
privacy_request.delete(db)


@pytest.fixture(scope="function")
def audit_log(db: Session, privacy_request: PrivacyRequest) -> AuditLog:
    """
    Yield an "approved" AuditLog attached to the `privacy_request` fixture.

    The log is created with user_id "system" and an empty message, and is
    deleted again once the test using it has finished.
    """
    approved_log = AuditLog.create(
        db=db,
        data={
            "user_id": "system",
            "privacy_request_id": privacy_request.id,
            "action": AuditLogAction.approved,
            "message": "",
        },
    )
    yield approved_log
    # Teardown: remove the log so it does not leak into other tests.
    approved_log.delete(db)


@pytest.fixture(scope="function")
def privacy_request_status_pending(db: Session, policy: Policy) -> PrivacyRequest:
privacy_request = _create_privacy_request_for_policy(
Expand Down

0 comments on commit f848ecb

Please sign in to comment.