[Admin UI Backend] Download privacy requests as CSV [#263] (#285)
Adding ability to download privacy requests as CSV
pattisdr authored Mar 16, 2022
1 parent ae70158 commit 492fdce
Showing 4 changed files with 107 additions and 8 deletions.
13 changes: 12 additions & 1 deletion docs/fidesops/docs/guides/reporting.md
@@ -4,7 +4,7 @@ In this section we'll cover:

- How to check the high-level status of your privacy requests
- How to get more detailed execution logs of collections and fields that were potentially affected as part of your privacy request.

- How to download all privacy requests as a CSV

Take me directly to [API docs](/fidesops/api#operations-Privacy_Requests-get_request_status_api_v1_privacy_request_get).

@@ -171,3 +171,14 @@ are automatically truncated at 50 logs, so to view the entire list of logs, visi


```
## Downloading all privacy requests as a CSV


To get all privacy requests in CSV format, use the `download_csv` query param:

`GET api/v1/privacy-request/?download_csv=True`

```csv
Time received,Subject identity,Policy key,Request status,Reviewer,Time approved/denied
2022-03-14 16:53:28.869258+00:00,{'email': 'customer-1@example.com'},my_primary_policy,complete,fid_16ffde2f-613b-4f79-bbae-41420b0f836b,2022-03-14 16:54:08.804283+00:00
```
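
If you prefer to fetch the CSV programmatically rather than through the Admin UI, a minimal sketch using Python's `requests` library looks like the following (the host, port, and access token are placeholders, not values defined by this change):

```python
import requests  # third-party HTTP client, assumed to be installed

# Placeholder values -- substitute your own fidesops host and OAuth access token.
FIDESOPS_URL = "http://localhost:8080"
ACCESS_TOKEN = "<your-access-token>"

response = requests.get(
    f"{FIDESOPS_URL}/api/v1/privacy-request/",
    params={"download_csv": True},
    headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
)
response.raise_for_status()

# The endpoint responds with text/csv, so the body can be written straight to a file.
with open("privacy_requests.csv", "wb") as f:
    f.write(response.content)
```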
52 changes: 48 additions & 4 deletions src/fidesops/api/v1/endpoints/privacy_request_endpoints.py
@@ -1,14 +1,18 @@
import io
import csv

import logging
from collections import defaultdict
from datetime import date, datetime
from starlette.responses import StreamingResponse
from typing import List, Optional, Union, DefaultDict, Dict, Set, Callable, Any

from fastapi import APIRouter, Body, Depends, Security, HTTPException
from fastapi_pagination import Page, Params
from fastapi_pagination.bases import AbstractPage
from fastapi_pagination.ext.sqlalchemy import paginate
from pydantic import conlist
from sqlalchemy.orm import Session
from sqlalchemy.orm import Session, Query
from starlette.status import (
HTTP_400_BAD_REQUEST,
HTTP_404_NOT_FOUND,
@@ -219,6 +223,41 @@ def create_privacy_request(
)


def privacy_request_csv_download(privacy_request_query: Query) -> StreamingResponse:
"""Download privacy requests as CSV for Admin UI"""
f = io.StringIO()
csv_file = csv.writer(f)

csv_file.writerow(
[
"Time received",
"Subject identity",
"Policy key",
"Request status",
"Reviewer",
"Time approved/denied",
]
)

for pr in privacy_request_query:
csv_file.writerow(
[
pr.created_at,
pr.get_cached_identity_data(),
pr.policy.key if pr.policy else None,
pr.status.value if pr.status else None,
pr.reviewed_by,
pr.reviewed_at,
]
)
f.seek(0)
response = StreamingResponse(f, media_type="text/csv")
response.headers[
"Content-Disposition"
] = f"attachment; filename=privacy_requests_download_{datetime.today().strftime('%Y-%m-%d')}.csv"
return response


@router.get(
urls.PRIVACY_REQUESTS,
dependencies=[Security(verify_oauth_client, scopes=[scopes.PRIVACY_REQUEST_READ])],
@@ -246,7 +285,8 @@ def get_request_status(
external_id: Optional[str] = None,
verbose: Optional[bool] = False,
include_identities: Optional[bool] = False,
) -> AbstractPage[PrivacyRequest]:
download_csv: Optional[bool] = False,
) -> Union[StreamingResponse, AbstractPage[PrivacyRequest]]:
"""Returns PrivacyRequest information. Supports a variety of optional query params.
To fetch a single privacy request, use the id query param `?id=`.
@@ -259,8 +299,6 @@ def get_request_status(
detail="Cannot specify both succeeded and failed query params.",
)

logger.info(f"Finding all request statuses with pagination params {params}")

query = db.query(PrivacyRequest)

# Further restrict all PrivacyRequests by query params
@@ -299,6 +337,11 @@
PrivacyRequest.finished_processing_at > errored_gt,
)

if download_csv:
# Returning here if download_csv param was specified
logger.info("Downloading privacy requests as csv")
return privacy_request_csv_download(query)

# Conditionally embed execution log details in the response.
if verbose:
logger.info(f"Finding execution log details")
@@ -315,6 +358,7 @@ def get_request_status(
for item in paginated.items:
item.identity = item.get_cached_identity_data()

logger.info(f"Finding all request statuses with pagination params {params}")
return paginated


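The `privacy_request_csv_download` helper above follows a common FastAPI/Starlette pattern: write rows into an in-memory `io.StringIO` buffer with `csv.writer`, rewind the buffer, and return it wrapped in a `StreamingResponse` whose `Content-Disposition` header marks it as a file download. A stripped-down, self-contained sketch of that pattern (the app, route, and sample rows here are illustrative only, not part of this commit):

```python
import csv
import io
from datetime import datetime

from fastapi import FastAPI
from starlette.responses import StreamingResponse

app = FastAPI()


@app.get("/example-csv")
def example_csv() -> StreamingResponse:
    """Stream an in-memory CSV back to the caller as a file download."""
    buffer = io.StringIO()
    writer = csv.writer(buffer)
    writer.writerow(["Time received", "Request status"])  # header row
    writer.writerow([datetime.utcnow().isoformat(), "complete"])  # sample row

    buffer.seek(0)  # rewind so StreamingResponse reads from the beginning
    response = StreamingResponse(buffer, media_type="text/csv")
    response.headers["Content-Disposition"] = "attachment; filename=example.csv"
    return response
```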
2 changes: 0 additions & 2 deletions src/fidesops/task/graph_task.py
@@ -406,7 +406,6 @@ def collect_tasks_fn(
data[tn.address] = GraphTask(tn, resources)

def termination_fn(*dependent_values: List[Row]) -> Dict[str, List[Row]]:

"""A termination function that just returns its inputs mapped to their source addresses.
This needs to wait for all dependent keys because this is how dask is informed to wait for
@@ -463,7 +462,6 @@ def collect_tasks_fn(
traversal.traverse(env, collect_tasks_fn)

def termination_fn(*dependent_values: int) -> Tuple[int, ...]:

"""The dependent_values here is an int output from each task feeding in, where
each task reports the output of 'task.rtf(access_request_data)', which is the number of
records updated.
48 changes: 47 additions & 1 deletion tests/api/v1/endpoints/test_privacy_request_endpoints.py
@@ -1,6 +1,12 @@
import ast

import csv
import io

import json
from datetime import datetime
from typing import List, Dict
from dateutil.parser import parse
from typing import List
from unittest import mock

from fastapi_pagination import Params
@@ -755,6 +761,46 @@ def test_verbose_privacy_request_embed_limit(
ExecutionLog.privacy_request_id == privacy_request.id
).delete()

def test_get_privacy_requests_csv_format(
self, db, generate_auth_header, api_client, url, privacy_request, user
):
reviewed_at = datetime.now()
created_at = datetime.now()

privacy_request.created_at = created_at
privacy_request.status = PrivacyRequestStatus.approved
privacy_request.reviewed_by = user.id
privacy_request.reviewed_at = reviewed_at
privacy_request.cache_identity(
{"email": "email@example.com", "phone_number": "111-111-1111"}
)
privacy_request.save(db)

auth_header = generate_auth_header(scopes=[PRIVACY_REQUEST_READ])
response = api_client.get(url + f"?download_csv=True", headers=auth_header)
assert 200 == response.status_code

assert response.headers["content-type"] == "text/csv; charset=utf-8"
assert (
response.headers["content-disposition"]
== f"attachment; filename=privacy_requests_download_{datetime.today().strftime('%Y-%m-%d')}.csv"
)

content = response.content.decode()
file = io.StringIO(content)
csv_file = csv.DictReader(file, delimiter=",")

first_row = next(csv_file)
assert parse(first_row["Time received"], ignoretz=True) == created_at
assert ast.literal_eval(first_row["Subject identity"]) == {
"email": "email@example.com",
"phone_number": "111-111-1111",
}
assert first_row["Policy key"] == "example_access_request_policy"
assert first_row["Request status"] == "approved"
assert first_row["Reviewer"] == user.id
assert parse(first_row["Time approved/denied"], ignoretz=True) == reviewed_at


class TestGetExecutionLogs:
@pytest.fixture(scope="function")