Skip to content
This repository has been archived by the owner on Nov 1, 2023. It is now read-only.

Commit

Permalink
automatically clean PII after retention period (#1051)
Browse files Browse the repository at this point in the history
This PR removes PII from Jobs, Tasks, and Repros after 18 months.  

This PR also removes notifications tied to a container that has not been used in a task for 18 months. This is done because notifications can store PII (typically the "assignee") in arbitrarily complex ways.
  • Loading branch information
nharper285 authored Jul 22, 2021
1 parent e359ed8 commit 3289644
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 0 deletions.
79 changes: 79 additions & 0 deletions src/api-service/__app__/timer_retention/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import datetime
import logging

import azure.functions as func
from onefuzztypes.enums import JobState, TaskState

from ..onefuzzlib.events import get_events
from ..onefuzzlib.jobs import Job
from ..onefuzzlib.notifications.main import Notification
from ..onefuzzlib.repro import Repro
from ..onefuzzlib.tasks.main import Task

# Age past which a record's PII is scrubbed: 18 months, counted as 30-day
# months. `main` subtracts this from "now" to get the upper Timestamp bound.
RETENTION_POLICY = datetime.timedelta(days=18 * 30)
# How far back `main` looks for expired records: 20 months, counted as 30-day
# months. `main` subtracts this from "now" to get the lower Timestamp bound.
SEARCH_EXTENT = datetime.timedelta(days=20 * 30)


def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None:  # noqa: F841
    """Scrub PII from records whose Timestamp is past the retention period.

    Only records whose Timestamp lies between ``now - SEARCH_EXTENT`` and
    ``now - RETENTION_POLICY`` are examined.  Within that window:

    * notifications whose container is not referenced by any task newer than
      the retention cut-off are deleted;
    * stopped jobs, stopped tasks, and repros that still carry a
      ``user_info.upn`` have it set to ``None`` and are saved.

    Finally, whatever ``get_events()`` returns is forwarded to the
    ``dashboard`` output binding when it is truthy.

    Args:
        mytimer: The timer trigger request; not used by the body.
        dashboard: Output binding that receives the collected events.
    """
    current_time = datetime.datetime.now(tz=datetime.timezone.utc)

    # ISO-8601 bounds of the window of "expired but still searched" records.
    retention_cutoff = (current_time - RETENTION_POLICY).isoformat()
    search_cutoff = (current_time - SEARCH_EXTENT).isoformat()

    expired_filter = (
        f"Timestamp lt datetime'{retention_cutoff}' "
        f"and Timestamp gt datetime'{search_cutoff}'"
    )
    recent_filter = f"Timestamp gt datetime'{retention_cutoff}'"

    # Gather the containers of every task newer than the retention cut-off.
    # NOTE: this has to happen before any task is saved below, because the
    # task timestamps will not be useful once tasks have been modified.
    active_containers = {
        container.name
        for recent_task in Task.search(raw_unchecked_filter=recent_filter)
        for container in recent_task.config.containers
    }

    # Drop expired notifications that point at containers no recent task uses.
    for stale in Notification.search(raw_unchecked_filter=expired_filter):
        logging.debug(
            "checking expired notification for removal: %s",
            stale.notification_id,
        )
        if stale.container in active_containers:
            continue
        logging.info("deleting expired notification: %s", stale.notification_id)
        stale.delete()

    # Clear the UPN on stopped jobs inside the expired window.
    for job in Job.search(
        query={"state": [JobState.stopped]}, raw_unchecked_filter=expired_filter
    ):
        if job.user_info is None or job.user_info.upn is None:
            continue
        logging.info("removing PII from job: %s", job.job_id)
        job.user_info.upn = None
        job.save()

    # Clear the UPN on stopped tasks inside the expired window.
    for task in Task.search(
        query={"state": [TaskState.stopped]}, raw_unchecked_filter=expired_filter
    ):
        if task.user_info is None or task.user_info.upn is None:
            continue
        logging.info("removing PII from task: %s", task.task_id)
        task.user_info.upn = None
        task.save()

    # Clear the UPN on repros inside the expired window (no state filter).
    for repro in Repro.search(raw_unchecked_filter=expired_filter):
        if repro.user_info is None or repro.user_info.upn is None:
            continue
        logging.info("removing PII from repro: %s", repro.vm_id)
        repro.user_info.upn = None
        repro.save()

    pending_events = get_events()
    if pending_events:
        dashboard.set(pending_events)
17 changes: 17 additions & 0 deletions src/api-service/__app__/timer_retention/function.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"bindings": [
{
"direction": "in",
"name": "mytimer",
"schedule": "20:00:00",
"type": "timerTrigger"
},
{
"type": "signalR",
"direction": "out",
"name": "dashboard",
"hubName": "dashboard"
}
],
"scriptFile": "__init__.py"
}

0 comments on commit 3289644

Please sign in to comment.