From 6992e8a0a3433d887d17bb574f1813cf0504e02e Mon Sep 17 00:00:00 2001 From: RuthShryock <81720958+RuthShryock@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:06:55 -0500 Subject: [PATCH] feat(accessLogExport)!: create new AccessLogExportTask to generate a csv of access logs TASK-871 (#5258) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### 👀 Preview steps 1. Ensure that the `Export all data` button for `ProjectViews` still works as normal and that the email header has the appropriate title: "Project View Report Complete" 2. Ensure that exports for submission data work correctly: - make submissions to a project - navigate to the _Project < Data < Downloads_ - 🟢 ensure the `Export` button is exporting data correctly 3. Preview the AccessLogExportTask from the shell: - ℹ️ have a user - enter the Django shell: `./manage.py shell_plus` - get your user: `test_user = User.objects.get(username='test')` - create an AccessLogExportTask: ``` task = AccessLogExportTask.objects.create( user=test_user, get_all_logs=False, data={'type': 'access_logs_export'}, ) ``` - 🟢 print the task object and notice that an AccessLogExportTask object has been created: `print(task)` - run the task: `task.run()` - 🟢 print the result and notice that it generated a link to the csv export: `print(task.result)` ### 💭 Notes - Changed the field named `user` to `user_url` because naming it `user` conflicts with an existing field in the `AccessLog` model - Renamed the file titled `project_view_exports.py` to `data_exports.py` because it now generates exports for both project view and access log exports - Refactored the `ProjectViewExportTask` to have a base `ExportTaskMixin` which both the project view and access log export classes inherit from - Updated the email (in `kpi/tasks.py`) to have the correct subject title based on the type of export report it is sending - Alphabetized the functions in `data_exports.py` --- jsapp/js/constants.ts | 2 +- 
kobo/apps/audit_log/models.py | 4 +- .../tests/test_project_history_logs.py | 2 +- kobo/apps/trash_bin/utils.py | 4 +- ...xporttask_submissionexporttask_and_more.py | 91 ++++++++++++ kpi/models/__init__.py | 6 +- kpi/models/import_export_task.py | 140 +++++++++++++++--- kpi/serializers/v1/export_task.py | 6 +- kpi/serializers/v2/export_task.py | 11 +- kpi/tasks.py | 7 +- kpi/tests/api/v1/test_api_assets.py | 10 +- kpi/tests/api/v1/test_api_exports.py | 8 +- kpi/tests/api/v2/test_api_exports.py | 4 +- kpi/tests/test_access_logs_export_task.py | 122 +++++++++++++++ ...t_mock_data_conflicting_version_exports.py | 4 +- kpi/tests/test_mock_data_exports.py | 48 +++--- ...roject_view_exports.py => data_exports.py} | 126 +++++++++------- kpi/views/v1/export_task.py | 24 +-- kpi/views/v2/asset_export_settings.py | 4 +- kpi/views/v2/export_task.py | 5 +- 20 files changed, 478 insertions(+), 150 deletions(-) create mode 100644 kpi/migrations/0060_rename_exporttask_submissionexporttask_and_more.py create mode 100644 kpi/tests/test_access_logs_export_task.py rename kpi/utils/{project_view_exports.py => data_exports.py} (77%) diff --git a/jsapp/js/constants.ts b/jsapp/js/constants.ts index 64d48bdfe7..cc59df48b5 100644 --- a/jsapp/js/constants.ts +++ b/jsapp/js/constants.ts @@ -356,7 +356,7 @@ export const META_QUESTION_TYPES = createEnum([ // 1. https://github.com/kobotoolbox/kobocat/blob/78133d519f7b7674636c871e3ba5670cd64a7227/onadata/apps/viewer/models/parsed_instance.py#L242-L260 // 2. 
https://github.com/kobotoolbox/kpi/blob/7db39015866c905edc645677d72b9c1ea16067b1/jsapp/js/constants.es6#L284-L294 export const ADDITIONAL_SUBMISSION_PROPS = createEnum([ - // match the ordering of (Python) kpi.models.import_export_task.ExportTask.COPY_FIELDS + // match the ordering of (Python) kpi.models.import_export_task.SubmissionExportTask.COPY_FIELDS '_id', '_uuid', '_submission_time', diff --git a/kobo/apps/audit_log/models.py b/kobo/apps/audit_log/models.py index 140f6eb852..4f3ce52728 100644 --- a/kobo/apps/audit_log/models.py +++ b/kobo/apps/audit_log/models.py @@ -334,7 +334,7 @@ def create_from_request(cls, request): 'asset-file-detail': cls.create_from_file_request, 'asset-file-list': cls.create_from_file_request, 'asset-export-list': cls.create_from_export_request, - 'exporttask-list': cls.create_from_v1_export, + 'submissionexporttask-list': cls.create_from_v1_export, 'asset-bulk': cls.create_from_bulk_request, } url_name = request.resolver_match.url_name @@ -610,7 +610,7 @@ def create_from_related_request( action = modify_action if action: # some actions on related objects do not need to be logged, - # eg deleting an ExportTask + # eg deleting a SubmissionExportTask ProjectHistoryLog.objects.create( user=request.user, object_id=object_id, action=action, metadata=metadata ) diff --git a/kobo/apps/audit_log/tests/test_project_history_logs.py b/kobo/apps/audit_log/tests/test_project_history_logs.py index 3b684a85f9..84bbf79c65 100644 --- a/kobo/apps/audit_log/tests/test_project_history_logs.py +++ b/kobo/apps/audit_log/tests/test_project_history_logs.py @@ -869,7 +869,7 @@ def test_export_v1_creates_log(self): # can't use _base_project_history_log_test because # the old endpoint doesn't like format=json self.client.post( - path=reverse('exporttask-list'), + path=reverse('submissionexporttask-list'), data=request_data, ) diff --git a/kobo/apps/trash_bin/utils.py b/kobo/apps/trash_bin/utils.py index 549d6c845e..bc8a513abc 100644 --- 
a/kobo/apps/trash_bin/utils.py +++ b/kobo/apps/trash_bin/utils.py @@ -20,7 +20,7 @@ from kobo.apps.audit_log.audit_actions import AuditAction from kobo.apps.audit_log.models import AuditLog, AuditType from kpi.exceptions import InvalidXFormException, MissingXFormException -from kpi.models import Asset, ExportTask, ImportTask +from kpi.models import Asset, SubmissionExportTask, ImportTask from kpi.utils.mongo_helper import MongoHelper from kpi.utils.storage import rmdir from .constants import DELETE_PROJECT_STR_PREFIX, DELETE_USER_STR_PREFIX @@ -45,7 +45,7 @@ def delete_asset(request_author: settings.AUTH_USER_MODEL, asset: 'kpi.Asset'): if asset.has_deployment: _delete_submissions(request_author, asset) asset.deployment.delete() - project_exports = ExportTask.objects.filter( + project_exports = SubmissionExportTask.objects.filter( Q(data__source=f'{host}/api/v2/assets/{asset.uid}/') | Q(data__source=f'{host}/assets/{asset.uid}/') ) diff --git a/kpi/migrations/0060_rename_exporttask_submissionexporttask_and_more.py b/kpi/migrations/0060_rename_exporttask_submissionexporttask_and_more.py new file mode 100644 index 0000000000..24f2aef144 --- /dev/null +++ b/kpi/migrations/0060_rename_exporttask_submissionexporttask_and_more.py @@ -0,0 +1,91 @@ +# Generated by Django 4.2.15 on 2024-11-26 19:55 + +import django.db.models.deletion +import private_storage.fields +import private_storage.storage.files +from django.conf import settings +from django.db import migrations, models + +import kpi.fields.file +import kpi.fields.kpi_uid +import kpi.models.asset_file +import kpi.models.import_export_task + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('kpi', '0059_assetexportsettings_date_created_and_more'), + ] + + operations = [ + migrations.RenameModel( + old_name='ExportTask', + new_name='SubmissionExportTask', + ), + migrations.RenameModel( + old_name='SynchronousExport', + 
new_name='SubmissionSynchronousExport', + ), + migrations.AlterField( + model_name='assetfile', + name='content', + field=kpi.fields.file.PrivateExtendedFileField( + max_length=380, null=True, upload_to=kpi.models.asset_file.upload_to + ), + ), + migrations.CreateModel( + name='AccessLogExportTask', + fields=[ + ( + 'id', + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name='ID', + ), + ), + ('data', models.JSONField()), + ('messages', models.JSONField(default=dict)), + ( + 'status', + models.CharField( + choices=[ + ('created', 'created'), + ('processing', 'processing'), + ('error', 'error'), + ('complete', 'complete'), + ], + default='created', + max_length=32, + ), + ), + ('date_created', models.DateTimeField(auto_now_add=True)), + ('uid', kpi.fields.kpi_uid.KpiUidField(_null=False, uid_prefix='ale')), + ('get_all_logs', models.BooleanField(default=False)), + ( + 'result', + private_storage.fields.PrivateFileField( + max_length=380, + storage=( + private_storage.storage.files.PrivateFileSystemStorage() + ), + upload_to=kpi.models.import_export_task.export_upload_to, + ), + ), + ( + 'user', + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to=settings.AUTH_USER_MODEL, + ), + ), + ], + options={ + 'abstract': False, + }, + bases=(kpi.models.import_export_task.ExportTaskMixin, models.Model), + ), + ] diff --git a/kpi/models/__init__.py b/kpi/models/__init__.py index 90da79949a..07862f8bf0 100644 --- a/kpi/models/__init__.py +++ b/kpi/models/__init__.py @@ -7,11 +7,11 @@ from .asset_snapshot import AssetSnapshot from .asset_user_partial_permission import AssetUserPartialPermission from .object_permission import ObjectPermission -from .import_export_task import ( - ExportTask, +from .import_export_task import ( # noqa F401 + SubmissionExportTask, ImportTask, ProjectViewExportTask, - SynchronousExport, + SubmissionSynchronousExport, ) from .tag_uid import TagUid from .authorized_application import 
AuthorizedApplication diff --git a/kpi/models/import_export_task.py b/kpi/models/import_export_task.py index e6a0c9c98f..d307627180 100644 --- a/kpi/models/import_export_task.py +++ b/kpi/models/import_export_task.py @@ -18,7 +18,9 @@ from django.contrib.postgres.indexes import BTreeIndex, HashIndex from django.core.files.storage import FileSystemStorage from django.db import models, transaction -from django.db.models import F +from django.db.models import CharField, F, Value +from django.db.models.functions import Concat +from django.db.models.query import QuerySet from django.urls import reverse from django.utils import timezone from django.utils.translation import gettext as t @@ -52,9 +54,18 @@ from kpi.exceptions import XlsFormatException from kpi.fields import KpiUidField from kpi.models import Asset +from kpi.utils.data_exports import ( + ACCESS_LOGS_EXPORT_FIELDS, + ASSET_FIELDS, + CONFIG, + SETTINGS, + create_data_export, + filter_remaining_metadata, + get_q, +) from kpi.utils.log import logging from kpi.utils.models import _load_library_content, create_assets, resolve_url_to_asset -from kpi.utils.project_view_exports import create_project_view_export +from kpi.utils.project_views import get_region_for_view from kpi.utils.rename_xls_sheet import ( ConflictSheetError, NoFromSheetError, @@ -129,7 +140,7 @@ def run(self): # This method must be implemented by a subclass self._run_task(msgs) self.status = self.COMPLETE - except ExportTaskBase.InaccessibleData as e: + except SubmissionExportTaskBase.InaccessibleData as e: msgs['error_type'] = t('Cannot access data') msgs['error'] = str(e) self.status = self.ERROR @@ -477,27 +488,28 @@ def export_upload_to(self, filename): return posixpath.join(self.user.username, 'exports', filename) -class ProjectViewExportTask(ImportExportTask): - uid = KpiUidField(uid_prefix='pve') - result = PrivateFileField(upload_to=export_upload_to, max_length=380) +class ExportTaskMixin: + + @property + def default_email_subject(self) -> 
str: + return 'Report Complete' + + def _get_export_details(self) -> tuple: + return self.data.get('type'), self.data.get('view', None) def _build_export_filename( - self, export_type: str, username: str, view: str + self, export_type: str, username: str, view: str = None ) -> str: time = timezone.now().strftime('%Y-%m-%dT%H:%M:%SZ') - return f'{export_type}-{username}-view_{view}-{time}.csv' - - def _run_task(self, messages: list) -> None: - export_type = self.data['type'] - view = self.data['view'] + if view: + return f'{export_type}-{username}-view_{view}-{time}.csv' + return f'{export_type}-{username}-{time}.csv' - filename = self._build_export_filename( - export_type, self.user.username, view - ) + def _export_data_to_file(self, messages: list, buff) -> None: + export_type, view = self._get_export_details() + filename = self._build_export_filename(export_type, self.user.username, view) absolute_filepath = self.get_absolute_filepath(filename) - buff = create_project_view_export(export_type, self.user.username, view) - with self.result.storage.open(absolute_filepath, 'wb') as output_file: output_file.write(buff.read().encode()) @@ -510,7 +522,95 @@ def delete(self, *args, **kwargs) -> None: super().delete(*args, **kwargs) -class ExportTaskBase(ImportExportTask): +class AccessLogExportTask(ExportTaskMixin, ImportExportTask): + uid = KpiUidField(uid_prefix='ale') + get_all_logs = models.BooleanField(default=False) + result = PrivateFileField(upload_to=export_upload_to, max_length=380) + + @property + def default_email_subject(self) -> str: + return 'Access Log Report Complete' + + def get_data(self, filtered_queryset: QuerySet) -> QuerySet: + user_url = Concat( + Value(f'{settings.KOBOFORM_URL}/api/v2/users/'), + F('user__username'), + output_field=CharField(), + ) + + return filtered_queryset.annotate( + user_url=user_url, + username=F('user__username'), + auth_type=F('metadata__auth_type'), + source=F('metadata__source'), + ip_address=F('metadata__ip_address'), 
+ initial_superusername=F('metadata__initial_user_username'), + initial_superuseruid=F('metadata__initial_user_uid'), + authorized_application=F('metadata__authorized_app_name'), + other_details=F('metadata'), + ).values(*ACCESS_LOGS_EXPORT_FIELDS) + + def _run_task(self, messages: list) -> None: + if self.get_all_logs and not self.user.is_superuser: + raise PermissionError('Only superusers can export all access logs.') + + export_type, view = self._get_export_details() + config = CONFIG[export_type] + + queryset = config['queryset']() + if not self.get_all_logs: + queryset = queryset.filter(user__username=self.user.username) + data = self.get_data(queryset) + accessed_metadata_fields = [ + 'auth_type', + 'source', + 'ip_address', + 'initial_user_username', + 'initial_user_uid', + 'authorized_app_name', + ] + for row in data: + row['other_details'] = filter_remaining_metadata( + row, accessed_metadata_fields + ) + buff = create_data_export(export_type, data) + self._export_data_to_file(messages, buff) + + +class ProjectViewExportTask(ExportTaskMixin, ImportExportTask): + uid = KpiUidField(uid_prefix='pve') + result = PrivateFileField(upload_to=export_upload_to, max_length=380) + + @property + def default_email_subject(self) -> str: + return 'Project View Report Complete' + + def get_data(self, filtered_queryset: QuerySet) -> QuerySet: + vals = ASSET_FIELDS + (SETTINGS,) + return ( + filtered_queryset.annotate( + owner__name=F('owner__extra_details__data__name'), + owner__organization=F('owner__extra_details__data__organization'), + form_id=F('_deployment_data__backend_response__formid'), + ) + .values(*vals) + .order_by('id') + ) + + def _run_task(self, messages: list) -> None: + export_type, view = self._get_export_details() + config = CONFIG[export_type] + + region_for_view = get_region_for_view(view) + q = get_q(region_for_view, export_type) + queryset = config['queryset'].filter(q) + + data = self.get_data(queryset) + buff = create_data_export(export_type, 
data) + self._export_data_to_file(messages, buff) + + +class SubmissionExportTaskBase(ImportExportTask): """ An (asynchronous) submission data export job. The instantiator must set the `data` attribute to a dictionary with the following keys: @@ -940,7 +1040,7 @@ def remove_excess(cls, user, source): export.delete() -class ExportTask(ExportTaskBase): +class SubmissionExportTask(SubmissionExportTaskBase): """ An asynchronous export task, to be run with Celery """ @@ -961,7 +1061,7 @@ def _run_task(self, messages): self.remove_excess(self.user, source_url) -class SynchronousExport(ExportTaskBase): +class SubmissionSynchronousExport(SubmissionExportTaskBase): """ A synchronous export, with significant limitations on processing time, but offered for user convenience diff --git a/kpi/serializers/v1/export_task.py b/kpi/serializers/v1/export_task.py index 414f3521ed..54e81f3064 100644 --- a/kpi/serializers/v1/export_task.py +++ b/kpi/serializers/v1/export_task.py @@ -2,19 +2,19 @@ from rest_framework import serializers from kpi.fields import ReadOnlyJSONField -from kpi.models import ExportTask +from kpi.models import SubmissionExportTask class ExportTaskSerializer(serializers.HyperlinkedModelSerializer): url = serializers.HyperlinkedIdentityField( lookup_field='uid', - view_name='exporttask-detail' + view_name='submissionexporttask-detail' ) messages = ReadOnlyJSONField(required=False) data = ReadOnlyJSONField() class Meta: - model = ExportTask + model = SubmissionExportTask fields = ( 'url', 'status', diff --git a/kpi/serializers/v2/export_task.py b/kpi/serializers/v2/export_task.py index 5db4bf9922..efd28f98c3 100644 --- a/kpi/serializers/v2/export_task.py +++ b/kpi/serializers/v2/export_task.py @@ -28,7 +28,7 @@ ) from kpi.fields import ReadOnlyJSONField -from kpi.models import ExportTask, Asset +from kpi.models import SubmissionExportTask, Asset from kpi.tasks import export_in_background from kpi.utils.export_task import format_exception_values from 
kpi.utils.object_permission import get_database_user @@ -40,7 +40,7 @@ class ExportTaskSerializer(serializers.ModelSerializer): data = ReadOnlyJSONField() class Meta: - model = ExportTask + model = SubmissionExportTask fields = ( 'url', 'status', @@ -58,10 +58,10 @@ class Meta: 'result', ) - def create(self, validated_data: dict) -> ExportTask: + def create(self, validated_data: dict) -> SubmissionExportTask: # Create a new export task user = get_database_user(self._get_request.user) - export_task = ExportTask.objects.create( + export_task = SubmissionExportTask.objects.create( user=user, data=validated_data ) # Have Celery run the export in the background @@ -262,7 +262,7 @@ def validate_type(self, data: dict) -> str: ) return export_type - def get_url(self, obj: ExportTask) -> str: + def get_url(self, obj: SubmissionExportTask) -> str: return reverse( 'asset-export-detail', args=(self._get_asset.uid, obj.uid), @@ -276,4 +276,3 @@ def _get_asset(self) -> Asset: @property def _get_request(self) -> Request: return self.context['request'] - diff --git a/kpi/tasks.py b/kpi/tasks.py index a2553b644a..84924884ce 100644 --- a/kpi/tasks.py +++ b/kpi/tasks.py @@ -12,7 +12,7 @@ from kpi.constants import LIMIT_HOURS_23 from kpi.maintenance_tasks import remove_old_asset_snapshots, remove_old_import_tasks from kpi.models.asset import Asset -from kpi.models.import_export_task import ExportTask, ImportTask +from kpi.models.import_export_task import ImportTask, SubmissionExportTask @celery_app.task @@ -24,7 +24,7 @@ def import_in_background(import_task_uid): @celery_app.task def export_in_background(export_task_uid): - export_task = ExportTask.objects.get(uid=export_task_uid) + export_task = SubmissionExportTask.objects.get(uid=export_task_uid) export_task.run() @@ -44,8 +44,9 @@ def export_task_in_background( 'Regards,\n' 'KoboToolbox' ) + subject = export.default_email_subject mail.send_mail( - subject='Project View Report Complete', + subject=subject, message=msg, 
from_email=settings.DEFAULT_FROM_EMAIL, recipient_list=[user.email], diff --git a/kpi/tests/api/v1/test_api_assets.py b/kpi/tests/api/v1/test_api_assets.py index 6bf9255f7e..5d2f4be619 100644 --- a/kpi/tests/api/v1/test_api_assets.py +++ b/kpi/tests/api/v1/test_api_assets.py @@ -10,7 +10,7 @@ from formpack.utils.expand_content import SCHEMA_VERSION from kobo.apps.kobo_auth.shortcuts import User from kpi.constants import ASSET_TYPE_COLLECTION -from kpi.models import Asset, ExportTask +from kpi.models import Asset, SubmissionExportTask from kpi.models.import_export_task import export_upload_to from kpi.serializers.v1.asset import AssetListSerializer @@ -286,7 +286,7 @@ def setUp(self): ) def test_owner_can_create_export(self): - post_url = reverse('exporttask-list') + post_url = reverse('submissionexporttask-list') asset_url = reverse('asset-detail', args=[self.asset.uid]) task_data = { 'source': asset_url, @@ -355,12 +355,12 @@ def test_owner_can_create_and_delete_export(self): ) file_path = export_upload_to(self, file_name) - detail_url = reverse('exporttask-detail', kwargs={ + detail_url = reverse('submissionexporttask-detail', kwargs={ 'uid': detail_response.data['uid'] }) # checking if file exists before attempting to delete - file_exists_before_delete = ExportTask.result.field.storage.exists( + file_exists_before_delete = SubmissionExportTask.result.field.storage.exists( name=file_path ) assert file_exists_before_delete @@ -370,7 +370,7 @@ def test_owner_can_create_and_delete_export(self): assert delete_response.status_code == status.HTTP_204_NO_CONTENT # checking if file still exists after attempting to delete it - file_exists_after_delete = ExportTask.result.field.storage.exists( + file_exists_after_delete = SubmissionExportTask.result.field.storage.exists( name=file_path ) assert not file_exists_after_delete diff --git a/kpi/tests/api/v1/test_api_exports.py b/kpi/tests/api/v1/test_api_exports.py index 10e5e8bb5a..e14f1215aa 100644 --- 
a/kpi/tests/api/v1/test_api_exports.py +++ b/kpi/tests/api/v1/test_api_exports.py @@ -4,7 +4,7 @@ from rest_framework import status from rest_framework.reverse import reverse -from kpi.models import Asset, ExportTask +from kpi.models import Asset, SubmissionExportTask from kpi.tests.base_test_case import BaseTestCase from kpi.tests.test_mock_data_exports import MockDataExportsBase @@ -14,7 +14,7 @@ def test_export_uid_filter(self): assert self.user.username == 'someuser' def _create_export_task(asset): - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = { 'source': reverse('asset-detail', args=[asset.uid]), @@ -37,7 +37,7 @@ def _create_export_task(asset): # Retrieve all the exports unfiltered self.client.login(username='someuser', password='someuser') - list_url = reverse(self._get_endpoint('exporttask-list')) + list_url = reverse(self._get_endpoint('submissionexporttask-list')) response = self.client.get(list_url) assert response.status_code == status.HTTP_200_OK assert response.json()['count'] == 2 @@ -58,7 +58,7 @@ def test_export_source_validation(self): moment! 
""" self.client.login(username='someuser', password='someuser') - list_url = reverse(self._get_endpoint('exporttask-list')) + list_url = reverse(self._get_endpoint('submissionexporttask-list')) source_url = reverse('asset-detail', args=[self.asset.uid]) # Give the source URL an invalid asset UID source_url = source_url.rstrip('/') + 'bogus/' diff --git a/kpi/tests/api/v2/test_api_exports.py b/kpi/tests/api/v2/test_api_exports.py index 9a29417119..412eebcfea 100644 --- a/kpi/tests/api/v2/test_api_exports.py +++ b/kpi/tests/api/v2/test_api_exports.py @@ -11,7 +11,7 @@ PERM_VIEW_ASSET, PERM_VIEW_SUBMISSIONS, ) -from kpi.models import Asset, ExportTask, AssetExportSettings +from kpi.models import Asset, SubmissionExportTask, AssetExportSettings from kpi.tests.base_test_case import BaseTestCase from kpi.tests.test_mock_data_exports import MockDataExportsBase from kpi.urls.router_api_v2 import URL_NAMESPACE as ROUTER_URL_NAMESPACE @@ -26,7 +26,7 @@ def _create_export_task(self, asset=None, user=None, _type='csv'): uid = self.asset.uid if asset is None else asset.uid user = self.user if user is None else user - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = user export_task.data = { 'source': reverse( diff --git a/kpi/tests/test_access_logs_export_task.py b/kpi/tests/test_access_logs_export_task.py new file mode 100644 index 0000000000..7baafc4428 --- /dev/null +++ b/kpi/tests/test_access_logs_export_task.py @@ -0,0 +1,122 @@ +import csv +import os + +from django.conf import settings +from django.test import TestCase + +from kobo.apps.audit_log.models import AccessLog +from kobo.apps.kobo_auth.shortcuts import User +from kpi.models.import_export_task import AccessLogExportTask + + +class AccessLogExportTaskTests(TestCase): + + def setUp(self): + self.user = User.objects.create_user( + username='testuser', email='testuser@example.com', password='password' + ) + self.superuser = User.objects.create_superuser( + username='superuser', 
email='superuser@example.com', password='password' + ) + + def create_export_task(self, user, get_all_logs=True): + return AccessLogExportTask.objects.create( + user=user, + get_all_logs=get_all_logs, + data={'type': 'access_logs_export'}, + ) + + def test_task_initialization(self): + task = self.create_export_task(self.user, get_all_logs=False) + self.assertIsInstance(task, AccessLogExportTask) + self.assertFalse(task.get_all_logs) + + def test_get_all_logs_superuser(self): + task = self.create_export_task(self.superuser) + self.assertTrue(task.get_all_logs) + + def test_get_all_logs_non_superuser(self): + task = self.create_export_task(self.user) + + with self.assertRaises(PermissionError) as context: + task._run_task([]) + + self.assertEqual( + str(context.exception), 'Only superusers can export all access logs.' + ) + + def test_run_task_creates_csv(self): + task = self.create_export_task(self.superuser) + task.run() + + self.assertIsNotNone(task.result, 'The task.result should not be None.') + expected_pattern = ( + rf'{self.superuser.username}/exports/access_logs_export-' + rf'{self.superuser.username}-' + r'\d{4}-\d{2}-\d{2}T\d{6}Z\.csv' + ) + + self.assertRegex( + task.result.name, + expected_pattern, + 'The task.result file name format is incorrect.', + ) + self.assertTrue( + os.path.exists(task.result.path), + f'The file at {task.result.path} should exist.', + ) + + def test_csv_content_structure(self): + log = AccessLog.objects.create( + user=self.user, + metadata={ + 'auth_type': 'test_auth', + 'source': 'test_source', + 'ip_address': '127.0.0.1', + 'initial_user_username': 'initial_superuser', + 'initial_user_uid': 'initial_superuser_uid', + 'authorized_app_name': 'test_app', + }, + date_created='2024-11-05T12:00:00Z', + ) + task = self.create_export_task(self.superuser) + task.run() + + with open(task.result.path, mode='r', encoding='utf-8') as csv_file: + reader = csv.DictReader(csv_file) + rows = list(reader) + + expected_headers = [ + 'user_url', + 
'user_uid', + 'username', + 'auth_type', + 'date_created', + 'source', + 'ip_address', + 'initial_superusername', + 'initial_superuseruid', + 'authorized_application', + 'other_details', + ] + self.assertListEqual(expected_headers, reader.fieldnames) + + first_row = rows[0] + expected_user_url = ( + f'{settings.KOBOFORM_URL}/api/v2/users/{self.user.username}' + ) + + self.assertEqual(first_row['user_url'], expected_user_url) + self.assertEqual(first_row['user_uid'], log.user_uid) + self.assertEqual(first_row['username'], self.user.username) + self.assertEqual(first_row['auth_type'], 'test_auth') + self.assertEqual(first_row['source'], 'test_source') + self.assertEqual(first_row['ip_address'], '127.0.0.1') + self.assertEqual(first_row['initial_superusername'], 'initial_superuser') + self.assertEqual(first_row['initial_superuseruid'], 'initial_superuser_uid') + self.assertEqual(first_row['authorized_application'], 'test_app') + self.assertIsNotNone(first_row['other_details']) + + def tearDown(self): + AccessLogExportTask.objects.all().delete() + User.objects.all().delete() diff --git a/kpi/tests/test_mock_data_conflicting_version_exports.py b/kpi/tests/test_mock_data_conflicting_version_exports.py index 56361ffe54..588279fa8e 100644 --- a/kpi/tests/test_mock_data_conflicting_version_exports.py +++ b/kpi/tests/test_mock_data_conflicting_version_exports.py @@ -8,7 +8,7 @@ from kobo.apps.kobo_auth.shortcuts import User from kobo.apps.reports import report_data from kpi.constants import PERM_VIEW_SUBMISSIONS -from kpi.models import Asset, ExportTask +from kpi.models import Asset, SubmissionExportTask from kpi.utils.strings import to_str @@ -68,7 +68,7 @@ def test_csv_export(self): Ignores the order of the rows and columns """ - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = { 'source': reverse('asset-detail', args=[self.asset.uid]), diff --git a/kpi/tests/test_mock_data_exports.py 
b/kpi/tests/test_mock_data_exports.py index 23791f00e7..14980612d0 100644 --- a/kpi/tests/test_mock_data_exports.py +++ b/kpi/tests/test_mock_data_exports.py @@ -19,7 +19,7 @@ PERM_VIEW_ASSET, PERM_VIEW_SUBMISSIONS, ) -from kpi.models import Asset, ExportTask +from kpi.models import Asset, SubmissionExportTask from kpi.utils.mongo_helper import drop_mock_only from kpi.utils.object_permission import get_anonymous_user @@ -408,11 +408,11 @@ def run_csv_export_test( UTF-8 encoded representation should match the export result `export_options`: (optional) a list of extra options for - `ExportTask.data`. Do not include `source` or `type` + `SubmissionExportTask.data`. Do not include `source` or `type` `asset`: (optional) the asset to export. Defaults to `self.asset` `user`: (optional) the user to own the export. Defaults to `self.user` """ - export_task = ExportTask() + export_task = SubmissionExportTask() asset = self.asset if asset is None else asset export_task.user = self.user if user is None else user export_task.data = { @@ -446,11 +446,11 @@ def run_xls_export_test( `expected_rows`: a list of strings *without* trailing newlines whose UTF-8 encoded representation should match the export result - `export_options`: a list of extra options for `ExportTask.data`. Do not + `export_options`: a list of extra options for `SubmissionExportTask.data`. 
Do not include `source` or `type` """ asset = self.asset if asset is None else asset - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user if user is None else user export_task.data = { 'source': reverse('asset-detail', args=[asset.uid]), @@ -1370,7 +1370,7 @@ def test_xls_export_repeat_groups(self): self.run_xls_export_test(expected_data, asset=asset, repeat_group=True) def test_export_spss_labels(self): - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = { 'source': reverse('asset-detail', args=[self.asset.uid]), @@ -1468,22 +1468,22 @@ def test_remove_excess_exports(self): } # Create and run one export, so we can verify that it's `result` file # is later deleted - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = task_data export_task.save() export_task.run() - self.assertEqual(export_task.status, ExportTask.COMPLETE) + self.assertEqual(export_task.status, SubmissionExportTask.COMPLETE) result = export_task.result self.assertTrue(result.storage.exists(result.name)) # Make an excessive amount of additional exports excess_count = 5 + settings.MAXIMUM_EXPORTS_PER_USER_PER_FORM for _ in range(excess_count): - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = task_data export_task.save() - created_export_tasks = ExportTask.objects.filter( + created_export_tasks = SubmissionExportTask.objects.filter( user=self.user, data__source=task_data['source'] ) self.assertEqual(excess_count + 1, created_export_tasks.count()) @@ -1492,13 +1492,13 @@ def test_remove_excess_exports(self): :settings.MAXIMUM_EXPORTS_PER_USER_PER_FORM] # Call `run()` once more since it invokes the cleanup logic export_task.run() - self.assertEqual(export_task.status, ExportTask.COMPLETE) + self.assertEqual(export_task.status, SubmissionExportTask.COMPLETE) # Verify the 
cleanup self.assertFalse(result.storage.exists(result.name)) self.assertListEqual( # assertSequenceEqual isn't working... list(export_tasks_to_keep.values_list('pk', flat=True)), list( - ExportTask.objects.filter( + SubmissionExportTask.objects.filter( user=self.user, data__source=task_data['source'] ).order_by('-date_created').values_list('pk', flat=True) ), @@ -1511,13 +1511,13 @@ def test_log_and_mark_stuck_exports_as_errored(self): } self.assertEqual( 0, - ExportTask.objects.filter( + SubmissionExportTask.objects.filter( user=self.user, data__source=task_data['source'] ).count(), ) # Simulate a few stuck exports - for status in (ExportTask.CREATED, ExportTask.PROCESSING): - export_task = ExportTask() + for status in (SubmissionExportTask.CREATED, SubmissionExportTask.PROCESSING): + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = task_data export_task.status = status @@ -1525,21 +1525,21 @@ def test_log_and_mark_stuck_exports_as_errored(self): export_task.date_created -= datetime.timedelta(days=1) export_task.save() self.assertSequenceEqual( - [ExportTask.CREATED, ExportTask.PROCESSING], - ExportTask.objects.filter( + [SubmissionExportTask.CREATED, SubmissionExportTask.PROCESSING], + SubmissionExportTask.objects.filter( user=self.user, data__source=task_data['source'] ).order_by('pk').values_list('status', flat=True), ) # Run another export, which invokes the cleanup logic - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = task_data export_task.save() export_task.run() # Verify that the stuck exports have been marked self.assertSequenceEqual( - [ExportTask.ERROR, ExportTask.ERROR, ExportTask.COMPLETE], - ExportTask.objects.filter( + [SubmissionExportTask.ERROR, SubmissionExportTask.ERROR, SubmissionExportTask.COMPLETE], + SubmissionExportTask.objects.filter( user=self.user, data__source=task_data['source'] ).order_by('pk').values_list('status', flat=True), ) @@ 
-1551,14 +1551,14 @@ def test_export_long_form_title(self): 'jugs dum cornelia legit flavia scribit et laeta est flavia quod ' 'cornelia iam in villa habitat et cornelia et flavia sunt amicae' ) - assert len(what_a_title) > ExportTask.MAXIMUM_FILENAME_LENGTH + assert len(what_a_title) > SubmissionExportTask.MAXIMUM_FILENAME_LENGTH self.asset.name = what_a_title self.asset.save() task_data = { 'source': reverse('asset-detail', args=[self.asset.uid]), 'type': 'csv', } - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = task_data export_task.save() @@ -1566,7 +1566,7 @@ def test_export_long_form_title(self): assert ( len(os.path.basename(export_task.result.name)) == - ExportTask.MAXIMUM_FILENAME_LENGTH + SubmissionExportTask.MAXIMUM_FILENAME_LENGTH ) def test_export_latest_version_only(self): @@ -1616,7 +1616,7 @@ def test_export_exceeding_api_submission_limit(self): } for i in range(limit + excess) ] asset.deployment.mock_submissions(submissions) - export_task = ExportTask() + export_task = SubmissionExportTask() export_task.user = self.user export_task.data = { 'source': reverse('asset-detail', args=[asset.uid]), diff --git a/kpi/utils/project_view_exports.py b/kpi/utils/data_exports.py similarity index 77% rename from kpi/utils/project_view_exports.py rename to kpi/utils/data_exports.py index d8ab5e1643..6a77bf2d69 100644 --- a/kpi/utils/project_view_exports.py +++ b/kpi/utils/data_exports.py @@ -5,6 +5,7 @@ from io import StringIO from typing import Union +from django.apps import apps from django.conf import settings from django.db.models import Count, F, Q from django.db.models.query import QuerySet @@ -13,7 +14,6 @@ from kobo.apps.openrosa.apps.logger.models.xform import XForm from kpi.constants import ASSET_TYPE_SURVEY from kpi.models import Asset -from kpi.utils.project_views import get_region_for_view ASSET_FIELDS = ( 'id', @@ -66,6 +66,19 @@ 'instagram', 'metadata', ) +ACCESS_LOGS_EXPORT_FIELDS 
= ( + 'user_url', + 'user_uid', + 'username', + 'auth_type', + 'date_created', + 'source', + 'ip_address', + 'initial_superusername', + 'initial_superuseruid', + 'authorized_application', + 'other_details', +) CONFIG = { 'assets': { 'queryset': Asset.objects.filter(asset_type=ASSET_TYPE_SURVEY), @@ -79,9 +92,45 @@ 'key': METADATA, 'columns': USER_FIELDS + METADATA_FIELDS, }, + 'access_logs_export': { + 'queryset': lambda: apps.get_model('audit_log', 'AccessLog') + .objects.all() + .order_by('-date_created'), + 'key': 'metadata', + 'columns': ACCESS_LOGS_EXPORT_FIELDS, + }, } +def create_data_export(export_type: str, data: QuerySet) -> StringIO: + config = CONFIG[export_type] + + buff = StringIO() + writer = csv.writer(buff) + writer.writerow(config['columns']) + for row in data: + items = row.pop(config['key'], {}) or {} + flatten_settings_inplace(items) + row.update(items) + # submission counts come from kobocat database and therefore need to be + # appended manually rather than through queries + if export_type == 'assets': + row['submission_count'] = get_submission_count(row['form_id']) + flat_row = [get_row_value(row, col) for col in config['columns']] + writer.writerow(flat_row) + + buff.seek(0) + return buff + + +def filter_remaining_metadata(row, accessed_fields): + metadata = row['other_details'] + if metadata is not None: + return { + key: value for key, value in metadata.items() if key not in accessed_fields + } + + def flatten_settings_inplace(settings: dict) -> None: for k, v in settings.items(): if isinstance(v, list) and v: @@ -98,12 +147,18 @@ def flatten_settings_inplace(settings: dict) -> None: settings[k] = '' -def get_row_value(row: dict, col: str) -> Union[str, int, float, bool, None]: - val = row.get(col, '') - # remove any new lines from text - if isinstance(val, str): - val = val.replace('\n', '') - return val +def get_user_data(filtered_queryset: QuerySet) -> QuerySet: + vals = USER_FIELDS + (METADATA,) + return ( + 
filtered_queryset.exclude(pk=settings.ANONYMOUS_USER_ID) + .annotate( + mfa_is_active=F('mfa_methods__is_active'), + metadata=F('extra_details__data'), + asset_count=Count('assets'), + ) + .values(*vals) + .order_by('id') + ) def get_q(countries: list[str], export_type: str) -> QuerySet: @@ -115,6 +170,14 @@ def get_q(countries: list[str], export_type: str) -> QuerySet: return Q(**{q_term: countries}) +def get_row_value(row: dict, col: str) -> Union[str, int, float, bool, None]: + val = row.get(col, '') + # remove any new lines from text + if isinstance(val, str): + val = val.replace('\n', '') + return val + + def get_submission_count(xform_id: int) -> int: result = XForm.objects.values('num_of_submissions').filter(pk=xform_id).first() @@ -123,52 +186,3 @@ def get_submission_count(xform_id: int) -> int: return 0 return result['num_of_submissions'] - - -def get_data(filtered_queryset: QuerySet, export_type: str) -> QuerySet: - if export_type == 'assets': - vals = ASSET_FIELDS + (SETTINGS,) - data = filtered_queryset.annotate( - owner__name=F('owner__extra_details__data__name'), - owner__organization=F('owner__extra_details__data__organization'), - form_id=F('_deployment_data__backend_response__formid'), - ) - else: - vals = USER_FIELDS + (METADATA,) - data = filtered_queryset.exclude( - pk=settings.ANONYMOUS_USER_ID - ).annotate( - mfa_is_active=F('mfa_methods__is_active'), - metadata=F('extra_details__data'), - asset_count=Count('assets'), - ) - - return data.values(*vals).order_by('id') - - -def create_project_view_export( - export_type: str, username: str, uid: str -) -> StringIO: - config = CONFIG[export_type] - region_for_view = get_region_for_view(uid) - - q = get_q(region_for_view, export_type) - filtered_queryset = config['queryset'].filter(q) - data = get_data(filtered_queryset, export_type) - - buff = StringIO() - writer = csv.writer(buff) - writer.writerow(config['columns']) - for row in data: - items = row.pop(config['key'], {}) or {} - 
flatten_settings_inplace(items) - row.update(items) - # submission counts come from kobocat database and therefore need to be - # appended manually rather than through queries - if export_type == 'assets': - row['submission_count'] = get_submission_count(row['form_id']) - flat_row = [get_row_value(row, col) for col in config['columns']] - writer.writerow(flat_row) - - buff.seek(0) - return buff diff --git a/kpi/views/v1/export_task.py b/kpi/views/v1/export_task.py index 42ed479b81..ea209dc065 100644 --- a/kpi/views/v1/export_task.py +++ b/kpi/views/v1/export_task.py @@ -6,7 +6,7 @@ from rest_framework.reverse import reverse from kobo.apps.audit_log.base_views import AuditLoggedNoUpdateModelViewSet -from kpi.models import Asset, ExportTask +from kpi.models import Asset, SubmissionExportTask from kpi.serializers import ExportTaskSerializer from kpi.tasks import export_in_background from kpi.utils.models import remove_string_prefix, resolve_url_to_asset @@ -36,7 +36,8 @@ class ExportTaskViewSet(AuditLoggedNoUpdateModelViewSet): > List can be filtered through the following methods: * Source URL or UID if `q=source:[URL|UID]`; - * Comma-separated list of `ExportTask` UIDs if `q=uid__in:[UID],[UID],...` was provided + * Comma-separated list of `SubmissionExportTask` UIDs + if `q=uid__in:[UID],[UID],...` was provided * Data source URL if `q=data__source:[URL]` > Examples: @@ -130,21 +131,21 @@ class ExportTaskViewSet(AuditLoggedNoUpdateModelViewSet): ### CURRENT ENDPOINT """ - queryset = ExportTask.objects.all() + queryset = SubmissionExportTask.objects.all() serializer_class = ExportTaskSerializer lookup_field = 'uid' log_type = 'project-history' def get_queryset(self, *args, **kwargs): if self.request.user.is_anonymous: - return ExportTask.objects.none() + return SubmissionExportTask.objects.none() - queryset = ExportTask.objects.filter( + queryset = SubmissionExportTask.objects.filter( user=self.request.user).order_by('date_created') # Ultra-basic filtering by: # * 
source URL or UID if `q=source:[URL|UID]` was provided; - # * comma-separated list of `ExportTask` UIDs if + # * comma-separated list of `SubmissionExportTask` UIDs if # `q=uid__in:[UID],[UID],...` was provided q = self.request.query_params.get('q', False) if not q: @@ -166,7 +167,7 @@ def get_queryset(self, *args, **kwargs): else: # Filter requested that we don't understand; make it obvious by # returning nothing - return ExportTask.objects.none() + return SubmissionExportTask.objects.none() return queryset def create(self, request, *args, **kwargs): @@ -205,15 +206,16 @@ def create(self, request, *args, **kwargs): raise serializers.ValidationError( {'source': 'The specified asset must be deployed.'}) # Create a new export task - export_task = ExportTask.objects.create(user=request.user, - data=task_data) + export_task = SubmissionExportTask.objects.create( + user=request.user, data=task_data + ) # Have Celery run the export in the background export_in_background.delay(export_task_uid=export_task.uid) return Response({ 'uid': export_task.uid, 'url': reverse( - 'exporttask-detail', + 'submissionexporttask-detail', kwargs={'uid': export_task.uid}, request=request), - 'status': ExportTask.PROCESSING + 'status': SubmissionExportTask.PROCESSING }, status.HTTP_201_CREATED) diff --git a/kpi/views/v2/asset_export_settings.py b/kpi/views/v2/asset_export_settings.py index fcdb01c332..039a4711f8 100644 --- a/kpi/views/v2/asset_export_settings.py +++ b/kpi/views/v2/asset_export_settings.py @@ -12,7 +12,7 @@ from rest_framework.decorators import action from rest_framework_extensions.mixins import NestedViewSetMixin -from kpi.models import AssetExportSettings, SynchronousExport +from kpi.models import AssetExportSettings, SubmissionSynchronousExport from kpi.permissions import AssetExportSettingsPermission from kpi.renderers import SubmissionCSVRenderer, SubmissionXLSXRenderer from kpi.serializers.v2.asset_export_settings import ( @@ -235,7 +235,7 @@ def data(self, request, 
*args, **kwargs): # were originally created for a different format settings_obj.export_settings['type'] = format_type - export = SynchronousExport.generate_or_return_existing( + export = SubmissionSynchronousExport.generate_or_return_existing( user=user, asset_export_settings=settings_obj, ) diff --git a/kpi/views/v2/export_task.py b/kpi/views/v2/export_task.py index 32f9ca8623..a1bfc2bee5 100644 --- a/kpi/views/v2/export_task.py +++ b/kpi/views/v2/export_task.py @@ -7,7 +7,7 @@ from kobo.apps.audit_log.base_views import AuditLoggedNoUpdateModelViewSet from kpi.filters import SearchFilter -from kpi.models import ExportTask +from kpi.models import SubmissionExportTask from kpi.permissions import ExportTaskPermission from kpi.serializers.v2.export_task import ExportTaskSerializer from kpi.utils.object_permission import get_database_user @@ -142,7 +142,7 @@ class ExportTaskViewSet( ### CURRENT ENDPOINT """ - model = ExportTask + model = SubmissionExportTask serializer_class = ExportTaskSerializer lookup_field = 'uid' renderer_classes = [ @@ -168,4 +168,3 @@ def get_queryset(self): user=user, data__source__icontains=self.kwargs['parent_lookup_asset'], ) -