Skip to content

Commit

Permalink
Merge branch 'release/2.024.36'
Browse files Browse the repository at this point in the history
  • Loading branch information
noliveleger committed Nov 27, 2024
2 parents b8184e8 + 7413adc commit e0c7df0
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 62 deletions.
2 changes: 2 additions & 0 deletions kobo/apps/openrosa/apps/logger/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY = 'kobo:update_attachment_storage_bytes:heartbeat'
Original file line number Diff line number Diff line change
@@ -1,17 +1,24 @@
from __future__ import annotations

import time

from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand
from django.db.models import OuterRef, Subquery, Sum
from django_redis import get_redis_connection

from kobo.apps.openrosa.apps.logger.constants import (
SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY
)
from kobo.apps.openrosa.apps.logger.models.attachment import Attachment
from kobo.apps.openrosa.apps.logger.models.xform import XForm
from kobo.apps.openrosa.apps.main.models.user_profile import UserProfile
from kobo.apps.openrosa.libs.utils.jsonbfield_helper import ReplaceValues


class Command(BaseCommand):

help = (
'Retroactively calculate the total attachment file storage '
'per xform and user profile'
Expand All @@ -22,6 +29,7 @@ def __init__(self, *args, **kwargs):
self._verbosity = 0
self._force = False
self._sync = False
self._redis_client = get_redis_connection()

def add_arguments(self, parser):
parser.add_argument(
Expand Down Expand Up @@ -52,10 +60,14 @@ def add_arguments(self, parser):
)

parser.add_argument(
'-l', '--skip-lock-release',
'-nl', '--no-lock',
action='store_true',
default=False,
help='Do not attempts to remove submission lock on user profiles. Default is False',
help=(
'Do not lock accounts from receiving submissions while updating '
'storage counters.\n'
'WARNING: This may result in discrepancies. The default value is False.'
)
)

def handle(self, *args, **kwargs):
Expand All @@ -65,7 +77,7 @@ def handle(self, *args, **kwargs):
self._sync = kwargs['sync']
chunks = kwargs['chunks']
username = kwargs['username']
skip_lock_release = kwargs['skip_lock_release']
no_lock = kwargs['no_lock']

if self._force and self._sync:
self.stderr.write(
Expand All @@ -89,7 +101,7 @@ def handle(self, *args, **kwargs):
'`force` option has been enabled'
)

if not skip_lock_release:
if not no_lock:
self._release_locks()

profile_queryset = self._reset_user_profile_counters()
Expand All @@ -112,57 +124,62 @@ def handle(self, *args, **kwargs):
)
continue

self._lock_user_profile(user)
if not no_lock:
self._lock_user_profile(user)

for xform in user_xforms.iterator(chunk_size=chunks):
try:
for xform in user_xforms.iterator(chunk_size=chunks):

# write out xform progress
if self._verbosity > 1:
self.stdout.write(
f"Calculating attachments for xform_id #{xform['pk']}"
f" (user {user.username})"
)
# aggregate total media file size for all media per xform
form_attachments = Attachment.objects.filter(
instance__xform_id=xform['pk'],
).aggregate(total=Sum('media_file_size'))

if form_attachments['total']:
if (
xform['attachment_storage_bytes']
== form_attachments['total']
):
if self._verbosity > 2:
self.stdout.write(
'\tSkipping xform update! '
'Attachment storage is already accurate'
self._heartbeat(user)

# write out xform progress
if self._verbosity > 1:
self.stdout.write(
f"Calculating attachments for xform_id #{xform['pk']}"
f" (user {user.username})"
)
# aggregate total media file size for all media per xform
form_attachments = Attachment.objects.filter(
instance__xform_id=xform['pk'],
).aggregate(total=Sum('media_file_size'))

if form_attachments['total']:
if (
xform['attachment_storage_bytes']
== form_attachments['total']
):
if self._verbosity > 2:
self.stdout.write(
'\tSkipping xform update! '
'Attachment storage is already accurate'
)
else:
if self._verbosity > 2:
self.stdout.write(
f'\tUpdating xform attachment storage to '
f"{form_attachments['total']} bytes"
)

XForm.all_objects.filter(
pk=xform['pk']
).update(
attachment_storage_bytes=form_attachments['total']
)

else:
if self._verbosity > 2:
self.stdout.write(
f'\tUpdating xform attachment storage to '
f"{form_attachments['total']} bytes"
self.stdout.write('\tNo attachments found')
if not xform['attachment_storage_bytes'] == 0:
XForm.all_objects.filter(
pk=xform['pk']
).update(
attachment_storage_bytes=0
)

XForm.all_objects.filter(
pk=xform['pk']
).update(
attachment_storage_bytes=form_attachments['total']
)

else:
if self._verbosity > 2:
self.stdout.write('\tNo attachments found')
if not xform['attachment_storage_bytes'] == 0:
XForm.all_objects.filter(
pk=xform['pk']
).update(
attachment_storage_bytes=0
)

# need to call `update_user_profile()` one more time outside the loop
# because the last user profile will not be up-to-date otherwise
self._update_user_profile(user)
self._update_user_profile(user)
finally:
if not no_lock:
self._release_lock(user)

if self._verbosity >= 1:
self.stdout.write('Done!')
Expand All @@ -187,6 +204,13 @@ def _get_queryset(self, profile_queryset, username):

return users.order_by('pk')

def _heartbeat(self, user: settings.AUTH_USER_MODEL):
    """
    Record "now" as the latest sign of life for `user`.

    Stores the current Unix time, truncated to whole seconds, in the Redis
    hash located at `SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY`, using the
    username as the hash field. An existing entry for the same username is
    overwritten.
    """
    beat = {user.username: int(time.time())}
    self._redis_client.hset(SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY, mapping=beat)

def _lock_user_profile(self, user: settings.AUTH_USER_MODEL):
# Retrieve or create user's profile.
(
Expand All @@ -204,7 +228,18 @@ def _lock_user_profile(self, user: settings.AUTH_USER_MODEL):
# new submissions from coming in while the
# `attachment_storage_bytes` is being calculated.
user_profile.submissions_suspended = True
user_profile.save(update_fields=['submissions_suspended'])
user_profile.metadata['attachments_counting_status'] = 'not-completed'
user_profile.save(update_fields=['metadata', 'submissions_suspended'])

self._heartbeat(user)

def _release_lock(self, user: settings.AUTH_USER_MODEL):
    """
    Let `user` receive submissions again and forget their heartbeat.

    Resets `submissions_suspended` to `False` on the user's profile row and
    removes the user's field from the Redis heartbeat hash.
    """
    verbose = self._verbosity > 1
    if verbose:
        self.stdout.write(f'Releasing submission lock for {user.username}…')

    # Queryset-level update: only the `submissions_suspended` column is
    # written, no profile instance is loaded or saved.
    profiles = UserProfile.objects.filter(user_id=user.pk)
    profiles.update(submissions_suspended=False)

    # Drop the heartbeat entry that was recorded while the lock was held.
    self._redis_client.hdel(SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY, user.username)

def _release_locks(self):
# Release any locks on the users' profile from getting submissions
Expand Down Expand Up @@ -244,7 +279,7 @@ def _update_user_profile(self, user: settings.AUTH_USER_MODEL):
f'{user.username}’s profile'
)

# Update user's profile (and lock the related row)
# Update user's profile
updates = {
'attachments_counting_status': 'complete',
}
Expand All @@ -265,5 +300,4 @@ def _update_user_profile(self, user: settings.AUTH_USER_MODEL):
'metadata',
updates=updates,
),
submissions_suspended=False,
)
6 changes: 2 additions & 4 deletions kobo/apps/openrosa/apps/logger/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,8 @@ def pre_delete_attachment(instance, **kwargs):

if file_size and attachment.deleted_at is None:
with transaction.atomic():
"""
Update both counters at the same time (in a transaction) to avoid
desynchronization as much as possible
"""
# Update both counters simultaneously within a transaction to minimize
# the risk of desynchronization.
UserProfile.objects.filter(
user_id=xform.user_id
).update(
Expand Down
89 changes: 84 additions & 5 deletions kobo/apps/openrosa/apps/logger/tasks.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# coding: utf-8
import csv
import datetime
import logging
import time
import zipfile
from collections import defaultdict
from datetime import timedelta
Expand All @@ -11,14 +12,19 @@
from django.conf import settings
from django.core.management import call_command
from django.utils import timezone
from django_redis import get_redis_connection

from kobo.apps.kobo_auth.shortcuts import User
from kobo.apps.openrosa.libs.utils.jsonbfield_helper import ReplaceValues
from kobo.celery import celery_app
from kpi.deployment_backends.kc_access.storage import (
default_kobocat_storage as default_storage,
)
from kpi.utils.log import logging
from .constants import SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY
from .models.daily_xform_submission_counter import DailyXFormSubmissionCounter
from .models import Instance, XForm
from ..main.models import UserProfile


@celery_app.task()
Expand Down Expand Up @@ -46,7 +52,10 @@ def fix_root_node_names(**kwargs):
# #### END ISSUE 242 FIX ######


@shared_task(soft_time_limit=3600, time_limit=3630)
@shared_task(
soft_time_limit=settings.CELERY_LONG_RUNNING_TASK_SOFT_TIME_LIMIT,
time_limit=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT
)
def generate_stats_zip(output_filename):
# Limit to last month and this month
now = datetime.datetime.now()
Expand Down Expand Up @@ -121,6 +130,76 @@ def list_created_by_month(model, date_field):
zip_file.close()


@celery_app.task()
def sync_storage_counters():
call_command('update_attachment_storage_bytes', verbosity=3, sync=True)
@celery_app.task
def fix_stale_submissions_suspended_flag():
    """
    Clear stale `submissions_suspended` flags so that accounts are not locked
    indefinitely, which would prevent users from collecting their data.

    The `update_attachment_storage_bytes` management command stores one
    heartbeat timestamp per locked user in the Redis hash
    `SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY`. A heartbeat is considered stale
    when it is at least `settings.CELERY_LONG_RUNNING_TASK_SOFT_TIME_LIMIT`
    seconds old. For every stale entry, the profile flag is reset and the
    heartbeat entry is removed.

    Note:
    - This task is **not** automatically included in the periodic tasks.
    - If the task `sync_storage_counters` is added to the periodic tasks,
      this task should also be manually added to ensure consistency
      in the system's storage management and cleanup process.
    """

    redis_client = get_redis_connection()
    heartbeats = redis_client.hgetall(SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY)
    if not heartbeats:
        return

    # Both values are loop-invariant: read them once so every entry is
    # compared against the same instant.
    now = int(time.time())
    max_age = settings.CELERY_LONG_RUNNING_TASK_SOFT_TIME_LIMIT

    usernames = []
    for username, timestamp in heartbeats.items():
        # NOTE(review): assumes the Redis client returns bytes (no
        # `decode_responses`) - confirm against the connection settings.
        username = username.decode()
        timestamp = int(timestamp.decode())

        if timestamp + max_age <= now:
            logging.info(
                f'Removing `submissions_suspended` flag on user #{username}’s profile'
            )
            usernames.append(username)

    if usernames:
        # `submissions_suspended` is a column of `UserProfile` (the management
        # command sets and resets it as a model field), not a key of the
        # `metadata` JSON. It must be updated directly, otherwise the account
        # stays locked while its heartbeat entry is removed below.
        UserProfile.objects.filter(user__username__in=usernames).update(
            submissions_suspended=False
        )
        redis_client.hdel(SUBMISSIONS_SUSPENDED_HEARTBEAT_KEY, *usernames)


@celery_app.task(
    soft_time_limit=settings.CELERY_LONG_RUNNING_TASK_SOFT_TIME_LIMIT,
    time_limit=settings.CELERY_LONG_RUNNING_TASK_TIME_LIMIT
)
def sync_storage_counters(**kwargs):
    """
    Synchronize the "storage" counters of user profiles and their projects
    (XForm) by running the `update_attachment_storage_bytes` management
    command in `sync` mode with verbosity 3.

    The storage usage summed over a user's projects should match the storage
    counter kept on the corresponding profile.

    Accepted keyword argument:
    - `no_lock` (defaults to `False`): forwarded as-is to the command.

    Note:
    - This task is **not** automatically included in the periodic tasks.
    - If this task is added to periodic tasks, ensure that the
      `fix_stale_submissions_suspended_flag` task is also scheduled to
      maintain system integrity and prevent stale data issues.
    """

    # `no_lock` is read from the task kwargs rather than hard-coded, so that
    # superusers can control the lock behaviour from the admin interface
    # without requiring a redeployment.
    call_command(
        'update_attachment_storage_bytes',
        verbosity=3,
        sync=True,
        no_lock=kwargs.get('no_lock', False),
    )
7 changes: 6 additions & 1 deletion kobo/apps/openrosa/libs/utils/logger_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,6 @@ def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]:

# Update Attachment objects to hide them if they are not used anymore.
# We do not want to delete them until the instance itself is deleted.

# FIXME Temporary hack to leave background-audio files and audit files alone
# Bug comes from `get_xform_media_question_xpaths()`
queryset = Attachment.objects.filter(instance=instance).exclude(
Expand All @@ -786,6 +785,12 @@ def get_soft_deleted_attachments(instance: Instance) -> list[Attachment]:
| Q(media_file_basename__regex=r'^\d{10,}\.(m4a|amr)$')
)
soft_deleted_attachments = list(queryset.all())

# The query below updates only the database records, not the in-memory
# `Attachment` objects.
# As a result, the `deleted_at` attribute of `Attachment` objects remains `None`
# in memory after the update.
# This behavior is necessary to allow the signal to handle file deletion from storage.
queryset.update(deleted_at=dj_timezone.now())

return soft_deleted_attachments
Expand Down
2 changes: 1 addition & 1 deletion kpi/deployment_backends/openrosa_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,7 +1295,7 @@ def transfer_counters_ownership(self, new_owner: 'kobo_auth.User'):
attachment_storage_bytes=F('attachment_storage_bytes')
- self.xform.attachment_storage_bytes
)
UserProfile.objects.filter(user_id=self.asset.owner.pk).update(
UserProfile.objects.filter(user_id=new_owner.pk).update(
attachment_storage_bytes=F('attachment_storage_bytes')
+ self.xform.attachment_storage_bytes
)
Expand Down

0 comments on commit e0c7df0

Please sign in to comment.