Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User import and export improvements #3431

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions bookwyrm/importers/bookwyrm_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,21 @@ def process_import(
job = BookwyrmImportJob.objects.create(
user=user, archive_file=archive_file, required=required
)

return job

def create_retry_job(
    self, user: User, original_job: BookwyrmImportJob
) -> BookwyrmImportJob:
    """Create a new import job that retries a previous one.

    The new job belongs to ``user``, reuses the archive file and the
    ``required`` value of ``original_job``, and is created with
    ``retry=True``. The newly created job is returned.
    """
    # everything except the owner is carried over from the original job
    carried_over = {
        "archive_file": original_job.archive_file,
        "required": original_job.required,
        "retry": True,
    }
    return BookwyrmImportJob.objects.create(user=user, **carried_over)


Expand Down
144 changes: 144 additions & 0 deletions bookwyrm/migrations/0210_userrelationshipimport_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Generated by Django 4.2.15 on 2024-09-03 11:22

import bookwyrm.models.fields
import django.contrib.postgres.fields
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
# Schema migration: creates three models based on bookwyrm.childjob
# (UserRelationshipImport, UserImportPost, UserImportBook) and changes
# fields on the bookwyrm import and export job models.

# This migration is applied on top of 0209_user_show_ratings.
dependencies = [
("bookwyrm", "0209_user_show_ratings"),
]

operations = [
# UserRelationshipImport: child job holding a relationship type
# ("follow" or "block"), a remote id and an optional failure reason.
migrations.CreateModel(
name="UserRelationshipImport",
fields=[
(
# parent link to the childjob row; it is also this model's primary key
"childjob_ptr",
models.OneToOneField(
auto_created=True,
on_delete=django.db.models.deletion.CASCADE,
parent_link=True,
primary_key=True,
serialize=False,
to="bookwyrm.childjob",
),
),
(
"relationship",
bookwyrm.models.fields.CharField(
choices=[("follow", "Follow"), ("block", "Block")],
max_length=10,
null=True,
),
),
(
"remote_id",
bookwyrm.models.fields.RemoteIdField(
max_length=255,
null=True,
validators=[bookwyrm.models.fields.validate_remote_id],
),
),
("fail_reason", models.TextField(null=True)),
],
options={
"abstract": False,
},
bases=("bookwyrm.childjob",),
),
# The export job no longer has a json_completed column.
migrations.RemoveField(
model_name="bookwyrmexportjob",
name="json_completed",
),
# New boolean flag on import jobs; existing rows get False.
migrations.AddField(
model_name="bookwyrmimportjob",
name="retry",
field=models.BooleanField(default=False),
),
# `required` becomes an unbounded (size=None) array of strings of up to
# 50 characters; both the array and its items may be blank.
migrations.AlterField(
model_name="bookwyrmimportjob",
name="required",
field=django.contrib.postgres.fields.ArrayField(
base_field=bookwyrm.models.fields.CharField(blank=True, max_length=50),
blank=True,
size=None,
),
),
# UserImportPost: child job holding the JSON of one status, its type
# (comment / review / quote), an optional failure reason and the edition
# it refers to.
migrations.CreateModel(
name="UserImportPost",
fields=[
(
# parent link to the childjob row; it is also this model's primary key
"childjob_ptr",
models.OneToOneField(
auto_created=True,
on_delete=django.db.models.deletion.CASCADE,
parent_link=True,
primary_key=True,
serialize=False,
to="bookwyrm.childjob",
),
),
("json", models.JSONField()),
(
"status_type",
bookwyrm.models.fields.CharField(
choices=[
("comment", "Comment"),
("review", "Review"),
("quote", "Quotation"),
],
default="comment",
max_length=10,
null=True,
),
),
("fail_reason", models.TextField(null=True)),
(
# PROTECT: a referenced edition cannot be deleted while a row points at it
"book",
bookwyrm.models.fields.ForeignKey(
on_delete=django.db.models.deletion.PROTECT,
to="bookwyrm.edition",
),
),
],
options={
"abstract": False,
},
bases=("bookwyrm.childjob",),
),
# UserImportBook: child job holding the JSON data of one book, an optional
# failure reason and an optional link to a bookwyrm.book row.
migrations.CreateModel(
name="UserImportBook",
fields=[
(
# parent link to the childjob row; it is also this model's primary key
"childjob_ptr",
models.OneToOneField(
auto_created=True,
on_delete=django.db.models.deletion.CASCADE,
parent_link=True,
primary_key=True,
serialize=False,
to="bookwyrm.childjob",
),
),
("book_data", models.JSONField()),
("fail_reason", models.TextField(null=True)),
(
# optional link; SET_NULL clears it when the referenced book is deleted
"book",
models.ForeignKey(
blank=True,
null=True,
on_delete=django.db.models.deletion.SET_NULL,
to="bookwyrm.book",
),
),
],
options={
"abstract": False,
},
bases=("bookwyrm.childjob",),
),
]
7 changes: 6 additions & 1 deletion bookwyrm/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@
from .group import Group, GroupMember, GroupMemberInvitation

from .import_job import ImportJob, ImportItem
from .bookwyrm_import_job import BookwyrmImportJob
from .bookwyrm_import_job import (
BookwyrmImportJob,
UserImportBook,
UserImportPost,
import_book_task,
)
from .bookwyrm_export_job import BookwyrmExportJob

from .move import MoveUser
Expand Down
68 changes: 29 additions & 39 deletions bookwyrm/models/bookwyrm_export_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from boto3.session import Session as BotoSession
from s3_tar import S3Tar

from django.db.models import BooleanField, FileField, JSONField
from django.db.models import FileField, JSONField
from django.core.serializers.json import DjangoJSONEncoder
from django.core.files.base import ContentFile
from django.core.files.storage import storages
Expand All @@ -17,7 +17,7 @@
from bookwyrm.models import Review, Comment, Quotation
from bookwyrm.models import Edition
from bookwyrm.models import UserFollows, User, UserBlocks
from bookwyrm.models.job import ParentJob
from bookwyrm.models.job import ParentJob, ParentTask
from bookwyrm.tasks import app, IMPORTS
from bookwyrm.utils.tar import BookwyrmTarFile

Expand All @@ -42,38 +42,41 @@ class BookwyrmExportJob(ParentJob):

export_data = FileField(null=True, storage=select_exports_storage)
export_json = JSONField(null=True, encoder=DjangoJSONEncoder)
json_completed = BooleanField(default=False)

def start_job(self):
"""schedule the first task"""

task = create_export_json_task.delay(job_id=self.id)
self.task_id = task.id
self.save(update_fields=["task_id"])
self.set_status("active")
create_export_json_task.delay(job_id=self.id)


@app.task(queue=IMPORTS)
def create_export_json_task(job_id):
@app.task(queue=IMPORTS, base=ParentTask)
def create_export_json_task(**kwargs):
"""create the JSON data for the export"""

job = BookwyrmExportJob.objects.get(id=job_id)

job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])
# don't start the job if it was stopped from the UI
if job.complete:
if job.status == "stopped":
return

try:
job.set_status("active")

# generate JSON structure
job.export_json = export_json(job.user)
# generate JSON
data = export_user(job.user)
data["settings"] = export_settings(job.user)
data["goals"] = export_goals(job.user)
data["books"] = export_books(job.user)
data["saved_lists"] = export_saved_lists(job.user)
data["follows"] = export_follows(job.user)
data["blocks"] = export_blocks(job.user)
job.export_json = data
job.save(update_fields=["export_json"])

# create archive in separate task
# trigger task to create tar file
create_archive_task.delay(job_id=job.id)

except Exception as err: # pylint: disable=broad-except
logger.exception(
"create_export_json_task for %s failed with error: %s", job, err
"create_export_json_task for job %s failed with error: %s", job.id, err
)
job.set_status("failed")

Expand All @@ -94,21 +97,20 @@ def add_file_to_s3_tar(s3_tar: S3Tar, storage, file, directory=""):
)


@app.task(queue=IMPORTS)
def create_archive_task(job_id):
@app.task(queue=IMPORTS, base=ParentTask)
def create_archive_task(**kwargs):
"""create the archive containing the JSON file and additional files"""

job = BookwyrmExportJob.objects.get(id=job_id)
job = BookwyrmExportJob.objects.get(id=kwargs["job_id"])

# don't start the job if it was stopped from the UI
if job.complete:
if job.status == "stopped":
return

try:
export_task_id = str(job.task_id)
archive_filename = f"{export_task_id}.tar.gz"
export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8")

user = job.user
editions = get_books_for_user(user)

Expand Down Expand Up @@ -169,25 +171,15 @@ def create_archive_task(job_id):
tar.add_image(edition.cover, directory="images")
job.save(update_fields=["export_data"])

job.set_status("completed")
job.complete_job()

except Exception as err: # pylint: disable=broad-except
logger.exception("create_archive_task for %s failed with error: %s", job, err)
logger.exception(
"create_archive_task for job %s failed with error: %s", job.id, err
)
job.set_status("failed")


def export_json(user: User):
    """Build the complete export structure for ``user``.

    The result of ``export_user`` forms the root of the structure; every
    other section is stored under its own key, in the order listed below.
    """
    payload = export_user(user)  # user data sits at the root of the JSON

    section_exporters = (
        ("settings", export_settings),
        ("goals", export_goals),
        ("books", export_books),
        ("saved_lists", export_saved_lists),
        ("follows", export_follows),
        ("blocks", export_blocks),
    )
    for section, exporter in section_exporters:
        payload[section] = exporter(user)

    return payload


def export_user(user: User):
"""export user data"""
data = user.to_activity()
Expand Down Expand Up @@ -316,11 +308,9 @@ def export_book(user: User, edition: Edition):
def get_books_for_user(user):
"""
Get all the books and editions related to a user.

We use union() instead of Q objects because it creates
multiple simple queries in stead of a much more complex DB query
multiple simple queries instead of a complex DB query
that can time out.

"""

shelf_eds = Edition.objects.select_related("parent_work").filter(shelves__user=user)
Expand Down
Loading
Loading