diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py index 1443b63f3..acbb738fa 100644 --- a/openedx_learning/apps/authoring/backup_restore/zipper.py +++ b/openedx_learning/apps/authoring/backup_restore/zipper.py @@ -5,18 +5,21 @@ import hashlib import zipfile from pathlib import Path -from typing import Optional +from typing import List, Optional -from django.db.models import QuerySet +from django.db.models import Prefetch, QuerySet from django.utils.text import slugify -from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity -from openedx_learning.apps.authoring.publishing import api as publishing_api -from openedx_learning.apps.authoring.publishing.models import ( +from openedx_learning.api.authoring_models import ( + ComponentVersion, + ComponentVersionContent, + Content, LearningPackage, PublishableEntity, PublishableEntityVersion, ) +from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity +from openedx_learning.apps.authoring.publishing import api as publishing_api TOML_PACKAGE_NAME = "package.toml" @@ -68,6 +71,53 @@ def create_folder(self, folder_path: Path, zip_file: zipfile.ZipFile) -> None: zip_file.writestr(zip_info, "") # Add explicit empty directory entry self.folders_already_created.add(folder_path) + def get_publishable_entities(self) -> QuerySet[PublishableEntity]: + """ + Retrieve the publishable entities associated with the learning package. + Prefetches related data for efficiency. + """ + lp_id = self.learning_package.pk + publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id) + return ( + publishable_entities + .select_related( + "container", + "component__component_type", + "draft__version__componentversion", + "published__version__componentversion", + ) + .prefetch_related( + # We should re-evaluate the prefetching strategy here, + # as the current approach may cause performance issues— + # especially with large libraries (up to 100K items), + # which is too large for this type of prefetch. + Prefetch( + "draft__version__componentversion__componentversioncontent_set", + queryset=ComponentVersionContent.objects.select_related("content"), + to_attr="prefetched_contents", + ), + Prefetch( + "published__version__componentversion__componentversioncontent_set", + queryset=ComponentVersionContent.objects.select_related("content"), + to_attr="prefetched_contents", + ), + ) + ) + + def get_versions_to_write(self, entity: PublishableEntity): + """ + Get the versions of a publishable entity that should be written to the zip file. + It retrieves both draft and published versions. + """ + draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity) + published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity) + + versions_to_write = [draft_version] if draft_version else [] + + if published_version and published_version != draft_version: + versions_to_write.append(published_version) + return versions_to_write + def create_zip(self, path: str) -> None: """ Creates a zip file containing the learning package data. @@ -76,11 +126,10 @@ def create_zip(self, path: str) -> None: Raises: Exception: If the learning package cannot be found or if the zip creation fails. """ - package_toml_content: str = toml_learning_package(self.learning_package) - lp_id = self.learning_package.pk with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf: - # Add the package.toml string + # Add the package.toml file + package_toml_content: str = toml_learning_package(self.learning_package) zipf.writestr(TOML_PACKAGE_NAME, package_toml_content) # Add the entities directory @@ -91,9 +140,11 @@ def create_zip(self, path: str) -> None: collections_folder = Path("collections") self.create_folder(collections_folder, zipf) - # Add each entity's TOML file - publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id) - publishable_entities = publishable_entities.select_related("container", "component__component_type") + # ------ ENTITIES SERIALIZATION ------------- + + # get the publishable entities + publishable_entities: QuerySet[PublishableEntity] = self.get_publishable_entities() + for entity in publishable_entities: # entity: PublishableEntity = entity # Type hint for clarity @@ -116,24 +167,34 @@ def create_zip(self, path: str) -> None: # component_versions/ # v1/ # static/ + + # Generate the slugified hash for the component local key + # Example: if the local key is "my_component", the slugified hash might be "my_component_123456" + # It's a combination of the local key and a hash and should be unique entity_slugify_hash = slugify_hashed_filename(entity.component.local_key) + + # Create the component namespace folder + # Example of component namespace is: "entities/xblock.v1/" component_namespace_folder = entities_folder / entity.component.component_type.namespace - # Example of component namespace is: "xblock.v1" self.create_folder(component_namespace_folder, zipf) + # Create the component type folder + # Example of component type is: "entities/xblock.v1/html/" component_type_folder = component_namespace_folder / entity.component.component_type.name - # Example of component type is: "html" self.create_folder(component_type_folder, zipf) + # Create the component id folder + # Example of component id is: "entities/xblock.v1/html/my_component_123456/" component_id_folder = component_type_folder / entity_slugify_hash - # Example of component id is: "i-dont-like-the-sidebar-aa1645ade4a7" self.create_folder(component_id_folder, zipf) # Add the entity TOML file inside the component type folder as well + # Example: "entities/xblock.v1/html/my_component_123456.toml" component_entity_toml_path = component_type_folder / f"{entity_slugify_hash}.toml" zipf.writestr(str(component_entity_toml_path), entity_toml_content) # Add component version folder into the component id folder + # Example: "entities/xblock.v1/html/my_component_123456/component_versions/" component_version_folder = component_id_folder / "component_versions" self.create_folder(component_version_folder, zipf) @@ -141,13 +202,7 @@ def create_zip(self, path: str) -> None: # Focusing on draft and published versions # Get the draft and published versions - draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity) - published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity) - - versions_to_write = [draft_version] if draft_version else [] - - if published_version and published_version != draft_version: - versions_to_write.append(published_version) + versions_to_write: List[PublishableEntityVersion] = self.get_versions_to_write(entity) for version in versions_to_write: # Create a folder for the version @@ -158,3 +213,30 @@ def create_zip(self, path: str) -> None: # Add static folder for the version static_folder = version_folder / "static" self.create_folder(static_folder, zipf) + + # ------ COMPONENT STATIC CONTENT ------------- + component_version: ComponentVersion = version.componentversion + + # Get content data associated with this version + contents: QuerySet[ + ComponentVersionContent + ] = component_version.prefetched_contents # type: ignore[attr-defined] + + for component_version_content in contents: + content: Content = component_version_content.content + + # Important: The component_version_content.key contains implicitly + # the file name and the file extension + file_path = version_folder / component_version_content.key + + if content.has_file and content.path: + # If has_file, we pull it from the file system + with content.read_file() as f: + file_data = f.read() + elif not content.has_file and content.text: + # Otherwise, we use the text content as the file data + file_data = content.text + else: + # If no file and no text, we skip this content + continue + zipf.writestr(str(file_path), file_data) diff --git a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py index b51b3e12a..e338d3766 100644 --- a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py +++ b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py @@ -12,6 +12,7 @@ from openedx_learning.api import authoring as api from openedx_learning.api.authoring_models import Component, LearningPackage +from openedx_learning.apps.authoring.backup_restore.zipper import LearningPackageZipper from openedx_learning.lib.test_utils import TestCase User = get_user_model() @@ -19,7 +20,7 @@ class LpDumpCommandTestCase(TestCase): """ - Test serving static assets (Content files, via Component lookup). + Test the lp_dump management command. """ learning_package: LearningPackage @@ -28,7 +29,7 @@ class LpDumpCommandTestCase(TestCase): @classmethod def setUpTestData(cls): """ - Initialize our content data + Initialize data for the whole TestCase """ # Create a user for the test @@ -46,6 +47,8 @@ def setUpTestData(cls): cls.now = datetime(2024, 8, 5, tzinfo=timezone.utc) cls.xblock_v1_namespace = "xblock.v1" + html_media_type = api.get_or_create_media_type("text/html") + text_media_type = api.get_or_create_media_type("text/plain") # Create component types cls.html_type = api.get_or_create_component_type(cls.xblock_v1_namespace, "html") @@ -68,13 +71,25 @@ def setUpTestData(cls): published_at=cls.now, ) - api.create_next_component_version( + new_problem_version = api.create_next_component_version( cls.published_component.pk, title="My published problem draft v2", content_to_replace={}, created=cls.now, ) + new_txt_content = api.get_or_create_text_content( + cls.learning_package.pk, + text_media_type.id, + text="This is some data", + created=cls.now, + ) + api.create_component_version_content( + new_problem_version.pk, + new_txt_content.pk, + key="hello.txt", + ) + # Create a Draft component, one in each learning package cls.draft_component, _ = api.create_component_and_version( cls.learning_package.id, @@ -85,13 +100,25 @@ def setUpTestData(cls): created_by=cls.user.id, ) - api.create_next_component_version( + new_html_version = api.create_next_component_version( cls.draft_component.pk, title="My draft html v2", content_to_replace={}, created=cls.now, ) + cls.html_asset_content = api.get_or_create_file_content( + cls.learning_package.id, + html_media_type.id, + data=b"hello world!", + created=cls.now, + ) + api.create_component_version_content( + new_html_version.pk, + cls.html_asset_content.id, + key="static/hello.html", + ) + components = api.get_publishable_entities(cls.learning_package) cls.all_components = components @@ -111,25 +138,29 @@ def check_zip_file_structure(self, zip_path: Path): """ with zipfile.ZipFile(zip_path, "r") as zip_file: # Check that the zip file contains the expected files - expected_paths = [ - "package.toml", - "entities/", + + expected_directories = [ "collections/", - "entities/xblock.v1/", - "entities/xblock.v1/html/", - "entities/xblock.v1/html/my_draft_example_af06e1.toml", - "entities/xblock.v1/html/my_draft_example_af06e1/", - "entities/xblock.v1/html/my_draft_example_af06e1/component_versions/", - "entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/", "entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/", - "entities/xblock.v1/problem/", - "entities/xblock.v1/problem/my_published_example_386dce/", - "entities/xblock.v1/problem/my_published_example_386dce.toml", - "entities/xblock.v1/problem/my_published_example_386dce/component_versions/", - "entities/xblock.v1/problem/my_published_example_386dce/component_versions/v1/", "entities/xblock.v1/problem/my_published_example_386dce/component_versions/v1/static/", + "entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/static/", ] + expected_files = [ + # Learning package files + "package.toml", + + # Entity TOML files + "entities/xblock.v1/html/my_draft_example_af06e1.toml", + "entities/xblock.v1/problem/my_published_example_386dce.toml", + + # Entity static content files + "entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/hello.html", + "entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/hello.txt", + ] + + expected_paths = expected_directories + expected_files + # Check that all expected paths are present zip_name_list = zip_file.namelist() for expected_path in expected_paths: @@ -207,3 +238,31 @@ def test_dump_nonexistent_learning_package(self): # Attempt to dump a learning package that does not exist call_command("lp_dump", lp_key, file_name, stdout=out) self.assertIn("Learning package 'nonexistent_lp' does not exist", out.getvalue()) + + def test_queries_n_plus_problem(self): + """ + Test n plus problem over LearningPackageZipper for performance. + Regardless of the number of entities, the query count should remain on 3 + Why? + 1 query for PublishableEntity + select_related joins + 1 query for all draft contents + 1 query for all published contents + """ + zipper = LearningPackageZipper(self.learning_package) + entities = zipper.get_publishable_entities() + with self.assertNumQueries(3): + list(entities) # force evaluation + self.assertEqual(len(entities), 2) + # Add another component + api.create_component_and_version( + self.learning_package.id, + self.problem_type, + local_key="my_published_example2", + title="My published problem 2", + created=self.now, + created_by=self.user.id, + ) + entities = zipper.get_publishable_entities() + with self.assertNumQueries(3): + list(entities) # force evaluation + self.assertEqual(len(entities), 3)