124 changes: 103 additions & 21 deletions openedx_learning/apps/authoring/backup_restore/zipper.py
@@ -5,18 +5,21 @@
import hashlib
import zipfile
from pathlib import Path
from typing import Optional
from typing import List, Optional

from django.db.models import QuerySet
from django.db.models import Prefetch, QuerySet
from django.utils.text import slugify

from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity
from openedx_learning.apps.authoring.publishing import api as publishing_api
from openedx_learning.apps.authoring.publishing.models import (
from openedx_learning.api.authoring_models import (
ComponentVersion,
ComponentVersionContent,
Content,
LearningPackage,
PublishableEntity,
PublishableEntityVersion,
)
from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity
from openedx_learning.apps.authoring.publishing import api as publishing_api

TOML_PACKAGE_NAME = "package.toml"

@@ -68,6 +71,53 @@ def create_folder(self, folder_path: Path, zip_file: zipfile.ZipFile) -> None:
zip_file.writestr(zip_info, "") # Add explicit empty directory entry
self.folders_already_created.add(folder_path)

def get_publishable_entities(self) -> QuerySet[PublishableEntity]:
"""
Retrieve the publishable entities associated with the learning package.
Prefetches related data for efficiency.
"""
lp_id = self.learning_package.pk
publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id)
return (
publishable_entities
.select_related(
"container",
"component__component_type",
"draft__version__componentversion",
"published__version__componentversion",
)
.prefetch_related(
# We should re-evaluate the prefetching strategy here:
# the current approach may cause performance issues with
# very large libraries (up to 100K items), which may be
# too much data for this type of prefetch.
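# Note (descriptive only): to_attr="prefetched_contents" attaches the
# prefetched rows to each ComponentVersion, which is what create_zip later
# reads as version.componentversion.prefetched_contents instead of issuing
# a query per entity.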
Prefetch(
"draft__version__componentversion__componentversioncontent_set",
queryset=ComponentVersionContent.objects.select_related("content"),
to_attr="prefetched_contents",
),
Prefetch(
"published__version__componentversion__componentversioncontent_set",
queryset=ComponentVersionContent.objects.select_related("content"),
to_attr="prefetched_contents",
),
)
)

def get_versions_to_write(self, entity: PublishableEntity):
"""
Get the versions of a publishable entity that should be written to the zip file.
Returns the draft version (if any), plus the published version when it differs
from the draft, so at most two versions are written per entity.
"""
draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity)
published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity)

versions_to_write = [draft_version] if draft_version else []

if published_version and published_version != draft_version:
versions_to_write.append(published_version)
return versions_to_write

def create_zip(self, path: str) -> None:
"""
Creates a zip file containing the learning package data.
@@ -76,11 +126,10 @@ def create_zip(self, path: str) -> None:
Raises:
Exception: If the learning package cannot be found or if the zip creation fails.
"""
package_toml_content: str = toml_learning_package(self.learning_package)
lp_id = self.learning_package.pk

with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
# Add the package.toml string
# Add the package.toml file
package_toml_content: str = toml_learning_package(self.learning_package)
zipf.writestr(TOML_PACKAGE_NAME, package_toml_content)

# Add the entities directory
@@ -91,9 +140,11 @@ def create_zip(self, path: str) -> None:
collections_folder = Path("collections")
self.create_folder(collections_folder, zipf)

# Add each entity's TOML file
publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id)
publishable_entities = publishable_entities.select_related("container", "component__component_type")
# ------ ENTITIES SERIALIZATION -------------

# Get the publishable entities
publishable_entities: QuerySet[PublishableEntity] = self.get_publishable_entities()

for entity in publishable_entities:
# entity: PublishableEntity = entity # Type hint for clarity

@@ -116,38 +167,42 @@ def create_zip(self, path: str) -> None:
# component_versions/
# v1/
# static/

# Generate the slugified hash for the component local key
# Example: if the local key is "my_component", the slugified hash might be "my_component_123456"
# It combines the slugified local key with a short hash, so the result should be unique
entity_slugify_hash = slugify_hashed_filename(entity.component.local_key)

# Create the component namespace folder
# Example of component namespace is: "entities/xblock.v1/"
component_namespace_folder = entities_folder / entity.component.component_type.namespace
# Example of component namespace is: "xblock.v1"
self.create_folder(component_namespace_folder, zipf)

# Create the component type folder
# Example of component type is: "entities/xblock.v1/html/"
component_type_folder = component_namespace_folder / entity.component.component_type.name
# Example of component type is: "html"
self.create_folder(component_type_folder, zipf)

# Create the component id folder
# Example of component id is: "entities/xblock.v1/html/my_component_123456/"
component_id_folder = component_type_folder / entity_slugify_hash
# Example of component id is: "i-dont-like-the-sidebar-aa1645ade4a7"
self.create_folder(component_id_folder, zipf)

# Add the entity TOML file inside the component type folder as well
# Example: "entities/xblock.v1/html/my_component_123456.toml"
component_entity_toml_path = component_type_folder / f"{entity_slugify_hash}.toml"
zipf.writestr(str(component_entity_toml_path), entity_toml_content)

# Add component version folder into the component id folder
# Example: "entities/xblock.v1/html/my_component_123456/component_versions/"
component_version_folder = component_id_folder / "component_versions"
self.create_folder(component_version_folder, zipf)

# ------ COMPONENT VERSIONING -------------
# Focusing on draft and published versions

# Get the draft and published versions
draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity)
published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity)

versions_to_write = [draft_version] if draft_version else []

if published_version and published_version != draft_version:
versions_to_write.append(published_version)
versions_to_write: List[PublishableEntityVersion] = self.get_versions_to_write(entity)

for version in versions_to_write:
# Create a folder for the version
@@ -158,3 +213,30 @@ def create_zip(self, path: str) -> None:
# Add static folder for the version
static_folder = version_folder / "static"
self.create_folder(static_folder, zipf)

# ------ COMPONENT STATIC CONTENT -------------
component_version: ComponentVersion = version.componentversion

# Get content data associated with this version
contents: List[
ComponentVersionContent
] = component_version.prefetched_contents  # type: ignore[attr-defined]

for component_version_content in contents:
content: Content = component_version_content.content

# Important: component_version_content.key implicitly contains
# the file name and the file extension
file_path = version_folder / component_version_content.key

if content.has_file and content.path:
# If has_file, we pull it from the file system
with content.read_file() as f:
file_data = f.read()
elif not content.has_file and content.text:
# Otherwise, we use the text content as the file data
file_data = content.text
else:
# If no file and no text, we skip this content
continue
zipf.writestr(str(file_path), file_data)
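
For reference, a minimal usage sketch tying this file to the tests below: the lp_dump management command exercised there takes a learning package key and an output path, and the resulting archive can then be inspected with the standard zipfile module. The key "my_lp_key" and the file name "backup.zip" are placeholders, not values from this pull request:

import zipfile

from django.core.management import call_command

# Dump a learning package by key to a zip archive (same call the tests use).
call_command("lp_dump", "my_lp_key", "backup.zip")

# The archive contains package.toml, the entities/ and collections/ folders,
# per-entity TOML files, component_versions/<vN>/ folders and static content.
with zipfile.ZipFile("backup.zip") as zf:
    print("\n".join(zf.namelist()))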
95 changes: 77 additions & 18 deletions tests/openedx_learning/apps/authoring/backup_restore/test_backup.py
@@ -12,14 +12,15 @@

from openedx_learning.api import authoring as api
from openedx_learning.api.authoring_models import Component, LearningPackage
from openedx_learning.apps.authoring.backup_restore.zipper import LearningPackageZipper
from openedx_learning.lib.test_utils import TestCase

User = get_user_model()


class LpDumpCommandTestCase(TestCase):
"""
Test serving static assets (Content files, via Component lookup).
Test the lp_dump management command.
"""

learning_package: LearningPackage
@@ -28,7 +29,7 @@ class LpDumpCommandTestCase(TestCase):
@classmethod
def setUpTestData(cls):
"""
Initialize our content data
Initialize data for the whole TestCase
"""

# Create a user for the test
@@ -46,6 +47,8 @@ def setUpTestData(cls):
cls.now = datetime(2024, 8, 5, tzinfo=timezone.utc)

cls.xblock_v1_namespace = "xblock.v1"
html_media_type = api.get_or_create_media_type("text/html")
text_media_type = api.get_or_create_media_type("text/plain")

# Create component types
cls.html_type = api.get_or_create_component_type(cls.xblock_v1_namespace, "html")
@@ -68,13 +71,25 @@ def setUpTestData(cls):
published_at=cls.now,
)

api.create_next_component_version(
new_problem_version = api.create_next_component_version(
cls.published_component.pk,
title="My published problem draft v2",
content_to_replace={},
created=cls.now,
)

new_txt_content = api.get_or_create_text_content(
cls.learning_package.pk,
text_media_type.id,
text="This is some data",
created=cls.now,
)
api.create_component_version_content(
new_problem_version.pk,
new_txt_content.pk,
key="hello.txt",
)

# Create a Draft component, one in each learning package
cls.draft_component, _ = api.create_component_and_version(
cls.learning_package.id,
@@ -85,13 +100,25 @@ def setUpTestData(cls):
created_by=cls.user.id,
)

api.create_next_component_version(
new_html_version = api.create_next_component_version(
cls.draft_component.pk,
title="My draft html v2",
content_to_replace={},
created=cls.now,
)

cls.html_asset_content = api.get_or_create_file_content(
cls.learning_package.id,
html_media_type.id,
data=b"<html>hello world!</html>",
created=cls.now,
)
api.create_component_version_content(
new_html_version.pk,
cls.html_asset_content.id,
key="static/hello.html",
)

components = api.get_publishable_entities(cls.learning_package)
cls.all_components = components

@@ -111,25 +138,29 @@ def check_zip_file_structure(self, zip_path: Path):
"""
with zipfile.ZipFile(zip_path, "r") as zip_file:
# Check that the zip file contains the expected files
expected_paths = [
"package.toml",
"entities/",

expected_directories = [
"collections/",
"entities/xblock.v1/",
"entities/xblock.v1/html/",
"entities/xblock.v1/html/my_draft_example_af06e1.toml",
"entities/xblock.v1/html/my_draft_example_af06e1/",
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/",
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/",
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/",
"entities/xblock.v1/problem/",
"entities/xblock.v1/problem/my_published_example_386dce/",
"entities/xblock.v1/problem/my_published_example_386dce.toml",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v1/",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v1/static/",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/static/",
]

expected_files = [
# Learning package files
"package.toml",

# Entity TOML files
"entities/xblock.v1/html/my_draft_example_af06e1.toml",
"entities/xblock.v1/problem/my_published_example_386dce.toml",

# Entity static content files
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/hello.html",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/hello.txt",
]

expected_paths = expected_directories + expected_files

# Check that all expected paths are present
zip_name_list = zip_file.namelist()
for expected_path in expected_paths:
@@ -207,3 +238,31 @@ def test_dump_nonexistent_learning_package(self):
# Attempt to dump a learning package that does not exist
call_command("lp_dump", lp_key, file_name, stdout=out)
self.assertIn("Learning package 'nonexistent_lp' does not exist", out.getvalue())

def test_queries_n_plus_problem(self):
"""
Test the N+1 query problem on LearningPackageZipper for performance.
Regardless of the number of entities, the query count should remain at 3.
Why?
1 query for PublishableEntity + select_related joins
1 query for all draft contents
1 query for all published contents
"""
zipper = LearningPackageZipper(self.learning_package)
entities = zipper.get_publishable_entities()
with self.assertNumQueries(3):
list(entities) # force evaluation
self.assertEqual(len(entities), 2)
# Add another component
api.create_component_and_version(
self.learning_package.id,
self.problem_type,
local_key="my_published_example2",
title="My published problem 2",
created=self.now,
created_by=self.user.id,
)
entities = zipper.get_publishable_entities()
with self.assertNumQueries(3):
list(entities) # force evaluation
self.assertEqual(len(entities), 3)
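
The directory and file names asserted in check_zip_file_structure (for example my_draft_example_af06e1) come from slugify_hashed_filename, whose implementation is outside this diff. A rough sketch of the idea only, assuming it pairs Django's slugify with a short hash of the component's local key; the real helper may differ in hash choice and length:

import hashlib

from django.utils.text import slugify


def slugify_hashed_filename_sketch(local_key: str) -> str:
    """Illustrative stand-in, not the real helper used by the zipper."""
    # A 3-byte digest renders as 6 hex characters, matching suffixes like "af06e1".
    suffix = hashlib.blake2b(local_key.encode("utf-8"), digest_size=3).hexdigest()
    return f"{slugify(local_key)}_{suffix}"


# e.g. slugify_hashed_filename_sketch("my_draft_example") -> "my_draft_example_" plus 6 hex chars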