diff --git a/openedx_learning/apps/authoring/backup_restore/toml.py b/openedx_learning/apps/authoring/backup_restore/toml.py index c96c786cb..348658f39 100644 --- a/openedx_learning/apps/authoring/backup_restore/toml.py +++ b/openedx_learning/apps/authoring/backup_restore/toml.py @@ -12,7 +12,7 @@ from openedx_learning.apps.authoring.publishing.models.learning_package import LearningPackage -def toml_learning_package(learning_package: LearningPackage) -> str: +def toml_learning_package(learning_package: LearningPackage, timestamp: datetime) -> str: """ Create a TOML representation of the learning package. @@ -27,7 +27,7 @@ def toml_learning_package(learning_package: LearningPackage) -> str: updated = 2025-09-03T17:50:59.536190Z """ doc = tomlkit.document() - doc.add(tomlkit.comment(f"Datetime of the export: {datetime.now()}")) + doc.add(tomlkit.comment(f"Datetime of the export: {timestamp}")) section = tomlkit.table() section.add("title", learning_package.title) section.add("key", learning_package.key) @@ -40,6 +40,8 @@ def toml_learning_package(learning_package: LearningPackage) -> str: def _get_toml_publishable_entity_table( entity: PublishableEntity, + draft_version: PublishableEntityVersion | None, + published_version: PublishableEntityVersion | None, include_versions: bool = True) -> tomlkit.items.Table: """ Create a TOML representation of a publishable entity. @@ -64,28 +66,30 @@ def _get_toml_publishable_entity_table( entity_table.add("can_stand_alone", entity.can_stand_alone) # Add key since the toml filename doesn't show the real key entity_table.add("key", entity.key) + entity_table.add("created", entity.created) if not include_versions: return entity_table - current_draft_version = publishing_api.get_draft_version(entity) - current_published_version = publishing_api.get_published_version(entity) - - if current_draft_version: + if draft_version: draft_table = tomlkit.table() - draft_table.add("version_num", current_draft_version.version_num) + draft_table.add("version_num", draft_version.version_num) entity_table.add("draft", draft_table) published_table = tomlkit.table() - if current_published_version: - published_table.add("version_num", current_published_version.version_num) + if published_version: + published_table.add("version_num", published_version.version_num) else: published_table.add(tomlkit.comment("unpublished: no published_version_num")) entity_table.add("published", published_table) return entity_table -def toml_publishable_entity(entity: PublishableEntity, versions_to_write: list[PublishableEntityVersion]) -> str: +def toml_publishable_entity( + entity: PublishableEntity, + versions_to_write: list[PublishableEntityVersion], + draft_version: PublishableEntityVersion | None, + published_version: PublishableEntityVersion | None) -> str: """ Create a TOML representation of a publishable entity and its versions. @@ -114,7 +118,7 @@ def toml_publishable_entity(entity: PublishableEntity, versions_to_write: list[P [version.container.unit] graded = true """ - entity_table = _get_toml_publishable_entity_table(entity) + entity_table = _get_toml_publishable_entity_table(entity, draft_version, published_version) doc = tomlkit.document() doc.add("entity", entity_table) doc.add(tomlkit.nl()) @@ -152,16 +156,22 @@ def toml_publishable_entity_version(version: PublishableEntityVersion) -> tomlki version_table.add("title", version.title) version_table.add("uuid", str(version.uuid)) version_table.add("version_num", version.version_num) + container_table = tomlkit.table() - container_table.add("children", []) + + children = [] + if hasattr(version, 'containerversion'): + children = publishing_api.get_container_children_entities_keys(version.containerversion) + container_table.add("children", children) + unit_table = tomlkit.table() - unit_table.add("graded", True) + container_table.add("unit", unit_table) version_table.add("container", container_table) return version_table # For use in AoT -def toml_collection(collection: Collection) -> str: +def toml_collection(collection: Collection, entity_keys: list[str]) -> str: """ Create a TOML representation of a collection. @@ -178,7 +188,6 @@ def toml_collection(collection: Collection) -> str: """ doc = tomlkit.document() - entity_keys = collection.entities.order_by("key").values_list("key", flat=True) entities_array = tomlkit.array() entities_array.extend(entity_keys) entities_array.multiline(True) diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py index 2cc61ca8d..dc71ca1ae 100644 --- a/openedx_learning/apps/authoring/backup_restore/zipper.py +++ b/openedx_learning/apps/authoring/backup_restore/zipper.py @@ -4,8 +4,9 @@ """ import hashlib import zipfile +from datetime import datetime, timezone from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Tuple from django.db.models import Prefetch, QuerySet from django.utils.text import slugify @@ -64,18 +65,69 @@ class LearningPackageZipper: def __init__(self, learning_package: LearningPackage): self.learning_package = learning_package self.folders_already_created: set[Path] = set() - - def create_folder(self, folder_path: Path, zip_file: zipfile.ZipFile) -> None: + self.entities_filenames_already_created: set[str] = set() + self.utc_now = datetime.now(tz=timezone.utc) + + def _ensure_parent_folders( + self, + zip_file: zipfile.ZipFile, + path: Path, + timestamp: datetime, + ) -> None: """ - Create a folder for the zip file structure. - Skips creating the folder if it already exists based on the folder path. - Args: - folder_path (Path): The path of the folder to create. + Ensure all parent folders for the given path exist in the zip. + """ + for parent in path.parents[::-1]: + if parent != Path(".") and parent not in self.folders_already_created: + folder_info = zipfile.ZipInfo(str(parent) + "/") + folder_info.date_time = timestamp.timetuple()[:6] + zip_file.writestr(folder_info, "") + self.folders_already_created.add(parent) + + def add_folder_to_zip( + self, + zip_file: zipfile.ZipFile, + folder: Path, + timestamp: datetime | None = None, + ) -> None: + """ + Explicitly add an empty folder into the zip structure. """ - if folder_path not in self.folders_already_created: - zip_info = zipfile.ZipInfo(str(folder_path) + "/") - zip_file.writestr(zip_info, "") # Add explicit empty directory entry - self.folders_already_created.add(folder_path) + if folder in self.folders_already_created: + return + + if timestamp is None: + timestamp = self.utc_now + + self._ensure_parent_folders(zip_file, folder, timestamp) + + folder_info = zipfile.ZipInfo(str(folder) + "/") + folder_info.date_time = timestamp.timetuple()[:6] + zip_file.writestr(folder_info, "") + self.folders_already_created.add(folder) + + def add_file_to_zip( + self, + zip_file: zipfile.ZipFile, + file_path: Path, + content: bytes | str | None = None, + timestamp: datetime | None = None, + ) -> None: + """ + Add a file into the zip structure. + """ + if timestamp is None: + timestamp = self.utc_now + + self._ensure_parent_folders(zip_file, file_path, timestamp) + + file_info = zipfile.ZipInfo(str(file_path)) + file_info.date_time = timestamp.timetuple()[:6] + + if isinstance(content, str): + content = content.encode("utf-8") + + zip_file.writestr(file_info, content or b"") def get_publishable_entities(self) -> QuerySet[PublishableEntity]: """ @@ -108,6 +160,7 @@ def get_publishable_entities(self) -> QuerySet[PublishableEntity]: to_attr="prefetched_contents", ), ) + .order_by("key") ) def get_collections(self) -> QuerySet[Collection]: @@ -119,10 +172,20 @@ def get_collections(self) -> QuerySet[Collection]: .prefetch_related("entities") ) - def get_versions_to_write(self, entity: PublishableEntity): + def get_versions_to_write( + self, entity: PublishableEntity + ) -> Tuple[List[PublishableEntityVersion], + Optional[PublishableEntityVersion], + Optional[PublishableEntityVersion]]: """ Get the versions of a publishable entity that should be written to the zip file. It retrieves both draft and published versions. + + Returns: + Tuple containing: + - versions_to_write: List of PublishableEntityVersion to write. + - draft_version: The current draft version, if any. + - published_version: The current published version, if any. """ draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity) published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity) @@ -131,7 +194,42 @@ def get_versions_to_write(self, entity: PublishableEntity): if published_version and published_version != draft_version: versions_to_write.append(published_version) - return versions_to_write + return versions_to_write, draft_version, published_version + + def get_entity_toml_filename(self, entity_key: str) -> str: + """ + Generate a unique TOML filename for a publishable entity. + Ensures that the filename is unique within the zip file. + + Behavior: + - If the slugified key has not been used yet, use it as the filename. + - If it has been used, append a short hash to ensure uniqueness. + + Args: + entity_key (str): The key of the publishable entity. + + Returns: + str: A unique TOML filename for the entity. + """ + slugify_name = slugify(entity_key, allow_unicode=True) + + if slugify_name in self.entities_filenames_already_created: + filename = slugify_hashed_filename(entity_key) + else: + filename = slugify_name + + self.entities_filenames_already_created.add(slugify_name) + return filename + + def get_latest_modified(self, versions_to_check: List[PublishableEntityVersion]) -> datetime: + """ + Get the latest modification timestamp among the learning package and its entities. + """ + latest = self.learning_package.updated + for version in versions_to_check: + if version and version.created > latest: + latest = version.created + return latest def create_zip(self, path: str) -> None: """ @@ -144,16 +242,16 @@ def create_zip(self, path: str) -> None: with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf: # Add the package.toml file - package_toml_content: str = toml_learning_package(self.learning_package) - zipf.writestr(TOML_PACKAGE_NAME, package_toml_content) + package_toml_content: str = toml_learning_package(self.learning_package, self.utc_now) + self.add_file_to_zip(zipf, Path(TOML_PACKAGE_NAME), package_toml_content, self.learning_package.updated) # Add the entities directory entities_folder = Path("entities") - self.create_folder(entities_folder, zipf) + self.add_folder_to_zip(zipf, entities_folder, timestamp=self.learning_package.updated) # Add the collections directory collections_folder = Path("collections") - self.create_folder(collections_folder, zipf) + self.add_folder_to_zip(zipf, collections_folder, timestamp=self.learning_package.updated) # ------ ENTITIES SERIALIZATION ------------- @@ -163,17 +261,21 @@ def create_zip(self, path: str) -> None: for entity in publishable_entities: # entity: PublishableEntity = entity # Type hint for clarity - # Get the draft and published versions - versions_to_write: List[PublishableEntityVersion] = self.get_versions_to_write(entity) + # Get the versions to serialize for this entity + versions_to_write, draft_version, published_version = self.get_versions_to_write(entity) + + latest_modified = self.get_latest_modified(versions_to_write) # Create a TOML representation of the entity - entity_toml_content: str = toml_publishable_entity(entity, versions_to_write) + entity_toml_content: str = toml_publishable_entity( + entity, versions_to_write, draft_version, published_version + ) if hasattr(entity, 'container'): - entity_slugify_hash = slugify_hashed_filename(entity.key) - entity_toml_filename = f"{entity_slugify_hash}.toml" + entity_filename = self.get_entity_toml_filename(entity.key) + entity_toml_filename = f"{entity_filename}.toml" entity_toml_path = entities_folder / entity_toml_filename - zipf.writestr(str(entity_toml_path), entity_toml_content) + self.add_file_to_zip(zipf, entity_toml_path, entity_toml_content, timestamp=latest_modified) if hasattr(entity, 'component'): # Create the component folder structure for the entity. The structure is as follows: @@ -186,35 +288,31 @@ def create_zip(self, path: str) -> None: # v1/ # static/ - # Generate the slugified hash for the component local key - # Example: if the local key is "my_component", the slugified hash might be "my_component_123456" - # It's a combination of the local key and a hash and should be unique - entity_slugify_hash = slugify_hashed_filename(entity.component.local_key) + entity_filename = self.get_entity_toml_filename(entity.component.local_key) - # Create the component namespace folder - # Example of component namespace is: "entities/xblock.v1/" - component_namespace_folder = entities_folder / entity.component.component_type.namespace - self.create_folder(component_namespace_folder, zipf) + component_root_folder = ( + # Example: "entities/xblock.v1/html/" + entities_folder + / entity.component.component_type.namespace + / entity.component.component_type.name + ) - # Create the component type folder - # Example of component type is: "entities/xblock.v1/html/" - component_type_folder = component_namespace_folder / entity.component.component_type.name - self.create_folder(component_type_folder, zipf) + component_folder = ( + # Example: "entities/xblock.v1/html/my_component_123456/" + component_root_folder + / entity_filename + ) - # Create the component id folder - # Example of component id is: "entities/xblock.v1/html/my_component_123456/" - component_id_folder = component_type_folder / entity_slugify_hash - self.create_folder(component_id_folder, zipf) + component_version_folder = ( + # Example: "entities/xblock.v1/html/my_component_123456/component_versions/" + component_folder + / "component_versions" + ) # Add the entity TOML file inside the component type folder as well # Example: "entities/xblock.v1/html/my_component_123456.toml" - component_entity_toml_path = component_type_folder / f"{entity_slugify_hash}.toml" - zipf.writestr(str(component_entity_toml_path), entity_toml_content) - - # Add component version folder into the component id folder - # Example: "entities/xblock.v1/html/my_component_123456/component_versions/" - component_version_folder = component_id_folder / "component_versions" - self.create_folder(component_version_folder, zipf) + component_entity_toml_path = component_root_folder / f"{entity_filename}.toml" + self.add_file_to_zip(zipf, component_entity_toml_path, entity_toml_content, latest_modified) # ------ COMPONENT VERSIONING ------------- # Focusing on draft and published versions only @@ -222,11 +320,11 @@ def create_zip(self, path: str) -> None: # Create a folder for the version version_number = f"v{version.version_num}" version_folder = component_version_folder / version_number - self.create_folder(version_folder, zipf) + self.add_folder_to_zip(zipf, version_folder, timestamp=version.created) # Add static folder for the version static_folder = version_folder / "static" - self.create_folder(static_folder, zipf) + self.add_folder_to_zip(zipf, static_folder, timestamp=version.created) # ------ COMPONENT STATIC CONTENT ------------- component_version: ComponentVersion = version.componentversion @@ -253,12 +351,18 @@ def create_zip(self, path: str) -> None: else: # If no file and no text, we skip this content continue - zipf.writestr(str(file_path), file_data) + self.add_file_to_zip(zipf, file_path, file_data, timestamp=content.created) # ------ COLLECTION SERIALIZATION ------------- collections = self.get_collections() for collection in collections: - collection_hash_slug = slugify_hashed_filename(collection.key) + collection_hash_slug = self.get_entity_toml_filename(collection.key) collection_toml_file_path = collections_folder / f"{collection_hash_slug}.toml" - zipf.writestr(str(collection_toml_file_path), toml_collection(collection)) + entity_keys_related = collection.entities.order_by("key").values_list("key", flat=True) + self.add_file_to_zip( + zipf, + collection_toml_file_path, + toml_collection(collection, list(entity_keys_related)), + timestamp=collection.modified, + ) diff --git a/openedx_learning/apps/authoring/publishing/api.py b/openedx_learning/apps/authoring/publishing/api.py index 68183c64f..557cf45e6 100644 --- a/openedx_learning/apps/authoring/publishing/api.py +++ b/openedx_learning/apps/authoring/publishing/api.py @@ -89,6 +89,7 @@ "get_containers_with_entity", "get_container_children_count", "bulk_draft_changes_for", + "get_container_children_entities_keys", ] @@ -1468,6 +1469,22 @@ def get_container_children_count( return container_version.entity_list.entitylistrow_set.filter(**filter_deleted).count() +def get_container_children_entities_keys(container_version: ContainerVersion) -> list[str]: + """ + Fetch the list of entity keys for all entities in the given container version. + + Args: + container_version: The ContainerVersion to fetch the entity keys for. + Returns: + A list of entity keys for all entities in the container version, ordered by entity key. + """ + return list( + container_version.entity_list.entitylistrow_set + .values_list("entity__key", flat=True) + .order_by("order_num") + ) + + def bulk_draft_changes_for( learning_package_id: int, changed_by: int | None = None, diff --git a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py index c7f144841..511aa54e6 100644 --- a/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py +++ b/tests/openedx_learning/apps/authoring/backup_restore/test_backup.py @@ -30,6 +30,7 @@ class LpDumpCommandTestCase(TestCase): html_type: str problem_type: str published_component: Component + published_component2: Component draft_component: Component html_asset_content: Content collection: Collection @@ -72,6 +73,17 @@ def setUpTestData(cls): created_by=cls.user.id, ) + # Make and publish one Component + # Same local key as above to test uniqueness of slugified hash + cls.published_component2, _ = api.create_component_and_version( + cls.learning_package.id, + cls.problem_type, + local_key="My_published_example", + title="My published problem 2", + created=cls.now, + created_by=cls.user.id, + ) + # Create a Content entry for the published Component api.publish_all_drafts( cls.learning_package.id, @@ -124,7 +136,7 @@ def setUpTestData(cls): api.create_component_version_content( new_html_version.pk, cls.html_asset_content.id, - key="static/hello.html", + key="static/other/subdirectory/hello.html", ) components = api.get_publishable_entities(cls.learning_package) @@ -163,7 +175,8 @@ def check_zip_file_structure(self, zip_path: Path): expected_directories = [ "collections/", - "entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/", + "entities/xblock.v1/html/my_draft_example/component_versions/v2/static/", + "entities/xblock.v1/problem/my_published_example/component_versions/v1/static/", "entities/xblock.v1/problem/my_published_example_386dce/component_versions/v1/static/", "entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/static/", ] @@ -173,15 +186,16 @@ def check_zip_file_structure(self, zip_path: Path): "package.toml", # Entity TOML files - "entities/xblock.v1/html/my_draft_example_af06e1.toml", + "entities/xblock.v1/html/my_draft_example.toml", + "entities/xblock.v1/problem/my_published_example.toml", "entities/xblock.v1/problem/my_published_example_386dce.toml", # Entity static content files - "entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/hello.html", + "entities/xblock.v1/html/my_draft_example/component_versions/v2/static/other/subdirectory/hello.html", "entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/hello.txt", # Collections - "collections/col1_06bb25.toml", + "collections/col1.toml", ] expected_paths = expected_directories + expected_files @@ -231,7 +245,7 @@ def test_lp_dump_command(self): '[entity.published]', 'version_num = 1', ], - "entities/xblock.v1/html/my_draft_example_af06e1.toml": [ + "entities/xblock.v1/html/my_draft_example.toml": [ '[entity]', f'uuid = "{self.draft_component.uuid}"', 'can_stand_alone = true', @@ -277,7 +291,7 @@ def test_queries_n_plus_problem(self): entities = zipper.get_publishable_entities() with self.assertNumQueries(3): list(entities) # force evaluation - self.assertEqual(len(entities), 2) + self.assertEqual(len(entities), 3) # Add another component api.create_component_and_version( self.learning_package.id, @@ -290,4 +304,4 @@ def test_queries_n_plus_problem(self): entities = zipper.get_publishable_entities() with self.assertNumQueries(3): list(entities) # force evaluation - self.assertEqual(len(entities), 3) + self.assertEqual(len(entities), 4)