diff --git a/openedx_learning/apps/authoring/backup_restore/serializers.py b/openedx_learning/apps/authoring/backup_restore/serializers.py
new file mode 100644
index 000000000..97a0b9c21
--- /dev/null
+++ b/openedx_learning/apps/authoring/backup_restore/serializers.py
@@ -0,0 +1,42 @@
+"""
+The serializers module for restoration of authoring data.
+"""
+from rest_framework import serializers
+
+from openedx_learning.apps.authoring.components import api as components_api
+
+
+class ComponentSerializer(serializers.Serializer):  # pylint: disable=abstract-method
+    """
+    Serializer for components.
+    Contains logic to convert entity_key to component_type and local_key.
+    """
+    can_stand_alone = serializers.BooleanField(required=True)
+    key = serializers.CharField(required=True)
+    created = serializers.DateTimeField(required=True)
+    created_by = serializers.CharField(required=True, allow_null=True)
+
+    def validate(self, attrs):
+        """
+        Custom validation logic:
+        parse the entity_key into (component_type, local_key).
+        """
+        entity_key = attrs["key"]
+        try:
+            component_type_obj, local_key = components_api.get_or_create_component_type_by_entity_key(entity_key)
+            attrs["component_type"] = component_type_obj
+            attrs["local_key"] = local_key
+        except ValueError as exc:
+            raise serializers.ValidationError({"key": str(exc)}) from exc
+        return attrs
+
+
+class ComponentVersionSerializer(serializers.Serializer):  # pylint: disable=abstract-method
+    """
+    Serializer for component versions.
+    """
+    title = serializers.CharField(required=True)
+    entity_key = serializers.CharField(required=True)
+    created = serializers.DateTimeField(required=True)
+    created_by = serializers.CharField(required=True, allow_null=True)
+    content_to_replace = serializers.DictField(child=serializers.CharField(), required=True)
diff --git a/openedx_learning/apps/authoring/backup_restore/toml.py b/openedx_learning/apps/authoring/backup_restore/toml.py
index 803a35b2e..fcef12254 100644
--- a/openedx_learning/apps/authoring/backup_restore/toml.py
+++ b/openedx_learning/apps/authoring/backup_restore/toml.py
@@ -117,7 +117,6 @@ def toml_publishable_entity(
         children = []
 
         [version.container.unit]
-        graded = true
     """
    entity_table = _get_toml_publishable_entity_table(entity, draft_version, published_version)
    doc = tomlkit.document()
@@ -219,3 +218,21 @@ def parse_learning_package_toml(content: str) -> dict:
     if "key" not in lp_data["learning_package"]:
         raise ValueError("Invalid learning package TOML: missing 'key' in 'learning_package' section")
     return lp_data["learning_package"]
+
+
+def parse_publishable_entity_toml(content: str) -> tuple[Dict[str, Any], list]:
+    """
+    Parse a publishable entity TOML file and return its entity fields and its list of versions.
+    """
+    pe_data: Dict[str, Any] = tomlkit.parse(content)
+
+    # Validate the minimum required fields
+    if "entity" not in pe_data:
+        raise ValueError("Invalid publishable entity TOML: missing 'entity' section")
+    if "version" not in pe_data:
+        raise ValueError("Invalid publishable entity TOML: missing 'version' section")
+    if "key" not in pe_data["entity"]:
+        raise ValueError("Invalid publishable entity TOML: missing 'key' field")
+    if "can_stand_alone" not in pe_data["entity"]:
+        raise ValueError("Invalid publishable entity TOML: missing 'can_stand_alone' field")
+    return pe_data["entity"], pe_data.get("version", [])
diff --git a/openedx_learning/apps/authoring/backup_restore/zipper.py b/openedx_learning/apps/authoring/backup_restore/zipper.py
index 8486605a2..2bd66ce63 100644
--- a/openedx_learning/apps/authoring/backup_restore/zipper.py
+++ b/openedx_learning/apps/authoring/backup_restore/zipper.py
@@ -6,7 +6,7 @@
 import zipfile
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Optional, Tuple, TypedDict
 
 from django.db import transaction
 from django.db.models import Prefetch, QuerySet
@@ -14,6 +14,7 @@
 
 from openedx_learning.api.authoring_models import (
     Collection,
+    ComponentType,
     ComponentVersion,
     ComponentVersionContent,
     Content,
@@ -21,18 +22,27 @@
     PublishableEntity,
     PublishableEntityVersion,
 )
+from openedx_learning.apps.authoring.backup_restore.serializers import ComponentSerializer, ComponentVersionSerializer
 from openedx_learning.apps.authoring.backup_restore.toml import (
     parse_learning_package_toml,
+    parse_publishable_entity_toml,
     toml_collection,
     toml_learning_package,
     toml_publishable_entity,
 )
 from openedx_learning.apps.authoring.collections import api as collections_api
+from openedx_learning.apps.authoring.components import api as components_api
 from openedx_learning.apps.authoring.publishing import api as publishing_api
 
 TOML_PACKAGE_NAME = "package.toml"
 
 
+class ComponentDefaults(TypedDict):
+    content_to_replace: dict[str, int | bytes | None]
+    created: datetime
+    created_by: Optional[int]
+
+
 def slugify_hashed_filename(identifier: str) -> str:
     """
     Generate a filesystem-safe filename from an identifier.
@@ -386,6 +396,8 @@ class LearningPackageUnzipper:
 
     def __init__(self) -> None:
         self.utc_now: datetime = datetime.now(tz=timezone.utc)
+        self.component_types_cache: dict[Tuple[str, str], ComponentType] = {}
+        self.errors: list[dict[str, Any]] = []
 
     # --------------------------
     # Public API
@@ -451,9 +463,98 @@ def _restore_containers(
     def _restore_components(
         self, zipf: zipfile.ZipFile, component_files: List[str], learning_package: LearningPackage
     ) -> None:
-        """Restore components from the zip archive."""
-        for component_file in component_files:
-            self._load_component(zipf, component_file, learning_package)
+        """
+        Restore components and their versions from the zip archive.
+        This method validates all components and their versions before persisting any data.
+        If any validation errors occur, no data is persisted and errors are collected.
+        """
+
+        validated_components = []
+        validated_drafts = []
+        validated_published = []
+
+        for file in component_files:
+            if not file.endswith(".toml"):
+                # Skip non-TOML files
+                continue
+
+            # Load component data from the TOML file
+            component_data, draft_version, published_version = self._load_component_data(zipf, file)
+
+            # Validate component data
+            component_serializer = ComponentSerializer(data={
+                "created": self.utc_now,
+                "created_by": None,
+                **component_data,
+            })
+            if not component_serializer.is_valid():
+                # Collect errors and continue
+                self.errors.append({"file": file, "errors": component_serializer.errors})
+                continue
+            # Collect component validated data
+            validated_components.append(component_serializer.validated_data)
+
+            # Load and validate versions
+            valid_versions = self._validate_versions(
+                component_serializer.validated_data,
+                draft_version,
+                published_version
+            )
+            if valid_versions["draft"]:
+                validated_drafts.append(valid_versions["draft"])
+            if valid_versions["published"]:
+                validated_published.append(valid_versions["published"])
+
+        if self.errors:
+            return
+
+        # Persist all validated components and their versions if there are no errors
+        self._persist_components(learning_package, validated_components, validated_drafts, validated_published)
+
+    def _persist_components(
+        self,
+        learning_package: LearningPackage,
+        validated_components: List[dict[str, Any]],
+        validated_drafts: List[dict[str, Any]],
+        validated_published: List[dict[str, Any]],
+    ) -> None:
+        """
+        Persist validated components and their versions to the database.
+
+        The operation is performed within a bulk draft changes context so that
+        all changes are grouped into a single Draft Change Log record.
+        """
+        components_by_key = {}  # Map entity_key to Component instance
+        # Step 1:
+        # Create components and their publishable entities
+        # Create all published versions as a draft first
+        # Publish all drafts
+        with publishing_api.bulk_draft_changes_for(learning_package.id):
+            for valid_component in validated_components:
+                entity_key = valid_component.pop("key")
+                component = components_api.create_component(
+                    learning_package.id,
+                    **valid_component,
+                )
+                components_by_key[entity_key] = component
+
+            for valid_draft in validated_published:
+                entity_key = valid_draft.pop("entity_key")
+                components_api.create_next_component_version(
+                    components_by_key[entity_key].publishable_entity.id,
+                    **valid_draft
+                )
+
+            publishing_api.publish_all_drafts(learning_package.id)
+
+        # Step 2: Create all draft versions
+        with publishing_api.bulk_draft_changes_for(learning_package.id):
+            for valid_draft in validated_drafts:
+                entity_key = valid_draft.pop("entity_key")
+                components_api.create_next_component_version(
+                    components_by_key[entity_key].publishable_entity.id,
+                    **valid_draft
+                )
 
     def _restore_collections(
         self, zipf: zipfile.ZipFile, collection_files: List[str], learning_package: LearningPackage
@@ -488,12 +589,12 @@ def _load_container(
         )
         """
 
-    def _load_component(
-        self, zipf: zipfile.ZipFile, component_file: str, learning_package: LearningPackage
-    ):  # pylint: disable=W0613
-        """Load and persist a component (placeholder)."""
-        # TODO: implement actual parsing
-        return None
+    def _load_component_data(self, zipf, component_file):
+        """Load component data and its versions from a TOML file."""
+        content = self._read_file_from_zip(zipf, component_file)
+        component_data, component_version_data = parse_publishable_entity_toml(content)
+        draft_version, published_version = self._get_versions_to_write(component_version_data, component_data)
+        return component_data, draft_version, published_version
 
     # --------------------------
     # Utilities
@@ -529,3 +630,41 @@ def _get_organized_file_list(self, file_paths: List[str]) -> dict[str, Any]:
                 organized["collections"].append(path)
 
         return organized
+
+    def _get_versions_to_write(
+        self,
+        component_version_data: List[dict[str, Any]],
+        component_data: dict[str, Any]
+    ) -> Tuple[Optional[dict[str, Any]], Optional[dict[str, Any]]]:
+        """Return the draft and published versions to write, based on component data."""
+
+        draft_version_num = component_data.get("draft", {}).get("version_num")
+        published_version_num = component_data.get("published", {}).get("version_num")
+
+        # Build lookup by version_num
+        version_lookup = {v.get("version_num"): v for v in component_version_data}
+
+        return (
+            version_lookup.get(draft_version_num) if draft_version_num else None,
+            version_lookup.get(published_version_num) if published_version_num else None,
+        )
+
+    def _validate_versions(self, component_validated_data, draft_version, published_version):
+        """Validate draft and published versions using ComponentVersionSerializer."""
+        valid_versions = {"draft": None, "published": None}
+        for label, version in [("draft", draft_version), ("published", published_version)]:
+            if version is None:
+                continue
+            entity_key = component_validated_data["key"]
+            version_data = {
+                "entity_key": entity_key,
+                "content_to_replace": {},
+                "created": self.utc_now,
+                "created_by": None,
+                **version,
+            }
+            serializer = ComponentVersionSerializer(data=version_data)
+            if not serializer.is_valid():
+                self.errors.append(f"Errors in {label} version for {entity_key}: {serializer.errors}")
+            valid_versions[label] = serializer.validated_data
+        return valid_versions
diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py
index 4af0e360e..a252bd5ed 100644
--- a/openedx_learning/apps/authoring/components/api.py
+++ b/openedx_learning/apps/authoring/components/api.py
@@ -34,6 +34,7 @@
 # to be callable only by other apps in the authoring package.
 __all__ = [
     "get_or_create_component_type",
+    "get_or_create_component_type_by_entity_key",
     "create_component",
     "create_component_version",
     "create_next_component_version",
@@ -73,6 +74,27 @@ def get_or_create_component_type(namespace: str, name: str) -> ComponentType:
     return component_type
 
 
+def get_or_create_component_type_by_entity_key(entity_key: str) -> tuple[ComponentType, str]:
+    """
+    Get or create a ComponentType based on a full entity key string.
+
+    The entity key is expected to be in the format
+    ``"{namespace}:{type_name}:{local_key}"``. This function will parse out the
+    ``namespace`` and ``type_name`` parts and use those to get or create the
+    ComponentType.
+
+    Raises ValueError if the entity_key is not in the expected format.
+    """
+    try:
+        namespace, type_name, local_key = entity_key.split(':', 2)
+    except ValueError as exc:
+        raise ValueError(
+            f"Invalid entity_key format: {entity_key!r}. "
+            "Expected format: '{namespace}:{type_name}:{local_key}'"
+        ) from exc
+    return get_or_create_component_type(namespace, type_name), local_key
+
+
 def create_component(
     learning_package_id: int,
     /,
diff --git a/tests/openedx_learning/apps/authoring/components/test_api.py b/tests/openedx_learning/apps/authoring/components/test_api.py
index 6f1656ec9..69a87044e 100644
--- a/tests/openedx_learning/apps/authoring/components/test_api.py
+++ b/tests/openedx_learning/apps/authoring/components/test_api.py
@@ -605,3 +605,38 @@ def setUpTestData(cls) -> None:
             username="user",
             email="user@example.com",
         )
+
+
+class TestComponentTypeUtils(TestCase):
+    """
+    Test the component type utility functions.
+    """
+
+    def test_get_or_create_component_type_by_entity_key_creates_new(self):
+        comp_type, local_key = components_api.get_or_create_component_type_by_entity_key(
+            "video:youtube:abcd1234"
+        )
+
+        assert isinstance(comp_type, ComponentType)
+        assert comp_type.namespace == "video"
+        assert comp_type.name == "youtube"
+        assert local_key == "abcd1234"
+        assert ComponentType.objects.count() == 1
+
+    def test_get_or_create_component_type_by_entity_key_existing(self):
+        ComponentType.objects.create(namespace="video", name="youtube")
+
+        comp_type, local_key = components_api.get_or_create_component_type_by_entity_key(
+            "video:youtube:efgh5678"
+        )
+
+        assert comp_type.namespace == "video"
+        assert comp_type.name == "youtube"
+        assert local_key == "efgh5678"
+        assert ComponentType.objects.count() == 1
+
+    def test_get_or_create_component_type_by_entity_key_invalid_format(self):
+        with self.assertRaises(ValueError) as ctx:
+            components_api.get_or_create_component_type_by_entity_key("not-enough-parts")
+
+        self.assertIn("Invalid entity_key format", str(ctx.exception))