Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions openedx_learning/apps/authoring/backup_restore/api.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,25 @@
"""
Backup Restore API
"""
from openedx_learning.apps.authoring.backup_restore.zipper import LearningPackageZipper
import zipfile

from openedx_learning.apps.authoring.backup_restore.zipper import LearningPackageUnzipper, LearningPackageZipper
from openedx_learning.apps.authoring.publishing.api import get_learning_package_by_key


def create_zip_file(lp_key: str, path: str) -> None:
    """
    Write a dump zip file for one learning package.

    Args:
        lp_key: Key of the learning package to dump.
        path: Location of the zip file to create.

    Can throw a NotFoundError at get_learning_package_by_key
    """
    package = get_learning_package_by_key(lp_key)
    zipper = LearningPackageZipper(package)
    zipper.create_zip(path)


def load_dump_zip_file(path: str) -> None:
    """
    Load a zip file that was produced by create_zip_file.

    The archive at ``path`` is opened read-only and handed to a fresh
    LearningPackageUnzipper; the archive is closed when loading ends,
    whether it succeeded or raised.
    """
    with zipfile.ZipFile(path, mode="r") as archive:
        unzipper = LearningPackageUnzipper()
        unzipper.load(archive)
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Django management commands to handle backup and restore learning packages (WIP)
Django management commands to handle backup learning packages (WIP)
"""
import logging

Expand All @@ -25,7 +25,7 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
lp_key = options['lp_key']
file_name = options['file_name']
if not file_name.endswith(".zip"):
if not file_name.lower().endswith(".zip"):
raise CommandError("Output file name must end with .zip")
try:
create_zip_file(lp_key, file_name)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Django management commands to handle restore learning packages (WIP)
"""
import logging

from django.core.management import CommandError
from django.core.management.base import BaseCommand

from openedx_learning.apps.authoring.backup_restore.api import load_dump_zip_file

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Django management command to load a learning package from a zip file.
    """
    help = 'Load a learning package from a zip file.'

    def add_arguments(self, parser):
        """Register the positional ``file_name`` argument."""
        parser.add_argument('file_name', type=str, help='The name of the input zip file to load.')

    def handle(self, *args, **options):
        """
        Validate the file name, load the archive and report the outcome.

        Raises:
            CommandError: If the name does not end with ``.zip`` (case
                insensitive), if the zip file cannot be found, or if loading
                fails for any other reason.
        """
        file_name = options['file_name']
        if not file_name.lower().endswith(".zip"):
            raise CommandError("Input file name must end with .zip")
        try:
            load_dump_zip_file(file_name)
        except FileNotFoundError as exc:
            # Filesystem calls populate ``filename`` on the exception. A
            # FileNotFoundError raised with only a message (e.g. for a required
            # member missing inside the archive) leaves it as None, and in that
            # case the zip itself was found, so report the real cause instead.
            if exc.filename is None:
                message = f"Failed to load '{file_name}': {exc}"
            else:
                message = f"Learning package file {file_name} not found"
            raise CommandError(message) from exc
        except Exception as e:
            message = f"Failed to load '{file_name}': {e}"
            logger.exception(
                "Failed to load zip file %s ",
                file_name,
            )
            raise CommandError(message) from e

        # Reported outside the try block so that an output error is not
        # mistaken for (and logged as) a failed load.
        message = f'{file_name} loaded successfully'
        self.stdout.write(self.style.SUCCESS(message))
17 changes: 17 additions & 0 deletions openedx_learning/apps/authoring/backup_restore/toml.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

from datetime import datetime
from typing import Any, Dict

import tomlkit

Expand Down Expand Up @@ -202,3 +203,19 @@ def toml_collection(collection: Collection, entity_keys: list[str]) -> str:
doc.add("collection", collection_table)

return tomlkit.dumps(doc)


def parse_learning_package_toml(content: str) -> dict:
    """
    Parse learning package TOML text and return its ``learning_package`` table.

    Raises:
        ValueError: If the ``learning_package`` section is missing, or if that
            section has no ``title`` or no ``key``.
    """
    document: Dict[str, Any] = tomlkit.parse(content)

    if "learning_package" not in document:
        raise ValueError("Invalid learning package TOML: missing 'learning_package' section")

    package_table = document["learning_package"]
    # Checked in this order so a missing title is reported before a missing key.
    for required_field in ("title", "key"):
        if required_field not in package_table:
            raise ValueError(
                f"Invalid learning package TOML: missing '{required_field}' in 'learning_package' section"
            )
    return package_table
165 changes: 164 additions & 1 deletion openedx_learning/apps/authoring/backup_restore/zipper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import zipfile
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Optional, Tuple
from typing import Any, List, Optional, Tuple

from django.db import transaction
from django.db.models import Prefetch, QuerySet
from django.utils.text import slugify

Expand All @@ -21,6 +22,7 @@
PublishableEntityVersion,
)
from openedx_learning.apps.authoring.backup_restore.toml import (
parse_learning_package_toml,
toml_collection,
toml_learning_package,
toml_publishable_entity,
Expand Down Expand Up @@ -366,3 +368,164 @@ def create_zip(self, path: str) -> None:
toml_collection(collection, list(entity_keys_related)),
timestamp=collection.modified,
)


class LearningPackageUnzipper:
"""
Handles extraction and restoration of learning package data from a zip archive.

Main responsibilities:
- Parse and organize files from the zip structure.
- Restore learning package, containers, components, and collections to the database.
- Ensure atomicity of the restore process.

Usage:
unzipper = LearningPackageUnzipper()
with zipfile.ZipFile("/path/to/backup.zip", "r") as zipf:
    summary = unzipper.load(zipf)
"""

def __init__(self) -> None:
self.utc_now: datetime = datetime.now(tz=timezone.utc)

# --------------------------
# Public API
# --------------------------

@transaction.atomic
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: This is okay for now, but we may need to be more granular with this eventually, instead of putting it over the entire method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understood

def load(self, zipf: zipfile.ZipFile) -> dict[str, Any]:
    """
    Restore every object found in the ZIP archive and summarize the result.

    Args:
        zipf (ZipFile): An open ZipFile instance.

    Returns:
        dict: The restored learning package key under "learning_package",
        plus the number of container, component and collection files found
        in the archive.

    Raises:
        FileNotFoundError: If the learning package TOML file is missing.
        ValueError: If TOML parsing fails.
        Exception: For any database errors (transaction will rollback).
    """
    files_by_kind = self._get_organized_file_list(zipf.namelist())

    # The package file is mandatory; nothing can be restored without it.
    package_file = files_by_kind["learning_package"]
    if not package_file:
        raise FileNotFoundError(f"Missing required {TOML_PACKAGE_NAME} in archive.")

    component_files = files_by_kind["components"]
    container_files = files_by_kind["containers"]
    collection_files = files_by_kind["collections"]

    # Restore order: package first, then components, containers, collections.
    learning_package = self._load_learning_package(zipf, package_file)
    self._restore_components(zipf, component_files, learning_package)
    self._restore_containers(zipf, container_files, learning_package)
    self._restore_collections(zipf, collection_files, learning_package)

    summary = {
        "learning_package": learning_package.key,
        "containers": len(container_files),
        "components": len(component_files),
        "collections": len(collection_files),
    }
    return summary

# --------------------------
# Loading methods
# --------------------------

def _load_learning_package(self, zipf: zipfile.ZipFile, package_file: str) -> LearningPackage:
    """
    Load the learning package TOML file and persist it.

    Args:
        zipf: The open archive to read from.
        package_file: Name of the learning package TOML member in the archive.

    Returns:
        The object returned by ``publishing_api.create_learning_package``.

    Raises:
        ValueError: If the TOML lacks the required section or fields
            (raised by ``parse_learning_package_toml``).
    """
    toml_content = self._read_file_from_zip(zipf, package_file)
    data = parse_learning_package_toml(toml_content)

    return publishing_api.create_learning_package(
        key=data["key"],
        title=data["title"],
        # The parser only guarantees "key" and "title"; a dump without a
        # description falls back to an empty one instead of a bare KeyError.
        description=data.get("description", ""),
    )

def _restore_containers(
self, zipf: zipfile.ZipFile, container_files: List[str], learning_package: LearningPackage
) -> None:
"""Restore containers from the zip archive."""
for container_file in container_files:
self._load_container(zipf, container_file, learning_package)

def _restore_components(
self, zipf: zipfile.ZipFile, component_files: List[str], learning_package: LearningPackage
) -> None:
"""Restore components from the zip archive."""
for component_file in component_files:
self._load_component(zipf, component_file, learning_package)

def _restore_collections(
self, zipf: zipfile.ZipFile, collection_files: List[str], learning_package: LearningPackage
) -> None:
"""Restore collections from the zip archive (future extension)."""
# pylint: disable=W0613
for collection_file in collection_files: # pylint: disable=W0612
# Placeholder for collection restore logic
pass

# --------------------------
# Individual object loaders
# --------------------------

def _load_container(
self, zipf: zipfile.ZipFile, container_file: str, learning_package: LearningPackage
): # pylint: disable=W0613
"""Load and persist a container (placeholder)."""
# TODO: parse TOML here
# pylint: disable=W0105
"""
container = publishing_api.create_container(
learning_package_id=learning_package.id,
key="container_key_placeholder",
title="Container Title Placeholder",
description="Container Description Placeholder",
)
publishing_api.create_container_version(
container_id=container.id,
title="Container Version Title Placeholder",
created_by=None,
)
"""

def _load_component(
self, zipf: zipfile.ZipFile, component_file: str, learning_package: LearningPackage
): # pylint: disable=W0613
"""Load and persist a component (placeholder)."""
# TODO: implement actual parsing
return None

# --------------------------
# Utilities
# --------------------------

def _read_file_from_zip(self, zipf: zipfile.ZipFile, filename: str) -> str:
"""Read and decode a UTF-8 file from the zip archive."""
with zipf.open(filename) as f:
return f.read().decode("utf-8")

def _get_organized_file_list(self, file_paths: List[str]) -> dict[str, Any]:
"""
Organize file paths into categories: learning_package, containers, components, collections.
"""
organized: dict[str, Any] = {
"learning_package": None,
"containers": [],
"components": [],
"collections": [],
}

for path in file_paths:
if path.endswith("/"): # skip directories
continue

if path == TOML_PACKAGE_NAME:
organized["learning_package"] = path
elif path.startswith("entities/") and str(Path(path).parent) == "entities":
organized["containers"].append(path)
elif path.startswith("entities/"):
organized["components"].append(path)
elif path.startswith("collections/"):
organized["collections"].append(path)

return organized