Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openedx_learning/apps/authoring/backup_restore/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ def load_dump_zip_file(path: str) -> None:
Loads a zip file derived from create_zip_file
"""
with zipfile.ZipFile(path, "r") as zipf:
LearningPackageUnzipper().load(zipf)
LearningPackageUnzipper(zipf).load()
8 changes: 5 additions & 3 deletions openedx_learning/apps/authoring/backup_restore/serializers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
The serializers module for restoration of authoring data.
"""
from datetime import timezone

from rest_framework import serializers

from openedx_learning.apps.authoring.components import api as components_api
Expand All @@ -12,7 +14,7 @@ class EntitySerializer(serializers.Serializer): # pylint: disable=abstract-meth
"""
can_stand_alone = serializers.BooleanField(required=True)
key = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
created_by = serializers.CharField(required=True, allow_null=True)


Expand All @@ -22,8 +24,9 @@ class EntityVersionSerializer(serializers.Serializer): # pylint: disable=abstra
"""
title = serializers.CharField(required=True)
entity_key = serializers.CharField(required=True)
created = serializers.DateTimeField(required=True)
created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
created_by = serializers.CharField(required=True, allow_null=True)
version_num = serializers.IntegerField(required=True)


class ComponentSerializer(EntitySerializer): # pylint: disable=abstract-method
Expand Down Expand Up @@ -51,7 +54,6 @@ class ComponentVersionSerializer(EntityVersionSerializer): # pylint: disable=ab
"""
Serializer for component versions.
"""
content_to_replace = serializers.DictField(child=serializers.CharField(), required=True)


class ContainerSerializer(EntitySerializer): # pylint: disable=abstract-method
Expand Down
113 changes: 83 additions & 30 deletions openedx_learning/apps/authoring/backup_restore/zipper.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,9 @@ class LearningPackageUnzipper:
summary = unzipper.load("/path/to/backup.zip")
"""

def __init__(self) -> None:
self.utc_now: datetime = datetime.now(tz=timezone.utc)
def __init__(self, zipf: zipfile.ZipFile) -> None:
self.zipf = zipf
self.utc_now: datetime = datetime.now(timezone.utc)
self.component_types_cache: dict[tuple[str, str], ComponentType] = {}
self.errors: list[dict[str, Any]] = []
# Maps for resolving relationships
Expand All @@ -422,28 +423,34 @@ def __init__(self) -> None:
# --------------------------

@transaction.atomic
def load(self, zipf: zipfile.ZipFile) -> dict[str, Any]:
def load(self) -> dict[str, Any]:
"""Extracts and restores all objects from the ZIP archive in an atomic transaction."""
organized_files = self._get_organized_file_list(zipf.namelist())
organized_files = self._get_organized_file_list(self.zipf.namelist())

if not organized_files["learning_package"]:
raise FileNotFoundError(f"Missing required {TOML_PACKAGE_NAME} in archive.")

learning_package = self._load_learning_package(zipf, organized_files["learning_package"])
learning_package = self._load_learning_package(organized_files["learning_package"])
components_validated = self._extract_entities(
zipf, organized_files["components"], ComponentSerializer, ComponentVersionSerializer
organized_files["components"], ComponentSerializer, ComponentVersionSerializer
)
containers_validated = self._extract_entities(
zipf, organized_files["containers"], ContainerSerializer, ContainerVersionSerializer
organized_files["containers"], ContainerSerializer, ContainerVersionSerializer
)

collections_validated = self._extract_collections(
zipf, organized_files["collections"]
organized_files["collections"]
)

self._write_errors()
if not self.errors:
self._save(learning_package, components_validated, containers_validated, collections_validated)
self._save(
learning_package,
components_validated,
containers_validated,
collections_validated,
component_static_files=organized_files["component_static_files"]
)

return {
"learning_package": learning_package.key,
Expand All @@ -458,7 +465,6 @@ def load(self, zipf: zipfile.ZipFile) -> dict[str, Any]:

def _extract_entities(
self,
zipf: zipfile.ZipFile,
entity_files: list[str],
entity_serializer: type[serializers.Serializer],
version_serializer: type[serializers.Serializer],
Expand All @@ -471,7 +477,7 @@ def _extract_entities(
# Skip non-TOML files
continue

entity_data, draft_version, published_version = self._load_entity_data(zipf, file)
entity_data, draft_version, published_version = self._load_entity_data(file)
serializer = entity_serializer(
data={"created": self.utc_now, "created_by": None, **entity_data}
)
Expand Down Expand Up @@ -501,7 +507,6 @@ def _extract_entities(

def _extract_collections(
self,
zipf: zipfile.ZipFile,
collection_files: list[str],
) -> dict[str, Any]:
"""Extraction + validation pipeline for collections."""
Expand All @@ -511,7 +516,7 @@ def _extract_collections(
if not file.endswith(".toml"):
# Skip non-TOML files
continue
toml_content = self._read_file_from_zip(zipf, file)
toml_content = self._read_file_from_zip(file)
collection_data = parse_collection_toml(toml_content)
serializer = CollectionSerializer(data={"created_by": None, **collection_data})
if not serializer.is_valid():
Expand All @@ -538,20 +543,22 @@ def _save(
learning_package: LearningPackage,
components: dict[str, Any],
containers: dict[str, Any],
collections: dict[str, Any]
collections: dict[str, Any],
*,
component_static_files: dict[str, List[str]]
) -> None:
"""Persist all validated entities in two phases: published then drafts."""

with publishing_api.bulk_draft_changes_for(learning_package.id):
self._save_components(learning_package, components)
self._save_components(learning_package, components, component_static_files)
self._save_units(learning_package, containers)
self._save_subsections(learning_package, containers)
self._save_sections(learning_package, containers)
self._save_collections(learning_package, collections)
publishing_api.publish_all_drafts(learning_package.id)

with publishing_api.bulk_draft_changes_for(learning_package.id):
self._save_draft_versions(components, containers)
self._save_draft_versions(components, containers, component_static_files)

def _save_collections(self, learning_package, collections):
"""Save collections and their entities."""
Expand All @@ -564,7 +571,7 @@ def _save_collections(self, learning_package, collections):
entities_qset=publishing_api.get_publishable_entities(learning_package.id).filter(key__in=entities)
)

def _save_components(self, learning_package, components):
def _save_components(self, learning_package, components, component_static_files):
"""Save components and published component versions."""
for valid_component in components.get("components", []):
entity_key = valid_component.pop("key")
Expand All @@ -573,8 +580,12 @@ def _save_components(self, learning_package, components):

for valid_published in components.get("components_published", []):
entity_key = valid_published.pop("entity_key")
version_num = valid_published["version_num"] # Should exist, validated earlier
content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
components_api.create_next_component_version(
self.components_map_by_key[entity_key].publishable_entity.id,
content_to_replace=content_to_replace,
force_version_num=valid_published.pop("version_num", None),
**valid_published
)

Expand Down Expand Up @@ -620,34 +631,50 @@ def _save_sections(self, learning_package, containers):
self.sections_map_by_key[entity_key], subsections=children, **valid_published
)

def _save_draft_versions(self, components, containers):
def _save_draft_versions(self, components, containers, component_static_files):
"""Save draft versions for all entity types."""
for valid_draft in components.get("components_drafts", []):
entity_key = valid_draft.pop("entity_key")
version_num = valid_draft["version_num"] # Should exist, validated earlier
content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
components_api.create_next_component_version(
self.components_map_by_key[entity_key].publishable_entity.id,
content_to_replace=content_to_replace,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this do the right thing if I have a file in a published version and that file is not there in the draft version? The content_to_replace param is basically a diff, so if we'd need to do something like:

content_to_replace_for_published = {
    'static/profile.webp': Content(),
    'static/background.webp': Content(),
}

# after we create the published version data, make the replacement
# dict remove all the static assets
content_to_replace_for_draft = {
    key: None for key in content_to_replace_for_published
}

# now selectively re-add the stuff that appears in the draft version
content_to_replace_for_draft['static/new_image.webp'] = Content()

Actually, now that I think on it, maybe we could make an optional "ignore_previous_content" param to create_next_component_version? So if ignore_previous_content=True, we could skip this whole section:

https://github.com/openedx/openedx-learning/blob/312d02474dfe7fdc7883d2c733f190c5fa84051f/openedx_learning/apps/authoring/components/api.py#L244-L254

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wonderful. ignore_previous_content makes the implementation easier to handle this case. Thank you

force_version_num=valid_draft.pop("version_num", None),
# Drafts can diverge from published, so we allow ignoring previous content
# Use case: published v1 had files A, B; draft v2 only has file A
ignore_previous_content=True,
**valid_draft
)

for valid_draft in containers.get("unit_drafts", []):
entity_key = valid_draft.pop("entity_key")
children = self._resolve_children(valid_draft, self.components_map_by_key)
units_api.create_next_unit_version(
self.units_map_by_key[entity_key], components=children, **valid_draft
self.units_map_by_key[entity_key],
components=children,
force_version_num=valid_draft.pop("version_num", None),
**valid_draft
)

for valid_draft in containers.get("subsection_drafts", []):
entity_key = valid_draft.pop("entity_key")
children = self._resolve_children(valid_draft, self.units_map_by_key)
subsections_api.create_next_subsection_version(
self.subsections_map_by_key[entity_key], units=children, **valid_draft
self.subsections_map_by_key[entity_key],
units=children,
force_version_num=valid_draft.pop("version_num", None),
**valid_draft
)

for valid_draft in containers.get("section_drafts", []):
entity_key = valid_draft.pop("entity_key")
children = self._resolve_children(valid_draft, self.subsections_map_by_key)
sections_api.create_next_section_version(
self.sections_map_by_key[entity_key], subsections=children, **valid_draft
self.sections_map_by_key[entity_key],
subsections=children,
force_version_num=valid_draft.pop("version_num", None),
**valid_draft
)

# --------------------------
Expand Down Expand Up @@ -680,14 +707,31 @@ def _write_errors(self) -> str | None:

return log_filename

def _resolve_static_files(
self,
num_version: int,
entity_key: str,
static_files_map: dict[str, List[str]]
) -> dict[str, bytes]:
"""Resolve static file paths into their binary content."""
resolved_files: dict[str, bytes] = {}

static_file_key = f"{entity_key}:v{num_version}" # e.g., "my_component:123:v1"
static_files = static_files_map.get(static_file_key, [])
for static_file in static_files:
local_key = static_file.split(f"v{num_version}/")[-1]
with self.zipf.open(static_file, "r") as f:
resolved_files[local_key] = f.read()
return resolved_files

def _resolve_children(self, entity_data: dict[str, Any], lookup_map: dict[str, Any]) -> list[Any]:
"""Resolve child entity keys into model instances."""
children_keys = entity_data.pop("children", [])
return [lookup_map[key] for key in children_keys if key in lookup_map]

def _load_learning_package(self, zipf: zipfile.ZipFile, package_file: str) -> LearningPackage:
def _load_learning_package(self, package_file: str) -> LearningPackage:
"""Load and persist the learning package TOML file."""
toml_content = self._read_file_from_zip(zipf, package_file)
toml_content = self._read_file_from_zip(package_file)
data = parse_learning_package_toml(toml_content)
return publishing_api.create_learning_package(
key=data["key"],
Expand All @@ -696,10 +740,10 @@ def _load_learning_package(self, zipf: zipfile.ZipFile, package_file: str) -> Le
)

def _load_entity_data(
self, zipf: zipfile.ZipFile, entity_file: str
self, entity_file: str
) -> tuple[dict[str, Any], dict[str, Any] | None, dict[str, Any] | None]:
"""Load entity data and its versions from TOML."""
content = self._read_file_from_zip(zipf, entity_file)
content = self._read_file_from_zip(entity_file)
entity_data, version_data = parse_publishable_entity_toml(content)
return entity_data, *self._get_versions_to_write(version_data, entity_data)

Expand All @@ -712,7 +756,6 @@ def _validate_versions(self, entity_data, draft, published, serializer_cls, *, f
serializer = serializer_cls(
data={
"entity_key": entity_data["key"],
"content_to_replace": {},
"created": self.utc_now,
"created_by": None,
**version
Expand All @@ -724,9 +767,9 @@ def _validate_versions(self, entity_data, draft, published, serializer_cls, *, f
self.errors.append({"file": file, "errors": serializer.errors})
return valid

def _read_file_from_zip(self, zipf: zipfile.ZipFile, filename: str) -> str:
def _read_file_from_zip(self, filename: str) -> str:
"""Read and decode a UTF-8 file from the zip archive."""
with zipf.open(filename) as f:
with self.zipf.open(filename) as f:
return f.read().decode("utf-8")

def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
Expand All @@ -735,6 +778,7 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
"learning_package": None,
"containers": [],
"components": [],
"component_static_files": defaultdict(list),
"collections": [],
}

Expand All @@ -746,10 +790,19 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
elif path.startswith("entities/") and str(Path(path).parent) == "entities":
organized["containers"].append(path)
elif path.startswith("entities/"):
organized["components"].append(path)
if path.endswith(".toml"):
organized["components"].append(path)
else:
component_key = Path(path).parts[1:4] # e.g., ['xblock.v1', 'html', 'my_component_123456']
num_version = Path(path).parts[5] if len(Path(path).parts) > 5 else "v1" # e.g., 'v1'
if len(component_key) == 3:
component_identifier = ":".join(component_key)
component_identifier += f":{num_version}"
organized["component_static_files"][component_identifier].append(path)
else:
self.errors.append({"file": path, "errors": "Invalid component static file path structure."})
elif path.startswith("collections/"):
organized["collections"].append(path)

return organized

def _get_versions_to_write(
Expand Down
33 changes: 33 additions & 0 deletions openedx_learning/apps/authoring/components/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,28 @@ def create_next_component_version(
created: datetime,
title: str | None = None,
created_by: int | None = None,
*,
force_version_num: int | None = None,
ignore_previous_content: bool = False,
) -> ComponentVersion:
"""
Create a new ComponentVersion based on the most recent version.

Args:
component_pk (int): The primary key of the Component to version.
content_to_replace (dict): Mapping of file keys to Content IDs,
None (for deletion), or bytes (for new file content).
created (datetime): The creation timestamp for the new version.
title (str, optional): Title for the new version. If None, uses the previous version's title.
created_by (int, optional): User ID of the creator.
force_version_num (int, optional): If provided, overrides the automatic version number increment and sets
this version's number explicitly. Use this if you need to restore or import a version with a specific
version number, such as during data migration or when synchronizing with external systems.
ignore_previous_content (bool): If True, do not copy over content from the previous version.

Returns:
ComponentVersion: The newly created ComponentVersion instance.

A very common pattern for making a new ComponentVersion is going to be "make
it just like the last version, except changing these one or two things".
Before calling this, you should create any new contents via the contents
Expand All @@ -183,6 +201,14 @@ def create_next_component_version(
convenient to remove paths (e.g. due to deprecation) without having to
always check for its existence first.

Why use force_version_num?
Normally, the version number is incremented automatically from the latest version. If you need to set a specific
version number (for example, when restoring from backup, importing legacy data, or synchronizing with another
system), use force_version_num to override the default behavior.

Why not use create_component_version?
The main reason is that we want to reuse the logic to create a static file component from a dictionary.

TODO: Have to add learning_downloadable info to this when it comes time to
support static asset download.
"""
Expand All @@ -202,6 +228,9 @@ def create_next_component_version(
if title is None:
title = last_version.title

if force_version_num is not None:
next_version_num = force_version_num

with atomic():
publishable_entity_version = publishing_api.create_publishable_entity_version(
component_pk,
Expand Down Expand Up @@ -241,6 +270,10 @@ def create_next_component_version(
component_version=component_version,
key=key,
)

if ignore_previous_content:
return component_version

# Now copy any old associations that existed, as long as they aren't
# in conflict with the new stuff or marked for deletion.
last_version_content_mapping = ComponentVersionContent.objects \
Expand Down
Loading