124 changes: 103 additions & 21 deletions openedx_learning/apps/authoring/backup_restore/zipper.py
@@ -5,18 +5,21 @@
import hashlib
import zipfile
from pathlib import Path
from typing import Optional
from typing import List, Optional

from django.db.models import QuerySet
from django.db.models import Prefetch, QuerySet
from django.utils.text import slugify

from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity
from openedx_learning.apps.authoring.publishing import api as publishing_api
from openedx_learning.apps.authoring.publishing.models import (
from openedx_learning.api.authoring_models import (
ComponentVersion,
ComponentVersionContent,
Content,
LearningPackage,
PublishableEntity,
PublishableEntityVersion,
)
from openedx_learning.apps.authoring.backup_restore.toml import toml_learning_package, toml_publishable_entity
from openedx_learning.apps.authoring.publishing import api as publishing_api

TOML_PACKAGE_NAME = "package.toml"

@@ -68,6 +71,53 @@ def create_folder(self, folder_path: Path, zip_file: zipfile.ZipFile) -> None:
zip_file.writestr(zip_info, "") # Add explicit empty directory entry
self.folders_already_created.add(folder_path)

def get_publishable_entities(self) -> QuerySet[PublishableEntity]:
"""
Retrieve the publishable entities associated with the learning package.
Prefetches related data for efficiency.
"""
lp_id = self.learning_package.pk
publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id)
return (
publishable_entities
.select_related(
"container",
"component__component_type",
"draft__version__componentversion",
"published__version__componentversion",
)
.prefetch_related(
# We should re-evaluate the prefetching strategy here:
# the current approach may cause performance issues with
# very large libraries (up to 100K items), which may be
# too much data for this type of prefetch.
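# Note (descriptive only): to_attr="prefetched_contents" attaches the
# prefetched rows to each ComponentVersion, which is what create_zip later
# reads as version.componentversion.prefetched_contents instead of issuing
# a query per entity.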
Prefetch(
"draft__version__componentversion__componentversioncontent_set",
queryset=ComponentVersionContent.objects.select_related("content"),
to_attr="prefetched_contents",
),
Prefetch(
"published__version__componentversion__componentversioncontent_set",
queryset=ComponentVersionContent.objects.select_related("content"),
to_attr="prefetched_contents",
),
)
)

def get_versions_to_write(self, entity: PublishableEntity):
"""
Get the versions of a publishable entity that should be written to the zip file.
Returns the draft version (if any), plus the published version when it differs
from the draft, so at most two versions are written per entity.
"""
draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity)
published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity)

versions_to_write = [draft_version] if draft_version else []

if published_version and published_version != draft_version:
versions_to_write.append(published_version)
return versions_to_write

def create_zip(self, path: str) -> None:
"""
Creates a zip file containing the learning package data.
@@ -76,11 +126,10 @@ def create_zip(self, path: str) -> None:
Raises:
Exception: If the learning package cannot be found or if the zip creation fails.
"""
package_toml_content: str = toml_learning_package(self.learning_package)
lp_id = self.learning_package.pk

with zipfile.ZipFile(path, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
# Add the package.toml string
# Add the package.toml file
package_toml_content: str = toml_learning_package(self.learning_package)
zipf.writestr(TOML_PACKAGE_NAME, package_toml_content)

# Add the entities directory
@@ -91,9 +140,11 @@ def create_zip(self, path: str) -> None:
collections_folder = Path("collections")
self.create_folder(collections_folder, zipf)

# Add each entity's TOML file
publishable_entities: QuerySet[PublishableEntity] = publishing_api.get_publishable_entities(lp_id)
publishable_entities = publishable_entities.select_related("container", "component__component_type")
# ------ ENTITIES SERIALIZATION -------------

# Get the publishable entities
publishable_entities: QuerySet[PublishableEntity] = self.get_publishable_entities()

for entity in publishable_entities:
# entity: PublishableEntity = entity # Type hint for clarity

@@ -116,38 +167,42 @@ def create_zip(self, path: str) -> None:
# component_versions/
# v1/
# static/

# Generate the slugified hash for the component local key
# Example: if the local key is "my_component", the slugified hash might be "my_component_123456"
# It combines the slugified local key with a short hash, so the result should be unique
entity_slugify_hash = slugify_hashed_filename(entity.component.local_key)

# Create the component namespace folder
# Example of component namespace is: "entities/xblock.v1/"
component_namespace_folder = entities_folder / entity.component.component_type.namespace
# Example of component namespace is: "xblock.v1"
self.create_folder(component_namespace_folder, zipf)

# Create the component type folder
# Example of component type is: "entities/xblock.v1/html/"
component_type_folder = component_namespace_folder / entity.component.component_type.name
# Example of component type is: "html"
self.create_folder(component_type_folder, zipf)

# Create the component id folder
# Example of component id is: "entities/xblock.v1/html/my_component_123456/"
component_id_folder = component_type_folder / entity_slugify_hash
# Example of component id is: "i-dont-like-the-sidebar-aa1645ade4a7"
self.create_folder(component_id_folder, zipf)

# Add the entity TOML file inside the component type folder as well
# Example: "entities/xblock.v1/html/my_component_123456.toml"
component_entity_toml_path = component_type_folder / f"{entity_slugify_hash}.toml"
zipf.writestr(str(component_entity_toml_path), entity_toml_content)

# Add component version folder into the component id folder
# Example: "entities/xblock.v1/html/my_component_123456/component_versions/"
component_version_folder = component_id_folder / "component_versions"
self.create_folder(component_version_folder, zipf)

# ------ COMPONENT VERSIONING -------------
# Focusing on draft and published versions

# Get the draft and published versions
draft_version: Optional[PublishableEntityVersion] = publishing_api.get_draft_version(entity)
published_version: Optional[PublishableEntityVersion] = publishing_api.get_published_version(entity)

versions_to_write = [draft_version] if draft_version else []

if published_version and published_version != draft_version:
versions_to_write.append(published_version)
versions_to_write: List[PublishableEntityVersion] = self.get_versions_to_write(entity)

for version in versions_to_write:
# Create a folder for the version
@@ -158,3 +213,30 @@ def create_zip(self, path: str) -> None:
# Add static folder for the version
static_folder = version_folder / "static"
self.create_folder(static_folder, zipf)

# ------ COMPONENT STATIC CONTENT -------------
component_version: ComponentVersion = version.componentversion

# Get content data associated with this version
contents: List[
ComponentVersionContent
] = component_version.prefetched_contents  # type: ignore[attr-defined]

for component_version_content in contents:
content: Content = component_version_content.content

# Important: component_version_content.key implicitly contains
# the file name and the file extension
file_path = version_folder / component_version_content.key

if content.has_file and content.path:
# If has_file, we pull it from the file system
with content.read_file() as f:
file_data = f.read()
elif not content.has_file and content.text:
# Otherwise, we use the text content as the file data
file_data = content.text
else:
# If no file and no text, we skip this content
continue
zipf.writestr(str(file_path), file_data)
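
For reference, a minimal usage sketch tying this file to the tests below: the lp_dump management command exercised there takes a learning package key and an output path, and the resulting archive can then be inspected with the standard zipfile module. The key "my_lp_key" and the file name "backup.zip" are placeholders, not values from this pull request:

import zipfile

from django.core.management import call_command

# Dump a learning package by key to a zip archive (same call the tests use).
call_command("lp_dump", "my_lp_key", "backup.zip")

# The archive contains package.toml, the entities/ and collections/ folders,
# per-entity TOML files, component_versions/<vN>/ folders and static content.
with zipfile.ZipFile("backup.zip") as zf:
    print("\n".join(zf.namelist()))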
95 changes: 77 additions & 18 deletions tests/openedx_learning/apps/authoring/backup_restore/test_backup.py
@@ -12,14 +12,15 @@

from openedx_learning.api import authoring as api
from openedx_learning.api.authoring_models import Component, LearningPackage
from openedx_learning.apps.authoring.backup_restore.zipper import LearningPackageZipper
from openedx_learning.lib.test_utils import TestCase

User = get_user_model()


class LpDumpCommandTestCase(TestCase):
"""
Test serving static assets (Content files, via Component lookup).
Test the lp_dump management command.
"""

learning_package: LearningPackage
@@ -28,7 +29,7 @@ class LpDumpCommandTestCase(TestCase):
@classmethod
def setUpTestData(cls):
"""
Initialize our content data
Initialize data for the whole TestCase
"""

# Create a user for the test
@@ -46,6 +47,8 @@ def setUpTestData(cls):
cls.now = datetime(2024, 8, 5, tzinfo=timezone.utc)

cls.xblock_v1_namespace = "xblock.v1"
html_media_type = api.get_or_create_media_type("text/html")
text_media_type = api.get_or_create_media_type("text/plain")

# Create component types
cls.html_type = api.get_or_create_component_type(cls.xblock_v1_namespace, "html")
@@ -68,13 +71,25 @@ def setUpTestData(cls):
published_at=cls.now,
)

api.create_next_component_version(
new_problem_version = api.create_next_component_version(
cls.published_component.pk,
title="My published problem draft v2",
content_to_replace={},
created=cls.now,
)

new_txt_content = api.get_or_create_text_content(
cls.learning_package.pk,
text_media_type.id,
text="This is some data",
created=cls.now,
)
api.create_component_version_content(
new_problem_version.pk,
new_txt_content.pk,
key="hello.txt",
)

# Create a Draft component, one in each learning package
cls.draft_component, _ = api.create_component_and_version(
cls.learning_package.id,
@@ -85,13 +100,25 @@ def setUpTestData(cls):
created_by=cls.user.id,
)

api.create_next_component_version(
new_html_version = api.create_next_component_version(
cls.draft_component.pk,
title="My draft html v2",
content_to_replace={},
created=cls.now,
)

cls.html_asset_content = api.get_or_create_file_content(
cls.learning_package.id,
html_media_type.id,
data=b"<html>hello world!</html>",
created=cls.now,
)
api.create_component_version_content(
new_html_version.pk,
cls.html_asset_content.id,
key="static/hello.html",
)

components = api.get_publishable_entities(cls.learning_package)
cls.all_components = components

@@ -111,25 +138,29 @@ def check_zip_file_structure(self, zip_path: Path):
"""
with zipfile.ZipFile(zip_path, "r") as zip_file:
# Check that the zip file contains the expected files
expected_paths = [
"package.toml",
"entities/",

expected_directories = [
"collections/",
"entities/xblock.v1/",
"entities/xblock.v1/html/",
"entities/xblock.v1/html/my_draft_example_af06e1.toml",
"entities/xblock.v1/html/my_draft_example_af06e1/",
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/",
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/",
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/",
"entities/xblock.v1/problem/",
"entities/xblock.v1/problem/my_published_example_386dce/",
"entities/xblock.v1/problem/my_published_example_386dce.toml",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v1/",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v1/static/",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/static/",
]

expected_files = [
# Learning package files
"package.toml",

# Entity TOML files
"entities/xblock.v1/html/my_draft_example_af06e1.toml",
"entities/xblock.v1/problem/my_published_example_386dce.toml",

# Entity static content files
"entities/xblock.v1/html/my_draft_example_af06e1/component_versions/v2/static/hello.html",
"entities/xblock.v1/problem/my_published_example_386dce/component_versions/v2/hello.txt",
]

expected_paths = expected_directories + expected_files

# Check that all expected paths are present
zip_name_list = zip_file.namelist()
for expected_path in expected_paths:
@@ -207,3 +238,31 @@ def test_dump_nonexistent_learning_package(self):
# Attempt to dump a learning package that does not exist
call_command("lp_dump", lp_key, file_name, stdout=out)
self.assertIn("Learning package 'nonexistent_lp' does not exist", out.getvalue())

def test_queries_n_plus_problem(self):
"""
Test the N+1 query problem on LearningPackageZipper for performance.
Regardless of the number of entities, the query count should remain at 3.
Why?
1 query for PublishableEntity + select_related joins
1 query for all draft contents
1 query for all published contents
"""
zipper = LearningPackageZipper(self.learning_package)
entities = zipper.get_publishable_entities()
with self.assertNumQueries(3):
list(entities) # force evaluation
self.assertEqual(len(entities), 2)
# Add another component
api.create_component_and_version(
self.learning_package.id,
self.problem_type,
local_key="my_published_example2",
title="My published problem 2",
created=self.now,
created_by=self.user.id,
)
entities = zipper.get_publishable_entities()
with self.assertNumQueries(3):
list(entities) # force evaluation
self.assertEqual(len(entities), 3)
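
The directory and file names asserted in check_zip_file_structure (for example my_draft_example_af06e1) come from slugify_hashed_filename, whose implementation is outside this diff. A rough sketch of the idea only, assuming it pairs Django's slugify with a short hash of the component's local key; the real helper may differ in hash choice and length:

import hashlib

from django.utils.text import slugify


def slugify_hashed_filename_sketch(local_key: str) -> str:
    """Illustrative stand-in, not the real helper used by the zipper."""
    # A 3-byte digest renders as 6 hex characters, matching suffixes like "af06e1".
    suffix = hashlib.blake2b(local_key.encode("utf-8"), digest_size=3).hexdigest()
    return f"{slugify(local_key)}_{suffix}"


# e.g. slugify_hashed_filename_sketch("my_draft_example") -> "my_draft_example_" plus 6 hex chars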