Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions cms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1811,9 +1811,21 @@

# alternative swagger generator for CMS API
'drf_spectacular',

'openedx_events',

# Learning Core Apps, used by v2 content libraries (content_libraries app)
"openedx_learning.core.components.apps.ComponentsConfig",
"openedx_learning.core.contents.apps.ContentsConfig",
"openedx_learning.core.publishing.apps.PublishingConfig",
Comment on lines +1816 to +1820
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Learning Core Apps, used by v2 content libraries (content_libraries app)
"openedx_learning.core.components.apps.ComponentsConfig",
"openedx_learning.core.contents.apps.ContentsConfig",
"openedx_learning.core.publishing.apps.PublishingConfig",
# Learning Core Apps, used by v2 content libraries (content_libraries app)
"openedx_learning.core.components",
"openedx_learning.core.contents",
"openedx_learning.core.publishing",

(non-blocking nit)

Django lets you abbreviate these when your app contains an apps.py with a lone/default app config.

]

OPENEDX_LEARNING = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you slap a setting annotation on this and the LMS setting, even if it's just a big WIP warning with a link to openedx-learning?

# Custom file storage, though this is better done through Django's
# STORAGES setting in Django >= 4.2
"STORAGE": None,
}


################# EDX MARKETING SITE ##################################

Expand Down
14 changes: 13 additions & 1 deletion lms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
# and throws spurious errors. Therefore, we disable invalid-name checking.
# pylint: disable=invalid-name


import importlib.util
import sys
import os
Expand Down Expand Up @@ -3316,9 +3315,22 @@ def _make_locale_paths(settings): # pylint: disable=missing-function-docstring

# Notifications
'openedx.core.djangoapps.notifications',


'openedx_events',

# Learning Core Apps, used by v2 content libraries (content_libraries app)
"openedx_learning.core.components.apps.ComponentsConfig",
"openedx_learning.core.contents.apps.ContentsConfig",
"openedx_learning.core.publishing.apps.PublishingConfig",
Comment on lines +3321 to +3325
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# Learning Core Apps, used by v2 content libraries (content_libraries app)
"openedx_learning.core.components.apps.ComponentsConfig",
"openedx_learning.core.contents.apps.ContentsConfig",
"openedx_learning.core.publishing.apps.PublishingConfig",
# Learning Core Apps, used by v2 content libraries (content_libraries app)
"openedx_learning.core.components",
"openedx_learning.core.contents",
"openedx_learning.core.publishing",

(non-blocking nit) Same comment

]

OPENEDX_LEARNING = {
# Custom file storage, though this is better done through Django's
# STORAGES setting in Django >= 4.2
"STORAGE": None,
}

######################### CSRF #########################################

# Forwards-compatibility with Django 1.7
Expand Down
6 changes: 5 additions & 1 deletion openedx/core/djangoapps/content_libraries/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Admin site for content libraries
"""
from django.contrib import admin
from .models import ContentLibrary, ContentLibraryPermission
from .models import ContentLibrary, ContentLibraryLearningPackage, ContentLibraryPermission


class ContentLibraryPermissionInline(admin.TabularInline):
Expand Down Expand Up @@ -40,3 +40,7 @@ def get_readonly_fields(self, request, obj=None):
return ["library_key", "org", "slug", "bundle_uuid"]
else:
return ["library_key", ]

@admin.register(ContentLibraryLearningPackage)
class ContentLibraryLearningPackageAdmin(admin.ModelAdmin):
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
"""
Command to import Blockstore-backed v2 Libraries to Learning Core data models.

This will hopefully be very short-lived code.
"""
from datetime import datetime, timezone
from xml.etree import ElementTree as ET
import logging

from django.db import transaction
from django.core.management import BaseCommand, CommandError
from django.core.exceptions import ObjectDoesNotExist

from opaque_keys.edx.locator import LibraryLocatorV2
from openedx.core.djangoapps.content_libraries import api as lib_api
from openedx.core.djangoapps.content_libraries import models as lib_models
from openedx.core.djangoapps.content_libraries import constants as lib_constants
from openedx.core.lib.blockstore_api import (
get_bundle,
get_bundle_file_data,
get_bundle_files_dict,
)
from openedx_learning.core.publishing import api as publishing_api
from openedx_learning.core.components import api as components_api
from openedx_learning.core.contents import api as contents_api


log = logging.getLogger(__name__)


class Command(BaseCommand):
"""
Create a new LearningPackage and initialize with contents from Library.

If you run this and specify a Library that already has a LearningPackage
(using -f), this command will delete that LearningPackage and create a new
one to associate with the Libary. It does not modify the existing one.

All the work is done in a transaction, so errors partway through the
process shouldn't cause state inconsistency in the database. A partly-
imported course *can* cause data to end up in Django Storages.
"""

def add_arguments(self, parser):
"""
Add arguments to the argument parser.
"""
parser.add_argument(
'library-key',
type=LibraryLocatorV2.from_string,
help=('Content Library Key to import content from.'),
)
parser.add_argument(
'-f',
'--force',
action='store_true',
default=False,
)

def handle(self, *args, **options):
"""
Does the work of parsing content from Blockstore and writing it into
openedx-learning core models (publishing, components, contents).
"""
# Search for the library.
try:
lib_key = options['library-key']
lib_data = lib_api.get_library(lib_key)
lib = lib_models.ContentLibrary.objects.get_by_key(lib_key)
except ObjectDoesNotExist:
raise CommandError(f"Library not found: {lib_key}")

COMPONENT_NAMESPACE = 'xblock.v1'

learning_package_already_exists = (
hasattr(lib, 'contents') and
lib.contents.learning_package is not None
)

if learning_package_already_exists and not options['force']:
raise CommandError(
f"Learning Package already exists for {lib_key} (use -f to overwrite)"
)

with transaction.atomic():
# This is a migration script and we're assuming there's no important
# state attached to the LearningPackage yet. That makes it safe to
# just wipe out everything and recreate it.
if learning_package_already_exists:
lp = lib.contents.learning_package
log.info(f"Deleting existing LearningPackage {lp.key} ({lp.uuid})")
lib.contents.delete()
lp.delete()

# Initialize a new LearningPackage
learning_package = publishing_api.create_learning_package(
key=lib_key,
title=lib_data.title,
)
log.info(f"Created LearningPackage {learning_package.key} ({learning_package.uuid})")
lib_models.ContentLibraryLearningPackage.objects.create(
content_library=lib,
learning_package=learning_package,
)

# We don't need the full history stored in Blockstore, just the most
# recently published version and the most recent draft.
bundle = get_bundle(lib.bundle_uuid)
published_files = get_bundle_files_dict(lib.bundle_uuid)

now = datetime.now(timezone.utc)

# First get the published version into openedx-learning models. On
# the openedx-learning side, we'll create them as Drafts and then
# publish at the end.
published_metadata_dict = {}
published_component_pks = {}
published_definition_files = {
file_path: metadata
for file_path, metadata in published_files.items()
if file_path.endswith('/definition.xml') # This is the OLX
}
for file_path, metadata in published_definition_files.items():
block_type, block_id, _def_xml = file_path.split('/')
published_metadata_dict[file_path] = metadata
xml_bytes = get_bundle_file_data(bundle.uuid, file_path)
display_name = extract_display_name(xml_bytes, file_path)

component, component_version = components_api.create_component_and_version(
learning_package_id=learning_package.id,
namespace=COMPONENT_NAMESPACE,
type=block_type,
local_key=block_id,
title=display_name,
created=now,
created_by=None,
)
published_component_pks[file_path] = component.pk
text_content, _created = contents_api.get_or_create_text_content_from_bytes(
learning_package_id=learning_package.id,
data_bytes=xml_bytes,
mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml",
created=now,
)
components_api.add_content_to_component_version(
component_version,
raw_content_id=text_content.pk,
key="definition.xml",
learner_downloadable=False
)
# Publish all the Draft versions we created.
publishing_api.publish_all_drafts(
learning_package_id=learning_package.id,
message="Initial import from Blockstore",
published_at=now,
)

# Now grab the draft version from blockstore, and copy those...
draft_files = get_bundle_files_dict(lib.bundle_uuid, use_draft=lib_constants.DRAFT_NAME)
draft_definition_files = {
file_path: metadata
for file_path, metadata in draft_files.items()
if file_path.endswith("definition.xml")
}
for file_path, draft_metadata in draft_definition_files.items():
published_metadata = published_metadata_dict.get(file_path)
if draft_metadata.modified:
block_type, block_id, _def_xml = file_path.split('/')
xml_bytes = get_bundle_file_data(bundle.uuid, file_path, use_draft=lib_constants.DRAFT_NAME)
display_name = extract_display_name(xml_bytes, file_path)

# If this is newly created in the draft, we have to create a
# whole new Component...
if published_metadata is None:
component = components_api.create_component(
learning_package_id=learning_package.id,
namespace=COMPONENT_NAMESPACE,
type=block_type,
local_key=block_id,
created=now,
created_by=None,
)
component_pk = component.pk
version_num = 1
# Otherwise, it's just been modified...
else:
component_pk = published_component_pks[file_path]
version_num = 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a little odd to me that we have to carefully manage the version numbers like this, rather than having create_component_version return the new version numbers after success.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.

@ormsbee I don't want to block this PR, so consider adding something like create_next_component_version to the API in order to smooth out the I-just-want-to-create-the-next-version use case, while leaving create_component_version as-is for advanced cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will, thanks. One of the reasons I kept it explicit is because I wanted to have a pattern where the caller has to explicitly say what version they think it should be in order to catch race conditions with multiple writers and force one of them to figure it out rather than to silently overwrite. But it's just clunky when used in this scenario.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't thought about race conditions. If you end up deciding to keep it explicit because of those, that sounds good to me too.


component_version = components_api.create_component_version(
component_pk=component_pk,
version_num=version_num,
title=display_name,
created=now,
created_by=None,
)
text_content, _created = contents_api.get_or_create_text_content_from_bytes(
learning_package_id=learning_package.id,
data_bytes=xml_bytes,
mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml",
created=now,
)
components_api.add_content_to_component_version(
component_version,
raw_content_id=text_content.pk,
key="definition.xml",
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bradenmacdonald: I just copied the blockstore convention of definition.xml here, but I was wondering if I should change it to source.xml because it doesn't really represent the definition scope, and I was wondering if it might cause more confusion than it's worth.

Or some other name? @kdmccormick: any thoughts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ormsbee Good point. I like source.xml or block.xml

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking on this a little more, source.xml might get confused with Python source code (e.g. if we later allow a src dir in there for more convenient editing, as a sibling directory for static). So with that in mind, I'll go with block.xml.

Thank you!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

block.xml sounds great.

learner_downloadable=False
)

# Now remove stuff that was present in the published set but was
# deleted in the draft.
deleted_definition_files = set(published_definition_files) - set(draft_definition_files)
for deleted_definition_file in deleted_definition_files:
log.info(f"Deleting {deleted_definition_file} from draft")
component_pk = published_component_pks[deleted_definition_file]
publishing_api.set_draft_version(component_pk, None)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(openedx-learning API feedback, not blocking for this PR)

I'm enjoying this PR because it's cool seeing the publishing and components API in action in edx-platform. For the most part, each call to the APIs has been very clear and self-explanatory. Kudos.

For this particular set_draft_version call, though, I had to dive into the definition to understand that passing in None means "remove from the draft". It feels like that function does one of two different things depending on whether the second arg is None. I wonder if, rather than having a single API function which takes an Optional, it'd be better to have one API function which demands and int, and another which removes the draft:

def set_draft_version(publishable_entity_id: int, publishable_entity_version_pk: int) -> None:
    ...

def remove_from_draft(publishable_entity_id: int) -> None:
    ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. I suspect I was just getting too wrapped up in the data model to step back and think about it from the API consumer's point of view. I'll do this.



def extract_display_name(xml_bytes, file_path):
"""
Parse the display_name out of the XML.

This will return an empty string if no display_name is specified, or if
there is a parsing error.
"""
try:
xml_str = xml_bytes.decode('utf-8')
block_root = ET.fromstring(xml_str)
display_name = block_root.attrib.get("display_name", "")
except ET.ParseError as err:
log.error(f"Parse error for {file_path}: {err}")
display_name = ""

return display_name
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 3.2.22 on 2023-10-24 15:44

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

dependencies = [
('oel_publishing', '0002_alter_fk_on_delete'),
('content_libraries', '0009_alter_contentlibrary_authorized_lti_configs'),
]

operations = [
migrations.CreateModel(
name='ContentLibraryLearningPackage',
fields=[
('content_library', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='contents', serialize=False, to='content_libraries.contentlibrary')),
('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_publishing.learningpackage')),
],
),
]
28 changes: 28 additions & 0 deletions openedx/core/djangoapps/content_libraries/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
LIBRARY_TYPES, COMPLEX, LICENSE_OPTIONS,
ALL_RIGHTS_RESERVED,
)
from openedx_learning.core.publishing.models import LearningPackage
from organizations.models import Organization # lint-amnesty, pylint: disable=wrong-import-order

from .apps import ContentLibrariesConfig
Expand Down Expand Up @@ -527,3 +528,30 @@ def update_score(self, weighted_earned, weighted_possible, timestamp):

def __str__(self):
return str(self.usage_key)


class ContentLibraryLearningPackage(models.Model):
"""
Associates ContentLibrary with a LearningPackage.

This essentially maps the abstract, catalog concept of a Content Library
from the actual bucket of content that is stored in that library.

If the referenced ContentLibrary is deleted, this association is also
deleted, but the LearningPackage being referenced is _not_ deleted.

This association actively prevents the referenced LearningPackage from being
deleted, i.e. you cannot just remove the content that is backing a
ContentLibrary by mistake. You have to explicitly disassociate it from the
ContentLibrary first.
"""
content_library = models.OneToOneField(
ContentLibrary,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the advantage/rationale for having this as a separate model/table, rather than just adding the learning_package foreign key to ContentLibrary directly?

I ask because this code was unclear until I looked up what contents was:

            if learning_package_already_exists:
                lp = lib.contents.learning_package
                log.info(f"Deleting existing LearningPackage {lp.key} ({lp.uuid})")
                lib.contents.delete()
                lp.delete()

Copy link
Member

@kdmccormick kdmccormick Nov 27, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ormsbee I have the same question as @bradenmacdonald --why not add the FK to ContentLibrary directly?

Also, relatedly, what's the desired arity of the ContentLibrary->LearningPackage relationship? If I'm reading this model right, it's N->(0,1), but I'd imagine that the desired arity would be either 1->(0,1) or 1->1.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ContentLibrary->LearningPackage relationship was something that I went back and forth about when trying to square the idea of multiple v1 libraries being mappable into a single v2 library. But I think I agree with both of you that it should be 1:1 for now, so I'll swap this for a simple ForeignKey.

primary_key=True,
on_delete=models.CASCADE,
related_name='contents',
)
learning_package = models.ForeignKey(
LearningPackage,
on_delete=models.RESTRICT,
)