diff --git a/.gitignore b/.gitignore index 9cabbe5db..888019d47 100644 --- a/.gitignore +++ b/.gitignore @@ -61,7 +61,7 @@ requirements/private.in requirements/private.txt # database file -dev.db +dev.db* .vscode diff --git a/.importlinter b/.importlinter index 82d881926..8dbba0cc7 100644 --- a/.importlinter +++ b/.importlinter @@ -41,4 +41,5 @@ name = Core App Dependency Layering type = layers layers= openedx_learning.core.components + openedx_learning.core.contents openedx_learning.core.publishing diff --git a/README.rst b/README.rst index 566537c0b..ff6dbbddc 100644 --- a/README.rst +++ b/README.rst @@ -45,8 +45,8 @@ We have a few different identifier types in the schema, and we try to avoid ``_i * ``id`` is the auto-generated, internal row ID and primary key. This never changes. Data models should make foreign keys to this field, as per Django convention. * ``uuid`` is a randomly generated UUID4. This is the stable way to refer to a row/resource from an external service. This never changes. This is separate from ``id`` mostly because there are performance penalties when using UUIDs as primary keys with MySQL. -* ``identifier`` is intended to be a case-sensitive, alphanumeric identifier, which holds some meaning to library clients. This is usually stable, but can be changed, depending on the business logic of the client. The apps in this repo should make no assumptions about it being stable. It can be used as a suffix. -* ``num`` is like ``identifier``, but for use when it's strictly numeric. It can also be used as a suffix. +* ``key`` is intended to be a case-sensitive, alphanumeric key, which holds some meaning to library clients. This is usually stable, but can be changed, depending on the business logic of the client. The apps in this repo should make no assumptions about it being stable. It can be used as a suffix. +* ``num`` is like ``key``, but for use when it's strictly numeric. It can also be used as a suffix. See Also diff --git a/docs/decisions/0005-identifier-conventions.rst b/docs/decisions/0005-identifier-conventions.rst new file mode 100644 index 000000000..8a53bca4b --- /dev/null +++ b/docs/decisions/0005-identifier-conventions.rst @@ -0,0 +1,39 @@ +5. Identifier Conventions +========================= + +Context +------- + +We want a common set of conventions for how to reference models in various situations, such as: + +* From a model in a different app, but in the same Python process. +* From a different server. + +Decision +-------- + +The content-related data models in our system will use the following convention for identifier fields: + +Primary Key + The primary key will be a BigAutoField. This will usually be ``id``, but it can be different if the model builds off another one via a ``OneToOneField`` primary key. Other apps that are running in the same process should directly make foreign key field references to this. This value will not change. + +UUID + The ``uuid`` field is a randomly generated UUID4 that is globally unique and should be treated as immutable. If you are making a reference to this record in another system (e.g. an external service), this is the identifier to store. + +Key + The ``key`` field is chosen by apps or users, and is the most likely piece to be human-readable (though it doesn't have to be). These values are only locally unique within a given scope, such as a particular LearningPackage or ComponentVersion. + + The correlates most closely to OpaqueKeys, though they are not precisely the same. In particular, we don't want to directly store BlockUsageKeys that have the LearningContextKey baked into them, because that makes it much harder to change the LearningContextKey later, e.g. if we ever want to change a CourseKey for a course. Different models can choose whether the ``key`` value is mutable or not, but outside users should assume that it can change, even if it rarely does in practice. + +Implementation Notes +-------------------- + +Helpers to generate these field types are in the ``openedx_learning.lib.fields`` module. + +Rejected Alternatives +--------------------- + +A number of names were considered for ``key``, and were rejected for different reasons: + +* ``identifier`` is used in some standards like QTI, but it's too easily confused with ``id`` or the general concept of the three types of identity-related fields we have. +* ``slug`` was considered, but ultimately rejected because that implied these fields would be human-readable, and that's not guaranteed. Most XBlock content that comes from MongoDB will be using GUIDs, for instance. diff --git a/docs/decisions/0006-app-label-prefix.rst b/docs/decisions/0006-app-label-prefix.rst new file mode 100644 index 000000000..376d079e5 --- /dev/null +++ b/docs/decisions/0006-app-label-prefix.rst @@ -0,0 +1,13 @@ +6. App Label Prefix +=================== + +Context +------- + +We want this repo to be useful in different Django projects outside of just edx-platform, and we want to avoid downstream collisions with our app names. + + +Decision +-------- + +All apps in this repo will create a default AppConfig that sets the ``label`` to have a prefix of ``oel_`` before the app name. So if the app name is ``publishing``, the ``label`` will be ``oel_publishing``. This means that all generated database tables will similarly be prefixed with ``oel_``. diff --git a/olx_importer/management/commands/load_components.py b/olx_importer/management/commands/load_components.py index 08fc47cc6..d987751b8 100644 --- a/olx_importer/management/commands/load_components.py +++ b/olx_importer/management/commands/load_components.py @@ -18,33 +18,24 @@ changed?" in order to decide if we really make a new ComponentVersion or not. """ from datetime import datetime, timezone -import codecs import logging import mimetypes import pathlib import re import xml.etree.ElementTree as ET -from django.core.files.base import ContentFile -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from django.db import transaction -from openedx_learning.core.publishing.models import LearningPackage, PublishLogEntry -from openedx_learning.core.components.models import ( - Component, - ComponentVersion, - ComponentVersionRawContent, - ComponentPublishLogEntry, - PublishedComponent, - RawContent, - TextContent, -) -from openedx_learning.lib.fields import create_hash_digest +# Model references to remove +from openedx_learning.core.components import api as components_api +from openedx_learning.core.contents import api as contents_api +from openedx_learning.core.publishing import api as publishing_api SUPPORTED_TYPES = ["problem", "video", "html"] logger = logging.getLogger(__name__) - + class Command(BaseCommand): help = "Load sample Component data from course export" @@ -70,12 +61,12 @@ def init_known_types(self): def add_arguments(self, parser): parser.add_argument("course_data_path", type=pathlib.Path) - parser.add_argument("learning_package_identifier", type=str) + parser.add_argument("learning_package_key", type=str) - def handle(self, course_data_path, learning_package_identifier, **options): + def handle(self, course_data_path, learning_package_key, **options): self.course_data_path = course_data_path - self.learning_package_identifier = learning_package_identifier - self.load_course_data(learning_package_identifier) + self.learning_package_key = learning_package_key + self.load_course_data(learning_package_key) def get_course_title(self): course_type_dir = self.course_data_path / "course" @@ -83,74 +74,62 @@ def get_course_title(self): course_root = ET.parse(course_xml_file).getroot() return course_root.attrib.get("display_name", "Unknown Course") - def load_course_data(self, learning_package_identifier): + def load_course_data(self, learning_package_key): print(f"Importing course from: {self.course_data_path}") now = datetime.now(timezone.utc) title = self.get_course_title() + if publishing_api.learning_package_exists(learning_package_key): + raise CommandError( + f"{learning_package_key} already exists. " + "This command currently only supports initial import." + ) + with transaction.atomic(): - learning_package, _created = LearningPackage.objects.get_or_create( - identifier=learning_package_identifier, - defaults={ - "title": title, - "created": now, - "updated": now, - }, + self.learning_package = publishing_api.create_learning_package( + learning_package_key, title, created=now, ) - self.learning_package = learning_package + for block_type in SUPPORTED_TYPES: + self.import_block_type(block_type, now) #, publish_log_entry) - publish_log_entry = PublishLogEntry.objects.create( - learning_package=learning_package, - message="Initial Import", - published_at=now, - published_by=None, + publishing_api.publish_all_drafts( + self.learning_package.id, + message="Initial Import from load_components script" ) - for block_type in SUPPORTED_TYPES: - self.import_block_type(block_type, now, publish_log_entry) def create_content(self, static_local_path, now, component_version): - identifier = pathlib.Path("static") / static_local_path - real_path = self.course_data_path / identifier - mime_type, _encoding = mimetypes.guess_type(identifier) + key = pathlib.Path("static") / static_local_path + real_path = self.course_data_path / key + mime_type, _encoding = mimetypes.guess_type(key) if mime_type is None: logger.error( - f" no mimetype found for {real_path}, defaulting to application/binary" + f' no mimetype found for "{real_path}", defaulting to application/binary' ) mime_type = "application/binary" try: data_bytes = real_path.read_bytes() except FileNotFoundError: - logger.warning(f" Static reference not found: {real_path}") + logger.warning(f' Static reference not found: "{real_path}"') return # Might as well bail if we can't find the file. - hash_digest = create_hash_digest(data_bytes) - - raw_content, created = RawContent.objects.get_or_create( - learning_package=self.learning_package, + raw_content, _created = contents_api.get_or_create_raw_content( + learning_package_id=self.learning_package.id, + data_bytes=data_bytes, mime_type=mime_type, - hash_digest=hash_digest, - defaults=dict( - size=len(data_bytes), - created=now, - ), + created=now, ) - if created: - raw_content.file.save( - f"{raw_content.learning_package.uuid}/{hash_digest}", - ContentFile(data_bytes), - ) - - ComponentVersionRawContent.objects.get_or_create( - component_version=component_version, - raw_content=raw_content, - identifier=identifier, + components_api.add_content_to_component_version( + component_version, + raw_content_id=raw_content.id, + key=key, learner_downloadable=True, ) - def import_block_type(self, block_type, now, publish_log_entry): + def import_block_type(self, block_type, now): # , publish_log_entry): components_found = 0 + components_skipped = 0 # Find everything that looks like a reference to a static file appearing # in attribute quotes, stripping off the querystring at the end. This is @@ -158,90 +137,50 @@ def import_block_type(self, block_type, now, publish_log_entry): # outside of tag declarations as well. static_files_regex = re.compile(r"""['"]\/static\/(.+?)["'\?]""") block_data_path = self.course_data_path / block_type + namespace="xblock.v1" for xml_file_path in block_data_path.glob("*.xml"): components_found += 1 - identifier = xml_file_path.stem - - # Find or create the Component itself - component, _created = Component.objects.get_or_create( - learning_package=self.learning_package, - namespace="xblock.v1", - type=block_type, - identifier=identifier, - defaults={ - "created": now, - }, - ) - - # Create the RawContent entry for the raw data... - data_bytes = xml_file_path.read_bytes() - hash_digest = create_hash_digest(data_bytes) - - raw_content, created = RawContent.objects.get_or_create( - learning_package=self.learning_package, - mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml", - hash_digest=hash_digest, - defaults=dict( - # text=data_str, - size=len(data_bytes), - created=now, - ), - ) - if created: - raw_content.file.save( - f"{raw_content.learning_package.uuid}/{hash_digest}", - ContentFile(data_bytes), - ) - - # Decode as utf-8-sig in order to strip any BOM from the data. - data_str = codecs.decode(data_bytes, "utf-8-sig") - TextContent.objects.create( - raw_content=raw_content, - text=data_str, - length=len(data_str), - ) - - # TODO: Get associated file contents, both with the static regex, as - # well as with XBlock-specific code that examines attributes in - # video and HTML tag definitions. + local_key = xml_file_path.stem + # Do some basic parsing of the content to see if it's even well + # constructed enough to add (or whether we should skip/error on it). try: - block_root = ET.fromstring(data_str) + block_root = ET.parse(xml_file_path).getroot() except ET.ParseError as err: logger.error(f"Parse error for {xml_file_path}: {err}") + components_skipped += 1 continue - + display_name = block_root.attrib.get("display_name", "") - - # Create the ComponentVersion - component_version = ComponentVersion.objects.create( - component=component, - version_num=1, # This only works for initial import + _component, component_version = components_api.create_component_and_version( + learning_package_id=self.learning_package.id, + namespace=namespace, + type=block_type, + local_key=local_key, title=display_name, created=now, created_by=None, ) - ComponentVersionRawContent.objects.create( - component_version=component_version, - raw_content=raw_content, - identifier="source.xml", - learner_downloadable=False, - ) - static_files_found = static_files_regex.findall(data_str) - for static_local_path in static_files_found: - self.create_content(static_local_path, now, component_version) - # Mark that Component as Published - component_publish_log_entry = ComponentPublishLogEntry.objects.create( - component=component, - component_version=component_version, - publish_log_entry=publish_log_entry, + # Create the RawContent entry for the raw data... + data_bytes = xml_file_path.read_bytes() + text_content, _created = contents_api.get_or_create_text_content_from_bytes( + learning_package_id=self.learning_package.id, + data_bytes=data_bytes, + mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml", + created=now, ) - PublishedComponent.objects.create( - component=component, - component_version=component_version, - component_publish_log_entry=component_publish_log_entry, + # Add the OLX source text to the ComponentVersion + components_api.add_content_to_component_version( + component_version, + raw_content_id=text_content.pk, + key="source.xml", + learner_downloadable=False ) - print(f"{block_type}: {components_found}") + # Cycle through static assets references and add those as well... + for static_local_path in static_files_regex.findall(text_content.text): + self.create_content(static_local_path, now, component_version) + + print(f"{block_type}: {components_found} (skipped: {components_skipped})") diff --git a/openedx_learning/contrib/media_server/urls.py b/openedx_learning/contrib/media_server/urls.py index 554966ace..a76155dec 100644 --- a/openedx_learning/contrib/media_server/urls.py +++ b/openedx_learning/contrib/media_server/urls.py @@ -6,8 +6,8 @@ path( ( "component_asset/" - "/" - "/" + "/" + "/" "/" "" ), diff --git a/openedx_learning/contrib/media_server/views.py b/openedx_learning/contrib/media_server/views.py index 86e92d131..d376283eb 100644 --- a/openedx_learning/contrib/media_server/views.py +++ b/openedx_learning/contrib/media_server/views.py @@ -8,7 +8,7 @@ def component_asset( - request, learning_package_identifier, component_identifier, version_num, asset_path + request, learning_package_key, component_key, version_num, asset_path ): """ Serve the ComponentVersion asset data. @@ -25,7 +25,7 @@ def component_asset( """ try: cvc = get_component_version_content( - learning_package_identifier, component_identifier, version_num, asset_path + learning_package_key, component_key, version_num, asset_path ) except ObjectDoesNotExist: raise Http404("File not found") diff --git a/openedx_learning/core/components/admin.py b/openedx_learning/core/components/admin.py index 10712d6a9..34f091d92 100644 --- a/openedx_learning/core/components/admin.py +++ b/openedx_learning/core/components/admin.py @@ -1,5 +1,4 @@ from django.contrib import admin -from django.db.models.aggregates import Count, Sum from django.template.defaultfilters import filesizeformat from django.urls import reverse from django.utils.html import format_html @@ -8,8 +7,6 @@ Component, ComponentVersion, ComponentVersionRawContent, - PublishedComponent, - RawContent, ) from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin @@ -17,13 +14,13 @@ class ComponentVersionInline(admin.TabularInline): model = ComponentVersion fields = ["version_num", "created", "title", "format_uuid"] - readonly_fields = ["version_num", "created", "title", "format_uuid"] + readonly_fields = ["version_num", "created", "title", "uuid", "format_uuid"] extra = 0 def format_uuid(self, cv_obj): return format_html( '{}', - reverse("admin:components_componentversion_change", args=(cv_obj.id,)), + reverse("admin:oel_components_componentversion_change", args=(cv_obj.pk,)), cv_obj.uuid, ) @@ -32,113 +29,44 @@ def format_uuid(self, cv_obj): @admin.register(Component) class ComponentAdmin(ReadOnlyModelAdmin): - list_display = ("identifier", "uuid", "namespace", "type", "created") + list_display = ("key", "uuid", "namespace", "type", "created") readonly_fields = [ "learning_package", "uuid", "namespace", "type", - "identifier", + "key", "created", ] list_filter = ("type", "learning_package") - search_fields = ["uuid", "identifier"] + search_fields = ["publishable_entity__uuid", "publishable_entity__key"] inlines = [ComponentVersionInline] -@admin.register(PublishedComponent) -class PublishedComponentAdmin(ReadOnlyModelAdmin): - model = PublishedComponent +class RawContentInline(admin.TabularInline): + model = ComponentVersion.raw_contents.through def get_queryset(self, request): queryset = super().get_queryset(request) - return ( - queryset.select_related( - "component", - "component__learning_package", - "component_version", - "component_publish_log_entry__publish_log_entry", - ) - .annotate(size=Sum("component_version__raw_contents__size")) - .annotate(content_count=Count("component_version__raw_contents")) + return queryset.select_related( + "raw_content", + "raw_content__learning_package", + "raw_content__text_content", + "component_version", + "component_version__publishable_entity_version", + "component_version__component", + "component_version__component__publishable_entity", ) - readonly_fields = ["component", "component_version", "component_publish_log_entry"] - list_display = [ - "identifier", - "version", - "title", - "published_at", - "type", - "content_count", - "size", - "learning_package", - ] - list_filter = ["component__type", "component__learning_package"] - search_fields = [ - "component__uuid", - "component__identifier", - "component_version__uuid", - "component_version__title", - ] - - def learning_package(self, pc): - return pc.component.learning_package.identifier - - def published_at(self, pc): - return pc.component_publish_log_entry.publish_log_entry.published_at - - def identifier(self, pc): - """ - Link to published ComponentVersion with Component identifier as text. - - This is a little weird in that we're showing the Component identifier, - but linking to the published ComponentVersion. But this is what you want - to link to most of the time, as the link to the Component has almost no - information in it (and can be accessed from the ComponentVersion details - page anyhow). - """ - return format_html( - '{}', - reverse( - "admin:components_componentversion_change", - args=(pc.component_version_id,), - ), - pc.component.identifier, - ) - - def content_count(self, pc): - return pc.content_count - - content_count.short_description = "#" - - def size(self, pc): - return filesizeformat(pc.size) - - def namespace(self, pc): - return pc.component.namespace - - def type(self, pc): - return pc.component.type - - def version(self, pc): - return pc.component_version.version_num - - def title(self, pc): - return pc.component_version.title - - -class RawContentInline(admin.TabularInline): - model = ComponentVersion.raw_contents.through fields = [ - "format_identifier", + "format_key", "format_size", "learner_downloadable", "rendered_data", ] readonly_fields = [ "raw_content", - "format_identifier", + "format_key", "format_size", "rendered_data", ] @@ -152,15 +80,15 @@ def format_size(self, cvc_obj): format_size.short_description = "Size" - def format_identifier(self, cvc_obj): + def format_key(self, cvc_obj): return format_html( '{}', link_for_cvc(cvc_obj), # reverse("admin:components_content_change", args=(cvc_obj.content_id,)), - cvc_obj.identifier, + cvc_obj.key, ) - format_identifier.short_description = "Identifier" + format_key.short_description = "Key" @admin.register(ComponentVersion) @@ -180,41 +108,16 @@ class ComponentVersionAdmin(ReadOnlyModelAdmin): "version_num", "created", ] + list_display = ["component", "version_num", "uuid", "created"] inlines = [RawContentInline] - -@admin.register(RawContent) -class RawContentAdmin(ReadOnlyModelAdmin): - list_display = [ - "hash_digest", - "learning_package", - "mime_type", - "size", - "created", - ] - fields = [ - "learning_package", - "hash_digest", - "mime_type", - "size", - "created", - "text_preview", - ] - readonly_fields = [ - "learning_package", - "hash_digest", - "mime_type", - "size", - "created", - "text_preview", - ] - list_filter = ("mime_type", "learning_package") - search_fields = ("hash_digest",) - - def text_preview(self, raw_content_obj): - if hasattr(raw_content_obj, "text_content"): - return format_text_for_admin_display(raw_content_obj.text_content.text) - return "" + def get_queryset(self, request): + queryset = super().get_queryset(request) + return queryset.select_related( + "component", + "component__publishable_entity", + "publishable_entity_version", + ) def is_displayable_text(mime_type): @@ -241,10 +144,10 @@ def is_displayable_text(mime_type): def link_for_cvc(cvc_obj: ComponentVersionRawContent): return "/media_server/component_asset/{}/{}/{}/{}".format( - cvc_obj.raw_content.learning_package.identifier, - cvc_obj.component_version.component.identifier, + cvc_obj.raw_content.learning_package.key, + cvc_obj.component_version.component.key, cvc_obj.component_version.version_num, - cvc_obj.identifier, + cvc_obj.key, ) @@ -263,10 +166,10 @@ def content_preview(cvc_obj: ComponentVersionRawContent): '', # TODO: configure with settings value: "/media_server/component_asset/{}/{}/{}/{}".format( - cvc_obj.raw_content.learning_package.identifier, - cvc_obj.component_version.component.identifier, + cvc_obj.raw_content.learning_package.key, + cvc_obj.component_version.component.key, cvc_obj.component_version.version_num, - cvc_obj.identifier, + cvc_obj.key, ), ) diff --git a/openedx_learning/core/components/api.py b/openedx_learning/core/components/api.py index 41d92f462..1a3972dec 100644 --- a/openedx_learning/core/components/api.py +++ b/openedx_learning/core/components/api.py @@ -1,29 +1,90 @@ """ -Public API for querying and manipulating Components. +Components API (warning: UNSTABLE, in progress API) -This API is still under construction and should not be considered "stable" until -this repo hits a 1.0 release. +These functions are often going to be simple-looking write operations, but there +is bookkeeping logic needed across multiple models to keep state consistent. You +can read from the models directly for various queries if necessary–we do this in +the Django Admin for instance. But you should NEVER mutate this app's models +directly, since there might be other related models that you may not know about. + +Please look at the models.py file for more information about the kinds of data +are stored in this app. """ -from django.db.models import Q from pathlib import Path -from .models import ComponentVersionRawContent +from django.db.models import Q +from django.db.transaction import atomic + +from ..publishing.api import ( + create_publishable_entity, + create_publishable_entity_version, +) +from .models import ComponentVersionRawContent, Component, ComponentVersion + + +def create_component( + learning_package_id, namespace, type, local_key, created, created_by +): + key = f"{namespace}:{type}@{local_key}" + with atomic(): + publishable_entity = create_publishable_entity( + learning_package_id, key, created, created_by + ) + component = Component.objects.create( + publishable_entity=publishable_entity, + learning_package_id=learning_package_id, + namespace=namespace, + type=type, + local_key=local_key, + ) + return component + + +def create_component_version(component_pk, version_num, title, created, created_by): + with atomic(): + publishable_entity_version = create_publishable_entity_version( + entity_id=component_pk, + version_num=version_num, + title=title, + created=created, + created_by=created_by, + ) + component_version = ComponentVersion.objects.create( + publishable_entity_version=publishable_entity_version, + component_id=component_pk, + ) + return component_version + + +def create_component_and_version( + learning_package_id, namespace, type, local_key, title, created, created_by +): + with atomic(): + component = create_component( + learning_package_id, namespace, type, local_key, created, created_by + ) + component_version = create_component_version( + component.pk, + version_num=1, + title=title, + created=created, + created_by=created_by, + ) + return (component, component_version) + + +def get_component_by_pk(component_pk): + return Component.objects.get(pk=component_pk) def get_component_version_content( - learning_package_identifier: str, - component_identifier: str, + learning_package_key: str, + component_key: str, version_num: int, - identifier: Path, + key: Path, ) -> ComponentVersionRawContent: """ - Look up ComponentVersionRawContent by human readable identifiers. - - Notes: - - 1. This function is returning a model, which we generally frown upon. - 2. I'd like to experiment with different lookup methods - (see https://github.com/openedx/openedx-learning/issues/34) + Look up ComponentVersionRawContent by human readable keys. Can raise a django.core.exceptions.ObjectDoesNotExist error if there is no matching ComponentVersionRawContent. @@ -34,10 +95,20 @@ def get_component_version_content( "component_version__component", "component_version__component__learning_package", ).get( - Q( - component_version__component__learning_package__identifier=learning_package_identifier - ) - & Q(component_version__component__identifier=component_identifier) - & Q(component_version__version_num=version_num) - & Q(identifier=identifier) + Q(component_version__component__learning_package__key=learning_package_key) + & Q(component_version__component__publishable_entity__key=component_key) + & Q(component_version__publishable_entity_version__version_num=version_num) + & Q(key=key) + ) + + +def add_content_to_component_version( + component_version, raw_content_id, key, learner_downloadable=False +): + cvrc, _created = ComponentVersionRawContent.objects.get_or_create( + component_version=component_version, + raw_content_id=raw_content_id, + key=key, + learner_downloadable=learner_downloadable, ) + return cvrc diff --git a/openedx_learning/core/components/apps.py b/openedx_learning/core/components/apps.py index 0354fcb71..9b25954ac 100644 --- a/openedx_learning/core/components/apps.py +++ b/openedx_learning/core/components/apps.py @@ -9,3 +9,13 @@ class ComponentsConfig(AppConfig): name = "openedx_learning.core.components" verbose_name = "Learning Core: Components" default_auto_field = "django.db.models.BigAutoField" + label = "oel_components" + + def ready(self): + """ + Register Component and ComponentVersion. + """ + from ..publishing.api import register_content_models + from .models import Component, ComponentVersion + + register_content_models(Component, ComponentVersion) diff --git a/openedx_learning/core/components/migrations/0001_initial.py b/openedx_learning/core/components/migrations/0001_initial.py index f4a2c284b..ac379c67d 100644 --- a/openedx_learning/core/components/migrations/0001_initial.py +++ b/openedx_learning/core/components/migrations/0001_initial.py @@ -1,10 +1,7 @@ -# Generated by Django 4.1.6 on 2023-04-14 00:12 +# Generated by Django 3.2.18 on 2023-05-11 02:07 -from django.conf import settings -import django.core.validators from django.db import migrations, models import django.db.models.deletion -import openedx_learning.lib.validators import uuid @@ -12,8 +9,8 @@ class Migration(migrations.Migration): initial = True dependencies = [ - ("publishing", "0001_initial"), - migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("oel_publishing", "0001_initial"), + ("oel_contents", "0001_initial"), ] operations = [ @@ -21,39 +18,22 @@ class Migration(migrations.Migration): name="Component", fields=[ ( - "id", - models.BigAutoField( - auto_created=True, + "publishable_entity", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, primary_key=True, serialize=False, - verbose_name="ID", - ), - ), - ( - "uuid", - models.UUIDField( - default=uuid.uuid4, - editable=False, - unique=True, - verbose_name="UUID", + to="oel_publishing.publishableentity", ), ), ("namespace", models.CharField(max_length=100)), ("type", models.CharField(blank=True, max_length=100)), - ("identifier", models.CharField(max_length=255)), - ( - "created", - models.DateTimeField( - validators=[ - openedx_learning.lib.validators.validate_utc_datetime - ] - ), - ), + ("local_key", models.CharField(max_length=255)), ( "learning_package", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, - to="publishing.learningpackage", + to="oel_publishing.learningpackage", ), ), ], @@ -66,51 +46,20 @@ class Migration(migrations.Migration): name="ComponentVersion", fields=[ ( - "id", - models.BigAutoField( - auto_created=True, + "publishable_entity_version", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, primary_key=True, serialize=False, - verbose_name="ID", - ), - ), - ( - "uuid", - models.UUIDField( - default=uuid.uuid4, - editable=False, - unique=True, - verbose_name="UUID", - ), - ), - ("title", models.CharField(blank=True, default="", max_length=1000)), - ( - "version_num", - models.PositiveBigIntegerField( - validators=[django.core.validators.MinValueValidator(1)] - ), - ), - ( - "created", - models.DateTimeField( - validators=[ - openedx_learning.lib.validators.validate_utc_datetime - ] + to="oel_publishing.publishableentityversion", ), ), ( "component", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, - to="components.component", - ), - ), - ( - "created_by", - models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.SET_NULL, - to=settings.AUTH_USER_MODEL, + related_name="versions", + to="oel_components.component", ), ), ], @@ -119,83 +68,6 @@ class Migration(migrations.Migration): "verbose_name_plural": "Component Versions", }, ), - migrations.CreateModel( - name="RawContent", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ("hash_digest", models.CharField(editable=False, max_length=40)), - ("mime_type", models.CharField(max_length=255)), - ( - "size", - models.PositiveBigIntegerField( - validators=[django.core.validators.MaxValueValidator(50000000)] - ), - ), - ( - "created", - models.DateTimeField( - validators=[ - openedx_learning.lib.validators.validate_utc_datetime - ] - ), - ), - ("file", models.FileField(null=True, upload_to="")), - ( - "learning_package", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="publishing.learningpackage", - ), - ), - ], - options={ - "verbose_name": "Raw Content", - "verbose_name_plural": "Raw Contents", - }, - ), - migrations.CreateModel( - name="PublishedComponent", - fields=[ - ( - "component", - models.OneToOneField( - on_delete=django.db.models.deletion.RESTRICT, - primary_key=True, - serialize=False, - to="components.component", - ), - ), - ], - options={ - "verbose_name": "Published Component", - "verbose_name_plural": "Published Components", - }, - ), - migrations.CreateModel( - name="TextContent", - fields=[ - ( - "raw_content", - models.OneToOneField( - on_delete=django.db.models.deletion.CASCADE, - primary_key=True, - related_name="text_content", - serialize=False, - to="components.rawcontent", - ), - ), - ("text", models.TextField(blank=True, max_length=100000)), - ("length", models.PositiveIntegerField()), - ], - ), migrations.CreateModel( name="ComponentVersionRawContent", fields=[ @@ -217,20 +89,20 @@ class Migration(migrations.Migration): verbose_name="UUID", ), ), - ("identifier", models.CharField(max_length=255)), + ("key", models.CharField(max_length=255)), ("learner_downloadable", models.BooleanField(default=False)), ( "component_version", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, - to="components.componentversion", + to="oel_components.componentversion", ), ), ( "raw_content", models.ForeignKey( on_delete=django.db.models.deletion.RESTRICT, - to="components.rawcontent", + to="oel_contents.rawcontent", ), ), ], @@ -240,148 +112,42 @@ class Migration(migrations.Migration): name="raw_contents", field=models.ManyToManyField( related_name="component_versions", - through="components.ComponentVersionRawContent", - to="components.rawcontent", - ), - ), - migrations.CreateModel( - name="ComponentPublishLogEntry", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "component", - models.ForeignKey( - on_delete=django.db.models.deletion.RESTRICT, - to="components.component", - ), - ), - ( - "component_version", - models.ForeignKey( - null=True, - on_delete=django.db.models.deletion.RESTRICT, - to="components.componentversion", - ), - ), - ( - "publish_log_entry", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - to="publishing.publishlogentry", - ), - ), - ], - ), - migrations.AddIndex( - model_name="rawcontent", - index=models.Index( - fields=["learning_package", "mime_type"], - name="content_idx_lp_mime_type", - ), - ), - migrations.AddIndex( - model_name="rawcontent", - index=models.Index( - fields=["learning_package", "-size"], name="content_idx_lp_rsize" - ), - ), - migrations.AddIndex( - model_name="rawcontent", - index=models.Index( - fields=["learning_package", "-created"], name="content_idx_lp_rcreated" - ), - ), - migrations.AddConstraint( - model_name="rawcontent", - constraint=models.UniqueConstraint( - fields=("learning_package", "mime_type", "hash_digest"), - name="content_uniq_lc_mime_type_hash_digest", - ), - ), - migrations.AddField( - model_name="publishedcomponent", - name="component_publish_log_entry", - field=models.ForeignKey( - on_delete=django.db.models.deletion.RESTRICT, - to="components.componentpublishlogentry", - ), - ), - migrations.AddField( - model_name="publishedcomponent", - name="component_version", - field=models.OneToOneField( - null=True, - on_delete=django.db.models.deletion.RESTRICT, - to="components.componentversion", + through="oel_components.ComponentVersionRawContent", + to="oel_contents.RawContent", ), ), migrations.AddIndex( model_name="componentversionrawcontent", index=models.Index( - fields=["raw_content", "component_version"], name="cvrawcontent_c_cv" + fields=["raw_content", "component_version"], + name="oel_cvrawcontent_c_cv", ), ), migrations.AddIndex( model_name="componentversionrawcontent", index=models.Index( - fields=["component_version", "raw_content"], name="cvrawcontent_cv_d" + fields=["component_version", "raw_content"], + name="oel_cvrawcontent_cv_d", ), ), migrations.AddConstraint( model_name="componentversionrawcontent", constraint=models.UniqueConstraint( - fields=("component_version", "identifier"), - name="cvrawcontent_uniq_cv_id", + fields=("component_version", "key"), name="oel_cvrawcontent_uniq_cv_key" ), ), - migrations.AddIndex( - model_name="componentversion", - index=models.Index( - fields=["component", "-created"], name="cv_idx_component_rcreated" - ), - ), - migrations.AddIndex( - model_name="componentversion", - index=models.Index(fields=["title"], name="cv_idx_title"), - ), - migrations.AddConstraint( - model_name="componentversion", - constraint=models.UniqueConstraint( - fields=("component", "version_num"), - name="cv_uniq_component_version_num", - ), - ), - migrations.AddIndex( - model_name="component", - index=models.Index( - fields=["learning_package", "identifier"], - name="component_idx_lp_identifier", - ), - ), - migrations.AddIndex( - model_name="component", - index=models.Index(fields=["identifier"], name="component_idx_identifier"), - ), migrations.AddIndex( model_name="component", index=models.Index( - fields=["learning_package", "-created"], - name="component_idx_lp_rcreated", + fields=["learning_package", "namespace", "type", "local_key"], + name="oel_component_idx_lc_ns_t_lk", ), ), migrations.AddConstraint( model_name="component", constraint=models.UniqueConstraint( - fields=("learning_package", "namespace", "type", "identifier"), - name="component_uniq_lc_ns_type_identifier", + fields=("learning_package", "namespace", "type", "local_key"), + name="oel_component_uniq_lc_ns_t_lk", ), ), ] diff --git a/openedx_learning/core/components/models.py b/openedx_learning/core/components/models.py index b1d683394..b773c3f92 100644 --- a/openedx_learning/core/components/models.py +++ b/openedx_learning/core/components/models.py @@ -6,33 +6,29 @@ Problem), but little beyond that. Components have one or more ComponentVersions, which represent saved versions of -that Component. At any time, there is at most one published ComponentVersion for -a Component in a LearningPackage (there can be zero if it's unpublished). The -publish status is tracked in PublishedComponent, with historical publish data in -ComponentPublishLogEntry. - -RawContent is a simple model holding unversioned, raw data, along with some -simple metadata like size and MIME type. +that Component. Managing the publishing of these versions is handled through the +publishing app. Component maps 1:1 to PublishableEntity and ComponentVersion +maps 1:1 to PublishableEntityVersion. Multiple pieces of RawContent may be associated with a ComponentVersion, through the ComponentVersionRawContent model. ComponentVersionRawContent allows to -specify a Component-local identifier. We're using this like a file path by -convention, but it's possible we might want to have special identifiers later. +specify a ComponentVersion-local identifier. We're using this like a file path +by convention, but it's possible we might want to have special identifiers +later. """ from django.db import models -from django.conf import settings -from django.core.validators import MinValueValidator, MaxValueValidator - -from openedx_learning.lib.fields import ( - hash_field, - identifier_field, - immutable_uuid_field, - manual_date_time_field, +from django.db.models import F, Q + +from openedx_learning.lib.fields import key_field, immutable_uuid_field +from ..publishing.models import LearningPackage +from ..publishing.model_mixins import ( + PublishableEntityMixin, + PublishableEntityVersionMixin, ) -from ..publishing.models import LearningPackage, PublishLogEntry +from ..contents.models import RawContent -class Component(models.Model): +class Component(PublishableEntityMixin): """ This represents any Component that has ever existed in a LearningPackage. @@ -47,33 +43,36 @@ class Component(models.Model): associated with the ComponentVersion model and the RawContent that ComponentVersions are associated with. - A Component belongs to one and only one LearningPackage. + A Component belongs to exactly one LearningPackage. - How to use this model - --------------------- + Identifiers + ----------- - Make a foreign key to the Component model when you need a stable reference - that will exist for as long as the LearningPackage itself exists. It is - possible for an Component to have no published ComponentVersion, either - because it was never published or because it's been "deleted" (made - unavailable) at some point, but the Component will continue to exist. + Components have a ``publishable_entity`` OneToOneField to the ``publishing`` + app's PublishableEntity field, and it uses this as its primary key. Please + see PublishableEntity's docstring for how you should use its ``uuid`` and + ``key`` fields. - The UUID should be treated as immutable. + State Consistency + ----------------- - The identifier field *is* mutable, but changing it will affect all - ComponentVersions. + The ``key`` field on Component's ``publishable_entity`` is dervied from the + ``(namespace, type, local_key)`` fields in this model. We don't support + changing the keys yet, but if we do, those values need to be kept in sync. - If you are referencing this model from within the same process, use a - foreign key to the id. If you are referencing this Component from an - external system/service, use the UUID. The identifier is the part that is - most likely to be human-readable, and may be exported/copied, but try not to - rely on it, since this value may change. + How build on this model + ----------------------- - Note: When we actually implement the ability to change identifiers, we - should make a history table and a modified attribute on this model. + Make a foreign key to the Component model when you need a stable reference + that will exist for as long as the LearningPackage itself exists. """ - uuid = immutable_uuid_field() + # This foreign key is technically redundant because we're already locked to + # a single LearningPackage through our publishable_entity relation. However, having + # this foreign key directly allows us to make indexes that efficiently + # query by other Component fields within a given LearningPackage, which is + # going to be a common use case (and we can't make a compound index using + # columns from different tables). learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) # namespace and type work together to help figure out what Component needs @@ -87,53 +86,42 @@ class Component(models.Model): # the UsageKey. type = models.CharField(max_length=100, null=False, blank=True) - # identifier is local to a learning_package + namespace + type. For XBlocks, - # this is the block_id part of the UsageKey, which usually shows up in the - # OLX as the url_name attribute. - identifier = identifier_field() - - created = manual_date_time_field() + # local_key is an identifier that is local to the (namespace, type). The + # publishable.key should be calculated as a combination of (namespace, type, + # local_key). + local_key = key_field() class Meta: constraints = [ - # The combination of (namespace, type, identifier) is unique within + # The combination of (namespace, type, local_key) is unique within # a given LearningPackage. Note that this means it is possible to - # have two Components that have the exact same identifier. An XBlock + # have two Components that have the exact same local_key. An XBlock # would be modeled as namespace="xblock.v1" with the type as the - # block_type, so the identifier would only be the block_id (the + # block_type, so the local_key would only be the block_id (the # very last part of the UsageKey). models.UniqueConstraint( fields=[ "learning_package", "namespace", "type", - "identifier", + "local_key", ], - name="component_uniq_lc_ns_type_identifier", - ) + name="oel_component_uniq_lc_ns_t_lk", + ), ] indexes = [ - # LearningPackage Identifier Index: - # * Search by identifier (without having to specify namespace and - # type). This kind of freeform search will likely be common. + # Global Namespace/Type/Local-Key Index: + # * Search by the different Components fields across all Learning + # Packages on the site. This would be a support-oriented tool + # from Django Admin. models.Index( - fields=["learning_package", "identifier"], - name="component_idx_lp_identifier", - ), - # Global Identifier Index: - # * Search by identifier across all Components on the site. This - # would be a support-oriented tool from Django Admin. - models.Index( - fields=["identifier"], - name="component_idx_identifier", - ), - # LearningPackage (reverse) Created Index: - # * Search for most recently *created* Components for a given - # LearningPackage, since they're the most likely to be actively - # worked on. - models.Index( - fields=["learning_package", "-created"], - name="component_idx_lp_rcreated", + fields=[ + "learning_package", + "namespace", + "type", + "local_key", + ], + name="oel_component_idx_lc_ns_t_lk", ), ] @@ -142,296 +130,37 @@ class Meta: verbose_name_plural = "Components" def __str__(self): - return f"{self.identifier}" + return f"{self.namespace}:{self.type}:{self.local_key}" -class ComponentVersion(models.Model): +class ComponentVersion(PublishableEntityVersionMixin): """ A particular version of a Component. - This holds the title (because that's versioned information) and the contents - via a M:M relationship with RawContent via ComponentVersionRawContent. - - * Each ComponentVersion belongs to one and only one Component. - * ComponentVersions have a version_num that should increment by one with - each new version. + This holds the content using a M:M relationship with RawContent via + ComponentVersionRawContent. """ - uuid = immutable_uuid_field() - component = models.ForeignKey(Component, on_delete=models.CASCADE) - - # Blank titles are allowed because some Components are built to be used from - # a particular Unit, and the title would be redundant in that context (e.g. - # a "Welcome" video in a "Welcome" Unit). - title = models.CharField(max_length=1000, default="", null=False, blank=True) - - # The version_num starts at 1 and increments by 1 with each new version for - # a given Component. Doing it this way makes it more convenient for users to - # refer to than a hash or UUID value. - version_num = models.PositiveBigIntegerField( - null=False, - validators=[MinValueValidator(1)], - ) - - # All ComponentVersions created as part of the same publish should have the - # exact same created datetime (not off by a handful of microseconds). - created = manual_date_time_field() - - # User who created the ContentVersion. This can be null if the user is later - # removed. Open edX in general doesn't let you remove users, but we should - # try to model it so that this is possible eventually. - created_by = models.ForeignKey( - settings.AUTH_USER_MODEL, - on_delete=models.SET_NULL, - null=True, + # This is technically redundant, since we can get this through + # publishable_entity_version.publishable.component, but this is more + # convenient. + component = models.ForeignKey( + Component, on_delete=models.CASCADE, related_name="versions" ) - # The contents hold the actual interesting data associated with this + # The raw_contents hold the actual interesting data associated with this # ComponentVersion. raw_contents = models.ManyToManyField( - "RawContent", + RawContent, through="ComponentVersionRawContent", related_name="component_versions", ) - def __str__(self): - return f"v{self.version_num}: {self.title}" - class Meta: - constraints = [ - # Prevent the situation where we have multiple ComponentVersions - # claiming to be the same version_num for a given Component. This - # can happen if there's a race condition between concurrent editors - # in different browsers, working on the same Component. With this - # constraint, one of those processes will raise an IntegrityError. - models.UniqueConstraint( - fields=[ - "component", - "version_num", - ], - name="cv_uniq_component_version_num", - ) - ] - indexes = [ - # LearningPackage (reverse) Created Index: - # * Make it cheap to find the most recently created - # ComponentVersions for a given LearningPackage. This represents - # the most recently saved work for a LearningPackage and would - # be the most likely areas to get worked on next. - models.Index( - fields=["component", "-created"], - name="cv_idx_component_rcreated", - ), - # Title Index: - # * Search by title. - models.Index( - fields=[ - "title", - ], - name="cv_idx_title", - ), - ] - - # These are for the Django Admin UI. verbose_name = "Component Version" verbose_name_plural = "Component Versions" -class ComponentPublishLogEntry(models.Model): - """ - This is a historical record of Component publishing. - - When a ComponentVersion is initially created, it's considered a draft. The - act of publishing means we're marking a ContentVersion as the official, - ready-for-use version of this Component. - """ - - publish_log_entry = models.ForeignKey(PublishLogEntry, on_delete=models.CASCADE) - component = models.ForeignKey(Component, on_delete=models.RESTRICT) - component_version = models.ForeignKey( - ComponentVersion, on_delete=models.RESTRICT, null=True - ) - - -class PublishedComponent(models.Model): - """ - For any given Component, what is the currently published ComponentVersion. - - It may be possible for a Component to exist only as a Draft (and thus not - show up in this table). There is only ever one published ComponentVersion - per Component at any given time. - - TODO: Do we need to create a (redundant) title field in this model so that - we can more efficiently search across titles within a LearningPackage? - Probably not an immediate concern because the number of rows currently - shouldn't be > 10,000 in the more extreme cases. - """ - - component = models.OneToOneField( - Component, on_delete=models.RESTRICT, primary_key=True - ) - component_version = models.OneToOneField( - ComponentVersion, - on_delete=models.RESTRICT, - null=True, - ) - component_publish_log_entry = models.ForeignKey( - ComponentPublishLogEntry, - on_delete=models.RESTRICT, - ) - - class Meta: - verbose_name = "Published Component" - verbose_name_plural = "Published Components" - - -class RawContent(models.Model): - """ - This is the most basic piece of raw content data, with no version metadata. - - RawContent stores data using the "file" field. This data is not - auto-normalized in any way, meaning that pieces of content that are - semantically equivalent (e.g. differently spaced/sorted JSON) may result in - new entries. This model is intentionally ignorant of what these things mean, - because it expects supplemental data models to build on top of it. - - Two RawContent instances _can_ have the same hash_digest if they are of - different MIME types. For instance, an empty text file and an empty SRT file - will both hash the same way, but be considered different entities. - - The other fields on RawContent are for data that is intrinsic to the file - data itself (e.g. the size). Any smart parsing of the contents into more - structured metadata should happen in other models that hang off of - RawContent. - - RawContent models are not versioned in any way. The concept of versioning - only exists at a higher level. - - RawContent is optimized for cheap storage, not low latency. It stores - content in a FileField. If you need faster text access across multiple rows, - add a TextContent entry that corresponds to the relevant RawContent. - - If you need to transform this RawContent into more structured data for your - application, create a model with a OneToOneField(primary_key=True) - relationship to RawContent. Just remember that *you should always create the - RawContent entry* first, to ensure content is always exportable, even if - your app goes away in the future. - """ - - # 50 MB is our current limit, based on the current Open edX Studio file - # upload size limit. - MAX_FILE_SIZE = 50_000_000 - - learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) - - # This hash value may be calculated using create_hash_digest from the - # openedx.lib.fields module. - hash_digest = hash_field() - - # MIME type, such as "text/html", "image/png", etc. Per RFC 4288, MIME type - # and sub-type may each be 127 chars, making a max of 255 (including the "/" - # in between). - # - # DO NOT STORE parameters here, e.g. "charset=". We can make a new field if - # that becomes necessary. - mime_type = models.CharField(max_length=255, blank=False, null=False) - - # This is the size of the raw data file in bytes. This can be different than - # the character length, since UTF-8 encoding can use anywhere between 1-4 - # bytes to represent any given character. - size = models.PositiveBigIntegerField( - validators=[MaxValueValidator(MAX_FILE_SIZE)], - ) - - # This should be manually set so that multiple RawContent rows being set in - # the same transaction are created with the same timestamp. The timestamp - # should be UTC. - created = manual_date_time_field() - - # All content for the LearningPackage should be stored in files. See model - # docstring for more details on how to store this data in supplementary data - # models that offer better latency guarantees. - file = models.FileField( - null=True, - storage=settings.OPENEDX_LEARNING.get( - "STORAGE", - settings.DEFAULT_FILE_STORAGE, - ), - ) - - class Meta: - constraints = [ - # Make sure we don't store duplicates of this raw data within the - # same LearningPackage, unless they're of different MIME types. - models.UniqueConstraint( - fields=[ - "learning_package", - "mime_type", - "hash_digest", - ], - name="content_uniq_lc_mime_type_hash_digest", - ), - ] - indexes = [ - # LearningPackage MIME type Index: - # * Break down Content counts by type/subtype with in a - # LearningPackage. - # * Find all the Content in a LearningPackage that matches a - # certain MIME type (e.g. "image/png", "application/pdf". - models.Index( - fields=["learning_package", "mime_type"], - name="content_idx_lp_mime_type", - ), - # LearningPackage (reverse) Size Index: - # * Find largest Content in a LearningPackage. - # * Find the sum of Content size for a given LearningPackage. - models.Index( - fields=["learning_package", "-size"], - name="content_idx_lp_rsize", - ), - # LearningPackage (reverse) Created Index: - # * Find most recently added Content. - models.Index( - fields=["learning_package", "-created"], - name="content_idx_lp_rcreated", - ), - ] - verbose_name = "Raw Content" - verbose_name_plural = "Raw Contents" - - -class TextContent(models.Model): - """ - TextContent supplements RawContent to give an in-table text copy. - - This model exists so that we can have lower-latency access to this data, - particularly if we're pulling back multiple rows at once. - - Apps are encouraged to create their own data models that further extend this - one with a more intelligent, parsed data model. For example, individual - XBlocks might parse the OLX in this model into separate data models for - VideoBlock, ProblemBlock, etc. - - The reason this is built directly into the Learning Core data model is - because we want to be able to easily access and browse this data even if the - app-extended models get deleted (e.g. if they are deprecated and removed). - """ - - # 100K is our limit for text data, like OLX. This means 100K *characters*, - # not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this - # couled be as much as 400K of data if we had nothing but emojis. - MAX_TEXT_LENGTH = 100_000 - - raw_content = models.OneToOneField( - RawContent, - on_delete=models.CASCADE, - primary_key=True, - related_name="text_content", - ) - text = models.TextField(null=False, blank=True, max_length=MAX_TEXT_LENGTH) - length = models.PositiveIntegerField(null=False) - - class ComponentVersionRawContent(models.Model): """ Determines the RawContent for a given ComponentVersion. @@ -441,9 +170,9 @@ class ComponentVersionRawContent(models.Model): transcripts in different languages. When RawContent is associated with an ComponentVersion, it has some local - identifier that is unique within the the context of that ComponentVersion. - This allows the ComponentVersion to do things like store an image file and - reference it by a "path" identifier. + key that is unique within the the context of that ComponentVersion. This + allows the ComponentVersion to do things like store an image file and + reference it by a "path" key. RawContent is immutable and sharable across multiple ComponentVersions and even across LearningPackages. @@ -453,8 +182,10 @@ class ComponentVersionRawContent(models.Model): component_version = models.ForeignKey(ComponentVersion, on_delete=models.CASCADE) uuid = immutable_uuid_field() - identifier = identifier_field() + key = key_field() + # Long explanation for the ``learner_downloadable`` field: + # # Is this RawContent downloadable during the learning experience? This is # NOT about public vs. private permissions on course assets, as that will be # a policy that can be changed independently of new versions of the content. @@ -492,21 +223,21 @@ class ComponentVersionRawContent(models.Model): class Meta: constraints = [ - # Uniqueness is only by ComponentVersion and identifier. If for some - # reason a ComponentVersion wants to associate the same piece of - # content with two different identifiers, that is permitted. + # Uniqueness is only by ComponentVersion and key. If for some reason + # a ComponentVersion wants to associate the same piece of content + # with two different identifiers, that is permitted. models.UniqueConstraint( - fields=["component_version", "identifier"], - name="cvrawcontent_uniq_cv_id", + fields=["component_version", "key"], + name="oel_cvrawcontent_uniq_cv_key", ), ] indexes = [ models.Index( fields=["raw_content", "component_version"], - name="cvrawcontent_c_cv", + name="oel_cvrawcontent_c_cv", ), models.Index( fields=["component_version", "raw_content"], - name="cvrawcontent_cv_d", + name="oel_cvrawcontent_cv_d", ), ] diff --git a/openedx_learning/core/contents/__init__.py b/openedx_learning/core/contents/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openedx_learning/core/contents/admin.py b/openedx_learning/core/contents/admin.py new file mode 100644 index 000000000..81230d3d2 --- /dev/null +++ b/openedx_learning/core/contents/admin.py @@ -0,0 +1,53 @@ +from django.contrib import admin +from django.utils.html import format_html + +from .models import RawContent + +from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin + + +@admin.register(RawContent) +class RawContentAdmin(ReadOnlyModelAdmin): + list_display = [ + "hash_digest", + "file_link", + "learning_package", + "mime_type", + "size", + "created", + ] + fields = [ + "learning_package", + "hash_digest", + "mime_type", + "size", + "created", + "file_link", + "text_preview", + ] + readonly_fields = [ + "learning_package", + "hash_digest", + "mime_type", + "size", + "created", + "file_link", + "text_preview", + ] + list_filter = ("mime_type", "learning_package") + search_fields = ("hash_digest",) + + def file_link(self, raw_content): + return format_html( + 'Download', + raw_content.file.url, + ) + + def text_preview(self, raw_content): + if not hasattr(raw_content, "text_content"): + return "(not available)" + + return format_html( + '
\n{}\n
', + raw_content.text_content.text, + ) diff --git a/openedx_learning/core/contents/api.py b/openedx_learning/core/contents/api.py new file mode 100644 index 000000000..282bb820f --- /dev/null +++ b/openedx_learning/core/contents/api.py @@ -0,0 +1,85 @@ +""" +Low Level Contents API (warning: UNSTABLE, in progress API) + +Please look at the models.py file for more information about the kinds of data +are stored in this app. +""" +import codecs + +from django.core.files.base import ContentFile +from django.db.transaction import atomic + +from openedx_learning.lib.fields import create_hash_digest +from .models import RawContent, TextContent + + +def create_raw_content( + learning_package_id, data_bytes, mime_type, created, hash_digest=None +): + hash_digest = hash_digest or create_hash_digest(data_bytes) + raw_content = RawContent.objects.create( + learning_package_id=learning_package_id, + mime_type=mime_type, + hash_digest=hash_digest, + size=len(data_bytes), + created=created, + ) + raw_content.file.save( + f"{raw_content.learning_package.uuid}/{hash_digest}", + ContentFile(data_bytes), + ) + return raw_content + + +def create_text_from_raw_content(raw_content, encoding="utf-8-sig"): + text = codecs.decode(raw_content.file.open().read(), "utf-8-sig") + return TextContent.objects.create( + raw_content=raw_content, + text=text, + length=len(text), + ) + + +def get_or_create_raw_content( + learning_package_id, data_bytes, mime_type, created, hash_digest=None +): + hash_digest = hash_digest or create_hash_digest(data_bytes) + try: + raw_content = RawContent.objects.get( + learning_package_id=learning_package_id, hash_digest=hash_digest + ) + created = False + except RawContent.DoesNotExist: + raw_content = create_raw_content( + learning_package_id, data_bytes, mime_type, created, hash_digest + ) + created = True + + return raw_content, created + + +def get_or_create_text_content_from_bytes( + learning_package_id, + data_bytes, + mime_type, + created, + hash_digest=None, + encoding="utf-8-sig", +): + with atomic(): + raw_content, rc_created = get_or_create_raw_content( + learning_package_id, data_bytes, mime_type, created, hash_digest=None + ) + if rc_created or not hasattr(raw_content, "text_content"): + text = codecs.decode(data_bytes, "utf-8-sig") + text_content = TextContent.objects.create( + raw_content=raw_content, + text=text, + length=len(text), + ) + tc_created = True + else: + text_content = raw_content.text_content + tc_created = False + + return (text_content, tc_created) diff --git a/openedx_learning/core/contents/apps.py b/openedx_learning/core/contents/apps.py new file mode 100644 index 000000000..4c736b812 --- /dev/null +++ b/openedx_learning/core/contents/apps.py @@ -0,0 +1,12 @@ +from django.apps import AppConfig + + +class ContentsConfig(AppConfig): + """ + Configuration for the Contents Django application. + """ + + name = "openedx_learning.core.contents" + verbose_name = "Learning Core: Contents" + default_auto_field = "django.db.models.BigAutoField" + label = "oel_contents" diff --git a/openedx_learning/core/contents/migrations/0001_initial.py b/openedx_learning/core/contents/migrations/0001_initial.py new file mode 100644 index 000000000..f106553e7 --- /dev/null +++ b/openedx_learning/core/contents/migrations/0001_initial.py @@ -0,0 +1,103 @@ +# Generated by Django 3.2.18 on 2023-05-11 02:07 + +import django.core.validators +from django.db import migrations, models +import django.db.models.deletion +import openedx_learning.lib.validators + + +class Migration(migrations.Migration): + initial = True + + dependencies = [ + ("oel_publishing", "0001_initial"), + ] + + operations = [ + migrations.CreateModel( + name="RawContent", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ("hash_digest", models.CharField(editable=False, max_length=40)), + ("mime_type", models.CharField(max_length=255)), + ( + "size", + models.PositiveBigIntegerField( + validators=[django.core.validators.MaxValueValidator(50000000)] + ), + ), + ( + "created", + models.DateTimeField( + validators=[ + openedx_learning.lib.validators.validate_utc_datetime + ] + ), + ), + ("file", models.FileField(null=True, upload_to="")), + ( + "learning_package", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="oel_publishing.learningpackage", + ), + ), + ], + options={ + "verbose_name": "Raw Content", + "verbose_name_plural": "Raw Contents", + }, + ), + migrations.CreateModel( + name="TextContent", + fields=[ + ( + "raw_content", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, + primary_key=True, + related_name="text_content", + serialize=False, + to="oel_contents.rawcontent", + ), + ), + ("text", models.TextField(blank=True, max_length=100000)), + ("length", models.PositiveIntegerField()), + ], + ), + migrations.AddIndex( + model_name="rawcontent", + index=models.Index( + fields=["learning_package", "mime_type"], + name="oel_content_idx_lp_mime_type", + ), + ), + migrations.AddIndex( + model_name="rawcontent", + index=models.Index( + fields=["learning_package", "-size"], name="oel_content_idx_lp_rsize" + ), + ), + migrations.AddIndex( + model_name="rawcontent", + index=models.Index( + fields=["learning_package", "-created"], + name="oel_content_idx_lp_rcreated", + ), + ), + migrations.AddConstraint( + model_name="rawcontent", + constraint=models.UniqueConstraint( + fields=("learning_package", "mime_type", "hash_digest"), + name="oel_content_uniq_lc_mime_type_hash_digest", + ), + ), + ] diff --git a/openedx_learning/core/contents/migrations/__init__.py b/openedx_learning/core/contents/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openedx_learning/core/contents/models.py b/openedx_learning/core/contents/models.py new file mode 100644 index 000000000..e19f151cb --- /dev/null +++ b/openedx_learning/core/contents/models.py @@ -0,0 +1,176 @@ +""" +These models are the most basic pieces of content we support. Think of them as +the simplest building blocks to store data with. They need to be composed into +more intelligent data models to be useful. +""" +from django.db import models +from django.conf import settings +from django.core.validators import MaxValueValidator + +from openedx_learning.lib.fields import hash_field, manual_date_time_field + +from ..publishing.models import LearningPackage + + +class RawContent(models.Model): + """ + This is the most basic piece of raw content data, with no version metadata. + + RawContent stores data using the "file" field. This data is not + auto-normalized in any way, meaning that pieces of content that are + semantically equivalent (e.g. differently spaced/sorted JSON) may result in + new entries. This model is intentionally ignorant of what these things mean, + because it expects supplemental data models to build on top of it. + + Two RawContent instances _can_ have the same hash_digest if they are of + different MIME types. For instance, an empty text file and an empty SRT file + will both hash the same way, but be considered different entities. + + The other fields on RawContent are for data that is intrinsic to the file + data itself (e.g. the size). Any smart parsing of the contents into more + structured metadata should happen in other models that hang off of + RawContent. + + RawContent models are not versioned in any way. The concept of versioning + only exists at a higher level. + + RawContent is optimized for cheap storage, not low latency. It stores + content in a FileField. If you need faster text access across multiple rows, + add a TextContent entry that corresponds to the relevant RawContent. + + If you need to transform this RawContent into more structured data for your + application, create a model with a OneToOneField(primary_key=True) + relationship to RawContent. Just remember that *you should always create the + RawContent entry* first, to ensure content is always exportable, even if + your app goes away in the future. + + Operational Notes + ----------------- + + RawContent stores data using a FileField, which you'd typically want to back + with something like S3 when running in a production environment. That file + storage backend will not support rollback, meaning that if you start the + import process and things break halfway through, the RawContent model rows + will be rolled back, but the uploaded files will still remain on your file + storage system. The files are based on a hash of the contents though, so it + should still work later on when the import succeeds (it'll just have to + upload fewer files). + + TODO: Write about cleaning up accidental uploads of really large/unnecessary + files. Pruning of unreferenced (never published, or currently unused) + component versions and assets, and how that ties in? + """ + + # 50 MB is our current limit, based on the current Open edX Studio file + # upload size limit. + MAX_FILE_SIZE = 50_000_000 + + learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) + + # This hash value may be calculated using create_hash_digest from the + # openedx.lib.fields module. + hash_digest = hash_field() + + # MIME type, such as "text/html", "image/png", etc. Per RFC 4288, MIME type + # and sub-type may each be 127 chars, making a max of 255 (including the "/" + # in between). + # + # DO NOT STORE parameters here, e.g. "charset=". We can make a new field if + # that becomes necessary. + mime_type = models.CharField(max_length=255, blank=False, null=False) + + # This is the size of the raw data file in bytes. This can be different than + # the character length, since UTF-8 encoding can use anywhere between 1-4 + # bytes to represent any given character. + size = models.PositiveBigIntegerField( + validators=[MaxValueValidator(MAX_FILE_SIZE)], + ) + + # This should be manually set so that multiple RawContent rows being set in + # the same transaction are created with the same timestamp. The timestamp + # should be UTC. + created = manual_date_time_field() + + # All content for the LearningPackage should be stored in files. See model + # docstring for more details on how to store this data in supplementary data + # models that offer better latency guarantees. + file = models.FileField( + null=True, + storage=settings.OPENEDX_LEARNING.get( + "STORAGE", + settings.DEFAULT_FILE_STORAGE, + ), + ) + + class Meta: + constraints = [ + # Make sure we don't store duplicates of this raw data within the + # same LearningPackage, unless they're of different MIME types. + models.UniqueConstraint( + fields=[ + "learning_package", + "mime_type", + "hash_digest", + ], + name="oel_content_uniq_lc_mime_type_hash_digest", + ), + ] + indexes = [ + # LearningPackage MIME type Index: + # * Break down Content counts by type/subtype with in a + # LearningPackage. + # * Find all the Content in a LearningPackage that matches a + # certain MIME type (e.g. "image/png", "application/pdf". + models.Index( + fields=["learning_package", "mime_type"], + name="oel_content_idx_lp_mime_type", + ), + # LearningPackage (reverse) Size Index: + # * Find largest Content in a LearningPackage. + # * Find the sum of Content size for a given LearningPackage. + models.Index( + fields=["learning_package", "-size"], + name="oel_content_idx_lp_rsize", + ), + # LearningPackage (reverse) Created Index: + # * Find most recently added Content. + models.Index( + fields=["learning_package", "-created"], + name="oel_content_idx_lp_rcreated", + ), + ] + verbose_name = "Raw Content" + verbose_name_plural = "Raw Contents" + + +class TextContent(models.Model): + """ + TextContent supplements RawContent to give an in-table text copy. + + This model exists so that we can have lower-latency access to this data, + particularly if we're pulling back multiple rows at once. + + Apps are encouraged to create their own data models that further extend this + one with a more intelligent, parsed data model. For example, individual + XBlocks might parse the OLX in this model into separate data models for + VideoBlock, ProblemBlock, etc. You can do this by making your supplementary + model linked to this model via OneToOneField with primary_key=True. + + The reason this is built directly into the Learning Core data model is + because we want to be able to easily access and browse this data even if the + app-extended models get deleted (e.g. if they are deprecated and removed). + """ + + # 100K is our limit for text data, like OLX. This means 100K *characters*, + # not bytes. Since UTF-8 encodes characters using as many as 4 bytes, this + # could be as much as 400K of data if we had nothing but emojis. + MAX_TEXT_LENGTH = 100_000 + + raw_content = models.OneToOneField( + RawContent, + on_delete=models.CASCADE, + primary_key=True, + related_name="text_content", + ) + text = models.TextField(null=False, blank=True, max_length=MAX_TEXT_LENGTH) + length = models.PositiveIntegerField(null=False) diff --git a/openedx_learning/core/publishing/admin.py b/openedx_learning/core/publishing/admin.py index 7040f9008..258b6ffaf 100644 --- a/openedx_learning/core/publishing/admin.py +++ b/openedx_learning/core/publishing/admin.py @@ -1,29 +1,122 @@ from django.contrib import admin -from .models import LearningPackage, PublishLogEntry +from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin +from .models import ( + Draft, + LearningPackage, + PublishableEntity, + Published, + PublishLog, + PublishLogRecord, +) @admin.register(LearningPackage) -class LearningPackageAdmin(admin.ModelAdmin): - fields = ("identifier", "title", "uuid", "created", "updated") - readonly_fields = ("identifier", "title", "uuid", "created", "updated") - list_display = ("identifier", "title", "uuid", "created", "updated") +class LearningPackageAdmin(ReadOnlyModelAdmin): + fields = ["key", "title", "uuid", "created", "updated"] + readonly_fields = ["key", "title", "uuid", "created", "updated"] + list_display = ["key", "title", "uuid", "created", "updated"] + search_fields = ["key", "title", "uuid"] -@admin.register(PublishLogEntry) -class PublishLogEntryAdmin(admin.ModelAdmin): - fields = ("uuid", "learning_package", "published_at", "published_by", "message") - readonly_fields = ( - "uuid", - "learning_package", - "published_at", - "published_by", - "message", - ) - list_display = ( +class PublishLogRecordTabularInline(admin.TabularInline): + model = PublishLogRecord + fields = [ + "entity", + "title", + "old_version_num", + "new_version_num", + ] + readonly_fields = fields + + def get_queryset(self, request): + queryset = super().get_queryset(request) + return queryset.select_related("entity", "old_version", "new_version") + + def old_version_num(self, pl_record: PublishLogRecord): + if pl_record.old_version is None: + return "-" + return pl_record.old_version.version_num + + def new_version_num(self, pl_record: PublishLogRecord): + if pl_record.new_version is None: + return "-" + return pl_record.new_version.version_num + + def title(self, pl_record: PublishLogRecord): + if pl_record.new_version: + return pl_record.new_version.title + if pl_record.old_version: + return pl_record.old_version.title + return "" + + +@admin.register(PublishLog) +class PublishLogAdmin(ReadOnlyModelAdmin): + inlines = [PublishLogRecordTabularInline] + + fields = ["uuid", "learning_package", "published_at", "published_by", "message"] + readonly_fields = fields + list_display = fields + list_filter = ["learning_package"] + + +@admin.register(PublishableEntity) +class PublishableEntityAdmin(ReadOnlyModelAdmin): + fields = [ + "key", + "draft_version", + "published_version", "uuid", "learning_package", - "published_at", - "published_by", - "message", - ) + "created", + "created_by", + ] + readonly_fields = fields + list_display = fields + list_filter = ["learning_package"] + search_fields = ["key", "uuid"] + + def get_queryset(self, request): + queryset = super().get_queryset(request) + return queryset.select_related( + "learning_package", "published__version", "draft__version" + ) + + def draft_version(self, entity): + return entity.draft.version.version_num + + def published_version(self, entity): + return entity.published.version.version_num + + +@admin.register(Published) +class PublishedAdmin(ReadOnlyModelAdmin): + fields = ["entity", "version_num", "previous", "published_at", "message"] + list_display = fields + + def get_queryset(self, request): + queryset = super().get_queryset(request) + return queryset.select_related( + "entity", + "version", + "publish_log_record", + "publish_log_record__old_version", + "publish_log_record__publish_log", + ) + + def version_num(self, published_obj): + return published_obj.version.version_num + + def previous(self, published_obj): + old_version = published_obj.publish_log_record.old_version + # if there was no previous old version, old version is None + if not old_version: + return old_version + return old_version.version_num + + def published_at(self, published_obj): + return published_obj.publish_log_record.publish_log.published_at + + def message(self, published_obj): + return published_obj.publish_log_record.publish_log.message diff --git a/openedx_learning/core/publishing/api.py b/openedx_learning/core/publishing/api.py new file mode 100644 index 000000000..52ac7a90e --- /dev/null +++ b/openedx_learning/core/publishing/api.py @@ -0,0 +1,192 @@ +""" +Publishing API (warning: UNSTABLE, in progress API) + +Please look at the models.py file for more information about the kinds of data +are stored in this app. +""" +from datetime import datetime, timezone +from typing import Optional + +from django.core.exceptions import ObjectDoesNotExist +from django.db.models import F, QuerySet +from django.db.transaction import atomic + +from .models import ( + Draft, + LearningPackage, + Published, + PublishLog, + PublishLogRecord, + PublishableEntity, + PublishableEntityVersion, +) +from .model_mixins import PublishableContentModelRegistry + + +def create_learning_package( + key: str, title: str, created: Optional[datetime] = None +) -> LearningPackage: + """ + Create a new LearningPackage. + + The ``key`` must be unique. + + Errors that can be raised: + + * django.core.exceptions.ValidationError + """ + if not created: + created = datetime.now(tz=timezone.utc) + + package = LearningPackage( + key=key, + title=title, + created=created, + updated=created, + ) + package.full_clean() + package.save() + + return package + + +def create_publishable_entity(learning_package_id, key, created, created_by): + """ + Create a PublishableEntity. + + You'd typically want to call this right before creating your own content + model that points to it. + """ + return PublishableEntity.objects.create( + learning_package_id=learning_package_id, + key=key, + created=created, + created_by=created_by, + ) + + +def create_publishable_entity_version( + entity_id, version_num, title, created, created_by +): + """ + Create a PublishableEntityVersion. + + You'd typically want to call this right before creating your own content + version model that points to it. + """ + with atomic(): + version = PublishableEntityVersion.objects.create( + entity_id=entity_id, + version_num=version_num, + title=title, + created=created, + created_by=created_by, + ) + Draft.objects.create( + entity_id=entity_id, + version=version, + ) + return version + + +def learning_package_exists(key: str) -> bool: + """ + Check whether a LearningPackage with a particular key exists. + """ + LearningPackage.objects.filter(key=key).exists() + + +def publish_all_drafts( + learning_package_id, message="", published_at=None, published_by=None +): + """ + Publish everything that is a Draft and is not already published. + """ + draft_qset = ( + Draft.objects.select_related("entity__published") + .filter(entity__learning_package_id=learning_package_id) + .exclude(entity__published__version_id=F("version_id")) + ) + return publish_from_drafts( + learning_package_id, draft_qset, message, published_at, published_by + ) + + +def publish_from_drafts( + learning_package_id: int, # LearningPackage.id + draft_qset: QuerySet, + message: str = "", + published_at: Optional[datetime] = None, + published_by: Optional[int] = None, # User.id +) -> PublishLog: + """ + Publish the rows in the ``draft_model_qsets`` args passed in. + """ + if published_at is None: + published_at = datetime.now(tz=timezone.utc) + + with atomic(): + # One PublishLog for this entire publish operation. + publish_log = PublishLog( + learning_package_id=learning_package_id, + message=message, + published_at=published_at, + published_by=published_by, + ) + publish_log.full_clean() + publish_log.save(force_insert=True) + + for draft in draft_qset.select_related("entity__published__version"): + try: + old_version = draft.entity.published.version + except ObjectDoesNotExist: + # This means there is no published version yet. + old_version = None + + # Create a record describing publishing this particular Publishable + # (useful for auditing and reverting). + publish_log_record = PublishLogRecord( + publish_log=publish_log, + entity=draft.entity, + old_version=old_version, + new_version=draft.version, + ) + publish_log_record.full_clean() + publish_log_record.save(force_insert=True) + + # Update the lookup we use to fetch the published versions + Published.objects.update_or_create( + entity=draft.entity, + defaults={ + "version": draft.version, + "publish_log_record": publish_log_record, + }, + ) + + return publish_log + + +def register_content_models(content_model_cls, content_version_model_cls): + """ + Register what content model maps to what content version model. + + This is so that we can provide convenience links between content models and + content version models *through* the publishing apps, so that you can do + things like finding the draft version of a content model more easily. See + the model_mixins.py module for more details. + + This should only be imported and run from the your app's AppConfig.ready() + method. For example, in the components app, this looks like: + + def ready(self): + from ..publishing.api import register_content_models + from .models import Component, ComponentVersion + + register_content_models(Component, ComponentVersion) + + There may be a more clever way to introspect this information from the model + metadata, but this is simple and explicit. + """ + return PublishableContentModelRegistry.register( + content_model_cls, content_version_model_cls + ) diff --git a/openedx_learning/core/publishing/apps.py b/openedx_learning/core/publishing/apps.py index 4a98507cc..fbd42a0e4 100644 --- a/openedx_learning/core/publishing/apps.py +++ b/openedx_learning/core/publishing/apps.py @@ -13,3 +13,4 @@ class PublishingConfig(AppConfig): name = "openedx_learning.core.publishing" verbose_name = "Learning Core: Publishing" default_auto_field = "django.db.models.BigAutoField" + label = "oel_publishing" diff --git a/openedx_learning/core/publishing/migrations/0001_initial.py b/openedx_learning/core/publishing/migrations/0001_initial.py index 350a1849a..76e02b316 100644 --- a/openedx_learning/core/publishing/migrations/0001_initial.py +++ b/openedx_learning/core/publishing/migrations/0001_initial.py @@ -1,6 +1,7 @@ -# Generated by Django 4.1.6 on 2023-04-14 00:12 +# Generated by Django 3.2.18 on 2023-05-11 02:07 from django.conf import settings +import django.core.validators from django.db import migrations, models import django.db.models.deletion import openedx_learning.lib.validators @@ -36,7 +37,7 @@ class Migration(migrations.Migration): verbose_name="UUID", ), ), - ("identifier", models.CharField(max_length=255)), + ("key", models.CharField(max_length=255)), ("title", models.CharField(max_length=1000)), ( "created", @@ -61,7 +62,118 @@ class Migration(migrations.Migration): }, ), migrations.CreateModel( - name="PublishLogEntry", + name="PublishableEntity", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "uuid", + models.UUIDField( + default=uuid.uuid4, + editable=False, + unique=True, + verbose_name="UUID", + ), + ), + ("key", models.CharField(max_length=255)), + ( + "created", + models.DateTimeField( + validators=[ + openedx_learning.lib.validators.validate_utc_datetime + ] + ), + ), + ( + "created_by", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "learning_package", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="oel_publishing.learningpackage", + ), + ), + ], + options={ + "verbose_name": "Publishable Entity", + "verbose_name_plural": "Publishable Entities", + }, + ), + migrations.CreateModel( + name="PublishableEntityVersion", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "uuid", + models.UUIDField( + default=uuid.uuid4, + editable=False, + unique=True, + verbose_name="UUID", + ), + ), + ("title", models.CharField(blank=True, default="", max_length=1000)), + ( + "version_num", + models.PositiveBigIntegerField( + validators=[django.core.validators.MinValueValidator(1)] + ), + ), + ( + "created", + models.DateTimeField( + validators=[ + openedx_learning.lib.validators.validate_utc_datetime + ] + ), + ), + ( + "created_by", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to=settings.AUTH_USER_MODEL, + ), + ), + ( + "entity", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="versions", + to="oel_publishing.publishableentity", + ), + ), + ], + options={ + "verbose_name": "Publishable Entity Version", + "verbose_name_plural": "Publishable Entity Versions", + }, + ), + migrations.CreateModel( + name="PublishLog", fields=[ ( "id", @@ -94,12 +206,13 @@ class Migration(migrations.Migration): "learning_package", models.ForeignKey( on_delete=django.db.models.deletion.CASCADE, - to="publishing.learningpackage", + to="oel_publishing.learningpackage", ), ), ( "published_by", models.ForeignKey( + blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL, @@ -107,14 +220,157 @@ class Migration(migrations.Migration): ), ], options={ - "verbose_name": "Publish Log Entry", - "verbose_name_plural": "Publish Log Entries", + "verbose_name": "Publish Log", + "verbose_name_plural": "Publish Logs", + }, + ), + migrations.CreateModel( + name="Draft", + fields=[ + ( + "entity", + models.OneToOneField( + on_delete=django.db.models.deletion.RESTRICT, + primary_key=True, + serialize=False, + to="oel_publishing.publishableentity", + ), + ), + ], + ), + migrations.CreateModel( + name="Published", + fields=[ + ( + "entity", + models.OneToOneField( + on_delete=django.db.models.deletion.RESTRICT, + primary_key=True, + serialize=False, + to="oel_publishing.publishableentity", + ), + ), + ], + options={ + "verbose_name": "Published Entity", + "verbose_name_plural": "Published Entities", + }, + ), + migrations.CreateModel( + name="PublishLogRecord", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "entity", + models.ForeignKey( + on_delete=django.db.models.deletion.RESTRICT, + to="oel_publishing.publishableentity", + ), + ), + ( + "new_version", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.RESTRICT, + to="oel_publishing.publishableentityversion", + ), + ), + ( + "old_version", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.RESTRICT, + related_name="+", + to="oel_publishing.publishableentityversion", + ), + ), + ( + "publish_log", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + to="oel_publishing.publishlog", + ), + ), + ], + options={ + "verbose_name": "Publish Log Record", + "verbose_name_plural": "Publish Log Records", }, ), migrations.AddConstraint( model_name="learningpackage", constraint=models.UniqueConstraint( - fields=("identifier",), name="lp_uniq_identifier" + fields=("key",), name="oel_publishing_lp_uniq_key" + ), + ), + migrations.AddField( + model_name="published", + name="publish_log_record", + field=models.ForeignKey( + on_delete=django.db.models.deletion.RESTRICT, + to="oel_publishing.publishlogrecord", + ), + ), + migrations.AddField( + model_name="published", + name="version", + field=models.OneToOneField( + null=True, + on_delete=django.db.models.deletion.RESTRICT, + to="oel_publishing.publishableentityversion", + ), + ), + migrations.AddIndex( + model_name="publishableentityversion", + index=models.Index( + fields=["entity", "-created"], name="oel_pv_idx_entity_rcreated" + ), + ), + migrations.AddIndex( + model_name="publishableentityversion", + index=models.Index(fields=["title"], name="oel_pv_idx_title"), + ), + migrations.AddConstraint( + model_name="publishableentityversion", + constraint=models.UniqueConstraint( + fields=("entity", "version_num"), name="oel_pv_uniq_entity_version_num" + ), + ), + migrations.AddIndex( + model_name="publishableentity", + index=models.Index(fields=["key"], name="oel_pub_ent_idx_key"), + ), + migrations.AddIndex( + model_name="publishableentity", + index=models.Index( + fields=["learning_package", "-created"], + name="oel_pub_ent_idx_lp_rcreated", + ), + ), + migrations.AddConstraint( + model_name="publishableentity", + constraint=models.UniqueConstraint( + fields=("learning_package", "key"), name="oel_pub_ent_uniq_lp_key" + ), + ), + migrations.AddField( + model_name="draft", + name="version", + field=models.OneToOneField( + blank=True, + null=True, + on_delete=django.db.models.deletion.RESTRICT, + to="oel_publishing.publishableentityversion", ), ), ] diff --git a/openedx_learning/core/publishing/model_mixins.py b/openedx_learning/core/publishing/model_mixins.py new file mode 100644 index 000000000..9d39c645c --- /dev/null +++ b/openedx_learning/core/publishing/model_mixins.py @@ -0,0 +1,276 @@ +""" +Helper mixin classes for content apps that want to use the publishing app. +""" +from functools import cached_property + +from django.core.exceptions import ImproperlyConfigured +from django.db import models +from django.db.models.query import QuerySet + +from .models import Draft, Published, PublishableEntity, PublishableEntityVersion + + +class PublishableEntityMixin(models.Model): + """ + Convenience mixin to link your models against PublishableEntity. + + Please see docstring for PublishableEntity for more details. + + If you use this class, you *MUST* also use PublishableEntityVersionMixin and + the publishing app's api.register_content_models (see its docstring for + details). + """ + + class PublishableEntityMixinManager(models.Manager): + def get_queryset(self) -> QuerySet: + return super().get_queryset().select_related("publishable_entity") + + objects = PublishableEntityMixinManager() + + publishable_entity = models.OneToOneField( + PublishableEntity, on_delete=models.CASCADE, primary_key=True + ) + + @cached_property + def versioning(self): + return self.VersioningHelper(self) + + @property + def uuid(self): + return self.publishable_entity.uuid + + @property + def key(self): + return self.publishable_entity.key + + @property + def created(self): + return self.publishable_entity.created + + class Meta: + abstract = True + + class VersioningHelper: + """ + Helper class to link content models to their versions. + + The publishing app has PublishableEntity and PublishableEntityVersion. + This is a helper class so that if you mix PublishableEntityMixin into + a content model like Component, then you can do something like:: + + component.versioning.draft # current draft ComponentVersion + component.versioning.published # current published ComponentVersion + + It links the relationships between content models and their versioned + counterparts *through* the connection between PublishableEntity and + PublishableEntityVersion. So ``component.versioning.draft`` ends up + querying: Component -> PublishableEntity -> Draft -> + PublishableEntityVersion -> ComponentVersion. But the people writing + Component don't need to understand how the publishing models work to do + these common queries. + + Caching Warning + --------------- + Note that because we're just using the underlying model's relations, + calling this a second time will returned the cached relation, and + not cause a fetch of new data from the database. So for instance, if + you do:: + + # Create a new Component + ComponentVersion + component, component_version = create_component_and_version( + learning_package_id=learning_package.id, + namespace="xblock.v1", + type="problem", + local_key="monty_hall", + title="Monty Hall Problem", + created=now, + created_by=None, + ) + + # This will work, because it's never been published + assert component.versioning.published is None + + # Publishing happens + publish_all_drafts(learning_package.id, published_at=now) + + # This will FAIL because it's going to use the relation value + # cached on component instead of going to the database again. + # You need to re-fetch the component for this to work. + assert component.versioning.published == component_version + + # You need to manually refetch it from the database to see the new + # publish status: + component = get_component_by_pk(component.pk) + + # Now this will work: + assert component.versioning.published == component_version + + TODO: This probably means we should use a custom Manager to select + related fields. + """ + + def __init__(self, content_obj): + self.content_obj = content_obj + + self.content_version_model_cls = ( + PublishableContentModelRegistry.get_versioned_model_cls( + type(content_obj) + ) + ) + # Get the field that points from the *versioned* content model + # (e.g. ComponentVersion) to the PublishableEntityVersion. + field_to_pev = self.content_version_model_cls._meta.get_field( + "publishable_entity_version" + ) + + # Now that we know the field that leads to PublishableEntityVersion, + # get the reverse related field name so that we can use that later. + self.related_name = field_to_pev.related_query_name() + + @property + def draft(self): + """ + Return the content version object that is the current draft. + """ + try: + draft = Draft.objects.get(entity_id=self.content_obj.publishable_entity_id) + except Draft.DoesNotExist: + # If no Draft object exists at all, then no + # PublishableEntityVersion was ever created (just the + # PublishableEntity). + return None + + draft_pub_ent_version = draft.version + + # There is an entry for this Draft, but the entry is None. This means + # there was a Draft at some point, but it's been "deleted" (though the + # actual history is still preserved). + if draft_pub_ent_version is None: + return None + + # If we've gotten this far, then draft_pub_ent_version points to an + # actual PublishableEntityVersion, but we want to follow that and go to + # the matching content version model. + return getattr(draft_pub_ent_version, self.related_name) + + @property + def published(self): + """ + Return the content version object that is currently published. + """ + try: + published = Published.objects.get(entity_id=self.content_obj.publishable_entity_id) + except Published.DoesNotExist: + # This means it's never been published. + return None + + published_pub_ent_version = published.version + + # There is a Published entry, but the entry is None. This means that + # it was published at some point, but it's been "deleted" or + # unpublished (though the actual history is still preserved). + if published_pub_ent_version is None: + return None + + # If we've gotten this far, then published_pub_ent_version points to an + # actual PublishableEntityVersion, but we want to follow that and go to + # the matching content version model. + return getattr(published_pub_ent_version, self.related_name) + + @property + def versions(self): + """ + Return a QuerySet of content version models for this content model. + + Example: If you mix PublishableEntityMixin into a Component model, + This would return you a QuerySet of ComponentVersion models. + """ + pub_ent = self.content_obj.publishable_entity + return self.content_version_model_cls.objects.filter( + publishable_entity_version__entity_id=pub_ent.id + ) + + +class PublishableEntityVersionMixin(models.Model): + """ + Convenience mixin to link your models against PublishableEntityVersion. + + Please see docstring for PublishableEntityVersion for more details. + + If you use this class, you *MUST* also use PublishableEntityMixin and the + publishing app's api.register_content_models (see its docstring for + details). + """ + + class PublishableEntityVersionMixinManager(models.Manager): + def get_queryset(self) -> QuerySet: + return ( + super() + .get_queryset() + .select_related( + "publishable_entity_version", + ) + ) + + objects = PublishableEntityVersionMixinManager() + + publishable_entity_version = models.OneToOneField( + PublishableEntityVersion, on_delete=models.CASCADE, primary_key=True + ) + + @property + def uuid(self): + return self.publishable_entity_version.uuid + + @property + def title(self): + return self.publishable_entity_version.title + + @property + def created(self): + return self.publishable_entity_version.created + + @property + def version_num(self): + return self.publishable_entity_version.version_num + + class Meta: + abstract = True + + +class PublishableContentModelRegistry: + """ + This class tracks content models built on PublishableEntity(Version). + """ + + _unversioned_to_versioned = {} + _versioned_to_unversioned = {} + + @classmethod + def register(cls, content_model_cls, content_version_model_cls): + """ + Register what content model maps to what content version model. + + If you want to call this from another app, please use the + ``register_content_models`` function in this app's ``api`` module + instead. + """ + if not issubclass(content_model_cls, PublishableEntityMixin): + raise ImproperlyConfigured( + f"{content_model_cls} must inherit from PublishableEntityMixin" + ) + if not issubclass(content_version_model_cls, PublishableEntityVersionMixin): + raise ImproperlyConfigured( + f"{content_version_model_cls} must inherit from PublishableEntityMixin" + ) + + cls._unversioned_to_versioned[content_model_cls] = content_version_model_cls + cls._versioned_to_unversioned[content_version_model_cls] = content_model_cls + + @classmethod + def get_versioned_model_cls(cls, content_model_cls): + return cls._unversioned_to_versioned[content_model_cls] + + @classmethod + def get_unversioned_model_cls(cls, content_version_model_cls): + return cls._versioned_to_unversioned[content_version_model_cls] diff --git a/openedx_learning/core/publishing/models.py b/openedx_learning/core/publishing/models.py index 293555ffa..0ce3cc2b7 100644 --- a/openedx_learning/core/publishing/models.py +++ b/openedx_learning/core/publishing/models.py @@ -1,48 +1,340 @@ """ -Idea: This app has _only_ things related to Publishing any kind of content -associated with a LearningPackage. +The data models here are intended to be used by other apps to publish different +types of content, such as Components, Units, Sections, etc. These models should +support the logic for the management of the publishing process: + +* The relationship between publishable entities and their many versions. +* The management of drafts. +* Publishing specific versions of publishable entities. +* Finding the currently published versions. +* The act of publishing, and doing so atomically. +* Managing reverts. +* Storing and querying publish history. """ from django.db import models + from django.conf import settings +from django.core.validators import MinValueValidator from openedx_learning.lib.fields import ( - identifier_field, + key_field, immutable_uuid_field, manual_date_time_field, ) class LearningPackage(models.Model): + """ + Top level container for a grouping of authored content. + + Each PublishableEntity belongs to exactly one LearningPackage. + """ + uuid = immutable_uuid_field() - identifier = identifier_field() + key = key_field() title = models.CharField(max_length=1000, null=False, blank=False) - created = manual_date_time_field() updated = manual_date_time_field() def __str__(self): - return f"{self.identifier}: {self.title}" + return f"{self.key}" class Meta: constraints = [ - # LearningPackage identifiers must be globally unique. This is - # something that might be relaxed in the future if this system were - # to be extensible to something like multi-tenancy, in which case - # we'd tie it to something like a Site or Org. - models.UniqueConstraint(fields=["identifier"], name="lp_uniq_identifier") + # LearningPackage keys must be globally unique. This is something + # that might be relaxed in the future if this system were to be + # extensible to something like multi-tenancy, in which case we'd tie + # it to something like a Site or Org. + models.UniqueConstraint( + fields=["key"], + name="oel_publishing_lp_uniq_key", + ) ] verbose_name = "Learning Package" verbose_name_plural = "Learning Packages" -class PublishLogEntry(models.Model): +class PublishableEntity(models.Model): """ - This model tracks Publishing activity. + This represents any publishable thing that has ever existed in a + LearningPackage. It serves as a stable model that will not go away even if + these things are later unpublished or deleted. + + A PublishableEntity belongs to exactly one LearningPackage. + + Examples of Publishable Entities + -------------------------------- + + Components (e.g. VideoBlock, ProblemBlock), Units, and Sections/Subsections + would all be considered Publishable Entites. But anything that can be + imported, exported, published, and reverted in a course or library could be + modeled as a PublishableEntity, including things like Grading Policy or + possibly Taxonomies (?). + + How to use this model + --------------------- + + The publishing app understands that publishable entities exist, along with + their drafts and published versions. It has some basic metadata, such as + identifiers, who created it, and when it was created. It's meant to + encapsulate the draft and publishing related aspects of your content, but + the ``publishing`` app doesn't know anything about the actual content being + referenced. + + You have to provide actual meaning to PublishableEntity by creating your own + models that will represent your particular content and associating them to + PublishableEntity via a OneToOneField with primary_key=True. The easiest way + to do this is to have your model inherit from PublishableEntityMixin. + + Identifiers + ----------- + The UUID is globally unique and should be treated as immutable. + + The key field *is* mutable, but changing it will affect all + PublishedEntityVersions. They are locally unique within the LearningPackage. - It is expected that other apps make foreign keys to this table to mark when - their content gets published. This is to allow us to tie together many - different entities (e.g. Components, Units, etc.) that are all published at - the same time. + If you are referencing this model from within the same process, use a + foreign key to the id. If you are referencing this PublishedEntity from an + external system/service, use the UUID. The key is the part that is most + likely to be human-readable, and may be exported/copied, but try not to rely + on it, since this value may change. + + Note: When we actually implement the ability to change identifiers, we + should make a history table and a modified attribute on this model. + + Why are Identifiers in this Model? + ---------------------------------- + + A PublishableEntity never stands alone–it's always intended to be used with + a 1:1 model like Component or Unit. So why have all the identifiers in this + model instead of storing them in those other models? Two reasons: + + * Published things need to have the right identifiers so they can be used + throughout the system, and the UUID is serving the role of ISBN in physical + book publishing. + * We want to be able to enforce the idea that "key" is locally unique across + all PublishableEntities within a given LearningPackage. Component and Unit + can't do that without a shared model. + + That being said, models that build on PublishableEntity are free to add + their own identifiers if it's useful to do so. + + Why not Inherit from this Model? + -------------------------------- + + Django supports multi-table inheritance: + + https://docs.djangoproject.com/en/4.2/topics/db/models/#multi-table-inheritance + + We don't use that, primarily because we want to more clearly decouple + publishing concerns from the rest of the logic around Components, Units, + etc. If you made a Component and ComponentVersion models that subclassed + PublishableEntity and PublishableEntityVersion, and then accessed + ``component.versions``, you might expect ComponentVersions to come back and + be surprised when you get EntityVersions instead. + + In general, we want freedom to add new Publishing models, fields, and + methods without having to worry about the downstream name collisions with + other apps (many of which might live in other repositories). The helper + mixins will provide a little syntactic sugar to make common access patterns + more convenient, like file access. + """ + + uuid = immutable_uuid_field() + learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) + key = key_field() + created = manual_date_time_field() + created_by = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.SET_NULL, + null=True, + blank=True, + ) + + class Meta: + constraints = [ + # Keys are unique within a given LearningPackage. + models.UniqueConstraint( + fields=[ + "learning_package", + "key", + ], + name="oel_pub_ent_uniq_lp_key", + ) + ] + indexes = [ + # Global Key Index: + # * Search by key across all PublishableEntities on the site. This + # would be a support-oriented tool from Django Admin. + models.Index( + fields=["key"], + name="oel_pub_ent_idx_key", + ), + # LearningPackage (reverse) Created Index: + # * Search for most recently *created* PublishableEntities for a + # given LearningPackage, since they're the most likely to be + # actively worked on. + models.Index( + fields=["learning_package", "-created"], + name="oel_pub_ent_idx_lp_rcreated", + ), + ] + # These are for the Django Admin UI. + verbose_name = "Publishable Entity" + verbose_name_plural = "Publishable Entities" + + def __str__(self): + return f"{self.key}" + + +class PublishableEntityVersion(models.Model): + """ + A particular version of a PublishableEntity. + + This model has its own ``uuid`` so that it can be referenced directly. The + ``uuid`` should be treated as immutable. + + PublishableEntityVersions are created once and never updated. So for + instance, the ``title`` should never be modified. + + Like PublishableEntity, the data in this model is only enough to cover the + parts that are most important for the actual process of managing drafts and + publishes. You will want to create your own models to represent the actual + content data that's associated with this PublishableEntityVersion, and + connect them using a OneToOneField with primary_key=True. The easiest way to + do this is to inherit from PublishableEntityVersionMixin. Be sure to treat + these versioned models in your app as immutable as well. + """ + + uuid = immutable_uuid_field() + entity = models.ForeignKey( + PublishableEntity, on_delete=models.CASCADE, related_name="versions" + ) + + # Most publishable things will have some sort of title, but blanks are + # allowed for those that don't require one. + title = models.CharField(max_length=1000, default="", null=False, blank=True) + + # The version_num starts at 1 and increments by 1 with each new version for + # a given PublishableEntity. Doing it this way makes it more convenient for + # users to refer to than a hash or UUID value. It also helps us catch race + # conditions on save, by setting a unique constraint on the entity and + # version_num. + version_num = models.PositiveBigIntegerField( + null=False, + validators=[MinValueValidator(1)], + ) + + # All PublishableEntityVersions created as part of the same publish should + # have the exact same created datetime (not off by a handful of + # microseconds). + created = manual_date_time_field() + + # User who created the PublishableEntityVersion. This can be null if the + # user is later removed. Open edX in general doesn't let you remove users, + # but we should try to model it so that this is possible eventually. + created_by = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.SET_NULL, + null=True, + blank=True, + ) + + class Meta: + constraints = [ + # Prevent the situation where we have multiple + # PublishableEntityVersions claiming to be the same version_num for + # a given PublishableEntity. This can happen if there's a race + # condition between concurrent editors in different browsers, + # working on the same Publishable. With this constraint, one of + # those processes will raise an IntegrityError. + models.UniqueConstraint( + fields=[ + "entity", + "version_num", + ], + name="oel_pv_uniq_entity_version_num", + ) + ] + indexes = [ + # LearningPackage (reverse) Created Index: + # * Make it cheap to find the most recently created + # PublishableEntityVersions for a given LearningPackage. This + # represents the most recently saved work for a LearningPackage + # and would be the most likely areas to get worked on next. + models.Index( + fields=["entity", "-created"], + name="oel_pv_idx_entity_rcreated", + ), + # Title Index: + # * Search by title. + models.Index( + fields=[ + "title", + ], + name="oel_pv_idx_title", + ), + ] + + # These are for the Django Admin UI. + verbose_name = "Publishable Entity Version" + verbose_name_plural = "Publishable Entity Versions" + + +class Draft(models.Model): + """ + Find the active draft version of an entity (usually most recently created). + + This model mostly only exists to allow us to join against a bunch of + PublishableEntity objects at once and get all their latest drafts. You might + use this together with Published in order to see which Drafts haven't been + published yet. + + A Draft entry should be created whenever a new PublishableEntityVersion is + created. This means there are three possible states: + + 1. No Draft entry for a PublishableEntity: This means a PublishableEntity + was created, but no PublishableEntityVersion was ever made for it, so + there was never a Draft version. + 2. A Draft entry exists and points to a PublishableEntityVersion: This is + the most common state. + 3. A Draft entry exists and points to a null version: This means a version + used to be the draft, but it's been functionally "deleted". The versions + still exist in our history, but we're done using it. + + It would have saved a little space to add this data to the Published model + (and possibly call the combined model something else). Split Modulestore did + this with its active_versions table. I keep it separate here to get a better + separation of lifecycle events: i.e. this table *only* changes when drafts + are updated, not when publishing happens. The Published model only changes + when something is published. + """ + + entity = models.OneToOneField( + PublishableEntity, + on_delete=models.RESTRICT, + primary_key=True, + ) + version = models.OneToOneField( + PublishableEntityVersion, + on_delete=models.RESTRICT, + null=True, + blank=True, + ) + + +class PublishLog(models.Model): + """ + There is one row in this table for every time content is published. + + Each PublishLog has 0 or more PublishLogRecords describing exactly which + PublishableEntites were published and what the version changes are. A + PublishLog is like a git commit in that sense, with individual + PublishLogRecords representing the files changed. + + Open question: Empty publishes are allowed at this time, and might be useful + for "fake" publishes that are necessary to invoke other post-publish + actions. It's not clear at this point how useful this will actually be. """ uuid = immutable_uuid_field() @@ -53,8 +345,102 @@ class PublishLogEntry(models.Model): settings.AUTH_USER_MODEL, on_delete=models.SET_NULL, null=True, + blank=True, + ) + + class Meta: + verbose_name = "Publish Log" + verbose_name_plural = "Publish Logs" + + +class PublishLogRecord(models.Model): + """ + A record for each publishable entity version changed, for each publish. + + To revert a publish, we would make a new publish that swaps ``old_version`` + and ``new_version`` field values. + """ + + publish_log = models.ForeignKey(PublishLog, on_delete=models.CASCADE) + entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) + old_version = models.ForeignKey( + PublishableEntityVersion, + on_delete=models.RESTRICT, + null=True, + blank=True, + related_name="+", + ) + new_version = models.ForeignKey( + PublishableEntityVersion, on_delete=models.RESTRICT, null=True, blank=True + ) + + class Meta: + constraints = [ + # A Publishable can have only one PublishLogRecord per PublishLog. + # You can't simultaneously publish two different versions of the + # same publishable. + models.UniqueConstraint( + fields=[ + "publish_log", + "entity", + ], + name="oel_plr_uniq_pl_publishable", + ) + ] + indexes = [ + # Publishable (reverse) Publish Log Index: + # * Find the history of publishes for a given Publishable, + # starting with the most recent (since IDs are ascending ints). + models.Index( + fields=["entity", "-publish_log"], + name="oel_plr_idx_entity_rplr", + ), + ] + + class Meta: + verbose_name = "Publish Log Record" + verbose_name_plural = "Publish Log Records" + + +class Published(models.Model): + """ + Find the currently published version of an entity. + + Notes: + + * There is only ever one published PublishableEntityVersion per + PublishableEntity at any given time. + * It may be possible for a PublishableEntity to exist only as a Draft (and thus + not show up in this table). + * If a row exists for a PublishableEntity, but the ``version`` field is + None, it means that the entity was published at some point, but is no + longer published now–i.e. it's functionally "deleted", even though all + the version history is preserved behind the scenes. + + TODO: Do we need to create a (redundant) title field in this model so that + we can more efficiently search across titles within a LearningPackage? + Probably not an immediate concern because the number of rows currently + shouldn't be > 10,000 in the more extreme cases. + + TODO: Do we need to make a "most_recently" published version when an entry + is unpublished/deleted? + """ + + entity = models.OneToOneField( + PublishableEntity, + on_delete=models.RESTRICT, + primary_key=True, + ) + version = models.OneToOneField( + PublishableEntityVersion, + on_delete=models.RESTRICT, + null=True, + ) + publish_log_record = models.ForeignKey( + PublishLogRecord, + on_delete=models.RESTRICT, ) class Meta: - verbose_name = "Publish Log Entry" - verbose_name_plural = "Publish Log Entries" + verbose_name = "Published Entity" + verbose_name_plural = "Published Entities" diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py index 4955ad11c..71273afa0 100644 --- a/openedx_learning/lib/fields.py +++ b/openedx_learning/lib/fields.py @@ -8,8 +8,8 @@ https://open-edx-proposals.readthedocs.io/en/latest/best-practices/oep-0038-Data-Modeling.html * The UUID fields are intended to be globally unique identifiers that other services can store and rely on staying the same. -* The "identifier" fields can be more human-friendly strings, but these may only - be unique within a given context. These values should be treated as mutable, +* The "key" fields can be more human-friendly strings, but these may only + be unique within a given scope. These values should be treated as mutable, even if they rarely change in practice. TODO: @@ -30,7 +30,7 @@ from .validators import validate_utc_datetime -def identifier_field(): +def key_field(): """ Externally created Identifier fields. diff --git a/projects/dev.py b/projects/dev.py index 13c351d02..8bc2f2524 100644 --- a/projects/dev.py +++ b/projects/dev.py @@ -31,6 +31,7 @@ "django.contrib.admindocs", # Learning Core Apps "openedx_learning.core.components.apps.ComponentsConfig", + "openedx_learning.core.contents.apps.ContentsConfig", "openedx_learning.core.publishing.apps.PublishingConfig", # Learning Contrib Apps "openedx_learning.contrib.media_server.apps.MediaServerConfig", @@ -40,9 +41,14 @@ # REST API "rest_framework", "openedx_learning.rest_api.apps.RESTAPIConfig", + + # Debugging + "debug_toolbar", ) MIDDLEWARE = [ + "debug_toolbar.middleware.DebugToolbarMiddleware", + "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.common.CommonMiddleware", @@ -95,3 +101,6 @@ # STORAGES setting in Django >= 4.2 "STORAGE": None, } +INTERNAL_IPS = [ + "127.0.0.1", +] diff --git a/projects/urls.py b/projects/urls.py index 67364cfcb..e27da7532 100644 --- a/projects/urls.py +++ b/projects/urls.py @@ -1,9 +1,13 @@ +from django.conf import settings from django.contrib import admin from django.urls import include, path +from django.conf.urls.static import static + urlpatterns = [ path("admin/doc/", include("django.contrib.admindocs.urls")), path("admin/", admin.site.urls), path("media_server/", include("openedx_learning.contrib.media_server.urls")), path("rest_api/", include("openedx_learning.rest_api.urls")), -] + path('__debug__/', include('debug_toolbar.urls')), +] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) diff --git a/test_settings.py b/test_settings.py index d2d0820fc..896092ca5 100644 --- a/test_settings.py +++ b/test_settings.py @@ -36,8 +36,9 @@ def root(*args): # 'django.contrib.admin', # 'django.contrib.admindocs', # Our own apps - "openedx_learning.core.publishing.apps.PublishingConfig", "openedx_learning.core.components.apps.ComponentsConfig", + "openedx_learning.core.contents.apps.ContentsConfig", + "openedx_learning.core.publishing.apps.PublishingConfig", ] LOCALE_PATHS = [ diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/openedx_learning/__init__.py b/tests/openedx_learning/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/openedx_learning/core/__init__.py b/tests/openedx_learning/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/openedx_learning/core/components/__init__.py b/tests/openedx_learning/core/components/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/openedx_learning/core/components/test_models.py b/tests/openedx_learning/core/components/test_models.py new file mode 100644 index 000000000..f41df14d0 --- /dev/null +++ b/tests/openedx_learning/core/components/test_models.py @@ -0,0 +1,43 @@ +from datetime import datetime, timezone + +from django.test.testcases import TestCase + +from openedx_learning.core.publishing.api import ( + create_learning_package, + publish_all_drafts, +) +from openedx_learning.core.components.api import create_component_and_version + + +class TestModelVersioningQueries(TestCase): + """ + Test that Component/ComponentVersion are registered with the publishing app. + """ + + @classmethod + def setUpTestData(cls): + cls.learning_package = create_learning_package( + "components.TestVersioning", + "Learning Package for Testing Component Versioning", + ) + cls.now = datetime(2023, 5, 8, tzinfo=timezone.utc) + + def test_latest_version(self): + component, component_version = create_component_and_version( + learning_package_id=self.learning_package.id, + namespace="xblock.v1", + type="problem", + local_key="monty_hall", + title="Monty Hall Problem", + created=self.now, + created_by=None, + ) + assert component.versioning.draft == component_version + assert component.versioning.published is None + publish_all_drafts(self.learning_package.pk, published_at=self.now) + + # Force the re-fetch from the database + assert component.versioning.published == component_version + + # Grabbing the list of versions for this component + assert list(component.versioning.versions) == [component_version] diff --git a/tests/openedx_learning/core/publishing/__init__.py b/tests/openedx_learning/core/publishing/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/openedx_learning/core/publishing/test_api.py b/tests/openedx_learning/core/publishing/test_api.py new file mode 100644 index 000000000..ebfdf05e1 --- /dev/null +++ b/tests/openedx_learning/core/publishing/test_api.py @@ -0,0 +1,66 @@ +from datetime import datetime, timezone +from uuid import UUID + +from django.core.exceptions import ValidationError +from django.test import TestCase +import pytest + +from openedx_learning.core.publishing.api import create_learning_package + + +class CreateLearningPackageTestCase(TestCase): + def test_normal(self): + """Normal flow with no errors.""" + key = "my_key" + title = "My Excellent Title with Emoji 🔥" + created = datetime(2023, 4, 2, 15, 9, 0, tzinfo=timezone.utc) + package = create_learning_package(key, title, created) + + assert package.key == "my_key" + assert package.title == "My Excellent Title with Emoji 🔥" + assert package.created == created + assert package.updated == created + + # Should be auto-generated + assert isinstance(package.uuid, UUID) + + # Having an actual value here means we were persisted to the database. + assert isinstance(package.id, int) + + def test_auto_datetime(self): + """Auto-generated created datetime works as expected.""" + key = "my_key" + title = "My Excellent Title with Emoji 🔥" + package = create_learning_package(key, title) + + assert package.key == "my_key" + assert package.title == "My Excellent Title with Emoji 🔥" + + # Auto-generated datetime checking... + assert isinstance(package.created, datetime) + assert package.created == package.updated + assert package.created.tzinfo == timezone.utc + + # Should be auto-generated + assert isinstance(package.uuid, UUID) + + # Having an actual value here means we were persisted to the database. + assert isinstance(package.id, int) + + def test_non_utc_time(self): + """Require UTC timezone for created.""" + with pytest.raises(ValidationError) as excinfo: + create_learning_package("my_key", "A Title", datetime(2023, 4, 2)) + message_dict = excinfo.value.message_dict + + # Both datetime fields should be marked as invalid + assert "created" in message_dict + assert "updated" in message_dict + + def test_already_exists(self): + """Raises ValidationError for duplicate keys.""" + create_learning_package("my_key", "Original") + with pytest.raises(ValidationError) as excinfo: + create_learning_package("my_key", "Duplicate") + message_dict = excinfo.value.message_dict + assert "key" in message_dict diff --git a/tests/test_models.py b/tests/test_models.py deleted file mode 100644 index 0e2faf6e6..000000000 --- a/tests/test_models.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python -""" -Tests for the `openedx-learning` models module. -""" - - -class TestPublishedLearningPackage: - """ - Tests of the PublishedLearningPackage model. - """ - - def test_something(self): - """TODO: Write real test cases."""