openedx · ormsbee · May 12, 2023 · Apr 19, 2023 · May 11, 2023 · May 11, 2023
diff --git a/.gitignore b/.gitignore
@@ -61,7 +61,7 @@ requirements/private.in
 requirements/private.txt
 
 # database file
-dev.db
+dev.db*
 
 .vscode
 

diff --git a/.importlinter b/.importlinter
@@ -41,4 +41,5 @@ name = Core App Dependency Layering
 type = layers
 layers=
     openedx_learning.core.components
+    openedx_learning.core.contents
     openedx_learning.core.publishing
diff --git a/README.rst b/README.rst
@@ -45,8 +45,8 @@ We have a few different identifier types in the schema, and we try to avoid ``_i
 
 * ``id`` is the auto-generated, internal row ID and primary key. This never changes. Data models should make foreign keys to this field, as per Django convention.
 * ``uuid`` is a randomly generated UUID4. This is the stable way to refer to a row/resource from an external service. This never changes. This is separate from ``id`` mostly because there are performance penalties when using UUIDs as primary keys with MySQL.
-* ``identifier`` is intended to be a case-sensitive, alphanumeric identifier, which holds some meaning to library clients. This is usually stable, but can be changed, depending on the business logic of the client. The apps in this repo should make no assumptions about it being stable. It can be used as a suffix.
-* ``num`` is like ``identifier``, but for use when it's strictly numeric. It can also be used as a suffix.
+* ``key`` is intended to be a case-sensitive, alphanumeric key, which holds some meaning to library clients. This is usually stable, but can be changed, depending on the business logic of the client. The apps in this repo should make no assumptions about it being stable. It can be used as a suffix.
+* ``num`` is like ``key``, but for use when it's strictly numeric. It can also be used as a suffix.
 
 
 See Also

diff --git a/docs/decisions/0005-identifier-conventions.rst b/docs/decisions/0005-identifier-conventions.rst
@@ -0,0 +1,39 @@
+5. Identifier Conventions
+=========================
+
+Context
+-------
+
+We want a common set of conventions for how to reference models in various situations, such as:
+
+* From a model in a different app, but in the same Python process.
+* From a different server.
+
+Decision
+--------
+
+The content-related data models in our system will use the following convention for identifier fields:
+
+Primary Key
+  The primary key will be a BigAutoField. This will usually  be ``id``, but it can be different if the model builds off another one via a ``OneToOneField`` primary key. Other apps that are running in the same process should directly make foreign key field references to this. This value will not change.
+
+UUID
+  The ``uuid`` field is a randomly generated UUID4 that is globally unique and should be treated as immutable. If you are making a reference to this record in another system (e.g. an external service), this is the identifier to store.
+
+Key
+  The ``key`` field is chosen by apps or users, and is the most likely piece to be human-readable (though it doesn't have to be). These values are only locally unique within a given scope, such as a particular LearningPackage or ComponentVersion.
+
+  The correlates most closely to OpaqueKeys, though they are not precisely the same. In particular, we don't want to directly store BlockUsageKeys that have the LearningContextKey baked into them, because that makes it much harder to change the LearningContextKey later, e.g. if we ever want to change a CourseKey for a course. Different models can choose whether the ``key`` value is mutable or not, but outside users should assume that it can change, even if it rarely does in practice.
+
+Implementation Notes
+--------------------
+
+Helpers to generate these field types are in the ``openedx_learning.lib.fields`` module.
+
+Rejected Alternatives
+---------------------
+
+A number of names were considered for ``key``, and were rejected for different reasons:
+
+* ``identifier`` is used in some standards like QTI, but it's too easily confused with ``id`` or the general concept of the three types of identity-related fields we have.
+* ``slug`` was considered, but ultimately rejected because that implied these fields would be human-readable, and that's not guaranteed. Most XBlock content that comes from MongoDB will be using GUIDs, for instance.
diff --git a/docs/decisions/0006-app-label-prefix.rst b/docs/decisions/0006-app-label-prefix.rst
@@ -0,0 +1,13 @@
+6. App Label Prefix
+===================
+
+Context
+-------
+
+We want this repo to be useful in different Django projects outside of just edx-platform, and we want to avoid downstream collisions with our app names.
+
+
+Decision
+--------
+
+All apps in this repo will create a default AppConfig that sets the ``label`` to have a prefix of ``oel_`` before the app name. So if the app name is ``publishing``, the ``label`` will be ``oel_publishing``. This means that all generated database tables will similarly be prefixed with ``oel_``.
diff --git a/olx_importer/management/commands/load_components.py b/olx_importer/management/commands/load_components.py
@@ -18,33 +18,24 @@
 changed?" in order to decide if we really make a new ComponentVersion or not.
 """
 from datetime import datetime, timezone
-import codecs
 import logging
 import mimetypes
 import pathlib
 import re
 import xml.etree.ElementTree as ET
 
-from django.core.files.base import ContentFile
-from django.core.management.base import BaseCommand
+from django.core.management.base import BaseCommand, CommandError
 from django.db import transaction
 
-from openedx_learning.core.publishing.models import LearningPackage, PublishLogEntry
-from openedx_learning.core.components.models import (
-    Component,
-    ComponentVersion,
-    ComponentVersionRawContent,
-    ComponentPublishLogEntry,
-    PublishedComponent,
-    RawContent,
-    TextContent,
-)
-from openedx_learning.lib.fields import create_hash_digest
+# Model references to remove
+from openedx_learning.core.components import api as components_api
+from openedx_learning.core.contents import api as contents_api
+from openedx_learning.core.publishing import api as publishing_api
 
 SUPPORTED_TYPES = ["problem", "video", "html"]
 logger = logging.getLogger(__name__)
 
-
+ 
 class Command(BaseCommand):
     help = "Load sample Component data from course export"
 
@@ -70,178 +61,126 @@ def init_known_types(self):
 
     def add_arguments(self, parser):
         parser.add_argument("course_data_path", type=pathlib.Path)
-        parser.add_argument("learning_package_identifier", type=str)
+        parser.add_argument("learning_package_key", type=str)
 
-    def handle(self, course_data_path, learning_package_identifier, **options):
+    def handle(self, course_data_path, learning_package_key, **options):
         self.course_data_path = course_data_path
-        self.learning_package_identifier = learning_package_identifier
-        self.load_course_data(learning_package_identifier)
+        self.learning_package_key = learning_package_key
+        self.load_course_data(learning_package_key)
 
     def get_course_title(self):
         course_type_dir = self.course_data_path / "course"
         course_xml_file = next(course_type_dir.glob("*.xml"))
         course_root = ET.parse(course_xml_file).getroot()
         return course_root.attrib.get("display_name", "Unknown Course")
 
-    def load_course_data(self, learning_package_identifier):
+    def load_course_data(self, learning_package_key):
         print(f"Importing course from: {self.course_data_path}")
         now = datetime.now(timezone.utc)
         title = self.get_course_title()
 
+        if publishing_api.learning_package_exists(learning_package_key):
+            raise CommandError(
+                f"{learning_package_key} already exists. "
+                "This command currently only supports initial import."
+            )
+
         with transaction.atomic():
-            learning_package, _created = LearningPackage.objects.get_or_create(
-                identifier=learning_package_identifier,
-                defaults={
-                    "title": title,
-                    "created": now,
-                    "updated": now,
-                },
+            self.learning_package = publishing_api.create_learning_package(
+                learning_package_key, title, created=now,
             )
-            self.learning_package = learning_package
+            for block_type in SUPPORTED_TYPES:
+                self.import_block_type(block_type, now) #, publish_log_entry)
 
-            publish_log_entry = PublishLogEntry.objects.create(
-                learning_package=learning_package,
-                message="Initial Import",
-                published_at=now,
-                published_by=None,
+            publishing_api.publish_all_drafts(
+                self.learning_package.id, 
+                message="Initial Import from load_components script"
             )
 
-            for block_type in SUPPORTED_TYPES:
-                self.import_block_type(block_type, now, publish_log_entry)
 
     def create_content(self, static_local_path, now, component_version):
-        identifier = pathlib.Path("static") / static_local_path
-        real_path = self.course_data_path / identifier
-        mime_type, _encoding = mimetypes.guess_type(identifier)
+        key = pathlib.Path("static") / static_local_path
+        real_path = self.course_data_path / key
+        mime_type, _encoding = mimetypes.guess_type(key)
         if mime_type is None:
             logger.error(
-                f"  no mimetype found for {real_path}, defaulting to application/binary"
+                f'  no mimetype found for "{real_path}", defaulting to application/binary'
             )
             mime_type = "application/binary"
 
         try:
             data_bytes = real_path.read_bytes()
         except FileNotFoundError:
-            logger.warning(f"  Static reference not found: {real_path}")
+            logger.warning(f'  Static reference not found: "{real_path}"')
             return  # Might as well bail if we can't find the file.
 
-        hash_digest = create_hash_digest(data_bytes)
-
-        raw_content, created = RawContent.objects.get_or_create(
-            learning_package=self.learning_package,
+        raw_content, _created = contents_api.get_or_create_raw_content(
+            learning_package_id=self.learning_package.id,
+            data_bytes=data_bytes,
             mime_type=mime_type,
-            hash_digest=hash_digest,
-            defaults=dict(
-                size=len(data_bytes),
-                created=now,
-            ),
+            created=now,
         )
-        if created:
-            raw_content.file.save(
-                f"{raw_content.learning_package.uuid}/{hash_digest}",
-                ContentFile(data_bytes),
-            )
-
-        ComponentVersionRawContent.objects.get_or_create(
-            component_version=component_version,
-            raw_content=raw_content,
-            identifier=identifier,
+        components_api.add_content_to_component_version(
+            component_version,
+            raw_content_id=raw_content.id,
+            key=key,
             learner_downloadable=True,
         )
 
-    def import_block_type(self, block_type, now, publish_log_entry):
+    def import_block_type(self, block_type, now): # , publish_log_entry):
         components_found = 0
+        components_skipped = 0
 
         # Find everything that looks like a reference to a static file appearing
         # in attribute quotes, stripping off the querystring at the end. This is
         # not fool-proof as it will match static file references that are
         # outside of tag declarations as well.
         static_files_regex = re.compile(r"""['"]\/static\/(.+?)["'\?]""")
         block_data_path = self.course_data_path / block_type
+        namespace="xblock.v1"
 
         for xml_file_path in block_data_path.glob("*.xml"):
             components_found += 1
-            identifier = xml_file_path.stem
-
-            # Find or create the Component itself
-            component, _created = Component.objects.get_or_create(
-                learning_package=self.learning_package,
-                namespace="xblock.v1",
-                type=block_type,
-                identifier=identifier,
-                defaults={
-                    "created": now,
-                },
-            )
-
-            # Create the RawContent entry for the raw data...
-            data_bytes = xml_file_path.read_bytes()
-            hash_digest = create_hash_digest(data_bytes)
-
-            raw_content, created = RawContent.objects.get_or_create(
-                learning_package=self.learning_package,
-                mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml",
-                hash_digest=hash_digest,
-                defaults=dict(
-                    # text=data_str,
-                    size=len(data_bytes),
-                    created=now,
-                ),
-            )
-            if created:
-                raw_content.file.save(
-                    f"{raw_content.learning_package.uuid}/{hash_digest}",
-                    ContentFile(data_bytes),
-                )
-
-                # Decode as utf-8-sig in order to strip any BOM from the data.
-                data_str = codecs.decode(data_bytes, "utf-8-sig")
-                TextContent.objects.create(
-                    raw_content=raw_content,
-                    text=data_str,
-                    length=len(data_str),
-                )
-
-            # TODO: Get associated file contents, both with the static regex, as
-            # well as with XBlock-specific code that examines attributes in
-            # video and HTML tag definitions.
+            local_key = xml_file_path.stem
 
+            # Do some basic parsing of the content to see if it's even well
+            # constructed enough to add (or whether we should skip/error on it).
             try:
-                block_root = ET.fromstring(data_str)
+                block_root = ET.parse(xml_file_path).getroot()
             except ET.ParseError as err:
                 logger.error(f"Parse error for {xml_file_path}: {err}")
+                components_skipped += 1
                 continue
-
+            
             display_name = block_root.attrib.get("display_name", "")
-
-            # Create the ComponentVersion
-            component_version = ComponentVersion.objects.create(
-                component=component,
-                version_num=1,  # This only works for initial import
+            _component, component_version = components_api.create_component_and_version(
+                learning_package_id=self.learning_package.id,
+                namespace=namespace,
+                type=block_type,
+                local_key=local_key,
                 title=display_name,
                 created=now,
                 created_by=None,
             )
-            ComponentVersionRawContent.objects.create(
-                component_version=component_version,
-                raw_content=raw_content,
-                identifier="source.xml",
-                learner_downloadable=False,
-            )
-            static_files_found = static_files_regex.findall(data_str)
-            for static_local_path in static_files_found:
-                self.create_content(static_local_path, now, component_version)
 
-            # Mark that Component as Published
-            component_publish_log_entry = ComponentPublishLogEntry.objects.create(
-                component=component,
-                component_version=component_version,
-                publish_log_entry=publish_log_entry,
+            # Create the RawContent entry for the raw data...
+            data_bytes = xml_file_path.read_bytes()
+            text_content, _created = contents_api.get_or_create_text_content_from_bytes(
+                learning_package_id=self.learning_package.id,
+                data_bytes=data_bytes,
+                mime_type=f"application/vnd.openedx.xblock.v1.{block_type}+xml",
+                created=now,
             )
-            PublishedComponent.objects.create(
-                component=component,
-                component_version=component_version,
-                component_publish_log_entry=component_publish_log_entry,
+            # Add the OLX source text to the ComponentVersion
+            components_api.add_content_to_component_version(
+                component_version,
+                raw_content_id=text_content.pk,
+                key="source.xml",
+                learner_downloadable=False
             )
 
-        print(f"{block_type}: {components_found}")
+            # Cycle through static assets references and add those as well...
+            for static_local_path in static_files_regex.findall(text_content.text):
+                self.create_content(static_local_path, now, component_version)
+
+        print(f"{block_type}: {components_found} (skipped: {components_skipped})")
diff --git a/openedx_learning/contrib/media_server/urls.py b/openedx_learning/contrib/media_server/urls.py
@@ -6,8 +6,8 @@
     path(
         (
             "component_asset/"
-            "<str:learning_package_identifier>/"
-            "<str:component_identifier>/"
+            "<str:learning_package_key>/"
+            "<str:component_key>/"
             "<int:version_num>/"
             "<path:asset_path>"
         ),

diff --git a/openedx_learning/contrib/media_server/views.py b/openedx_learning/contrib/media_server/views.py
@@ -8,7 +8,7 @@
 
 
 def component_asset(
-    request, learning_package_identifier, component_identifier, version_num, asset_path
+    request, learning_package_key, component_key, version_num, asset_path
 ):
     """
     Serve the ComponentVersion asset data.
@@ -25,7 +25,7 @@ def component_asset(
     """
     try:
         cvc = get_component_version_content(
-            learning_package_identifier, component_identifier, version_num, asset_path
+            learning_package_key, component_key, version_num, asset_path
         )
     except ObjectDoesNotExist:
         raise Http404("File not found")
-Original file line number
+Diff line change
@@ Expand Up / @@ -61,7 +61,7 @@ requirements/private.in @@
     requirements/private.txt
     # database file
-    dev.db
+    dev.db*
     .vscode
@@ Expand Down @@