diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py
index 73a0541a..8b9f3bfc 100644
--- a/openedx_learning/__init__.py
+++ b/openedx_learning/__init__.py
@@ -1,4 +1,4 @@
 """
 Open edX Learning ("Learning Core").
 """
-__version__ = "0.3.7"
+__version__ = "0.4.0"
diff --git a/openedx_learning/core/components/admin.py b/openedx_learning/core/components/admin.py
index 5710303a..c5fb9b16 100644
--- a/openedx_learning/core/components/admin.py
+++ b/openedx_learning/core/components/admin.py
@@ -158,7 +158,7 @@ def content_preview(cvc_obj: ComponentVersionRawContent) -> SafeText:
     """
     raw_content_obj = cvc_obj.raw_content
 
-    if raw_content_obj.mime_type.startswith("image/"):
+    if raw_content_obj.media_type.type == "image":
         return format_html(
             '<img src="{}" style="max-width: 100%;" />',
             # TODO: configure with settings value:
diff --git a/openedx_learning/core/components/api.py b/openedx_learning/core/components/api.py
index 61e7e574..4275c5d1 100644
--- a/openedx_learning/core/components/api.py
+++ b/openedx_learning/core/components/api.py
@@ -117,6 +117,7 @@ def get_component_version_content(
     """
     return ComponentVersionRawContent.objects.select_related(
         "raw_content",
+        "raw_content__media_type",
         "component_version",
         "component_version__component",
         "component_version__component__learning_package",
diff --git a/openedx_learning/core/components/migrations/0001_initial.py b/openedx_learning/core/components/migrations/0001_initial.py
index 0ca82b95..bbe0e15d 100644
--- a/openedx_learning/core/components/migrations/0001_initial.py
+++ b/openedx_learning/core/components/migrations/0001_initial.py
@@ -1,4 +1,4 @@
-# Generated by Django 3.2.19 on 2023-06-15 14:43
+# Generated by Django 3.2.23 on 2023-12-04 00:41
 
 import uuid
 
@@ -13,7 +13,7 @@ class Migration(migrations.Migration):
     initial = True
 
     dependencies = [
-        ('oel_publishing', '0001_initial'),
+        ('oel_publishing', '0002_alter_fk_on_delete'),
         ('oel_contents', '0001_initial'),
     ]
 
diff --git a/openedx_learning/core/contents/admin.py b/openedx_learning/core/contents/admin.py
index 0ce7ed61..8409fe90 100644
--- a/openedx_learning/core/contents/admin.py
+++ b/openedx_learning/core/contents/admin.py
@@ -18,14 +18,14 @@ class RawContentAdmin(ReadOnlyModelAdmin):
         "hash_digest",
         "file_link",
         "learning_package",
-        "mime_type",
+        "media_type",
         "size",
         "created",
     ]
     fields = [
         "learning_package",
         "hash_digest",
-        "mime_type",
+        "media_type",
         "size",
         "created",
         "file_link",
@@ -34,13 +34,13 @@ class RawContentAdmin(ReadOnlyModelAdmin):
     readonly_fields = [
         "learning_package",
         "hash_digest",
-        "mime_type",
+        "media_type",
         "size",
         "created",
         "file_link",
         "text_preview",
     ]
-    list_filter = ("mime_type", "learning_package")
+    list_filter = ("media_type", "learning_package")
     search_fields = ("hash_digest",)
 
     def file_link(self, raw_content):
diff --git a/openedx_learning/core/contents/api.py b/openedx_learning/core/contents/api.py
index fd4e069a..c29255b2 100644
--- a/openedx_learning/core/contents/api.py
+++ b/openedx_learning/core/contents/api.py
@@ -12,9 +12,10 @@
 from django.core.files.base import ContentFile
 from django.db.transaction import atomic
 
+from openedx_learning.lib.cache import lru_cache
 from openedx_learning.lib.fields import create_hash_digest
 
-from .models import RawContent, TextContent
+from .models import MediaType, RawContent, TextContent
 
 
 def create_raw_content(
@@ -28,9 +29,10 @@ def create_raw_content(
     Create a new RawContent instance and persist it to storage.
     """
     hash_digest = hash_digest or create_hash_digest(data_bytes)
+
     raw_content = RawContent.objects.create(
         learning_package_id=learning_package_id,
-        mime_type=mime_type,
+        media_type_id=get_media_type_id(mime_type),
         hash_digest=hash_digest,
         size=len(data_bytes),
         created=created,
@@ -54,6 +56,39 @@ def create_text_from_raw_content(raw_content: RawContent, encoding="utf-8-sig")
     )
 
 
+@lru_cache(maxsize=128)
+def get_media_type_id(mime_type: str) -> int:
+    """
+    Return the MediaType.id for the desired mime_type string.
+
+    If it is not found in the database, a new entry will be created for it. This
+    lazy-writing means that MediaType entry IDs will *not* be the same across
+    different server instances, and apps should not assume that will be the
+    case. Even if we were to preload a bunch of common ones, we can't anticipate
+    the different XBlocks that will be installed in different server instances,
+    each of which will use their own MediaType.
+
+    This will typically only be called when create_raw_content is calling it to
+    lookup the media_type_id it should use for a new RawContent. If you already
+    have a RawContent instance, it makes much more sense to access its
+    media_type relation.
+    """
+    if "+" in mime_type:
+        base, suffix = mime_type.split("+")
+    else:
+        base = mime_type
+        suffix = ""
+
+    main_type, sub_type = base.split("/")
+    mt, _created = MediaType.objects.get_or_create(
+        type=main_type,
+        sub_type=sub_type,
+        suffix=suffix,
+    )
+
+    return mt.id
+
+
 def get_or_create_raw_content(
     learning_package_id: int,
     data_bytes: bytes,
diff --git a/openedx_learning/core/contents/migrations/0001_initial.py b/openedx_learning/core/contents/migrations/0001_initial.py
index 20b6b01c..7dac9a3f 100644
--- a/openedx_learning/core/contents/migrations/0001_initial.py
+++ b/openedx_learning/core/contents/migrations/0001_initial.py
@@ -1,4 +1,4 @@
-# Generated by Django 3.2.19 on 2023-06-15 14:43
+# Generated by Django 3.2.23 on 2023-12-04 00:41
 
 import django.core.validators
 import django.db.models.deletion
@@ -13,20 +13,29 @@ class Migration(migrations.Migration):
     initial = True
 
     dependencies = [
-        ('oel_publishing', '0001_initial'),
+        ('oel_publishing', '0002_alter_fk_on_delete'),
     ]
 
     operations = [
+        migrations.CreateModel(
+            name='MediaType',
+            fields=[
+                ('id', models.AutoField(primary_key=True, serialize=False)),
+                ('type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)),
+                ('sub_type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)),
+                ('suffix', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=127)),
+            ],
+        ),
         migrations.CreateModel(
             name='RawContent',
             fields=[
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                 ('hash_digest', models.CharField(editable=False, max_length=40)),
-                ('mime_type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=255)),
                 ('size', models.PositiveBigIntegerField(validators=[django.core.validators.MaxValueValidator(50000000)])),
                 ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])),
                 ('file', models.FileField(null=True, upload_to='')),
                 ('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage')),
+                ('media_type', models.ForeignKey(on_delete=django.db.models.deletion.PROTECT, to='oel_contents.mediatype')),
             ],
             options={
                 'verbose_name': 'Raw Content',
@@ -41,10 +50,13 @@ class Migration(migrations.Migration):
                 ('length', models.PositiveIntegerField()),
             ],
         ),
-        # Call out to custom code here to change row format for TextContent
+        migrations.AddConstraint(
+            model_name='mediatype',
+            constraint=models.UniqueConstraint(fields=('type', 'sub_type', 'suffix'), name='oel_contents_uniq_t_st_sfx'),
+        ),
         migrations.AddIndex(
             model_name='rawcontent',
-            index=models.Index(fields=['learning_package', 'mime_type'], name='oel_content_idx_lp_mime_type'),
+            index=models.Index(fields=['learning_package', 'media_type'], name='oel_content_idx_lp_media_type'),
         ),
         migrations.AddIndex(
             model_name='rawcontent',
@@ -56,6 +68,6 @@ class Migration(migrations.Migration):
         ),
         migrations.AddConstraint(
             model_name='rawcontent',
-            constraint=models.UniqueConstraint(fields=('learning_package', 'mime_type', 'hash_digest'), name='oel_content_uniq_lc_mime_type_hash_digest'),
+            constraint=models.UniqueConstraint(fields=('learning_package', 'media_type', 'hash_digest'), name='oel_content_uniq_lc_media_type_hash_digest'),
         ),
     ]
diff --git a/openedx_learning/core/contents/models.py b/openedx_learning/core/contents/models.py
index 2da7ebfe..e7de7375 100644
--- a/openedx_learning/core/contents/models.py
+++ b/openedx_learning/core/contents/models.py
@@ -3,6 +3,8 @@
 the simplest building blocks to store data with. They need to be composed into
 more intelligent data models to be useful.
 """
+from functools import cached_property
+
 from django.conf import settings
 from django.core.files.storage import default_storage
 from django.core.validators import MaxValueValidator
@@ -18,6 +20,81 @@
 from ..publishing.models import LearningPackage
 
 
+class MediaType(models.Model):
+    """
+    Stores Media types for use by RawContent models.
+
+    This is the same as MIME types (the IANA renamed MIME Types to Media Types).
+    We don't pre-populate this table, so APIs that add RawContent must ensure
+    that the desired Media Type exists.
+
+    Media types are written as {type}/{sub_type}+{suffix}, where suffixes are
+    seldom used.
+
+    * application/json
+    * text/css
+    * image/svg+xml
+    * application/vnd.openedx.xblock.v1.problem+xml
+
+    We have this as a separate model (instead of a field on RawContent) because:
+
+    1. We can save a lot on storage and indexing for RawContent if we're just
+       storing foreign key references there, rather than the entire content
+       string to be indexed. This is especially relevant for our (long) custom
+       types like "application/vnd.openedx.xblock.v1.problem+xml".
+    2. These values can occasionally change. For instance, "text/javascript" vs.
+       "application/javascript". Also, we will be using a fair number of "vnd."
+       style of custom content types, and we may want the flexibility of
+       changing that without having to worry about migrating millions of rows of
+       RawContent.
+    """
+    # We're going to have many foreign key references from RawContent into this
+    # model, and we don't need to store those as 8-byte BigAutoField, as is the
+    # default for this app. It's likely that a SmallAutoField would work, but I
+    # can just barely imagine using more than 32K Media types if we have a bunch
+    # of custom "vnd." entries, or start tracking suffixes and parameters. Which
+    # is how we end up at the 4-byte AutoField.
+    id = models.AutoField(primary_key=True)
+
+    # Media types are denoted as {type}/{sub_type}+{suffix}. We currently do not
+    # support parameters.
+
+    # Media type, e.g. "application", "text", "image". Per RFC 4288, this can be
+    # at most 127 chars long and is case insensitive. In practice, it's almost
+    # always written in lowercase.
+    type = case_insensitive_char_field(max_length=127, blank=False, null=False)
+
+    # Media sub-type, e.g. "json", "css", "png". Per RFC 4288, this can be at
+    # most 127 chars long and is case insensitive. In practice, it's almost
+    # always written in lowercase.
+    sub_type = case_insensitive_char_field(max_length=127, blank=False, null=False)
+
+    # Suffix, usually just "xml" (e.g. "image/svg+xml"). Usually blank. I
+    # couldn't find an RFC description of the length limit, and 127 is probably
+    # excessive. But this table should be small enough where it doesn't really
+    # matter.
+    suffix = case_insensitive_char_field(max_length=127, blank=True, null=False)
+
+    class Meta:
+        constraints = [
+            # Make sure all (type + sub_type + suffix) combinations are unique.
+            models.UniqueConstraint(
+                fields=[
+                    "type",
+                    "sub_type",
+                    "suffix",
+                ],
+                name="oel_contents_uniq_t_st_sfx",
+            ),
+        ]
+
+    def __str__(self):
+        base = f"{self.type}/{self.sub_type}"
+        if self.suffix:
+            return f"{base}+{self.suffix}"
+        return base
+
+
 class RawContent(models.Model):  # type: ignore[django-manager-missing]
     """
     This is the most basic piece of raw content data, with no version metadata.
@@ -77,15 +154,8 @@ class RawContent(models.Model):  # type: ignore[django-manager-missing]
     # openedx.lib.fields module.
     hash_digest = hash_field()
 
-    # MIME type, such as "text/html", "image/png", etc. Per RFC 4288, MIME type
-    # and sub-type may each be 127 chars, making a max of 255 (including the "/"
-    # in between). Also, while MIME types are almost always written in lowercase
-    # as a matter of convention, by spec they are NOT case sensitive.
-    #
-    # DO NOT STORE parameters here, e.g. "charset=". We can make a new field if
-    # that becomes necessary. If we do decide to store parameters and values
-    # later, note that those *may be* case sensitive.
-    mime_type = case_insensitive_char_field(max_length=255, blank=False)
+    # What is the Media type (a.k.a. MIME type) of this data?
+    media_type = models.ForeignKey(MediaType, on_delete=models.PROTECT)
 
     # This is the size of the raw data file in bytes. This can be different than
     # the character length, since UTF-8 encoding can use anywhere between 1-4
@@ -107,6 +177,10 @@ class RawContent(models.Model):  # type: ignore[django-manager-missing]
         storage=settings.OPENEDX_LEARNING.get("STORAGE", default_storage),  # type: ignore
     )
 
+    @cached_property
+    def mime_type(self):
+        return str(self.media_type)
+
     class Meta:
         constraints = [
             # Make sure we don't store duplicates of this raw data within the
@@ -114,21 +188,21 @@ class Meta:
             models.UniqueConstraint(
                 fields=[
                     "learning_package",
-                    "mime_type",
+                    "media_type",
                     "hash_digest",
                 ],
-                name="oel_content_uniq_lc_mime_type_hash_digest",
+                name="oel_content_uniq_lc_media_type_hash_digest",
             ),
         ]
         indexes = [
-            # LearningPackage MIME type Index:
+            # LearningPackage Media type Index:
             #   * Break down Content counts by type/subtype with in a
             #     LearningPackage.
             #   * Find all the Content in a LearningPackage that matches a
             #     certain MIME type (e.g. "image/png", "application/pdf".
             models.Index(
-                fields=["learning_package", "mime_type"],
-                name="oel_content_idx_lp_mime_type",
+                fields=["learning_package", "media_type"],
+                name="oel_content_idx_lp_media_type",
             ),
             # LearningPackage (reverse) Size Index:
             #   * Find largest Content in a LearningPackage.
diff --git a/openedx_learning/lib/cache.py b/openedx_learning/lib/cache.py
new file mode 100644
index 00000000..098dbab0
--- /dev/null
+++ b/openedx_learning/lib/cache.py
@@ -0,0 +1,34 @@
+"""
+Test-friendly helpers for caching.
+
+LRU caching can be especially helpful for Learning Core data models because so
+much of it is immutable, e.g. Media Type lookups. But while these data models
+are immutable within a given site, we also want to be able to track and clear
+these caches across test runs. Later on, we may also want to inspect them to
+make sure they are not growing overly large.
+"""
+import functools
+
+# List of functions that have our
+_lru_cached_fns = []
+
+
+def lru_cache(*args, **kwargs):
+    """
+    Thin wrapper over functools.lru_cache that lets us clear all caches later.
+    """
+    def decorator(fn):
+        wrapped_fn = functools.lru_cache(*args, **kwargs)(fn)
+        _lru_cached_fns.append(wrapped_fn)
+        return wrapped_fn
+    return decorator
+
+
+def clear_lru_caches():
+    """
+    Clear all LRU caches that use our lru_cache decorator.
+
+    Useful for tests.
+    """
+    for fn in _lru_cached_fns:
+        fn.cache_clear()
diff --git a/openedx_learning/lib/fields.py b/openedx_learning/lib/fields.py
index b239eb8c..900e590f 100644
--- a/openedx_learning/lib/fields.py
+++ b/openedx_learning/lib/fields.py
@@ -121,6 +121,16 @@ def hash_field() -> models.CharField:
 
     Use the create_hash_digest function to generate data suitable for this
     field.
+
+    There are a couple of ways that we could have stored this more efficiently,
+    but we don't at this time:
+
+    1. A BinaryField would be the most space efficient, but Django doesn't
+       support indexing a BinaryField in a MySQL database.
+    2. We could make the field case-sensitive and run it through a URL-safe
+       base64 encoding. But the amount of space this saves vs. the complexity
+       didn't seem worthwhile, particularly the possibility of case-sensitivity
+       related bugs.
     """
     return models.CharField(
         max_length=40,
diff --git a/openedx_learning/lib/test_utils.py b/openedx_learning/lib/test_utils.py
new file mode 100644
index 00000000..5c947311
--- /dev/null
+++ b/openedx_learning/lib/test_utils.py
@@ -0,0 +1,22 @@
+"""
+Test utilities for Learning Core.
+
+The only thing here now is a TestCase class that knows how to clean up the
+caching used by the cache module in this package.
+"""
+import django.test
+
+from .cache import clear_lru_caches
+
+
+class TestCase(django.test.TestCase):
+    """
+    Subclass of Django's TestCase that knows how to reset caching we might use.
+    """
+    def setUp(self) -> None:
+        clear_lru_caches()
+        super().setUp()
+
+    def tearDown(self) -> None:
+        clear_lru_caches()
+        super().tearDown()
diff --git a/openedx_tagging/core/tagging/import_export/api.py b/openedx_tagging/core/tagging/import_export/api.py
index bdc0992a..7d6ae5e7 100644
--- a/openedx_tagging/core/tagging/import_export/api.py
+++ b/openedx_tagging/core/tagging/import_export/api.py
@@ -115,7 +115,7 @@ def import_tags(
         tag_import_plan.execute(task)
         task.end_success()
         return True
-    except Exception as exception:  # pylint: disable=broad-exception-caught
+    except Exception as exception:
         # Log any exception
         task.log_exception(exception)
         return False
diff --git a/tests/openedx_learning/core/components/test_models.py b/tests/openedx_learning/core/components/test_models.py
index 54af3de1..7675f705 100644
--- a/tests/openedx_learning/core/components/test_models.py
+++ b/tests/openedx_learning/core/components/test_models.py
@@ -3,10 +3,9 @@
 """
 from datetime import datetime, timezone
 
-from django.test.testcases import TestCase
-
 from openedx_learning.core.components.api import create_component_and_version
 from openedx_learning.core.publishing.api import LearningPackage, create_learning_package, publish_all_drafts
+from openedx_learning.lib.test_utils import TestCase
 
 
 class TestModelVersioningQueries(TestCase):
diff --git a/tests/openedx_learning/core/contents/__init__.py b/tests/openedx_learning/core/contents/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/openedx_learning/core/contents/test_media_types.py b/tests/openedx_learning/core/contents/test_media_types.py
new file mode 100644
index 00000000..c880a008
--- /dev/null
+++ b/tests/openedx_learning/core/contents/test_media_types.py
@@ -0,0 +1,34 @@
+"""
+A few tests to make sure our MediaType lookups are working as expected.
+"""
+from openedx_learning.core.contents import api as contents_api
+from openedx_learning.lib.test_utils import TestCase
+
+
+class MediaTypeCachingTestCase(TestCase):
+    """
+    Test that our LRU caching is working as expected.
+    """
+    def test_media_query_caching(self):
+        """Test MediaType queries auto-create and caching."""
+        assert contents_api.get_media_type_id.cache_info().currsize == 0
+
+        mime_type_str = "application/vnd.openedx.xblock.v1.problem+xml"
+        media_type_id = contents_api.get_media_type_id(mime_type_str)
+
+        # Now it should be loaded in the cache
+        assert contents_api.get_media_type_id.cache_info().currsize == 1
+
+        # Second call pulls from cache instead of the database
+        with self.assertNumQueries(0):
+            # Should also return the same thing it did last time.
+            assert media_type_id == contents_api.get_media_type_id(mime_type_str)
+
+    def test_media_query_caching_reset(self):
+        """
+        Test that setUp/tearDown reset the get_media_type_id LRU cache.
+
+        This test method's *must* execute after test_media_query_caching to be
+        meaningful (they execute in string sort order).
+        """
+        assert contents_api.get_media_type_id.cache_info().currsize == 0
diff --git a/tests/openedx_learning/core/publishing/test_api.py b/tests/openedx_learning/core/publishing/test_api.py
index 9327a100..a258f5b0 100644
--- a/tests/openedx_learning/core/publishing/test_api.py
+++ b/tests/openedx_learning/core/publishing/test_api.py
@@ -6,9 +6,9 @@
 
 import pytest
 from django.core.exceptions import ValidationError
-from django.test import TestCase
 
 from openedx_learning.core.publishing import api as publishing_api
+from openedx_learning.lib.test_utils import TestCase
 
 
 class CreateLearningPackageTestCase(TestCase):