From cac27a18f68685b5971be01255b33cdfea14778f Mon Sep 17 00:00:00 2001
From: Tim McCormack <tmccormack@edx.org>
Date: Wed, 16 Apr 2025 18:06:24 -0400
Subject: [PATCH] fix: Add much more codejail darklaunch info; fix remote error
 bug

- Fix bug where we were overwriting `remote_emsg` with None, and add test
  that would have caught it.
- Suppress differences due solely to the codejail sandbox directory name
  differing (in stack traces), and add test for this. Configurable because
  we'll need to add an additional search/replace pair for the sandbox venv
  paths.
- Add a variety of custom attributes, replacing existing ones. The attrs
  now have a prefixed naming scheme to simplify searching.
- Add slug to log output so we can more readily correlate traces and logs,
  as well as logs across services.
- Fix typo in error message.
- Fix existing import sort order lint.
---
 xmodule/capa/safe_exec/safe_exec.py           | 176 ++++++++++++++++--
 .../capa/safe_exec/tests/test_safe_exec.py    | 137 ++++++++++++--
 2 files changed, 286 insertions(+), 27 deletions(-)

diff --git a/xmodule/capa/safe_exec/safe_exec.py b/xmodule/capa/safe_exec/safe_exec.py
index b3a1b888f6d7..de80fbb6e97d 100644
--- a/xmodule/capa/safe_exec/safe_exec.py
+++ b/xmodule/capa/safe_exec/safe_exec.py
@@ -2,14 +2,20 @@
 import copy
 import hashlib
 import logging
+import re
+from functools import lru_cache
+from typing import assert_type
 
 from codejail.safe_exec import SafeExecException, json_safe
 from codejail.safe_exec import not_safe_exec as codejail_not_safe_exec
 from codejail.safe_exec import safe_exec as codejail_safe_exec
+from django.conf import settings
+from django.dispatch import receiver
+from django.test.signals import setting_changed
 from edx_django_utils.monitoring import function_trace, record_exception, set_custom_attribute
 
 from . import lazymod
-from .remote_exec import is_codejail_rest_service_enabled, is_codejail_in_darklaunch, get_remote_exec
+from .remote_exec import get_remote_exec, is_codejail_in_darklaunch, is_codejail_rest_service_enabled
 
 log = logging.getLogger(__name__)
 
@@ -219,6 +225,28 @@ def safe_exec(
         # Run the code in both the remote codejail service as well as the local codejail
         # when in darklaunch mode.
         if is_codejail_in_darklaunch():
+            # Start adding attributes only once we're in a darklaunch
+            # comparison, even though these particular ones aren't specific to
+            # darklaunch. There can be multiple codejail calls per trace, and
+            # these attrs will overwrite previous values in the same trace. When
+            # that happens, we need to ensure we overwrite *all* of them,
+            # otherwise we could end up with inconsistent combinations of values.
+
+            # .. custom_attribute_name: codejail.slug
+            # .. custom_attribute_description: Value of the slug parameter. This
+            #   might be a problem ID, if present.
+            set_custom_attribute('codejail.slug', slug)
+            # .. custom_attribute_name: codejail.limit_overrides_context
+            # .. custom_attribute_description: Value of the limit_overrides_context
+            #   parameter to this code execution. Generally this will be the
+            #   course name, if present at all.
+            set_custom_attribute('codejail.limit_overrides_context', limit_overrides_context)
+            # .. custom_attribute_name: codejail.extra_files_count
+            # .. custom_attribute_description: Number of extra_files included
+            #   in request. This should be 0 or 1, the latter indicating a
+            #   python_lib.zip was present.
+            set_custom_attribute('codejail.extra_files_count', len(extra_files) if extra_files else 0)
+
             try:
                 data = {
                     "code": code_prolog + LAZY_IMPORTS + code,
@@ -230,30 +258,26 @@ def safe_exec(
                     "extra_files": extra_files,
                 }
                 with function_trace('safe_exec.remote_exec_darklaunch'):
+                    # Ignore the returned exception, because it's just a
+                    # SafeExecException wrapped around emsg (if present).
                     remote_emsg, _ = get_remote_exec(data)
+                    remote_exception = None
             except BaseException as e:  # pragma: no cover  # pylint: disable=broad-except
                 # Swallow all exceptions and log it in monitoring so that dark launch doesn't cause issues during
                 # deploy.
                 remote_emsg = None
                 remote_exception = e
-            else:
-                remote_emsg = None
-                remote_exception = None
 
             try:
-                log.info(
-                    f"Remote execution in darklaunch mode produces globals={darklaunch_globals!r}, "
-                    f"emsg={remote_emsg!r}, exception={remote_exception!r}"
-                )
                 local_exc_unexpected = None if isinstance(exception, SafeExecException) else exception
-                log.info(
-                    f"Local execution in darklaunch mode produces globals={globals_dict!r}, "
-                    f"emsg={emsg!r}, exception={local_exc_unexpected!r}")
-                set_custom_attribute('dark_launch_emsg_match', remote_emsg == emsg)
-                set_custom_attribute('remote_emsg_exists', remote_emsg is not None)
-                set_custom_attribute('local_emsg_exists', emsg is not None)
+
+                report_darklaunch_results(
+                    slug=slug,
+                    globals_local=globals_dict, emsg_local=emsg, unexpected_exc_local=local_exc_unexpected,
+                    globals_remote=darklaunch_globals, emsg_remote=remote_emsg, unexpected_exc_remote=remote_exception,
+                )
             except BaseException as e:  # pragma: no cover  # pylint: disable=broad-except
-                log.exception("Error occurred while trying to report codejail darklauch data.")
+                log.exception("Error occurred while trying to report codejail darklaunch data.")
                 record_exception()
 
     # Put the result back in the cache.  This is complicated by the fact that
@@ -265,3 +289,125 @@ def safe_exec(
     # If an exception happened, raise it now.
     if exception:
         raise exception
+
+
+@lru_cache(maxsize=1)
+def emsg_normalizers():
+    """
+    Load emsg normalization settings.
+
+    The output is like the setting value, except the 'search' patterns have
+    been compiled.
+    """
+    default = [
+        {
+            'search': r'/tmp/codejail-[0-9a-zA-Z]+',
+            'replace': r'/tmp/codejail-<SANDBOX_DIR_NAME>',
+        },
+    ]
+    try:
+        # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS
+        # .. setting_default: (see description)
+        # .. setting_description: A list of patterns to search and replace in codejail error
+        #   messages during comparison in codejail-service darklaunch. Each entry is a dict
+        #   of 'search' (a regular expression string) and 'replace' (the replacement string).
+        #   The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+',
+        #   the directory structure codejail uses for its random-named sandboxes. Deployers
+        #   may also need to add a search/replace pair for the location of the sandbox
+        #   virtualenv, or any other paths that show up in stack traces.
+        # .. setting_warning: Note that `replace' is a pattern, allowing for
+        #   backreferences. Any backslashes in the replacement pattern that are not
+        #   intended as backreferences should be escaped as `\\`.
+        setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default)
+
+        compiled = []
+        for pair in setting:
+            compiled.append({
+                'search': re.compile(assert_type(pair['search'], str)),
+                'replace': assert_type(pair['replace'], str),
+            })
+        return compiled
+    except BaseException as e:
+        record_exception()
+        return []
+
+
+def normalize_error_message(emsg):
+    """
+    Remove any uninteresting sources of discrepancy from an emsg.
+    """
+    if emsg is None:
+        return None
+
+    for replacer in emsg_normalizers():
+        emsg = re.sub(replacer['search'], replacer['replace'], emsg, count=0)
+
+    return emsg
+
+
+def report_darklaunch_results(
+        *, slug,
+        globals_local, emsg_local, unexpected_exc_local,
+        globals_remote, emsg_remote, unexpected_exc_remote,
+):
+    """Send telemetry for results of darklaunch."""
+    can_compare_output = True
+
+    def report_arm(arm, globals_dict, emsg, unexpected_exception):
+        nonlocal can_compare_output
+        if unexpected_exception:
+            # .. custom_attribute_name: codejail.darklaunch.status.{local,remote}
+            # .. custom_attribute_description: Outcome of this arm of the
+            #   darklaunch comparison. Values can be 'ok' (normal execution),
+            #   'safe_error' (submitted code raised an exception), or
+            #   'unexpected_error' (uncaught error in submitting or evaluating code).
+            set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'unexpected_error')
+            # .. custom_attribute_name: codejail.darklaunch.exception.{local,remote}
+            # .. custom_attribute_description: When the status attribute indicates
+            #   an unexpected error, this is a string representation of the error,
+            #   otherwise None.
+            set_custom_attribute(f'codejail.darklaunch.exception.{arm}', repr(unexpected_exception))
+            can_compare_output = False
+        else:
+            set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'ok' if emsg is None else 'safe_error')
+            set_custom_attribute(f'codejail.darklaunch.exception.{arm}', None)
+
+        # Logs include full globals and emsg
+        log.info(
+            f"Codejail darklaunch {arm} results for slug={slug}: globals={globals_dict!r}, "
+            f"emsg={emsg!r}, exception={unexpected_exception!r}"
+        )
+
+    report_arm('local', globals_local, emsg_local, unexpected_exc_local)
+    report_arm('remote', globals_remote, emsg_remote, unexpected_exc_remote)
+
+    # If the arms can't be compared (unexpected errors), stop early -- the rest
+    # is about output comparison.
+    if not can_compare_output:
+        set_custom_attribute('codejail.darklaunch.globals_match', 'N/A')
+        set_custom_attribute('codejail.darklaunch.emsg_match', 'N/A')
+        return
+
+    globals_match = globals_local == globals_remote
+    emsg_match = normalize_error_message(emsg_local) == normalize_error_message(emsg_remote)
+
+    # .. custom_attribute_name: codejail.darklaunch.globals_match
+    # .. custom_attribute_description: True if local and remote globals_dict
+    #   values match, False otherwise. 'N/A' when either arm raised an
+    #   uncaught error.
+    set_custom_attribute('codejail.darklaunch.globals_match', globals_match)
+    # .. custom_attribute_name: codejail.darklaunch.emsg_match
+    # .. custom_attribute_description: True if the local and remote emsg values
+    #   (errors returned from sandbox) match, False otherwise. Differences due
+    #   to known irrelevant factors are suppressed in this comparison, such as
+    #   the randomized directory names used for sandboxes. 'N/A' when either
+    #   arm raised an uncaught error.
+    set_custom_attribute('codejail.darklaunch.emsg_match', emsg_match)
+
+
+@receiver(setting_changed)
+def reset_caches(sender, **kwargs):
+    """
+    Reset cached settings during unit tests.
+    """
+    emsg_normalizers.cache_clear()
diff --git a/xmodule/capa/safe_exec/tests/test_safe_exec.py b/xmodule/capa/safe_exec/tests/test_safe_exec.py
index a45c5911a262..05757b6afb79 100644
--- a/xmodule/capa/safe_exec/tests/test_safe_exec.py
+++ b/xmodule/capa/safe_exec/tests/test_safe_exec.py
@@ -23,6 +23,7 @@
 from openedx.core.djangolib.testing.utils import skip_unless_lms
 from xmodule.capa.safe_exec import safe_exec, update_hash
 from xmodule.capa.safe_exec.remote_exec import is_codejail_in_darklaunch, is_codejail_rest_service_enabled
+from xmodule.capa.safe_exec.safe_exec import emsg_normalizers, normalize_error_message
 
 
 class TestSafeExec(unittest.TestCase):  # lint-amnesty, pylint: disable=missing-class-docstring
@@ -212,6 +213,13 @@ def run_dark_launch(
 
         return {'raised': safe_exec_e}
 
+    # These don't change between the tests
+    standard_codejail_attr_calls = [
+        call('codejail.slug', None),
+        call('codejail.limit_overrides_context', None),
+        call('codejail.extra_files_count', 0),
+    ]
+
     def test_separate_globals(self):
         """Test that local and remote globals are isolated from each other's side effects."""
         # Both will attempt to read and write the 'overwrite' key.
@@ -238,17 +246,21 @@ def remote_exec(data):
         results = self.run_dark_launch(
             globals_dict=globals_dict, local=local_exec, remote=remote_exec,
             expect_attr_calls=[
-                call('local_emsg_exists', False),
-                call('remote_emsg_exists', False),
-                call('dark_launch_emsg_match', True),
+                *self.standard_codejail_attr_calls,
+                call('codejail.darklaunch.status.local', 'ok'),
+                call('codejail.darklaunch.status.remote', 'ok'),
+                call('codejail.darklaunch.exception.local', None),
+                call('codejail.darklaunch.exception.remote', None),
+                call('codejail.darklaunch.globals_match', False),  # mismatch revealed here
+                call('codejail.darklaunch.emsg_match', True),
             ],
             expect_log_info_calls=[
                 call(
-                    "Local execution in darklaunch mode produces globals={'overwrite': 'mock local'}, "
+                    "Codejail darklaunch local results for slug=None: globals={'overwrite': 'mock local'}, "
                     "emsg=None, exception=None"
                 ),
                 call(
-                    "Remote execution in darklaunch mode produces globals={'overwrite': 'mock remote'}, "
+                    "Codejail darklaunch remote results for slug=None: globals={'overwrite': 'mock remote'}, "
                     "emsg=None, exception=None"
                 ),
             ],
@@ -274,18 +286,22 @@ def remote_exec(data):
         results = self.run_dark_launch(
             globals_dict={}, local=local_exec, remote=remote_exec,
             expect_attr_calls=[
-                call('local_emsg_exists', True),
-                call('remote_emsg_exists', False),
-                call('dark_launch_emsg_match', False),
+                *self.standard_codejail_attr_calls,
+                call('codejail.darklaunch.status.local', 'unexpected_error'),
+                call('codejail.darklaunch.status.remote', 'ok'),
+                call('codejail.darklaunch.exception.local', "BaseException('unexpected')"),
+                call('codejail.darklaunch.exception.remote', None),
+                call('codejail.darklaunch.globals_match', "N/A"),
+                call('codejail.darklaunch.emsg_match', "N/A"),
             ],
             expect_log_info_calls=[
                 call(
-                    "Remote execution in darklaunch mode produces globals={}, "
-                    "emsg=None, exception=None"
+                    "Codejail darklaunch local results for slug=None: globals={}, "
+                    "emsg='unexpected', exception=BaseException('unexpected')"
                 ),
                 call(
-                    "Local execution in darklaunch mode produces globals={}, "
-                    "emsg='unexpected', exception=BaseException('unexpected')"
+                    "Codejail darklaunch remote results for slug=None: globals={}, "
+                    "emsg=None, exception=None"
                 ),
             ],
             expect_globals_contains={},
@@ -295,6 +311,103 @@ def remote_exec(data):
         assert isinstance(results['raised'], BaseException)
         assert 'unexpected' in repr(results['raised'])
 
+    def test_emsg_mismatch(self):
+        """Test that local and remote error messages are compared."""
+        def local_exec(code, globals_dict, **kwargs):
+            raise SafeExecException("oops")
+
+        def remote_exec(data):
+            return ("OH NO", SafeExecException("OH NO"))
+
+        results = self.run_dark_launch(
+            globals_dict={}, local=local_exec, remote=remote_exec,
+            expect_attr_calls=[
+                *self.standard_codejail_attr_calls,
+                call('codejail.darklaunch.status.local', 'safe_error'),
+                call('codejail.darklaunch.status.remote', 'safe_error'),
+                call('codejail.darklaunch.exception.local', None),
+                call('codejail.darklaunch.exception.remote', None),
+                call('codejail.darklaunch.globals_match', True),
+                call('codejail.darklaunch.emsg_match', False),  # mismatch revealed here
+            ],
+            expect_log_info_calls=[
+                call(
+                    "Codejail darklaunch local results for slug=None: globals={}, "
+                    "emsg='oops', exception=None"
+                ),
+                call(
+                    "Codejail darklaunch remote results for slug=None: globals={}, "
+                    "emsg='OH NO', exception=None"
+                ),
+            ],
+            expect_globals_contains={},
+        )
+        assert isinstance(results['raised'], SafeExecException)
+        assert 'oops' in repr(results['raised'])
+
+    def test_ignore_sandbox_dir_mismatch(self):
+        """Mismatch due only to differences in sandbox directory should be ignored."""
+        def local_exec(code, globals_dict, **kwargs):
+            raise SafeExecException("stack trace involving /tmp/codejail-1234567/whatever.py")
+
+        def remote_exec(data):
+            emsg = "stack trace involving /tmp/codejail-abcdefgh/whatever.py"
+            return (emsg, SafeExecException(emsg))
+
+        results = self.run_dark_launch(
+            globals_dict={}, local=local_exec, remote=remote_exec,
+            expect_attr_calls=[
+                *self.standard_codejail_attr_calls,
+                call('codejail.darklaunch.status.local', 'safe_error'),
+                call('codejail.darklaunch.status.remote', 'safe_error'),
+                call('codejail.darklaunch.exception.local', None),
+                call('codejail.darklaunch.exception.remote', None),
+                call('codejail.darklaunch.globals_match', True),
+                call('codejail.darklaunch.emsg_match', True),  # even though not exact match
+            ],
+            expect_log_info_calls=[
+                call(
+                    "Codejail darklaunch local results for slug=None: globals={}, "
+                    "emsg='stack trace involving /tmp/codejail-1234567/whatever.py', exception=None"
+                ),
+                call(
+                    "Codejail darklaunch remote results for slug=None: globals={}, "
+                    "emsg='stack trace involving /tmp/codejail-abcdefgh/whatever.py', exception=None"
+                ),
+            ],
+            expect_globals_contains={},
+        )
+        assert isinstance(results['raised'], SafeExecException)
+        assert 'whatever.py' in repr(results['raised'])
+
+    @override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
+        {
+            'search': r'/tmp/codejail-[0-9a-zA-Z]+',
+            'replace': r'/tmp/codejail-<RAND>',
+        },
+        {
+            'search': r'[0-9]+',
+            'replace': r'<NUM>',
+        },
+    ])
+    def test_configurable_normalizers(self):
+        """We can override the normalizers, and they run in order."""
+        emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other"
+        expect_out = "Error in /tmp/codejail-<RAND>/whatever.py: something <NUM> <NUM> other"
+        assert expect_out == normalize_error_message(emsg_in)
+
+    @override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[
+        {
+            'search': r'broken [',
+            'replace': r'replace',
+        },
+    ])
+    @patch('xmodule.capa.safe_exec.safe_exec.record_exception')
+    def test_normalizers_validate(self, mock_record_exception):
+        """Normalizers are validated, and fall back to empty list on error."""
+        assert emsg_normalizers() == []  # pylint: disable=use-implicit-booleaness-not-comparison
+        mock_record_exception.assert_called_once()
+
 
 class TestLimitConfiguration(unittest.TestCase):
     """