From cac27a18f68685b5971be01255b33cdfea14778f Mon Sep 17 00:00:00 2001 From: Tim McCormack Date: Wed, 16 Apr 2025 18:06:24 -0400 Subject: [PATCH] fix: Add much more codejail darklaunch info; fix remote error bug - Fix bug where we were overwriting `remote_emsg` with None, and add test that would have caught it. - Suppress differences due solely to the codejail sandbox directory name differing (in stack traces), and add test for this. Configurable because we'll need to add an additional search/replace pair for the sandbox venv paths. - Add a variety of custom attributes, replacing existing ones. The attrs now have a prefixed naming scheme to simplify searching. - Add slug to log output so we can more readily correlate traces and logs, as well as logs across services. - Fix typo in error message. - Fix existing import sort order lint. --- xmodule/capa/safe_exec/safe_exec.py | 176 ++++++++++++++++-- .../capa/safe_exec/tests/test_safe_exec.py | 137 ++++++++++++-- 2 files changed, 286 insertions(+), 27 deletions(-) diff --git a/xmodule/capa/safe_exec/safe_exec.py b/xmodule/capa/safe_exec/safe_exec.py index b3a1b888f6d7..de80fbb6e97d 100644 --- a/xmodule/capa/safe_exec/safe_exec.py +++ b/xmodule/capa/safe_exec/safe_exec.py @@ -2,14 +2,20 @@ import copy import hashlib import logging +import re +from functools import lru_cache +from typing import assert_type from codejail.safe_exec import SafeExecException, json_safe from codejail.safe_exec import not_safe_exec as codejail_not_safe_exec from codejail.safe_exec import safe_exec as codejail_safe_exec +from django.conf import settings +from django.dispatch import receiver +from django.test.signals import setting_changed from edx_django_utils.monitoring import function_trace, record_exception, set_custom_attribute from . import lazymod -from .remote_exec import is_codejail_rest_service_enabled, is_codejail_in_darklaunch, get_remote_exec +from .remote_exec import get_remote_exec, is_codejail_in_darklaunch, is_codejail_rest_service_enabled log = logging.getLogger(__name__) @@ -219,6 +225,28 @@ def safe_exec( # Run the code in both the remote codejail service as well as the local codejail # when in darklaunch mode. if is_codejail_in_darklaunch(): + # Start adding attributes only once we're in a darklaunch + # comparison, even though these particular ones aren't specific to + # darklaunch. There can be multiple codejail calls per trace, and + # these attrs will overwrite previous values in the same trace. When + # that happens, we need to ensure we overwrite *all* of them, + # otherwise we could end up with inconsistent combinations of values. + + # .. custom_attribute_name: codejail.slug + # .. custom_attribute_description: Value of the slug parameter. This + # might be a problem ID, if present. + set_custom_attribute('codejail.slug', slug) + # .. custom_attribute_name: codejail.limit_overrides_context + # .. custom_attribute_description: Value of the limit_overrides_context + # parameter to this code execution. Generally this will be the + # course name, if present at all. + set_custom_attribute('codejail.limit_overrides_context', limit_overrides_context) + # .. custom_attribute_name: codejail.extra_files_count + # .. custom_attribute_description: Number of extra_files included + # in request. This should be 0 or 1, the latter indicating a + # python_lib.zip was present. + set_custom_attribute('codejail.extra_files_count', len(extra_files) if extra_files else 0) + try: data = { "code": code_prolog + LAZY_IMPORTS + code, @@ -230,30 +258,26 @@ def safe_exec( "extra_files": extra_files, } with function_trace('safe_exec.remote_exec_darklaunch'): + # Ignore the returned exception, because it's just a + # SafeExecException wrapped around emsg (if present). remote_emsg, _ = get_remote_exec(data) + remote_exception = None except BaseException as e: # pragma: no cover # pylint: disable=broad-except # Swallow all exceptions and log it in monitoring so that dark launch doesn't cause issues during # deploy. remote_emsg = None remote_exception = e - else: - remote_emsg = None - remote_exception = None try: - log.info( - f"Remote execution in darklaunch mode produces globals={darklaunch_globals!r}, " - f"emsg={remote_emsg!r}, exception={remote_exception!r}" - ) local_exc_unexpected = None if isinstance(exception, SafeExecException) else exception - log.info( - f"Local execution in darklaunch mode produces globals={globals_dict!r}, " - f"emsg={emsg!r}, exception={local_exc_unexpected!r}") - set_custom_attribute('dark_launch_emsg_match', remote_emsg == emsg) - set_custom_attribute('remote_emsg_exists', remote_emsg is not None) - set_custom_attribute('local_emsg_exists', emsg is not None) + + report_darklaunch_results( + slug=slug, + globals_local=globals_dict, emsg_local=emsg, unexpected_exc_local=local_exc_unexpected, + globals_remote=darklaunch_globals, emsg_remote=remote_emsg, unexpected_exc_remote=remote_exception, + ) except BaseException as e: # pragma: no cover # pylint: disable=broad-except - log.exception("Error occurred while trying to report codejail darklauch data.") + log.exception("Error occurred while trying to report codejail darklaunch data.") record_exception() # Put the result back in the cache. This is complicated by the fact that @@ -265,3 +289,125 @@ def safe_exec( # If an exception happened, raise it now. if exception: raise exception + + +@lru_cache(maxsize=1) +def emsg_normalizers(): + """ + Load emsg normalization settings. + + The output is like the setting value, except the 'search' patterns have + been compiled. + """ + default = [ + { + 'search': r'/tmp/codejail-[0-9a-zA-Z]+', + 'replace': r'/tmp/codejail-', + }, + ] + try: + # .. setting_name: CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS + # .. setting_default: (see description) + # .. setting_description: A list of patterns to search and replace in codejail error + # messages during comparison in codejail-service darklaunch. Each entry is a dict + # of 'search' (a regular expression string) and 'replace' (the replacement string). + # The default value suppresses differences matching '/tmp/codejail-[0-9a-zA-Z]+', + # the directory structure codejail uses for its random-named sandboxes. Deployers + # may also need to add a search/replace pair for the location of the sandbox + # virtualenv, or any other paths that show up in stack traces. + # .. setting_warning: Note that `replace' is a pattern, allowing for + # backreferences. Any backslashes in the replacement pattern that are not + # intended as backreferences should be escaped as `\\`. + setting = getattr(settings, 'CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS', default) + + compiled = [] + for pair in setting: + compiled.append({ + 'search': re.compile(assert_type(pair['search'], str)), + 'replace': assert_type(pair['replace'], str), + }) + return compiled + except BaseException as e: + record_exception() + return [] + + +def normalize_error_message(emsg): + """ + Remove any uninteresting sources of discrepancy from an emsg. + """ + if emsg is None: + return None + + for replacer in emsg_normalizers(): + emsg = re.sub(replacer['search'], replacer['replace'], emsg, count=0) + + return emsg + + +def report_darklaunch_results( + *, slug, + globals_local, emsg_local, unexpected_exc_local, + globals_remote, emsg_remote, unexpected_exc_remote, +): + """Send telemetry for results of darklaunch.""" + can_compare_output = True + + def report_arm(arm, globals_dict, emsg, unexpected_exception): + nonlocal can_compare_output + if unexpected_exception: + # .. custom_attribute_name: codejail.darklaunch.status.{local,remote} + # .. custom_attribute_description: Outcome of this arm of the + # darklaunch comparison. Values can be 'ok' (normal execution), + # 'safe_error' (submitted code raised an exception), or + # 'unexpected_error' (uncaught error in submitting or evaluating code). + set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'unexpected_error') + # .. custom_attribute_name: codejail.darklaunch.exception.{local,remote} + # .. custom_attribute_description: When the status attribute indicates + # an unexpected error, this is a string representation of the error, + # otherwise None. + set_custom_attribute(f'codejail.darklaunch.exception.{arm}', repr(unexpected_exception)) + can_compare_output = False + else: + set_custom_attribute(f'codejail.darklaunch.status.{arm}', 'ok' if emsg is None else 'safe_error') + set_custom_attribute(f'codejail.darklaunch.exception.{arm}', None) + + # Logs include full globals and emsg + log.info( + f"Codejail darklaunch {arm} results for slug={slug}: globals={globals_dict!r}, " + f"emsg={emsg!r}, exception={unexpected_exception!r}" + ) + + report_arm('local', globals_local, emsg_local, unexpected_exc_local) + report_arm('remote', globals_remote, emsg_remote, unexpected_exc_remote) + + # If the arms can't be compared (unexpected errors), stop early -- the rest + # is about output comparison. + if not can_compare_output: + set_custom_attribute('codejail.darklaunch.globals_match', 'N/A') + set_custom_attribute('codejail.darklaunch.emsg_match', 'N/A') + return + + globals_match = globals_local == globals_remote + emsg_match = normalize_error_message(emsg_local) == normalize_error_message(emsg_remote) + + # .. custom_attribute_name: codejail.darklaunch.globals_match + # .. custom_attribute_description: True if local and remote globals_dict + # values match, False otherwise. 'N/A' when either arm raised an + # uncaught error. + set_custom_attribute('codejail.darklaunch.globals_match', globals_match) + # .. custom_attribute_name: codejail.darklaunch.emsg_match + # .. custom_attribute_description: True if the local and remote emsg values + # (errors returned from sandbox) match, False otherwise. Differences due + # to known irrelevant factors are suppressed in this comparison, such as + # the randomized directory names used for sandboxes. 'N/A' when either + # arm raised an uncaught error. + set_custom_attribute('codejail.darklaunch.emsg_match', emsg_match) + + +@receiver(setting_changed) +def reset_caches(sender, **kwargs): + """ + Reset cached settings during unit tests. + """ + emsg_normalizers.cache_clear() diff --git a/xmodule/capa/safe_exec/tests/test_safe_exec.py b/xmodule/capa/safe_exec/tests/test_safe_exec.py index a45c5911a262..05757b6afb79 100644 --- a/xmodule/capa/safe_exec/tests/test_safe_exec.py +++ b/xmodule/capa/safe_exec/tests/test_safe_exec.py @@ -23,6 +23,7 @@ from openedx.core.djangolib.testing.utils import skip_unless_lms from xmodule.capa.safe_exec import safe_exec, update_hash from xmodule.capa.safe_exec.remote_exec import is_codejail_in_darklaunch, is_codejail_rest_service_enabled +from xmodule.capa.safe_exec.safe_exec import emsg_normalizers, normalize_error_message class TestSafeExec(unittest.TestCase): # lint-amnesty, pylint: disable=missing-class-docstring @@ -212,6 +213,13 @@ def run_dark_launch( return {'raised': safe_exec_e} + # These don't change between the tests + standard_codejail_attr_calls = [ + call('codejail.slug', None), + call('codejail.limit_overrides_context', None), + call('codejail.extra_files_count', 0), + ] + def test_separate_globals(self): """Test that local and remote globals are isolated from each other's side effects.""" # Both will attempt to read and write the 'overwrite' key. @@ -238,17 +246,21 @@ def remote_exec(data): results = self.run_dark_launch( globals_dict=globals_dict, local=local_exec, remote=remote_exec, expect_attr_calls=[ - call('local_emsg_exists', False), - call('remote_emsg_exists', False), - call('dark_launch_emsg_match', True), + *self.standard_codejail_attr_calls, + call('codejail.darklaunch.status.local', 'ok'), + call('codejail.darklaunch.status.remote', 'ok'), + call('codejail.darklaunch.exception.local', None), + call('codejail.darklaunch.exception.remote', None), + call('codejail.darklaunch.globals_match', False), # mismatch revealed here + call('codejail.darklaunch.emsg_match', True), ], expect_log_info_calls=[ call( - "Local execution in darklaunch mode produces globals={'overwrite': 'mock local'}, " + "Codejail darklaunch local results for slug=None: globals={'overwrite': 'mock local'}, " "emsg=None, exception=None" ), call( - "Remote execution in darklaunch mode produces globals={'overwrite': 'mock remote'}, " + "Codejail darklaunch remote results for slug=None: globals={'overwrite': 'mock remote'}, " "emsg=None, exception=None" ), ], @@ -274,18 +286,22 @@ def remote_exec(data): results = self.run_dark_launch( globals_dict={}, local=local_exec, remote=remote_exec, expect_attr_calls=[ - call('local_emsg_exists', True), - call('remote_emsg_exists', False), - call('dark_launch_emsg_match', False), + *self.standard_codejail_attr_calls, + call('codejail.darklaunch.status.local', 'unexpected_error'), + call('codejail.darklaunch.status.remote', 'ok'), + call('codejail.darklaunch.exception.local', "BaseException('unexpected')"), + call('codejail.darklaunch.exception.remote', None), + call('codejail.darklaunch.globals_match', "N/A"), + call('codejail.darklaunch.emsg_match', "N/A"), ], expect_log_info_calls=[ call( - "Remote execution in darklaunch mode produces globals={}, " - "emsg=None, exception=None" + "Codejail darklaunch local results for slug=None: globals={}, " + "emsg='unexpected', exception=BaseException('unexpected')" ), call( - "Local execution in darklaunch mode produces globals={}, " - "emsg='unexpected', exception=BaseException('unexpected')" + "Codejail darklaunch remote results for slug=None: globals={}, " + "emsg=None, exception=None" ), ], expect_globals_contains={}, @@ -295,6 +311,103 @@ def remote_exec(data): assert isinstance(results['raised'], BaseException) assert 'unexpected' in repr(results['raised']) + def test_emsg_mismatch(self): + """Test that local and remote error messages are compared.""" + def local_exec(code, globals_dict, **kwargs): + raise SafeExecException("oops") + + def remote_exec(data): + return ("OH NO", SafeExecException("OH NO")) + + results = self.run_dark_launch( + globals_dict={}, local=local_exec, remote=remote_exec, + expect_attr_calls=[ + *self.standard_codejail_attr_calls, + call('codejail.darklaunch.status.local', 'safe_error'), + call('codejail.darklaunch.status.remote', 'safe_error'), + call('codejail.darklaunch.exception.local', None), + call('codejail.darklaunch.exception.remote', None), + call('codejail.darklaunch.globals_match', True), + call('codejail.darklaunch.emsg_match', False), # mismatch revealed here + ], + expect_log_info_calls=[ + call( + "Codejail darklaunch local results for slug=None: globals={}, " + "emsg='oops', exception=None" + ), + call( + "Codejail darklaunch remote results for slug=None: globals={}, " + "emsg='OH NO', exception=None" + ), + ], + expect_globals_contains={}, + ) + assert isinstance(results['raised'], SafeExecException) + assert 'oops' in repr(results['raised']) + + def test_ignore_sandbox_dir_mismatch(self): + """Mismatch due only to differences in sandbox directory should be ignored.""" + def local_exec(code, globals_dict, **kwargs): + raise SafeExecException("stack trace involving /tmp/codejail-1234567/whatever.py") + + def remote_exec(data): + emsg = "stack trace involving /tmp/codejail-abcdefgh/whatever.py" + return (emsg, SafeExecException(emsg)) + + results = self.run_dark_launch( + globals_dict={}, local=local_exec, remote=remote_exec, + expect_attr_calls=[ + *self.standard_codejail_attr_calls, + call('codejail.darklaunch.status.local', 'safe_error'), + call('codejail.darklaunch.status.remote', 'safe_error'), + call('codejail.darklaunch.exception.local', None), + call('codejail.darklaunch.exception.remote', None), + call('codejail.darklaunch.globals_match', True), + call('codejail.darklaunch.emsg_match', True), # even though not exact match + ], + expect_log_info_calls=[ + call( + "Codejail darklaunch local results for slug=None: globals={}, " + "emsg='stack trace involving /tmp/codejail-1234567/whatever.py', exception=None" + ), + call( + "Codejail darklaunch remote results for slug=None: globals={}, " + "emsg='stack trace involving /tmp/codejail-abcdefgh/whatever.py', exception=None" + ), + ], + expect_globals_contains={}, + ) + assert isinstance(results['raised'], SafeExecException) + assert 'whatever.py' in repr(results['raised']) + + @override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[ + { + 'search': r'/tmp/codejail-[0-9a-zA-Z]+', + 'replace': r'/tmp/codejail-', + }, + { + 'search': r'[0-9]+', + 'replace': r'', + }, + ]) + def test_configurable_normalizers(self): + """We can override the normalizers, and they run in order.""" + emsg_in = "Error in /tmp/codejail-1234abcd/whatever.py: something 12 34 other" + expect_out = "Error in /tmp/codejail-/whatever.py: something other" + assert expect_out == normalize_error_message(emsg_in) + + @override_settings(CODEJAIL_DARKLAUNCH_EMSG_NORMALIZERS=[ + { + 'search': r'broken [', + 'replace': r'replace', + }, + ]) + @patch('xmodule.capa.safe_exec.safe_exec.record_exception') + def test_normalizers_validate(self, mock_record_exception): + """Normalizers are validated, and fall back to empty list on error.""" + assert emsg_normalizers() == [] # pylint: disable=use-implicit-booleaness-not-comparison + mock_record_exception.assert_called_once() + class TestLimitConfiguration(unittest.TestCase): """