diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index b453328f15649eb..168295534e036ca 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -63,6 +63,7 @@ struct _ceval_runtime_state { } perf; /* Pending calls to be made only on the main thread. */ struct _pending_calls pending_mainthread; + PyMutex sys_trace_profile_mutex; }; #ifdef PY_HAVE_PERF_TRAMPOLINE diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index e441600d54e1aac..b4336ff1f7a6fa1 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -23,18 +23,25 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ _Py_atomic_load_ssize_relaxed(&value) +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \ + _Py_atomic_load_ptr_acquire(&value) #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ _Py_atomic_store_ptr_relaxed(&value, new_value) #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ _Py_atomic_store_ptr_release(&value, new_value) #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ + _Py_atomic_store_uint8_relaxed(&value, new_value) #else #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value #define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value #define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value + #endif #ifdef __cplusplus diff --git a/Lib/test/test_free_threading/__init__.py b/Lib/test/test_free_threading/__init__.py new file mode 100644 index 000000000000000..9a89d27ba9f979d --- /dev/null +++ b/Lib/test/test_free_threading/__init__.py @@ -0,0 +1,7 @@ +import os + +from test import support + + +def load_tests(*args): + return support.load_package_tests(os.path.dirname(__file__), *args) diff --git a/Lib/test/test_free_threading/test_monitoring.py b/Lib/test/test_free_threading/test_monitoring.py new file mode 100644 index 000000000000000..b10a0e39053c1c3 --- /dev/null +++ b/Lib/test/test_free_threading/test_monitoring.py @@ -0,0 +1,232 @@ +"""Tests monitoring, sys.settrace, and sys.setprofile in a multi-threaded +environmenet to verify things are thread-safe in a free-threaded build""" + +import sys +import time +import unittest +import weakref + +from sys import monitoring +from test.support import is_wasi +from threading import Thread +from unittest import TestCase + + +class InstrumentationMultiThreadedMixin: + if not hasattr(sys, "gettotalrefcount"): + thread_count = 50 + func_count = 1000 + fib = 15 + else: + # Run a little faster in debug builds... + thread_count = 25 + func_count = 500 + fib = 15 + + def after_threads(self): + """Runs once after all the threads have started""" + pass + + def during_threads(self): + """Runs repeatedly while the threads are still running""" + pass + + def work(self, n, funcs): + """Fibonacci function which also calls a bunch of random functions""" + for func in funcs: + func() + if n < 2: + return n + return self.work(n - 1, funcs) + self.work(n - 2, funcs) + + def start_work(self, n, funcs): + # With the GIL builds we need to make sure that the hooks have + # a chance to run as it's possible to run w/o releasing the GIL. + time.sleep(1) + self.work(n, funcs) + + def after_test(self): + """Runs once after the test is done""" + pass + + def test_instrumention(self): + # Setup a bunch of functions which will need instrumentation... + funcs = [] + for i in range(self.func_count): + x = {} + exec("def f(): pass", x) + funcs.append(x["f"]) + + threads = [] + for i in range(self.thread_count): + # Each thread gets a copy of the func list to avoid contention + t = Thread(target=self.start_work, args=(self.fib, list(funcs))) + t.start() + threads.append(t) + + self.after_threads() + + while True: + any_alive = False + for t in threads: + if t.is_alive(): + any_alive = True + break + + if not any_alive: + break + + self.during_threads() + + self.after_test() + + +class MonitoringTestMixin: + def setUp(self): + for i in range(6): + if monitoring.get_tool(i) is None: + self.tool_id = i + monitoring.use_tool_id(i, self.__class__.__name__) + break + + def tearDown(self): + monitoring.free_tool_id(self.tool_id) + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetPreTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Sets tracing one time after the threads have started""" + + def setUp(self): + super().setUp() + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def after_threads(self): + sys.settrace(self.trace_func) + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class MonitoringMultiThreaded( + MonitoringTestMixin, InstrumentationMultiThreadedMixin, TestCase +): + """Uses sys.monitoring and repeatedly toggles instrumentation on and off""" + + def setUp(self): + super().setUp() + self.set = False + self.called = False + monitoring.register_callback( + self.tool_id, monitoring.events.LINE, self.callback + ) + + def tearDown(self): + monitoring.set_events(self.tool_id, 0) + super().tearDown() + + def callback(self, *args): + self.called = True + + def after_test(self): + self.assertTrue(self.called) + + def during_threads(self): + if self.set: + monitoring.set_events( + self.tool_id, monitoring.events.CALL | monitoring.events.LINE + ) + else: + monitoring.set_events(self.tool_id, 0) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetTraceMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Uses sys.settrace and repeatedly toggles instrumentation on and off""" + + def setUp(self): + self.set = False + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def tearDown(self): + sys.settrace(None) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def during_threads(self): + if self.set: + sys.settrace(self.trace_func) + else: + sys.settrace(None) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class SetProfileMultiThreaded(InstrumentationMultiThreadedMixin, TestCase): + """Uses sys.setprofile and repeatedly toggles instrumentation on and off""" + thread_count = 25 + func_count = 200 + fib = 15 + + def setUp(self): + self.set = False + self.called = False + + def after_test(self): + self.assertTrue(self.called) + + def tearDown(self): + sys.setprofile(None) + + def trace_func(self, frame, event, arg): + self.called = True + return self.trace_func + + def during_threads(self): + if self.set: + sys.setprofile(self.trace_func) + else: + sys.setprofile(None) + self.set = not self.set + + +@unittest.skipIf(is_wasi, "WASI has no threads.") +class MonitoringMisc(MonitoringTestMixin, TestCase): + def register_callback(self): + def callback(*args): + pass + + for i in range(200): + monitoring.register_callback(self.tool_id, monitoring.events.LINE, callback) + + self.refs.append(weakref.ref(callback)) + + def test_register_callback(self): + self.refs = [] + threads = [] + for i in range(50): + t = Thread(target=self.register_callback) + t.start() + threads.append(t) + + for thread in threads: + thread.join() + + monitoring.register_callback(self.tool_id, monitoring.events.LINE, None) + for ref in self.refs: + self.assertEqual(ref(), None) + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 3cf4de08a0c8427..77f957b6604a86c 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2374,6 +2374,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_doctest \ test/test_email \ test/test_email/data \ + test/test_free_threading \ test/test_future_stmt \ test/test_gdb \ test/test_import \ diff --git a/Python/instrumentation.c b/Python/instrumentation.c index 018cd662b1561af..5b47a353394c782 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -6,6 +6,7 @@ #include "pycore_call.h" #include "pycore_ceval.h" // _PY_EVAL_EVENTS_BITS #include "pycore_code.h" // _PyCode_Clear_Executors() +#include "pycore_critical_section.h" #include "pycore_frame.h" #include "pycore_interp.h" #include "pycore_long.h" @@ -13,12 +14,22 @@ #include "pycore_namespace.h" #include "pycore_object.h" #include "pycore_opcode_metadata.h" // IS_VALID_OPCODE, _PyOpcode_Caches +#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_UINT8_RELAXED #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyInterpreterState_GET() /* Uncomment this to dump debugging output when assertions fail */ // #define INSTRUMENT_DEBUG 1 +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) \ + if (!_PyInterpreterState_GET()->stoptheworld.world_stopped) { \ + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(obj); \ + } +#else +#define ASSERT_WORLD_STOPPED_OR_LOCKED(obj) +#endif + PyObject _PyInstrumentation_DISABLE = _PyObject_HEAD_INIT(&PyBaseObject_Type); PyObject _PyInstrumentation_MISSING = _PyObject_HEAD_INIT(&PyBaseObject_Type); @@ -275,17 +286,39 @@ compute_line(PyCodeObject *code, int offset, int8_t line_delta) return PyCode_Addr2Line(code, offset * sizeof(_Py_CODEUNIT)); } +static inline _PyCoMonitoringData * +get_monitoring(PyCodeObject *code) { + return FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring); +} + +static inline _PyCoLineInstrumentationData * +get_lines_data(PyCodeObject *code) +{ + _PyCoMonitoringData *monitoring = get_monitoring(code); + return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->lines); +} + +static inline uint8_t * +get_per_instruction_opcodes(PyCodeObject *code) +{ + _PyCoMonitoringData *monitoring = get_monitoring(code); + return FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->per_instruction_opcodes); +} + int _PyInstruction_GetLength(PyCodeObject *code, int offset) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + + _PyCoMonitoringData *monitoring = get_monitoring(code); int opcode = _PyCode_CODE(code)[offset].op.code; assert(opcode != 0); assert(opcode != RESERVED); if (opcode == INSTRUMENTED_LINE) { - opcode = code->_co_monitoring->lines[offset].original_opcode; + opcode = monitoring->lines[offset].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = code->_co_monitoring->per_instruction_opcodes[offset]; + opcode = monitoring->per_instruction_opcodes[offset]; } int deinstrumented = DE_INSTRUMENT[opcode]; if (deinstrumented) { @@ -424,6 +457,7 @@ dump_instrumentation_data(PyCodeObject *code, int star, FILE*out) } #define CHECK(test) do { \ + ASSERT_WORLD_STOPPED_OR_LOCKED(code); \ if (!(test)) { \ dump_instrumentation_data(code, i, stderr); \ } \ @@ -449,6 +483,8 @@ valid_opcode(int opcode) static void sanity_check_instrumentation(PyCodeObject *code) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + _PyCoMonitoringData *data = code->_co_monitoring; if (data == NULL) { return; @@ -551,10 +587,12 @@ int _Py_GetBaseOpcode(PyCodeObject *code, int i) { int opcode = _PyCode_CODE(code)[i].op.code; if (opcode == INSTRUMENTED_LINE) { - opcode = code->_co_monitoring->lines[i].original_opcode; + _PyCoLineInstrumentationData *lines = get_lines_data(code); + opcode = lines[i].original_opcode; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode = code->_co_monitoring->per_instruction_opcodes[i]; + uint8_t *per_instr_opcodes = get_per_instruction_opcodes(code); + opcode = per_instr_opcodes[i]; } CHECK(opcode != INSTRUMENTED_INSTRUCTION); CHECK(opcode != INSTRUMENTED_LINE); @@ -588,7 +626,7 @@ de_instrument(PyCodeObject *code, int i, int event) return; } CHECK(_PyOpcode_Deopt[deinstrumented] == deinstrumented); - *opcode_ptr = deinstrumented; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, deinstrumented); if (_PyOpcode_Caches[deinstrumented]) { instr[1].cache = adaptive_counter_warmup(); } @@ -632,7 +670,7 @@ de_instrument_per_instruction(PyCodeObject *code, int i) int original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; CHECK(original_opcode != 0); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); - *opcode_ptr = original_opcode; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode); if (_PyOpcode_Caches[original_opcode]) { instr[1].cache = adaptive_counter_warmup(); } @@ -665,7 +703,7 @@ instrument(PyCodeObject *code, int i) int deopt = _PyOpcode_Deopt[opcode]; int instrumented = INSTRUMENTED_OPCODES[deopt]; assert(instrumented); - *opcode_ptr = instrumented; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, instrumented); if (_PyOpcode_Caches[deopt]) { instr[1].cache = adaptive_counter_warmup(); } @@ -683,7 +721,7 @@ instrument_line(PyCodeObject *code, int i) _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; lines->original_opcode = _PyOpcode_Deopt[opcode]; CHECK(lines->original_opcode > 0); - *opcode_ptr = INSTRUMENTED_LINE; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE); } static void @@ -712,12 +750,13 @@ instrument_per_instruction(PyCodeObject *code, int i) code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; } assert(code->_co_monitoring->per_instruction_opcodes[i] > 0); - *opcode_ptr = INSTRUMENTED_INSTRUCTION; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION); } static void remove_tools(PyCodeObject * code, int offset, int event, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -737,6 +776,7 @@ remove_tools(PyCodeObject * code, int offset, int event, int tools) de_instrument(code, offset, event); } } + Py_END_CRITICAL_SECTION(); } #ifndef NDEBUG @@ -752,6 +792,7 @@ tools_is_subset_for_event(PyCodeObject * code, int event, int tools) static void remove_line_tools(PyCodeObject * code, int offset, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -769,11 +810,13 @@ remove_line_tools(PyCodeObject * code, int offset, int tools) de_instrument_line(code, offset); } } + Py_END_CRITICAL_SECTION(); } static void add_tools(PyCodeObject * code, int offset, int event, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); assert(event != PY_MONITORING_EVENT_LINE); assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); @@ -794,6 +837,8 @@ add_tools(PyCodeObject * code, int offset, int event, int tools) static void add_line_tools(PyCodeObject * code, int offset, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_LINE, tools)); assert(code->_co_monitoring); if (code->_co_monitoring->line_tools) { @@ -810,6 +855,8 @@ add_line_tools(PyCodeObject * code, int offset, int tools) static void add_per_instruction_tools(PyCodeObject * code, int offset, int tools) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools_is_subset_for_event(code, PY_MONITORING_EVENT_INSTRUCTION, tools)); assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { @@ -826,6 +873,8 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools) static void remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) { + Py_BEGIN_CRITICAL_SECTION(code); + assert(code->_co_monitoring); if (code->_co_monitoring->per_instruction_tools) { uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset]; @@ -842,6 +891,7 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) de_instrument_per_instruction(code, offset); } } + Py_END_CRITICAL_SECTION(); } @@ -969,8 +1019,9 @@ get_tools_for_instruction(PyCodeObject *code, PyInterpreterState *interp, int i, if (PY_MONITORING_IS_INSTRUMENTED_EVENT(event)) { CHECK(is_version_up_to_date(code, interp)); CHECK(instrumentation_cross_checks(interp, code)); - if (code->_co_monitoring->tools) { - tools = code->_co_monitoring->tools[i]; + uint8_t *monitoring_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(code->_co_monitoring->tools); + if (monitoring_tools) { + tools = monitoring_tools[i]; } else { tools = code->_co_monitoring->active_monitors.tools[event]; @@ -1149,7 +1200,7 @@ _Py_call_instrumentation_exc2( int _Py_Instrumentation_GetLine(PyCodeObject *code, int index) { - _PyCoMonitoringData *monitoring = code->_co_monitoring; + _PyCoMonitoringData *monitoring = get_monitoring(code); assert(monitoring != NULL); assert(monitoring->lines != NULL); assert(index >= code->_co_firsttraceable); @@ -1168,7 +1219,7 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, assert(instrumentation_cross_checks(tstate->interp, code)); int i = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *monitoring = code->_co_monitoring; + _PyCoMonitoringData *monitoring = get_monitoring(code); _PyCoLineInstrumentationData *line_data = &monitoring->lines[i]; if (tstate->tracing) { goto done; @@ -1189,10 +1240,12 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, goto done; } } - uint8_t tools = code->_co_monitoring->line_tools != NULL ? - code->_co_monitoring->line_tools[i] : + + uint8_t *line_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(monitoring->line_tools); + uint8_t tools = line_tools != NULL ? + line_tools[i] : (interp->monitors.tools[PY_MONITORING_EVENT_LINE] | - code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] + monitoring->local_monitors.tools[PY_MONITORING_EVENT_LINE] ); /* Special case sys.settrace to avoid boxing the line number, * only to immediately unbox it. */ @@ -1269,15 +1322,17 @@ _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* assert(is_version_up_to_date(code, tstate->interp)); assert(instrumentation_cross_checks(tstate->interp, code)); int offset = (int)(instr - _PyCode_CODE(code)); - _PyCoMonitoringData *instrumentation_data = code->_co_monitoring; - assert(instrumentation_data->per_instruction_opcodes); - int next_opcode = instrumentation_data->per_instruction_opcodes[offset]; + _PyCoMonitoringData *instr_data = get_monitoring(code); + uint8_t *per_instr_opcodes = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_opcodes); + assert(per_instr_opcodes); + int next_opcode = per_instr_opcodes[offset]; if (tstate->tracing) { return next_opcode; } PyInterpreterState *interp = tstate->interp; - uint8_t tools = instrumentation_data->per_instruction_tools != NULL ? - instrumentation_data->per_instruction_tools[offset] : + uint8_t *per_instr_tools = FT_ATOMIC_LOAD_PTR_ACQUIRE(instr_data->per_instruction_tools); + uint8_t tools = per_instr_tools != NULL ? + per_instr_tools[offset] : (interp->monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] | code->_co_monitoring->local_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION] ); @@ -1320,15 +1375,23 @@ _PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj) PyInterpreterState *is = _PyInterpreterState_GET(); assert(0 <= tool_id && tool_id < PY_MONITORING_TOOL_IDS); assert(0 <= event_id && event_id < _PY_MONITORING_EVENTS); +#ifdef Py_GIL_DISABLED + PyObject *callback = _Py_atomic_exchange_ptr( + &is->monitoring_callables[tool_id][event_id], + Py_XNewRef(obj) + ); +#else PyObject *callback = is->monitoring_callables[tool_id][event_id]; is->monitoring_callables[tool_id][event_id] = Py_XNewRef(obj); +#endif return callback; } static void -initialize_tools(PyCodeObject *code) +initialize_tools(PyCodeObject *code, uint8_t* tools) { - uint8_t* tools = code->_co_monitoring->tools; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(tools != NULL); int code_len = (int)Py_SIZE(code); for (int i = 0; i < code_len; i++) { @@ -1382,9 +1445,10 @@ initialize_tools(PyCodeObject *code) #define NO_LINE -128 static void -initialize_lines(PyCodeObject *code) +initialize_lines(PyCodeObject *code, _PyCoLineInstrumentationData *line_data) { - _PyCoLineInstrumentationData *line_data = code->_co_monitoring->lines; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(line_data != NULL); int code_len = (int)Py_SIZE(code); PyCodeAddressRange range; @@ -1499,9 +1563,10 @@ initialize_lines(PyCodeObject *code) } static void -initialize_line_tools(PyCodeObject *code, _Py_LocalMonitors *all_events) +initialize_line_tools(PyCodeObject *code, uint8_t *line_tools, _Py_LocalMonitors *all_events) { - uint8_t *line_tools = code->_co_monitoring->line_tools; + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + assert(line_tools != NULL); int code_len = (int)Py_SIZE(code); for (int i = 0; i < code_len; i++) { @@ -1514,18 +1579,19 @@ allocate_instrumentation_data(PyCodeObject *code) { if (code->_co_monitoring == NULL) { - code->_co_monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); - if (code->_co_monitoring == NULL) { + _PyCoMonitoringData *monitoring = PyMem_Malloc(sizeof(_PyCoMonitoringData)); + if (monitoring == NULL) { PyErr_NoMemory(); return -1; } - code->_co_monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; - code->_co_monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; - code->_co_monitoring->tools = NULL; - code->_co_monitoring->lines = NULL; - code->_co_monitoring->line_tools = NULL; - code->_co_monitoring->per_instruction_opcodes = NULL; - code->_co_monitoring->per_instruction_tools = NULL; + monitoring->local_monitors = (_Py_LocalMonitors){ 0 }; + monitoring->active_monitors = (_Py_LocalMonitors){ 0 }; + monitoring->tools = NULL; + monitoring->lines = NULL; + monitoring->line_tools = NULL; + monitoring->per_instruction_opcodes = NULL; + monitoring->per_instruction_tools = NULL; + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring, monitoring); } return 0; } @@ -1533,6 +1599,8 @@ allocate_instrumentation_data(PyCodeObject *code) static int update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + int code_len = (int)Py_SIZE(code); if (allocate_instrumentation_data(code)) { return -1; @@ -1542,61 +1610,70 @@ update_instrumentation_data(PyCodeObject *code, PyInterpreterState *interp) code->_co_monitoring->local_monitors); bool multitools = multiple_tools(&all_events); if (code->_co_monitoring->tools == NULL && multitools) { - code->_co_monitoring->tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->tools == NULL) { + uint8_t *tools = PyMem_Malloc(code_len); + if (tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_tools(code); + initialize_tools(code, tools); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->tools, tools); } if (all_events.tools[PY_MONITORING_EVENT_LINE]) { if (code->_co_monitoring->lines == NULL) { - code->_co_monitoring->lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (code->_co_monitoring->lines == NULL) { + _PyCoLineInstrumentationData *lines = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (lines == NULL) { PyErr_NoMemory(); return -1; } - initialize_lines(code); + initialize_lines(code, lines); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->lines, lines); } if (multitools && code->_co_monitoring->line_tools == NULL) { - code->_co_monitoring->line_tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->line_tools == NULL) { + uint8_t *line_tools = PyMem_Malloc(code_len); + if (line_tools == NULL) { PyErr_NoMemory(); return -1; } - initialize_line_tools(code, &all_events); + initialize_line_tools(code, line_tools, &all_events); + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->line_tools, line_tools); } } if (all_events.tools[PY_MONITORING_EVENT_INSTRUCTION]) { if (code->_co_monitoring->per_instruction_opcodes == NULL) { - code->_co_monitoring->per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); - if (code->_co_monitoring->per_instruction_opcodes == NULL) { + uint8_t *per_instruction_opcodes = PyMem_Malloc(code_len * sizeof(_PyCoLineInstrumentationData)); + if (per_instruction_opcodes == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - code->_co_monitoring->per_instruction_opcodes[i] = 0; + per_instruction_opcodes[i] = 0; } + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_opcodes, + per_instruction_opcodes); } if (multitools && code->_co_monitoring->per_instruction_tools == NULL) { - code->_co_monitoring->per_instruction_tools = PyMem_Malloc(code_len); - if (code->_co_monitoring->per_instruction_tools == NULL) { + uint8_t *per_instruction_tools = PyMem_Malloc(code_len); + if (per_instruction_tools == NULL) { PyErr_NoMemory(); return -1; } /* This may not be necessary, as we can initialize this memory lazily, but it helps catch errors. */ for (int i = 0; i < code_len; i++) { - code->_co_monitoring->per_instruction_tools[i] = 0; + per_instruction_tools[i] = 0; } + FT_ATOMIC_STORE_PTR_RELEASE(code->_co_monitoring->per_instruction_tools, + per_instruction_tools); } } return 0; } -int -_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) +static int +instrument_lock_held(PyCodeObject *code, PyInterpreterState *interp) { + ASSERT_WORLD_STOPPED_OR_LOCKED(code); + if (is_version_up_to_date(code, interp)) { assert( interp->ceval.instrumentation_version == 0 || @@ -1736,6 +1813,16 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) return 0; } +int +_Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) +{ + int res; + Py_BEGIN_CRITICAL_SECTION(code); + res = instrument_lock_held(code, interp); + Py_END_CRITICAL_SECTION(); + return res; +} + #define C_RETURN_EVENTS \ ((1 << PY_MONITORING_EVENT_C_RETURN) | \ (1 << PY_MONITORING_EVENT_C_RAISE)) @@ -1746,6 +1833,10 @@ _Py_Instrument(PyCodeObject *code, PyInterpreterState *interp) static int instrument_all_executing_code_objects(PyInterpreterState *interp) { +#ifdef Py_GIL_DISABLED + assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); +#endif + _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); PyThreadState* ts = PyInterpreterState_ThreadHead(interp); @@ -1754,7 +1845,7 @@ instrument_all_executing_code_objects(PyInterpreterState *interp) { _PyInterpreterFrame *frame = ts->current_frame; while (frame) { if (frame->owner != FRAME_OWNED_BY_CSTACK) { - if (_Py_Instrument(_PyFrame_GetCode(frame), interp)) { + if (instrument_lock_held(_PyFrame_GetCode(frame), interp)) { return -1; } } @@ -1817,19 +1908,24 @@ _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events) if (check_tool(interp, tool_id)) { return -1; } + _PyEval_StopTheWorld(interp); uint32_t existing_events = get_events(&interp->monitors, tool_id); if (existing_events == events) { + _PyEval_StartTheWorld(interp); return 0; } set_events(&interp->monitors, tool_id, events); uint32_t new_version = global_version(interp) + MONITORING_VERSION_INCREMENT; if (new_version == 0) { PyErr_Format(PyExc_OverflowError, "events set too many times"); + _PyEval_StartTheWorld(interp); return -1; } set_global_version(tstate, new_version); _Py_Executors_InvalidateAll(interp, 1); - return instrument_all_executing_code_objects(interp); + int res = instrument_all_executing_code_objects(interp); + _PyEval_StartTheWorld(interp); + return res; } int @@ -2158,15 +2254,21 @@ monitoring_restart_events_impl(PyObject *module) */ PyThreadState *tstate = _PyThreadState_GET(); PyInterpreterState *interp = tstate->interp; + + _PyEval_StopTheWorld(interp); uint32_t restart_version = global_version(interp) + MONITORING_VERSION_INCREMENT; uint32_t new_version = restart_version + MONITORING_VERSION_INCREMENT; if (new_version <= MONITORING_VERSION_INCREMENT) { PyErr_Format(PyExc_OverflowError, "events set too many times"); + _PyEval_StartTheWorld(interp); return NULL; } interp->last_restart_version = restart_version; set_global_version(tstate, new_version); - if (instrument_all_executing_code_objects(interp)) { + int res = instrument_all_executing_code_objects(interp); + _PyEval_StartTheWorld(interp); + + if (res) { return NULL; } Py_RETURN_NONE; diff --git a/Python/legacy_tracing.c b/Python/legacy_tracing.c index ccbb3eb3f7c82a2..ce25e7256643451 100644 --- a/Python/legacy_tracing.c +++ b/Python/legacy_tracing.c @@ -16,6 +16,13 @@ typedef struct _PyLegacyEventHandler { int event; } _PyLegacyEventHandler; +#ifdef Py_GIL_DISABLED +#define LOCK_SETUP() PyMutex_Lock(&_PyRuntime.ceval.sys_trace_profile_mutex); +#define UNLOCK_SETUP() PyMutex_Unlock(&_PyRuntime.ceval.sys_trace_profile_mutex); +#else +#define LOCK_SETUP() +#define UNLOCK_SETUP() +#endif /* The Py_tracefunc function expects the following arguments: * obj: the trace object (PyObject *) * frame: the current frame (PyFrameObject *) @@ -414,19 +421,10 @@ is_tstate_valid(PyThreadState *tstate) } #endif -int -_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +static Py_ssize_t +setup_profile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_profileobj) { - assert(is_tstate_valid(tstate)); - /* The caller must hold the GIL */ - assert(PyGILState_Check()); - - /* Call _PySys_Audit() in the context of the current thread state, - even if tstate is not the current thread state. */ - PyThreadState *current_tstate = _PyThreadState_GET(); - if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) { - return -1; - } + *old_profileobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ if (!tstate->interp->sys_profile_initialized) { tstate->interp->sys_profile_initialized = true; @@ -469,25 +467,15 @@ _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) int delta = (func != NULL) - (tstate->c_profilefunc != NULL); tstate->c_profilefunc = func; - PyObject *old_profileobj = tstate->c_profileobj; + *old_profileobj = tstate->c_profileobj; tstate->c_profileobj = Py_XNewRef(arg); - Py_XDECREF(old_profileobj); tstate->interp->sys_profiling_threads += delta; assert(tstate->interp->sys_profiling_threads >= 0); - - uint32_t events = 0; - if (tstate->interp->sys_profiling_threads) { - events = - (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | - (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | - (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | - (1 << PY_MONITORING_EVENT_PY_THROW); - } - return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); + return tstate->interp->sys_profiling_threads; } int -_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +_PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) { assert(is_tstate_valid(tstate)); /* The caller must hold the GIL */ @@ -496,11 +484,32 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) /* Call _PySys_Audit() in the context of the current thread state, even if tstate is not the current thread state. */ PyThreadState *current_tstate = _PyThreadState_GET(); - if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { + if (_PySys_Audit(current_tstate, "sys.setprofile", NULL) < 0) { return -1; } - assert(tstate->interp->sys_tracing_threads >= 0); + // needs to be decref'd outside of the lock + PyObject *old_profileobj; + LOCK_SETUP(); + int profiling_threads = setup_profile(tstate, func, arg, &old_profileobj); + UNLOCK_SETUP(); + Py_XDECREF(old_profileobj); + + uint32_t events = 0; + if (profiling_threads) { + events = + (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | + (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | + (1 << PY_MONITORING_EVENT_CALL) | (1 << PY_MONITORING_EVENT_PY_UNWIND) | + (1 << PY_MONITORING_EVENT_PY_THROW); + } + return _PyMonitoring_SetEvents(PY_MONITORING_SYS_PROFILE_ID, events); +} + +static Py_ssize_t +setup_tracing(PyThreadState *tstate, Py_tracefunc func, PyObject *arg, PyObject **old_traceobj) +{ + *old_traceobj = NULL; /* Setup PEP 669 monitoring callbacks and events. */ if (!tstate->interp->sys_trace_initialized) { tstate->interp->sys_trace_initialized = true; @@ -553,14 +562,40 @@ _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) int delta = (func != NULL) - (tstate->c_tracefunc != NULL); tstate->c_tracefunc = func; - PyObject *old_traceobj = tstate->c_traceobj; + *old_traceobj = tstate->c_traceobj; tstate->c_traceobj = Py_XNewRef(arg); - Py_XDECREF(old_traceobj); tstate->interp->sys_tracing_threads += delta; assert(tstate->interp->sys_tracing_threads >= 0); + return tstate->interp->sys_tracing_threads; +} + +int +_PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg) +{ + assert(is_tstate_valid(tstate)); + /* The caller must hold the GIL */ + assert(PyGILState_Check()); + + /* Call _PySys_Audit() in the context of the current thread state, + even if tstate is not the current thread state. */ + PyThreadState *current_tstate = _PyThreadState_GET(); + if (_PySys_Audit(current_tstate, "sys.settrace", NULL) < 0) { + return -1; + } + + assert(tstate->interp->sys_tracing_threads >= 0); + // needs to be decref'd outside of the lock + PyObject *old_traceobj; + LOCK_SETUP(); + int tracing_threads = setup_tracing(tstate, func, arg, &old_traceobj); + UNLOCK_SETUP(); + Py_XDECREF(old_traceobj); + if (tracing_threads < 0) { + return -1; + } uint32_t events = 0; - if (tstate->interp->sys_tracing_threads) { + if (tracing_threads) { events = (1 << PY_MONITORING_EVENT_PY_START) | (1 << PY_MONITORING_EVENT_PY_RESUME) | (1 << PY_MONITORING_EVENT_PY_RETURN) | (1 << PY_MONITORING_EVENT_PY_YIELD) | diff --git a/Python/pystate.c b/Python/pystate.c index 635616c5648c180..3310c918f58722c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -392,6 +392,7 @@ _Py_COMP_DIAG_POP &(runtime)->unicode_state.ids.mutex, \ &(runtime)->imports.extensions.mutex, \ &(runtime)->ceval.pending_mainthread.mutex, \ + &(runtime)->ceval.sys_trace_profile_mutex, \ &(runtime)->atexit.mutex, \ &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \