From 6d746082a750cfcd7f1b8aeb2aecba46505af602 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 5 Apr 2024 09:58:50 -0400 Subject: [PATCH] gh-117224: Move some large uops to external functions --- .gitattributes | 2 + Include/internal/pycore_executor_externals.h | 44 ++++ Makefile.pre.in | 6 +- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 1 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/bytecodes.c | 10 +- Python/ceval.c | 1 + Python/executor_cases.c.h | 183 +-------------- Python/executor_externals.c | 232 +++++++++++++++++++ Python/jit.c | 1 + Tools/cases_generator/analyzer.py | 4 + Tools/cases_generator/lexer.py | 1 + Tools/cases_generator/tier2_generator.py | 145 +++++++++++- Tools/jit/template.c | 1 + 16 files changed, 451 insertions(+), 189 deletions(-) create mode 100644 Include/internal/pycore_executor_externals.h create mode 100644 Python/executor_externals.c diff --git a/.gitattributes b/.gitattributes index 5b81d2cb3c90e9..08ca3f786a4fa8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -73,6 +73,7 @@ Doc/data/stable_abi.dat generated Doc/library/token-list.inc generated Include/internal/pycore_ast.h generated Include/internal/pycore_ast_state.h generated +Include/internal/pycore_executor_externals.h generated Include/internal/pycore_opcode.h generated Include/internal/pycore_opcode_metadata.h generated Include/internal/pycore_*_generated.h generated @@ -96,6 +97,7 @@ Parser/token.c generated Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/executor_cases.c.h generated +Python/executor_externals.c generated Python/generated_cases.c.h generated Python/optimizer_cases.c.h generated Python/opcode_targets.h generated diff --git a/Include/internal/pycore_executor_externals.h b/Include/internal/pycore_executor_externals.h new file mode 100644 index 00000000000000..0cd80465035691 --- /dev/null +++ b/Include/internal/pycore_executor_externals.h @@ -0,0 +1,44 @@ +// This file is generated by Tools/cases_generator/tier2_generator.py +// from: +// Python/bytecodes.c +// Do not edit! + +#ifndef Py_EXECUTOR_EXTERNALS_H +#define Py_EXECUTOR_EXTERNALS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "Python.h" + +#include "pytypedefs.h" +#include "pycore_frame.h" + + PyObject ** _COPY_FREE_VARS_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg); + + PyObject ** _INIT_CALL_BOUND_METHOD_EXACT_ARGS_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg); + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_0_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer); + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_1_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer); + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_2_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer); + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_3_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer); + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_4_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer); + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg); + + PyObject ** _SET_FUNCTION_ATTRIBUTE_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/Makefile.pre.in b/Makefile.pre.in index 5b89d6ba1acf71..4860236fd16090 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -422,6 +422,7 @@ PYTHON_OBJS= \ Python/crossinterp.o \ Python/dynamic_annotations.o \ Python/errors.o \ + Python/executor_externals.o \ Python/flowgraph.o \ Python/frame.o \ Python/frozenmain.o \ @@ -1145,6 +1146,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_dict_state.h \ $(srcdir)/Include/internal/pycore_dtoa.h \ $(srcdir)/Include/internal/pycore_exceptions.h \ + $(srcdir)/Include/internal/pycore_executor_externals.h \ $(srcdir)/Include/internal/pycore_faulthandler.h \ $(srcdir)/Include/internal/pycore_fileutils.h \ $(srcdir)/Include/internal/pycore_floatobject.h \ @@ -1905,8 +1907,10 @@ regen-generated-cases: .PHONY: regen-executor-cases regen-executor-cases: $(PYTHON_FOR_REGEN) $(srcdir)/Tools/cases_generator/tier2_generator.py \ - -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/bytecodes.c + -o $(srcdir)/Python/executor_cases.c.h.new $(srcdir)/Python/executor_externals.c.new $(srcdir)/Include/internal/pycore_executor_externals.h.new $(srcdir)/Python/bytecodes.c $(UPDATE_FILE) $(srcdir)/Python/executor_cases.c.h $(srcdir)/Python/executor_cases.c.h.new + $(UPDATE_FILE) $(srcdir)/Python/executor_externals.c $(srcdir)/Python/executor_externals.c.new + $(UPDATE_FILE) $(srcdir)/Include/internal/pycore_executor_externals.h $(srcdir)/Include/internal/pycore_executor_externals.h.new .PHONY: regen-optimizer-cases regen-optimizer-cases: diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 82471e0f140ec3..ed43a1252a62eb 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -202,6 +202,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 97c52fdadf7c05..296ed8cb623dcb 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -136,6 +136,7 @@ Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index c944bbafdba7e5..28f60c61c0a293 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -226,6 +226,7 @@ + @@ -563,6 +564,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 0afad125ce1e97..e48111c70def3f 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -510,6 +510,9 @@ Include\cpython + + Include\internal + Include\internal @@ -1283,6 +1286,9 @@ Python + + Python + Python diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 5cd9db97c71e37..1ec188190218f3 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1571,7 +1571,7 @@ dummy_func( Py_XDECREF(oldobj); } - inst(COPY_FREE_VARS, (--)) { + externalize inst(COPY_FREE_VARS, (--)) { /* Copy closure variables to free variables */ PyCodeObject *co = _PyFrame_GetCode(frame); assert(PyFunction_Check(frame->f_funcobj)); @@ -3129,7 +3129,7 @@ dummy_func( DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type); } - op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) { + externalize op(_INIT_CALL_BOUND_METHOD_EXACT_ARGS, (callable, unused, unused[oparg] -- func, self, unused[oparg])) { STAT_INC(CALL, hit); self = Py_NewRef(((PyMethodObject *)callable)->im_self); stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS @@ -3157,7 +3157,7 @@ dummy_func( DEOPT_IF(tstate->py_recursion_remaining <= 1); } - replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { + externalize replicate(5) pure op(_INIT_CALL_PY_EXACT_ARGS, (callable, self_or_null, args[oparg] -- new_frame: _PyInterpreterFrame*)) { int has_self = (self_or_null != NULL); STAT_INC(CALL, hit); PyFunctionObject *func = (PyFunctionObject *)callable; @@ -3538,7 +3538,7 @@ dummy_func( DISPATCH(); } - op(_CALL_METHOD_DESCRIPTOR_O, (callable, self_or_null, args[oparg] -- res)) { + op(_CALL_METHOD_DESCRIPTOR_O, (callable, self_or_null, args[oparg] -- res)) { int total_args = oparg; if (self_or_null != NULL) { args--; @@ -3847,7 +3847,7 @@ dummy_func( func = (PyObject *)func_obj; } - inst(SET_FUNCTION_ATTRIBUTE, (attr, func -- func)) { + externalize inst(SET_FUNCTION_ATTRIBUTE, (attr, func -- func)) { assert(PyFunction_Check(func)); PyFunctionObject *func_obj = (PyFunctionObject *)func; switch(oparg) { diff --git a/Python/ceval.c b/Python/ceval.c index d34db61eecbae2..f3fc703a7e7f83 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -8,6 +8,7 @@ #include "pycore_ceval.h" #include "pycore_code.h" #include "pycore_emscripten_signal.h" // _Py_CHECK_EMSCRIPTEN_SIGNALS +#include "pycore_executor_externals.h" #include "pycore_function.h" #include "pycore_instruments.h" #include "pycore_intrinsics.h" diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 224b600b8f6a4a..ccfd9cafc3d668 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1428,17 +1428,7 @@ } case _COPY_FREE_VARS: { - oparg = CURRENT_OPARG(); - /* Copy closure variables to free variables */ - PyCodeObject *co = _PyFrame_GetCode(frame); - assert(PyFunction_Check(frame->f_funcobj)); - PyObject *closure = ((PyFunctionObject *)frame->f_funcobj)->func_closure; - assert(oparg == co->co_nfreevars); - int offset = co->co_nlocalsplus - oparg; - for (int i = 0; i < oparg; ++i) { - PyObject *o = PyTuple_GET_ITEM(closure, i); - frame->localsplus[offset + i] = Py_NewRef(o); - } + stack_pointer = _COPY_FREE_VARS_func(tstate, frame, stack_pointer, CURRENT_OPARG()); break; } @@ -2711,19 +2701,7 @@ } case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: { - PyObject *callable; - PyObject *func; - PyObject *self; - oparg = CURRENT_OPARG(); - callable = stack_pointer[-2 - oparg]; - STAT_INC(CALL, hit); - self = Py_NewRef(((PyMethodObject *)callable)->im_self); - stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS - func = Py_NewRef(((PyMethodObject *)callable)->im_func); - stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization - Py_DECREF(callable); - stack_pointer[-2 - oparg] = func; - stack_pointer[-1 - oparg] = self; + stack_pointer = _INIT_CALL_BOUND_METHOD_EXACT_ARGS_func(tstate, frame, stack_pointer, CURRENT_OPARG()); break; } @@ -2759,145 +2737,32 @@ } case _INIT_CALL_PY_EXACT_ARGS_0: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 0; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int has_self = (self_or_null != NULL); - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); - PyObject **first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; - for (int i = 0; i < oparg; i++) { - first_non_self_local[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; + stack_pointer = _INIT_CALL_PY_EXACT_ARGS_0_func(tstate, frame, stack_pointer); break; } case _INIT_CALL_PY_EXACT_ARGS_1: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 1; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int has_self = (self_or_null != NULL); - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); - PyObject **first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; - for (int i = 0; i < oparg; i++) { - first_non_self_local[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; + stack_pointer = _INIT_CALL_PY_EXACT_ARGS_1_func(tstate, frame, stack_pointer); break; } case _INIT_CALL_PY_EXACT_ARGS_2: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 2; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int has_self = (self_or_null != NULL); - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); - PyObject **first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; - for (int i = 0; i < oparg; i++) { - first_non_self_local[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; + stack_pointer = _INIT_CALL_PY_EXACT_ARGS_2_func(tstate, frame, stack_pointer); break; } case _INIT_CALL_PY_EXACT_ARGS_3: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 3; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int has_self = (self_or_null != NULL); - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); - PyObject **first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; - for (int i = 0; i < oparg; i++) { - first_non_self_local[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; + stack_pointer = _INIT_CALL_PY_EXACT_ARGS_3_func(tstate, frame, stack_pointer); break; } case _INIT_CALL_PY_EXACT_ARGS_4: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = 4; - assert(oparg == CURRENT_OPARG()); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int has_self = (self_or_null != NULL); - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); - PyObject **first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; - for (int i = 0; i < oparg; i++) { - first_non_self_local[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; + stack_pointer = _INIT_CALL_PY_EXACT_ARGS_4_func(tstate, frame, stack_pointer); break; } case _INIT_CALL_PY_EXACT_ARGS: { - PyObject **args; - PyObject *self_or_null; - PyObject *callable; - _PyInterpreterFrame *new_frame; - oparg = CURRENT_OPARG(); - args = &stack_pointer[-oparg]; - self_or_null = stack_pointer[-1 - oparg]; - callable = stack_pointer[-2 - oparg]; - int has_self = (self_or_null != NULL); - STAT_INC(CALL, hit); - PyFunctionObject *func = (PyFunctionObject *)callable; - new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); - PyObject **first_non_self_local = new_frame->localsplus + has_self; - new_frame->localsplus[0] = self_or_null; - for (int i = 0; i < oparg; i++) { - first_non_self_local[i] = args[i]; - } - stack_pointer[-2 - oparg] = (PyObject *)new_frame; - stack_pointer += -1 - oparg; + stack_pointer = _INIT_CALL_PY_EXACT_ARGS_func(tstate, frame, stack_pointer, CURRENT_OPARG()); break; } @@ -3393,37 +3258,7 @@ } case _SET_FUNCTION_ATTRIBUTE: { - PyObject *func; - PyObject *attr; - oparg = CURRENT_OPARG(); - func = stack_pointer[-1]; - attr = stack_pointer[-2]; - assert(PyFunction_Check(func)); - PyFunctionObject *func_obj = (PyFunctionObject *)func; - switch(oparg) { - case MAKE_FUNCTION_CLOSURE: - assert(func_obj->func_closure == NULL); - func_obj->func_closure = attr; - break; - case MAKE_FUNCTION_ANNOTATIONS: - assert(func_obj->func_annotations == NULL); - func_obj->func_annotations = attr; - break; - case MAKE_FUNCTION_KWDEFAULTS: - assert(PyDict_CheckExact(attr)); - assert(func_obj->func_kwdefaults == NULL); - func_obj->func_kwdefaults = attr; - break; - case MAKE_FUNCTION_DEFAULTS: - assert(PyTuple_CheckExact(attr)); - assert(func_obj->func_defaults == NULL); - func_obj->func_defaults = attr; - break; - default: - Py_UNREACHABLE(); - } - stack_pointer[-2] = func; - stack_pointer += -1; + stack_pointer = _SET_FUNCTION_ATTRIBUTE_func(tstate, frame, stack_pointer, CURRENT_OPARG()); break; } diff --git a/Python/executor_externals.c b/Python/executor_externals.c new file mode 100644 index 00000000000000..a493dbc99c8d52 --- /dev/null +++ b/Python/executor_externals.c @@ -0,0 +1,232 @@ +// This file is generated by Tools/cases_generator/tier2_generator.py +// from: +// Python/bytecodes.c +// Do not edit! + +#include "Python.h" + +#include "pycore_call.h" +#include "pycore_ceval.h" +#include "pycore_dict.h" +#include "pycore_emscripten_signal.h" +#include "pycore_executor_externals.h" +#include "pycore_intrinsics.h" +#include "pycore_long.h" +#include "pycore_opcode_metadata.h" +#include "pycore_opcode_utils.h" +#include "pycore_optimizer.h" +#include "pycore_range.h" +#include "pycore_setobject.h" +#include "pycore_sliceobject.h" +#include "pycore_descrobject.h" + +#include "ceval_macros.h" + +#define TIER_TWO 2 + + PyObject ** _COPY_FREE_VARS_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg) { + /* Copy closure variables to free variables */ + PyCodeObject *co = _PyFrame_GetCode(frame); + assert(PyFunction_Check(frame->f_funcobj)); + PyObject *closure = ((PyFunctionObject *)frame->f_funcobj)->func_closure; + assert(oparg == co->co_nfreevars); + int offset = co->co_nlocalsplus - oparg; + for (int i = 0; i < oparg; ++i) { + PyObject *o = PyTuple_GET_ITEM(closure, i); + frame->localsplus[offset + i] = Py_NewRef(o); + } + return stack_pointer; + } + + PyObject ** _INIT_CALL_BOUND_METHOD_EXACT_ARGS_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg) { + PyObject *callable; + PyObject *func; + PyObject *self; + callable = stack_pointer[-2 - oparg]; + STAT_INC(CALL, hit); + self = Py_NewRef(((PyMethodObject *)callable)->im_self); + stack_pointer[-1 - oparg] = self; // Patch stack as it is used by _INIT_CALL_PY_EXACT_ARGS + func = Py_NewRef(((PyMethodObject *)callable)->im_func); + stack_pointer[-2 - oparg] = func; // This is used by CALL, upon deoptimization + Py_DECREF(callable); + stack_pointer[-2 - oparg] = func; + stack_pointer[-1 - oparg] = self; + return stack_pointer; + } + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_0_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer) { + int oparg; + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 0; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int has_self = (self_or_null != NULL); + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); + PyObject **first_non_self_local = new_frame->localsplus + has_self; + new_frame->localsplus[0] = self_or_null; + for (int i = 0; i < oparg; i++) { + first_non_self_local[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + return stack_pointer; + } + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_1_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer) { + int oparg; + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 1; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int has_self = (self_or_null != NULL); + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); + PyObject **first_non_self_local = new_frame->localsplus + has_self; + new_frame->localsplus[0] = self_or_null; + for (int i = 0; i < oparg; i++) { + first_non_self_local[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + return stack_pointer; + } + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_2_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer) { + int oparg; + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 2; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int has_self = (self_or_null != NULL); + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); + PyObject **first_non_self_local = new_frame->localsplus + has_self; + new_frame->localsplus[0] = self_or_null; + for (int i = 0; i < oparg; i++) { + first_non_self_local[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + return stack_pointer; + } + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_3_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer) { + int oparg; + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 3; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int has_self = (self_or_null != NULL); + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); + PyObject **first_non_self_local = new_frame->localsplus + has_self; + new_frame->localsplus[0] = self_or_null; + for (int i = 0; i < oparg; i++) { + first_non_self_local[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + return stack_pointer; + } + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_4_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer) { + int oparg; + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + oparg = 4; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int has_self = (self_or_null != NULL); + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); + PyObject **first_non_self_local = new_frame->localsplus + has_self; + new_frame->localsplus[0] = self_or_null; + for (int i = 0; i < oparg; i++) { + first_non_self_local[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + return stack_pointer; + } + + PyObject ** _INIT_CALL_PY_EXACT_ARGS_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg) { + PyObject **args; + PyObject *self_or_null; + PyObject *callable; + _PyInterpreterFrame *new_frame; + args = &stack_pointer[-oparg]; + self_or_null = stack_pointer[-1 - oparg]; + callable = stack_pointer[-2 - oparg]; + int has_self = (self_or_null != NULL); + STAT_INC(CALL, hit); + PyFunctionObject *func = (PyFunctionObject *)callable; + new_frame = _PyFrame_PushUnchecked(tstate, func, oparg + has_self); + PyObject **first_non_self_local = new_frame->localsplus + has_self; + new_frame->localsplus[0] = self_or_null; + for (int i = 0; i < oparg; i++) { + first_non_self_local[i] = args[i]; + } + stack_pointer[-2 - oparg] = (PyObject *)new_frame; + stack_pointer += -1 - oparg; + return stack_pointer; + } + + PyObject ** _SET_FUNCTION_ATTRIBUTE_func(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject **stack_pointer, int oparg) { + PyObject *func; + PyObject *attr; + func = stack_pointer[-1]; + attr = stack_pointer[-2]; + assert(PyFunction_Check(func)); + PyFunctionObject *func_obj = (PyFunctionObject *)func; + switch(oparg) { + case MAKE_FUNCTION_CLOSURE: + assert(func_obj->func_closure == NULL); + func_obj->func_closure = attr; + break; + case MAKE_FUNCTION_ANNOTATIONS: + assert(func_obj->func_annotations == NULL); + func_obj->func_annotations = attr; + break; + case MAKE_FUNCTION_KWDEFAULTS: + assert(PyDict_CheckExact(attr)); + assert(func_obj->func_kwdefaults == NULL); + func_obj->func_kwdefaults = attr; + break; + case MAKE_FUNCTION_DEFAULTS: + assert(PyTuple_CheckExact(attr)); + assert(func_obj->func_defaults == NULL); + func_obj->func_defaults = attr; + break; + default: + Py_UNREACHABLE(); + } + stack_pointer[-2] = func; + stack_pointer += -1; + return stack_pointer; + } + diff --git a/Python/jit.c b/Python/jit.c index 03bcf1142715f3..7be7960125349a 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -6,6 +6,7 @@ #include "pycore_call.h" #include "pycore_ceval.h" #include "pycore_dict.h" +#include "pycore_executor_externals.h" #include "pycore_intrinsics.h" #include "pycore_long.h" #include "pycore_opcode_metadata.h" diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 2329205ad31d09..fb2f4304020ac0 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -28,6 +28,7 @@ class Properties: tier: int | None = None oparg_and_1: bool = False const_oparg: int = -1 + externalize: bool = False def dump(self, indent: str) -> None: print(indent, end="") @@ -55,6 +56,7 @@ def from_list(properties: list["Properties"]) -> "Properties": side_exit=any(p.side_exit for p in properties), pure=all(p.pure for p in properties), passthrough=all(p.passthrough for p in properties), + externalize=all(p.externalize for p in properties), ) @property @@ -82,6 +84,7 @@ def infallible(self) -> bool: side_exit=False, pure=False, passthrough=False, + externalize=False, ) @@ -559,6 +562,7 @@ def compute_properties(op: parser.InstDef) -> Properties: pure="pure" in op.annotations, passthrough=passthrough, tier=tier_variable(op), + externalize="externalize" in op.annotations, ) diff --git a/Tools/cases_generator/lexer.py b/Tools/cases_generator/lexer.py index 13aee94f2b957c..be4f5119ae713a 100644 --- a/Tools/cases_generator/lexer.py +++ b/Tools/cases_generator/lexer.py @@ -226,6 +226,7 @@ def choice(*opts: str) -> str: "replicate", "tier1", "tier2", + "externalize", } __all__ = [] diff --git a/Tools/cases_generator/tier2_generator.py b/Tools/cases_generator/tier2_generator.py index 114d28ee745632..95f2302710fe3e 100644 --- a/Tools/cases_generator/tier2_generator.py +++ b/Tools/cases_generator/tier2_generator.py @@ -6,6 +6,7 @@ import argparse import os.path import sys +import textwrap from analyzer import ( Analysis, @@ -30,7 +31,11 @@ from lexer import Token from stack import StackOffset, Stack, SizeMismatch -DEFAULT_OUTPUT = ROOT / "Python/executor_cases.c.h" +DEFAULT_OUTPUT = [ + ROOT / "Python/executor_cases.c.h", + ROOT / "Python/executor_externals.c", + ROOT / "Include/internal/pycore_executor_externals.h" +] def declare_variable( @@ -150,12 +155,13 @@ def tier2_replace_oparg( def write_uop(uop: Uop, out: CWriter, stack: Stack) -> None: try: out.start_line() - if uop.properties.oparg: + if uop.properties.oparg and not uop.properties.externalize: out.emit("oparg = CURRENT_OPARG();\n") assert uop.properties.const_oparg < 0 elif uop.properties.const_oparg >= 0: out.emit(f"oparg = {uop.properties.const_oparg};\n") - out.emit(f"assert(oparg == CURRENT_OPARG());\n") + if not uop.properties.externalize: + out.emit("assert(oparg == CURRENT_OPARG());\n") for var in reversed(uop.stack.inputs): out.emit(stack.pop(var)) if not uop.properties.stores_sp: @@ -192,6 +198,7 @@ def generate_tier2( #define TIER_TWO 2 """ ) + out = CWriter(outfile, 2, lines) out.emit("\n") for name, uop in analysis.uops.items(): @@ -207,12 +214,20 @@ def generate_tier2( out.emit(f"/* {uop.name} is not a viable micro-op for tier 2 because it {why_not_viable} */\n\n") continue out.emit(f"case {uop.name}: {{\n") - declare_variables(uop, out) - stack = Stack() - write_uop(uop, out, stack) + if uop.properties.externalize: + stack = None + out.emit(f"stack_pointer = {uop.name}_func(tstate, frame, stack_pointer") + if uop.properties.const_oparg < 0: + out.emit(", CURRENT_OPARG()") + out.emit(");\n") + else: + declare_variables(uop, out) + stack = Stack() + write_uop(uop, out, stack) out.start_line() if not uop.properties.always_exits: - stack.flush(out) + if stack is not None: + stack.flush(out) if uop.properties.ends_with_eval_breaker: out.emit("CHECK_EVAL_BREAKER();\n") out.emit("break;\n") @@ -222,13 +237,121 @@ def generate_tier2( outfile.write("#undef TIER_TWO\n") +def get_external_signature(name: str, uop: Uop) -> str: + args = [ + "PyThreadState *tstate", + "_PyInterpreterFrame *frame", + "PyObject **stack_pointer" + ] + if uop.properties.const_oparg < 0: + args.append("int oparg") + if not name.startswith("_"): + name = "_" + name + return f"PyObject ** {name}_func({', '.join(args)})" + + +def generate_tier2_externals( + filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool +) -> None: + write_header(__file__, filenames, outfile) + outfile.write(textwrap.dedent( + """ + #include "Python.h" + + #include "pycore_call.h" + #include "pycore_ceval.h" + #include "pycore_dict.h" + #include "pycore_emscripten_signal.h" + #include "pycore_executor_externals.h" + #include "pycore_intrinsics.h" + #include "pycore_long.h" + #include "pycore_opcode_metadata.h" + #include "pycore_opcode_utils.h" + #include "pycore_optimizer.h" + #include "pycore_range.h" + #include "pycore_setobject.h" + #include "pycore_sliceobject.h" + #include "pycore_descrobject.h" + + #include "ceval_macros.h" + + #define TIER_TWO 2 + """ + )) + + out = CWriter(outfile, 2, lines) + out.emit("\n") + for name, uop in analysis.uops.items(): + if uop.properties.tier == 1: + continue + if uop.properties.externalize: + out.emit(get_external_signature(name, uop)) + out.emit(" {\n") + + if uop.properties.const_oparg >= 0: + out.emit("int oparg;\n") + declare_variables(uop, out) + stack = Stack() + write_uop(uop, out, stack) + stack.flush(out) + out.start_line() + out.emit("return stack_pointer;\n") + + out.emit("}\n\n") + + +def generate_tier2_externals_header( + filenames: list[str], analysis: Analysis, outfile: TextIO, lines: bool +) -> None: + write_header(__file__, filenames, outfile) + + outfile.write(textwrap.dedent( + """ + #ifndef Py_EXECUTOR_EXTERNALS_H + #define Py_EXECUTOR_EXTERNALS_H + #ifdef __cplusplus + extern "C" { + #endif + + #ifndef Py_BUILD_CORE + # error "this header requires Py_BUILD_CORE define" + #endif + + #include "Python.h" + + #include "pytypedefs.h" + #include "pycore_frame.h" + """ + )) + + out = CWriter(outfile, 2, lines) + out.emit("\n") + for name, uop in analysis.uops.items(): + if uop.properties.tier == 1: + continue + if uop.properties.externalize: + out.emit(get_external_signature(name, uop)) + out.emit(";\n\n") + + out.start_line() + outfile.write(textwrap.dedent( + """ + #ifdef __cplusplus + } + #endif + + #endif + """ + )) + + arg_parser = argparse.ArgumentParser( description="Generate the code for the tier 2 interpreter.", formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) arg_parser.add_argument( - "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT + "-o", "--output", type=str, help="Generated code", nargs=3, default=DEFAULT_OUTPUT ) arg_parser.add_argument( @@ -244,5 +367,9 @@ def generate_tier2( if len(args.input) == 0: args.input.append(DEFAULT_INPUT) data = analyze_files(args.input) - with open(args.output, "w") as outfile: + with open(args.output[0], "w") as outfile: generate_tier2(args.input, data, outfile, args.emit_line_directives) + with open(args.output[1], "w") as outfile: + generate_tier2_externals(args.input, data, outfile, args.emit_line_directives) + with open(args.output[2], "w") as outfile: + generate_tier2_externals_header(args.input, data, outfile, args.emit_line_directives) diff --git a/Tools/jit/template.c b/Tools/jit/template.c index f8be4d7f78facd..550ca16f7906dd 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -4,6 +4,7 @@ #include "pycore_ceval.h" #include "pycore_dict.h" #include "pycore_emscripten_signal.h" +#include "pycore_executor_externals.h" #include "pycore_intrinsics.h" #include "pycore_jit.h" #include "pycore_long.h"